*/
 
 #include <linux/version.h>
+#include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
 #include <linux/init.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include "kmmio.h"
+#include <linux/mmiotrace.h>
 
-#define KMMIO_HASH_BITS 6
-#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
 #define KMMIO_PAGE_HASH_BITS 4
 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
 
+struct kmmio_fault_page {
+       struct list_head list;
+       struct kmmio_fault_page *release_next;
+       unsigned long page; /* location of the fault page */
+
+       /*
+        * Number of times this page has been registered as part
+        * of a probe. If zero, the page is disarmed and this may be freed.
+        * Used only by writers (RCU).
+        */
+       int count;
+};
+
+struct kmmio_delayed_release {
+       struct rcu_head rcu;
+       struct kmmio_fault_page *release_list;
+};
+
 struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
+       unsigned long addr;
        int active;
 };
 
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-                                               unsigned long address);
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                                                                void *args);
 
+static DECLARE_MUTEX(kmmio_init_mutex);
 static DEFINE_SPINLOCK(kmmio_lock);
 
 /* These are protected by kmmio_lock */
+static int kmmio_initialized;
 unsigned int kmmio_count;
-static unsigned int handler_registered;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+       return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
 /* Accessed per-cpu */
 static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
 
+/* protected by kmmio_init_mutex */
 static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
 };
 
-int init_kmmio(void)
+/**
+ * Makes sure kmmio is initialized and usable.
+ * This must be called before any other kmmio function defined here.
+ * May sleep.
+ */
+void reference_kmmio(void)
 {
-       int i;
-       for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
-               INIT_LIST_HEAD(&kmmio_page_table[i]);
-
-       register_die_notifier(&nb_die);
-       return 0;
+       down(&kmmio_init_mutex);
+       spin_lock_irq(&kmmio_lock);
+       if (!kmmio_initialized) {
+               int i;
+               for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+                       INIT_LIST_HEAD(&kmmio_page_table[i]);
+               if (register_die_notifier(&nb_die))
+                       BUG();
+       }
+       kmmio_initialized++;
+       spin_unlock_irq(&kmmio_lock);
+       up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL_GPL(reference_kmmio);
 
-void cleanup_kmmio(void)
+/**
+ * Clean up kmmio after use. This must be called for every call to
+ * reference_kmmio(). All probes registered after the corresponding
+ * reference_kmmio() must be unregistered before calling this.
+ * May sleep.
+ */
+void unreference_kmmio(void)
 {
-       /*
-        * Assume the following have been already cleaned by calling
-        * unregister_kmmio_probe() appropriately:
-        * kmmio_page_table, kmmio_probes
-        */
-       if (handler_registered) {
-               if (mmiotrace_unregister_pf(&kmmio_page_fault))
-                       BUG();
-               synchronize_rcu();
+       bool unreg = false;
+
+       down(&kmmio_init_mutex);
+       spin_lock_irq(&kmmio_lock);
+
+       if (kmmio_initialized == 1) {
+               BUG_ON(is_kmmio_active());
+               unreg = true;
        }
-       unregister_die_notifier(&nb_die);
+       kmmio_initialized--;
+       BUG_ON(kmmio_initialized < 0);
+       spin_unlock_irq(&kmmio_lock);
+
+       if (unreg)
+               unregister_die_notifier(&nb_die); /* calls synchronize_rcu() */
+       up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL(unreference_kmmio);
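A minimal caller sketch of the pairing these two functions require (hypothetical module code; only reference_kmmio() and unreference_kmmio() are from this patch):

static int __init example_init(void)
{
        reference_kmmio();      /* may sleep; first caller initializes */
        return 0;
}

static void __exit example_exit(void)
{
        /* every probe registered since example_init() must be gone */
        unreference_kmmio();    /* may sleep; last caller tears down */
}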
 
 /*
  * this is basically a dynamic stabbing problem:
  * Overlap a Point (might be simple)
  * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
  */
-/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
 static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 {
        struct kmmio_probe *p;
-       list_for_each_entry(p, &kmmio_probes, list) {
+       list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr <= (p->addr + p->len))
                        return p;
        }
        return NULL;
 }
 
+/* You must be holding RCU read lock. */
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
-       struct list_head *head, *tmp;
+       struct list_head *head;
+       struct kmmio_fault_page *p;
 
        page &= PAGE_MASK;
-       head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
-       list_for_each(tmp, head) {
-               struct kmmio_fault_page *p
-                       = list_entry(tmp, struct kmmio_fault_page, list);
+       head = kmmio_page_list(page);
+       list_for_each_entry_rcu(p, head, list) {
                if (p->page == page)
                        return p;
        }
-
        return NULL;
 }
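Both lookups are safe only inside an RCU read-side critical section, as the comments say; a caller outside the fault path would look roughly like this (a sketch; handle_probe() is a hypothetical consumer):

struct kmmio_probe *p;

rcu_read_lock();
p = get_kmmio_probe(addr);
if (p)
        handle_probe(p);        /* p is valid only until the unlock */
rcu_read_unlock();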
 
+/** Mark the given page as not present. Access to it will trigger a fault. */
 static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 {
        unsigned long address = page & PAGE_MASK;
        pte_t *pte = lookup_address(address, &level);
 
        if (!pte) {
-               printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-                                               __FUNCTION__, page);
+               pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+                                                       __func__, page);
                return;
        }
 
        __flush_tlb_one(page);
 }
 
+/** Mark the given page as present. */
 static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 {
        unsigned long address = page & PAGE_MASK;
        pte_t *pte = lookup_address(address, &level);
 
        if (!pte) {
-               printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-                                               __FUNCTION__, page);
+               pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+                                                       __func__, page);
                return;
        }
 
        __flush_tlb_one(page);
 }
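The excerpt elides the PTE manipulation between the lookup and the TLB flush; conceptually, arming clears the present bit and disarming sets it back, along these lines (a sketch of the 4K case, not the verbatim patch body):

/* arm: hide the page so that the next access faults */
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));

/* disarm: make the page accessible again */
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));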
 
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefetching may
+ * trigger a page fault. We may be in the middle of a process switch.
+ * We cannot take any locks, because we could already be executing
+ * inside a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
  */
-static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 {
-       struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+       struct kmmio_context *ctx;
+       struct kmmio_fault_page *faultpage;
 
        /*
         * Preemption is now disabled to prevent a process switch during
         * single stepping.
         * XXX what if an interrupt occurs between returning from
         * do_page_fault() and entering the single-step exception handler?
         * And that interrupt triggers a kmmio trap?
+        * XXX If we are tracing an interrupt service routine or whatever,
+        * is this enough to keep it on the current CPU?
         */
        preempt_disable();
 
-       /* interrupts disabled and CPU-local data => atomicity guaranteed. */
+       rcu_read_lock();
+       faultpage = get_kmmio_fault_page(addr);
+       if (!faultpage) {
+               /*
+                * Either this page fault is not caused by kmmio, or
+                * another CPU just pulled the kmmio probe from under
+                * our feet. In the latter case all hell breaks loose.
+                */
+               goto no_kmmio;
+       }
+
+       ctx = &get_cpu_var(kmmio_ctx);
        if (ctx->active) {
                /*
-                * This avoids a deadlock with kmmio_lock.
+                * Prevent overwriting already in-flight context.
                 * If this page fault really was due to kmmio trap,
                 * all hell breaks loose.
                 */
-               printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
-                                       "for address %lu. Ignoring.\n",
+               pr_emerg("kmmio: recursive probe hit on CPU %d, "
+                                       "for address 0x%08lx. Ignoring.\n",
                                        smp_processor_id(), addr);
-               goto no_kmmio;
+               goto no_kmmio_ctx;
        }
        ctx->active++;
 
-       /*
-        * Acquire the kmmio lock to prevent changes affecting
-        * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
-        * returned pointers.
-        * The lock is released in post_kmmio_handler().
-        * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
-        */
-       spin_lock(&kmmio_lock);
-
-       ctx->fpage = get_kmmio_fault_page(addr);
-       if (!ctx->fpage) {
-               /* this page fault is not caused by kmmio */
-               goto no_kmmio_locked;
-       }
-
+       ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(addr);
        ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+       ctx->addr = addr;
 
        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);
        regs->flags |= TF_MASK;
        regs->flags &= ~IF_MASK;
 
-       /* We hold lock, now we set present bit in PTE and single step. */
+       /* Now we set present bit in PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage->page, NULL);
 
        put_cpu_var(kmmio_ctx);
+       rcu_read_unlock();
        return 1;
 
-no_kmmio_locked:
-       spin_unlock(&kmmio_lock);
-       ctx->active--;
+no_kmmio_ctx:
+       put_cpu_var(kmmio_ctx);
 no_kmmio:
+       rcu_read_unlock();
        preempt_enable_no_resched();
-       put_cpu_var(kmmio_ctx);
-       /* page fault not handled by kmmio */
-       return 0;
+       return 0; /* page fault not handled by kmmio */
 }
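This replaces the page fault notifier that the deleted kmmio_page_fault() used to provide; the intended call site is a direct hook early in do_page_fault(), roughly:

/* in do_page_fault(), before normal fault processing (sketch): */
if (is_kmmio_active())
        if (kmmio_handler(regs, address) == 1)
                return; /* kmmio trap consumed; single step is armed */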
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
- * And we hold kmmio lock.
+ * This must always get called as the pair to kmmio_handler().
  */
 static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 {
        int ret = 0;
+       struct kmmio_probe *probe;
+       struct kmmio_fault_page *faultpage;
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
        if (!ctx->active)
                goto out;
 
+       rcu_read_lock();
+
+       faultpage = get_kmmio_fault_page(ctx->addr);
+       probe = get_kmmio_probe(ctx->addr);
+       if (faultpage != ctx->fpage || probe != ctx->probe) {
+               /*
+                * The trace setup changed after kmmio_handler() and before
+                * running this respective post handler. User does not want
+                * the result anymore.
+                */
+               ctx->probe = NULL;
+               ctx->fpage = NULL;
+       }
+
        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);
 
-       arm_kmmio_fault_page(ctx->fpage->page, NULL);
+       if (ctx->fpage)
+               arm_kmmio_fault_page(ctx->fpage->page, NULL);
 
        regs->flags &= ~TF_MASK;
        regs->flags |= ctx->saved_flags;
 
        /* These were acquired in kmmio_handler(). */
        ctx->active--;
-       spin_unlock(&kmmio_lock);
+       BUG_ON(ctx->active);
        preempt_enable_no_resched();
 
        /*
         * If somebody else is single-stepping across a probe point, flags
         * will have TF set, in which case continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & TF_MASK))
                ret = 1;
 
+       rcu_read_unlock();
 out:
        put_cpu_var(kmmio_ctx);
        return ret;
 }
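Probe callbacks run inside these two trap handlers and inherit their constraints: no locks, no sleeping, interrupts disabled. A minimal handler pair might look like this (hypothetical example; my_hits is purely illustrative):

static unsigned long my_hits;   /* illustrative counter */

static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
                                unsigned long addr)
{
        /* called from kmmio_handler(): page disarmed, IRQs off */
        my_hits++;
}

static void my_post(struct kmmio_probe *p, unsigned long condition,
                                struct pt_regs *regs)
{
        /* called from post_kmmio_handler() after the single step */
}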
 
+/* You must be holding kmmio_lock. */
 static int add_kmmio_fault_page(unsigned long page)
 {
        struct kmmio_fault_page *f;
        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (f) {
+               if (!f->count)
+                       arm_kmmio_fault_page(f->page, NULL);
                f->count++;
                return 0;
        }
 
        f = kmalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->page = page;
-       list_add(&f->list,
-                &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
+       list_add_rcu(&f->list, kmmio_page_list(f->page));
 
        arm_kmmio_fault_page(f->page, NULL);
 
        return 0;
 }
 
-static void release_kmmio_fault_page(unsigned long page)
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+                               struct kmmio_fault_page **release_list)
 {
        struct kmmio_fault_page *f;
 
        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (!f)
                return;
 
        f->count--;
+       BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f->page, NULL);
-               list_del(&f->list);
+               f->release_next = *release_list;
+               *release_list = f;
        }
 }
 
                ret = -EEXIST;
                goto out;
        }
-       list_add(&p->list, &kmmio_probes);
-       /*printk("adding fault pages...\n");*/
+       list_add_rcu(&p->list, &kmmio_probes);
        while (size < p->len) {
                if (add_kmmio_fault_page(p->addr + size))
-                       printk(KERN_ERR "mmio: Unable to set page fault.\n");
+                       pr_err("kmmio: Unable to set page fault.\n");
                size += PAGE_SIZE;
        }
-
-       if (!handler_registered) {
-               if (mmiotrace_register_pf(&kmmio_page_fault))
-                       printk(KERN_ERR "mmiotrace: Cannot register page "
-                                       "fault handler.\n");
-               else
-                       handler_registered++;
-       }
-
 out:
        spin_unlock_irq(&kmmio_lock);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
-        * anymore.
+        * anymore. It seems it's not needed after all.
         */
        return ret;
 }
+EXPORT_SYMBOL(register_kmmio_probe);
 
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+       struct kmmio_delayed_release *dr = container_of(
+                                               head,
+                                               struct kmmio_delayed_release,
+                                               rcu);
+       struct kmmio_fault_page *p = dr->release_list;
+       while (p) {
+               struct kmmio_fault_page *next = p->release_next;
+               BUG_ON(p->count);
+               kfree(p);
+               p = next;
+       }
+       kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+       struct kmmio_delayed_release *dr = container_of(
+                                               head,
+                                               struct kmmio_delayed_release,
+                                               rcu);
+       struct kmmio_fault_page *p = dr->release_list;
+       struct kmmio_fault_page **prevp = &dr->release_list;
+       unsigned long flags;
+       spin_lock_irqsave(&kmmio_lock, flags);
+       while (p) {
+               if (!p->count) {
+                       /* still disarmed: unhash, keep on release_list */
+                       list_del_rcu(&p->list);
+                       prevp = &p->release_next;
+               } else {
+                       /* re-armed during grace period: do not free */
+                       *prevp = p->release_next;
+               }
+               p = p->release_next;
+       }
+       spin_unlock_irqrestore(&kmmio_lock, flags);
+       /* This is the real RCU destroy call. */
+       call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
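Chaining call_rcu() callbacks like this buys two grace periods: one so that no CPU can still be faulting on the disarmed pages, and a second one after list_del_rcu() so that no reader is still walking the hash lists. Schematically:

/*
 * unregister_kmmio_probe():    disarm pages, call_rcu(remove_...)
 *   ... grace period #1 ...    in-flight faults drain out
 * remove_kmmio_fault_pages():  list_del_rcu(), call_rcu(rcu_free_...)
 *   ... grace period #2 ...    hash list walkers drain out
 * rcu_free_kmmio_fault_pages(): kfree() is finally safe
 */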
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actually free the kmmio_fault_page structs with RCU.
+ */
 void unregister_kmmio_probe(struct kmmio_probe *p)
 {
        unsigned long size = 0;
+       struct kmmio_fault_page *release_list = NULL;
+       struct kmmio_delayed_release *drelease;
 
        spin_lock_irq(&kmmio_lock);
        while (size < p->len) {
-               release_kmmio_fault_page(p->addr + size);
+               release_kmmio_fault_page(p->addr + size, &release_list);
                size += PAGE_SIZE;
        }
-       list_del(&p->list);
+       list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irq(&kmmio_lock);
-}
 
-/*
- * According to 2.6.20, mainly x86_64 arch:
- * This is being called from do_page_fault(), via the page fault notifier
- * chain. The chain is called for both user space faults and kernel space
- * faults (address >= TASK_SIZE64), except not on faults serviced by
- * vmalloc_fault().
- *
- * We may be in an interrupt or a critical section. Also prefecthing may
- * trigger a page fault. We may be in the middle of process switch.
- * The page fault hook functionality has put us inside RCU read lock.
- *
- * Local interrupts are disabled, so preemption cannot happen.
- * Do not enable interrupts, do not sleep, and watch out for other CPUs.
- */
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-                                               unsigned long address)
-{
-       if (is_kmmio_active())
-               if (kmmio_handler(regs, address) == 1)
-                       return -1;
-       return 0;
+       drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+       if (!drelease) {
+               pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+               return;
+       }
+       drelease->release_list = release_list;
+
+       /*
+        * This is not really RCU here. We have just disarmed a set of
+        * pages so that they cannot trigger page faults anymore. However,
+        * we cannot remove the pages from kmmio_page_table,
+        * because a probe hit might be in flight on another CPU. The
+        * pages are collected into a list, and they will be removed from
+        * kmmio_page_table when it is certain that no probe hit related to
+        * these pages can be in flight. RCU grace period sounds like a
+        * good choice.
+        *
+        * If we removed the pages too early, the kmmio page fault handler
+        * might not find the respective kmmio_fault_page and would conclude
+        * it is not a kmmio fault, when it actually is. This would lead
+        * to madness.
+        */
+       call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
 }
+EXPORT_SYMBOL(unregister_kmmio_probe);
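Per the comment above, a caller that owns the probe memory must wait out a grace period after unregistering before freeing it, e.g. (hypothetical caller, mirroring the remap_trace usage later in this patch):

unregister_kmmio_probe(&trace->probe);
synchronize_rcu();      /* handlers may still run until this returns */
kfree(trace);           /* only now is the embedded probe safe to free */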
 
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                                                                void *args)
 
 #include <asm/atomic.h>
 #include <linux/percpu.h>
 
-#include "kmmio.h"
 #include "pf_in.h"
 
 /* This app's relay channel files will appear in /debug/mmio-trace */
        pte_t *pte = lookup_address(address, &level);
 
        if (!pte) {
-               printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-                                               __FUNCTION__, address);
+               pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n",
+                                                       __func__, address);
                return;
        }
 
        if (level == PG_LEVEL_2M) {
-               printk(KERN_EMERG MODULE_NAME ": 4MB pages are not "
-                                               "currently supported: %lx\n",
-                                               address);
+               pr_emerg(MODULE_NAME ": 4MB pages are not currently "
+                                               "supported: %lx\n", address);
                BUG();
        }
-       printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
+       pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
                                        address, pte_val(*pte),
                                        pte_val(*pte) & _PAGE_PRESENT);
 }
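Every converted message still pastes the MODULE_NAME prefix by hand; in later kernels the same effect comes from defining pr_fmt() once, before any #include (a possible follow-up, not something this patch does):

#define pr_fmt(fmt) MODULE_NAME ": " fmt

/* then the prefix can be dropped from each call: */
pr_err("Error in %s: no pte for page 0x%08lx\n", __func__, address);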
 static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
 {
        const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
-       printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, "
+       pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, "
                                        "last fault for address: %lx\n",
                                        addr, my_reason->addr);
        print_pte(addr);
        print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
        print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
                                                        my_reason->ip);
-       printk(KERN_EMERG
-                       "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+       pr_emerg("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
                        regs->ax, regs->bx, regs->cx, regs->dx);
-       printk(KERN_EMERG
-                       "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+       pr_emerg("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
                        regs->si, regs->di, regs->bp, regs->sp);
 #else
        print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
        print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
                                                        my_reason->ip);
-       printk(KERN_EMERG "rax: %016lx   rcx: %016lx   rdx: %016lx\n",
+       pr_emerg("rax: %016lx   rcx: %016lx   rdx: %016lx\n",
                                        regs->ax, regs->cx, regs->dx);
-       printk(KERN_EMERG "rsi: %016lx   rdi: %016lx   "
-                               "rbp: %016lx   rsp: %016lx\n",
+       pr_emerg("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
                                regs->si, regs->di, regs->bp, regs->sp);
 #endif
        put_cpu_var(pf_reason);
        struct trap_reason *my_reason = &get_cpu_var(pf_reason);
        struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);
 
+       /*
+        * XXX: This might not get called, if the probe is removed while
+        * a trace hit is in flight.
+        */
+
        /* this should always return the active_trace count to 0 */
        my_reason->active_traces--;
        if (my_reason->active_traces) {
-               printk(KERN_EMERG MODULE_NAME ": unexpected post handler");
+               pr_emerg(MODULE_NAME ": unexpected post handler");
                BUG();
        }
 
        atomic_t *drop = &per_cpu(dropped, cpu);
        int count;
        if (relay_buf_full(buf)) {
-               if (atomic_inc_return(drop) == 1) {
-                       printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n",
-                                                                       cpu);
-               }
+               if (atomic_inc_return(drop) == 1)
+                       pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu);
                return 0;
-       } else if ((count = atomic_read(drop))) {
-               printk(KERN_ERR MODULE_NAME
-                                       ": cpu %d buffer no longer full, "
-                                       "missed %d events.\n",
-                                       cpu, count);
+       }
+       count = atomic_read(drop);
+       if (count) {
+               pr_err(MODULE_NAME ": cpu %d buffer no longer full, "
+                                               "missed %d events.\n",
+                                               cpu, count);
                atomic_sub(count, drop);
        }
 
        /* Don't trace the low PCI/ISA area, it's always mapped.. */
        if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
                                        (offset + size > ISA_START_ADDRESS)) {
-               printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low "
-                                               "PCI/ISA area (0x%lx-0x%lx)\n",
+               pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area "
+                                               "(0x%lx-0x%lx)\n",
                                                offset, offset + size);
                return;
        }
 void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
 {
        void __iomem *p = ioremap_cache(offset, size);
-       printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
+       pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
                                                        offset, size, p);
        ioremap_trace_core(offset, size, p);
        return p;
 void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
 {
        void __iomem *p = ioremap_nocache(offset, size);
-       printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
+       pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
                                                        offset, size, p);
        ioremap_trace_core(offset, size, p);
        return p;
        };
        struct remap_trace *trace;
        struct remap_trace *tmp;
-       printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr);
+       pr_debug(MODULE_NAME ": Unmapping %p.\n", addr);
        record_timestamp(&event.header);
 
        spin_lock(&trace_list_lock);
 
        spin_lock(&trace_list_lock);
        list_for_each_entry_safe(trace, tmp, &trace_list, list) {
-               printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped "
+               pr_warning(MODULE_NAME ": purging non-iounmapped "
                                        "trace @0x%08lx, size 0x%lx.\n",
                                        trace->probe.addr, trace->probe.len);
                if (!nommiotrace)
 
        dir = debugfs_create_dir(APP_DIR, NULL);
        if (!dir) {
-               printk(KERN_ERR MODULE_NAME
-                               ": Couldn't create relay app directory.\n");
+               pr_err(MODULE_NAME ": Couldn't create relay app directory.\n");
                return -ENOMEM;
        }
 
        chan = create_channel(subbuf_size, n_subbufs);
        if (!chan) {
                debugfs_remove(dir);
-               printk(KERN_ERR MODULE_NAME
-                               ": relay app channel creation failed\n");
+               pr_err(MODULE_NAME ": relay app channel creation failed\n");
                return -ENOMEM;
        }
 
-       init_kmmio();
+       reference_kmmio();
 
        proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
        if (proc_marker_file)
                proc_marker_file->write_proc = write_marker;
 
-       printk(KERN_DEBUG MODULE_NAME ": loaded.\n");
+       pr_debug(MODULE_NAME ": loaded.\n");
        if (nommiotrace)
-               printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n");
+               pr_info(MODULE_NAME ": MMIO tracing disabled.\n");
        if (ISA_trace)
-               printk(KERN_WARNING MODULE_NAME
-                               ": Warning! low ISA range will be traced.\n");
+               pr_warning(MODULE_NAME ": Warning! low ISA range will be "
+                                                               "traced.\n");
        return 0;
 }
 
 static void __exit cleanup(void)
 {
-       printk(KERN_DEBUG MODULE_NAME ": unload...\n");
+       pr_debug(MODULE_NAME ": unload...\n");
        clear_trace_list();
-       cleanup_kmmio();
+       unreference_kmmio();
        remove_proc_entry(MARKER_FILE, NULL);
        destroy_channel();
        if (dir)