www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 20:43:06 +0000 (12:43 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 20:43:06 +0000 (12:43 -0800)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile: (31 commits)
  powerpc/oprofile: fix whitespaces in op_model_cell.c
  powerpc/oprofile: IBM CELL: add SPU event profiling support
  powerpc/oprofile: fix cell/pr_util.h
  powerpc/oprofile: IBM CELL: cleanup and restructuring
  oprofile: make new cpu buffer functions part of the api
  oprofile: remove #ifdef CONFIG_OPROFILE_IBS in non-ibs code
  ring_buffer: fix ring_buffer_event_length()
  oprofile: use new data sample format for ibs
  oprofile: add op_cpu_buffer_get_data()
  oprofile: add op_cpu_buffer_add_data()
  oprofile: rework implementation of cpu buffer events
  oprofile: modify op_cpu_buffer_read_entry()
  oprofile: add op_cpu_buffer_write_reserve()
  oprofile: rename variables in add_ibs_begin()
  oprofile: rename add_sample() in cpu_buffer.c
  oprofile: rename variable ibs_allowed to has_ibs in op_model_amd.c
  oprofile: making add_sample_entry() inline
  oprofile: remove backtrace code for ibs
  oprofile: remove unused ibs macro
  oprofile: remove unused components in struct oprofile_cpu_buffer
  ...

arch/powerpc/oprofile/cell/pr_util.h
arch/powerpc/oprofile/cell/spu_profiler.c
drivers/oprofile/buffer_sync.c
kernel/trace/ring_buffer.c

index dfdbffa068182b8383590f67d789449ca4f73171,a048b0b72be366b2bae384dbb44d89a63b1d83c3..964b93974d893003a0054109acfddb10c7a4a49b
  extern struct delayed_work spu_work;
  extern int spu_prof_running;
  
+ #define TRACE_ARRAY_SIZE 1024
+ extern spinlock_t oprof_spu_smpl_arry_lck;
  struct spu_overlay_info {     /* map of sections within an SPU overlay */
        unsigned int vma;       /* SPU virtual memory address from elf */
        unsigned int size;      /* size of section from elf */
@@@ -79,7 -83,7 +83,7 @@@ struct spu_buffer 
   * the vma-to-fileoffset map.
   */
  struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
 -                                           u64 objectid);
 +                                           unsigned long objectid);
  unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
                            unsigned int vma, const struct spu *aSpu,
                            int *grd_val);
@@@ -89,10 -93,11 +93,11 @@@ void vma_map_free(struct vma_to_fileoff
   * Entry point for SPU profiling.
   * cycles_reset is the SPU_CYCLES count value specified by the user.
   */
- int start_spu_profiling(unsigned int cycles_reset);
- void stop_spu_profiling(void);
+ int start_spu_profiling_cycles(unsigned int cycles_reset);
+ void start_spu_profiling_events(void);
  
+ void stop_spu_profiling_cycles(void);
+ void stop_spu_profiling_events(void);
  
  /* add the necessary profiling hooks */
  int spu_sync_start(void);
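
The hunk above splits the old start_spu_profiling()/stop_spu_profiling() pair into separate cycles and events entry points. A minimal sketch of how a caller could dispatch between the two paths, assuming a hypothetical helper and flag (the real dispatch lives in arch/powerpc/oprofile/op_model_cell.c and may look different):

    /*
     * Hypothetical dispatcher sketch, not the in-tree op_model_cell.c code:
     * pick the cycles or events profiling path declared in pr_util.h above.
     */
    #include "pr_util.h"

    static int profiling_spu_cycles;   /* assumed: set while parsing the event list */

    static int cell_spu_prof_start(unsigned int cycles_reset)
    {
            if (profiling_spu_cycles)
                    /* hrtimer-driven sampling of the hw trace buffer */
                    return start_spu_profiling_cycles(cycles_reset);

            /* event profiling is driven by the spu_work delayed work instead */
            start_spu_profiling_events();
            return 0;
    }

    static void cell_spu_prof_stop(void)
    {
            if (profiling_spu_cycles)
                    stop_spu_profiling_cycles();
            else
                    stop_spu_profiling_events();
    }
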
index 83faa958b9d4d3d353596b55c6a51eee5452ea52,de170b7ae71b7d25bdf4cd6d7088f745c0fb4a66..9305ddaac5125d941772dc45552264215d1adf8d
  #include <asm/cell-pmu.h>
  #include "pr_util.h"
  
- #define TRACE_ARRAY_SIZE 1024
  #define SCALE_SHIFT 14
  
  static u32 *samples;
  
+ /* spu_prof_running is a flag used to indicate if spu profiling is enabled
+  * or not.  It is set by the routines start_spu_profiling_cycles() and
+  * start_spu_profiling_events().  The flag is cleared by the routines
+  * stop_spu_profiling_cycles() and stop_spu_profiling_events().  These
+  * routines are called via global_start() and global_stop() which are called in
+  * op_powerpc_start() and op_powerpc_stop().  These routines are called once
+  * per system as a result of the user starting/stopping oprofile.  Hence, only
+  * one CPU per user at a time will be changing  the value of spu_prof_running.
+  * In general, OProfile does not protect against multiple users trying to run
+  * OProfile at a time.
+  */
  int spu_prof_running;
  static unsigned int profiling_interval;
  
@@@ -31,8 -41,8 +41,8 @@@
  
  #define SPU_PC_MASK        0xFFFF
  
- static DEFINE_SPINLOCK(sample_array_lock);
- unsigned long sample_array_lock_flags;
+ DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+ unsigned long oprof_spu_smpl_arry_lck_flags;
  
  void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
  {
@@@ -49,7 -59,7 +59,7 @@@
         * of precision.  This is close enough for the purpose at hand.
         *
         * The value of the timeout should be small enough that the hw
 -       * trace buffer will not get more then about 1/3 full for the
 +       * trace buffer will not get more than about 1/3 full for the
         * maximum user specified (the LFSR value) hw sampling frequency.
         * This is to ensure the trace buffer will never fill even if the
         * kernel thread scheduling varies under a heavy system load.
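
The comment above sizes the hrtimer period so the hardware trace buffer stays roughly a third full at the highest sampling rate the user can request. A sketch of the fixed-point arithmetic behind profiling_interval, using the SCALE_SHIFT of 14 defined earlier in this file (illustrative; the in-tree set_spu_profiling_frequency() may differ in detail):

    /*
     * Illustrative only.  freq_khz is the SPU clock in kHz, cycles_reset the
     * user-requested SPU_CYCLES reset count; 10^6 / freq_khz is the length of
     * one SPU cycle in nanoseconds, kept scaled by 2^SCALE_SHIFT for precision.
     */
    static void example_set_interval(unsigned int freq_khz, unsigned int cycles_reset)
    {
            unsigned long ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT) / freq_khz;

            /* cycles_reset SPU cycles between samples -> timer period in ns */
            profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
    }
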
@@@ -145,13 -155,13 +155,13 @@@ static enum hrtimer_restart profile_spu
                 * sample array must be loaded and then processed for a given
                 * cpu.  The sample array is not per cpu.
                 */
-               spin_lock_irqsave(&sample_array_lock,
-                                 sample_array_lock_flags);
+               spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+                                 oprof_spu_smpl_arry_lck_flags);
                num_samples = cell_spu_pc_collection(cpu);
  
                if (num_samples == 0) {
-                       spin_unlock_irqrestore(&sample_array_lock,
-                                              sample_array_lock_flags);
+                       spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+                                              oprof_spu_smpl_arry_lck_flags);
                        continue;
                }
  
                                        num_samples);
                }
  
-               spin_unlock_irqrestore(&sample_array_lock,
-                                      sample_array_lock_flags);
+               spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+                                      oprof_spu_smpl_arry_lck_flags);
  
        }
        smp_wmb();      /* insure spu event buffer updates are written */
  
  static struct hrtimer timer;
  /*
-  * Entry point for SPU profiling.
+  * Entry point for SPU cycle profiling.
   * NOTE:  SPU profiling is done system-wide, not per-CPU.
   *
   * cycles_reset is the count value specified by the user when
   * setting up OProfile to count SPU_CYCLES.
   */
- int start_spu_profiling(unsigned int cycles_reset)
+ int start_spu_profiling_cycles(unsigned int cycles_reset)
  {
        ktime_t kt;
  
        return 0;
  }
  
- void stop_spu_profiling(void)
+ /*
+  * Entry point for SPU event profiling.
+  * NOTE:  SPU profiling is done system-wide, not per-CPU.
+  *
+  * cycles_reset is the count value specified by the user when
+  * setting up OProfile to count SPU_CYCLES.
+  */
+ void start_spu_profiling_events(void)
+ {
+       spu_prof_running = 1;
+       schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+       return;
+ }
+ void stop_spu_profiling_cycles(void)
  {
        spu_prof_running = 0;
        hrtimer_cancel(&timer);
        kfree(samples);
-       pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+       pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
+ }
+ void stop_spu_profiling_events(void)
+ {
+       spu_prof_running = 0;
  }
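
start_spu_profiling_events() above only raises spu_prof_running and queues spu_work; the sampling itself happens in that delayed-work handler, which is defined elsewhere in the cell oprofile code. A hypothetical skeleton of such a handler, showing the usual self-rearming pattern (the real work function differs):

    /* Hypothetical handler skeleton for spu_work, not the in-tree routine. */
    static void spu_sample_work(struct work_struct *work)
    {
            if (!spu_prof_running)
                    return;         /* stop_spu_profiling_events() was called */

            /* ... collect and forward the SPU event samples here ... */

            /* re-arm so sampling continues until the flag is cleared */
            schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
    }
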
index 65e8294a9e29db35cfcff8a493682804e9dcffe3,ac014cb27915006b07426f509191bedbda6469c8..9da5a4b811337562dbcf708dcad927756264381d
@@@ -1,11 -1,12 +1,12 @@@
  /**
   * @file buffer_sync.c
   *
-  * @remark Copyright 2002 OProfile authors
+  * @remark Copyright 2002-2009 OProfile authors
   * @remark Read the file COPYING
   *
   * @author John Levon <levon@movementarian.org>
   * @author Barry Kasindorf
+  * @author Robert Richter <robert.richter@amd.com>
   *
   * This is the core of the buffer management. Each
   * CPU buffer is processed and entered into the
@@@ -200,7 -201,7 +201,7 @@@ static inline unsigned long fast_get_dc
  {
        unsigned long cookie;
  
 -      if (path->dentry->d_cookie)
 +      if (path->dentry->d_flags & DCACHE_COOKIE)
                return (unsigned long)path->dentry;
        get_dcookie(path, &cookie);
        return cookie;
@@@ -315,88 -316,73 +316,73 @@@ static void add_trace_begin(void
        add_event_entry(TRACE_BEGIN_CODE);
  }
  
- #ifdef CONFIG_OPROFILE_IBS
- #define IBS_FETCH_CODE_SIZE   2
- #define IBS_OP_CODE_SIZE      5
- /*
-  * Add IBS fetch and op entries to event buffer
-  */
- static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
+ static void add_data(struct op_entry *entry, struct mm_struct *mm)
  {
-       unsigned long rip;
-       int i, count;
-       unsigned long ibs_cookie = 0;
+       unsigned long code, pc, val;
+       unsigned long cookie;
        off_t offset;
-       struct op_sample *sample;
-       sample = cpu_buffer_read_entry(cpu);
-       if (!sample)
-               goto Error;
-       rip = sample->eip;
  
- #ifdef __LP64__
-       rip += sample->event << 32;
- #endif
+       if (!op_cpu_buffer_get_data(entry, &code))
+               return;
+       if (!op_cpu_buffer_get_data(entry, &pc))
+               return;
+       if (!op_cpu_buffer_get_size(entry))
+               return;
  
        if (mm) {
-               ibs_cookie = lookup_dcookie(mm, rip, &offset);
+               cookie = lookup_dcookie(mm, pc, &offset);
  
-               if (ibs_cookie == NO_COOKIE)
-                       offset = rip;
-               if (ibs_cookie == INVALID_COOKIE) {
+               if (cookie == NO_COOKIE)
+                       offset = pc;
+               if (cookie == INVALID_COOKIE) {
                        atomic_inc(&oprofile_stats.sample_lost_no_mapping);
-                       offset = rip;
+                       offset = pc;
                }
-               if (ibs_cookie != last_cookie) {
-                       add_cookie_switch(ibs_cookie);
-                       last_cookie = ibs_cookie;
+               if (cookie != last_cookie) {
+                       add_cookie_switch(cookie);
+                       last_cookie = cookie;
                }
        } else
-               offset = rip;
+               offset = pc;
  
        add_event_entry(ESCAPE_CODE);
        add_event_entry(code);
        add_event_entry(offset);        /* Offset from Dcookie */
  
-       /* we send the Dcookie offset, but send the raw Linear Add also*/
-       add_event_entry(sample->eip);
-       add_event_entry(sample->event);
-       if (code == IBS_FETCH_CODE)
-               count = IBS_FETCH_CODE_SIZE;    /*IBS FETCH is 2 int64s*/
-       else
-               count = IBS_OP_CODE_SIZE;       /*IBS OP is 5 int64s*/
-       for (i = 0; i < count; i++) {
-               sample = cpu_buffer_read_entry(cpu);
-               if (!sample)
-                       goto Error;
-               add_event_entry(sample->eip);
-               add_event_entry(sample->event);
-       }
-       return;
- Error:
-       return;
+       while (op_cpu_buffer_get_data(entry, &val))
+               add_event_entry(val);
  }
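
add_data() above drains one variable-length entry: a code word, a pc, and any remaining payload words. The matching producer side uses the op_cpu_buffer_write_reserve()/op_cpu_buffer_add_data() calls named in the commit list; the sketch below infers their signatures from those titles and from the consumer code here, so treat the names and size semantics as assumptions rather than verbatim driver code:

    /* Producer-side sketch with assumed signatures. */
    static void example_write_extended_sample(unsigned long code, unsigned long pc,
                                              unsigned long *extra, int n)
    {
            struct op_entry entry;
            struct op_sample *sample;
            int i;

            /* reserve room for code + pc + n payload words */
            sample = op_cpu_buffer_write_reserve(&entry, 2 + n);
            if (!sample)
                    return;         /* cpu buffer full, sample dropped */

            op_cpu_buffer_add_data(&entry, code);   /* read back as 'code' above */
            op_cpu_buffer_add_data(&entry, pc);     /* read back as 'pc' above */
            for (i = 0; i < n; i++)
                    op_cpu_buffer_add_data(&entry, extra[i]);

            op_cpu_buffer_write_commit(&entry);
    }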
  
- #endif
- static void add_sample_entry(unsigned long offset, unsigned long event)
+ static inline void add_sample_entry(unsigned long offset, unsigned long event)
  {
        add_event_entry(offset);
        add_event_entry(event);
  }
  
  
- static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
+ /*
+  * Add a sample to the global event buffer. If possible the
+  * sample is converted into a persistent dentry/offset pair
+  * for later lookup from userspace. Return 0 on failure.
+  */
+ static int
+ add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
  {
        unsigned long cookie;
        off_t offset;
  
+       if (in_kernel) {
+               add_sample_entry(s->eip, s->event);
+               return 1;
+       }
+       /* add userspace sample */
+       if (!mm) {
+               atomic_inc(&oprofile_stats.sample_lost_no_mm);
+               return 0;
+       }
        cookie = lookup_dcookie(mm, s->eip, &offset);
  
        if (cookie == INVALID_COOKIE) {
  }
  
  
- /* Add a sample to the global event buffer. If possible the
-  * sample is converted into a persistent dentry/offset pair
-  * for later lookup from userspace.
-  */
- static int
- add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
- {
-       if (in_kernel) {
-               add_sample_entry(s->eip, s->event);
-               return 1;
-       } else if (mm) {
-               return add_us_sample(mm, s);
-       } else {
-               atomic_inc(&oprofile_stats.sample_lost_no_mm);
-       }
-       return 0;
- }
  static void release_mm(struct mm_struct *mm)
  {
        if (!mm)
@@@ -526,66 -493,69 +493,69 @@@ void sync_buffer(int cpu
  {
        struct mm_struct *mm = NULL;
        struct mm_struct *oldmm;
+       unsigned long val;
        struct task_struct *new;
        unsigned long cookie = 0;
        int in_kernel = 1;
        sync_buffer_state state = sb_buffer_start;
        unsigned int i;
        unsigned long available;
+       unsigned long flags;
+       struct op_entry entry;
+       struct op_sample *sample;
  
        mutex_lock(&buffer_mutex);
  
        add_cpu_switch(cpu);
  
-       cpu_buffer_reset(cpu);
-       available = cpu_buffer_entries(cpu);
+       op_cpu_buffer_reset(cpu);
+       available = op_cpu_buffer_entries(cpu);
  
        for (i = 0; i < available; ++i) {
-               struct op_sample *s = cpu_buffer_read_entry(cpu);
-               if (!s)
+               sample = op_cpu_buffer_read_entry(&entry, cpu);
+               if (!sample)
                        break;
  
-               if (is_code(s->eip)) {
-                       switch (s->event) {
-                       case 0:
-                       case CPU_IS_KERNEL:
+               if (is_code(sample->eip)) {
+                       flags = sample->event;
+                       if (flags & TRACE_BEGIN) {
+                               state = sb_bt_start;
+                               add_trace_begin();
+                       }
+                       if (flags & KERNEL_CTX_SWITCH) {
                                /* kernel/userspace switch */
-                               in_kernel = s->event;
+                               in_kernel = flags & IS_KERNEL;
                                if (state == sb_buffer_start)
                                        state = sb_sample_start;
-                               add_kernel_ctx_switch(s->event);
-                               break;
-                       case CPU_TRACE_BEGIN:
-                               state = sb_bt_start;
-                               add_trace_begin();
-                               break;
- #ifdef CONFIG_OPROFILE_IBS
-                       case IBS_FETCH_BEGIN:
-                               state = sb_bt_start;
-                               add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
-                               break;
-                       case IBS_OP_BEGIN:
-                               state = sb_bt_start;
-                               add_ibs_begin(cpu, IBS_OP_CODE, mm);
-                               break;
- #endif
-                       default:
+                               add_kernel_ctx_switch(flags & IS_KERNEL);
+                       }
+                       if (flags & USER_CTX_SWITCH
+                           && op_cpu_buffer_get_data(&entry, &val)) {
                                /* userspace context switch */
+                               new = (struct task_struct *)val;
                                oldmm = mm;
-                               new = (struct task_struct *)s->event;
                                release_mm(oldmm);
                                mm = take_tasks_mm(new);
                                if (mm != oldmm)
                                        cookie = get_exec_dcookie(mm);
                                add_user_ctx_switch(new, cookie);
-                               break;
-                       }
-               } else if (state >= sb_bt_start &&
-                          !add_sample(mm, s, in_kernel)) {
-                       if (state == sb_bt_start) {
-                               state = sb_bt_ignore;
-                               atomic_inc(&oprofile_stats.bt_lost_no_mapping);
                        }
+                       if (op_cpu_buffer_get_size(&entry))
+                               add_data(&entry, mm);
+                       continue;
+               }
+               if (state < sb_bt_start)
+                       /* ignore sample */
+                       continue;
+               if (add_sample(mm, sample, in_kernel))
+                       continue;
+               /* ignore backtraces if failed to add a sample */
+               if (state == sb_bt_start) {
+                       state = sb_bt_ignore;
+                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
                }
        }
        release_mm(mm);
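
The rewritten loop replaces the old per-code switch with flag tests and a small state machine. As a reading aid, the helper below restates how one code entry's flags word is interpreted; it assumes the sync_buffer_state enum (defined earlier in buffer_sync.c) orders its values so that sb_bt_ignore < sb_buffer_start < sb_bt_start < sb_sample_start, which is what the state comparisons in the loop rely on:

    /* Reading aid only, not in-tree code. */
    static void decode_code_entry(unsigned long flags, int *in_kernel,
                                  sync_buffer_state *state)
    {
            if (flags & TRACE_BEGIN)
                    *state = sb_bt_start;   /* following samples form a backtrace */

            if (flags & KERNEL_CTX_SWITCH) {
                    *in_kernel = flags & IS_KERNEL;         /* kernel/user switch */
                    if (*state == sb_buffer_start)
                            *state = sb_sample_start;       /* samples valid from now on */
            }

            /*
             * USER_CTX_SWITCH additionally carries the new task_struct pointer
             * as attached data, fetched with op_cpu_buffer_get_data() in the
             * loop above before the mm and cookie are switched.
             */
    }
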
index a9d9760dc7b62ae7d17894f815067ced748db2cd,d42b882dfe4ba87131beb591aba51179fe4bb209..8b0daf0662ef45e7ad6ad9df71e52dfd3c3459ba
  
  #include "trace.h"
  
 -/* Global flag to disable all recording to ring buffers */
 -static int ring_buffers_off __read_mostly;
 +/*
 + * A fast way to enable or disable all ring buffers is to
 + * call tracing_on or tracing_off. Turning off the ring buffers
 + * prevents all ring buffers from being recorded to.
 + * Turning this switch on, makes it OK to write to the
 + * ring buffer, if the ring buffer is enabled itself.
 + *
 + * There's three layers that must be on in order to write
 + * to the ring buffer.
 + *
 + * 1) This global flag must be set.
 + * 2) The ring buffer must be enabled for recording.
 + * 3) The per cpu buffer must be enabled for recording.
 + *
 + * In case of an anomaly, this global flag has a bit set that
 + * will permantly disable all ring buffers.
 + */
 +
 +/*
 + * Global flag to disable all recording to ring buffers
 + *  This has two bits: ON, DISABLED
 + *
 + *  ON   DISABLED
 + * ---- ----------
 + *   0      0        : ring buffers are off
 + *   1      0        : ring buffers are on
 + *   X      1        : ring buffers are permanently disabled
 + */
 +
 +enum {
 +      RB_BUFFERS_ON_BIT       = 0,
 +      RB_BUFFERS_DISABLED_BIT = 1,
 +};
 +
 +enum {
 +      RB_BUFFERS_ON           = 1 << RB_BUFFERS_ON_BIT,
 +      RB_BUFFERS_DISABLED     = 1 << RB_BUFFERS_DISABLED_BIT,
 +};
 +
 +static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
  
  /**
   * tracing_on - enable all tracing buffers
@@@ -67,7 -29,7 +67,7 @@@
   */
  void tracing_on(void)
  {
 -      ring_buffers_off = 0;
 +      set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
  }
  EXPORT_SYMBOL_GPL(tracing_on);
  
   */
  void tracing_off(void)
  {
 -      ring_buffers_off = 1;
 +      clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
  }
  EXPORT_SYMBOL_GPL(tracing_off);
  
 +/**
 + * tracing_off_permanent - permanently disable ring buffers
 + *
 + * This function, once called, will disable all ring buffers
 + * permanenty.
 + */
 +void tracing_off_permanent(void)
 +{
 +      set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
 +}
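
The comment block added above names three gates a write must pass. An illustration that combines them, using fields that appear elsewhere in this file (this helper does not exist in ring_buffer.c):

    /* Illustration only: the three layers that must all allow recording. */
    static int rb_may_write(struct ring_buffer *buffer, int cpu)
    {
            if (ring_buffer_flags != RB_BUFFERS_ON)                  /* 1) global flag */
                    return 0;
            if (atomic_read(&buffer->record_disabled))               /* 2) whole buffer */
                    return 0;
            if (atomic_read(&buffer->buffers[cpu]->record_disabled)) /* 3) this cpu */
                    return 0;
            return 1;
    }
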
 +
 +#include "trace.h"
 +
  /* Up this if you want to test the TIME_EXTENTS and normalization */
  #define DEBUG_SHIFT 0
  
@@@ -109,7 -58,7 +109,7 @@@ u64 ring_buffer_time_stamp(int cpu
        preempt_disable_notrace();
        /* shift to debug/test normalization and TIME_EXTENTS */
        time = sched_clock() << DEBUG_SHIFT;
 -      preempt_enable_notrace();
 +      preempt_enable_no_resched_notrace();
  
        return time;
  }
@@@ -168,7 -117,13 +168,13 @@@ rb_event_length(struct ring_buffer_even
   */
  unsigned ring_buffer_event_length(struct ring_buffer_event *event)
  {
-       return rb_event_length(event);
+       unsigned length = rb_event_length(event);
+       if (event->type != RINGBUF_TYPE_DATA)
+               return length;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+                 length -= sizeof(event->array[0]);
+       return length;
  }
  EXPORT_SYMBOL_GPL(ring_buffer_event_length);
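
With the fix above, ring_buffer_event_length() reports the usable payload size of a data event instead of the raw reserved length. A consumer sketch using the public calls that appear in this file (process() is a hypothetical callback):

    /* Drain one CPU's events and hand each payload to a callback. */
    static void drain_cpu(struct ring_buffer *buffer, int cpu,
                          void (*process)(void *data, unsigned len))
    {
            struct ring_buffer_event *event;
            u64 ts;

            while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
                    /* length now excludes the event header, see the fix above */
                    process(ring_buffer_event_data(event),
                            ring_buffer_event_length(event));
            }
    }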
  
@@@ -195,30 -150,26 +201,30 @@@ void *ring_buffer_event_data(struct rin
  EXPORT_SYMBOL_GPL(ring_buffer_event_data);
  
  #define for_each_buffer_cpu(buffer, cpu)              \
 -      for_each_cpu_mask(cpu, buffer->cpumask)
 +      for_each_cpu(cpu, buffer->cpumask)
  
  #define TS_SHIFT      27
  #define TS_MASK               ((1ULL << TS_SHIFT) - 1)
  #define TS_DELTA_TEST (~TS_MASK)
  
 -/*
 - * This hack stolen from mm/slob.c.
 - * We can store per page timing information in the page frame of the page.
 - * Thanks to Peter Zijlstra for suggesting this idea.
 - */
 -struct buffer_page {
 +struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
 -      local_t          write;         /* index for next write */
        local_t          commit;        /* write commited index */
 +      unsigned char    data[];        /* data of buffer page */
 +};
 +
 +struct buffer_page {
 +      local_t          write;         /* index for next write */
        unsigned         read;          /* index for next read */
        struct list_head list;          /* list of free pages */
 -      void *page;                     /* Actual data page */
 +      struct buffer_data_page *page;  /* Actual data page */
  };
  
 +static void rb_init_page(struct buffer_data_page *bpage)
 +{
 +      local_set(&bpage->commit, 0);
 +}
 +
  /*
   * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
   * this issue out.
@@@ -240,7 -191,7 +246,7 @@@ static inline int test_time_stamp(u64 d
        return 0;
  }
  
 -#define BUF_PAGE_SIZE PAGE_SIZE
 +#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
  
  /*
   * head_page == tail_page && head == tail then buffer is empty.
  struct ring_buffer_per_cpu {
        int                             cpu;
        struct ring_buffer              *buffer;
 -      spinlock_t                      lock;
 +      spinlock_t                      reader_lock; /* serialize readers */
 +      raw_spinlock_t                  lock;
        struct lock_class_key           lock_key;
        struct list_head                pages;
        struct buffer_page              *head_page;     /* read from head */
  };
  
  struct ring_buffer {
 -      unsigned long                   size;
        unsigned                        pages;
        unsigned                        flags;
        int                             cpus;
 -      cpumask_t                       cpumask;
 +      cpumask_var_t                   cpumask;
        atomic_t                        record_disabled;
  
        struct mutex                    mutex;
@@@ -282,16 -233,32 +288,16 @@@ struct ring_buffer_iter 
        u64                             read_stamp;
  };
  
 +/* buffer may be either ring_buffer or ring_buffer_per_cpu */
  #define RB_WARN_ON(buffer, cond)                              \
 -      do {                                                    \
 -              if (unlikely(cond)) {                           \
 -                      atomic_inc(&buffer->record_disabled);   \
 -                      WARN_ON(1);                             \
 -              }                                               \
 -      } while (0)
 -
 -#define RB_WARN_ON_RET(buffer, cond)                          \
 -      do {                                                    \
 -              if (unlikely(cond)) {                           \
 -                      atomic_inc(&buffer->record_disabled);   \
 -                      WARN_ON(1);                             \
 -                      return -1;                              \
 -              }                                               \
 -      } while (0)
 -
 -#define RB_WARN_ON_ONCE(buffer, cond)                         \
 -      do {                                                    \
 -              static int once;                                \
 -              if (unlikely(cond) && !once) {                  \
 -                      once++;                                 \
 +      ({                                                      \
 +              int _____ret = unlikely(cond);                  \
 +              if (_____ret) {                                 \
                        atomic_inc(&buffer->record_disabled);   \
                        WARN_ON(1);                             \
                }                                               \
 -      } while (0)
 +              _____ret;                                       \
 +      })
  
  /**
   * check_pages - integrity check of buffer pages
  static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
  
 -      RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
 -      RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
 +      if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
 +              return -1;
 +      if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
 +              return -1;
  
 -      list_for_each_entry_safe(page, tmp, head, list) {
 -              RB_WARN_ON_RET(cpu_buffer,
 -                             page->list.next->prev != &page->list);
 -              RB_WARN_ON_RET(cpu_buffer,
 -                             page->list.prev->next != &page->list);
 +      list_for_each_entry_safe(bpage, tmp, head, list) {
 +              if (RB_WARN_ON(cpu_buffer,
 +                             bpage->list.next->prev != &bpage->list))
 +                      return -1;
 +              if (RB_WARN_ON(cpu_buffer,
 +                             bpage->list.prev->next != &bpage->list))
 +                      return -1;
        }
  
        return 0;
@@@ -326,23 -289,22 +332,23 @@@ static int rb_allocate_pages(struct rin
                             unsigned nr_pages)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
        unsigned long addr;
        LIST_HEAD(pages);
        unsigned i;
  
        for (i = 0; i < nr_pages; i++) {
 -              page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
 +              bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                                    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
 -              if (!page)
 +              if (!bpage)
                        goto free_pages;
 -              list_add(&page->list, &pages);
 +              list_add(&bpage->list, &pages);
  
                addr = __get_free_page(GFP_KERNEL);
                if (!addr)
                        goto free_pages;
 -              page->page = (void *)addr;
 +              bpage->page = (void *)addr;
 +              rb_init_page(bpage->page);
        }
  
        list_splice(&pages, head);
        return 0;
  
   free_pages:
 -      list_for_each_entry_safe(page, tmp, &pages, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, &pages, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        return -ENOMEM;
  }
@@@ -363,7 -325,7 +369,7 @@@ static struct ring_buffer_per_cpu 
  rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
  {
        struct ring_buffer_per_cpu *cpu_buffer;
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        unsigned long addr;
        int ret;
  
  
        cpu_buffer->cpu = cpu;
        cpu_buffer->buffer = buffer;
 -      spin_lock_init(&cpu_buffer->lock);
 +      spin_lock_init(&cpu_buffer->reader_lock);
 +      cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&cpu_buffer->pages);
  
 -      page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
 +      bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                            GFP_KERNEL, cpu_to_node(cpu));
 -      if (!page)
 +      if (!bpage)
                goto fail_free_buffer;
  
 -      cpu_buffer->reader_page = page;
 +      cpu_buffer->reader_page = bpage;
        addr = __get_free_page(GFP_KERNEL);
        if (!addr)
                goto fail_free_reader;
 -      page->page = (void *)addr;
 +      bpage->page = (void *)addr;
 +      rb_init_page(bpage->page);
  
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
  
  static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
  
        list_del_init(&cpu_buffer->reader_page->list);
        free_buffer_page(cpu_buffer->reader_page);
  
 -      list_for_each_entry_safe(page, tmp, head, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, head, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        kfree(cpu_buffer);
  }
@@@ -458,9 -418,6 +464,9 @@@ struct ring_buffer *ring_buffer_alloc(u
        if (!buffer)
                return NULL;
  
 +      if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
 +              goto fail_free_buffer;
 +
        buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        buffer->flags = flags;
  
        if (buffer->pages == 1)
                buffer->pages++;
  
 -      buffer->cpumask = cpu_possible_map;
 +      cpumask_copy(buffer->cpumask, cpu_possible_mask);
        buffer->cpus = nr_cpu_ids;
  
        bsize = sizeof(void *) * nr_cpu_ids;
        buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
                                  GFP_KERNEL);
        if (!buffer->buffers)
 -              goto fail_free_buffer;
 +              goto fail_free_cpumask;
  
        for_each_buffer_cpu(buffer, cpu) {
                buffer->buffers[cpu] =
        }
        kfree(buffer->buffers);
  
 + fail_free_cpumask:
 +      free_cpumask_var(buffer->cpumask);
 +
   fail_free_buffer:
        kfree(buffer);
        return NULL;
@@@ -516,8 -470,6 +522,8 @@@ ring_buffer_free(struct ring_buffer *bu
        for_each_buffer_cpu(buffer, cpu)
                rb_free_cpu_buffer(buffer->buffers[cpu]);
  
 +      free_cpumask_var(buffer->cpumask);
 +
        kfree(buffer);
  }
  EXPORT_SYMBOL_GPL(ring_buffer_free);
@@@ -527,7 -479,7 +533,7 @@@ static void rb_reset_cpu(struct ring_bu
  static void
  rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
  {
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
  
        synchronize_sched();
  
        for (i = 0; i < nr_pages; i++) {
 -              BUG_ON(list_empty(&cpu_buffer->pages));
 +              if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
 +                      return;
                p = cpu_buffer->pages.next;
 -              page = list_entry(p, struct buffer_page, list);
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +              bpage = list_entry(p, struct buffer_page, list);
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
 -      BUG_ON(list_empty(&cpu_buffer->pages));
 +      if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
 +              return;
  
        rb_reset_cpu(cpu_buffer);
  
@@@ -557,7 -507,7 +563,7 @@@ static voi
  rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
                struct list_head *pages, unsigned nr_pages)
  {
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
  
        synchronize_sched();
  
        for (i = 0; i < nr_pages; i++) {
 -              BUG_ON(list_empty(pages));
 +              if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
 +                      return;
                p = pages->next;
 -              page = list_entry(p, struct buffer_page, list);
 -              list_del_init(&page->list);
 -              list_add_tail(&page->list, &cpu_buffer->pages);
 +              bpage = list_entry(p, struct buffer_page, list);
 +              list_del_init(&bpage->list);
 +              list_add_tail(&bpage->list, &cpu_buffer->pages);
        }
        rb_reset_cpu(cpu_buffer);
  
@@@ -597,7 -546,7 +603,7 @@@ int ring_buffer_resize(struct ring_buff
  {
        struct ring_buffer_per_cpu *cpu_buffer;
        unsigned nr_pages, rm_pages, new_pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
        unsigned long buffer_size;
        unsigned long addr;
        LIST_HEAD(pages);
        if (size < buffer_size) {
  
                /* easy case, just free pages */
 -              BUG_ON(nr_pages >= buffer->pages);
 +              if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
 +                      mutex_unlock(&buffer->mutex);
 +                      return -1;
 +              }
  
                rm_pages = buffer->pages - nr_pages;
  
         * add these pages to the cpu_buffers. Otherwise we just free
         * them all and return -ENOMEM;
         */
 -      BUG_ON(nr_pages <= buffer->pages);
 +      if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
 +              mutex_unlock(&buffer->mutex);
 +              return -1;
 +      }
 +
        new_pages = nr_pages - buffer->pages;
  
        for_each_buffer_cpu(buffer, cpu) {
                for (i = 0; i < new_pages; i++) {
 -                      page = kzalloc_node(ALIGN(sizeof(*page),
 +                      bpage = kzalloc_node(ALIGN(sizeof(*bpage),
                                                  cache_line_size()),
                                            GFP_KERNEL, cpu_to_node(cpu));
 -                      if (!page)
 +                      if (!bpage)
                                goto free_pages;
 -                      list_add(&page->list, &pages);
 +                      list_add(&bpage->list, &pages);
                        addr = __get_free_page(GFP_KERNEL);
                        if (!addr)
                                goto free_pages;
 -                      page->page = (void *)addr;
 +                      bpage->page = (void *)addr;
 +                      rb_init_page(bpage->page);
                }
        }
  
                rb_insert_pages(cpu_buffer, &pages, new_pages);
        }
  
 -      BUG_ON(!list_empty(&pages));
 +      if (RB_WARN_ON(buffer, !list_empty(&pages))) {
 +              mutex_unlock(&buffer->mutex);
 +              return -1;
 +      }
  
   out:
        buffer->pages = nr_pages;
        return size;
  
   free_pages:
 -      list_for_each_entry_safe(page, tmp, &pages, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, &pages, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        mutex_unlock(&buffer->mutex);
        return -ENOMEM;
@@@ -703,15 -641,9 +709,15 @@@ static inline int rb_null_event(struct 
        return event->type == RINGBUF_TYPE_PADDING;
  }
  
 -static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
 +static inline void *
 +__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
 +{
 +      return bpage->data + index;
 +}
 +
 +static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
  {
 -      return page->page + index;
 +      return bpage->page->data + index;
  }
  
  static inline struct ring_buffer_event *
@@@ -741,7 -673,7 +747,7 @@@ static inline unsigned rb_page_write(st
  
  static inline unsigned rb_page_commit(struct buffer_page *bpage)
  {
 -      return local_read(&bpage->commit);
 +      return local_read(&bpage->page->commit);
  }
  
  /* Size is determined by what has been commited */
@@@ -776,8 -708,7 +782,8 @@@ static void rb_update_overflow(struct r
             head += rb_event_length(event)) {
  
                event = __rb_page_index(cpu_buffer->head_page, head);
 -              BUG_ON(rb_null_event(event));
 +              if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 +                      return;
                /* Only count data entries */
                if (event->type != RINGBUF_TYPE_DATA)
                        continue;
  }
  
  static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 -                             struct buffer_page **page)
 +                             struct buffer_page **bpage)
  {
 -      struct list_head *p = (*page)->list.next;
 +      struct list_head *p = (*bpage)->list.next;
  
        if (p == &cpu_buffer->pages)
                p = p->next;
  
 -      *page = list_entry(p, struct buffer_page, list);
 +      *bpage = list_entry(p, struct buffer_page, list);
  }
  
  static inline unsigned
@@@ -830,18 -761,16 +836,18 @@@ rb_set_commit_event(struct ring_buffer_
        addr &= PAGE_MASK;
  
        while (cpu_buffer->commit_page->page != (void *)addr) {
 -              RB_WARN_ON(cpu_buffer,
 -                         cpu_buffer->commit_page == cpu_buffer->tail_page);
 -              cpu_buffer->commit_page->commit =
 +              if (RB_WARN_ON(cpu_buffer,
 +                        cpu_buffer->commit_page == cpu_buffer->tail_page))
 +                      return;
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
 -              cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
 +              cpu_buffer->write_stamp =
 +                      cpu_buffer->commit_page->page->time_stamp;
        }
  
        /* Now set the commit to the event's index */
 -      local_set(&cpu_buffer->commit_page->commit, index);
 +      local_set(&cpu_buffer->commit_page->page->commit, index);
  }
  
  static inline void
@@@ -855,38 -784,25 +861,38 @@@ rb_set_commit_to_write(struct ring_buff
         * back to us). This allows us to do a simple loop to
         * assign the commit to the tail.
         */
 + again:
        while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
 -              cpu_buffer->commit_page->commit =
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
 -              cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
 +              cpu_buffer->write_stamp =
 +                      cpu_buffer->commit_page->page->time_stamp;
                /* add barrier to keep gcc from optimizing too much */
                barrier();
        }
        while (rb_commit_index(cpu_buffer) !=
               rb_page_write(cpu_buffer->commit_page)) {
 -              cpu_buffer->commit_page->commit =
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                barrier();
        }
 +
 +      /* again, keep gcc from optimizing */
 +      barrier();
 +
 +      /*
 +       * If an interrupt came in just after the first while loop
 +       * and pushed the tail page forward, we will be left with
 +       * a dangling commit that will never go forward.
 +       */
 +      if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
 +              goto again;
  }
  
  static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
  {
 -      cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
 +      cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
        cpu_buffer->reader_page->read = 0;
  }
  
@@@ -905,7 -821,7 +911,7 @@@ static inline void rb_inc_iter(struct r
        else
                rb_inc_page(cpu_buffer, &iter->head_page);
  
 -      iter->read_stamp = iter->head_page->time_stamp;
 +      iter->read_stamp = iter->head_page->page->time_stamp;
        iter->head = 0;
  }
  
@@@ -979,15 -895,12 +985,15 @@@ static struct ring_buffer_event 
  __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                  unsigned type, unsigned long length, u64 *ts)
  {
 -      struct buffer_page *tail_page, *head_page, *reader_page;
 +      struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
        unsigned long tail, write;
        struct ring_buffer *buffer = cpu_buffer->buffer;
        struct ring_buffer_event *event;
        unsigned long flags;
  
 +      commit_page = cpu_buffer->commit_page;
 +      /* we just need to protect against interrupts */
 +      barrier();
        tail_page = cpu_buffer->tail_page;
        write = local_add_return(length, &tail_page->write);
        tail = write - length;
        if (write > BUF_PAGE_SIZE) {
                struct buffer_page *next_page = tail_page;
  
 -              spin_lock_irqsave(&cpu_buffer->lock, flags);
 +              local_irq_save(flags);
 +              __raw_spin_lock(&cpu_buffer->lock);
  
                rb_inc_page(cpu_buffer, &next_page);
  
                reader_page = cpu_buffer->reader_page;
  
                /* we grabbed the lock before incrementing */
 -              RB_WARN_ON(cpu_buffer, next_page == reader_page);
 +              if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
 +                      goto out_unlock;
  
                /*
                 * If for some reason, we had an interrupt storm that made
                 * it all the way around the buffer, bail, and warn
                 * about it.
                 */
 -              if (unlikely(next_page == cpu_buffer->commit_page)) {
 +              if (unlikely(next_page == commit_page)) {
                        WARN_ON_ONCE(1);
                        goto out_unlock;
                }
                 */
                if (tail_page == cpu_buffer->tail_page) {
                        local_set(&next_page->write, 0);
 -                      local_set(&next_page->commit, 0);
 +                      local_set(&next_page->page->commit, 0);
                        cpu_buffer->tail_page = next_page;
  
                        /* reread the time stamp */
                        *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 -                      cpu_buffer->tail_page->time_stamp = *ts;
 +                      cpu_buffer->tail_page->page->time_stamp = *ts;
                }
  
                /*
                        rb_set_commit_to_write(cpu_buffer);
                }
  
 -              spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +              __raw_spin_unlock(&cpu_buffer->lock);
 +              local_irq_restore(flags);
  
                /* fail and let the caller try again */
                return ERR_PTR(-EAGAIN);
  
        /* We reserved something on the buffer */
  
 -      BUG_ON(write > BUF_PAGE_SIZE);
 +      if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
 +              return NULL;
  
        event = __rb_page_index(tail_page, tail);
        rb_update_event(event, type, length);
         * this page's time stamp.
         */
        if (!tail && rb_is_commit(cpu_buffer, event))
 -              cpu_buffer->commit_page->time_stamp = *ts;
 +              cpu_buffer->commit_page->page->time_stamp = *ts;
  
        return event;
  
   out_unlock:
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      local_irq_restore(flags);
        return NULL;
  }
  
@@@ -1145,7 -1053,7 +1151,7 @@@ rb_add_time_stamp(struct ring_buffer_pe
                        event->time_delta = *delta & TS_MASK;
                        event->array[0] = *delta >> TS_SHIFT;
                } else {
 -                      cpu_buffer->commit_page->time_stamp = *ts;
 +                      cpu_buffer->commit_page->page->time_stamp = *ts;
                        event->time_delta = 0;
                        event->array[0] = 0;
                }
@@@ -1183,8 -1091,10 +1189,8 @@@ rb_reserve_next_event(struct ring_buffe
         * storm or we have something buggy.
         * Bail!
         */
 -      if (unlikely(++nr_loops > 1000)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
                return NULL;
 -      }
  
        ts = ring_buffer_time_stamp(cpu_buffer->cpu);
  
@@@ -1280,18 -1190,19 +1286,18 @@@ ring_buffer_lock_reserve(struct ring_bu
        struct ring_buffer_event *event;
        int cpu, resched;
  
 -      if (ring_buffers_off)
 +      if (ring_buffer_flags != RB_BUFFERS_ON)
                return NULL;
  
        if (atomic_read(&buffer->record_disabled))
                return NULL;
  
        /* If we are tracing schedule, we don't want to recurse */
 -      resched = need_resched();
 -      preempt_disable_notrace();
 +      resched = ftrace_preempt_disable();
  
        cpu = raw_smp_processor_id();
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                goto out;
  
        cpu_buffer = buffer->buffers[cpu];
        return event;
  
   out:
 -      if (resched)
 -              preempt_enable_no_resched_notrace();
 -      else
 -              preempt_enable_notrace();
 +      ftrace_preempt_enable(resched);
        return NULL;
  }
  EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@@ -1361,9 -1275,12 +1367,9 @@@ int ring_buffer_unlock_commit(struct ri
        /*
         * Only the last preempt count needs to restore preemption.
         */
 -      if (preempt_count() == 1) {
 -              if (per_cpu(rb_need_resched, cpu))
 -                      preempt_enable_no_resched_notrace();
 -              else
 -                      preempt_enable_notrace();
 -      } else
 +      if (preempt_count() == 1)
 +              ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
 +      else
                preempt_enable_no_resched_notrace();
  
        return 0;
@@@ -1394,17 -1311,18 +1400,17 @@@ int ring_buffer_write(struct ring_buffe
        int ret = -EBUSY;
        int cpu, resched;
  
 -      if (ring_buffers_off)
 +      if (ring_buffer_flags != RB_BUFFERS_ON)
                return -EBUSY;
  
        if (atomic_read(&buffer->record_disabled))
                return -EBUSY;
  
 -      resched = need_resched();
 -      preempt_disable_notrace();
 +      resched = ftrace_preempt_disable();
  
        cpu = raw_smp_processor_id();
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                goto out;
  
        cpu_buffer = buffer->buffers[cpu];
  
        ret = 0;
   out:
 -      if (resched)
 -              preempt_enable_no_resched_notrace();
 -      else
 -              preempt_enable_notrace();
 +      ftrace_preempt_enable(resched);
  
        return ret;
  }
@@@ -1486,7 -1407,7 +1492,7 @@@ void ring_buffer_record_disable_cpu(str
  {
        struct ring_buffer_per_cpu *cpu_buffer;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
  
        cpu_buffer = buffer->buffers[cpu];
@@@ -1506,7 -1427,7 +1512,7 @@@ void ring_buffer_record_enable_cpu(stru
  {
        struct ring_buffer_per_cpu *cpu_buffer;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
  
        cpu_buffer = buffer->buffers[cpu];
@@@ -1523,7 -1444,7 +1529,7 @@@ unsigned long ring_buffer_entries_cpu(s
  {
        struct ring_buffer_per_cpu *cpu_buffer;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 0;
  
        cpu_buffer = buffer->buffers[cpu];
@@@ -1540,7 -1461,7 +1546,7 @@@ unsigned long ring_buffer_overrun_cpu(s
  {
        struct ring_buffer_per_cpu *cpu_buffer;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 0;
  
        cpu_buffer = buffer->buffers[cpu];
@@@ -1594,7 -1515,14 +1600,7 @@@ unsigned long ring_buffer_overruns(stru
  }
  EXPORT_SYMBOL_GPL(ring_buffer_overruns);
  
 -/**
 - * ring_buffer_iter_reset - reset an iterator
 - * @iter: The iterator to reset
 - *
 - * Resets the iterator, so that it will start from the beginning
 - * again.
 - */
 -void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 +static void rb_iter_reset(struct ring_buffer_iter *iter)
  {
        struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
  
        if (iter->head)
                iter->read_stamp = cpu_buffer->read_stamp;
        else
 -              iter->read_stamp = iter->head_page->time_stamp;
 +              iter->read_stamp = iter->head_page->page->time_stamp;
 +}
 +
 +/**
 + * ring_buffer_iter_reset - reset an iterator
 + * @iter: The iterator to reset
 + *
 + * Resets the iterator, so that it will start from the beginning
 + * again.
 + */
 +void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      rb_iter_reset(iter);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  }
  EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
  
@@@ -1714,8 -1625,7 +1720,8 @@@ rb_get_reader_page(struct ring_buffer_p
        unsigned long flags;
        int nr_loops = 0;
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 +      local_irq_save(flags);
 +      __raw_spin_lock(&cpu_buffer->lock);
  
   again:
        /*
         * a case where we will loop three times. There should be no
         * reason to loop four times (that I know of).
         */
 -      if (unlikely(++nr_loops > 3)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
                reader = NULL;
                goto out;
        }
                goto out;
  
        /* Never should we have an index greater than the size */
 -      RB_WARN_ON(cpu_buffer,
 -                 cpu_buffer->reader_page->read > rb_page_size(reader));
 +      if (RB_WARN_ON(cpu_buffer,
 +                     cpu_buffer->reader_page->read > rb_page_size(reader)))
 +              goto out;
  
        /* check if we caught up to the tail */
        reader = NULL;
        cpu_buffer->reader_page->list.prev = reader->list.prev;
  
        local_set(&cpu_buffer->reader_page->write, 0);
 -      local_set(&cpu_buffer->reader_page->commit, 0);
 +      local_set(&cpu_buffer->reader_page->page->commit, 0);
  
        /* Make the reader page now replace the head */
        reader->list.prev->next = &cpu_buffer->reader_page->list;
        goto again;
  
   out:
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      local_irq_restore(flags);
  
        return reader;
  }
@@@ -1792,8 -1701,7 +1798,8 @@@ static void rb_advance_reader(struct ri
        reader = rb_get_reader_page(cpu_buffer);
  
        /* This function should not be called when buffer is empty */
 -      BUG_ON(!reader);
 +      if (RB_WARN_ON(cpu_buffer, !reader))
 +              return;
  
        event = rb_reader_event(cpu_buffer);
  
@@@ -1820,9 -1728,7 +1826,9 @@@ static void rb_advance_iter(struct ring
         * Check if we are at the end of the buffer.
         */
        if (iter->head >= rb_page_size(iter->head_page)) {
 -              BUG_ON(iter->head_page == cpu_buffer->commit_page);
 +              if (RB_WARN_ON(buffer,
 +                             iter->head_page == cpu_buffer->commit_page))
 +                      return;
                rb_inc_iter(iter);
                return;
        }
         * This should not be called to advance the header if we are
         * at the tail of the buffer.
         */
 -      BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
 -             (iter->head + length > rb_commit_index(cpu_buffer)));
 +      if (RB_WARN_ON(cpu_buffer,
 +                     (iter->head_page == cpu_buffer->commit_page) &&
 +                     (iter->head + length > rb_commit_index(cpu_buffer))))
 +              return;
  
        rb_update_iter_read_stamp(iter, event);
  
                rb_advance_iter(iter);
  }
  
 -/**
 - * ring_buffer_peek - peek at the next event to be read
 - * @buffer: The ring buffer to read
 - * @cpu: The cpu to peak at
 - * @ts: The timestamp counter of this event.
 - *
 - * This will return the event that will be read next, but does
 - * not consume the data.
 - */
 -struct ring_buffer_event *
 -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 +static struct ring_buffer_event *
 +rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
  {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
        struct buffer_page *reader;
        int nr_loops = 0;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
  
        cpu_buffer = buffer->buffers[cpu];
         * can have.  Nesting 10 deep of interrupts is clearly
         * an anomaly.
         */
 -      if (unlikely(++nr_loops > 10)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
                return NULL;
 -      }
  
        reader = rb_get_reader_page(cpu_buffer);
        if (!reader)
  }
  EXPORT_SYMBOL_GPL(ring_buffer_peek);
  
 -/**
 - * ring_buffer_iter_peek - peek at the next event to be read
 - * @iter: The ring buffer iterator
 - * @ts: The timestamp counter of this event.
 - *
 - * This will return the event that will be read next, but does
 - * not increment the iterator.
 - */
 -struct ring_buffer_event *
 -ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 +static struct ring_buffer_event *
 +rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
  {
        struct ring_buffer *buffer;
        struct ring_buffer_per_cpu *cpu_buffer;
         * can have. Nesting 10 deep of interrupts is clearly
         * an anomaly.
         */
 -      if (unlikely(++nr_loops > 10)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
                return NULL;
 -      }
  
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
  }
  EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
  
 +/**
 + * ring_buffer_peek - peek at the next event to be read
 + * @buffer: The ring buffer to read
 + * @cpu: The cpu to peak at
 + * @ts: The timestamp counter of this event.
 + *
 + * This will return the event that will be read next, but does
 + * not consume the data.
 + */
 +struct ring_buffer_event *
 +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 +      struct ring_buffer_event *event;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_buffer_peek(buffer, cpu, ts);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return event;
 +}
 +
 +/**
 + * ring_buffer_iter_peek - peek at the next event to be read
 + * @iter: The ring buffer iterator
 + * @ts: The timestamp counter of this event.
 + *
 + * This will return the event that will be read next, but does
 + * not increment the iterator.
 + */
 +struct ring_buffer_event *
 +ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      struct ring_buffer_event *event;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_iter_peek(iter, ts);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return event;
 +}
 +
  /**
   * ring_buffer_consume - return an event and consume it
   * @buffer: The ring buffer to get the next event from
  struct ring_buffer_event *
  ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  {
 -      struct ring_buffer_per_cpu *cpu_buffer;
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct ring_buffer_event *event;
 +      unsigned long flags;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
  
 -      event = ring_buffer_peek(buffer, cpu, ts);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      event = rb_buffer_peek(buffer, cpu, ts);
        if (!event)
 -              return NULL;
 +              goto out;
  
 -      cpu_buffer = buffer->buffers[cpu];
        rb_advance_reader(cpu_buffer);
  
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
        return event;
  }
  EXPORT_SYMBOL_GPL(ring_buffer_consume);
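A hedged sketch of the consuming side: ring_buffer_consume() is the function above, while ring_buffer_event_data() and ring_buffer_event_length() are assumed from elsewhere in this file.

/* Sketch: drain all pending events on one CPU, passing each payload to a
 * caller-supplied handler.  Every successful call consumes one event, so
 * the loop ends when the per-cpu buffer is empty.
 */
static void drain_cpu(struct ring_buffer *buffer, int cpu,
		      void (*handle)(void *data, unsigned int len))
{
	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL)
		handle(ring_buffer_event_data(event),
		       ring_buffer_event_length(event));
}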
@@@ -2070,7 -1945,7 +2076,7 @@@ ring_buffer_read_start(struct ring_buff
        struct ring_buffer_iter *iter;
        unsigned long flags;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
  
        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
        atomic_inc(&cpu_buffer->record_disabled);
        synchronize_sched();
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 -      ring_buffer_iter_reset(iter);
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      __raw_spin_lock(&cpu_buffer->lock);
 +      rb_iter_reset(iter);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  
        return iter;
  }
@@@ -2122,17 -1995,12 +2128,17 @@@ struct ring_buffer_event 
  ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  {
        struct ring_buffer_event *event;
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      unsigned long flags;
  
 -      event = ring_buffer_iter_peek(iter, ts);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_iter_peek(iter, ts);
        if (!event)
 -              return NULL;
 +              goto out;
  
        rb_advance_iter(iter);
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  
        return event;
  }
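The iterator path follows the same lock-then-peek pattern; a sketch of the intended call sequence, assuming ring_buffer_read_start() as shown in the hunk above and ring_buffer_read_finish(), which is not visible in this diff.

/* Sketch: walk one CPU's buffer without consuming it.  read_start()
 * disables recording on that CPU until the iterator is released.
 */
static int count_events(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	u64 ts;
	int count = 0;

	iter = ring_buffer_read_start(buffer, cpu);
	if (!iter)
		return -ENOMEM;

	while (ring_buffer_read(iter, &ts))
		count++;

	ring_buffer_read_finish(iter);
	return count;
}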
@@@ -2154,7 -2022,7 +2160,7 @@@ rb_reset_cpu(struct ring_buffer_per_cp
        cpu_buffer->head_page
                = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
        local_set(&cpu_buffer->head_page->write, 0);
 -      local_set(&cpu_buffer->head_page->commit, 0);
 +      local_set(&cpu_buffer->head_page->page->commit, 0);
  
        cpu_buffer->head_page->read = 0;
  
  
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
        local_set(&cpu_buffer->reader_page->write, 0);
 -      local_set(&cpu_buffer->reader_page->commit, 0);
 +      local_set(&cpu_buffer->reader_page->page->commit, 0);
        cpu_buffer->reader_page->read = 0;
  
        cpu_buffer->overrun = 0;
@@@ -2180,18 -2048,14 +2186,18 @@@ void ring_buffer_reset_cpu(struct ring_
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        unsigned long flags;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      __raw_spin_lock(&cpu_buffer->lock);
  
        rb_reset_cpu(cpu_buffer);
  
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  }
  EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
  
@@@ -2236,7 -2100,7 +2242,7 @@@ int ring_buffer_empty_cpu(struct ring_b
  {
        struct ring_buffer_per_cpu *cpu_buffer;
  
 -      if (!cpu_isset(cpu, buffer->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 1;
  
        cpu_buffer = buffer->buffers[cpu];
@@@ -2260,12 -2124,13 +2266,12 @@@ int ring_buffer_swap_cpu(struct ring_bu
        struct ring_buffer_per_cpu *cpu_buffer_a;
        struct ring_buffer_per_cpu *cpu_buffer_b;
  
 -      if (!cpu_isset(cpu, buffer_a->cpumask) ||
 -          !cpu_isset(cpu, buffer_b->cpumask))
 +      if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
 +          !cpumask_test_cpu(cpu, buffer_b->cpumask))
                return -EINVAL;
  
        /* At least make sure the two buffers are somewhat the same */
 -      if (buffer_a->size != buffer_b->size ||
 -          buffer_a->pages != buffer_b->pages)
 +      if (buffer_a->pages != buffer_b->pages)
                return -EINVAL;
  
        cpu_buffer_a = buffer_a->buffers[cpu];
  }
  EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
  
 +static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 +                            struct buffer_data_page *bpage)
 +{
 +      struct ring_buffer_event *event;
 +      unsigned long head;
 +
 +      __raw_spin_lock(&cpu_buffer->lock);
 +      for (head = 0; head < local_read(&bpage->commit);
 +           head += rb_event_length(event)) {
 +
 +              event = __rb_data_page_index(bpage, head);
 +              if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 +                      return;
 +              /* Only count data entries */
 +              if (event->type != RINGBUF_TYPE_DATA)
 +                      continue;
 +              cpu_buffer->entries--;
 +      }
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +}
 +
 +/**
 + * ring_buffer_alloc_read_page - allocate a page to read from buffer
 + * @buffer: the buffer to allocate for.
 + *
 + * This function is used in conjunction with ring_buffer_read_page.
 + * When reading a full page from the ring buffer, these functions
 + * can be used to speed up the process. The calling function should
 + * allocate a few pages first with this function. Then when it
 + * needs to get pages from the ring buffer, it passes the result
 + * of this function into ring_buffer_read_page, which will swap
 + * the page that was allocated, with the read page of the buffer.
 + *
 + * Returns:
 + *  The page allocated, or NULL on error.
 + */
 +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 +{
 +      unsigned long addr;
 +      struct buffer_data_page *bpage;
 +
 +      addr = __get_free_page(GFP_KERNEL);
 +      if (!addr)
 +              return NULL;
 +
 +      bpage = (void *)addr;
 +
 +      return bpage;
 +}
 +
 +/**
 + * ring_buffer_free_read_page - free an allocated read page
 + * @buffer: the buffer the page was allocated for
 + * @data: the page to free
 + *
 + * Free a page allocated from ring_buffer_alloc_read_page.
 + */
 +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
 +{
 +      free_page((unsigned long)data);
 +}
 +
 +/**
 + * ring_buffer_read_page - extract a page from the ring buffer
 + * @buffer: buffer to extract from
 + * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 + * @cpu: the cpu of the buffer to extract
 + * @full: should the extraction only happen when the page is full.
 + *
 + * This function will pull out a page from the ring buffer and consume it.
 + * @data_page must be the address of the variable that was returned
 + * from ring_buffer_alloc_read_page. This is because the page might be used
 + * to swap with a page in the ring buffer.
 + *
 + * for example:
 + *    rpage = ring_buffer_alloc_read_page(buffer);
 + *    if (!rpage)
 + *            return error;
 + *    ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
 + *    if (ret)
 + *            process_page(rpage);
 + *
 + * When @full is set, the function will not return true unless
 + * the writer is off the reader page.
 + *
 + * Note: it is up to the calling functions to handle sleeps and wakeups.
 + *  The ring buffer can be used anywhere in the kernel and can not
 + *  blindly call wake_up. The layer that uses the ring buffer must be
 + *  responsible for that.
 + *
 + * Returns:
 + *  1 if data has been transferred
 + *  0 if no data has been transferred.
 + */
 +int ring_buffer_read_page(struct ring_buffer *buffer,
 +                          void **data_page, int cpu, int full)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 +      struct ring_buffer_event *event;
 +      struct buffer_data_page *bpage;
 +      unsigned long flags;
 +      int ret = 0;
 +
 +      if (!data_page)
 +              return 0;
 +
 +      bpage = *data_page;
 +      if (!bpage)
 +              return 0;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      /*
 +       * rb_buffer_peek will get the next ring buffer if
 +       * the current reader page is empty.
 +       */
 +      event = rb_buffer_peek(buffer, cpu, NULL);
 +      if (!event)
 +              goto out;
 +
 +      /* check for data */
 +      if (!local_read(&cpu_buffer->reader_page->page->commit))
 +              goto out;
 +      /*
 +       * If the writer is already off of the read page, then simply
 +       * switch the read page with the given page. Otherwise
 +       * we need to copy the data from the reader to the writer.
 +       */
 +      if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
 +              unsigned int read = cpu_buffer->reader_page->read;
 +
 +              if (full)
 +                      goto out;
 +              /* The writer is still on the reader page, we must copy */
 +              bpage = cpu_buffer->reader_page->page;
 +              memcpy(bpage->data,
 +                     cpu_buffer->reader_page->page->data + read,
 +                     local_read(&bpage->commit) - read);
 +
 +              /* consume what was read */
 +              cpu_buffer->reader_page += read;
 +
 +      } else {
 +              /* swap the pages */
 +              rb_init_page(bpage);
 +              bpage = cpu_buffer->reader_page->page;
 +              cpu_buffer->reader_page->page = *data_page;
 +              cpu_buffer->reader_page->read = 0;
 +              *data_page = bpage;
 +      }
 +      ret = 1;
 +
 +      /* update the entry counter */
 +      rb_remove_entries(cpu_buffer, bpage);
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return ret;
 +}
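A combined sketch of the three new read-page entry points, under the semantics documented above; handle_page() is a hypothetical callback and the page payload is treated as opaque here.

/* Sketch: pull whole pages out of one CPU's buffer.  Depending on where
 * the writer is, ring_buffer_read_page() either swaps the allocated page
 * into the ring buffer or copies into it; *rpage holds the data either way.
 */
static void read_pages(struct ring_buffer *buffer, int cpu,
		       void (*handle_page)(void *page))
{
	void *rpage = ring_buffer_alloc_read_page(buffer);

	if (!rpage)
		return;

	/* full == 0: accept partially filled pages as well */
	while (ring_buffer_read_page(buffer, &rpage, cpu, 0))
		handle_page(rpage);

	ring_buffer_free_read_page(buffer, rpage);
}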
 +
  static ssize_t
  rb_simple_read(struct file *filp, char __user *ubuf,
               size_t cnt, loff_t *ppos)
  {
 -      int *p = filp->private_data;
 +      long *p = filp->private_data;
        char buf[64];
        int r;
  
 -      /* !ring_buffers_off == tracing_on */
 -      r = sprintf(buf, "%d\n", !*p);
 +      if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
 +              r = sprintf(buf, "permanently disabled\n");
 +      else
 +              r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
  
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
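rb_simple_read() above reports two independent bits carried in one flag word; a sketch of how a "recording allowed" check would combine them, with the bit names taken from the code above and the flag variable spelled here as a stand-in for the file's real one.

/* Sketch: recording is permitted only while the "on" bit is set and the
 * "permanently disabled" bit is clear.  example_flags stands in for the
 * flag word the debugfs file operates on.
 */
static unsigned long example_flags = 1 << RB_BUFFERS_ON_BIT;

static int example_recording_allowed(void)
{
	return test_bit(RB_BUFFERS_ON_BIT, &example_flags) &&
	       !test_bit(RB_BUFFERS_DISABLED_BIT, &example_flags);
}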
@@@ -2473,7 -2176,7 +2479,7 @@@ static ssize_
  rb_simple_write(struct file *filp, const char __user *ubuf,
                size_t cnt, loff_t *ppos)
  {
 -      int *p = filp->private_data;
 +      long *p = filp->private_data;
        char buf[64];
        long val;
        int ret;
        if (ret < 0)
                return ret;
  
 -      /* !ring_buffers_off == tracing_on */
 -      *p = !val;
 +      if (val)
 +              set_bit(RB_BUFFERS_ON_BIT, p);
 +      else
 +              clear_bit(RB_BUFFERS_ON_BIT, p);
  
        (*ppos)++;
  
@@@ -2515,7 -2216,7 +2521,7 @@@ static __init int rb_init_debugfs(void
        d_tracer = tracing_init_dentry();
  
        entry = debugfs_create_file("tracing_on", 0644, d_tracer,
 -                                  &ring_buffers_off, &rb_simple_fops);
 +                                  &ring_buffer_flags, &rb_simple_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_on' entry\n");