www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'oprofile-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Dec 2008 01:31:25 +0000 (17:31 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Dec 2008 01:31:25 +0000 (17:31 -0800)
* 'oprofile-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  oprofile: select RING_BUFFER
  ring_buffer: adding EXPORT_SYMBOLs
  oprofile: fix lost sample counter
  oprofile: remove nr_available_slots()
  oprofile: port to the new ring_buffer
  ring_buffer: add remaining cpu functions to ring_buffer.h
  oprofile: moving cpu_buffer_reset() to cpu_buffer.h
  oprofile: adding cpu_buffer_entries()
  oprofile: adding cpu_buffer_write_commit()
  oprofile: adding cpu buffer r/w access functions
  ftrace: remove unused function arg in trace_iterator_increment()
  ring_buffer: update description for ring_buffer_alloc()
  oprofile: set values to default when creating oprofilefs
  oprofile: implement switch/case in buffer_sync.c
  x86/oprofile: cleanup IBS init/exit functions in op_model_amd.c
  x86/oprofile: reordering IBS code in op_model_amd.c
  oprofile: fix typo
  oprofile: whitespace changes only
  oprofile: update comment for oprofile_add_sample()
  oprofile: comment cleanup

include/linux/ring_buffer.h
kernel/trace/ring_buffer.c
kernel/trace/trace.c

index d363467c8f13e31a7c1ff4eaa92ee0dd961d67d6,de9d8c12e5ec836075a76d8947f1b1e61d074ebc..b3b3596600826847abc6b59fff5973e7c2f78cac
@@@ -28,19 -28,17 +28,19 @@@ struct ring_buffer_event 
   *                             size = 8 bytes
   *
   * @RINGBUF_TYPE_TIME_STAMP:  Sync time stamp with external clock
 - *                             array[0] = tv_nsec
 - *                             array[1] = tv_sec
 + *                             array[0]    = tv_nsec
 + *                             array[1..2] = tv_sec
   *                             size = 16 bytes
   *
   * @RINGBUF_TYPE_DATA:                Data record
   *                             If len is zero:
   *                              array[0] holds the actual length
 - *                              array[1..(length+3)/4-1] holds data
 + *                              array[1..(length+3)/4] holds data
 + *                              size = 4 + 4 + length (bytes)
   *                             else
   *                              length = len << 2
 - *                              array[0..(length+3)/4] holds data
 + *                              array[0..(length+3)/4-1] holds data
 + *                              size = 4 + length (bytes)
   */
  enum ring_buffer_type {
        RINGBUF_TYPE_PADDING,
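
As a quick illustration of the encoding documented above (a sketch, not part of the patch; it relies on the type/len/array fields of struct ring_buffer_event declared in this header, and the function name is made up), a reader could recover the byte length of a data event like this:

	/* Illustrative sketch only: mirrors the encoding documented above. */
	static unsigned example_data_event_length(struct ring_buffer_event *event)
	{
		if (event->len)
			return event->len << 2;	/* length = len << 2 */
		return event->array[0];		/* actual length kept in array[0] */
	}
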
@@@ -118,18 -116,14 +118,20 @@@ void ring_buffer_record_enable_cpu(stru
  
  unsigned long ring_buffer_entries(struct ring_buffer *buffer);
  unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
+ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
  
  u64 ring_buffer_time_stamp(int cpu);
  void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
  
  void tracing_on(void);
  void tracing_off(void);
 +void tracing_off_permanent(void);
 +
 +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
 +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 +int ring_buffer_read_page(struct ring_buffer *buffer,
 +                        void **data_page, int cpu, int full);
  
  enum ring_buffer_flags {
        RB_FL_OVERWRITE         = 1 << 0,
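
The declarations above add a page-granular read path to the header. A usage sketch, assuming a valid buffer and cpu id (the function name is illustrative and the consumption step is left as a comment; this is not code from the patch):

	/* Sketch only: drain one cpu of a ring buffer a page at a time. */
	static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
	{
		void *page = ring_buffer_alloc_read_page(buffer);

		if (!page)
			return;

		/* full == 0: also accept a partially filled reader page */
		while (ring_buffer_read_page(buffer, &page, cpu, 0)) {
			/* parse the events in *page here (consumer-specific) */
		}

		ring_buffer_free_read_page(buffer, page);
	}
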
index 76f34c0ef29c3aa9ea0123a3933e84c296a3caf9,30d57dd01a857968ef4e3234a8aec1e2bf740e56..1d601a7c4587eb075b8fe0100705a6d00ffc9172
  
  #include "trace.h"
  
 -/* Global flag to disable all recording to ring buffers */
 -static int ring_buffers_off __read_mostly;
 +/*
 + * A fast way to enable or disable all ring buffers is to
 + * call tracing_on or tracing_off. Turning off the ring buffers
 + * prevents all ring buffers from being recorded to.
 + * Turning this switch on makes it OK to write to the
 + * ring buffer, if the ring buffer itself is enabled.
 + *
 + * There are three layers that must be on in order to write
 + * to the ring buffer.
 + *
 + * 1) This global flag must be set.
 + * 2) The ring buffer must be enabled for recording.
 + * 3) The per cpu buffer must be enabled for recording.
 + *
 + * In case of an anomaly, this global flag has a bit set that
 + * will permanently disable all ring buffers.
 + */
 +
 +/*
 + * Global flag to disable all recording to ring buffers
 + *  This has two bits: ON, DISABLED
 + *
 + *  ON   DISABLED
 + * ---- ----------
 + *   0      0        : ring buffers are off
 + *   1      0        : ring buffers are on
 + *   X      1        : ring buffers are permanently disabled
 + */
 +
 +enum {
 +      RB_BUFFERS_ON_BIT       = 0,
 +      RB_BUFFERS_DISABLED_BIT = 1,
 +};
 +
 +enum {
 +      RB_BUFFERS_ON           = 1 << RB_BUFFERS_ON_BIT,
 +      RB_BUFFERS_DISABLED     = 1 << RB_BUFFERS_DISABLED_BIT,
 +};
 +
 +static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
  
  /**
   * tracing_on - enable all tracing buffers
@@@ -67,8 -29,9 +67,9 @@@
   */
  void tracing_on(void)
  {
 -      ring_buffers_off = 0;
 +      set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
  }
+ EXPORT_SYMBOL_GPL(tracing_on);
  
  /**
   * tracing_off - turn off all tracing buffers
   */
  void tracing_off(void)
  {
 -      ring_buffers_off = 1;
 +      clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
  }
+ EXPORT_SYMBOL_GPL(tracing_off);
  
 +/**
 + * tracing_off_permanent - permanently disable ring buffers
 + *
 + * This function, once called, will disable all ring buffers
 + * permanently.
 + */
 +void tracing_off_permanent(void)
 +{
 +      set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
 +}
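
A note on why the permanent disable above is sticky (a sketch, not part of the patch; the helper name is made up, but it mirrors the writer-side check added later in this file):

	/* Sketch only: writers compare the whole flag word, not just the ON bit. */
	static int example_buffers_writable(void)
	{
		/* true only when ON is set and DISABLED is clear */
		return ring_buffer_flags == RB_BUFFERS_ON;
	}
	/*
	 * After tracing_off_permanent(), ring_buffer_flags has RB_BUFFERS_DISABLED
	 * set, so this stays false even if tracing_on() sets the ON bit again.
	 */
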
 +
 +#include "trace.h"
 +
  /* Up this if you want to test the TIME_EXTENTS and normalization */
  #define DEBUG_SHIFT 0
  
@@@ -107,16 -58,18 +109,18 @@@ u64 ring_buffer_time_stamp(int cpu
        preempt_disable_notrace();
        /* shift to debug/test normalization and TIME_EXTENTS */
        time = sched_clock() << DEBUG_SHIFT;
 -      preempt_enable_notrace();
 +      preempt_enable_no_resched_notrace();
  
        return time;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
  
  void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
  {
        /* Just stupid testing the normalize function and deltas */
        *ts >>= DEBUG_SHIFT;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
  
  #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
  #define RB_ALIGNMENT_SHIFT    2
@@@ -166,6 -119,7 +170,7 @@@ unsigned ring_buffer_event_length(struc
  {
        return rb_event_length(event);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
  
  /* inline for ring buffer fast paths */
  static inline void *
@@@ -187,6 -141,7 +192,7 @@@ void *ring_buffer_event_data(struct rin
  {
        return rb_event_data(event);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
  
  #define for_each_buffer_cpu(buffer, cpu)              \
        for_each_cpu_mask(cpu, buffer->cpumask)
  #define TS_MASK               ((1ULL << TS_SHIFT) - 1)
  #define TS_DELTA_TEST (~TS_MASK)
  
 -/*
 - * This hack stolen from mm/slob.c.
 - * We can store per page timing information in the page frame of the page.
 - * Thanks to Peter Zijlstra for suggesting this idea.
 - */
 -struct buffer_page {
 +struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
 -      local_t          write;         /* index for next write */
        local_t          commit;        /* write committed index */
 +      unsigned char    data[];        /* data of buffer page */
 +};
 +
 +struct buffer_page {
 +      local_t          write;         /* index for next write */
        unsigned         read;          /* index for next read */
        struct list_head list;          /* list of free pages */
 -      void *page;                     /* Actual data page */
 +      struct buffer_data_page *page;  /* Actual data page */
  };
  
 +static void rb_init_page(struct buffer_data_page *bpage)
 +{
 +      local_set(&bpage->commit, 0);
 +}
 +
  /*
   * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
   * this issue out.
@@@ -234,7 -185,7 +240,7 @@@ static inline int test_time_stamp(u64 d
        return 0;
  }
  
 -#define BUF_PAGE_SIZE PAGE_SIZE
 +#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
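
A rough worked example of the new per-page payload (assuming 4 KiB pages and 8-byte u64/local_t; these are assumptions about the configuration, not something the patch states):

	/*
	 * Sketch only:
	 *   sizeof(struct buffer_data_page) ~= sizeof(u64) + sizeof(local_t)
	 *                                   ~= 8 + 8 = 16 bytes of in-page header
	 *   BUF_PAGE_SIZE                   ~= 4096 - 16 = 4080 bytes of event data
	 * The data[] flexible array starts right after that header.
	 */
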
  
  /*
   * head_page == tail_page && head == tail then buffer is empty.
  struct ring_buffer_per_cpu {
        int                             cpu;
        struct ring_buffer              *buffer;
 -      spinlock_t                      lock;
 +      spinlock_t                      reader_lock; /* serialize readers */
 +      raw_spinlock_t                  lock;
        struct lock_class_key           lock_key;
        struct list_head                pages;
        struct buffer_page              *head_page;     /* read from head */
  };
  
  struct ring_buffer {
 -      unsigned long                   size;
        unsigned                        pages;
        unsigned                        flags;
        int                             cpus;
@@@ -276,16 -227,32 +282,16 @@@ struct ring_buffer_iter 
        u64                             read_stamp;
  };
  
 +/* buffer may be either ring_buffer or ring_buffer_per_cpu */
  #define RB_WARN_ON(buffer, cond)                              \
 -      do {                                                    \
 -              if (unlikely(cond)) {                           \
 -                      atomic_inc(&buffer->record_disabled);   \
 -                      WARN_ON(1);                             \
 -              }                                               \
 -      } while (0)
 -
 -#define RB_WARN_ON_RET(buffer, cond)                          \
 -      do {                                                    \
 -              if (unlikely(cond)) {                           \
 -                      atomic_inc(&buffer->record_disabled);   \
 -                      WARN_ON(1);                             \
 -                      return -1;                              \
 -              }                                               \
 -      } while (0)
 -
 -#define RB_WARN_ON_ONCE(buffer, cond)                         \
 -      do {                                                    \
 -              static int once;                                \
 -              if (unlikely(cond) && !once) {                  \
 -                      once++;                                 \
 +      ({                                                      \
 +              int _____ret = unlikely(cond);                  \
 +              if (_____ret) {                                 \
                        atomic_inc(&buffer->record_disabled);   \
                        WARN_ON(1);                             \
                }                                               \
 -      } while (0)
 +              _____ret;                                       \
 +      })
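
Because RB_WARN_ON is now a statement expression that evaluates to the (unlikely) condition, callers can warn, disable recording, and bail out in one step. A sketch of the resulting pattern (the surrounding function is invented for illustration; the real conversions follow below in this diff):

	/* Sketch only. */
	static int example_validate(struct ring_buffer_per_cpu *cpu_buffer,
				    struct list_head *head)
	{
		if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
			return -1;	/* already warned and disabled recording */
		return 0;
	}
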
  
  /**
   * check_pages - integrity check of buffer pages
  static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
  
 -      RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
 -      RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
 +      if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
 +              return -1;
 +      if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
 +              return -1;
  
 -      list_for_each_entry_safe(page, tmp, head, list) {
 -              RB_WARN_ON_RET(cpu_buffer,
 -                             page->list.next->prev != &page->list);
 -              RB_WARN_ON_RET(cpu_buffer,
 -                             page->list.prev->next != &page->list);
 +      list_for_each_entry_safe(bpage, tmp, head, list) {
 +              if (RB_WARN_ON(cpu_buffer,
 +                             bpage->list.next->prev != &bpage->list))
 +                      return -1;
 +              if (RB_WARN_ON(cpu_buffer,
 +                             bpage->list.prev->next != &bpage->list))
 +                      return -1;
        }
  
        return 0;
@@@ -320,23 -283,22 +326,23 @@@ static int rb_allocate_pages(struct rin
                             unsigned nr_pages)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
        unsigned long addr;
        LIST_HEAD(pages);
        unsigned i;
  
        for (i = 0; i < nr_pages; i++) {
 -              page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
 +              bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                                    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
 -              if (!page)
 +              if (!bpage)
                        goto free_pages;
 -              list_add(&page->list, &pages);
 +              list_add(&bpage->list, &pages);
  
                addr = __get_free_page(GFP_KERNEL);
                if (!addr)
                        goto free_pages;
 -              page->page = (void *)addr;
 +              bpage->page = (void *)addr;
 +              rb_init_page(bpage->page);
        }
  
        list_splice(&pages, head);
        return 0;
  
   free_pages:
 -      list_for_each_entry_safe(page, tmp, &pages, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, &pages, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        return -ENOMEM;
  }
@@@ -357,7 -319,7 +363,7 @@@ static struct ring_buffer_per_cpu 
  rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
  {
        struct ring_buffer_per_cpu *cpu_buffer;
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        unsigned long addr;
        int ret;
  
  
        cpu_buffer->cpu = cpu;
        cpu_buffer->buffer = buffer;
 -      spin_lock_init(&cpu_buffer->lock);
 +      spin_lock_init(&cpu_buffer->reader_lock);
 +      cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&cpu_buffer->pages);
  
 -      page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
 +      bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                            GFP_KERNEL, cpu_to_node(cpu));
 -      if (!page)
 +      if (!bpage)
                goto fail_free_buffer;
  
 -      cpu_buffer->reader_page = page;
 +      cpu_buffer->reader_page = bpage;
        addr = __get_free_page(GFP_KERNEL);
        if (!addr)
                goto fail_free_reader;
 -      page->page = (void *)addr;
 +      bpage->page = (void *)addr;
 +      rb_init_page(bpage->page);
  
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
  
  static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
  {
        struct list_head *head = &cpu_buffer->pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
  
        list_del_init(&cpu_buffer->reader_page->list);
        free_buffer_page(cpu_buffer->reader_page);
  
 -      list_for_each_entry_safe(page, tmp, head, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, head, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        kfree(cpu_buffer);
  }
@@@ -427,7 -387,7 +433,7 @@@ extern int ring_buffer_page_too_big(voi
  
  /**
   * ring_buffer_alloc - allocate a new ring_buffer
-  * @size: the size in bytes that is needed.
+  * @size: the size in bytes per cpu that is needed.
   * @flags: attributes to set for the ring buffer.
   *
   * Currently the only flag that is available is the RB_FL_OVERWRITE
@@@ -490,6 -450,7 +496,7 @@@ struct ring_buffer *ring_buffer_alloc(u
        kfree(buffer);
        return NULL;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_alloc);
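
To reflect the clarified @size meaning (bytes per cpu rather than total), a call like the sketch below now asks for roughly 64 KiB on every cpu (the helper name and the size are illustrative, not from the patch):

	/* Sketch only. */
	static struct ring_buffer *example_alloc(void)
	{
		/* ~64 KiB of buffer per cpu, oldest data overwritten when full */
		return ring_buffer_alloc(65536, RB_FL_OVERWRITE);
	}
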
  
  /**
   * ring_buffer_free - free a ring buffer.
@@@ -505,13 -466,14 +512,14 @@@ ring_buffer_free(struct ring_buffer *bu
  
        kfree(buffer);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_free);
  
  static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
  
  static void
  rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
  {
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
  
        synchronize_sched();
  
        for (i = 0; i < nr_pages; i++) {
 -              BUG_ON(list_empty(&cpu_buffer->pages));
 +              if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
 +                      return;
                p = cpu_buffer->pages.next;
 -              page = list_entry(p, struct buffer_page, list);
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +              bpage = list_entry(p, struct buffer_page, list);
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
 -      BUG_ON(list_empty(&cpu_buffer->pages));
 +      if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
 +              return;
  
        rb_reset_cpu(cpu_buffer);
  
@@@ -541,7 -501,7 +549,7 @@@ static voi
  rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
                struct list_head *pages, unsigned nr_pages)
  {
 -      struct buffer_page *page;
 +      struct buffer_page *bpage;
        struct list_head *p;
        unsigned i;
  
        synchronize_sched();
  
        for (i = 0; i < nr_pages; i++) {
 -              BUG_ON(list_empty(pages));
 +              if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
 +                      return;
                p = pages->next;
 -              page = list_entry(p, struct buffer_page, list);
 -              list_del_init(&page->list);
 -              list_add_tail(&page->list, &cpu_buffer->pages);
 +              bpage = list_entry(p, struct buffer_page, list);
 +              list_del_init(&bpage->list);
 +              list_add_tail(&bpage->list, &cpu_buffer->pages);
        }
        rb_reset_cpu(cpu_buffer);
  
@@@ -581,7 -540,7 +589,7 @@@ int ring_buffer_resize(struct ring_buff
  {
        struct ring_buffer_per_cpu *cpu_buffer;
        unsigned nr_pages, rm_pages, new_pages;
 -      struct buffer_page *page, *tmp;
 +      struct buffer_page *bpage, *tmp;
        unsigned long buffer_size;
        unsigned long addr;
        LIST_HEAD(pages);
        if (size < buffer_size) {
  
                /* easy case, just free pages */
 -              BUG_ON(nr_pages >= buffer->pages);
 +              if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
 +                      mutex_unlock(&buffer->mutex);
 +                      return -1;
 +              }
  
                rm_pages = buffer->pages - nr_pages;
  
         * add these pages to the cpu_buffers. Otherwise we just free
         * them all and return -ENOMEM;
         */
 -      BUG_ON(nr_pages <= buffer->pages);
 +      if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
 +              mutex_unlock(&buffer->mutex);
 +              return -1;
 +      }
 +
        new_pages = nr_pages - buffer->pages;
  
        for_each_buffer_cpu(buffer, cpu) {
                for (i = 0; i < new_pages; i++) {
 -                      page = kzalloc_node(ALIGN(sizeof(*page),
 +                      bpage = kzalloc_node(ALIGN(sizeof(*bpage),
                                                  cache_line_size()),
                                            GFP_KERNEL, cpu_to_node(cpu));
 -                      if (!page)
 +                      if (!bpage)
                                goto free_pages;
 -                      list_add(&page->list, &pages);
 +                      list_add(&bpage->list, &pages);
                        addr = __get_free_page(GFP_KERNEL);
                        if (!addr)
                                goto free_pages;
 -                      page->page = (void *)addr;
 +                      bpage->page = (void *)addr;
 +                      rb_init_page(bpage->page);
                }
        }
  
                rb_insert_pages(cpu_buffer, &pages, new_pages);
        }
  
 -      BUG_ON(!list_empty(&pages));
 +      if (RB_WARN_ON(buffer, !list_empty(&pages))) {
 +              mutex_unlock(&buffer->mutex);
 +              return -1;
 +      }
  
   out:
        buffer->pages = nr_pages;
        return size;
  
   free_pages:
 -      list_for_each_entry_safe(page, tmp, &pages, list) {
 -              list_del_init(&page->list);
 -              free_buffer_page(page);
 +      list_for_each_entry_safe(bpage, tmp, &pages, list) {
 +              list_del_init(&bpage->list);
 +              free_buffer_page(bpage);
        }
        mutex_unlock(&buffer->mutex);
        return -ENOMEM;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_resize);
  
  static inline int rb_null_event(struct ring_buffer_event *event)
  {
        return event->type == RINGBUF_TYPE_PADDING;
  }
  
 -static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
 +static inline void *
 +__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
 +{
 +      return bpage->data + index;
 +}
 +
 +static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
  {
 -      return page->page + index;
 +      return bpage->page->data + index;
  }
  
  static inline struct ring_buffer_event *
@@@ -724,7 -667,7 +733,7 @@@ static inline unsigned rb_page_write(st
  
  static inline unsigned rb_page_commit(struct buffer_page *bpage)
  {
 -      return local_read(&bpage->commit);
 +      return local_read(&bpage->page->commit);
  }
  
  /* Size is determined by what has been committed */
@@@ -759,8 -702,7 +768,8 @@@ static void rb_update_overflow(struct r
             head += rb_event_length(event)) {
  
                event = __rb_page_index(cpu_buffer->head_page, head);
 -              BUG_ON(rb_null_event(event));
 +              if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 +                      return;
                /* Only count data entries */
                if (event->type != RINGBUF_TYPE_DATA)
                        continue;
  }
  
  static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 -                             struct buffer_page **page)
 +                             struct buffer_page **bpage)
  {
 -      struct list_head *p = (*page)->list.next;
 +      struct list_head *p = (*bpage)->list.next;
  
        if (p == &cpu_buffer->pages)
                p = p->next;
  
 -      *page = list_entry(p, struct buffer_page, list);
 +      *bpage = list_entry(p, struct buffer_page, list);
  }
  
  static inline unsigned
@@@ -813,18 -755,16 +822,18 @@@ rb_set_commit_event(struct ring_buffer_
        addr &= PAGE_MASK;
  
        while (cpu_buffer->commit_page->page != (void *)addr) {
 -              RB_WARN_ON(cpu_buffer,
 -                         cpu_buffer->commit_page == cpu_buffer->tail_page);
 -              cpu_buffer->commit_page->commit =
 +              if (RB_WARN_ON(cpu_buffer,
 +                        cpu_buffer->commit_page == cpu_buffer->tail_page))
 +                      return;
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
 -              cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
 +              cpu_buffer->write_stamp =
 +                      cpu_buffer->commit_page->page->time_stamp;
        }
  
        /* Now set the commit to the event's index */
 -      local_set(&cpu_buffer->commit_page->commit, index);
 +      local_set(&cpu_buffer->commit_page->page->commit, index);
  }
  
  static inline void
@@@ -838,38 -778,25 +847,38 @@@ rb_set_commit_to_write(struct ring_buff
         * back to us). This allows us to do a simple loop to
         * assign the commit to the tail.
         */
 + again:
        while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
 -              cpu_buffer->commit_page->commit =
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
 -              cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
 +              cpu_buffer->write_stamp =
 +                      cpu_buffer->commit_page->page->time_stamp;
                /* add barrier to keep gcc from optimizing too much */
                barrier();
        }
        while (rb_commit_index(cpu_buffer) !=
               rb_page_write(cpu_buffer->commit_page)) {
 -              cpu_buffer->commit_page->commit =
 +              cpu_buffer->commit_page->page->commit =
                        cpu_buffer->commit_page->write;
                barrier();
        }
 +
 +      /* again, keep gcc from optimizing */
 +      barrier();
 +
 +      /*
 +       * If an interrupt came in just after the first while loop
 +       * and pushed the tail page forward, we will be left with
 +       * a dangling commit that will never go forward.
 +       */
 +      if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
 +              goto again;
  }
  
  static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
  {
 -      cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
 +      cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
        cpu_buffer->reader_page->read = 0;
  }
  
@@@ -888,7 -815,7 +897,7 @@@ static inline void rb_inc_iter(struct r
        else
                rb_inc_page(cpu_buffer, &iter->head_page);
  
 -      iter->read_stamp = iter->head_page->time_stamp;
 +      iter->read_stamp = iter->head_page->page->time_stamp;
        iter->head = 0;
  }
  
@@@ -962,15 -889,12 +971,15 @@@ static struct ring_buffer_event 
  __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                  unsigned type, unsigned long length, u64 *ts)
  {
 -      struct buffer_page *tail_page, *head_page, *reader_page;
 +      struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
        unsigned long tail, write;
        struct ring_buffer *buffer = cpu_buffer->buffer;
        struct ring_buffer_event *event;
        unsigned long flags;
  
 +      commit_page = cpu_buffer->commit_page;
 +      /* we just need to protect against interrupts */
 +      barrier();
        tail_page = cpu_buffer->tail_page;
        write = local_add_return(length, &tail_page->write);
        tail = write - length;
        if (write > BUF_PAGE_SIZE) {
                struct buffer_page *next_page = tail_page;
  
 -              spin_lock_irqsave(&cpu_buffer->lock, flags);
 +              local_irq_save(flags);
 +              __raw_spin_lock(&cpu_buffer->lock);
  
                rb_inc_page(cpu_buffer, &next_page);
  
                reader_page = cpu_buffer->reader_page;
  
                /* we grabbed the lock before incrementing */
 -              RB_WARN_ON(cpu_buffer, next_page == reader_page);
 +              if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
 +                      goto out_unlock;
  
                /*
                 * If for some reason, we had an interrupt storm that made
                 * it all the way around the buffer, bail, and warn
                 * about it.
                 */
 -              if (unlikely(next_page == cpu_buffer->commit_page)) {
 +              if (unlikely(next_page == commit_page)) {
                        WARN_ON_ONCE(1);
                        goto out_unlock;
                }
                 */
                if (tail_page == cpu_buffer->tail_page) {
                        local_set(&next_page->write, 0);
 -                      local_set(&next_page->commit, 0);
 +                      local_set(&next_page->page->commit, 0);
                        cpu_buffer->tail_page = next_page;
  
                        /* reread the time stamp */
                        *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 -                      cpu_buffer->tail_page->time_stamp = *ts;
 +                      cpu_buffer->tail_page->page->time_stamp = *ts;
                }
  
                /*
                        rb_set_commit_to_write(cpu_buffer);
                }
  
 -              spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +              __raw_spin_unlock(&cpu_buffer->lock);
 +              local_irq_restore(flags);
  
                /* fail and let the caller try again */
                return ERR_PTR(-EAGAIN);
  
        /* We reserved something on the buffer */
  
 -      BUG_ON(write > BUF_PAGE_SIZE);
 +      if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
 +              return NULL;
  
        event = __rb_page_index(tail_page, tail);
        rb_update_event(event, type, length);
         * this page's time stamp.
         */
        if (!tail && rb_is_commit(cpu_buffer, event))
 -              cpu_buffer->commit_page->time_stamp = *ts;
 +              cpu_buffer->commit_page->page->time_stamp = *ts;
  
        return event;
  
   out_unlock:
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      local_irq_restore(flags);
        return NULL;
  }
  
@@@ -1128,7 -1047,7 +1137,7 @@@ rb_add_time_stamp(struct ring_buffer_pe
                        event->time_delta = *delta & TS_MASK;
                        event->array[0] = *delta >> TS_SHIFT;
                } else {
 -                      cpu_buffer->commit_page->time_stamp = *ts;
 +                      cpu_buffer->commit_page->page->time_stamp = *ts;
                        event->time_delta = 0;
                        event->array[0] = 0;
                }
@@@ -1166,8 -1085,10 +1175,8 @@@ rb_reserve_next_event(struct ring_buffe
         * storm or we have something buggy.
         * Bail!
         */
 -      if (unlikely(++nr_loops > 1000)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
                return NULL;
 -      }
  
        ts = ring_buffer_time_stamp(cpu_buffer->cpu);
  
@@@ -1263,14 -1184,15 +1272,14 @@@ ring_buffer_lock_reserve(struct ring_bu
        struct ring_buffer_event *event;
        int cpu, resched;
  
 -      if (ring_buffers_off)
 +      if (ring_buffer_flags != RB_BUFFERS_ON)
                return NULL;
  
        if (atomic_read(&buffer->record_disabled))
                return NULL;
  
        /* If we are tracing schedule, we don't want to recurse */
 -      resched = need_resched();
 -      preempt_disable_notrace();
 +      resched = ftrace_preempt_disable();
  
        cpu = raw_smp_processor_id();
  
        return event;
  
   out:
 -      if (resched)
 -              preempt_enable_no_resched_notrace();
 -      else
 -              preempt_enable_notrace();
 +      ftrace_preempt_enable(resched);
        return NULL;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
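
The ftrace_preempt_disable()/ftrace_preempt_enable() pair used above lives in kernel/trace/trace.h and is not shown in this diff; a sketch consistent with the open-coded sequence it replaces here:

	/* Sketch only: equivalent to the removed open-coded sequence. */
	static inline int ftrace_preempt_disable(void)
	{
		int resched = need_resched();

		preempt_disable_notrace();
		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}
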
  
  static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
@@@ -1343,13 -1269,17 +1353,14 @@@ int ring_buffer_unlock_commit(struct ri
        /*
         * Only the last preempt count needs to restore preemption.
         */
 -      if (preempt_count() == 1) {
 -              if (per_cpu(rb_need_resched, cpu))
 -                      preempt_enable_no_resched_notrace();
 -              else
 -                      preempt_enable_notrace();
 -      } else
 +      if (preempt_count() == 1)
 +              ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
 +      else
                preempt_enable_no_resched_notrace();
  
        return 0;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
  
  /**
   * ring_buffer_write - write data to the buffer without reserving
@@@ -1375,13 -1305,14 +1386,13 @@@ int ring_buffer_write(struct ring_buffe
        int ret = -EBUSY;
        int cpu, resched;
  
 -      if (ring_buffers_off)
 +      if (ring_buffer_flags != RB_BUFFERS_ON)
                return -EBUSY;
  
        if (atomic_read(&buffer->record_disabled))
                return -EBUSY;
  
 -      resched = need_resched();
 -      preempt_disable_notrace();
 +      resched = ftrace_preempt_disable();
  
        cpu = raw_smp_processor_id();
  
  
        ret = 0;
   out:
 -      if (resched)
 -              preempt_enable_no_resched_notrace();
 -      else
 -              preempt_enable_notrace();
 +      ftrace_preempt_enable(resched);
  
        return ret;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_write);
  
  static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
  {
@@@ -1437,6 -1372,7 +1449,7 @@@ void ring_buffer_record_disable(struct 
  {
        atomic_inc(&buffer->record_disabled);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
  
  /**
   * ring_buffer_record_enable - enable writes to the buffer
@@@ -1449,6 -1385,7 +1462,7 @@@ void ring_buffer_record_enable(struct r
  {
        atomic_dec(&buffer->record_disabled);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
  
  /**
   * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
@@@ -1470,6 -1407,7 +1484,7 @@@ void ring_buffer_record_disable_cpu(str
        cpu_buffer = buffer->buffers[cpu];
        atomic_inc(&cpu_buffer->record_disabled);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
  
  /**
   * ring_buffer_record_enable_cpu - enable writes to the buffer
@@@ -1489,6 -1427,7 +1504,7 @@@ void ring_buffer_record_enable_cpu(stru
        cpu_buffer = buffer->buffers[cpu];
        atomic_dec(&cpu_buffer->record_disabled);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
  
  /**
   * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
@@@ -1505,6 -1444,7 +1521,7 @@@ unsigned long ring_buffer_entries_cpu(s
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->entries;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
  
  /**
   * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
@@@ -1521,6 -1461,7 +1538,7 @@@ unsigned long ring_buffer_overrun_cpu(s
        cpu_buffer = buffer->buffers[cpu];
        return cpu_buffer->overrun;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
  
  /**
   * ring_buffer_entries - get the number of entries in a buffer
@@@ -1543,6 -1484,7 +1561,7 @@@ unsigned long ring_buffer_entries(struc
  
        return entries;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_entries);
  
  /**
   * ring_buffer_overrun_cpu - get the number of overruns in buffer
@@@ -1565,8 -1507,16 +1584,9 @@@ unsigned long ring_buffer_overruns(stru
  
        return overruns;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_overruns);
  
 -/**
 - * ring_buffer_iter_reset - reset an iterator
 - * @iter: The iterator to reset
 - *
 - * Resets the iterator, so that it will start from the beginning
 - * again.
 - */
 -void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 +static void rb_iter_reset(struct ring_buffer_iter *iter)
  {
        struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
  
        if (iter->head)
                iter->read_stamp = cpu_buffer->read_stamp;
        else
 -              iter->read_stamp = iter->head_page->time_stamp;
 +              iter->read_stamp = iter->head_page->page->time_stamp;
 +}
 +
 +/**
 + * ring_buffer_iter_reset - reset an iterator
 + * @iter: The iterator to reset
 + *
 + * Resets the iterator, so that it will start from the beginning
 + * again.
 + */
 +void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      rb_iter_reset(iter);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
  
  /**
   * ring_buffer_iter_empty - check if an iterator has no more to read
@@@ -1614,6 -1548,7 +1635,7 @@@ int ring_buffer_iter_empty(struct ring_
        return iter->head_page == cpu_buffer->commit_page &&
                iter->head == rb_commit_index(cpu_buffer);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
  
  static void
  rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
@@@ -1684,8 -1619,7 +1706,8 @@@ rb_get_reader_page(struct ring_buffer_p
        unsigned long flags;
        int nr_loops = 0;
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 +      local_irq_save(flags);
 +      __raw_spin_lock(&cpu_buffer->lock);
  
   again:
        /*
         * a case where we will loop three times. There should be no
         * reason to loop four times (that I know of).
         */
 -      if (unlikely(++nr_loops > 3)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
                reader = NULL;
                goto out;
        }
                goto out;
  
        /* Never should we have an index greater than the size */
 -      RB_WARN_ON(cpu_buffer,
 -                 cpu_buffer->reader_page->read > rb_page_size(reader));
 +      if (RB_WARN_ON(cpu_buffer,
 +                     cpu_buffer->reader_page->read > rb_page_size(reader)))
 +              goto out;
  
        /* check if we caught up to the tail */
        reader = NULL;
        cpu_buffer->reader_page->list.prev = reader->list.prev;
  
        local_set(&cpu_buffer->reader_page->write, 0);
 -      local_set(&cpu_buffer->reader_page->commit, 0);
 +      local_set(&cpu_buffer->reader_page->page->commit, 0);
  
        /* Make the reader page now replace the head */
        reader->list.prev->next = &cpu_buffer->reader_page->list;
        goto again;
  
   out:
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      local_irq_restore(flags);
  
        return reader;
  }
@@@ -1762,8 -1695,7 +1784,8 @@@ static void rb_advance_reader(struct ri
        reader = rb_get_reader_page(cpu_buffer);
  
        /* This function should not be called when buffer is empty */
 -      BUG_ON(!reader);
 +      if (RB_WARN_ON(cpu_buffer, !reader))
 +              return;
  
        event = rb_reader_event(cpu_buffer);
  
@@@ -1790,9 -1722,7 +1812,9 @@@ static void rb_advance_iter(struct ring
         * Check if we are at the end of the buffer.
         */
        if (iter->head >= rb_page_size(iter->head_page)) {
 -              BUG_ON(iter->head_page == cpu_buffer->commit_page);
 +              if (RB_WARN_ON(buffer,
 +                             iter->head_page == cpu_buffer->commit_page))
 +                      return;
                rb_inc_iter(iter);
                return;
        }
         * This should not be called to advance the header if we are
         * at the tail of the buffer.
         */
 -      BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
 -             (iter->head + length > rb_commit_index(cpu_buffer)));
 +      if (RB_WARN_ON(cpu_buffer,
 +                     (iter->head_page == cpu_buffer->commit_page) &&
 +                     (iter->head + length > rb_commit_index(cpu_buffer))))
 +              return;
  
        rb_update_iter_read_stamp(iter, event);
  
                rb_advance_iter(iter);
  }
  
 -/**
 - * ring_buffer_peek - peek at the next event to be read
 - * @buffer: The ring buffer to read
 - * @cpu: The cpu to peak at
 - * @ts: The timestamp counter of this event.
 - *
 - * This will return the event that will be read next, but does
 - * not consume the data.
 - */
 -struct ring_buffer_event *
 -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 +static struct ring_buffer_event *
 +rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
  {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
         * can have.  Nesting 10 deep of interrupts is clearly
         * an anomaly.
         */
 -      if (unlikely(++nr_loops > 10)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
                return NULL;
 -      }
  
        reader = rb_get_reader_page(cpu_buffer);
        if (!reader)
  
        return NULL;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_peek);
  
 -/**
 - * ring_buffer_iter_peek - peek at the next event to be read
 - * @iter: The ring buffer iterator
 - * @ts: The timestamp counter of this event.
 - *
 - * This will return the event that will be read next, but does
 - * not increment the iterator.
 - */
 -struct ring_buffer_event *
 -ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 +static struct ring_buffer_event *
 +rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
  {
        struct ring_buffer *buffer;
        struct ring_buffer_per_cpu *cpu_buffer;
         * can have. Nesting 10 deep of interrupts is clearly
         * an anomaly.
         */
 -      if (unlikely(++nr_loops > 10)) {
 -              RB_WARN_ON(cpu_buffer, 1);
 +      if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
                return NULL;
 -      }
  
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
  
        return NULL;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
  
 +/**
 + * ring_buffer_peek - peek at the next event to be read
 + * @buffer: The ring buffer to read
 + * @cpu: The cpu to peek at
 + * @ts: The timestamp counter of this event.
 + *
 + * This will return the event that will be read next, but does
 + * not consume the data.
 + */
 +struct ring_buffer_event *
 +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 +      struct ring_buffer_event *event;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_buffer_peek(buffer, cpu, ts);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return event;
 +}
 +
 +/**
 + * ring_buffer_iter_peek - peek at the next event to be read
 + * @iter: The ring buffer iterator
 + * @ts: The timestamp counter of this event.
 + *
 + * This will return the event that will be read next, but does
 + * not increment the iterator.
 + */
 +struct ring_buffer_event *
 +ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      struct ring_buffer_event *event;
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_iter_peek(iter, ts);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return event;
 +}
 +
  /**
   * ring_buffer_consume - return an event and consume it
   * @buffer: The ring buffer to get the next event from
  struct ring_buffer_event *
  ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  {
 -      struct ring_buffer_per_cpu *cpu_buffer;
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct ring_buffer_event *event;
 +      unsigned long flags;
  
        if (!cpu_isset(cpu, buffer->cpumask))
                return NULL;
  
 -      event = ring_buffer_peek(buffer, cpu, ts);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      event = rb_buffer_peek(buffer, cpu, ts);
        if (!event)
 -              return NULL;
 +              goto out;
  
 -      cpu_buffer = buffer->buffers[cpu];
        rb_advance_reader(cpu_buffer);
  
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
        return event;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  
  /**
   * ring_buffer_read_start - start a non consuming read of the buffer
@@@ -2051,14 -1953,13 +2076,15 @@@ ring_buffer_read_start(struct ring_buff
        atomic_inc(&cpu_buffer->record_disabled);
        synchronize_sched();
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 -      ring_buffer_iter_reset(iter);
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      __raw_spin_lock(&cpu_buffer->lock);
 +      rb_iter_reset(iter);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  
        return iter;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_read_start);
  
  /**
   * ring_buffer_finish - finish reading the iterator of the buffer
@@@ -2075,6 -1976,7 +2101,7 @@@ ring_buffer_read_finish(struct ring_buf
        atomic_dec(&cpu_buffer->record_disabled);
        kfree(iter);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
  
  /**
   * ring_buffer_read - read the next item in the ring buffer by the iterator
@@@ -2087,20 -1989,16 +2114,21 @@@ struct ring_buffer_event 
  ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  {
        struct ring_buffer_event *event;
 +      struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 +      unsigned long flags;
  
 -      event = ring_buffer_iter_peek(iter, ts);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +      event = rb_iter_peek(iter, ts);
        if (!event)
 -              return NULL;
 +              goto out;
  
        rb_advance_iter(iter);
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  
        return event;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_read);
  
  /**
   * ring_buffer_size - return the size of the ring buffer (in bytes)
@@@ -2110,6 -2008,7 +2138,7 @@@ unsigned long ring_buffer_size(struct r
  {
        return BUF_PAGE_SIZE * buffer->pages;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_size);
  
  static void
  rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->head_page
                = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
        local_set(&cpu_buffer->head_page->write, 0);
 -      local_set(&cpu_buffer->head_page->commit, 0);
 +      local_set(&cpu_buffer->head_page->page->commit, 0);
  
        cpu_buffer->head_page->read = 0;
  
  
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
        local_set(&cpu_buffer->reader_page->write, 0);
 -      local_set(&cpu_buffer->reader_page->commit, 0);
 +      local_set(&cpu_buffer->reader_page->page->commit, 0);
        cpu_buffer->reader_page->read = 0;
  
        cpu_buffer->overrun = 0;
@@@ -2146,16 -2045,13 +2175,17 @@@ void ring_buffer_reset_cpu(struct ring_
        if (!cpu_isset(cpu, buffer->cpumask))
                return;
  
 -      spin_lock_irqsave(&cpu_buffer->lock, flags);
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      __raw_spin_lock(&cpu_buffer->lock);
  
        rb_reset_cpu(cpu_buffer);
  
 -      spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
  
  /**
   * ring_buffer_reset - reset a ring buffer
@@@ -2168,6 -2064,7 +2198,7 @@@ void ring_buffer_reset(struct ring_buff
        for_each_buffer_cpu(buffer, cpu)
                ring_buffer_reset_cpu(buffer, cpu);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset);
  
  /**
   * ring_buffer_empty - is the ring buffer empty?
@@@ -2186,6 -2083,7 +2217,7 @@@ int ring_buffer_empty(struct ring_buffe
        }
        return 1;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_empty);
  
  /**
   * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
@@@ -2202,6 -2100,7 +2234,7 @@@ int ring_buffer_empty_cpu(struct ring_b
        cpu_buffer = buffer->buffers[cpu];
        return rb_per_cpu_empty(cpu_buffer);
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
  
  /**
   * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
@@@ -2224,7 -2123,8 +2257,7 @@@ int ring_buffer_swap_cpu(struct ring_bu
                return -EINVAL;
  
        /* At least make sure the two buffers are somewhat the same */
 -      if (buffer_a->size != buffer_b->size ||
 -          buffer_a->pages != buffer_b->pages)
 +      if (buffer_a->pages != buffer_b->pages)
                return -EINVAL;
  
        cpu_buffer_a = buffer_a->buffers[cpu];
  
        return 0;
  }
+ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
  
 +static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 +                            struct buffer_data_page *bpage)
 +{
 +      struct ring_buffer_event *event;
 +      unsigned long head;
 +
 +      __raw_spin_lock(&cpu_buffer->lock);
 +      for (head = 0; head < local_read(&bpage->commit);
 +           head += rb_event_length(event)) {
 +
 +              event = __rb_data_page_index(bpage, head);
 +              if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 +                      return;
 +              /* Only count data entries */
 +              if (event->type != RINGBUF_TYPE_DATA)
 +                      continue;
 +              cpu_buffer->entries--;
 +      }
 +      __raw_spin_unlock(&cpu_buffer->lock);
 +}
 +
 +/**
 + * ring_buffer_alloc_read_page - allocate a page to read from buffer
 + * @buffer: the buffer to allocate for.
 + *
 + * This function is used in conjunction with ring_buffer_read_page.
 + * When reading a full page from the ring buffer, these functions
 + * can be used to speed up the process. The calling function should
 + * allocate a few pages first with this function. Then when it
 + * needs to get pages from the ring buffer, it passes the result
 + * of this function into ring_buffer_read_page, which will swap
 + * the page that was allocated, with the read page of the buffer.
 + *
 + * Returns:
 + *  The page allocated, or NULL on error.
 + */
 +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 +{
 +      unsigned long addr;
 +      struct buffer_data_page *bpage;
 +
 +      addr = __get_free_page(GFP_KERNEL);
 +      if (!addr)
 +              return NULL;
 +
 +      bpage = (void *)addr;
 +
 +      return bpage;
 +}
 +
 +/**
 + * ring_buffer_free_read_page - free an allocated read page
 + * @buffer: the buffer the page was allocated for
 + * @data: the page to free
 + *
 + * Free a page allocated from ring_buffer_alloc_read_page.
 + */
 +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
 +{
 +      free_page((unsigned long)data);
 +}
 +
 +/**
 + * ring_buffer_read_page - extract a page from the ring buffer
 + * @buffer: buffer to extract from
 + * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 + * @cpu: the cpu of the buffer to extract
 + * @full: should the extraction only happen when the page is full.
 + *
 + * This function will pull out a page from the ring buffer and consume it.
 + * @data_page must be the address of the variable that was returned
 + * from ring_buffer_alloc_read_page. This is because the page might be used
 + * to swap with a page in the ring buffer.
 + *
 + * for example:
 + *    rpage = ring_buffer_alloc_read_page(buffer);
 + *    if (!rpage)
 + *            return error;
 + *    ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
 + *    if (ret)
 + *            process_page(rpage);
 + *
 + * When @full is set, the function will not return true unless
 + * the writer is off the reader page.
 + *
 + * Note: it is up to the calling functions to handle sleeps and wakeups.
 + *  The ring buffer can be used anywhere in the kernel and can not
 + *  blindly call wake_up. The layer that uses the ring buffer must be
 + *  responsible for that.
 + *
 + * Returns:
 + *  1 if data has been transferred
 + *  0 if no data has been transferred.
 + */
 +int ring_buffer_read_page(struct ring_buffer *buffer,
 +                          void **data_page, int cpu, int full)
 +{
 +      struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 +      struct ring_buffer_event *event;
 +      struct buffer_data_page *bpage;
 +      unsigned long flags;
 +      int ret = 0;
 +
 +      if (!data_page)
 +              return 0;
 +
 +      bpage = *data_page;
 +      if (!bpage)
 +              return 0;
 +
 +      spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 +
 +      /*
 +       * rb_buffer_peek will get the next ring buffer if
 +       * the current reader page is empty.
 +       */
 +      event = rb_buffer_peek(buffer, cpu, NULL);
 +      if (!event)
 +              goto out;
 +
 +      /* check for data */
 +      if (!local_read(&cpu_buffer->reader_page->page->commit))
 +              goto out;
 +      /*
 +       * If the writer is already off the reader page, then simply
 +       * switch the reader page with the given page. Otherwise
 +       * we need to copy the data from the reader page to the given page.
 +       */
 +      if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
 +              unsigned int read = cpu_buffer->reader_page->read;
 +
 +              if (full)
 +                      goto out;
 +              /* The writer is still on the reader page, we must copy */
 +              bpage = cpu_buffer->reader_page->page;
 +              memcpy(bpage->data,
 +                     cpu_buffer->reader_page->page->data + read,
 +                     local_read(&bpage->commit) - read);
 +
 +              /* consume what was read */
 +              cpu_buffer->reader_page->read += read;
 +
 +      } else {
 +              /* swap the pages */
 +              rb_init_page(bpage);
 +              bpage = cpu_buffer->reader_page->page;
 +              cpu_buffer->reader_page->page = *data_page;
 +              cpu_buffer->reader_page->read = 0;
 +              *data_page = bpage;
 +      }
 +      ret = 1;
 +
 +      /* update the entry counter */
 +      rb_remove_entries(cpu_buffer, bpage);
 + out:
 +      spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 +
 +      return ret;
 +}
 +
  static ssize_t
  rb_simple_read(struct file *filp, char __user *ubuf,
               size_t cnt, loff_t *ppos)
  {
 -      int *p = filp->private_data;
 +      long *p = filp->private_data;
        char buf[64];
        int r;
  
 -      /* !ring_buffers_off == tracing_on */
 -      r = sprintf(buf, "%d\n", !*p);
 +      if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
 +              r = sprintf(buf, "permanently disabled\n");
 +      else
 +              r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
  
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
@@@ -2431,7 -2170,7 +2465,7 @@@ static ssize_
  rb_simple_write(struct file *filp, const char __user *ubuf,
                size_t cnt, loff_t *ppos)
  {
 -      int *p = filp->private_data;
 +      long *p = filp->private_data;
        char buf[64];
        long val;
        int ret;
        if (ret < 0)
                return ret;
  
 -      /* !ring_buffers_off == tracing_on */
 -      *p = !val;
 +      if (val)
 +              set_bit(RB_BUFFERS_ON_BIT, p);
 +      else
 +              clear_bit(RB_BUFFERS_ON_BIT, p);
  
        (*ppos)++;
  
@@@ -2473,7 -2210,7 +2507,7 @@@ static __init int rb_init_debugfs(void
        d_tracer = tracing_init_dentry();
  
        entry = debugfs_create_file("tracing_on", 0644, d_tracer,
 -                                  &ring_buffers_off, &rb_simple_fops);
 +                                  &ring_buffer_flags, &rb_simple_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_on' entry\n");
  
diff --combined kernel/trace/trace.c
index f4bb3800318bd7a777efa47eb4cfc01032f0ba85,a96b335fe75ca80bf4fcbdc75d5575a5c19a88d7..3608f6cb2f7ad5918d45d5887737a7c7c0e6b50c
@@@ -30,7 -30,6 +30,7 @@@
  #include <linux/gfp.h>
  #include <linux/fs.h>
  #include <linux/kprobes.h>
 +#include <linux/seq_file.h>
  #include <linux/writeback.h>
  
  #include <linux/stacktrace.h>
  unsigned long __read_mostly   tracing_max_latency = (cycle_t)ULONG_MAX;
  unsigned long __read_mostly   tracing_thresh;
  
 +/*
 + * We need to change this state when a selftest is running.
 + * A selftest will look into the ring buffer to count the
 + * entries inserted during the selftest, although some concurrent
 + * insertions into the ring buffer, such as ftrace_printk, could occur
 + * at the same time, giving false positive or negative results.
 + */
 +static bool __read_mostly tracing_selftest_running;
 +
 +/* For tracers that don't implement custom flags */
 +static struct tracer_opt dummy_tracer_opt[] = {
 +      { }
 +};
 +
 +static struct tracer_flags dummy_tracer_flags = {
 +      .val = 0,
 +      .opts = dummy_tracer_opt
 +};
 +
 +static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 +{
 +      return 0;
 +}
 +
 +/*
 + * Kill all tracing for good (never come back).
 + * It is initialized to 1, and the only place that sets it back
 + * to zero is a successful initialization of the tracer.
 + */
 +int tracing_disabled = 1;
 +
  static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
  
  static inline void ftrace_disable_cpu(void)
@@@ -95,36 -62,7 +95,36 @@@ static cpumask_t __read_mostly              tracing
  #define for_each_tracing_cpu(cpu)     \
        for_each_cpu_mask(cpu, tracing_buffer_mask)
  
 -static int tracing_disabled = 1;
 +/*
 + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 + *
 + * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 + * is set, then ftrace_dump is called. This will output the contents
 + * of the ftrace buffers to the console.  This is very useful for
 + * capturing traces that lead to crashes and outputting them to a
 + * serial console.
 + *
 + * It is off by default, but you can enable it either by specifying
 + * "ftrace_dump_on_oops" on the kernel command line or by setting
 + * /proc/sys/kernel/ftrace_dump_on_oops to true.
 + */
 +int ftrace_dump_on_oops;
 +
 +static int tracing_set_tracer(char *buf);
 +
 +static int __init set_ftrace(char *str)
 +{
 +      tracing_set_tracer(str);
 +      return 1;
 +}
 +__setup("ftrace", set_ftrace);
 +
 +static int __init set_ftrace_dump_on_oops(char *str)
 +{
 +      ftrace_dump_on_oops = 1;
 +      return 1;
 +}
 +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
  
  long
  ns2usecs(cycle_t nsec)
@@@ -174,19 -112,6 +174,19 @@@ static DEFINE_PER_CPU(struct trace_arra
  /* tracer_enabled is used to toggle activation of a tracer */
  static int                    tracer_enabled = 1;
  
 +/**
 + * tracing_is_enabled - return tracer_enabled status
 + *
 + * This function is used by other tracers to know the status
 + * of the tracer_enabled flag.  Tracers may use this function
 + * to know whether they should enable their features when starting
 + * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
 + */
 +int tracing_is_enabled(void)
 +{
 +      return tracer_enabled;
 +}
 +
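As a rough illustration of the pattern described in the comment above, a tracer's start path can consult tracing_is_enabled() before arming its probes. This is only a sketch; start_my_tracer() and my_arm_probes() are invented names, not part of this patch (the real-world reference is the irqsoff tracer's start_irqsoff_tracer()):

    #include "trace.h"

    /* Hypothetical stand-in for a tracer's own probe setup. */
    static void my_arm_probes(struct trace_array *tr)
    {
            /* register function hooks, reset per-cpu state, ... */
    }

    /* Sketch of a tracer start path, modelled on the irqsoff tracer. */
    static void start_my_tracer(struct trace_array *tr)
    {
            /* Only arm the probes if the global tracer is currently enabled. */
            if (tracing_is_enabled())
                    my_arm_probes(tr);
    }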
  /* function tracing enabled */
  int                           ftrace_function_enabled;
  
@@@ -228,9 -153,8 +228,9 @@@ static DEFINE_MUTEX(trace_types_lock)
  /* trace_wait is a waitqueue for tasks blocked on trace_poll */
  static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
  
 -/* trace_flags holds iter_ctrl options */
 -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
 +/* trace_flags holds trace_options default values */
 +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 +      TRACE_ITER_ANNOTATE;
  
  /**
   * trace_wake_up - wake up tasks waiting for trace input
@@@ -269,6 -193,13 +269,6 @@@ unsigned long nsecs_to_usecs(unsigned l
        return nsecs / 1000;
  }
  
 -/*
 - * TRACE_ITER_SYM_MASK masks the options in trace_flags that
 - * control the output of kernel symbols.
 - */
 -#define TRACE_ITER_SYM_MASK \
 -      (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
 -
  /* These must match the bit positions in trace_iterator_flags */
  static const char *trace_options[] = {
        "print-parent",
        "stacktrace",
        "sched-tree",
        "ftrace_printk",
 +      "ftrace_preempt",
 +      "branch",
 +      "annotate",
 +      "userstacktrace",
 +      "sym-userobj",
 +      "printk-msg-only",
        NULL
  };
  
@@@ -321,7 -246,7 +321,7 @@@ __update_max_tr(struct trace_array *tr
  
        memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
        data->pid = tsk->pid;
 -      data->uid = tsk->uid;
 +      data->uid = task_uid(tsk);
        data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
        data->policy = tsk->policy;
        data->rt_priority = tsk->rt_priority;
@@@ -434,28 -359,6 +434,28 @@@ trace_seq_putmem_hex(struct trace_seq *
        return trace_seq_putmem(s, hex, j);
  }
  
 +static int
 +trace_seq_path(struct trace_seq *s, struct path *path)
 +{
 +      unsigned char *p;
 +
 +      if (s->len >= (PAGE_SIZE - 1))
 +              return 0;
 +      p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
 +      if (!IS_ERR(p)) {
 +              p = mangle_path(s->buffer + s->len, p, "\n");
 +              if (p) {
 +                      s->len = p - s->buffer;
 +                      return 1;
 +              }
 +      } else {
 +              s->buffer[s->len++] = '?';
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
  static void
  trace_seq_reset(struct trace_seq *s)
  {
@@@ -567,17 -470,7 +567,17 @@@ int register_tracer(struct tracer *type
                return -1;
        }
  
 +      /*
 +       * When this gets called we hold the BKL which means that
 +       * preemption is disabled. Various trace selftests however
 +       * need to disable and enable preemption for successful tests.
 +       * So we drop the BKL here and grab it after the tests again.
 +       */
 +      unlock_kernel();
        mutex_lock(&trace_types_lock);
 +
 +      tracing_selftest_running = true;
 +
        for (t = trace_types; t; t = t->next) {
                if (strcmp(type->name, t->name) == 0) {
                        /* already found */
                }
        }
  
 +      if (!type->set_flag)
 +              type->set_flag = &dummy_set_flag;
 +      if (!type->flags)
 +              type->flags = &dummy_tracer_flags;
 +      else
 +              if (!type->flags->opts)
 +                      type->flags->opts = dummy_tracer_opt;
 +
  #ifdef CONFIG_FTRACE_STARTUP_TEST
        if (type->selftest) {
                struct tracer *saved_tracer = current_trace;
                struct trace_array *tr = &global_trace;
 -              int saved_ctrl = tr->ctrl;
                int i;
 +
                /*
                 * Run a selftest on this tracer.
                 * Here we reset the trace buffer, and set the current
                 * tracer to be this tracer. The tracer can then run some
                 * internal tracing to verify that everything is in order.
                 * If we fail, we do not register this tracer.
                 */
 -              for_each_tracing_cpu(i) {
 +              for_each_tracing_cpu(i)
                        tracing_reset(tr, i);
 -              }
 +
                current_trace = type;
 -              tr->ctrl = 0;
                /* the test is responsible for initializing and enabling */
                pr_info("Testing tracer %s: ", type->name);
                ret = type->selftest(type, tr);
                /* the test is responsible for resetting too */
                current_trace = saved_tracer;
 -              tr->ctrl = saved_ctrl;
                if (ret) {
                        printk(KERN_CONT "FAILED!\n");
                        goto out;
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
 -              for_each_tracing_cpu(i) {
 +              for_each_tracing_cpu(i)
                        tracing_reset(tr, i);
 -              }
 +
                printk(KERN_CONT "PASSED\n");
        }
  #endif
                max_tracer_type_len = len;
  
   out:
 +      tracing_selftest_running = false;
        mutex_unlock(&trace_types_lock);
 +      lock_kernel();
  
        return ret;
  }
@@@ -679,16 -564,6 +679,16 @@@ void tracing_reset(struct trace_array *
        ftrace_enable_cpu();
  }
  
 +void tracing_reset_online_cpus(struct trace_array *tr)
 +{
 +      int cpu;
 +
 +      tr->time_start = ftrace_now(tr->cpu);
 +
 +      for_each_online_cpu(cpu)
 +              tracing_reset(tr, cpu);
 +}
 +
  #define SAVED_CMDLINES 128
  static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
  static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
@@@ -706,91 -581,6 +706,91 @@@ static void trace_init_cmdlines(void
        cmdline_idx = 0;
  }
  
 +static int trace_stop_count;
 +static DEFINE_SPINLOCK(tracing_start_lock);
 +
 +/**
 + * ftrace_off_permanent - disable all ftrace code permanently
 + *
 + * This should only be called when a serious anomaly has
 + * been detected.  This will turn off function tracing,
 + * the ring buffers, and other tracing utilities. It takes no
 + * locks and can be called from any context.
 + */
 +void ftrace_off_permanent(void)
 +{
 +      tracing_disabled = 1;
 +      ftrace_stop();
 +      tracing_off_permanent();
 +}
 +
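A hedged sketch of how a caller might use the hook defined above: some integrity check detects corruption and shuts tracing down for good. The check itself (my_buffer_looks_corrupted()) is invented for illustration and is not part of this patch:

    #include <linux/kernel.h>
    #include <linux/ftrace.h>

    /* Hypothetical integrity check over some tracing-related state. */
    static int my_buffer_looks_corrupted(void)
    {
            return 0;       /* a real check would validate pointers, counters, ... */
    }

    static void my_sanity_check(void)
    {
            if (my_buffer_looks_corrupted()) {
                    /* Takes no locks, so this is safe from any context. */
                    ftrace_off_permanent();
                    WARN_ON_ONCE(1);
            }
    }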
 +/**
 + * tracing_start - quick start of the tracer
 + *
 + * If tracing is enabled but was stopped by tracing_stop,
 + * this will start the tracer back up.
 + */
 +void tracing_start(void)
 +{
 +      struct ring_buffer *buffer;
 +      unsigned long flags;
 +
 +      if (tracing_disabled)
 +              return;
 +
 +      spin_lock_irqsave(&tracing_start_lock, flags);
 +      if (--trace_stop_count)
 +              goto out;
 +
 +      if (trace_stop_count < 0) {
 +              /* Someone screwed up their debugging */
 +              WARN_ON_ONCE(1);
 +              trace_stop_count = 0;
 +              goto out;
 +      }
 +
 +      buffer = global_trace.buffer;
 +      if (buffer)
 +              ring_buffer_record_enable(buffer);
 +
 +      buffer = max_tr.buffer;
 +      if (buffer)
 +              ring_buffer_record_enable(buffer);
 +
 +      ftrace_start();
 + out:
 +      spin_unlock_irqrestore(&tracing_start_lock, flags);
 +}
 +
 +/**
 + * tracing_stop - quick stop of the tracer
 + *
 + * Lightweight way to stop tracing. Use in conjunction with
 + * tracing_start.
 + */
 +void tracing_stop(void)
 +{
 +      struct ring_buffer *buffer;
 +      unsigned long flags;
 +
 +      ftrace_stop();
 +      spin_lock_irqsave(&tracing_start_lock, flags);
 +      if (trace_stop_count++)
 +              goto out;
 +
 +      buffer = global_trace.buffer;
 +      if (buffer)
 +              ring_buffer_record_disable(buffer);
 +
 +      buffer = max_tr.buffer;
 +      if (buffer)
 +              ring_buffer_record_disable(buffer);
 +
 + out:
 +      spin_unlock_irqrestore(&tracing_start_lock, flags);
 +}
 +
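tracing_stop() and tracing_start() nest through trace_stop_count, so a caller can bracket a sensitive operation without caring whether someone else has already stopped tracing. A minimal usage sketch using only the two functions defined above (do_something_disruptive() is a hypothetical placeholder); the buffer_size_kb write path later in this patch follows the same stop/resize/start pattern:

    /* Hypothetical operation that must not race with live tracing. */
    static void do_something_disruptive(void)
    {
    }

    /* Quiesce tracing around the operation, then resume it. */
    static void my_do_quiet_work(void)
    {
            tracing_stop();
            do_something_disruptive();
            tracing_start();
    }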
  void trace_stop_cmdline_recording(void);
  
  static void trace_save_cmdline(struct task_struct *tsk)
        spin_unlock(&trace_cmdline_lock);
  }
  
 -static char *trace_find_cmdline(int pid)
 +char *trace_find_cmdline(int pid)
  {
        char *cmdline = "<...>";
        unsigned map;
@@@ -865,7 -655,6 +865,7 @@@ tracing_generic_entry_update(struct tra
  
        entry->preempt_count            = pc & 0xff;
        entry->pid                      = (tsk) ? tsk->pid : 0;
 +      entry->tgid                     = (tsk) ? tsk->tgid : 0;
        entry->flags =
  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@@ -902,56 -691,6 +902,56 @@@ trace_function(struct trace_array *tr, 
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
  }
  
 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 +static void __trace_graph_entry(struct trace_array *tr,
 +                              struct trace_array_cpu *data,
 +                              struct ftrace_graph_ent *trace,
 +                              unsigned long flags,
 +                              int pc)
 +{
 +      struct ring_buffer_event *event;
 +      struct ftrace_graph_ent_entry *entry;
 +      unsigned long irq_flags;
 +
 +      if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
 +              return;
 +
 +      event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
 +                                       &irq_flags);
 +      if (!event)
 +              return;
 +      entry   = ring_buffer_event_data(event);
 +      tracing_generic_entry_update(&entry->ent, flags, pc);
 +      entry->ent.type                 = TRACE_GRAPH_ENT;
 +      entry->graph_ent                        = *trace;
 +      ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
 +}
 +
 +static void __trace_graph_return(struct trace_array *tr,
 +                              struct trace_array_cpu *data,
 +                              struct ftrace_graph_ret *trace,
 +                              unsigned long flags,
 +                              int pc)
 +{
 +      struct ring_buffer_event *event;
 +      struct ftrace_graph_ret_entry *entry;
 +      unsigned long irq_flags;
 +
 +      if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
 +              return;
 +
 +      event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
 +                                       &irq_flags);
 +      if (!event)
 +              return;
 +      entry   = ring_buffer_event_data(event);
 +      tracing_generic_entry_update(&entry->ent, flags, pc);
 +      entry->ent.type                 = TRACE_GRAPH_RET;
 +      entry->ret                              = *trace;
 +      ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
 +}
 +#endif
 +
  void
  ftrace(struct trace_array *tr, struct trace_array_cpu *data,
         unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@@ -1003,46 -742,6 +1003,46 @@@ void __trace_stack(struct trace_array *
        ftrace_trace_stack(tr, data, flags, skip, preempt_count());
  }
  
 +static void ftrace_trace_userstack(struct trace_array *tr,
 +                 struct trace_array_cpu *data,
 +                 unsigned long flags, int pc)
 +{
 +#ifdef CONFIG_STACKTRACE
 +      struct ring_buffer_event *event;
 +      struct userstack_entry *entry;
 +      struct stack_trace trace;
 +      unsigned long irq_flags;
 +
 +      if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
 +              return;
 +
 +      event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 +                                       &irq_flags);
 +      if (!event)
 +              return;
 +      entry   = ring_buffer_event_data(event);
 +      tracing_generic_entry_update(&entry->ent, flags, pc);
 +      entry->ent.type         = TRACE_USER_STACK;
 +
 +      memset(&entry->caller, 0, sizeof(entry->caller));
 +
 +      trace.nr_entries        = 0;
 +      trace.max_entries       = FTRACE_STACK_ENTRIES;
 +      trace.skip              = 0;
 +      trace.entries           = entry->caller;
 +
 +      save_stack_trace_user(&trace);
 +      ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 +#endif
 +}
 +
 +void __trace_userstack(struct trace_array *tr,
 +                 struct trace_array_cpu *data,
 +                 unsigned long flags)
 +{
 +      ftrace_trace_userstack(tr, data, flags, preempt_count());
 +}
 +
  static void
  ftrace_trace_special(void *__tr, void *__data,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
        entry->arg3                     = arg3;
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, irq_flags, 4, pc);
 +      ftrace_trace_userstack(tr, data, irq_flags, pc);
  
        trace_wake_up();
  }
@@@ -1105,7 -803,6 +1105,7 @@@ tracing_sched_switch_trace(struct trace
        entry->next_cpu = task_cpu(next);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 5, pc);
 +      ftrace_trace_userstack(tr, data, flags, pc);
  }
  
  void
@@@ -1135,7 -832,6 +1135,7 @@@ tracing_sched_wakeup_trace(struct trace
        entry->next_cpu                 = task_cpu(wakee);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 6, pc);
 +      ftrace_trace_userstack(tr, data, flags, pc);
  
        trace_wake_up();
  }
@@@ -1145,28 -841,26 +1145,28 @@@ ftrace_special(unsigned long arg1, unsi
  {
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
 +      unsigned long flags;
        int cpu;
        int pc;
  
 -      if (tracing_disabled || !tr->ctrl)
 +      if (tracing_disabled)
                return;
  
        pc = preempt_count();
 -      preempt_disable_notrace();
 +      local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
  
 -      if (likely(!atomic_read(&data->disabled)))
 +      if (likely(atomic_inc_return(&data->disabled) == 1))
                ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
  
 -      preempt_enable_notrace();
 +      atomic_dec(&data->disabled);
 +      local_irq_restore(flags);
  }
  
  #ifdef CONFIG_FUNCTION_TRACER
  static void
 -function_trace_call(unsigned long ip, unsigned long parent_ip)
 +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
  {
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
                return;
  
        pc = preempt_count();
 -      resched = need_resched();
 -      preempt_disable_notrace();
 +      resched = ftrace_preempt_disable();
        local_save_flags(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
                trace_function(tr, data, ip, parent_ip, flags, pc);
  
        atomic_dec(&data->disabled);
 -      if (resched)
 -              preempt_enable_no_resched_notrace();
 -      else
 -              preempt_enable_notrace();
 +      ftrace_preempt_enable(resched);
  }
  
 +static void
 +function_trace_call(unsigned long ip, unsigned long parent_ip)
 +{
 +      struct trace_array *tr = &global_trace;
 +      struct trace_array_cpu *data;
 +      unsigned long flags;
 +      long disabled;
 +      int cpu;
 +      int pc;
 +
 +      if (unlikely(!ftrace_function_enabled))
 +              return;
 +
 +      /*
 +       * Need to use raw, since this must be called before the
 +       * recursive protection is performed.
 +       */
 +      local_irq_save(flags);
 +      cpu = raw_smp_processor_id();
 +      data = tr->data[cpu];
 +      disabled = atomic_inc_return(&data->disabled);
 +
 +      if (likely(disabled == 1)) {
 +              pc = preempt_count();
 +              trace_function(tr, data, ip, parent_ip, flags, pc);
 +      }
 +
 +      atomic_dec(&data->disabled);
 +      local_irq_restore(flags);
 +}
 +
 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 +int trace_graph_entry(struct ftrace_graph_ent *trace)
 +{
 +      struct trace_array *tr = &global_trace;
 +      struct trace_array_cpu *data;
 +      unsigned long flags;
 +      long disabled;
 +      int cpu;
 +      int pc;
 +
 +      if (!ftrace_trace_task(current))
 +              return 0;
 +
 +      if (!ftrace_graph_addr(trace->func))
 +              return 0;
 +
 +      local_irq_save(flags);
 +      cpu = raw_smp_processor_id();
 +      data = tr->data[cpu];
 +      disabled = atomic_inc_return(&data->disabled);
 +      if (likely(disabled == 1)) {
 +              pc = preempt_count();
 +              __trace_graph_entry(tr, data, trace, flags, pc);
 +      }
 +      /* Only do the atomic if it is not already set */
 +      if (!test_tsk_trace_graph(current))
 +              set_tsk_trace_graph(current);
 +      atomic_dec(&data->disabled);
 +      local_irq_restore(flags);
 +
 +      return 1;
 +}
 +
 +void trace_graph_return(struct ftrace_graph_ret *trace)
 +{
 +      struct trace_array *tr = &global_trace;
 +      struct trace_array_cpu *data;
 +      unsigned long flags;
 +      long disabled;
 +      int cpu;
 +      int pc;
 +
 +      local_irq_save(flags);
 +      cpu = raw_smp_processor_id();
 +      data = tr->data[cpu];
 +      disabled = atomic_inc_return(&data->disabled);
 +      if (likely(disabled == 1)) {
 +              pc = preempt_count();
 +              __trace_graph_return(tr, data, trace, flags, pc);
 +      }
 +      if (!trace->depth)
 +              clear_tsk_trace_graph(current);
 +      atomic_dec(&data->disabled);
 +      local_irq_restore(flags);
 +}
 +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 +
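trace_graph_entry() and trace_graph_return() are the per-event callbacks; a graph-style tracer hands them to the ftrace core when it initializes. A hedged sketch of that registration, assuming register_ftrace_graph() of this era takes the return callback first and the entry callback second, and that unregister_ftrace_graph() takes no arguments; my_graph_trace_init()/my_graph_trace_reset() are invented wrappers:

    #include <linux/ftrace.h>
    #include "trace.h"

    static int my_graph_trace_init(struct trace_array *tr)
    {
            /* return-callback first, entry-callback second (assumed order) */
            return register_ftrace_graph(&trace_graph_return,
                                         &trace_graph_entry);
    }

    static void my_graph_trace_reset(struct trace_array *tr)
    {
            unregister_ftrace_graph();
    }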
  static struct ftrace_ops trace_ops __read_mostly =
  {
        .func = function_trace_call,
  void tracing_start_function_trace(void)
  {
        ftrace_function_enabled = 0;
 +
 +      if (trace_flags & TRACE_ITER_PREEMPTONLY)
 +              trace_ops.func = function_trace_call_preempt_only;
 +      else
 +              trace_ops.func = function_trace_call;
 +
        register_ftrace_function(&trace_ops);
 -      if (tracer_enabled)
 -              ftrace_function_enabled = 1;
 +      ftrace_function_enabled = 1;
  }
  
  void tracing_stop_function_trace(void)
  
  enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
 +      TRACE_FILE_ANNOTATE     = 2,
  };
  
- static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+ static void trace_iterator_increment(struct trace_iterator *iter)
  {
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
@@@ -1389,7 -993,7 +1389,7 @@@ static void *find_next_entry_inc(struc
        iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
  
        if (iter->ent)
-               trace_iterator_increment(iter, iter->cpu);
+               trace_iterator_increment(iter);
  
        return iter->ent ? iter : NULL;
  }
@@@ -1443,6 -1047,10 +1443,6 @@@ static void *s_start(struct seq_file *m
  
        atomic_inc(&trace_record_cmdline_disabled);
  
 -      /* let the tracer grab locks here if needed */
 -      if (current_trace->start)
 -              current_trace->start(iter);
 -
        if (*pos != iter->pos) {
                iter->ent = NULL;
                iter->cpu = 0;
  
  static void s_stop(struct seq_file *m, void *p)
  {
 -      struct trace_iterator *iter = m->private;
 -
        atomic_dec(&trace_record_cmdline_disabled);
 -
 -      /* let the tracer release locks here if needed */
 -      if (current_trace && current_trace == iter->trace && iter->trace->stop)
 -              iter->trace->stop(iter);
 -
        mutex_unlock(&trace_types_lock);
  }
  
@@@ -1528,7 -1143,7 +1528,7 @@@ seq_print_sym_offset(struct trace_seq *
  # define IP_FMT "%016lx"
  #endif
  
 -static int
 +int
  seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
  {
        int ret;
        return ret;
  }
  
 +static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 +                                  unsigned long ip, unsigned long sym_flags)
 +{
 +      struct file *file = NULL;
 +      unsigned long vmstart = 0;
 +      int ret = 1;
 +
 +      if (mm) {
 +              const struct vm_area_struct *vma;
 +
 +              down_read(&mm->mmap_sem);
 +              vma = find_vma(mm, ip);
 +              if (vma) {
 +                      file = vma->vm_file;
 +                      vmstart = vma->vm_start;
 +              }
 +              if (file) {
 +                      ret = trace_seq_path(s, &file->f_path);
 +                      if (ret)
 +                              ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
 +              }
 +              up_read(&mm->mmap_sem);
 +      }
 +      if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
 +              ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
 +      return ret;
 +}
 +
 +static int
 +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 +                    unsigned long sym_flags)
 +{
 +      struct mm_struct *mm = NULL;
 +      int ret = 1;
 +      unsigned int i;
 +
 +      if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
 +              struct task_struct *task;
 +              /*
 +               * we do the lookup on the thread group leader,
 +               * since individual threads might have already quit!
 +               */
 +              rcu_read_lock();
 +              task = find_task_by_vpid(entry->ent.tgid);
 +              if (task)
 +                      mm = get_task_mm(task);
 +              rcu_read_unlock();
 +      }
 +
 +      for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 +              unsigned long ip = entry->caller[i];
 +
 +              if (ip == ULONG_MAX || !ret)
 +                      break;
 +              if (i && ret)
 +                      ret = trace_seq_puts(s, " <- ");
 +              if (!ip) {
 +                      if (ret)
 +                              ret = trace_seq_puts(s, "??");
 +                      continue;
 +              }
 +              if (!ret)
 +                      break;
 +              if (ret)
 +                      ret = seq_print_user_ip(s, mm, ip, sym_flags);
 +      }
 +
 +      if (mm)
 +              mmput(mm);
 +      return ret;
 +}
 +
  static void print_lat_help_header(struct seq_file *m)
  {
        seq_puts(m, "#                  _------=> CPU#            \n");
@@@ -1758,13 -1301,6 +1758,13 @@@ lat_print_timestamp(struct trace_seq *s
  
  static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
  
 +static int task_state_char(unsigned long state)
 +{
 +      int bit = state ? __ffs(state) + 1 : 0;
 +
 +      return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
 +}
 +
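A quick worked example of the helper above: TASK_RUNNING (0) maps straight to index 0 and prints 'R'; TASK_INTERRUPTIBLE (0x1) gives __ffs() == 0, so index 1 and 'S'; any bit beyond the known state string falls back to '?'. The user-space sketch below mirrors that logic purely for illustration; the state string is assumed to match TASK_STATE_TO_CHAR_STR of this kernel era:

    #include <stdio.h>

    static const char state_to_char[] = "RSDTtZX";   /* assumed TASK_STATE_TO_CHAR_STR */

    static int task_state_char(unsigned long state)
    {
            int bit = state ? __builtin_ctzl(state) + 1 : 0;   /* __builtin_ctzl ~ __ffs */

            return bit < (int)sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
    }

    int main(void)
    {
            printf("%c %c %c\n",
                   task_state_char(0),          /* 'R' */
                   task_state_char(1),          /* 'S' (TASK_INTERRUPTIBLE) */
                   task_state_char(1UL << 10)); /* '?' (unknown state bit)  */
            return 0;
    }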
  /*
   * The message is supposed to contain an ending newline.
   * If the printing stops prematurely, try to add a newline of our own.
@@@ -1802,23 -1338,6 +1802,23 @@@ void trace_seq_print_cont(struct trace_
                trace_seq_putc(s, '\n');
  }
  
 +static void test_cpu_buff_start(struct trace_iterator *iter)
 +{
 +      struct trace_seq *s = &iter->seq;
 +
 +      if (!(trace_flags & TRACE_ITER_ANNOTATE))
 +              return;
 +
 +      if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 +              return;
 +
 +      if (cpu_isset(iter->cpu, iter->started))
 +              return;
 +
 +      cpu_set(iter->cpu, iter->started);
 +      trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
 +}
 +
  static enum print_line_t
  print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
  {
        char *comm;
        int S, T;
        int i;
 -      unsigned state;
  
        if (entry->type == TRACE_CONT)
                return TRACE_TYPE_HANDLED;
  
 +      test_cpu_buff_start(iter);
 +
        next_entry = find_next_entry(iter, NULL, &next_ts);
        if (!next_entry)
                next_ts = iter->ts;
  
                trace_assign_type(field, entry);
  
 -              T = field->next_state < sizeof(state_to_char) ?
 -                      state_to_char[field->next_state] : 'X';
 -
 -              state = field->prev_state ?
 -                      __ffs(field->prev_state) + 1 : 0;
 -              S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
 +              T = task_state_char(field->next_state);
 +              S = task_state_char(field->prev_state);
                comm = trace_find_cmdline(field->next_pid);
                trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
                                 field->prev_pid,
                        trace_seq_print_cont(s, iter);
                break;
        }
 +      case TRACE_BRANCH: {
 +              struct trace_branch *field;
 +
 +              trace_assign_type(field, entry);
 +
 +              trace_seq_printf(s, "[%s] %s:%s:%d\n",
 +                               field->correct ? "  ok  " : " MISS ",
 +                               field->func,
 +                               field->file,
 +                               field->line);
 +              break;
 +      }
 +      case TRACE_USER_STACK: {
 +              struct userstack_entry *field;
 +
 +              trace_assign_type(field, entry);
 +
 +              seq_print_userip_objs(field, s, sym_flags);
 +              trace_seq_putc(s, '\n');
 +              break;
 +      }
        default:
                trace_seq_printf(s, "Unknown type %d\n", entry->type);
        }
@@@ -1971,8 -1472,6 +1971,8 @@@ static enum print_line_t print_trace_fm
        if (entry->type == TRACE_CONT)
                return TRACE_TYPE_HANDLED;
  
 +      test_cpu_buff_start(iter);
 +
        comm = trace_find_cmdline(iter->ent->pid);
  
        t = ns2usecs(iter->ts);
  
                trace_assign_type(field, entry);
  
 -              S = field->prev_state < sizeof(state_to_char) ?
 -                      state_to_char[field->prev_state] : 'X';
 -              T = field->next_state < sizeof(state_to_char) ?
 -                      state_to_char[field->next_state] : 'X';
 +              T = task_state_char(field->next_state);
 +              S = task_state_char(field->prev_state);
                ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
                                       field->prev_pid,
                                       field->prev_prio,
                        trace_seq_print_cont(s, iter);
                break;
        }
 +      case TRACE_GRAPH_RET: {
 +              return print_graph_function(iter);
 +      }
 +      case TRACE_GRAPH_ENT: {
 +              return print_graph_function(iter);
 +      }
 +      case TRACE_BRANCH: {
 +              struct trace_branch *field;
 +
 +              trace_assign_type(field, entry);
 +
 +              trace_seq_printf(s, "[%s] %s:%s:%d\n",
 +                               field->correct ? "  ok  " : " MISS ",
 +                               field->func,
 +                               field->file,
 +                               field->line);
 +              break;
 +      }
 +      case TRACE_USER_STACK: {
 +              struct userstack_entry *field;
 +
 +              trace_assign_type(field, entry);
 +
 +              ret = seq_print_userip_objs(field, s, sym_flags);
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +              ret = trace_seq_putc(s, '\n');
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +              break;
 +      }
        }
        return TRACE_TYPE_HANDLED;
  }
@@@ -2151,9 -1621,12 +2151,9 @@@ static enum print_line_t print_raw_fmt(
  
                trace_assign_type(field, entry);
  
 -              S = field->prev_state < sizeof(state_to_char) ?
 -                      state_to_char[field->prev_state] : 'X';
 -              T = field->next_state < sizeof(state_to_char) ?
 -                      state_to_char[field->next_state] : 'X';
 -              if (entry->type == TRACE_WAKE)
 -                      S = '+';
 +              T = task_state_char(field->next_state);
 +              S = entry->type == TRACE_WAKE ? '+' :
 +                      task_state_char(field->prev_state);
                ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
                                       field->prev_pid,
                                       field->prev_prio,
                break;
        }
        case TRACE_SPECIAL:
 +      case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
@@@ -2240,9 -1712,12 +2240,9 @@@ static enum print_line_t print_hex_fmt(
  
                trace_assign_type(field, entry);
  
 -              S = field->prev_state < sizeof(state_to_char) ?
 -                      state_to_char[field->prev_state] : 'X';
 -              T = field->next_state < sizeof(state_to_char) ?
 -                      state_to_char[field->next_state] : 'X';
 -              if (entry->type == TRACE_WAKE)
 -                      S = '+';
 +              T = task_state_char(field->next_state);
 +              S = entry->type == TRACE_WAKE ? '+' :
 +                      task_state_char(field->prev_state);
                SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
                SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
                SEQ_PUT_HEX_FIELD_RET(s, S);
                break;
        }
        case TRACE_SPECIAL:
 +      case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
        return TRACE_TYPE_HANDLED;
  }
  
 +static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
 +{
 +      struct trace_seq *s = &iter->seq;
 +      struct trace_entry *entry = iter->ent;
 +      struct print_entry *field;
 +      int ret;
 +
 +      trace_assign_type(field, entry);
 +
 +      ret = trace_seq_printf(s, field->buf);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      if (entry->flags & TRACE_FLAG_CONT)
 +              trace_seq_print_cont(s, iter);
 +
 +      return TRACE_TYPE_HANDLED;
 +}
 +
  static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
  {
        struct trace_seq *s = &iter->seq;
                break;
        }
        case TRACE_SPECIAL:
 +      case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
@@@ -2369,11 -1823,6 +2369,11 @@@ static enum print_line_t print_trace_li
                        return ret;
        }
  
 +      if (iter->ent->type == TRACE_PRINT &&
 +                      trace_flags & TRACE_ITER_PRINTK &&
 +                      trace_flags & TRACE_ITER_PRINTK_MSGONLY)
 +              return print_printk_msg_only(iter);
 +
        if (trace_flags & TRACE_ITER_BIN)
                return print_bin_fmt(iter);
  
@@@ -2398,9 -1847,7 +2398,9 @@@ static int s_show(struct seq_file *m, v
                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
                        seq_puts(m, "#\n");
                }
 -              if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
 +              if (iter->trace && iter->trace->print_header)
 +                      iter->trace->print_header(m);
 +              else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
                        /* print nothing if the buffers are empty */
                        if (trace_empty(iter))
                                return 0;
@@@ -2452,15 -1899,6 +2452,15 @@@ __tracing_open(struct inode *inode, str
        iter->trace = current_trace;
        iter->pos = -1;
  
 +      /* Notify the tracer early, before we stop tracing. */
 +      if (iter->trace && iter->trace->open)
 +              iter->trace->open(iter);
 +
 +      /* Annotate start of buffers if we had overruns */
 +      if (ring_buffer_overruns(iter->tr->buffer))
 +              iter->iter_flags |= TRACE_FILE_ANNOTATE;
 +
        for_each_tracing_cpu(cpu) {
  
                iter->buffer_iter[cpu] =
        m->private = iter;
  
        /* stop the trace while dumping */
 -      if (iter->tr->ctrl) {
 -              tracer_enabled = 0;
 -              ftrace_function_enabled = 0;
 -      }
 -
 -      if (iter->trace && iter->trace->open)
 -                      iter->trace->open(iter);
 +      tracing_stop();
  
        mutex_unlock(&trace_types_lock);
  
@@@ -2522,7 -1966,14 +2522,7 @@@ int tracing_release(struct inode *inode
                iter->trace->close(iter);
  
        /* reenable tracing if it was previously enabled */
 -      if (iter->tr->ctrl) {
 -              tracer_enabled = 1;
 -              /*
 -               * It is safe to enable function tracing even if it
 -               * isn't used
 -               */
 -              ftrace_function_enabled = 1;
 -      }
 +      tracing_start();
        mutex_unlock(&trace_types_lock);
  
        seq_release(inode, file);
@@@ -2700,7 -2151,7 +2700,7 @@@ tracing_cpumask_write(struct file *filp
        if (err)
                goto err_unlock;
  
 -      raw_local_irq_disable();
 +      local_irq_disable();
        __raw_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
                /*
                }
        }
        __raw_spin_unlock(&ftrace_max_lock);
 -      raw_local_irq_enable();
 +      local_irq_enable();
  
        tracing_cpumask = tracing_cpumask_new;
  
@@@ -2738,16 -2189,13 +2738,16 @@@ static struct file_operations tracing_c
  };
  
  static ssize_t
 -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
 +tracing_trace_options_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
  {
 +      int i;
        char *buf;
        int r = 0;
        int len = 0;
 -      int i;
 +      u32 tracer_flags = current_trace->flags->val;
 +      struct tracer_opt *trace_opts = current_trace->flags->opts;
 +
  
        /* calculate max size */
        for (i = 0; trace_options[i]; i++) {
                len += 3; /* "no" and space */
        }
  
 +      /*
 +       * Increase the size to account for the names of options
 +       * specific to the current tracer.
 +       */
 +      for (i = 0; trace_opts[i].name; i++) {
 +              len += strlen(trace_opts[i].name);
 +              len += 3; /* "no" and space */
 +      }
 +
        /* +2 for \n and \0 */
        buf = kmalloc(len + 2, GFP_KERNEL);
        if (!buf)
                        r += sprintf(buf + r, "no%s ", trace_options[i]);
        }
  
 +      for (i = 0; trace_opts[i].name; i++) {
 +              if (tracer_flags & trace_opts[i].bit)
 +                      r += sprintf(buf + r, "%s ",
 +                              trace_opts[i].name);
 +              else
 +                      r += sprintf(buf + r, "no%s ",
 +                              trace_opts[i].name);
 +      }
 +
        r += sprintf(buf + r, "\n");
        WARN_ON(r >= len + 2);
  
        return r;
  }
  
 +/* Try to assign a tracer-specific option */
 +static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 +{
 +      struct tracer_flags *trace_flags = trace->flags;
 +      struct tracer_opt *opts = NULL;
 +      int ret = 0, i = 0;
 +      int len;
 +
 +      for (i = 0; trace_flags->opts[i].name; i++) {
 +              opts = &trace_flags->opts[i];
 +              len = strlen(opts->name);
 +
 +              if (strncmp(cmp, opts->name, len) == 0) {
 +                      ret = trace->set_flag(trace_flags->val,
 +                              opts->bit, !neg);
 +                      break;
 +              }
 +      }
 +      /* Not found */
 +      if (!trace_flags->opts[i].name)
 +              return -EINVAL;
 +
 +      /* Refused to handle */
 +      if (ret)
 +              return ret;
 +
 +      if (neg)
 +              trace_flags->val &= ~opts->bit;
 +      else
 +              trace_flags->val |= opts->bit;
 +
 +      return 0;
 +}
 +
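For reference, the tracer-side half of this mechanism is a tracer_flags/tracer_opt table plus a set_flag() callback, as the dummy definitions earlier in this patch suggest. A hedged sketch of what a tracer exposing one private option might declare; the option name "myopt" and the bit value are invented:

    #include "trace.h"

    #define TRACE_MYTRACER_MYOPT    0x1     /* hypothetical option bit */

    static struct tracer_opt my_tracer_opts[] = {
            /* toggled via "echo myopt > trace_options" / "echo nomyopt > ..." */
            { .name = "myopt", .bit = TRACE_MYTRACER_MYOPT },
            { }     /* terminator: set_tracer_option() stops at a NULL name */
    };

    static struct tracer_flags my_tracer_flags = {
            .val    = 0,                    /* option off by default */
            .opts   = my_tracer_opts,
    };

    static int my_set_flag(u32 old_flags, u32 bit, int set)
    {
            /* react to the change here; return non-zero to refuse it */
            return 0;
    }

    /* The tracer would then point .flags at &my_tracer_flags and
     * .set_flag at my_set_flag in its struct tracer definition.
     */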
  static ssize_t
 -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
 +tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
  {
        char buf[64];
        char *cmp = buf;
        int neg = 0;
 +      int ret;
        int i;
  
        if (cnt >= sizeof(buf))
                        break;
                }
        }
 -      /*
 -       * If no option could be set, return an error:
 -       */
 -      if (!trace_options[i])
 -              return -EINVAL;
 +
 +      /* If no option could be set, test the specific tracer options */
 +      if (!trace_options[i]) {
 +              ret = set_tracer_option(current_trace, cmp, neg);
 +              if (ret)
 +                      return ret;
 +      }
  
        filp->f_pos += cnt;
  
  
  static struct file_operations tracing_iter_fops = {
        .open           = tracing_open_generic,
 -      .read           = tracing_iter_ctrl_read,
 -      .write          = tracing_iter_ctrl_write,
 +      .read           = tracing_trace_options_read,
 +      .write          = tracing_trace_options_write,
  };
  
  static const char readme_msg[] =
        "# echo sched_switch > /debug/tracing/current_tracer\n"
        "# cat /debug/tracing/current_tracer\n"
        "sched_switch\n"
 -      "# cat /debug/tracing/iter_ctrl\n"
 +      "# cat /debug/tracing/trace_options\n"
        "noprint-parent nosym-offset nosym-addr noverbose\n"
 -      "# echo print-parent > /debug/tracing/iter_ctrl\n"
 +      "# echo print-parent > /debug/tracing/trace_options\n"
        "# echo 1 > /debug/tracing/tracing_enabled\n"
        "# cat /debug/tracing/trace > /tmp/trace.txt\n"
        "echo 0 > /debug/tracing/tracing_enabled\n"
@@@ -2918,10 -2311,11 +2918,10 @@@ static ssize_
  tracing_ctrl_read(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
  {
 -      struct trace_array *tr = filp->private_data;
        char buf[64];
        int r;
  
 -      r = sprintf(buf, "%ld\n", tr->ctrl);
 +      r = sprintf(buf, "%u\n", tracer_enabled);
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
@@@ -2949,18 -2343,16 +2949,18 @@@ tracing_ctrl_write(struct file *filp, c
        val = !!val;
  
        mutex_lock(&trace_types_lock);
 -      if (tr->ctrl ^ val) {
 -              if (val)
 +      if (tracer_enabled ^ val) {
 +              if (val) {
                        tracer_enabled = 1;
 -              else
 +                      if (current_trace->start)
 +                              current_trace->start(tr);
 +                      tracing_start();
 +              } else {
                        tracer_enabled = 0;
 -
 -              tr->ctrl = val;
 -
 -              if (current_trace && current_trace->ctrl_update)
 -                      current_trace->ctrl_update(tr);
 +                      tracing_stop();
 +                      if (current_trace->stop)
 +                              current_trace->stop(tr);
 +              }
        }
        mutex_unlock(&trace_types_lock);
  
@@@ -2986,11 -2378,29 +2986,11 @@@ tracing_set_trace_read(struct file *fil
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
 -static ssize_t
 -tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 -                      size_t cnt, loff_t *ppos)
 +static int tracing_set_tracer(char *buf)
  {
        struct trace_array *tr = &global_trace;
        struct tracer *t;
 -      char buf[max_tracer_type_len+1];
 -      int i;
 -      size_t ret;
 -
 -      ret = cnt;
 -
 -      if (cnt > max_tracer_type_len)
 -              cnt = max_tracer_type_len;
 -
 -      if (copy_from_user(&buf, ubuf, cnt))
 -              return -EFAULT;
 -
 -      buf[cnt] = 0;
 -
 -      /* strip ending whitespace. */
 -      for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
 -              buf[i] = 0;
 +      int ret = 0;
  
        mutex_lock(&trace_types_lock);
        for (t = trace_types; t; t = t->next) {
        if (t == current_trace)
                goto out;
  
 +      trace_branch_disable();
        if (current_trace && current_trace->reset)
                current_trace->reset(tr);
  
        current_trace = t;
 -      if (t->init)
 -              t->init(tr);
 +      if (t->init) {
 +              ret = t->init(tr);
 +              if (ret)
 +                      goto out;
 +      }
  
 +      trace_branch_enable(tr);
   out:
        mutex_unlock(&trace_types_lock);
  
 -      if (ret > 0)
 -              filp->f_pos += ret;
 +      return ret;
 +}
 +
 +static ssize_t
 +tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 +                      size_t cnt, loff_t *ppos)
 +{
 +      char buf[max_tracer_type_len+1];
 +      int i;
 +      size_t ret;
 +      int err;
 +
 +      ret = cnt;
 +
 +      if (cnt > max_tracer_type_len)
 +              cnt = max_tracer_type_len;
 +
 +      if (copy_from_user(&buf, ubuf, cnt))
 +              return -EFAULT;
 +
 +      buf[cnt] = 0;
 +
 +      /* strip ending whitespace. */
 +      for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
 +              buf[i] = 0;
 +
 +      err = tracing_set_tracer(buf);
 +      if (err)
 +              return err;
 +
 +      filp->f_pos += ret;
  
        return ret;
  }
@@@ -3116,10 -2492,6 +3116,10 @@@ static int tracing_open_pipe(struct ino
                return -ENOMEM;
  
        mutex_lock(&trace_types_lock);
 +
 +      /* trace pipe does not show start of buffer */
 +      cpus_setall(iter->started);
 +
        iter->tr = &global_trace;
        iter->trace = current_trace;
        filp->private_data = iter;
@@@ -3295,7 -2667,7 +3295,7 @@@ tracing_entries_read(struct file *filp
        char buf[64];
        int r;
  
 -      r = sprintf(buf, "%lu\n", tr->entries);
 +      r = sprintf(buf, "%lu\n", tr->entries >> 10);
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
@@@ -3306,6 -2678,7 +3306,6 @@@ tracing_entries_write(struct file *filp
        unsigned long val;
        char buf[64];
        int ret, cpu;
 -      struct trace_array *tr = filp->private_data;
  
        if (cnt >= sizeof(buf))
                return -EINVAL;
  
        mutex_lock(&trace_types_lock);
  
 -      if (tr->ctrl) {
 -              cnt = -EBUSY;
 -              pr_info("ftrace: please disable tracing"
 -                      " before modifying buffer size\n");
 -              goto out;
 -      }
 +      tracing_stop();
  
        /* disable all cpu buffers */
        for_each_tracing_cpu(cpu) {
                        atomic_inc(&max_tr.data[cpu]->disabled);
        }
  
 +      /* value is in KB */
 +      val <<= 10;
 +
        if (val != global_trace.entries) {
                ret = ring_buffer_resize(global_trace.buffer, val);
                if (ret < 0) {
                        atomic_dec(&max_tr.data[cpu]->disabled);
        }
  
 +      tracing_start();
        max_tr.entries = global_trace.entries;
        mutex_unlock(&trace_types_lock);
  
@@@ -3388,7 -2762,7 +3388,7 @@@ static int mark_printk(const char *fmt
        int ret;
        va_list args;
        va_start(args, fmt);
 -      ret = trace_vprintk(0, fmt, args);
 +      ret = trace_vprintk(0, -1, fmt, args);
        va_end(args);
        return ret;
  }
@@@ -3399,8 -2773,9 +3399,8 @@@ tracing_mark_write(struct file *filp, c
  {
        char *buf;
        char *end;
 -      struct trace_array *tr = &global_trace;
  
 -      if (!tr->ctrl || tracing_disabled)
 +      if (tracing_disabled)
                return -EINVAL;
  
        if (cnt > TRACE_BUF_SIZE)
@@@ -3466,38 -2841,22 +3466,38 @@@ static struct file_operations tracing_m
  
  #ifdef CONFIG_DYNAMIC_FTRACE
  
 +int __weak ftrace_arch_read_dyn_info(char *buf, int size)
 +{
 +      return 0;
 +}
 +
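The __weak definition above is the default that contributes nothing extra; an architecture can provide a strong definition to append its own counters to dyn_ftrace_total_info. A hedged sketch of such an override (the statistic shown is invented):

    #include <linux/kernel.h>

    /* Hypothetical per-arch statistic appended after the update count. */
    static unsigned long my_arch_patched_sites;

    /* A non-weak definition like this overrides the stub in trace.c. */
    int ftrace_arch_read_dyn_info(char *buf, int size)
    {
            /* scnprintf returns the number of characters actually written */
            return scnprintf(buf, size, "patched:%lu", my_arch_patched_sites);
    }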
  static ssize_t
 -tracing_read_long(struct file *filp, char __user *ubuf,
 +tracing_read_dyn_info(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
  {
 +      static char ftrace_dyn_info_buffer[1024];
 +      static DEFINE_MUTEX(dyn_info_mutex);
        unsigned long *p = filp->private_data;
 -      char buf[64];
 +      char *buf = ftrace_dyn_info_buffer;
 +      int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
        int r;
  
 -      r = sprintf(buf, "%ld\n", *p);
 +      mutex_lock(&dyn_info_mutex);
 +      r = sprintf(buf, "%ld ", *p);
  
 -      return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 +      r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
 +      buf[r++] = '\n';
 +
 +      r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 +
 +      mutex_unlock(&dyn_info_mutex);
 +
 +      return r;
  }
  
 -static struct file_operations tracing_read_long_fops = {
 +static struct file_operations tracing_dyn_info_fops = {
        .open           = tracing_open_generic,
 -      .read           = tracing_read_long,
 +      .read           = tracing_read_dyn_info,
  };
  #endif
  
@@@ -3538,10 -2897,10 +3538,10 @@@ static __init int tracer_init_debugfs(v
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
  
 -      entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
 +      entry = debugfs_create_file("trace_options", 0644, d_tracer,
                                    NULL, &tracing_iter_fops);
        if (!entry)
 -              pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
 +              pr_warning("Could not create debugfs 'trace_options' entry\n");
  
        entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
                                    NULL, &tracing_cpumask_fops);
                pr_warning("Could not create debugfs "
                           "'trace_pipe' entry\n");
  
 -      entry = debugfs_create_file("trace_entries", 0644, d_tracer,
 +      entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
                                    &global_trace, &tracing_entries_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
 -                         "'trace_entries' entry\n");
 +                         "'buffer_size_kb' entry\n");
  
        entry = debugfs_create_file("trace_marker", 0220, d_tracer,
                                    NULL, &tracing_mark_fops);
  #ifdef CONFIG_DYNAMIC_FTRACE
        entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                                    &ftrace_update_tot_cnt,
 -                                  &tracing_read_long_fops);
 +                                  &tracing_dyn_info_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'dyn_ftrace_total_info' entry\n");
        return 0;
  }
  
 -int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
  {
        static DEFINE_SPINLOCK(trace_buf_lock);
        static char trace_buf[TRACE_BUF_SIZE];
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
 -      struct print_entry *entry;
 -      unsigned long flags, irq_flags;
        int cpu, len = 0, size, pc;
 +      struct print_entry *entry;
 +      unsigned long irq_flags;
  
 -      if (!tr->ctrl || tracing_disabled)
 +      if (tracing_disabled || tracing_selftest_running)
                return 0;
  
        pc = preempt_count();
        if (unlikely(atomic_read(&data->disabled)))
                goto out;
  
 -      spin_lock_irqsave(&trace_buf_lock, flags);
 +      pause_graph_tracing();
 +      spin_lock_irqsave(&trace_buf_lock, irq_flags);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
  
        len = min(len, TRACE_BUF_SIZE-1);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
 -      tracing_generic_entry_update(&entry->ent, flags, pc);
 +      tracing_generic_entry_update(&entry->ent, irq_flags, pc);
        entry->ent.type                 = TRACE_PRINT;
        entry->ip                       = ip;
 +      entry->depth                    = depth;
  
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
  
   out_unlock:
 -      spin_unlock_irqrestore(&trace_buf_lock, flags);
 -
 +      spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
 +      unpause_graph_tracing();
   out:
        preempt_enable_notrace();
  
@@@ -3680,7 -3037,7 +3680,7 @@@ int __ftrace_printk(unsigned long ip, c
                return 0;
  
        va_start(ap, fmt);
 -      ret = trace_vprintk(ip, fmt, ap);
 +      ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
        va_end(ap);
        return ret;
  }
@@@ -3689,8 -3046,7 +3689,8 @@@ EXPORT_SYMBOL_GPL(__ftrace_printk)
  static int trace_panic_handler(struct notifier_block *this,
                               unsigned long event, void *unused)
  {
 -      ftrace_dump();
 +      if (ftrace_dump_on_oops)
 +              ftrace_dump();
        return NOTIFY_OK;
  }
  
@@@ -3706,8 -3062,7 +3706,8 @@@ static int trace_die_handler(struct not
  {
        switch (val) {
        case DIE_OOPS:
 -              ftrace_dump();
 +              if (ftrace_dump_on_oops)
 +                      ftrace_dump();
                break;
        default:
                break;
@@@ -3748,6 -3103,7 +3748,6 @@@ trace_printk_seq(struct trace_seq *s
        trace_seq_reset(s);
  }
  
 -
  void ftrace_dump(void)
  {
        static DEFINE_SPINLOCK(ftrace_dump_lock);
                atomic_inc(&global_trace.data[cpu]->disabled);
        }
  
 +      /* don't look at user memory in panic mode */
 +      trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 +
        printk(KERN_TRACE "Dumping ftrace buffer:\n");
  
        iter.tr = &global_trace;
@@@ -3868,6 -3221,7 +3868,6 @@@ __init static int tracer_alloc_buffers(
  #endif
  
        /* All seems OK, enable tracing */
 -      global_trace.ctrl = tracer_enabled;
        tracing_disabled = 0;
  
        atomic_notifier_chain_register(&panic_notifier_list,