* 'oprofile-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
oprofile: select RING_BUFFER
ring_buffer: adding EXPORT_SYMBOLs
oprofile: fix lost sample counter
oprofile: remove nr_available_slots()
oprofile: port to the new ring_buffer
ring_buffer: add remaining cpu functions to ring_buffer.h
oprofile: moving cpu_buffer_reset() to cpu_buffer.h
oprofile: adding cpu_buffer_entries()
oprofile: adding cpu_buffer_write_commit()
oprofile: adding cpu buffer r/w access functions
ftrace: remove unused function arg in trace_iterator_increment()
ring_buffer: update description for ring_buffer_alloc()
oprofile: set values to default when creating oprofilefs
oprofile: implement switch/case in buffer_sync.c
x86/oprofile: cleanup IBS init/exit functions in op_model_amd.c
x86/oprofile: reordering IBS code in op_model_amd.c
oprofile: fix typo
oprofile: whitspace changes only
oprofile: update comment for oprofile_add_sample()
oprofile: comment cleanup
* size = 8 bytes
*
* @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
- * array[0] = tv_nsec
- * array[1] = tv_sec
+ * array[0] = tv_nsec
+ * array[1..2] = tv_sec
* size = 16 bytes
*
* @RINGBUF_TYPE_DATA: Data record
* If len is zero:
* array[0] holds the actual length
- * array[1..(length+3)/4-1] holds data
+ * array[1..(length+3)/4] holds data
+ * size = 4 + 4 + length (bytes)
* else
* length = len << 2
- * array[0..(length+3)/4] holds data
+ * array[0..(length+3)/4-1] holds data
+ * size = 4 + length (bytes)
*/
enum ring_buffer_type {
RINGBUF_TYPE_PADDING,
unsigned long ring_buffer_entries(struct ring_buffer *buffer);
unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
+ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
u64 ring_buffer_time_stamp(int cpu);
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
void tracing_on(void);
void tracing_off(void);
+void tracing_off_permanent(void);
+
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
+int ring_buffer_read_page(struct ring_buffer *buffer,
+ void **data_page, int cpu, int full);
enum ring_buffer_flags {
RB_FL_OVERWRITE = 1 << 0,
#include "trace.h"
-/* Global flag to disable all recording to ring buffers */
-static int ring_buffers_off __read_mostly;
+/*
+ * A fast way to enable or disable all ring buffers is to
+ * call tracing_on or tracing_off. Turning off the ring buffers
+ * prevents all ring buffers from being recorded to.
+ * Turning this switch on, makes it OK to write to the
+ * ring buffer, if the ring buffer is enabled itself.
+ *
+ * There's three layers that must be on in order to write
+ * to the ring buffer.
+ *
+ * 1) This global flag must be set.
+ * 2) The ring buffer must be enabled for recording.
+ * 3) The per cpu buffer must be enabled for recording.
+ *
+ * In case of an anomaly, this global flag has a bit set that
+ * will permantly disable all ring buffers.
+ */
+
+/*
+ * Global flag to disable all recording to ring buffers
+ * This has two bits: ON, DISABLED
+ *
+ * ON DISABLED
+ * ---- ----------
+ * 0 0 : ring buffers are off
+ * 1 0 : ring buffers are on
+ * X 1 : ring buffers are permanently disabled
+ */
+
+enum {
+ RB_BUFFERS_ON_BIT = 0,
+ RB_BUFFERS_DISABLED_BIT = 1,
+};
+
+enum {
+ RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
+ RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
+};
+
+static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
/**
* tracing_on - enable all tracing buffers
*/
void tracing_on(void)
{
- ring_buffers_off = 0;
+ set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
+ EXPORT_SYMBOL_GPL(tracing_on);
/**
* tracing_off - turn off all tracing buffers
*/
void tracing_off(void)
{
- ring_buffers_off = 1;
+ clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
+ EXPORT_SYMBOL_GPL(tracing_off);
+/**
+ * tracing_off_permanent - permanently disable ring buffers
+ *
+ * This function, once called, will disable all ring buffers
+ * permanenty.
+ */
+void tracing_off_permanent(void)
+{
+ set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
+}
+
+#include "trace.h"
+
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
time = sched_clock() << DEBUG_SHIFT;
- preempt_enable_notrace();
+ preempt_enable_no_resched_notrace();
return time;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
{
/* Just stupid testing the normalize function and deltas */
*ts >>= DEBUG_SHIFT;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
#define RB_ALIGNMENT_SHIFT 2
{
return rb_event_length(event);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
/* inline for ring buffer fast paths */
static inline void *
{
return rb_event_data(event);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
#define for_each_buffer_cpu(buffer, cpu) \
for_each_cpu_mask(cpu, buffer->cpumask)
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
-/*
- * This hack stolen from mm/slob.c.
- * We can store per page timing information in the page frame of the page.
- * Thanks to Peter Zijlstra for suggesting this idea.
- */
-struct buffer_page {
+struct buffer_data_page {
u64 time_stamp; /* page time stamp */
- local_t write; /* index for next write */
local_t commit; /* write commited index */
+ unsigned char data[]; /* data of buffer page */
+};
+
+struct buffer_page {
+ local_t write; /* index for next write */
unsigned read; /* index for next read */
struct list_head list; /* list of free pages */
- void *page; /* Actual data page */
+ struct buffer_data_page *page; /* Actual data page */
};
+static void rb_init_page(struct buffer_data_page *bpage)
+{
+ local_set(&bpage->commit, 0);
+}
+
/*
* Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
* this issue out.
return 0;
}
-#define BUF_PAGE_SIZE PAGE_SIZE
+#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
/*
* head_page == tail_page && head == tail then buffer is empty.
struct ring_buffer_per_cpu {
int cpu;
struct ring_buffer *buffer;
- spinlock_t lock;
+ spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head pages;
struct buffer_page *head_page; /* read from head */
};
struct ring_buffer {
- unsigned long size;
unsigned pages;
unsigned flags;
int cpus;
u64 read_stamp;
};
+/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- } \
- } while (0)
-
-#define RB_WARN_ON_RET(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- return -1; \
- } \
- } while (0)
-
-#define RB_WARN_ON_ONCE(buffer, cond) \
- do { \
- static int once; \
- if (unlikely(cond) && !once) { \
- once++; \
+ ({ \
+ int _____ret = unlikely(cond); \
+ if (_____ret) { \
atomic_inc(&buffer->record_disabled); \
WARN_ON(1); \
} \
- } while (0)
+ _____ret; \
+ })
/**
* check_pages - integrity check of buffer pages
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
- RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
- RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+ if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+ return -1;
- list_for_each_entry_safe(page, tmp, head, list) {
- RB_WARN_ON_RET(cpu_buffer,
- page->list.next->prev != &page->list);
- RB_WARN_ON_RET(cpu_buffer,
- page->list.prev->next != &page->list);
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.next->prev != &bpage->list))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.prev->next != &bpage->list))
+ return -1;
}
return 0;
unsigned nr_pages)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long addr;
LIST_HEAD(pages);
unsigned i;
for (i = 0; i < nr_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
list_splice(&pages, head);
return 0;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
return -ENOMEM;
}
rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct buffer_page *page;
+ struct buffer_page *bpage;
unsigned long addr;
int ret;
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->lock);
+ spin_lock_init(&cpu_buffer->reader_lock);
+ cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&cpu_buffer->pages);
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto fail_free_buffer;
- cpu_buffer->reader_page = page;
+ cpu_buffer->reader_page = bpage;
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto fail_free_reader;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
list_del_init(&cpu_buffer->reader_page->list);
free_buffer_page(cpu_buffer->reader_page);
- list_for_each_entry_safe(page, tmp, head, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
kfree(cpu_buffer);
}
/**
* ring_buffer_alloc - allocate a new ring_buffer
- * @size: the size in bytes that is needed.
+ * @size: the size in bytes per cpu that is needed.
* @flags: attributes to set for the ring buffer.
*
* Currently the only flag that is available is the RB_FL_OVERWRITE
kfree(buffer);
return NULL;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_alloc);
/**
* ring_buffer_free - free a ring buffer.
kfree(buffer);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_free);
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static void
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
p = cpu_buffer->pages.next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- free_buffer_page(page);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
rb_reset_cpu(cpu_buffer);
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *pages, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
+ return;
p = pages->next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- list_add_tail(&page->list, &cpu_buffer->pages);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ list_add_tail(&bpage->list, &cpu_buffer->pages);
}
rb_reset_cpu(cpu_buffer);
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned nr_pages, rm_pages, new_pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long buffer_size;
unsigned long addr;
LIST_HEAD(pages);
if (size < buffer_size) {
/* easy case, just free pages */
- BUG_ON(nr_pages >= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
rm_pages = buffer->pages - nr_pages;
* add these pages to the cpu_buffers. Otherwise we just free
* them all and return -ENOMEM;
*/
- BUG_ON(nr_pages <= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
new_pages = nr_pages - buffer->pages;
for_each_buffer_cpu(buffer, cpu) {
for (i = 0; i < new_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage),
cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
}
rb_insert_pages(cpu_buffer, &pages, new_pages);
}
- BUG_ON(!list_empty(&pages));
+ if (RB_WARN_ON(buffer, !list_empty(&pages))) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
out:
buffer->pages = nr_pages;
return size;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
mutex_unlock(&buffer->mutex);
return -ENOMEM;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_resize);
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type == RINGBUF_TYPE_PADDING;
}
-static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *
+__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
+{
+ return bpage->data + index;
+}
+
+static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{
- return page->page + index;
+ return bpage->page->data + index;
}
static inline struct ring_buffer_event *
static inline unsigned rb_page_commit(struct buffer_page *bpage)
{
- return local_read(&bpage->commit);
+ return local_read(&bpage->page->commit);
}
/* Size is determined by what has been commited */
head += rb_event_length(event)) {
event = __rb_page_index(cpu_buffer->head_page, head);
- BUG_ON(rb_null_event(event));
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
/* Only count data entries */
if (event->type != RINGBUF_TYPE_DATA)
continue;
}
static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_page **page)
+ struct buffer_page **bpage)
{
- struct list_head *p = (*page)->list.next;
+ struct list_head *p = (*bpage)->list.next;
if (p == &cpu_buffer->pages)
p = p->next;
- *page = list_entry(p, struct buffer_page, list);
+ *bpage = list_entry(p, struct buffer_page, list);
}
static inline unsigned
addr &= PAGE_MASK;
while (cpu_buffer->commit_page->page != (void *)addr) {
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->commit_page == cpu_buffer->tail_page);
- cpu_buffer->commit_page->commit =
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->commit_page == cpu_buffer->tail_page))
+ return;
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
}
/* Now set the commit to the event's index */
- local_set(&cpu_buffer->commit_page->commit, index);
+ local_set(&cpu_buffer->commit_page->page->commit, index);
}
static inline void
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
+ again:
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
barrier();
}
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
}
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+ cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
cpu_buffer->reader_page->read = 0;
}
else
rb_inc_page(cpu_buffer, &iter->head_page);
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
}
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
{
- struct buffer_page *tail_page, *head_page, *reader_page;
+ struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
unsigned long tail, write;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct ring_buffer_event *event;
unsigned long flags;
+ commit_page = cpu_buffer->commit_page;
+ /* we just need to protect against interrupts */
+ barrier();
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
tail = write - length;
if (write > BUF_PAGE_SIZE) {
struct buffer_page *next_page = tail_page;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
rb_inc_page(cpu_buffer, &next_page);
reader_page = cpu_buffer->reader_page;
/* we grabbed the lock before incrementing */
- RB_WARN_ON(cpu_buffer, next_page == reader_page);
+ if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+ goto out_unlock;
/*
* If for some reason, we had an interrupt storm that made
* it all the way around the buffer, bail, and warn
* about it.
*/
- if (unlikely(next_page == cpu_buffer->commit_page)) {
+ if (unlikely(next_page == commit_page)) {
WARN_ON_ONCE(1);
goto out_unlock;
}
*/
if (tail_page == cpu_buffer->tail_page) {
local_set(&next_page->write, 0);
- local_set(&next_page->commit, 0);
+ local_set(&next_page->page->commit, 0);
cpu_buffer->tail_page = next_page;
/* reread the time stamp */
*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
- cpu_buffer->tail_page->time_stamp = *ts;
+ cpu_buffer->tail_page->page->time_stamp = *ts;
}
/*
rb_set_commit_to_write(cpu_buffer);
}
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);
/* We reserved something on the buffer */
- BUG_ON(write > BUF_PAGE_SIZE);
+ if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+ return NULL;
event = __rb_page_index(tail_page, tail);
rb_update_event(event, type, length);
* this page's time stamp.
*/
if (!tail && rb_is_commit(cpu_buffer, event))
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
return event;
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return NULL;
}
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
event->time_delta = 0;
event->array[0] = 0;
}
* storm or we have something buggy.
* Bail!
*/
- if (unlikely(++nr_loops > 1000)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
return NULL;
- }
ts = ring_buffer_time_stamp(cpu_buffer->cpu);
struct ring_buffer_event *event;
int cpu, resched;
- if (ring_buffers_off)
+ if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
if (atomic_read(&buffer->record_disabled))
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
return event;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return NULL;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
/*
* Only the last preempt count needs to restore preemption.
*/
- if (preempt_count() == 1) {
- if (per_cpu(rb_need_resched, cpu))
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
- } else
+ if (preempt_count() == 1)
+ ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+ else
preempt_enable_no_resched_notrace();
return 0;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
/**
* ring_buffer_write - write data to the buffer without reserving
int ret = -EBUSY;
int cpu, resched;
- if (ring_buffers_off)
+ if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
ret = 0;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return ret;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_write);
static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
{
{
atomic_inc(&buffer->record_disabled);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
/**
* ring_buffer_record_enable - enable writes to the buffer
{
atomic_dec(&buffer->record_disabled);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
/**
* ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
cpu_buffer = buffer->buffers[cpu];
atomic_inc(&cpu_buffer->record_disabled);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
/**
* ring_buffer_record_enable_cpu - enable writes to the buffer
cpu_buffer = buffer->buffers[cpu];
atomic_dec(&cpu_buffer->record_disabled);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
cpu_buffer = buffer->buffers[cpu];
return cpu_buffer->entries;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
/**
* ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
cpu_buffer = buffer->buffers[cpu];
return cpu_buffer->overrun;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
/**
* ring_buffer_entries - get the number of entries in a buffer
return entries;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_entries);
/**
* ring_buffer_overrun_cpu - get the number of overruns in buffer
return overruns;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_overruns);
-/**
- * ring_buffer_iter_reset - reset an iterator
- * @iter: The iterator to reset
- *
- * Resets the iterator, so that it will start from the beginning
- * again.
- */
-void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+static void rb_iter_reset(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
if (iter->head)
iter->read_stamp = cpu_buffer->read_stamp;
else
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rb_iter_reset(iter);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
/**
* ring_buffer_iter_empty - check if an iterator has no more to read
return iter->head_page == cpu_buffer->commit_page &&
iter->head == rb_commit_index(cpu_buffer);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
static void
rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long flags;
int nr_loops = 0;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
again:
/*
* a case where we will loop three times. There should be no
* reason to loop four times (that I know of).
*/
- if (unlikely(++nr_loops > 3)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
reader = NULL;
goto out;
}
goto out;
/* Never should we have an index greater than the size */
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->reader_page->read > rb_page_size(reader));
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->reader_page->read > rb_page_size(reader)))
+ goto out;
/* check if we caught up to the tail */
reader = NULL;
cpu_buffer->reader_page->list.prev = reader->list.prev;
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
/* Make the reader page now replace the head */
reader->list.prev->next = &cpu_buffer->reader_page->list;
goto again;
out:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return reader;
}
reader = rb_get_reader_page(cpu_buffer);
/* This function should not be called when buffer is empty */
- BUG_ON(!reader);
+ if (RB_WARN_ON(cpu_buffer, !reader))
+ return;
event = rb_reader_event(cpu_buffer);
* Check if we are at the end of the buffer.
*/
if (iter->head >= rb_page_size(iter->head_page)) {
- BUG_ON(iter->head_page == cpu_buffer->commit_page);
+ if (RB_WARN_ON(buffer,
+ iter->head_page == cpu_buffer->commit_page))
+ return;
rb_inc_iter(iter);
return;
}
* This should not be called to advance the header if we are
* at the tail of the buffer.
*/
- BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
- (iter->head + length > rb_commit_index(cpu_buffer)));
+ if (RB_WARN_ON(cpu_buffer,
+ (iter->head_page == cpu_buffer->commit_page) &&
+ (iter->head + length > rb_commit_index(cpu_buffer))))
+ return;
rb_update_iter_read_stamp(iter, event);
rb_advance_iter(iter);
}
-/**
- * ring_buffer_peek - peek at the next event to be read
- * @buffer: The ring buffer to read
- * @cpu: The cpu to peak at
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not consume the data.
- */
-struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+static struct ring_buffer_event *
+rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
return NULL;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_peek);
-/**
- * ring_buffer_iter_peek - peek at the next event to be read
- * @iter: The ring buffer iterator
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not increment the iterator.
- */
-struct ring_buffer_event *
-ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+static struct ring_buffer_event *
+rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
return NULL;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_buffer_peek(buffer, cpu, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
/**
* ring_buffer_consume - return an event and consume it
* @buffer: The ring buffer to get the next event from
struct ring_buffer_event *
ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
{
- struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
struct ring_buffer_event *event;
+ unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
return NULL;
- event = ring_buffer_peek(buffer, cpu, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ event = rb_buffer_peek(buffer, cpu, ts);
if (!event)
- return NULL;
+ goto out;
- cpu_buffer = buffer->buffers[cpu];
rb_advance_reader(cpu_buffer);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
return event;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_consume);
/**
* ring_buffer_read_start - start a non consuming read of the buffer
atomic_inc(&cpu_buffer->record_disabled);
synchronize_sched();
- spin_lock_irqsave(&cpu_buffer->lock, flags);
- ring_buffer_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+ rb_iter_reset(iter);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return iter;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_read_start);
/**
* ring_buffer_finish - finish reading the iterator of the buffer
atomic_dec(&cpu_buffer->record_disabled);
kfree(iter);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
/**
* ring_buffer_read - read the next item in the ring buffer by the iterator
ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer_event *event;
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
- event = ring_buffer_iter_peek(iter, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
if (!event)
- return NULL;
+ goto out;
rb_advance_iter(iter);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_read);
/**
* ring_buffer_size - return the size of the ring buffer (in bytes)
{
return BUF_PAGE_SIZE * buffer->pages;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_size);
static void
rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->head_page
= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
local_set(&cpu_buffer->head_page->write, 0);
- local_set(&cpu_buffer->head_page->commit, 0);
+ local_set(&cpu_buffer->head_page->page->commit, 0);
cpu_buffer->head_page->read = 0;
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
cpu_buffer->reader_page->read = 0;
cpu_buffer->overrun = 0;
if (!cpu_isset(cpu, buffer->cpumask))
return;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ __raw_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
/**
* ring_buffer_reset - reset a ring buffer
for_each_buffer_cpu(buffer, cpu)
ring_buffer_reset_cpu(buffer, cpu);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_reset);
/**
* rind_buffer_empty - is the ring buffer empty?
}
return 1;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_empty);
/**
* ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
cpu_buffer = buffer->buffers[cpu];
return rb_per_cpu_empty(cpu_buffer);
}
+ EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
/**
* ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
return -EINVAL;
/* At least make sure the two buffers are somewhat the same */
- if (buffer_a->size != buffer_b->size ||
- buffer_a->pages != buffer_b->pages)
+ if (buffer_a->pages != buffer_b->pages)
return -EINVAL;
cpu_buffer_a = buffer_a->buffers[cpu];
return 0;
}
+ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_data_page *bpage)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ __raw_spin_lock(&cpu_buffer->lock);
+ for (head = 0; head < local_read(&bpage->commit);
+ head += rb_event_length(event)) {
+
+ event = __rb_data_page_index(bpage, head);
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->entries--;
+ }
+ __raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ * The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+ unsigned long addr;
+ struct buffer_data_page *bpage;
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return NULL;
+
+ bpage = (void *)addr;
+
+ return bpage;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocate for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+ free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ * rpage = ring_buffer_alloc_page(buffer);
+ * if (!rpage)
+ * return error;
+ * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ * if (ret)
+ * process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ * The ring buffer can be used anywhere in the kernel and can not
+ * blindly call wake_up. The layer that uses the ring buffer must be
+ * responsible for that.
+ *
+ * Returns:
+ * 1 if data has been transferred
+ * 0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+ void **data_page, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!data_page)
+ return 0;
+
+ bpage = *data_page;
+ if (!bpage)
+ return 0;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ /*
+ * rb_buffer_peek will get the next ring buffer if
+ * the current reader page is empty.
+ */
+ event = rb_buffer_peek(buffer, cpu, NULL);
+ if (!event)
+ goto out;
+
+ /* check for data */
+ if (!local_read(&cpu_buffer->reader_page->page->commit))
+ goto out;
+ /*
+ * If the writer is already off of the read page, then simply
+ * switch the read page with the given page. Otherwise
+ * we need to copy the data from the reader to the writer.
+ */
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+ unsigned int read = cpu_buffer->reader_page->read;
+
+ if (full)
+ goto out;
+ /* The writer is still on the reader page, we must copy */
+ bpage = cpu_buffer->reader_page->page;
+ memcpy(bpage->data,
+ cpu_buffer->reader_page->page->data + read,
+ local_read(&bpage->commit) - read);
+
+ /* consume what was read */
+ cpu_buffer->reader_page += read;
+
+ } else {
+ /* swap the pages */
+ rb_init_page(bpage);
+ bpage = cpu_buffer->reader_page->page;
+ cpu_buffer->reader_page->page = *data_page;
+ cpu_buffer->reader_page->read = 0;
+ *data_page = bpage;
+ }
+ ret = 1;
+
+ /* update the entry counter */
+ rb_remove_entries(cpu_buffer, bpage);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+
static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- int *p = filp->private_data;
+ long *p = filp->private_data;
char buf[64];
int r;
- /* !ring_buffers_off == tracing_on */
- r = sprintf(buf, "%d\n", !*p);
+ if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
+ r = sprintf(buf, "permanently disabled\n");
+ else
+ r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
rb_simple_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- int *p = filp->private_data;
+ long *p = filp->private_data;
char buf[64];
long val;
int ret;
if (ret < 0)
return ret;
- /* !ring_buffers_off == tracing_on */
- *p = !val;
+ if (val)
+ set_bit(RB_BUFFERS_ON_BIT, p);
+ else
+ clear_bit(RB_BUFFERS_ON_BIT, p);
(*ppos)++;
d_tracer = tracing_init_dentry();
entry = debugfs_create_file("tracing_on", 0644, d_tracer,
- &ring_buffers_off, &rb_simple_fops);
+ &ring_buffer_flags, &rb_simple_fops);
if (!entry)
pr_warning("Could not create debugfs 'tracing_on' entry\n");
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/kprobes.h>
+#include <linux/seq_file.h>
#include <linux/writeback.h>
#include <linux/stacktrace.h>
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
+/*
+ * We need to change this state when a selftest is running.
+ * A selftest will lurk into the ring-buffer to count the
+ * entries inserted during the selftest although some concurrent
+ * insertions into the ring-buffer such as ftrace_printk could occurred
+ * at the same time, giving false positive or negative results.
+ */
+static bool __read_mostly tracing_selftest_running;
+
+/* For tracers that don't implement custom flags */
+static struct tracer_opt dummy_tracer_opt[] = {
+ { }
+};
+
+static struct tracer_flags dummy_tracer_flags = {
+ .val = 0,
+ .opts = dummy_tracer_opt
+};
+
+static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+{
+ return 0;
+}
+
+/*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+int tracing_disabled = 1;
+
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
-static int tracing_disabled = 1;
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
+
+static int tracing_set_tracer(char *buf);
+
+static int __init set_ftrace(char *str)
+{
+ tracing_set_tracer(str);
+ return 1;
+}
+__setup("ftrace", set_ftrace);
+
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+ ftrace_dump_on_oops = 1;
+ return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
long
ns2usecs(cycle_t nsec)
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+/**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if it should enable their features when starting
+ * up. See irqsoff tracer for an example (start_irqsoff_tracer).
+ */
+int tracing_is_enabled(void)
+{
+ return tracer_enabled;
+}
+
/* function tracing enabled */
int ftrace_function_enabled;
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-/* trace_flags holds iter_ctrl options */
-unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+/* trace_flags holds trace_options default values */
+unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
+ TRACE_ITER_ANNOTATE;
/**
* trace_wake_up - wake up tasks waiting for trace input
return nsecs / 1000;
}
-/*
- * TRACE_ITER_SYM_MASK masks the options in trace_flags that
- * control the output of kernel symbols.
- */
-#define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
-
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
"stacktrace",
"sched-tree",
"ftrace_printk",
+ "ftrace_preempt",
+ "branch",
+ "annotate",
+ "userstacktrace",
+ "sym-userobj",
+ "printk-msg-only",
NULL
};
memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
data->pid = tsk->pid;
- data->uid = tsk->uid;
+ data->uid = task_uid(tsk);
data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
data->policy = tsk->policy;
data->rt_priority = tsk->rt_priority;
return trace_seq_putmem(s, hex, j);
}
+static int
+trace_seq_path(struct trace_seq *s, struct path *path)
+{
+ unsigned char *p;
+
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+ if (!IS_ERR(p)) {
+ p = mangle_path(s->buffer + s->len, p, "\n");
+ if (p) {
+ s->len = p - s->buffer;
+ return 1;
+ }
+ } else {
+ s->buffer[s->len++] = '?';
+ return 1;
+ }
+
+ return 0;
+}
+
static void
trace_seq_reset(struct trace_seq *s)
{
return -1;
}
+ /*
+ * When this gets called we hold the BKL which means that
+ * preemption is disabled. Various trace selftests however
+ * need to disable and enable preemption for successful tests.
+ * So we drop the BKL here and grab it after the tests again.
+ */
+ unlock_kernel();
mutex_lock(&trace_types_lock);
+
+ tracing_selftest_running = true;
+
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
}
}
+ if (!type->set_flag)
+ type->set_flag = &dummy_set_flag;
+ if (!type->flags)
+ type->flags = &dummy_tracer_flags;
+ else
+ if (!type->flags->opts)
+ type->flags->opts = dummy_tracer_opt;
+
#ifdef CONFIG_FTRACE_STARTUP_TEST
if (type->selftest) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
- int saved_ctrl = tr->ctrl;
int i;
+
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
current_trace = type;
- tr->ctrl = 0;
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
- tr->ctrl = saved_ctrl;
if (ret) {
printk(KERN_CONT "FAILED!\n");
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
printk(KERN_CONT "PASSED\n");
}
#endif
max_tracer_type_len = len;
out:
+ tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
+ lock_kernel();
return ret;
}
ftrace_enable_cpu();
}
+void tracing_reset_online_cpus(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
#define SAVED_CMDLINES 128
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
cmdline_idx = 0;
}
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
+/**
+ * ftrace_off_permanent - disable all ftrace code permanently
+ *
+ * This should only be called when a serious anomally has
+ * been detected. This will turn off the function tracing,
+ * ring buffers, and other tracing utilites. It takes no
+ * locks and can be called from any context.
+ */
+void ftrace_off_permanent(void)
+{
+ tracing_disabled = 1;
+ ftrace_stop();
+ tracing_off_permanent();
+}
+
+/**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+void tracing_start(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ if (tracing_disabled)
+ return;
+
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (--trace_stop_count)
+ goto out;
+
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ goto out;
+ }
+
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ ftrace_start();
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ ftrace_stop();
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (trace_stop_count++)
+ goto out;
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
spin_unlock(&trace_cmdline_lock);
}
-static char *trace_find_cmdline(int pid)
+char *trace_find_cmdline(int pid)
{
char *cmdline = "<...>";
unsigned map;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->tgid = (tsk) ? tsk->tgid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void __trace_graph_entry(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ent *trace,
+ unsigned long flags,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_ENT;
+ entry->graph_ent = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+}
+
+static void __trace_graph_return(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ret *trace,
+ unsigned long flags,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_RET;
+ entry->ret = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+}
+#endif
+
void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
ftrace_trace_stack(tr, data, flags, skip, preempt_count());
}
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags, int pc)
+{
+#ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct userstack_entry *entry;
+ struct stack_trace trace;
+ unsigned long irq_flags;
+
+ if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+ return;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_USER_STACK;
+
+ memset(&entry->caller, 0, sizeof(entry->caller));
+
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = 0;
+ trace.entries = entry->caller;
+
+ save_stack_trace_user(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
+}
+
+void __trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags)
+{
+ ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
static void
ftrace_trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
entry->arg3 = arg3;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+ ftrace_trace_userstack(tr, data, irq_flags, pc);
trace_wake_up();
}
entry->next_cpu = task_cpu(next);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 5, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
}
void
entry->next_cpu = task_cpu(wakee);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 6, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
trace_wake_up();
}
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
+ unsigned long flags;
int cpu;
int pc;
- if (tracing_disabled || !tr->ctrl)
+ if (tracing_disabled)
return;
pc = preempt_count();
- preempt_disable_notrace();
+ local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
- if (likely(!atomic_read(&data->disabled)))
+ if (likely(atomic_inc_return(&data->disabled) == 1))
ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
- preempt_enable_notrace();
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_TRACER
static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+int trace_graph_entry(struct ftrace_graph_ent *trace)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (!ftrace_trace_task(current))
+ return 0;
+
+ if (!ftrace_graph_addr(trace->func))
+ return 0;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_entry(tr, data, trace, flags, pc);
+ }
+ /* Only do the atomic if it is not already set */
+ if (!test_tsk_trace_graph(current))
+ set_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+
+ return 1;
+}
+
+void trace_graph_return(struct ftrace_graph_ret *trace)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_return(tr, data, trace, flags, pc);
+ }
+ if (!trace->depth)
+ clear_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
static struct ftrace_ops trace_ops __read_mostly =
{
.func = function_trace_call,
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
enum trace_file_type {
TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
};
- static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+ static void trace_iterator_increment(struct trace_iterator *iter)
{
/* Don't allow ftrace to trace into the ring buffers */
ftrace_disable_cpu();
iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
if (iter->ent)
- trace_iterator_increment(iter, iter->cpu);
+ trace_iterator_increment(iter);
return iter->ent ? iter : NULL;
}
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
# define IP_FMT "%016lx"
#endif
-static int
+int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
int ret;
return ret;
}
+static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags)
+{
+ struct file *file = NULL;
+ unsigned long vmstart = 0;
+ int ret = 1;
+
+ if (mm) {
+ const struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma) {
+ file = vma->vm_file;
+ vmstart = vma->vm_start;
+ }
+ if (file) {
+ ret = trace_seq_path(s, &file->f_path);
+ if (ret)
+ ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
+ }
+ up_read(&mm->mmap_sem);
+ }
+ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+ unsigned long sym_flags)
+{
+ struct mm_struct *mm = NULL;
+ int ret = 1;
+ unsigned int i;
+
+ if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ struct task_struct *task;
+ /*
+ * we do the lookup on the thread group leader,
+ * since individual threads might have already quit!
+ */
+ rcu_read_lock();
+ task = find_task_by_vpid(entry->ent.tgid);
+ if (task)
+ mm = get_task_mm(task);
+ rcu_read_unlock();
+ }
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ unsigned long ip = entry->caller[i];
+
+ if (ip == ULONG_MAX || !ret)
+ break;
+ if (i && ret)
+ ret = trace_seq_puts(s, " <- ");
+ if (!ip) {
+ if (ret)
+ ret = trace_seq_puts(s, "??");
+ continue;
+ }
+ if (!ret)
+ break;
+ if (ret)
+ ret = seq_print_user_ip(s, mm, ip, sym_flags);
+ }
+
+ if (mm)
+ mmput(mm);
+ return ret;
+}
+
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+static int task_state_char(unsigned long state)
+{
+ int bit = state ? __ffs(state) + 1 : 0;
+
+ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+}
+
/*
* The message is supposed to contain an ending newline.
* If the printing stops prematurely, try to add a newline of our own.
trace_seq_putc(s, '\n');
}
+static void test_cpu_buff_start(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+
+ if (!(trace_flags & TRACE_ITER_ANNOTATE))
+ return;
+
+ if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
+ return;
+
+ if (cpu_isset(iter->cpu, iter->started))
+ return;
+
+ cpu_set(iter->cpu, iter->started);
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+}
+
static enum print_line_t
print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
{
char *comm;
int S, T;
int i;
- unsigned state;
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
next_entry = find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
next_ts = iter->ts;
trace_assign_type(field, entry);
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
-
- state = field->prev_state ?
- __ffs(field->prev_state) + 1 : 0;
- S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
comm = trace_find_cmdline(field->next_pid);
trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
field->prev_pid,
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_userip_objs(field, s, sym_flags);
+ trace_seq_putc(s, '\n');
+ break;
+ }
default:
trace_seq_printf(s, "Unknown type %d\n", entry->type);
}
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
comm = trace_find_cmdline(iter->ent->pid);
t = ns2usecs(iter->ts);
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
field->prev_pid,
field->prev_prio,
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_GRAPH_RET: {
+ return print_graph_function(iter);
+ }
+ case TRACE_GRAPH_ENT: {
+ return print_graph_function(iter);
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_userip_objs(field, s, sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_putc(s, '\n');
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
}
return TRACE_TYPE_HANDLED;
}
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
field->prev_pid,
field->prev_prio,
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
SEQ_PUT_HEX_FIELD_RET(s, S);
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
return TRACE_TYPE_HANDLED;
}
+static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *field;
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, field->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ return TRACE_TYPE_HANDLED;
+}
+
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
return ret;
}
+ if (iter->ent->type == TRACE_PRINT &&
+ trace_flags & TRACE_ITER_PRINTK &&
+ trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ return print_printk_msg_only(iter);
+
if (trace_flags & TRACE_ITER_BIN)
return print_bin_fmt(iter);
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
}
- if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ if (iter->trace && iter->trace->print_header)
+ iter->trace->print_header(m);
+ else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return 0;
iter->trace = current_trace;
iter->pos = -1;
+ /* Notify the tracer early; before we stop tracing. */
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->tr->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
m->private = iter;
/* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
-
- if (iter->trace && iter->trace->open)
- iter->trace->open(iter);
+ tracing_stop();
mutex_unlock(&trace_types_lock);
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
if (err)
goto err_unlock;
- raw_local_irq_disable();
+ local_irq_disable();
__raw_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
}
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_enable();
+ local_irq_enable();
tracing_cpumask = tracing_cpumask_new;
};
static ssize_t
-tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+tracing_trace_options_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ int i;
char *buf;
int r = 0;
int len = 0;
- int i;
+ u32 tracer_flags = current_trace->flags->val;
+ struct tracer_opt *trace_opts = current_trace->flags->opts;
+
/* calulate max size */
for (i = 0; trace_options[i]; i++) {
len += 3; /* "no" and space */
}
+ /*
+ * Increase the size with names of options specific
+ * of the current tracer.
+ */
+ for (i = 0; trace_opts[i].name; i++) {
+ len += strlen(trace_opts[i].name);
+ len += 3; /* "no" and space */
+ }
+
/* +2 for \n and \0 */
buf = kmalloc(len + 2, GFP_KERNEL);
if (!buf)
r += sprintf(buf + r, "no%s ", trace_options[i]);
}
+ for (i = 0; trace_opts[i].name; i++) {
+ if (tracer_flags & trace_opts[i].bit)
+ r += sprintf(buf + r, "%s ",
+ trace_opts[i].name);
+ else
+ r += sprintf(buf + r, "no%s ",
+ trace_opts[i].name);
+ }
+
r += sprintf(buf + r, "\n");
WARN_ON(r >= len + 2);
return r;
}
+/* Try to assign a tracer specific option */
+static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+{
+ struct tracer_flags *trace_flags = trace->flags;
+ struct tracer_opt *opts = NULL;
+ int ret = 0, i = 0;
+ int len;
+
+ for (i = 0; trace_flags->opts[i].name; i++) {
+ opts = &trace_flags->opts[i];
+ len = strlen(opts->name);
+
+ if (strncmp(cmp, opts->name, len) == 0) {
+ ret = trace->set_flag(trace_flags->val,
+ opts->bit, !neg);
+ break;
+ }
+ }
+ /* Not found */
+ if (!trace_flags->opts[i].name)
+ return -EINVAL;
+
+ /* Refused to handle */
+ if (ret)
+ return ret;
+
+ if (neg)
+ trace_flags->val &= ~opts->bit;
+ else
+ trace_flags->val |= opts->bit;
+
+ return 0;
+}
+
static ssize_t
-tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
char *cmp = buf;
int neg = 0;
+ int ret;
int i;
if (cnt >= sizeof(buf))
break;
}
}
- /*
- * If no option could be set, return an error:
- */
- if (!trace_options[i])
- return -EINVAL;
+
+ /* If no option could be set, test the specific tracer options */
+ if (!trace_options[i]) {
+ ret = set_tracer_option(current_trace, cmp, neg);
+ if (ret)
+ return ret;
+ }
filp->f_pos += cnt;
static struct file_operations tracing_iter_fops = {
.open = tracing_open_generic,
- .read = tracing_iter_ctrl_read,
- .write = tracing_iter_ctrl_write,
+ .read = tracing_trace_options_read,
+ .write = tracing_trace_options_write,
};
static const char readme_msg[] =
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
- "# cat /debug/tracing/iter_ctrl\n"
+ "# cat /debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
- "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
"echo 0 > /debug/tracing/tracing_enabled\n"
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- ret = cnt;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
if (t == current_trace)
goto out;
+ trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
current_trace = t;
- if (t->init)
- t->init(tr);
+ if (t->init) {
+ ret = t->init(tr);
+ if (ret)
+ goto out;
+ }
+ trace_branch_enable(tr);
out:
mutex_unlock(&trace_types_lock);
- if (ret > 0)
- filp->f_pos += ret;
+ return ret;
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+ int err;
+
+ ret = cnt;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ err = tracing_set_tracer(buf);
+ if (err)
+ return err;
+
+ filp->f_pos += ret;
return ret;
}
return -ENOMEM;
mutex_lock(&trace_types_lock);
+
+ /* trace pipe does not show start of buffer */
+ cpus_setall(iter->started);
+
iter->tr = &global_trace;
iter->trace = current_trace;
filp->private_data = iter;
char buf[64];
int r;
- r = sprintf(buf, "%lu\n", tr->entries);
+ r = sprintf(buf, "%lu\n", tr->entries >> 10);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
unsigned long val;
char buf[64];
int ret, cpu;
- struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
return -EINVAL;
mutex_lock(&trace_types_lock);
- if (tr->ctrl) {
- cnt = -EBUSY;
- pr_info("ftrace: please disable tracing"
- " before modifying buffer size\n");
- goto out;
- }
+ tracing_stop();
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
atomic_inc(&max_tr.data[cpu]->disabled);
}
+ /* value is in KB */
+ val <<= 10;
+
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
atomic_dec(&max_tr.data[cpu]->disabled);
}
+ tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
int ret;
va_list args;
va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
+ ret = trace_vprintk(0, -1, fmt, args);
va_end(args);
return ret;
}
{
char *buf;
char *end;
- struct trace_array *tr = &global_trace;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled)
return -EINVAL;
if (cnt > TRACE_BUF_SIZE)
#ifdef CONFIG_DYNAMIC_FTRACE
+int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ return 0;
+}
+
static ssize_t
-tracing_read_long(struct file *filp, char __user *ubuf,
+tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
-static struct file_operations tracing_read_long_fops = {
+static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
if (!entry)
pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
- entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ entry = debugfs_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
if (!entry)
- pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+ pr_warning("Could not create debugfs 'trace_options' entry\n");
entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
pr_warning("Could not create debugfs "
"'trace_pipe' entry\n");
- entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'trace_entries' entry\n");
+ "'buffer_size_kb' entry\n");
entry = debugfs_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
return 0;
}
-int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
{
static DEFINE_SPINLOCK(trace_buf_lock);
static char trace_buf[TRACE_BUF_SIZE];
struct ring_buffer_event *event;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
- struct print_entry *entry;
- unsigned long flags, irq_flags;
int cpu, len = 0, size, pc;
+ struct print_entry *entry;
+ unsigned long irq_flags;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled || tracing_selftest_running)
return 0;
pc = preempt_count();
if (unlikely(atomic_read(&data->disabled)))
goto out;
- spin_lock_irqsave(&trace_buf_lock, flags);
+ pause_graph_tracing();
+ spin_lock_irqsave(&trace_buf_lock, irq_flags);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
len = min(len, TRACE_BUF_SIZE-1);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
+ tracing_generic_entry_update(&entry->ent, irq_flags, pc);
entry->ent.type = TRACE_PRINT;
entry->ip = ip;
+ entry->depth = depth;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
out_unlock:
- spin_unlock_irqrestore(&trace_buf_lock, flags);
-
+ spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+ unpause_graph_tracing();
out:
preempt_enable_notrace();
return 0;
va_start(ap, fmt);
- ret = trace_vprintk(ip, fmt, ap);
+ ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
va_end(ap);
return ret;
}
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
atomic_inc(&global_trace.data[cpu]->disabled);
}
+ /* don't look at user memory in panic mode */
+ trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
printk(KERN_TRACE "Dumping ftrace buffer:\n");
iter.tr = &global_trace;
#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
tracing_disabled = 0;
atomic_notifier_chain_register(&panic_notifier_list,