Merge branch 'x86/core' into tracing/ftrace
author    Ingo Molnar <mingo@elte.hu>
          Tue, 10 Mar 2009 09:16:17 +0000 (10:16 +0100)
committer Ingo Molnar <mingo@elte.hu>
          Tue, 10 Mar 2009 09:17:48 +0000 (10:17 +0100)
Semantic merge:

  kernel/trace/trace_functions_graph.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/intel.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
kernel/module.c
kernel/trace/trace_functions_graph.c

diff --combined arch/x86/kernel/cpu/intel.c
index 1a89a2b68d1539a92939e4d33747a1bcd916390c,191117f1ad51d06475be918516e109fd31bc0d71..c1c04bf0df77bfda7b1cdd1c23fb22eeb2ec334d
@@@ -4,7 -4,6 +4,7 @@@
  #include <linux/string.h>
  #include <linux/bitops.h>
  #include <linux/smp.h>
 +#include <linux/sched.h>
  #include <linux/thread_info.h>
  #include <linux/module.h>
  
@@@ -14,6 -13,7 +14,7 @@@
  #include <asm/uaccess.h>
  #include <asm/ds.h>
  #include <asm/bugs.h>
+ #include <asm/cpu.h>
  
  #ifdef CONFIG_X86_64
  #include <asm/topology.h>
@@@ -56,16 -56,11 +57,16 @@@ static void __cpuinit early_init_intel(
  
        /*
         * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
 -       * with P/T states and does not stop in deep C-states
 +       * with P/T states and does not stop in deep C-states.
 +       *
 +       * It is also reliable across cores and sockets. (but not across
 +       * cabinets - we turn it off in that case explicitly.)
         */
        if (c->x86_power & (1 << 8)) {
                set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
                set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 +              set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
 +              sched_clock_stable = 1;
        }
  
        /*
@@@ -116,6 -111,28 +117,28 @@@ static void __cpuinit trap_init_f00f_bu
  }
  #endif
  
+ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
+ {
+ #ifdef CONFIG_SMP
+       /* Are we being called from identify_secondary_cpu()? */
+       if (c->cpu_index == boot_cpu_id)
+               return;
+       /*
+        * Mask B, Pentium, but not Pentium MMX
+        */
+       if (c->x86 == 5 &&
+           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_model <= 3) {
+               /*
+                * Remember we have B step Pentia with bugs
+                */
+               WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
+                                   "with B stepping processors.\n");
+       }
+ #endif
+ }
  static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
  {
        unsigned long lo, hi;
  #ifdef CONFIG_X86_NUMAQ
        numaq_tsc_disable();
  #endif
+       intel_smp_check(c);
  }
  #else
  static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
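
The early_init_intel() hunk above keys off CPUID leaf 0x8000_0007, EDX bit 8 (the "invariant TSC" flag the kernel caches in c->x86_power); when it is set, the commit now also marks the TSC reliable across cores and sockets and lets sched_clock() trust it. For illustration, a minimal user-space sketch of the same check, assuming GCC/Clang's <cpuid.h> (not part of this commit):

    /* Query CPUID leaf 0x80000007; EDX bit 8 is the invariant-TSC flag
     * that early_init_intel() tests via c->x86_power. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
                    puts("CPUID leaf 0x80000007 not available");
                    return 1;
            }
            printf("invariant TSC: %s\n", (edx & (1 << 8)) ? "yes" : "no");
            return 0;
    }
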
diff --combined arch/x86/mm/init_32.c
index d7f5060ab21c44f47bd46abe8583f21e7b28ae65,db81e9a8556b3b0bba854aeba5740bd0cc039039..749559ed80f5d99e1771826155ac8b27e1f2a3f2
@@@ -806,11 -806,6 +806,6 @@@ static unsigned long __init setup_node_
  {
        unsigned long bootmap_size;
  
-       if (start_pfn > max_low_pfn)
-               return bootmap;
-       if (end_pfn > max_low_pfn)
-               end_pfn = max_low_pfn;
        /* don't touch min_low_pfn */
        bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
                                         bootmap >> PAGE_SHIFT,
@@@ -843,13 -838,23 +838,23 @@@ void __init setup_bootmem_allocator(voi
                 max_pfn_mapped<<PAGE_SHIFT);
        printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
  
+       for_each_online_node(nodeid) {
+                unsigned long start_pfn, end_pfn;
  #ifdef CONFIG_NEED_MULTIPLE_NODES
-       for_each_online_node(nodeid)
-               bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid],
-                                       node_end_pfn[nodeid], bootmap);
+               start_pfn = node_start_pfn[nodeid];
+               end_pfn = node_end_pfn[nodeid];
+               if (start_pfn > max_low_pfn)
+                       continue;
+               if (end_pfn > max_low_pfn)
+                       end_pfn = max_low_pfn;
  #else
-       bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap);
+               start_pfn = 0;
+               end_pfn = max_low_pfn;
  #endif
+               bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
+                                                bootmap);
+       }
  
        after_bootmem = 1;
  }
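
The loop above moves the max_low_pfn clamping out of setup_node_bootmem() and into its caller, so every online node is walked and only its usable low-memory slice is handed to the bootmem allocator. A standalone sketch of that clamp-and-skip pattern, with hypothetical names (not from this commit):

    /* Clamp [start_pfn, end_pfn) to a low-memory limit: ranges entirely
     * above the limit are skipped, ranges crossing it are truncated. */
    #include <stdio.h>

    static void walk_node(unsigned long start_pfn, unsigned long end_pfn,
                          unsigned long max_low_pfn)
    {
            if (start_pfn > max_low_pfn)
                    return;                 /* node is fully in highmem */
            if (end_pfn > max_low_pfn)
                    end_pfn = max_low_pfn;  /* trim the highmem tail */
            printf("bootmem node: %lu-%lu\n", start_pfn, end_pfn);
    }

    int main(void)
    {
            walk_node(0, 100, 220);         /* untouched */
            walk_node(200, 300, 220);       /* trimmed to 200-220 */
            walk_node(300, 400, 220);       /* skipped */
            return 0;
    }
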
@@@ -1049,47 -1054,17 +1054,47 @@@ static noinline int do_test_wp_bit(void
  const int rodata_test_data = 0xC3;
  EXPORT_SYMBOL_GPL(rodata_test_data);
  
 +static int kernel_set_to_readonly;
 +
 +void set_kernel_text_rw(void)
 +{
 +      unsigned long start = PFN_ALIGN(_text);
 +      unsigned long size = PFN_ALIGN(_etext) - start;
 +
 +      if (!kernel_set_to_readonly)
 +              return;
 +
 +      pr_debug("Set kernel text: %lx - %lx for read write\n",
 +               start, start+size);
 +
 +      set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
 +}
 +
 +void set_kernel_text_ro(void)
 +{
 +      unsigned long start = PFN_ALIGN(_text);
 +      unsigned long size = PFN_ALIGN(_etext) - start;
 +
 +      if (!kernel_set_to_readonly)
 +              return;
 +
 +      pr_debug("Set kernel text: %lx - %lx for read only\n",
 +               start, start+size);
 +
 +      set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 +}
 +
  void mark_rodata_ro(void)
  {
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;
  
 -#ifndef CONFIG_DYNAMIC_FTRACE
 -      /* Dynamic tracing modifies the kernel text section */
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk(KERN_INFO "Write protecting the kernel text: %luk\n",
                size >> 10);
  
 +      kernel_set_to_readonly = 1;
 +
  #ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
                start, start+size);
        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
  #endif
 -#endif /* CONFIG_DYNAMIC_FTRACE */
  
        start += size;
        size = (unsigned long)__end_rodata - start;
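
The new set_kernel_text_rw()/set_kernel_text_ro() pair exists so dynamic ftrace can briefly lift the write protection that mark_rodata_ro() establishes, which is why the old CONFIG_DYNAMIC_FTRACE #ifdef can go; the kernel_set_to_readonly flag keeps both helpers as no-ops until the text has actually been protected once. A user-space analogy using mprotect(), with hypothetical names (not from this commit; mprotect() on page-aligned heap memory is Linux behaviour, not a POSIX guarantee):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static int region_set_to_readonly;        /* mirrors kernel_set_to_readonly */

    static void region_ro(char *p, size_t sz)
    {
            if (mprotect(p, sz, PROT_READ) == 0)
                    region_set_to_readonly = 1;
    }

    static void region_rw(char *p, size_t sz)
    {
            if (!region_set_to_readonly)       /* same guard as the kernel code */
                    return;
            mprotect(p, sz, PROT_READ | PROT_WRITE);
    }

    int main(void)
    {
            size_t sz = (size_t)sysconf(_SC_PAGESIZE);
            char *p = aligned_alloc(sz, sz);

            if (!p)
                    return 1;
            region_ro(p, sz);
            region_rw(p, sz);                  /* permitted: the flag is set */
            strcpy(p, "patched");
            puts(p);
            free(p);
            return 0;
    }
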
diff --combined arch/x86/mm/init_64.c
index 66d6be85df82d7e31820e57423b085bce4e355c5,54efa57d1c039b648d6eb77bef1878c10e9cdb0b..1753e8020df6ec8aa3eefea7342386224e595f2c
@@@ -85,7 -85,7 +85,7 @@@ early_param("gbpages", parse_direct_gbp
  pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
  EXPORT_SYMBOL_GPL(__supported_pte_mask);
  
- static int do_not_nx __cpuinitdata;
+ static int disable_nx __cpuinitdata;
  
  /*
   * noexec=on|off
@@@ -100,9 -100,9 +100,9 @@@ static int __init nonx_setup(char *str
                return -EINVAL;
        if (!strncmp(str, "on", 2)) {
                __supported_pte_mask |= _PAGE_NX;
-               do_not_nx = 0;
+               disable_nx = 0;
        } else if (!strncmp(str, "off", 3)) {
-               do_not_nx = 1;
+               disable_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
        }
        return 0;
@@@ -114,7 -114,7 +114,7 @@@ void __cpuinit check_efer(void
        unsigned long efer;
  
        rdmsrl(MSR_EFER, efer);
-       if (!(efer & EFER_NX) || do_not_nx)
+       if (!(efer & EFER_NX) || disable_nx)
                __supported_pte_mask &= ~_PAGE_NX;
  }
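
The do_not_nx -> disable_nx rename above is purely for readability; the parsing itself stays the usual strncmp-based "on"/"off" boot-parameter toggle. A standalone sketch of that pattern (not from this commit):

    #include <assert.h>
    #include <string.h>

    static int disable_nx;

    /* Mirrors the nonx_setup() flow: NULL is rejected (the kernel
     * returns -EINVAL there), "on" clears the flag, "off" sets it. */
    static int nonx_parse(const char *str)
    {
            if (!str)
                    return -1;
            if (!strncmp(str, "on", 2))
                    disable_nx = 0;
            else if (!strncmp(str, "off", 3))
                    disable_nx = 1;
            return 0;
    }

    int main(void)
    {
            assert(nonx_parse("off") == 0 && disable_nx == 1);
            assert(nonx_parse("on") == 0 && disable_nx == 0);
            assert(nonx_parse(NULL) == -1);
            return 0;
    }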
  
@@@ -734,48 -734,21 +734,48 @@@ void __init mem_init(void
  const int rodata_test_data = 0xC3;
  EXPORT_SYMBOL_GPL(rodata_test_data);
  
 +static int kernel_set_to_readonly;
 +
 +void set_kernel_text_rw(void)
 +{
 +      unsigned long start = PFN_ALIGN(_stext);
 +      unsigned long end = PFN_ALIGN(__start_rodata);
 +
 +      if (!kernel_set_to_readonly)
 +              return;
 +
 +      pr_debug("Set kernel text: %lx - %lx for read write\n",
 +               start, end);
 +
 +      set_memory_rw(start, (end - start) >> PAGE_SHIFT);
 +}
 +
 +void set_kernel_text_ro(void)
 +{
 +      unsigned long start = PFN_ALIGN(_stext);
 +      unsigned long end = PFN_ALIGN(__start_rodata);
 +
 +      if (!kernel_set_to_readonly)
 +              return;
 +
 +      pr_debug("Set kernel text: %lx - %lx for read only\n",
 +               start, end);
 +
 +      set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 +}
 +
  void mark_rodata_ro(void)
  {
        unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
        unsigned long rodata_start =
                ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
  
 -#ifdef CONFIG_DYNAMIC_FTRACE
 -      /* Dynamic tracing modifies the kernel text section */
 -      start = rodata_start;
 -#endif
 -
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);
        set_memory_ro(start, (end - start) >> PAGE_SHIFT);
  
 +      kernel_set_to_readonly = 1;
 +
        /*
         * The rodata section (but not the kernel text!) should also be
         * not-executable.
diff --combined kernel/module.c
index 90a6d63d92119615218578a9a01ccf3775c35ae2,f0e04d6b67d8cb325526dce1fdb456265ba3f926..8b742f2b3845b5d765a25014ac8361413938b40b
@@@ -381,7 -381,7 +381,7 @@@ static void *percpu_modalloc(unsigned l
                align = PAGE_SIZE;
        }
  
-       ptr = __alloc_percpu(size, align);
+       ptr = __alloc_reserved_percpu(size, align);
        if (!ptr)
                printk(KERN_WARNING
                       "Could not allocate %lu bytes percpu data\n", size);
@@@ -2769,7 -2769,7 +2769,7 @@@ int is_module_address(unsigned long add
  
  
  /* Is this a valid kernel address? */
 -__notrace_funcgraph struct module *__module_text_address(unsigned long addr)
 +struct module *__module_text_address(unsigned long addr)
  {
        struct module *mod;
  
diff --combined kernel/trace/trace_functions_graph.c
index 453ebd3b636edb912935655b239e97607c8991e6,930c08e5b38e0d35ab93b79896b0150907d31e50..35257be6a9d6fb2a3352995d78a0e95abee65e94
@@@ -1,7 -1,7 +1,7 @@@
  /*
   *
   * Function graph tracer.
 - * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
 + * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
   * Mostly borrowed from function tracer which
   * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
   *
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/fs.h>
  
  #include "trace.h"
 +#include "trace_output.h"
  
  #define TRACE_GRAPH_INDENT    2
  
  #define TRACE_GRAPH_PRINT_CPU         0x2
  #define TRACE_GRAPH_PRINT_OVERHEAD    0x4
  #define TRACE_GRAPH_PRINT_PROC                0x8
 +#define TRACE_GRAPH_PRINT_DURATION    0x10
 +#define TRACE_GRAPH_PRINT_ABS_TIME    0x20
  
  static struct tracer_opt trace_opts[] = {
 -      /* Display overruns ? */
 +      /* Display overruns? (for self-debugging) */
        { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
        /* Display CPU ? */
        { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
        /* Display Overhead ? */
        { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
        /* Display proc name/pid */
        { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
 +      /* Display duration of execution */
 +      { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
 +      /* Display absolute time of an entry */
 +      { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
        { } /* Empty entry */
  };
  
  static struct tracer_flags tracer_flags = {
        /* Don't display overruns and proc by default */
 -      .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
 +      .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
 +             TRACE_GRAPH_PRINT_DURATION,
        .opts = trace_opts
  };
  
  /* pid on the last trace processed */
 -static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
  
 -static int graph_trace_init(struct trace_array *tr)
 +
 +/* Add a function return address to the trace stack on thread info. */
 +int
 +ftrace_push_return_trace(unsigned long ret, unsigned long long time,
 +                       unsigned long func, int *depth)
 +{
 +      int index;
 +
 +      if (!current->ret_stack)
 +              return -EBUSY;
 +
 +      /* The return trace stack is full */
 +      if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 +              atomic_inc(&current->trace_overrun);
 +              return -EBUSY;
 +      }
 +
 +      index = ++current->curr_ret_stack;
 +      barrier();
 +      current->ret_stack[index].ret = ret;
 +      current->ret_stack[index].func = func;
 +      current->ret_stack[index].calltime = time;
 +      *depth = index;
 +
 +      return 0;
 +}
 +
 +/* Retrieve a function return address from the trace stack on thread info. */
 +void
 +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 +{
 +      int index;
 +
 +      index = current->curr_ret_stack;
 +
 +      if (unlikely(index < 0)) {
 +              ftrace_graph_stop();
 +              WARN_ON(1);
 +              /* Might as well panic, otherwise we have nowhere to go */
 +              *ret = (unsigned long)panic;
 +              return;
 +      }
 +
 +      *ret = current->ret_stack[index].ret;
 +      trace->func = current->ret_stack[index].func;
 +      trace->calltime = current->ret_stack[index].calltime;
 +      trace->overrun = atomic_read(&current->trace_overrun);
 +      trace->depth = index;
 +      barrier();
 +      current->curr_ret_stack--;
 +
 +}
 +
 +/*
 + * Send the trace to the ring-buffer.
 + * @return the original return address.
 + */
 +unsigned long ftrace_return_to_handler(void)
  {
 -      int cpu, ret;
 +      struct ftrace_graph_ret trace;
 +      unsigned long ret;
 +
 +      ftrace_pop_return_trace(&trace, &ret);
 +      trace.rettime = trace_clock_local();
 +      ftrace_graph_return(&trace);
 +
 +      if (unlikely(!ret)) {
 +              ftrace_graph_stop();
 +              WARN_ON(1);
 +              /* Might as well panic. What else to do? */
 +              ret = (unsigned long)panic;
 +      }
  
 -      for_each_online_cpu(cpu)
 -              tracing_reset(tr, cpu);
 +      return ret;
 +}
  
 -      ret = register_ftrace_graph(&trace_graph_return,
 +static int graph_trace_init(struct trace_array *tr)
 +{
 +      int ret = register_ftrace_graph(&trace_graph_return,
                                        &trace_graph_entry);
        if (ret)
                return ret;
@@@ -231,25 -153,17 +231,25 @@@ print_graph_proc(struct trace_seq *s, p
  
  /* If the pid changed since the last trace, output this event */
  static enum print_line_t
 -verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 +verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
  {
        pid_t prev_pid;
 +      pid_t *last_pid;
        int ret;
  
 -      if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
 +      if (!last_pids_cpu)
 +              return TRACE_TYPE_HANDLED;
 +
 +      last_pid = per_cpu_ptr(last_pids_cpu, cpu);
 +
 +      if (*last_pid == pid)
                return TRACE_TYPE_HANDLED;
  
 -      prev_pid = last_pid[cpu];
 -      last_pid[cpu] = pid;
 +      prev_pid = *last_pid;
 +      *last_pid = pid;
  
 +      if (prev_pid == -1)
 +              return TRACE_TYPE_HANDLED;
   /*
    * Context-switch trace line:

    ------------------------------------------
    | 1)  migration/0--1  =>  sched-0
    ------------------------------------------

    */
        ret = trace_seq_printf(s,
                " ------------------------------------------\n");
        if (!ret)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        ret = print_graph_cpu(s, cpu);
        if (ret == TRACE_TYPE_PARTIAL_LINE)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        ret = print_graph_proc(s, prev_pid);
        if (ret == TRACE_TYPE_PARTIAL_LINE)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        ret = trace_seq_printf(s, " => ");
        if (!ret)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        ret = print_graph_proc(s, pid);
        if (ret == TRACE_TYPE_PARTIAL_LINE)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        ret = trace_seq_printf(s,
                "\n ------------------------------------------\n\n");
        if (!ret)
 -              TRACE_TYPE_PARTIAL_LINE;
 +              return TRACE_TYPE_PARTIAL_LINE;
  
 -      return ret;
 +      return TRACE_TYPE_HANDLED;
  }
  
 -static bool
 -trace_branch_is_leaf(struct trace_iterator *iter,
 +static struct ftrace_graph_ret_entry *
 +get_return_for_leaf(struct trace_iterator *iter,
                struct ftrace_graph_ent_entry *curr)
  {
        struct ring_buffer_iter *ring_iter;
  
        ring_iter = iter->buffer_iter[iter->cpu];
  
 -      if (!ring_iter)
 -              return false;
 -
 -      event = ring_buffer_iter_peek(ring_iter, NULL);
 +      /* First peek to compare current entry and the next one */
 +      if (ring_iter)
 +              event = ring_buffer_iter_peek(ring_iter, NULL);
 +      else {
 +      /* We need to consume the current entry to see the next one */
 +              ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
 +              event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
 +                                      NULL);
 +      }
  
        if (!event)
 -              return false;
 +              return NULL;
  
        next = ring_buffer_event_data(event);
  
        if (next->ent.type != TRACE_GRAPH_RET)
 -              return false;
 +              return NULL;
  
        if (curr->ent.pid != next->ent.pid ||
                        curr->graph_ent.func != next->ret.func)
 -              return false;
 +              return NULL;
 +
 +      /* this is a leaf, now advance the iterator */
 +      if (ring_iter)
 +              ring_buffer_read(ring_iter, NULL);
  
 -      return true;
 +      return next;
 +}
 +
 +/* Signal an execution-time overhead in the output */
 +static int
 +print_graph_overhead(unsigned long long duration, struct trace_seq *s)
 +{
 +      /* If the duration column is disabled, we don't need anything */
 +      if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
 +              return 1;
 +
 +      /* Non-nested entry or return */
 +      if (duration == -1)
 +              return trace_seq_printf(s, "  ");
 +
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 +              /* Duration exceeded 100 msecs */
 +              if (duration > 100000ULL)
 +                      return trace_seq_printf(s, "! ");
 +
 +              /* Duration exceeded 10 msecs */
 +              if (duration > 10000ULL)
 +                      return trace_seq_printf(s, "+ ");
 +      }
 +
 +      return trace_seq_printf(s, "  ");
 +}
 +
 +static int print_graph_abs_time(u64 t, struct trace_seq *s)
 +{
 +      unsigned long usecs_rem;
 +
 +      usecs_rem = do_div(t, NSEC_PER_SEC);
 +      usecs_rem /= 1000;
 +
 +      return trace_seq_printf(s, "%5lu.%06lu |  ",
 +                      (unsigned long)t, usecs_rem);
  }
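
print_graph_abs_time() splits a nanosecond timestamp into whole seconds and a microsecond remainder; the kernel's do_div(t, NSEC_PER_SEC) divides t in place and returns the remainder. The same arithmetic in plain C (a sketch, not from this commit):

    #include <inttypes.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    int main(void)
    {
            uint64_t t = 5123456789ULL;      /* timestamp in ns */
            unsigned long usecs_rem = (unsigned long)(t % NSEC_PER_SEC) / 1000;

            t /= NSEC_PER_SEC;               /* do_div() leaves the quotient in t */
            printf("%5lu.%06lu |  \n", (unsigned long)t, usecs_rem);
            return 0;
    }

It prints "    5.123456 |  ", matching the tracer's absolute-time column.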
  
  static enum print_line_t
 -print_graph_irq(struct trace_seq *s, unsigned long addr,
 -                              enum trace_type type, int cpu, pid_t pid)
 +print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 +              enum trace_type type, int cpu, pid_t pid)
  {
        int ret;
 +      struct trace_seq *s = &iter->seq;
  
        if (addr < (unsigned long)__irqentry_text_start ||
                addr >= (unsigned long)__irqentry_text_end)
                return TRACE_TYPE_UNHANDLED;
  
 -      if (type == TRACE_GRAPH_ENT) {
 -              ret = trace_seq_printf(s, "==========> |  ");
 -      } else {
 -              /* Cpu */
 -              if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
 -                      ret = print_graph_cpu(s, cpu);
 -                      if (ret == TRACE_TYPE_PARTIAL_LINE)
 -                              return TRACE_TYPE_PARTIAL_LINE;
 -              }
 -              /* Proc */
 -              if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
 -                      ret = print_graph_proc(s, pid);
 -                      if (ret == TRACE_TYPE_PARTIAL_LINE)
 -                              return TRACE_TYPE_PARTIAL_LINE;
 +      /* Absolute time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
 +              ret = print_graph_abs_time(iter->ts, s);
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
  
 -                      ret = trace_seq_printf(s, " | ");
 -                      if (!ret)
 -                              return TRACE_TYPE_PARTIAL_LINE;
 -              }
 +      /* Cpu */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
 +              ret = print_graph_cpu(s, cpu);
 +              if (ret == TRACE_TYPE_PARTIAL_LINE)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
 +      /* Proc */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
 +              ret = print_graph_proc(s, pid);
 +              if (ret == TRACE_TYPE_PARTIAL_LINE)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +              ret = trace_seq_printf(s, " | ");
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
  
 -              /* No overhead */
 -              if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -                      ret = trace_seq_printf(s, "  ");
 -                      if (!ret)
 -                              return TRACE_TYPE_PARTIAL_LINE;
 -              }
 +      /* No overhead */
 +      ret = print_graph_overhead(-1, s);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      if (type == TRACE_GRAPH_ENT)
 +              ret = trace_seq_printf(s, "==========>");
 +      else
 +              ret = trace_seq_printf(s, "<==========");
 +
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      /* Don't close the duration column if we don't have one */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
 +              trace_seq_printf(s, " |");
 +      ret = trace_seq_printf(s, "\n");
  
 -              ret = trace_seq_printf(s, "<========== |\n");
 -      }
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
        return TRACE_TYPE_HANDLED;
@@@ -432,7 -288,7 +432,7 @@@ print_graph_duration(unsigned long lon
        sprintf(msecs_str, "%lu", (unsigned long) duration);
  
        /* Print msecs */
 -      ret = trace_seq_printf(s, msecs_str);
 +      ret = trace_seq_printf(s, "%s", msecs_str);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
  
  
  }
  
 -/* Signal a overhead of time execution to the output */
 -static int
 -print_graph_overhead(unsigned long long duration, struct trace_seq *s)
 -{
 -      /* Duration exceeded 100 msecs */
 -      if (duration > 100000ULL)
 -              return trace_seq_printf(s, "! ");
 -
 -      /* Duration exceeded 10 msecs */
 -      if (duration > 10000ULL)
 -              return trace_seq_printf(s, "+ ");
 -
 -      return trace_seq_printf(s, "  ");
 -}
 -
  /* Case of a leaf function on its call entry */
  static enum print_line_t
  print_graph_entry_leaf(struct trace_iterator *iter,
 -              struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
 +              struct ftrace_graph_ent_entry *entry,
 +              struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
  {
 -      struct ftrace_graph_ret_entry *ret_entry;
        struct ftrace_graph_ret *graph_ret;
 -      struct ring_buffer_event *event;
        struct ftrace_graph_ent *call;
        unsigned long long duration;
        int ret;
        int i;
  
 -      event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 -      ret_entry = ring_buffer_event_data(event);
        graph_ret = &ret_entry->ret;
        call = &entry->graph_ent;
        duration = graph_ret->rettime - graph_ret->calltime;
  
        /* Overhead */
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -              ret = print_graph_overhead(duration, s);
 -              if (!ret)
 -                      return TRACE_TYPE_PARTIAL_LINE;
 -      }
 +      ret = print_graph_overhead(duration, s);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        /* Duration */
 -      ret = print_graph_duration(duration, s);
 -      if (ret == TRACE_TYPE_PARTIAL_LINE)
 -              return TRACE_TYPE_PARTIAL_LINE;
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
 +              ret = print_graph_duration(duration, s);
 +              if (ret == TRACE_TYPE_PARTIAL_LINE)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
  
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@@ -520,17 -394,25 +520,17 @@@ print_graph_entry_nested(struct ftrace_
        struct ftrace_graph_ent *call = &entry->graph_ent;
  
        /* No overhead */
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -              ret = trace_seq_printf(s, "  ");
 -              if (!ret)
 -                      return TRACE_TYPE_PARTIAL_LINE;
 -      }
 +      ret = print_graph_overhead(-1, s);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
  
 -      /* Interrupt */
 -      ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
 -      if (ret == TRACE_TYPE_UNHANDLED) {
 -              /* No time */
 +      /* No time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
                ret = trace_seq_printf(s, "            |  ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
 -      } else {
 -              if (ret == TRACE_TYPE_PARTIAL_LINE)
 -                      return TRACE_TYPE_PARTIAL_LINE;
        }
  
 -
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
                ret = trace_seq_printf(s, " ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
  
 -      return TRACE_TYPE_HANDLED;
 +      /*
 +       * we already consumed the current entry to check the next one
 +       * and see if this is a leaf.
 +       */
 +      return TRACE_TYPE_NO_CONSUME;
  }
  
  static enum print_line_t
  print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 -                      struct trace_iterator *iter, int cpu)
 +                      struct trace_iterator *iter)
  {
        int ret;
 +      int cpu = iter->cpu;
 +      pid_t *last_entry = iter->private;
        struct trace_entry *ent = iter->ent;
 +      struct ftrace_graph_ent *call = &field->graph_ent;
 +      struct ftrace_graph_ret_entry *leaf_ret;
  
        /* Pid */
 -      if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
 +      if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      /* Interrupt */
 +      ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
 +      if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
  
 +      /* Absolute time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
 +              ret = print_graph_abs_time(iter->ts, s);
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
 +
        /* Cpu */
        if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
                ret = print_graph_cpu(s, cpu);
                        return TRACE_TYPE_PARTIAL_LINE;
        }
  
 -      if (trace_branch_is_leaf(iter, field))
 -              return print_graph_entry_leaf(iter, field, s);
 +      leaf_ret = get_return_for_leaf(iter, field);
 +      if (leaf_ret)
 +              return print_graph_entry_leaf(iter, field, leaf_ret, s);
        else
                return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
  
  
  static enum print_line_t
  print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 -                 struct trace_entry *ent, int cpu)
 +                 struct trace_entry *ent, struct trace_iterator *iter)
  {
        int i;
        int ret;
 +      int cpu = iter->cpu;
 +      pid_t *last_pid = iter->private, pid = ent->pid;
        unsigned long long duration = trace->rettime - trace->calltime;
  
        /* Pid */
 -      if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
 +      if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
  
 +      /* Absolute time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
 +              ret = print_graph_abs_time(iter->ts, s);
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
 +
        /* Cpu */
        if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
                ret = print_graph_cpu(s, cpu);
        }
  
        /* Overhead */
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -              ret = print_graph_overhead(duration, s);
 -              if (!ret)
 -                      return TRACE_TYPE_PARTIAL_LINE;
 -      }
 +      ret = print_graph_overhead(duration, s);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
  
        /* Duration */
 -      ret = print_graph_duration(duration, s);
 -      if (ret == TRACE_TYPE_PARTIAL_LINE)
 -              return TRACE_TYPE_PARTIAL_LINE;
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
 +              ret = print_graph_duration(duration, s);
 +              if (ret == TRACE_TYPE_PARTIAL_LINE)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
  
        /* Closing brace */
        for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
                        return TRACE_TYPE_PARTIAL_LINE;
        }
  
 -      ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid);
 +      ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid);
        if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
  
@@@ -689,23 -541,14 +689,23 @@@ print_graph_comment(struct print_entry 
  {
        int i;
        int ret;
 +      int cpu = iter->cpu;
 +      pid_t *last_pid = iter->private;
  
        /* Pid */
 -      if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
 +      if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
  
 +      /* Absolute time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
 +              ret = print_graph_abs_time(iter->ts, s);
 +              if (!ret)
 +                      return TRACE_TYPE_PARTIAL_LINE;
 +      }
 +
        /* Cpu */
        if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
 -              ret = print_graph_cpu(s, iter->cpu);
 +              ret = print_graph_cpu(s, cpu);
                if (ret == TRACE_TYPE_PARTIAL_LINE)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
        }
  
        /* No overhead */
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -              ret = trace_seq_printf(s, "  ");
 +      ret = print_graph_overhead(-1, s);
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      /* No time */
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
 +              ret = trace_seq_printf(s, "            |  ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
  
 -      /* No time */
 -      ret = trace_seq_printf(s, "            |  ");
 -      if (!ret)
 -              return TRACE_TYPE_PARTIAL_LINE;
 -
        /* Indentation */
        if (trace->depth > 0)
                for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
                }
  
        /* The comment */
 -      ret = trace_seq_printf(s, "/* %s", trace->buf);
 +      ret = trace_seq_printf(s, "/* ");
 +      if (!ret)
 +              return TRACE_TYPE_PARTIAL_LINE;
 +
 +      ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
  
 -      if (ent->flags & TRACE_FLAG_CONT)
 -              trace_seq_print_cont(s, iter);
 +      /* Strip ending newline */
 +      if (s->buffer[s->len - 1] == '\n') {
 +              s->buffer[s->len - 1] = '\0';
 +              s->len--;
 +      }
  
        ret = trace_seq_printf(s, " */\n");
        if (!ret)
@@@ -774,12 -610,13 +774,12 @@@ print_graph_function(struct trace_itera
        case TRACE_GRAPH_ENT: {
                struct ftrace_graph_ent_entry *field;
                trace_assign_type(field, entry);
 -              return print_graph_entry(field, s, iter,
 -                                       iter->cpu);
 +              return print_graph_entry(field, s, iter);
        }
        case TRACE_GRAPH_RET: {
                struct ftrace_graph_ret_entry *field;
                trace_assign_type(field, entry);
 -              return print_graph_return(&field->ret, s, entry, iter->cpu);
 +              return print_graph_return(&field->ret, s, entry, iter);
        }
        case TRACE_PRINT: {
                struct print_entry *field;
@@@ -795,64 -632,33 +795,64 @@@ static void print_graph_headers(struct 
  {
        /* 1st line */
        seq_printf(s, "# ");
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
 +              seq_printf(s, "     TIME       ");
        if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
 -              seq_printf(s, "CPU ");
 +              seq_printf(s, "CPU");
        if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
 -              seq_printf(s, "TASK/PID     ");
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
 -              seq_printf(s, "OVERHEAD/");
 -      seq_printf(s, "DURATION            FUNCTION CALLS\n");
 +              seq_printf(s, "  TASK/PID      ");
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
 +              seq_printf(s, "  DURATION   ");
 +      seq_printf(s, "               FUNCTION CALLS\n");
  
        /* 2nd line */
        seq_printf(s, "# ");
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
 +              seq_printf(s, "      |         ");
        if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
 -              seq_printf(s, "|   ");
 +              seq_printf(s, "|  ");
        if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
 -              seq_printf(s, "|      |     ");
 -      if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
 -              seq_printf(s, "|        ");
 -              seq_printf(s, "|                   |   |   |   |\n");
 -      } else
 -              seq_printf(s, "    |               |   |   |   |\n");
 +              seq_printf(s, "  |    |        ");
 +      if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
 +              seq_printf(s, "   |   |      ");
 +      seq_printf(s, "               |   |   |   |\n");
  }
-       percpu_free(iter->private);
 +
 +static void graph_trace_open(struct trace_iterator *iter)
 +{
 +      /* pid on the last trace processed */
 +      pid_t *last_pid = alloc_percpu(pid_t);
 +      int cpu;
 +
 +      if (!last_pid)
 +              pr_warning("function graph tracer: not enough memory\n");
 +      else
 +              for_each_possible_cpu(cpu) {
 +                      pid_t *pid = per_cpu_ptr(last_pid, cpu);
 +                      *pid = -1;
 +              }
 +
 +      iter->private = last_pid;
 +}
 +
 +static void graph_trace_close(struct trace_iterator *iter)
 +{
++      free_percpu(iter->private);
 +}
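
graph_trace_open()/graph_trace_close() above tie the per-cpu last_pid state to the iterator: allocated per tracing session, initialised to -1 so the first event on each CPU always prints its task, and freed when the iterator goes away. A kernel-style sketch of that lifecycle with hypothetical names (not from this commit):

    #include <linux/errno.h>
    #include <linux/percpu.h>
    #include <linux/types.h>

    struct session { pid_t *last_pid; };

    static int session_open(struct session *s)
    {
            int cpu;

            s->last_pid = alloc_percpu(pid_t);
            if (!s->last_pid)
                    return -ENOMEM;
            for_each_possible_cpu(cpu)
                    *per_cpu_ptr(s->last_pid, cpu) = -1;    /* "no pid seen yet" */
            return 0;
    }

    static void session_close(struct session *s)
    {
            free_percpu(s->last_pid);        /* NULL-safe */
    }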
 +
  static struct tracer graph_trace __read_mostly = {
        .name           = "function_graph",
 +      .open           = graph_trace_open,
 +      .close          = graph_trace_close,
 +      .wait_pipe      = poll_wait_pipe,
        .init           = graph_trace_init,
        .reset          = graph_trace_reset,
        .print_line     = print_graph_function,
        .print_header   = print_graph_headers,
        .flags          = &tracer_flags,
 +#ifdef CONFIG_FTRACE_SELFTEST
 +      .selftest       = trace_selftest_startup_function_graph,
 +#endif
  };
  
  static __init int init_graph_trace(void)