gcc seems to expect that lr isn't clobbered by mcount, because for a
function starting with:
	static int func(void)
	{
		void *ra = __builtin_return_address(0);
		printk(KERN_EMERG "__builtin_return_address(0) = %pS\n", ra);
		...
the following assembler is generated by gcc 4.3.2:
	   0:	e1a0c00d 	mov	ip, sp
	   4:	e92dd810 	push	{r4, fp, ip, lr, pc}
	   8:	e24cb004 	sub	fp, ip, #4	; 0x4
	   c:	ebfffffe 	bl	0 <mcount>
	  10:	e59f0034 	ldr	r0, [pc, #52]
	  14:	e1a0100e 	mov	r1, lr
	  18:	ebfffffe 	bl	0 <printk>
Without this patch obviously __builtin_return_address(0) yields
func+0x10 instead of the return address of the caller.
Note this patch fixes a similar issue for the routines used with dynamic
ftrace even though this isn't currently selectable for ARM.
Cc: Abhishek Sagar <sagar.abhishek@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
        .globl mcount_call
 mcount_call:
        bl ftrace_stub
+       ldr lr, [fp, #-4]                       @ restore lr
        ldmia sp!, {r0-r3, pc}
 
 ENTRY(ftrace_caller)
        .globl ftrace_call
 ftrace_call:
        bl ftrace_stub
+       ldr lr, [fp, #-4]                       @ restore lr
        ldmia sp!, {r0-r3, pc}
 
 #else
        adr r0, ftrace_stub
        cmp r0, r2
        bne trace
+       ldr lr, [fp, #-4]                       @ restore lr
        ldmia sp!, {r0-r3, pc}
 
 trace:
        sub r0, r0, #MCOUNT_INSN_SIZE
        mov lr, pc
        mov pc, r2
+       mov lr, r1                              @ restore lr
        ldmia sp!, {r0-r3, pc}
 
 #endif /* CONFIG_DYNAMIC_FTRACE */