ldmia   %1, {r8 - r14}\n\
        msr     cpsr_c, %0      @ return to SVC mode\n\
        mov     r0, r0\n\
-       ldmea   fp, {fp, sp, pc}"
+       ldmfd   sp, {fp, sp, pc}"
        : "=&r" (tmp)
        : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
 }
        stmia   %1, {r8 - r14}\n\
        msr     cpsr_c, %0      @ return to SVC mode\n\
        mov     r0, r0\n\
-       ldmea   fp, {fp, sp, pc}"
+       ldmfd   sp, {fp, sp, pc}"
        : "=&r" (tmp)
        : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
 }
 
  */
 
                .macro  save_regs
+               mov     ip, sp                  @ ip = sp on entry; stored below so load_regs can reload sp
                stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}
+               sub     fp, ip, #4              @ fp = entry sp - 4, i.e. the slot holding the stored pc
                .endm
 
-               .macro  load_regs,flags
-               LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+               .macro  load_regs
+               ldmfd   sp, {r1, r4 - r8, fp, sp, pc}   @ sp <- stored ip, pc <- stored lr (returns); expects sp as left by save_regs
                .endm
 
                .macro  load1b, reg1
 
 sum    .req    r3
 
 .Lzero:                mov     r0, sum
-               load_regs       ea
+               load_regs
 
                /*
                 * Align an unaligned destination pointer.  We know that
                b       .Ldone
 
 FN_ENTRY
-               mov     ip, sp
                save_regs
-               sub     fp, ip, #4
 
                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
-               load_regs       ea
+               load_regs
 
 .Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
 
                .text
 
                .macro  save_regs
+               mov     ip, sp                  @ ip = sp on entry; stored below so load_regs can reload sp
                stmfd   sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc}
+               sub     fp, ip, #4              @ fp = entry sp - 4, i.e. the slot holding the stored pc
                .endm
 
-               .macro  load_regs,flags
-               ldm\flags       fp, {r1, r2, r4-r8, fp, sp, pc}
+               .macro  load_regs
+               ldmfd   sp, {r1, r2, r4-r8, fp, sp, pc} @ sp <- stored ip, pc <- stored lr (returns); expects sp as left by save_regs
                .endm
 
                .macro  load1b, reg1
 6002:          teq     r2, r1
                strneb  r0, [r1], #1
                bne     6002b
-               load_regs       ea
+               load_regs
                .previous