__unlazy_fpu(prev_p);
 
+
+       /* the FPU state is needed soon, after a few expensive things */
+       if (next_p->fpu_counter > 5)
+               prefetch(&next->i387.fxsave);
+
        /*
         * Reload esp0.
         */
 
        disable_tsc(prev_p, next_p);
 
+       /* If the task has used the FPU in more than 5 timeslices in a
+        * row, just do a full restore of the math state immediately to
+        * avoid the trap; the chances of needing the FPU soon are high.
+        */
+       if (next_p->fpu_counter > 5)
+               math_state_restore();
+
        return prev_p;
 }
 
 
  * Must be called with kernel preemption disabled (in this case,
  * local interrupts are disabled at the call-site in entry.S).
  */
-asmlinkage void math_state_restore(struct pt_regs regs)
+asmlinkage void math_state_restore(void)
 {
        struct thread_info *thread = current_thread_info();
        struct task_struct *tsk = thread->task;
                init_fpu(tsk);
        restore_fpu(tsk);
        thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
+       tsk->fpu_counter++;
 }
 
 #ifndef CONFIG_MATH_EMULATION
 
 
 #define __unlazy_fpu( tsk ) do { \
        if (task_thread_info(tsk)->status & TS_USEDFPU) \
-               save_init_fpu( tsk ); \
+               save_init_fpu( tsk );                   \
+       else /* TS_USEDFPU clear: restart the count */  \
+               tsk->fpu_counter = 0;                   \
 } while (0)
 
 #define __clear_fpu( tsk )                                     \
 extern unsigned short get_fpu_cwd( struct task_struct *tsk );
 extern unsigned short get_fpu_swd( struct task_struct *tsk );
 extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+extern asmlinkage void math_state_restore(void);
 
 /*
  * Signal frame handlers...