Merge branch 'linus' into x86/apic

author Ingo Molnar <mingo@elte.hu>

Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)

committer Ingo Molnar <mingo@elte.hu>

Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)
author Ingo Molnar <mingo@elte.hu>
Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)
diff --combined arch/arm/kernel/irq.c

index 4bb723eadad13c73dc5da7ba61ef2e2367fbc9ba,363db186cb93334791588db5b2158e6f295c5c54..45eacb5a2ecd80fb7a30dc56d6f3386f1c842f19
--- 1/arch/arm/kernel/irq.c
--- 2/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@@ -101,14 -101,9 +101,14 @@@ unlock
   /* Handle bad interrupts */
   static struct irq_desc bad_irq_desc = {
         .handle_irq = handle_bad_irq,
-       .lock = SPIN_LOCK_UNLOCKED
+       .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock),
   };
   
+ +#ifdef CONFIG_CPUMASK_OFFSTACK
+ +/* We are not allocating bad_irq_desc.affinity or .pending_mask */
+ +#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK."
+ +#endif
+ +
   /*
    * do_IRQ handles all hardware IRQ's.  Decoded IRQs should not
    * come via this function.  Instead, they should provide their
@@@ -166,7 -161,7 +166,7 @@@ void __init init_IRQ(void
                 irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE;
   
   #ifdef CONFIG_SMP
- -      bad_irq_desc.affinity = CPU_MASK_ALL;
+ +      cpumask_setall(bad_irq_desc.affinity);
         bad_irq_desc.cpu = smp_processor_id();
   #endif
         init_arch_irq();
@@@ -196,16 -191,15 +196,16 @@@ void migrate_irqs(void
                 struct irq_desc *desc = irq_desc + i;
   
                 if (desc->cpu == cpu) {
- -                      unsigned int newcpu = any_online_cpu(desc->affinity);
- -
- -                      if (newcpu == NR_CPUS) {
+ +                      unsigned int newcpu = cpumask_any_and(desc->affinity,
+ +                                                            cpu_online_mask);
+ +                      if (newcpu >= nr_cpu_ids) {
                                 if (printk_ratelimit())
                                         printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n",
                                                i, cpu);
   
- -                              cpus_setall(desc->affinity);
- -                              newcpu = any_online_cpu(desc->affinity);
+ +                              cpumask_setall(desc->affinity);
+ +                              newcpu = cpumask_any_and(desc->affinity,
+ +                                                       cpu_online_mask);
                         }
   
                         route_irq(desc, i, newcpu);
diff --combined arch/blackfin/kernel/irqchip.c

index 5780d6df154250112a128d19ffd0bbb06298064a,75724eee6494c65c87545e14682f8cfe213877dd..23e9aa080710f095e3389b74892c2b6933dbdbda
--- 1/arch/blackfin/kernel/irqchip.c
--- 2/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@@ -35,6 -35,7 +35,7 @@@
   #include <linux/interrupt.h>
   #include <linux/irq.h>
   #include <asm/trace.h>
+ #include <asm/pda.h>
   
   static atomic_t irq_err_count;
   static spinlock_t irq_controller_lock;
@@@ -69,11 -70,6 +70,11 @@@ static struct irq_desc bad_irq_desc = 
   #endif
   };
   
+ +#ifdef CONFIG_CPUMASK_OFFSTACK
+ +/* We are not allocating a variable-sized bad_irq_desc.affinity */
+ +#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK."
+ +#endif
+ +
   int show_interrupts(struct seq_file *p, void *v)
   {
         int i = *(loff_t *) v, j;
@@@ -96,8 -92,13 +97,13 @@@
                 seq_putc(p, '\n');
    skip:
                 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-       } else if (i == NR_IRQS)
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
+               seq_printf(p, "     CORE  Non Maskable Interrupt\n");
                 seq_printf(p, "Err: %10u\n",  atomic_read(&irq_err_count));
+       }
         return 0;
   }
   
diff --combined arch/sparc/kernel/irq_64.c

index 4ac5c651e00dc832cbc22ee68081795572fff369,e289376198eb2be4e722b5cbf1a31d440c580631..3d2c6baae96bf05b251d188ce1739150af3065fd
--- 1/arch/sparc/kernel/irq_64.c
--- 2/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@@ -196,6 -196,11 +196,11 @@@ int show_interrupts(struct seq_file *p
                 seq_putc(p, '\n');
   skip:
                 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", cpu_data(j).__nmi_count);
+               seq_printf(p, "     Non-maskable interrupts\n");
         }
         return 0;
   }
@@@ -247,10 -252,9 +252,10 @@@ struct irq_handler_data 
   #ifdef CONFIG_SMP
   static int irq_choose_cpu(unsigned int virt_irq)
   {
- -      cpumask_t mask = irq_desc[virt_irq].affinity;
+ +      cpumask_t mask;
         int cpuid;
   
+ +      cpumask_copy(&mask, irq_desc[virt_irq].affinity);
         if (cpus_equal(mask, CPU_MASK_ALL)) {
                 static int irq_rover;
                 static DEFINE_SPINLOCK(irq_rover_lock);
@@@ -779,69 -783,6 +784,6 @@@ void do_softirq(void
         local_irq_restore(flags);
   }
   
- static void unhandled_perf_irq(struct pt_regs *regs)
- {
-       unsigned long pcr, pic;
- 
-       read_pcr(pcr);
-       read_pic(pic);
- 
-       write_pcr(0);
- 
-       printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
-              smp_processor_id());
-       printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
-              smp_processor_id(), pcr, pic);
- }
- 
- /* Almost a direct copy of the powerpc PMC code.  */
- static DEFINE_SPINLOCK(perf_irq_lock);
- static void *perf_irq_owner_caller; /* mostly for debugging */
- static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
- 
- /* Invoked from level 15 PIL handler in trap table.  */
- void perfctr_irq(int irq, struct pt_regs *regs)
- {
-       clear_softint(1 << irq);
-       perf_irq(regs);
- }
- 
- int register_perfctr_intr(void (*handler)(struct pt_regs *))
- {
-       int ret;
- 
-       if (!handler)
-               return -EINVAL;
- 
-       spin_lock(&perf_irq_lock);
-       if (perf_irq != unhandled_perf_irq) {
-               printk(KERN_WARNING "register_perfctr_intr: "
-                      "perf IRQ busy (reserved by caller %p)\n",
-                      perf_irq_owner_caller);
-               ret = -EBUSY;
-               goto out;
-       }
- 
-       perf_irq_owner_caller = __builtin_return_address(0);
-       perf_irq = handler;
- 
-       ret = 0;
- out:
-       spin_unlock(&perf_irq_lock);
- 
-       return ret;
- }
- EXPORT_SYMBOL_GPL(register_perfctr_intr);
- 
- void release_perfctr_intr(void (*handler)(struct pt_regs *))
- {
-       spin_lock(&perf_irq_lock);
-       perf_irq_owner_caller = NULL;
-       perf_irq = unhandled_perf_irq;
-       spin_unlock(&perf_irq_lock);
- }
- EXPORT_SYMBOL_GPL(release_perfctr_intr);
- 
   #ifdef CONFIG_HOTPLUG_CPU
   void fixup_irqs(void)
   {
@@@ -855,7 -796,7 +797,7 @@@
                     !(irq_desc[irq].status & IRQ_PER_CPU)) {
                         if (irq_desc[irq].chip->set_affinity)
                                 irq_desc[irq].chip->set_affinity(irq,
- -                                      &irq_desc[irq].affinity);
+ +                                      irq_desc[irq].affinity);
                 }
                 spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
         }
diff --combined arch/x86/ia32/ia32entry.S

index 9c79b247700801e6467b5629e7ee757f09a44b7e,5a0d76dc56a46431690702be8f745d6813db4cfc..097a6b64c24ddfb3c348b0a16d3b6d90242f68a9
--- 1/arch/x86/ia32/ia32entry.S
--- 2/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@@ -112,8 -112,8 +112,8 @@@ ENTRY(ia32_sysenter_target
         CFI_DEF_CFA     rsp,0
         CFI_REGISTER    rsp,rbp
         SWAPGS_UNSAFE_STACK
- -      movq    %gs:pda_kernelstack, %rsp
- -      addq    $(PDA_STACKOFFSET),%rsp 
+ +      movq    PER_CPU_VAR(kernel_stack), %rsp
+ +      addq    $(KERNEL_STACK_OFFSET),%rsp
         /*
          * No need to follow this irqs on/off section: the syscall
          * disabled irqs, here we enable it straight after entry:
@@@ -273,13 -273,13 +273,13 @@@ ENDPROC(ia32_sysenter_target
   ENTRY(ia32_cstar_target)
         CFI_STARTPROC32 simple
         CFI_SIGNAL_FRAME
- -      CFI_DEF_CFA     rsp,PDA_STACKOFFSET
+ +      CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
         CFI_REGISTER    rip,rcx
         /*CFI_REGISTER  rflags,r11*/
         SWAPGS_UNSAFE_STACK
         movl    %esp,%r8d
         CFI_REGISTER    rsp,r8
- -      movq    %gs:pda_kernelstack,%rsp
+ +      movq    PER_CPU_VAR(kernel_stack),%rsp
         /*
          * No need to follow this irqs on/off section: the syscall
          * disabled irqs and here we enable it straight after entry:
@@@ -418,9 -418,9 +418,9 @@@ ENTRY(ia32_syscall
         orl   $TS_COMPAT,TI_status(%r10)
         testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
         jnz ia32_tracesys
- ia32_do_syscall:      
         cmpl $(IA32_NR_syscalls-1),%eax
-       ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
+       ja ia32_badsys
+ ia32_do_call:
         IA32_ARG_FIXUP
         call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
   ia32_sysret:
@@@ -435,7 -435,9 +435,9 @@@ ia32_tracesys
         call syscall_trace_enter
         LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
         RESTORE_REST
-       jmp ia32_do_syscall
+       cmpl $(IA32_NR_syscalls-1),%eax
+       ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
+       jmp ia32_do_call
   END(ia32_syscall)
   
   ia32_badsys:
diff --combined arch/x86/include/asm/paravirt.h

index ff691736f5e9657db383f7e0a2a13d6d2b36b02b,c09a1412758431271633f1af7793d708d59a24f2..1c244b64573feadfc30c5ab49f234a80411d9ab6
--- 1/arch/x86/include/asm/paravirt.h
--- 2/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@@ -12,38 -12,21 +12,38 @@@
   #define CLBR_EAX  (1 << 0)
   #define CLBR_ECX  (1 << 1)
   #define CLBR_EDX  (1 << 2)
+ +#define CLBR_EDI  (1 << 3)
   
- -#ifdef CONFIG_X86_64
- -#define CLBR_RSI  (1 << 3)
- -#define CLBR_RDI  (1 << 4)
+ +#ifdef CONFIG_X86_32
+ +/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+ +#define CLBR_ANY  ((1 << 4) - 1)
+ +
+ +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
+ +#define CLBR_RET_REG  (CLBR_EAX | CLBR_EDX)
+ +#define CLBR_SCRATCH  (0)
+ +#else
+ +#define CLBR_RAX  CLBR_EAX
+ +#define CLBR_RCX  CLBR_ECX
+ +#define CLBR_RDX  CLBR_EDX
+ +#define CLBR_RDI  CLBR_EDI
+ +#define CLBR_RSI  (1 << 4)
   #define CLBR_R8   (1 << 5)
   #define CLBR_R9   (1 << 6)
   #define CLBR_R10  (1 << 7)
   #define CLBR_R11  (1 << 8)
+ +
   #define CLBR_ANY  ((1 << 9) - 1)
+ +
+ +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+ +                       CLBR_RCX | CLBR_R8 | CLBR_R9)
+ +#define CLBR_RET_REG  (CLBR_RAX)
+ +#define CLBR_SCRATCH  (CLBR_R10 | CLBR_R11)
+ +
   #include <asm/desc_defs.h>
- -#else
- -/* CLBR_ANY should match all regs platform has. For i386, that's just it */
- -#define CLBR_ANY  ((1 << 3) - 1)
   #endif /* X86_64 */
   
+ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+ +
   #ifndef __ASSEMBLY__
   #include <linux/types.h>
   #include <linux/cpumask.h>
@@@ -57,14 -40,6 +57,14 @@@ struct tss_struct
   struct mm_struct;
   struct desc_struct;
   
+ +/*
+ + * Wrapper type for pointers to code which uses the non-standard
+ + * calling convention.  See PV_CALL_SAVE_REGS_THUNK below.
+ + */
+ +struct paravirt_callee_save {
+ +      void *func;
+ +};
+ +
   /* general info */
   struct pv_info {
         unsigned int kernel_rpl;
@@@ -214,15 -189,11 +214,15 @@@ struct pv_irq_ops 
          * expected to use X86_EFLAGS_IF; all other bits
          * returned from save_fl are undefined, and may be ignored by
          * restore_fl.
+ +       *
+ +       * NOTE: Callers of these functions expect the callee to preserve
+ +       * more registers than the standard C calling convention requires.
          */
- -      unsigned long (*save_fl)(void);
- -      void (*restore_fl)(unsigned long);
- -      void (*irq_disable)(void);
- -      void (*irq_enable)(void);
+ +      struct paravirt_callee_save save_fl;
+ +      struct paravirt_callee_save restore_fl;
+ +      struct paravirt_callee_save irq_disable;
+ +      struct paravirt_callee_save irq_enable;
+ +
         void (*safe_halt)(void);
         void (*halt)(void);
   
@@@ -273,8 -244,7 +273,8 @@@ struct pv_mmu_ops 
         void (*flush_tlb_user)(void);
         void (*flush_tlb_kernel)(void);
         void (*flush_tlb_single)(unsigned long addr);
- -      void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+ +      void (*flush_tlb_others)(const struct cpumask *cpus,
+ +                               struct mm_struct *mm,
                                  unsigned long va);
   
         /* Hooks for allocating and freeing a pagetable top-level */
@@@ -308,11 -278,12 +308,11 @@@
         void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
                                         pte_t *ptep, pte_t pte);
   
- -      pteval_t (*pte_val)(pte_t);
- -      pteval_t (*pte_flags)(pte_t);
- -      pte_t (*make_pte)(pteval_t pte);
+ +      struct paravirt_callee_save pte_val;
+ +      struct paravirt_callee_save make_pte;
   
- -      pgdval_t (*pgd_val)(pgd_t);
- -      pgd_t (*make_pgd)(pgdval_t pgd);
+ +      struct paravirt_callee_save pgd_val;
+ +      struct paravirt_callee_save make_pgd;
   
   #if PAGETABLE_LEVELS >= 3
   #ifdef CONFIG_X86_PAE
@@@ -327,12 -298,12 +327,12 @@@
   
         void (*set_pud)(pud_t *pudp, pud_t pudval);
   
- -      pmdval_t (*pmd_val)(pmd_t);
- -      pmd_t (*make_pmd)(pmdval_t pmd);
+ +      struct paravirt_callee_save pmd_val;
+ +      struct paravirt_callee_save make_pmd;
   
   #if PAGETABLE_LEVELS == 4
- -      pudval_t (*pud_val)(pud_t);
- -      pud_t (*make_pud)(pudval_t pud);
+ +      struct paravirt_callee_save pud_val;
+ +      struct paravirt_callee_save make_pud;
   
         void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
   #endif        /* PAGETABLE_LEVELS == 4 */
@@@ -417,8 -388,6 +417,8 @@@ extern struct pv_lock_ops pv_lock_ops
         asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
   
   unsigned paravirt_patch_nop(void);
+ +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
+ +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
   unsigned paravirt_patch_ignore(unsigned len);
   unsigned paravirt_patch_call(void *insnbuf,
                              const void *target, u16 tgt_clobbers,
@@@ -510,45 -479,25 +510,45 @@@ int paravirt_disable_iospace(void)
    * makes sure the incoming and outgoing types are always correct.
    */
   #ifdef CONFIG_X86_32
- -#define PVOP_VCALL_ARGS                       unsigned long __eax, __edx, __ecx
+ +#define PVOP_VCALL_ARGS                               \
+ +      unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
   #define PVOP_CALL_ARGS                        PVOP_VCALL_ARGS
+ +
+ +#define PVOP_CALL_ARG1(x)             "a" ((unsigned long)(x))
+ +#define PVOP_CALL_ARG2(x)             "d" ((unsigned long)(x))
+ +#define PVOP_CALL_ARG3(x)             "c" ((unsigned long)(x))
+ +
   #define PVOP_VCALL_CLOBBERS           "=a" (__eax), "=d" (__edx),     \
                                         "=c" (__ecx)
   #define PVOP_CALL_CLOBBERS            PVOP_VCALL_CLOBBERS
+ +
+ +#define PVOP_VCALLEE_CLOBBERS         "=a" (__eax), "=d" (__edx)
+ +#define PVOP_CALLEE_CLOBBERS          PVOP_VCALLEE_CLOBBERS
+ +
   #define EXTRA_CLOBBERS
   #define VEXTRA_CLOBBERS
- -#else
- -#define PVOP_VCALL_ARGS               unsigned long __edi, __esi, __edx, __ecx
+ +#else  /* CONFIG_X86_64 */
+ +#define PVOP_VCALL_ARGS                                       \
+ +      unsigned long __edi = __edi, __esi = __esi,     \
+ +              __edx = __edx, __ecx = __ecx
   #define PVOP_CALL_ARGS                PVOP_VCALL_ARGS, __eax
+ +
+ +#define PVOP_CALL_ARG1(x)             "D" ((unsigned long)(x))
+ +#define PVOP_CALL_ARG2(x)             "S" ((unsigned long)(x))
+ +#define PVOP_CALL_ARG3(x)             "d" ((unsigned long)(x))
+ +#define PVOP_CALL_ARG4(x)             "c" ((unsigned long)(x))
+ +
   #define PVOP_VCALL_CLOBBERS   "=D" (__edi),                           \
                                 "=S" (__esi), "=d" (__edx),             \
                                 "=c" (__ecx)
- -
   #define PVOP_CALL_CLOBBERS    PVOP_VCALL_CLOBBERS, "=a" (__eax)
   
+ +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+ +#define PVOP_CALLEE_CLOBBERS  PVOP_VCALLEE_CLOBBERS
+ +
   #define EXTRA_CLOBBERS         , "r8", "r9", "r10", "r11"
   #define VEXTRA_CLOBBERS        , "rax", "r8", "r9", "r10", "r11"
- -#endif
+ +#endif        /* CONFIG_X86_32 */
   
   #ifdef CONFIG_PARAVIRT_DEBUG
   #define PVOP_TEST_NULL(op)    BUG_ON(op == NULL)
@@@ -556,11 -505,10 +556,11 @@@
   #define PVOP_TEST_NULL(op)    ((void)op)
   #endif
   
- -#define __PVOP_CALL(rettype, op, pre, post, ...)                      \
+ +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,               \
+ +                    pre, post, ...)                                   \
         ({                                                              \
                 rettype __ret;                                          \
- -              PVOP_CALL_ARGS;                                 \
+ +              PVOP_CALL_ARGS;                                         \
                 PVOP_TEST_NULL(op);                                     \
                 /* This is 32-bit specific, but is okay in 64-bit */    \
                 /* since this condition will never hold */              \
@@@ -568,113 -516,70 +568,113 @@@
                         asm volatile(pre                                \
                                      paravirt_alt(PARAVIRT_CALL)        \
                                      post                               \
- -                                   : PVOP_CALL_CLOBBERS               \
+ +                                   : call_clbr                        \
                                      : paravirt_type(op),               \
- -                                     paravirt_clobber(CLBR_ANY),      \
+ +                                     paravirt_clobber(clbr),          \
                                        ##__VA_ARGS__                    \
- -                                   : "memory", "cc" EXTRA_CLOBBERS);  \
+ +                                   : "memory", "cc" extra_clbr);      \
                         __ret = (rettype)((((u64)__edx) << 32) | __eax); \
                 } else {                                                \
                         asm volatile(pre                                \
                                      paravirt_alt(PARAVIRT_CALL)        \
                                      post                               \
- -                                   : PVOP_CALL_CLOBBERS               \
+ +                                   : call_clbr                        \
                                      : paravirt_type(op),               \
- -                                     paravirt_clobber(CLBR_ANY),      \
+ +                                     paravirt_clobber(clbr),          \
                                        ##__VA_ARGS__                    \
- -                                   : "memory", "cc" EXTRA_CLOBBERS);  \
+ +                                   : "memory", "cc" extra_clbr);      \
                         __ret = (rettype)__eax;                         \
                 }                                                       \
                 __ret;                                                  \
         })
- -#define __PVOP_VCALL(op, pre, post, ...)                              \
+ +
+ +#define __PVOP_CALL(rettype, op, pre, post, ...)                      \
+ +      ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,        \
+ +                    EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+ +
+ +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...)                        \
+ +      ____PVOP_CALL(rettype, op.func, CLBR_RET_REG,                   \
+ +                    PVOP_CALLEE_CLOBBERS, ,                           \
+ +                    pre, post, ##__VA_ARGS__)
+ +
+ +
+ +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...)       \
         ({                                                              \
                 PVOP_VCALL_ARGS;                                        \
                 PVOP_TEST_NULL(op);                                     \
                 asm volatile(pre                                        \
                              paravirt_alt(PARAVIRT_CALL)                \
                              post                                       \
- -                           : PVOP_VCALL_CLOBBERS                      \
+ +                           : call_clbr                                \
                              : paravirt_type(op),                       \
- -                             paravirt_clobber(CLBR_ANY),              \
+ +                             paravirt_clobber(clbr),                  \
                                ##__VA_ARGS__                            \
- -                           : "memory", "cc" VEXTRA_CLOBBERS);         \
+ +                           : "memory", "cc" extra_clbr);              \
         })
   
+ +#define __PVOP_VCALL(op, pre, post, ...)                              \
+ +      ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS,               \
+ +                     VEXTRA_CLOBBERS,                                 \
+ +                     pre, post, ##__VA_ARGS__)
+ +
+ +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...)                       \
+ +      ____PVOP_CALL(rettype, op.func, CLBR_RET_REG,                   \
+ +                    PVOP_VCALLEE_CLOBBERS, ,                          \
+ +                    pre, post, ##__VA_ARGS__)
+ +
+ +
+ +
   #define PVOP_CALL0(rettype, op)                                               \
         __PVOP_CALL(rettype, op, "", "")
   #define PVOP_VCALL0(op)                                                       \
         __PVOP_VCALL(op, "", "")
   
+ +#define PVOP_CALLEE0(rettype, op)                                     \
+ +      __PVOP_CALLEESAVE(rettype, op, "", "")
+ +#define PVOP_VCALLEE0(op)                                             \
+ +      __PVOP_VCALLEESAVE(op, "", "")
+ +
+ +
   #define PVOP_CALL1(rettype, op, arg1)                                 \
- -      __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
+ +      __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
   #define PVOP_VCALL1(op, arg1)                                         \
- -      __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
+ +      __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+ +
+ +#define PVOP_CALLEE1(rettype, op, arg1)                                       \
+ +      __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ +#define PVOP_VCALLEE1(op, arg1)                                               \
+ +      __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+ +
   
   #define PVOP_CALL2(rettype, op, arg1, arg2)                           \
- -      __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
- -      "1" ((unsigned long)(arg2)))
+ +      __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),          \
+ +                  PVOP_CALL_ARG2(arg2))
   #define PVOP_VCALL2(op, arg1, arg2)                                   \
- -      __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
- -      "1" ((unsigned long)(arg2)))
+ +      __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),                  \
+ +                   PVOP_CALL_ARG2(arg2))
+ +
+ +#define PVOP_CALLEE2(rettype, op, arg1, arg2)                         \
+ +      __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1),    \
+ +                        PVOP_CALL_ARG2(arg2))
+ +#define PVOP_VCALLEE2(op, arg1, arg2)                                 \
+ +      __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1),            \
+ +                         PVOP_CALL_ARG2(arg2))
+ +
   
   #define PVOP_CALL3(rettype, op, arg1, arg2, arg3)                     \
- -      __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
- -      "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ +      __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),          \
+ +                  PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
   #define PVOP_VCALL3(op, arg1, arg2, arg3)                             \
- -      __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
- -      "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ +      __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),                  \
+ +                   PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
   
   /* This is the only difference in x86_64. We can make it much simpler */
   #ifdef CONFIG_X86_32
   #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                       \
         __PVOP_CALL(rettype, op,                                        \
                     "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
- -                  "0" ((u32)(arg1)), "1" ((u32)(arg2)),               \
- -                  "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+ +                  PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),         \
+ +                  PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
   #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                               \
         __PVOP_VCALL(op,                                                \
                     "push %[_arg4];", "lea 4(%%esp),%%esp;",            \
@@@ -682,13 -587,13 +682,13 @@@
                     "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
   #else
   #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)                       \
- -      __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),   \
- -      "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),         \
- -      "3"((unsigned long)(arg4)))
+ +      __PVOP_CALL(rettype, op, "", "",                                \
+ +                  PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),         \
+ +                  PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
   #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)                               \
- -      __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),           \
- -      "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),         \
- -      "3"((unsigned long)(arg4)))
+ +      __PVOP_VCALL(op, "", "",                                        \
+ +                   PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),        \
+ +                   PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
   #endif
   
   static inline int paravirt_enabled(void)
@@@ -1079,11 -984,10 +1079,11 @@@ static inline void __flush_tlb_single(u
         PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
   }
   
- -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+ +static inline void flush_tlb_others(const struct cpumask *cpumask,
+ +                                  struct mm_struct *mm,
                                     unsigned long va)
   {
- -      PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+ +      PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
   }
   
   static inline int paravirt_pgd_alloc(struct mm_struct *mm)
@@@ -1155,13 -1059,13 +1155,13 @@@ static inline pte_t __pte(pteval_t val
         pteval_t ret;
   
         if (sizeof(pteval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pteval_t,
- -                               pv_mmu_ops.make_pte,
- -                               val, (u64)val >> 32);
+ +              ret = PVOP_CALLEE2(pteval_t,
+ +                                 pv_mmu_ops.make_pte,
+ +                                 val, (u64)val >> 32);
         else
- -              ret = PVOP_CALL1(pteval_t,
- -                               pv_mmu_ops.make_pte,
- -                               val);
+ +              ret = PVOP_CALLEE1(pteval_t,
+ +                                 pv_mmu_ops.make_pte,
+ +                                 val);
   
         return (pte_t) { .pte = ret };
   }
@@@ -1171,12 -1075,29 +1171,12 @@@ static inline pteval_t pte_val(pte_t pt
         pteval_t ret;
   
         if (sizeof(pteval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
- -                               pte.pte, (u64)pte.pte >> 32);
- -      else
- -              ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
- -                               pte.pte);
- -
- -      return ret;
- -}
- -
- -static inline pteval_t pte_flags(pte_t pte)
- -{
- -      pteval_t ret;
- -
- -      if (sizeof(pteval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
- -                               pte.pte, (u64)pte.pte >> 32);
+ +              ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
+ +                                 pte.pte, (u64)pte.pte >> 32);
         else
- -              ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
- -                               pte.pte);
+ +              ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
+ +                                 pte.pte);
   
- -#ifdef CONFIG_PARAVIRT_DEBUG
- -      BUG_ON(ret & PTE_PFN_MASK);
- -#endif
         return ret;
   }
   
@@@ -1185,11 -1106,11 +1185,11 @@@ static inline pgd_t __pgd(pgdval_t val
         pgdval_t ret;
   
         if (sizeof(pgdval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
- -                               val, (u64)val >> 32);
+ +              ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
+ +                                 val, (u64)val >> 32);
         else
- -              ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
- -                               val);
+ +              ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
+ +                                 val);
   
         return (pgd_t) { ret };
   }
@@@ -1199,11 -1120,11 +1199,11 @@@ static inline pgdval_t pgd_val(pgd_t pg
         pgdval_t ret;
   
         if (sizeof(pgdval_t) > sizeof(long))
- -              ret =  PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
- -                                pgd.pgd, (u64)pgd.pgd >> 32);
+ +              ret =  PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
+ +                                  pgd.pgd, (u64)pgd.pgd >> 32);
         else
- -              ret =  PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
- -                                pgd.pgd);
+ +              ret =  PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
+ +                                  pgd.pgd);
   
         return ret;
   }
@@@ -1267,11 -1188,11 +1267,11 @@@ static inline pmd_t __pmd(pmdval_t val
         pmdval_t ret;
   
         if (sizeof(pmdval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
- -                               val, (u64)val >> 32);
+ +              ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
+ +                                 val, (u64)val >> 32);
         else
- -              ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
- -                               val);
+ +              ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
+ +                                 val);
   
         return (pmd_t) { ret };
   }
@@@ -1281,11 -1202,11 +1281,11 @@@ static inline pmdval_t pmd_val(pmd_t pm
         pmdval_t ret;
   
         if (sizeof(pmdval_t) > sizeof(long))
- -              ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
- -                                pmd.pmd, (u64)pmd.pmd >> 32);
+ +              ret =  PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
+ +                                  pmd.pmd, (u64)pmd.pmd >> 32);
         else
- -              ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
- -                                pmd.pmd);
+ +              ret =  PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
+ +                                  pmd.pmd);
   
         return ret;
   }
@@@ -1307,11 -1228,11 +1307,11 @@@ static inline pud_t __pud(pudval_t val
         pudval_t ret;
   
         if (sizeof(pudval_t) > sizeof(long))
- -              ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
- -                               val, (u64)val >> 32);
+ +              ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
+ +                                 val, (u64)val >> 32);
         else
- -              ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
- -                               val);
+ +              ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
+ +                                 val);
   
         return (pud_t) { ret };
   }
@@@ -1321,11 -1242,11 +1321,11 @@@ static inline pudval_t pud_val(pud_t pu
         pudval_t ret;
   
         if (sizeof(pudval_t) > sizeof(long))
- -              ret =  PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
- -                                pud.pud, (u64)pud.pud >> 32);
+ +              ret =  PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
+ +                                  pud.pud, (u64)pud.pud >> 32);
         else
- -              ret =  PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
- -                                pud.pud);
+ +              ret =  PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
+ +                                  pud.pud);
   
         return ret;
   }
@@@ -1466,10 -1387,9 +1466,10 @@@ static inline void __set_fixmap(unsigne
   }
   
   void _paravirt_nop(void);
- -#define paravirt_nop  ((void *)_paravirt_nop)
+ +u32 _paravirt_ident_32(u32);
+ +u64 _paravirt_ident_64(u64);
   
- -void paravirt_use_bytelocks(void);
+ +#define paravirt_nop  ((void *)_paravirt_nop)
   
   #ifdef CONFIG_SMP
   
@@@ -1482,6 -1402,7 +1482,7 @@@ static inline int __raw_spin_is_contend
   {
         return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
   }
+ #define __raw_spin_is_contended       __raw_spin_is_contended
   
   static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
   {
@@@ -1518,37 -1439,12 +1519,37 @@@ extern struct paravirt_patch_site __par
         __parainstructions_end[];
   
   #ifdef CONFIG_X86_32
- -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
- -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+ +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
+ +#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+ +
+ +/* save and restore all caller-save registers, except return value */
+ +#define PV_SAVE_ALL_CALLER_REGS               "pushl %ecx;"
+ +#define PV_RESTORE_ALL_CALLER_REGS    "popl  %ecx;"
+ +
   #define PV_FLAGS_ARG "0"
   #define PV_EXTRA_CLOBBERS
   #define PV_VEXTRA_CLOBBERS
   #else
+ +/* save and restore all caller-save registers, except return value */
+ +#define PV_SAVE_ALL_CALLER_REGS                                               \
+ +      "push %rcx;"                                                    \
+ +      "push %rdx;"                                                    \
+ +      "push %rsi;"                                                    \
+ +      "push %rdi;"                                                    \
+ +      "push %r8;"                                                     \
+ +      "push %r9;"                                                     \
+ +      "push %r10;"                                                    \
+ +      "push %r11;"
+ +#define PV_RESTORE_ALL_CALLER_REGS                                    \
+ +      "pop %r11;"                                                     \
+ +      "pop %r10;"                                                     \
+ +      "pop %r9;"                                                      \
+ +      "pop %r8;"                                                      \
+ +      "pop %rdi;"                                                     \
+ +      "pop %rsi;"                                                     \
+ +      "pop %rdx;"                                                     \
+ +      "pop %rcx;"
+ +
   /* We save some registers, but all of them, that's too much. We clobber all
    * caller saved registers but the argument parameter */
   #define PV_SAVE_REGS "pushq %%rdi;"
@@@ -1558,76 -1454,52 +1559,76 @@@
   #define PV_FLAGS_ARG "D"
   #endif
   
+ +/*
+ + * Generate a thunk around a function which saves all caller-save
+ + * registers except for the return value.  This allows C functions to
+ + * be called from assembler code where fewer than normal registers are
+ + * available.  It may also help code generation around calls from C
+ + * code if the common case doesn't use many registers.
+ + *
+ + * When a callee is wrapped in a thunk, the caller can assume that all
+ + * arg regs and all scratch registers are preserved across the
+ + * call. The return value in rax/eax will not be saved, even for void
+ + * functions.
+ + */
+ +#define PV_CALLEE_SAVE_REGS_THUNK(func)                                       \
+ +      extern typeof(func) __raw_callee_save_##func;                   \
+ +      static void *__##func##__ __used = func;                        \
+ +                                                                      \
+ +      asm(".pushsection .text;"                                       \
+ +          "__raw_callee_save_" #func ": "                             \
+ +          PV_SAVE_ALL_CALLER_REGS                                     \
+ +          "call " #func ";"                                           \
+ +          PV_RESTORE_ALL_CALLER_REGS                                  \
+ +          "ret;"                                                      \
+ +          ".popsection")
+ +
+ +/* Get a reference to a callee-save function */
+ +#define PV_CALLEE_SAVE(func)                                          \
+ +      ((struct paravirt_callee_save) { __raw_callee_save_##func })
+ +
+ +/* Promise that "func" already uses the right calling convention */
+ +#define __PV_IS_CALLEE_SAVE(func)                     \
+ +      ((struct paravirt_callee_save) { func })
+ +
   static inline unsigned long __raw_local_save_flags(void)
   {
         unsigned long f;
   
- -      asm volatile(paravirt_alt(PV_SAVE_REGS
- -                                PARAVIRT_CALL
- -                                PV_RESTORE_REGS)
+ +      asm volatile(paravirt_alt(PARAVIRT_CALL)
                      : "=a"(f)
                      : paravirt_type(pv_irq_ops.save_fl),
                        paravirt_clobber(CLBR_EAX)
- -                   : "memory", "cc" PV_VEXTRA_CLOBBERS);
+ +                   : "memory", "cc");
         return f;
   }
   
   static inline void raw_local_irq_restore(unsigned long f)
   {
- -      asm volatile(paravirt_alt(PV_SAVE_REGS
- -                                PARAVIRT_CALL
- -                                PV_RESTORE_REGS)
+ +      asm volatile(paravirt_alt(PARAVIRT_CALL)
                      : "=a"(f)
                      : PV_FLAGS_ARG(f),
                        paravirt_type(pv_irq_ops.restore_fl),
                        paravirt_clobber(CLBR_EAX)
- -                   : "memory", "cc" PV_EXTRA_CLOBBERS);
+ +                   : "memory", "cc");
   }
   
   static inline void raw_local_irq_disable(void)
   {
- -      asm volatile(paravirt_alt(PV_SAVE_REGS
- -                                PARAVIRT_CALL
- -                                PV_RESTORE_REGS)
+ +      asm volatile(paravirt_alt(PARAVIRT_CALL)
                      :
                      : paravirt_type(pv_irq_ops.irq_disable),
                        paravirt_clobber(CLBR_EAX)
- -                   : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ +                   : "memory", "eax", "cc");
   }
   
   static inline void raw_local_irq_enable(void)
   {
- -      asm volatile(paravirt_alt(PV_SAVE_REGS
- -                                PARAVIRT_CALL
- -                                PV_RESTORE_REGS)
+ +      asm volatile(paravirt_alt(PARAVIRT_CALL)
                      :
                      : paravirt_type(pv_irq_ops.irq_enable),
                        paravirt_clobber(CLBR_EAX)
- -                   : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ +                   : "memory", "eax", "cc");
   }
   
   static inline unsigned long __raw_local_irq_save(void)
@@@ -1670,49 -1542,33 +1671,49 @@@
         .popsection
   
   
+ +#define COND_PUSH(set, mask, reg)                     \
+ +      .if ((~(set)) & mask); push %reg; .endif
+ +#define COND_POP(set, mask, reg)                      \
+ +      .if ((~(set)) & mask); pop %reg; .endif
+ +
   #ifdef CONFIG_X86_64
- -#define PV_SAVE_REGS                          \
- -      push %rax;                              \
- -      push %rcx;                              \
- -      push %rdx;                              \
- -      push %rsi;                              \
- -      push %rdi;                              \
- -      push %r8;                               \
- -      push %r9;                               \
- -      push %r10;                              \
- -      push %r11
- -#define PV_RESTORE_REGS                               \
- -      pop %r11;                               \
- -      pop %r10;                               \
- -      pop %r9;                                \
- -      pop %r8;                                \
- -      pop %rdi;                               \
- -      pop %rsi;                               \
- -      pop %rdx;                               \
- -      pop %rcx;                               \
- -      pop %rax
+ +
+ +#define PV_SAVE_REGS(set)                     \
+ +      COND_PUSH(set, CLBR_RAX, rax);          \
+ +      COND_PUSH(set, CLBR_RCX, rcx);          \
+ +      COND_PUSH(set, CLBR_RDX, rdx);          \
+ +      COND_PUSH(set, CLBR_RSI, rsi);          \
+ +      COND_PUSH(set, CLBR_RDI, rdi);          \
+ +      COND_PUSH(set, CLBR_R8, r8);            \
+ +      COND_PUSH(set, CLBR_R9, r9);            \
+ +      COND_PUSH(set, CLBR_R10, r10);          \
+ +      COND_PUSH(set, CLBR_R11, r11)
+ +#define PV_RESTORE_REGS(set)                  \
+ +      COND_POP(set, CLBR_R11, r11);           \
+ +      COND_POP(set, CLBR_R10, r10);           \
+ +      COND_POP(set, CLBR_R9, r9);             \
+ +      COND_POP(set, CLBR_R8, r8);             \
+ +      COND_POP(set, CLBR_RDI, rdi);           \
+ +      COND_POP(set, CLBR_RSI, rsi);           \
+ +      COND_POP(set, CLBR_RDX, rdx);           \
+ +      COND_POP(set, CLBR_RCX, rcx);           \
+ +      COND_POP(set, CLBR_RAX, rax)
+ +
   #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
   #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
   #define PARA_INDIRECT(addr)   *addr(%rip)
   #else
- -#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
- -#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+ +#define PV_SAVE_REGS(set)                     \
+ +      COND_PUSH(set, CLBR_EAX, eax);          \
+ +      COND_PUSH(set, CLBR_EDI, edi);          \
+ +      COND_PUSH(set, CLBR_ECX, ecx);          \
+ +      COND_PUSH(set, CLBR_EDX, edx)
+ +#define PV_RESTORE_REGS(set)                  \
+ +      COND_POP(set, CLBR_EDX, edx);           \
+ +      COND_POP(set, CLBR_ECX, ecx);           \
+ +      COND_POP(set, CLBR_EDI, edi);           \
+ +      COND_POP(set, CLBR_EAX, eax)
+ +
   #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
   #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
   #define PARA_INDIRECT(addr)   *%cs:addr
@@@ -1724,15 -1580,15 +1725,15 @@@
   
   #define DISABLE_INTERRUPTS(clobbers)                                  \
         PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
- -                PV_SAVE_REGS;                                         \
+ +                PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
                   call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
- -                PV_RESTORE_REGS;)                     \
+ +                PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
   
   #define ENABLE_INTERRUPTS(clobbers)                                   \
         PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
- -                PV_SAVE_REGS;                                         \
+ +                PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
                   call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
- -                PV_RESTORE_REGS;)
+ +                PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
   
   #define USERGS_SYSRET32                                                       \
         PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),       \
@@@ -1762,15 -1618,11 +1763,15 @@@
         PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
                   swapgs)
   
+ +/*
+ + * Note: swapgs is very special, and in practise is either going to be
+ + * implemented with a single "swapgs" instruction or something very
+ + * special.  Either way, we don't need to save any registers for
+ + * it.
+ + */
   #define SWAPGS                                                                \
         PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
- -                PV_SAVE_REGS;                                         \
- -                call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
- -                PV_RESTORE_REGS                                       \
+ +                call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)          \
                  )
   
   #define GET_CR2_INTO_RCX                              \
diff --combined arch/x86/include/asm/processor.h

index d211f955199826b3c9720c0ea28bb080faea0f72,3bfd5235a9eb46dbf5e1386cf8ba34b6e8765066..a6643f68fbb142185b506bf3425066fe32ea7193
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -73,7 -73,7 +73,7 @@@ struct cpuinfo_x86 
         char                    pad0;
   #else
         /* Number of 4K pages in DTLB/ITLB combined(in pages): */
- -      int                      x86_tlbsize;
+ +      int                     x86_tlbsize;
         __u8                    x86_virt_bits;
         __u8                    x86_phys_bits;
   #endif
@@@ -353,7 -353,7 +353,7 @@@ struct i387_soft_struct 
         u8                      no_update;
         u8                      rm;
         u8                      alimit;
-       struct info             *info;
+       struct math_emu_info    *info;
         u32                     entry_eip;
   };
   
@@@ -378,22 -378,6 +378,22 @@@ union thread_xstate 
   
   #ifdef CONFIG_X86_64
   DECLARE_PER_CPU(struct orig_ist, orig_ist);
+ +
+ +union irq_stack_union {
+ +      char irq_stack[IRQ_STACK_SIZE];
+ +      /*
+ +       * GCC hardcodes the stack canary as %gs:40.  Since the
+ +       * irq_stack is the object at %gs:0, we reserve the bottom
+ +       * 48 bytes of the irq stack for the canary.
+ +       */
+ +      struct {
+ +              char gs_base[40];
+ +              unsigned long stack_canary;
+ +      };
+ +};
+ +
+ +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+ +DECLARE_PER_CPU(char *, irq_stack_ptr);
   #endif
   
   extern void print_cpu_info(struct cpuinfo_x86 *);
@@@ -768,9 -752,9 +768,9 @@@ extern int sysenter_setup(void)
   extern struct desc_ptr                early_gdt_descr;
   
   extern void cpu_set_gdt(int);
- -extern void switch_to_new_gdt(void);
+ +extern void switch_to_new_gdt(int);
+ +extern void load_percpu_segment(int);
   extern void cpu_init(void);
- -extern void init_gdt(int cpu);
   
   static inline unsigned long get_debugctlmsr(void)
   {
diff --combined arch/x86/include/asm/spinlock.h

index 139b4249a5ec4fcb09294bb58e4eaa7f0848be1a,8247e94ac6b18dc337f9404cffc97800660c64d8..3a569665668020755ac910307ee21114f7f7a7ca
--- 1/arch/x86/include/asm/spinlock.h
--- 2/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@@ -172,8 -172,70 +172,8 @@@ static inline int __ticket_spin_is_cont
         return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
   }
   
- -#ifdef CONFIG_PARAVIRT
- -/*
- - * Define virtualization-friendly old-style lock byte lock, for use in
- - * pv_lock_ops if desired.
- - *
- - * This differs from the pre-2.6.24 spinlock by always using xchgb
- - * rather than decb to take the lock; this allows it to use a
- - * zero-initialized lock structure.  It also maintains a 1-byte
- - * contention counter, so that we can implement
- - * __byte_spin_is_contended.
- - */
- -struct __byte_spinlock {
- -      s8 lock;
- -      s8 spinners;
- -};
- -
- -static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
- -{
- -      struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- -      return bl->lock != 0;
- -}
- -
- -static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
- -{
- -      struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- -      return bl->spinners != 0;
- -}
- -
- -static inline void __byte_spin_lock(raw_spinlock_t *lock)
- -{
- -      struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- -      s8 val = 1;
- -
- -      asm("1: xchgb %1, %0\n"
- -          "   test %1,%1\n"
- -          "   jz 3f\n"
- -          "   " LOCK_PREFIX "incb %2\n"
- -          "2: rep;nop\n"
- -          "   cmpb $1, %0\n"
- -          "   je 2b\n"
- -          "   " LOCK_PREFIX "decb %2\n"
- -          "   jmp 1b\n"
- -          "3:"
- -          : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
- -}
- -
- -static inline int __byte_spin_trylock(raw_spinlock_t *lock)
- -{
- -      struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- -      u8 old = 1;
- -
- -      asm("xchgb %1,%0"
- -          : "+m" (bl->lock), "+q" (old) : : "memory");
+ +#ifndef CONFIG_PARAVIRT
   
- -      return old == 0;
- -}
- -
- -static inline void __byte_spin_unlock(raw_spinlock_t *lock)
- -{
- -      struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- -      smp_wmb();
- -      bl->lock = 0;
- -}
- -#else  /* !CONFIG_PARAVIRT */
   static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
   {
         return __ticket_spin_is_locked(lock);
@@@ -183,6 -245,7 +183,7 @@@ static inline int __raw_spin_is_contend
   {
         return __ticket_spin_is_contended(lock);
   }
+ #define __raw_spin_is_contended       __raw_spin_is_contended
   
   static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
   {
@@@ -205,7 -268,7 +206,7 @@@ static __always_inline void __raw_spin_
         __raw_spin_lock(lock);
   }
   
- -#endif        /* CONFIG_PARAVIRT */
+ +#endif
   
   static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
   {
@@@ -267,7 -330,8 +268,7 @@@ static inline int __raw_read_trylock(ra
   {
         atomic_t *count = (atomic_t *)lock;
   
- -      atomic_dec(count);
- -      if (atomic_read(count) >= 0)
+ +      if (atomic_dec_return(count) >= 0)
                 return 1;
         atomic_inc(count);
         return 0;
diff --combined arch/x86/kernel/acpi/sleep.c

index 4abff454c55b98ba3f3d6c28fbacb52a012e6095,a60c1f3bcb87826ebdfffc15f2ca78488308ba2f..7c243a2c5115dfa6653d7a4fec05d6521eb5ef4f
--- 1/arch/x86/kernel/acpi/sleep.c
--- 2/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@@ -101,7 -101,6 +101,7 @@@ int acpi_save_state_mem(void
         stack_start.sp = temp_stack + sizeof(temp_stack);
         early_gdt_descr.address =
                         (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ +      initial_gs = per_cpu_offset(smp_processor_id());
   #endif
         initial_code = (unsigned long)wakeup_long64;
         saved_magic = 0x123456789abcdef0;
@@@ -157,11 -156,11 +157,11 @@@ static int __init acpi_sleep_setup(cha
   #ifdef CONFIG_HIBERNATION
                 if (strncmp(str, "s4_nohwsig", 10) == 0)
                         acpi_no_s4_hw_signature();
+               if (strncmp(str, "s4_nonvs", 8) == 0)
+                       acpi_s4_no_nvs();
   #endif
                 if (strncmp(str, "old_ordering", 12) == 0)
                         acpi_old_suspend_ordering();
-               if (strncmp(str, "s4_nonvs", 8) == 0)
-                       acpi_s4_no_nvs();
                 str = strchr(str, ',');
                 if (str != NULL)
                         str += strspn(str, ", \t");
diff --combined arch/x86/kernel/cpu/intel.c

index 1cef0aa5e5dcb1dbd989218c802af8917f0e5678,24ff26a38adecff5ef27d4e76fb74caebd1fbb88..1f137a87d4bd19a86d1eaa4d47fc840ff9f3dfb3
--- 1/arch/x86/kernel/cpu/intel.c
--- 2/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@@ -24,7 -24,7 +24,7 @@@
   #ifdef CONFIG_X86_LOCAL_APIC
   #include <asm/mpspec.h>
   #include <asm/apic.h>
- -#include <mach_apic.h>
+ +#include <asm/genapic.h>
   #endif
   
   static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@@ -63,18 -63,6 +63,18 @@@
                 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
         }
   
+ +      /*
+ +       * There is a known erratum on Pentium III and Core Solo
+ +       * and Core Duo CPUs.
+ +       * " Page with PAT set to WC while associated MTRR is UC
+ +       *   may consolidate to UC "
+ +       * Because of this erratum, it is better to stick with
+ +       * setting WC in MTRR rather than using PAT on these CPUs.
+ +       *
+ +       * Enable PAT WC only on P4, Core 2 or later CPUs.
+ +       */
+ +      if (c->x86 == 6 && c->x86_model < 15)
+ +              clear_cpu_cap(c, X86_FEATURE_PAT);
   }
   
   #ifdef CONFIG_X86_32
@@@ -303,6 -291,9 +303,9 @@@ static void __cpuinit init_intel(struc
                 ds_init_intel(c);
         }
   
+       if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+               set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
+ 
   #ifdef CONFIG_X86_64
         if (c->x86 == 15)
                 c->x86_cache_alignment = c->x86_clflush_size * 2;
diff --combined arch/x86/kernel/process.c

index 89537f678b2da46df732f0f57b525dc41fdd5e65,6d12f7e37f8c1da5a465b110ff75ecae8c01f0c8..87b69d4fac164d9614ccc6cf750cf4c890e43b4a
--- 1/arch/x86/kernel/process.c
--- 2/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@@ -180,6 -180,9 +180,9 @@@ void mwait_idle_with_hints(unsigned lon
   
         trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
         if (!need_resched()) {
+               if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+                       clflush((void *)&current_thread_info()->flags);
+ 
                 __monitor((void *)&current_thread_info()->flags, 0, 0);
                 smp_mb();
                 if (!need_resched())
@@@ -194,6 -197,9 +197,9 @@@ static void mwait_idle(void
         struct power_trace it;
         if (!need_resched()) {
                 trace_power_start(&it, POWER_CSTATE, 1);
+               if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+                       clflush((void *)&current_thread_info()->flags);
+ 
                 __monitor((void *)&current_thread_info()->flags, 0, 0);
                 smp_mb();
                 if (!need_resched())
@@@ -344,7 -350,7 +350,7 @@@ static void c1e_idle(void
   
   void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
   {
- -#ifdef CONFIG_X86_SMP
+ +#ifdef CONFIG_SMP
         if (pm_idle == poll_idle && smp_num_siblings > 1) {
                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                         " performance may degrade.\n");
diff --combined arch/x86/kernel/traps.c

index 214bc327a0c310d0ca3285f7c84c75eb4ad05a07,7932338d7cb3112c7222121e513db0ca71190fa0..0d032d2d8a184e8b8d6921306f2b2041e2f4b23c
--- 1/arch/x86/kernel/traps.c
--- 2/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@@ -54,11 -54,12 +54,11 @@@
   #include <asm/desc.h>
   #include <asm/i387.h>
   
- -#include <mach_traps.h>
+ +#include <asm/mach_traps.h>
   
   #ifdef CONFIG_X86_64
   #include <asm/pgalloc.h>
   #include <asm/proto.h>
- -#include <asm/pda.h>
   #else
   #include <asm/processor-flags.h>
   #include <asm/arch_hooks.h>
@@@ -895,7 -896,7 +895,7 @@@ asmlinkage void math_state_restore(void
   EXPORT_SYMBOL_GPL(math_state_restore);
   
   #ifndef CONFIG_MATH_EMULATION
- asmlinkage void math_emulate(long arg)
+ void math_emulate(struct math_emu_info *info)
   {
         printk(KERN_EMERG
                 "math-emulation not enabled and no coprocessor found.\n");
@@@ -905,16 -906,19 +905,19 @@@
   }
   #endif /* CONFIG_MATH_EMULATION */
   
- dotraplinkage void __kprobes
- do_device_not_available(struct pt_regs *regs, long error)
+ dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
   {
   #ifdef CONFIG_X86_32
         if (read_cr0() & X86_CR0_EM) {
-               conditional_sti(regs);
-               math_emulate(0);
+               struct math_emu_info info = { };
+ 
+               conditional_sti(&regs);
+ 
+               info.regs = &regs;
+               math_emulate(&info);
         } else {
                 math_state_restore(); /* interrupts still off */
-               conditional_sti(regs);
+               conditional_sti(&regs);
         }
   #else
         math_state_restore();
diff --combined arch/x86/kernel/vmi_32.c

index eb9e7347928eec5cd494b201b2c5df73c9dda96c,bef58b4982dbc449696b624e24318cdf5b773926..f052c84ecbe4a53993998cd9a1c6836b2b32c69d
--- 1/arch/x86/kernel/vmi_32.c
--- 2/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@@ -320,6 -320,16 +320,16 @@@ static void vmi_release_pmd(unsigned lo
         vmi_ops.release_page(pfn, VMI_PAGE_L2);
   }
   
+ /*
+  * We use the pgd_free hook for releasing the pgd page:
+  */
+ static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+ {
+       unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
+ 
+       vmi_ops.release_page(pfn, VMI_PAGE_L2);
+ }
+ 
   /*
    * Helper macros for MMU update flags.  We can defer updates until a flush
    * or page invalidation only if the update is to the current address space
@@@ -670,11 -680,10 +680,11 @@@ static inline int __init activate_vmi(v
         para_fill(pv_mmu_ops.write_cr2, SetCR2);
         para_fill(pv_mmu_ops.write_cr3, SetCR3);
         para_fill(pv_cpu_ops.write_cr4, SetCR4);
- -      para_fill(pv_irq_ops.save_fl, GetInterruptMask);
- -      para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
- -      para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
- -      para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
+ +
+ +      para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
+ +      para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
+ +      para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
+ +      para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
   
         para_fill(pv_cpu_ops.wbinvd, WBINVD);
         para_fill(pv_cpu_ops.read_tsc, RDTSC);
@@@ -763,6 -772,7 +773,7 @@@
         if (vmi_ops.release_page) {
                 pv_mmu_ops.release_pte = vmi_release_pte;
                 pv_mmu_ops.release_pmd = vmi_release_pmd;
+               pv_mmu_ops.pgd_free = vmi_pgd_free;
         }
   
         /* Set linear is needed in all cases */
diff --combined arch/x86/mm/fault.c

index d3eee74f830ad71bbc2fdad868e9f3549a581d96,c76ef1d701c9f48625aed06d4e7b4ec3d98e8862..2a9ea3aee4935b801f1405550592988eefc59236
--- 1/arch/x86/mm/fault.c
--- 2/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -26,7 -26,6 +26,7 @@@
   #include <linux/kprobes.h>
   #include <linux/uaccess.h>
   #include <linux/kdebug.h>
+ +#include <linux/magic.h>
   
   #include <asm/system.h>
   #include <asm/desc.h>
@@@ -92,8 -91,8 +92,8 @@@ static inline int notify_page_fault(str
    *
    * Opcode checker based on code by Richard Brunner
    */
- -static int is_prefetch(struct pt_regs *regs, unsigned long addr,
- -                     unsigned long error_code)
+ +static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
+ +                      unsigned long addr)
   {
         unsigned char *instr;
         int scan_more = 1;
@@@ -410,16 -409,17 +410,16 @@@ static void show_fault_oops(struct pt_r
   }
   
   #ifdef CONFIG_X86_64
- -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- -                               unsigned long error_code)
+ +static noinline void pgtable_bad(struct pt_regs *regs,
+ +                       unsigned long error_code, unsigned long address)
   {
         unsigned long flags = oops_begin();
         int sig = SIGKILL;
- -      struct task_struct *tsk;
+ +      struct task_struct *tsk = current;
   
         printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- -             current->comm, address);
+ +             tsk->comm, address);
         dump_pagetable(address);
- -      tsk = current;
         tsk->thread.cr2 = address;
         tsk->thread.trap_no = 14;
         tsk->thread.error_code = error_code;
@@@ -429,196 -429,6 +429,196 @@@
   }
   #endif
   
+ +static noinline void no_context(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      struct task_struct *tsk = current;
+ +      unsigned long *stackend;
+ +
+ +#ifdef CONFIG_X86_64
+ +      unsigned long flags;
+ +      int sig;
+ +#endif
+ +
+ +      /* Are we prepared to handle this kernel fault?  */
+ +      if (fixup_exception(regs))
+ +              return;
+ +
+ +      /*
+ +       * X86_32
+ +       * Valid to do another page fault here, because if this fault
+ +       * had been triggered by is_prefetch fixup_exception would have
+ +       * handled it.
+ +       *
+ +       * X86_64
+ +       * Hall of shame of CPU/BIOS bugs.
+ +       */
+ +      if (is_prefetch(regs, error_code, address))
+ +              return;
+ +
+ +      if (is_errata93(regs, address))
+ +              return;
+ +
+ +      /*
+ +       * Oops. The kernel tried to access some bad page. We'll have to
+ +       * terminate things with extreme prejudice.
+ +       */
+ +#ifdef CONFIG_X86_32
+ +      bust_spinlocks(1);
+ +#else
+ +      flags = oops_begin();
+ +#endif
+ +
+ +      show_fault_oops(regs, error_code, address);
+ +
+ +      stackend = end_of_stack(tsk);
+ +      if (*stackend != STACK_END_MAGIC)
+ +              printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+ +
+ +      tsk->thread.cr2 = address;
+ +      tsk->thread.trap_no = 14;
+ +      tsk->thread.error_code = error_code;
+ +
+ +#ifdef CONFIG_X86_32
+ +      die("Oops", regs, error_code);
+ +      bust_spinlocks(0);
+ +      do_exit(SIGKILL);
+ +#else
+ +      sig = SIGKILL;
+ +      if (__die("Oops", regs, error_code))
+ +              sig = 0;
+ +      /* Executive summary in case the body of the oops scrolled away */
+ +      printk(KERN_EMERG "CR2: %016lx\n", address);
+ +      oops_end(flags, regs, sig);
+ +#endif
+ +}
+ +
+ +static void __bad_area_nosemaphore(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address,
+ +                      int si_code)
+ +{
+ +      struct task_struct *tsk = current;
+ +
+ +      /* User mode accesses just cause a SIGSEGV */
+ +      if (error_code & PF_USER) {
+ +              /*
+ +               * It's possible to have interrupts off here.
+ +               */
+ +              local_irq_enable();
+ +
+ +              /*
+ +               * Valid to do another page fault here because this one came
+ +               * from user space.
+ +               */
+ +              if (is_prefetch(regs, error_code, address))
+ +                      return;
+ +
+ +              if (is_errata100(regs, address))
+ +                      return;
+ +
+ +              if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ +                  printk_ratelimit()) {
+ +                      printk(
+ +                      "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ +                      task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ +                      tsk->comm, task_pid_nr(tsk), address,
+ +                      (void *) regs->ip, (void *) regs->sp, error_code);
+ +                      print_vma_addr(" in ", regs->ip);
+ +                      printk("\n");
+ +              }
+ +
+ +              tsk->thread.cr2 = address;
+ +              /* Kernel addresses are always protection faults */
+ +              tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ +              tsk->thread.trap_no = 14;
+ +              force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ +              return;
+ +      }
+ +
+ +      if (is_f00f_bug(regs, address))
+ +              return;
+ +
+ +      no_context(regs, error_code, address);
+ +}
+ +
+ +static noinline void bad_area_nosemaphore(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+ +}
+ +
+ +static void __bad_area(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address,
+ +                      int si_code)
+ +{
+ +      struct mm_struct *mm = current->mm;
+ +
+ +      /*
+ +       * Something tried to access memory that isn't in our memory map..
+ +       * Fix it, but check if it's kernel or user first..
+ +       */
+ +      up_read(&mm->mmap_sem);
+ +
+ +      __bad_area_nosemaphore(regs, error_code, address, si_code);
+ +}
+ +
+ +static noinline void bad_area(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      __bad_area(regs, error_code, address, SEGV_MAPERR);
+ +}
+ +
+ +static noinline void bad_area_access_error(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      __bad_area(regs, error_code, address, SEGV_ACCERR);
+ +}
+ +
+ +/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
+ +static void out_of_memory(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      /*
+ +       * We ran out of memory, call the OOM killer, and return to userspace
+ +       * (which will retry the fault, or kill us if we got oom-killed).
+ +       */
+ +      up_read(&current->mm->mmap_sem);
+ +      pagefault_out_of_memory();
+ +}
+ +
+ +static void do_sigbus(struct pt_regs *regs,
+ +                      unsigned long error_code, unsigned long address)
+ +{
+ +      struct task_struct *tsk = current;
+ +      struct mm_struct *mm = tsk->mm;
+ +
+ +      up_read(&mm->mmap_sem);
+ +
+ +      /* Kernel mode? Handle exceptions or die */
+ +      if (!(error_code & PF_USER))
+ +              no_context(regs, error_code, address);
+ +#ifdef CONFIG_X86_32
+ +      /* User space => ok to do another page fault */
+ +      if (is_prefetch(regs, error_code, address))
+ +              return;
+ +#endif
+ +      tsk->thread.cr2 = address;
+ +      tsk->thread.error_code = error_code;
+ +      tsk->thread.trap_no = 14;
+ +      force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+ +}
+ +
+ +static noinline void mm_fault_error(struct pt_regs *regs,
+ +              unsigned long error_code, unsigned long address, unsigned int fault)
+ +{
+ +      if (fault & VM_FAULT_OOM)
+ +              out_of_memory(regs, error_code, address);
+ +      else if (fault & VM_FAULT_SIGBUS)
+ +              do_sigbus(regs, error_code, address);
+ +      else
+ +              BUG();
+ +}
+ +
   static int spurious_fault_check(unsigned long error_code, pte_t *pte)
   {
         if ((error_code & PF_WRITE) && !pte_write(*pte))
@@@ -638,8 -448,8 +638,8 @@@
    * There are no security implications to leaving a stale TLB when
    * increasing the permissions on a page.
    */
- -static int spurious_fault(unsigned long address,
- -                        unsigned long error_code)
+ +static noinline int spurious_fault(unsigned long error_code,
+ +                              unsigned long address)
   {
         pgd_t *pgd;
         pud_t *pud;
@@@ -684,7 -494,7 +684,7 @@@
    *
    * This assumes no large pages in there.
    */
- -static int vmalloc_fault(unsigned long address)
+ +static noinline int vmalloc_fault(unsigned long address)
   {
   #ifdef CONFIG_X86_32
         unsigned long pgd_paddr;
@@@ -763,25 -573,6 +763,25 @@@
   
   int show_unhandled_signals = 1;
   
+ +static inline int access_error(unsigned long error_code, int write,
+ +                              struct vm_area_struct *vma)
+ +{
+ +      if (write) {
+ +              /* write, present and write, not present */
+ +              if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ +                      return 1;
+ +      } else if (unlikely(error_code & PF_PROT)) {
+ +              /* read, present */
+ +              return 1;
+ +      } else {
+ +              /* read, not present */
+ +              if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ +                      return 1;
+ +      }
+ +
+ +      return 0;
+ +}
+ +
   /*
    * This routine handles page faults.  It determines the address,
    * and the problem, and then passes it off to one of the appropriate
@@@ -792,12 -583,16 +792,12 @@@ asmlinkag
   #endif
   void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
   {
+ +      unsigned long address;
         struct task_struct *tsk;
         struct mm_struct *mm;
         struct vm_area_struct *vma;
- -      unsigned long address;
- -      int write, si_code;
+ +      int write;
         int fault;
- -#ifdef CONFIG_X86_64
- -      unsigned long flags;
- -      int sig;
- -#endif
   
         tsk = current;
         mm = tsk->mm;
@@@ -806,8 -601,8 +806,6 @@@
         /* get the address */
         address = read_cr2();
   
-       if (unlikely(notify_page_fault(regs)))
-               return;
- -      si_code = SEGV_MAPERR;
- -
         if (unlikely(kmmio_fault(regs, address)))
                 return;
   
@@@ -834,17 -629,23 +832,22 @@@
                         return;
   
                 /* Can handle a stale RO->RW TLB */
- -              if (spurious_fault(address, error_code))
+ +              if (spurious_fault(error_code, address))
                         return;
   
+               /* kprobes don't want to hook the spurious faults. */
+               if (notify_page_fault(regs))
+                       return;
                 /*
                  * Don't take the mm semaphore here. If we fixup a prefetch
                  * fault we could otherwise deadlock.
                  */
- -              goto bad_area_nosemaphore;
+ +              bad_area_nosemaphore(regs, error_code, address);
+ +              return;
         }
   
- -      /* kprobes don't want to hook the spurious faults. */
- -      if (notify_page_fault(regs))
++      if (unlikely(notify_page_fault(regs)))
+               return;
- -
         /*
          * It's safe to allow irq's after cr2 has been saved and the
          * vmalloc fault has been handled.
@@@ -860,17 -661,15 +863,17 @@@
   
   #ifdef CONFIG_X86_64
         if (unlikely(error_code & PF_RSVD))
- -              pgtable_bad(address, regs, error_code);
+ +              pgtable_bad(regs, error_code, address);
   #endif
   
         /*
          * If we're in an interrupt, have no user context or are running in an
          * atomic region then we must not take the fault.
          */
- -      if (unlikely(in_atomic() || !mm))
- -              goto bad_area_nosemaphore;
+ +      if (unlikely(in_atomic() || !mm)) {
+ +              bad_area_nosemaphore(regs, error_code, address);
+ +              return;
+ +      }
   
         /*
          * When running in the kernel we expect faults to occur only to
@@@ -888,26 -687,20 +891,26 @@@
          * source.  If this is invalid we can skip the address space check,
          * thus avoiding the deadlock.
          */
- -      if (!down_read_trylock(&mm->mmap_sem)) {
+ +      if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
                 if ((error_code & PF_USER) == 0 &&
- -                  !search_exception_tables(regs->ip))
- -                      goto bad_area_nosemaphore;
+ +                  !search_exception_tables(regs->ip)) {
+ +                      bad_area_nosemaphore(regs, error_code, address);
+ +                      return;
+ +              }
                 down_read(&mm->mmap_sem);
         }
   
         vma = find_vma(mm, address);
- -      if (!vma)
- -              goto bad_area;
- -      if (vma->vm_start <= address)
+ +      if (unlikely(!vma)) {
+ +              bad_area(regs, error_code, address);
+ +              return;
+ +      }
+ +      if (likely(vma->vm_start <= address))
                 goto good_area;
- -      if (!(vma->vm_flags & VM_GROWSDOWN))
- -              goto bad_area;
+ +      if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ +              bad_area(regs, error_code, address);
+ +              return;
+ +      }
         if (error_code & PF_USER) {
                 /*
                  * Accessing the stack below %sp is always a bug.
@@@ -915,25 -708,31 +918,25 @@@
                  * and pusha to work.  ("enter $65535,$31" pushes
                  * 32 pointers and then decrements %sp by 65535.)
                  */
- -              if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
- -                      goto bad_area;
+ +              if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
+ +                      bad_area(regs, error_code, address);
+ +                      return;
+ +              }
         }
- -      if (expand_stack(vma, address))
- -              goto bad_area;
- -/*
- - * Ok, we have a good vm_area for this memory access, so
- - * we can handle it..
- - */
+ +      if (unlikely(expand_stack(vma, address))) {
+ +              bad_area(regs, error_code, address);
+ +              return;
+ +      }
+ +
+ +      /*
+ +       * Ok, we have a good vm_area for this memory access, so
+ +       * we can handle it..
+ +       */
   good_area:
- -      si_code = SEGV_ACCERR;
- -      write = 0;
- -      switch (error_code & (PF_PROT|PF_WRITE)) {
- -      default:        /* 3: write, present */
- -              /* fall through */
- -      case PF_WRITE:          /* write, not present */
- -              if (!(vma->vm_flags & VM_WRITE))
- -                      goto bad_area;
- -              write++;
- -              break;
- -      case PF_PROT:           /* read, present */
- -              goto bad_area;
- -      case 0:                 /* read, not present */
- -              if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- -                      goto bad_area;
+ +      write = error_code & PF_WRITE;
+ +      if (unlikely(access_error(error_code, write, vma))) {
+ +              bad_area_access_error(regs, error_code, address);
+ +              return;
         }
   
         /*
@@@ -943,8 -742,11 +946,8 @@@
          */
         fault = handle_mm_fault(mm, vma, address, write);
         if (unlikely(fault & VM_FAULT_ERROR)) {
- -              if (fault & VM_FAULT_OOM)
- -                      goto out_of_memory;
- -              else if (fault & VM_FAULT_SIGBUS)
- -                      goto do_sigbus;
- -              BUG();
+ +              mm_fault_error(regs, error_code, address, fault);
+ +              return;
         }
         if (fault & VM_FAULT_MAJOR)
                 tsk->maj_flt++;
@@@ -962,6 -764,128 +965,6 @@@
         }
   #endif
         up_read(&mm->mmap_sem);
- -      return;
- -
- -/*
- - * Something tried to access memory that isn't in our memory map..
- - * Fix it, but check if it's kernel or user first..
- - */
- -bad_area:
- -      up_read(&mm->mmap_sem);
- -
- -bad_area_nosemaphore:
- -      /* User mode accesses just cause a SIGSEGV */
- -      if (error_code & PF_USER) {
- -              /*
- -               * It's possible to have interrupts off here.
- -               */
- -              local_irq_enable();
- -
- -              /*
- -               * Valid to do another page fault here because this one came
- -               * from user space.
- -               */
- -              if (is_prefetch(regs, address, error_code))
- -                      return;
- -
- -              if (is_errata100(regs, address))
- -                      return;
- -
- -              if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- -                  printk_ratelimit()) {
- -                      printk(
- -                      "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
- -                      task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- -                      tsk->comm, task_pid_nr(tsk), address,
- -                      (void *) regs->ip, (void *) regs->sp, error_code);
- -                      print_vma_addr(" in ", regs->ip);
- -                      printk("\n");
- -              }
- -
- -              tsk->thread.cr2 = address;
- -              /* Kernel addresses are always protection faults */
- -              tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- -              tsk->thread.trap_no = 14;
- -              force_sig_info_fault(SIGSEGV, si_code, address, tsk);
- -              return;
- -      }
- -
- -      if (is_f00f_bug(regs, address))
- -              return;
- -
- -no_context:
- -      /* Are we prepared to handle this kernel fault?  */
- -      if (fixup_exception(regs))
- -              return;
- -
- -      /*
- -       * X86_32
- -       * Valid to do another page fault here, because if this fault
- -       * had been triggered by is_prefetch fixup_exception would have
- -       * handled it.
- -       *
- -       * X86_64
- -       * Hall of shame of CPU/BIOS bugs.
- -       */
- -      if (is_prefetch(regs, address, error_code))
- -              return;
- -
- -      if (is_errata93(regs, address))
- -              return;
- -
- -/*
- - * Oops. The kernel tried to access some bad page. We'll have to
- - * terminate things with extreme prejudice.
- - */
- -#ifdef CONFIG_X86_32
- -      bust_spinlocks(1);
- -#else
- -      flags = oops_begin();
- -#endif
- -
- -      show_fault_oops(regs, error_code, address);
- -
- -      tsk->thread.cr2 = address;
- -      tsk->thread.trap_no = 14;
- -      tsk->thread.error_code = error_code;
- -
- -#ifdef CONFIG_X86_32
- -      die("Oops", regs, error_code);
- -      bust_spinlocks(0);
- -      do_exit(SIGKILL);
- -#else
- -      sig = SIGKILL;
- -      if (__die("Oops", regs, error_code))
- -              sig = 0;
- -      /* Executive summary in case the body of the oops scrolled away */
- -      printk(KERN_EMERG "CR2: %016lx\n", address);
- -      oops_end(flags, regs, sig);
- -#endif
- -
- -out_of_memory:
- -      /*
- -       * We ran out of memory, call the OOM killer, and return the userspace
- -       * (which will retry the fault, or kill us if we got oom-killed).
- -       */
- -      up_read(&mm->mmap_sem);
- -      pagefault_out_of_memory();
- -      return;
- -
- -do_sigbus:
- -      up_read(&mm->mmap_sem);
- -
- -      /* Kernel mode? Handle exceptions or die */
- -      if (!(error_code & PF_USER))
- -              goto no_context;
- -#ifdef CONFIG_X86_32
- -      /* User space => ok to do another page fault */
- -      if (is_prefetch(regs, address, error_code))
- -              return;
- -#endif
- -      tsk->thread.cr2 = address;
- -      tsk->thread.error_code = error_code;
- -      tsk->thread.trap_no = 14;
- -      force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
   }
   
   DEFINE_SPINLOCK(pgd_lock);
diff --combined drivers/misc/Kconfig

index 396d935012f2d5bae33b3bb623b4b54701383c99,c64e6798878a529035154426c3db9af0d587bdb3..1c484084ed4f874c78cf7ccbf555350148a2a00c
--- 1/drivers/misc/Kconfig
--- 2/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@@ -162,7 -162,7 +162,7 @@@ config ENCLOSURE_SERVICE
   config SGI_XP
         tristate "Support communication between SGI SSIs"
         depends on NET
- -      depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
+ +      depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
         select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
         select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
         select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
@@@ -189,7 -189,7 +189,7 @@@ config HP_IL
   
   config SGI_GRU
         tristate "SGI GRU driver"
- -      depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ +      depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
         default n
         select MMU_NOTIFIER
         ---help---
@@@ -217,6 -217,7 +217,7 @@@ config DELL_LAPTO
         depends on EXPERIMENTAL
         depends on BACKLIGHT_CLASS_DEVICE
         depends on RFKILL
+       depends on POWER_SUPPLY
         default n
         ---help---
         This driver adds support for rfkill and backlight control to Dell
diff --combined include/linux/sched.h

index 2225c207801cd8a52801a9f351c3ec56931aa93c,8981e52c714f05f19ad5727031058509e06bf6a8..f0a50b20e8a03c2f98dec1cd859aa6e06853f7e2
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -453,23 -453,33 +453,33 @@@ struct task_cputime 
         cputime_t utime;
         cputime_t stime;
         unsigned long long sum_exec_runtime;
-       spinlock_t lock;
   };
   /* Alternate field names when used to cache expirations. */
   #define prof_exp      stime
   #define virt_exp      utime
   #define sched_exp     sum_exec_runtime
   
+ #define INIT_CPUTIME  \
+       (struct task_cputime) {                                 \
+               .utime = cputime_zero,                          \
+               .stime = cputime_zero,                          \
+               .sum_exec_runtime = 0,                          \
+       }
+ 
   /**
-  * struct thread_group_cputime - thread group interval timer counts
-  * @totals:           thread group interval timers; substructure for
-  *                    uniprocessor kernel, per-cpu for SMP kernel.
+  * struct thread_group_cputimer - thread group interval timer counts
+  * @cputime:          thread group interval timers.
+  * @running:          non-zero when there are timers running and
+  *                    @cputime receives updates.
+  * @lock:             lock for fields in this struct.
    *
    * This structure contains the version of task_cputime, above, that is
-  * used for thread group CPU clock calculations.
+  * used for thread group CPU timer calculations.
    */
- struct thread_group_cputime {
-       struct task_cputime totals;
+ struct thread_group_cputimer {
+       struct task_cputime cputime;
+       int running;
+       spinlock_t lock;
   };
   
   /*
@@@ -518,10 -528,10 +528,10 @@@ struct signal_struct 
         cputime_t it_prof_incr, it_virt_incr;
   
         /*
-        * Thread group totals for process CPU clocks.
-        * See thread_group_cputime(), et al, for details.
+        * Thread group totals for process CPU timers.
+        * See thread_group_cputimer(), et al, for details.
          */
-       struct thread_group_cputime cputime;
+       struct thread_group_cputimer cputimer;
   
         /* Earliest-expiration cache. */
         struct task_cputime cputime_expires;
@@@ -558,7 -568,7 +568,7 @@@
          * Live threads maintain their own counters and add to these
          * in __exit_signal, except for the group leader.
          */
-       cputime_t cutime, cstime;
+       cputime_t utime, stime, cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@@ -566,6 -576,14 +576,14 @@@
         unsigned long inblock, oublock, cinblock, coublock;
         struct task_io_accounting ioac;
   
+       /*
+        * Cumulative ns of scheduled CPU time for dead threads in the
+        * group, not including a zombie group leader. (This only differs
+        * from jiffies_to_ns(utime + stime) if sched_clock uses something
+        * other than jiffies.)
+        */
+       unsigned long long sum_sched_runtime;
+ 
         /*
          * We don't bother to synchronize most readers of this at all,
          * because there is no reader checking a limit that actually needs
@@@ -1160,9 -1178,10 +1178,9 @@@ struct task_struct 
         pid_t pid;
         pid_t tgid;
   
- -#ifdef CONFIG_CC_STACKPROTECTOR
         /* Canary value for the -fstack-protector gcc feature */
         unsigned long stack_canary;
- -#endif
+ +
         /* 
          * pointers to (original) parent process, youngest child, younger sibling,
          * older sibling, respectively.  (p->father can be replaced with 
@@@ -2068,19 -2087,6 +2086,19 @@@ static inline int object_is_on_stack(vo
   
   extern void thread_info_cache_init(void);
   
+ +#ifdef CONFIG_DEBUG_STACK_USAGE
+ +static inline unsigned long stack_not_used(struct task_struct *p)
+ +{
+ +      unsigned long *n = end_of_stack(p);
+ +
+ +      do {    /* Skip over canary */
+ +              n++;
+ +      } while (!*n);
+ +
+ +      return (unsigned long)n - (unsigned long)end_of_stack(p);
+ +}
+ +#endif
+ +
   /* set thread flags in other task's structures
    * - see asm/thread_info.h for TIF_xxxx flags available
    */
@@@ -2194,27 -2200,14 +2212,14 @@@ static inline int spin_needbreak(spinlo
   /*
    * Thread group CPU time accounting.
    */
- 
- static inline
- void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
- {
-       struct task_cputime *totals = &tsk->signal->cputime.totals;
-       unsigned long flags;
- 
-       spin_lock_irqsave(&totals->lock, flags);
-       *times = *totals;
-       spin_unlock_irqrestore(&totals->lock, flags);
- }
+ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
   
   static inline void thread_group_cputime_init(struct signal_struct *sig)
   {
-       sig->cputime.totals = (struct task_cputime){
-               .utime = cputime_zero,
-               .stime = cputime_zero,
-               .sum_exec_runtime = 0,
-       };
- 
-       spin_lock_init(&sig->cputime.totals.lock);
+       sig->cputimer.cputime = INIT_CPUTIME;
+       spin_lock_init(&sig->cputimer.lock);
+       sig->cputimer.running = 0;
   }
   
   static inline void thread_group_cputime_free(struct signal_struct *sig)
diff --combined kernel/exit.c

index 70612c19ac96b92ca575ec8a5f67e7df0d2fecb9,efd30ccf38584f48c6ef68da62b9236479b6da69..167e1e3ad7c61f5c0a73db9dc457c30a97527a91
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -118,6 -118,8 +118,8 @@@ static void __exit_signal(struct task_s
                  * We won't ever get here for the group leader, since it
                  * will have been the last reference on the signal_struct.
                  */
+               sig->utime = cputime_add(sig->utime, task_utime(tsk));
+               sig->stime = cputime_add(sig->stime, task_stime(tsk));
                 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                 sig->min_flt += tsk->min_flt;
                 sig->maj_flt += tsk->maj_flt;
@@@ -126,6 -128,7 +128,7 @@@
                 sig->inblock += task_io_get_inblock(tsk);
                 sig->oublock += task_io_get_oublock(tsk);
                 task_io_accounting_add(&sig->ioac, &tsk->ioac);
+               sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                 sig = NULL; /* Marker for below. */
         }
   
@@@ -977,9 -980,12 +980,9 @@@ static void check_stack_usage(void
   {
         static DEFINE_SPINLOCK(low_water_lock);
         static int lowest_to_date = THREAD_SIZE;
- -      unsigned long *n = end_of_stack(current);
         unsigned long free;
   
- -      while (*n == 0)
- -              n++;
- -      free = (unsigned long)n - (unsigned long)end_of_stack(current);
+ +      free = stack_not_used(current);
   
         if (free >= lowest_to_date)
                 return;
diff --combined kernel/fork.c

index 99309df985bf404a5f94aae5b86df6581d19bd2c,a66fbde20715bb2d93180b7b3d03541d31d9c068..8de303bdd4e51915c20c463c2ab0217e94b5b3ad
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -61,7 -61,6 +61,7 @@@
   #include <linux/proc_fs.h>
   #include <linux/blkdev.h>
   #include <trace/sched.h>
+ +#include <linux/magic.h>
   
   #include <asm/pgtable.h>
   #include <asm/pgalloc.h>
@@@ -213,8 -212,6 +213,8 @@@ static struct task_struct *dup_task_str
   {
         struct task_struct *tsk;
         struct thread_info *ti;
+ +      unsigned long *stackend;
+ +
         int err;
   
         prepare_to_copy(orig);
@@@ -240,8 -237,6 +240,8 @@@
                 goto out;
   
         setup_thread_stack(tsk, orig);
+ +      stackend = end_of_stack(tsk);
+ +      *stackend = STACK_END_MAGIC;    /* for overflow detection */
   
   #ifdef CONFIG_CC_STACKPROTECTOR
         tsk->stack_canary = get_random_int();
@@@ -856,13 -851,14 +856,14 @@@ static int copy_signal(unsigned long cl
         sig->tty_old_pgrp = NULL;
         sig->tty = NULL;
   
-       sig->cutime = sig->cstime = cputime_zero;
+       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
         sig->gtime = cputime_zero;
         sig->cgtime = cputime_zero;
         sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
         sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
         sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
         task_io_accounting_init(&sig->ioac);
+       sig->sum_sched_runtime = 0;
         taskstats_tgid_init(sig);
   
         task_lock(current->group_leader);
@@@ -1010,6 -1006,7 +1011,7 @@@ static struct task_struct *copy_process
          * triggers too late. This doesn't hurt, the check is only there
          * to stop root fork bombs.
          */
+       retval = -EAGAIN;
         if (nr_threads >= max_threads)
                 goto bad_fork_cleanup_count;
   
@@@ -1098,7 -1095,7 +1100,7 @@@
   #ifdef CONFIG_DEBUG_MUTEXES
         p->blocked_on = NULL; /* not blocked yet */
   #endif
-       if (unlikely(ptrace_reparented(current)))
+       if (unlikely(current->ptrace))
                 ptrace_fork(p, clone_flags);
   
         /* Perform scheduler related setup. Assign this task to a CPU. */
diff --combined kernel/sched.c

index 400756169aa7e247be8134911c3e02bf14da2579,c1d0ed360088f5a5378ae4b6db9555e7e0b90f21..61245b8d0f1671b3592696cc20f5ab8694c9ee44
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -2266,16 -2266,6 +2266,6 @@@ static int try_to_wake_up(struct task_s
         if (!sched_feat(SYNC_WAKEUPS))
                 sync = 0;
   
-       if (!sync) {
-               if (current->se.avg_overlap < sysctl_sched_migration_cost &&
-                         p->se.avg_overlap < sysctl_sched_migration_cost)
-                       sync = 1;
-       } else {
-               if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
-                         p->se.avg_overlap >= sysctl_sched_migration_cost)
-                       sync = 0;
-       }
- 
   #ifdef CONFIG_SMP
         if (sched_feat(LB_WAKEUP_UPDATE)) {
                 struct sched_domain *sd;
@@@ -3890,19 -3880,24 +3880,24 @@@ int select_nohz_load_balancer(int stop_
         int cpu = smp_processor_id();
   
         if (stop_tick) {
-               cpumask_set_cpu(cpu, nohz.cpu_mask);
                 cpu_rq(cpu)->in_nohz_recently = 1;
   
-               /*
-                * If we are going offline and still the leader, give up!
-                */
-               if (!cpu_active(cpu) &&
-                   atomic_read(&nohz.load_balancer) == cpu) {
+               if (!cpu_active(cpu)) {
+                       if (atomic_read(&nohz.load_balancer) != cpu)
+                               return 0;
+ 
+                       /*
+                        * If we are going offline and still the leader,
+                        * give up!
+                        */
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                 BUG();
+ 
                         return 0;
                 }
   
+               cpumask_set_cpu(cpu, nohz.cpu_mask);
+ 
                 /* time for ilb owner also to sleep */
                 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
@@@ -4697,8 -4692,8 +4692,8 @@@ EXPORT_SYMBOL(default_wake_function)
    * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
    * zero in this (rare) case, and we handle it by continuing to scan the queue.
    */
- static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-                            int nr_exclusive, int sync, void *key)
+ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+                       int nr_exclusive, int sync, void *key)
   {
         wait_queue_t *curr, *next;
   
@@@ -5949,7 -5944,12 +5944,7 @@@ void sched_show_task(struct task_struc
                 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
   #endif
   #ifdef CONFIG_DEBUG_STACK_USAGE
- -      {
- -              unsigned long *n = end_of_stack(p);
- -              while (!*n)
- -                      n++;
- -              free = (unsigned long)n - (unsigned long)end_of_stack(p);
- -      }
+ +      free = stack_not_used(p);
   #endif
         printk(KERN_CONT "%5lu %5d %6d\n", free,
                 task_pid_nr(p), task_pid_nr(p->real_parent));
author	Ingo Molnar <mingo@elte.hu>
	Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 13 Feb 2009 08:44:22 +0000 (09:44 +0100)
		1	2
arch/arm/kernel/irq.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/blackfin/kernel/irqchip.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/irq_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/ia32/ia32entry.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/paravirt.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/spinlock.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/acpi/sleep.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/traps.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/vmi_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/fault.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history