/* Handle bad interrupts */
static struct irq_desc bad_irq_desc = {
.handle_irq = handle_bad_irq,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock),
};
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* We are not allocating bad_irq_desc.affinity or .pending_mask */
+#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK."
+#endif
+
/*
* do_IRQ handles all hardware IRQ's. Decoded IRQs should not
* come via this function. Instead, they should provide their
irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE;
#ifdef CONFIG_SMP
- bad_irq_desc.affinity = CPU_MASK_ALL;
+ cpumask_setall(bad_irq_desc.affinity);
bad_irq_desc.cpu = smp_processor_id();
#endif
init_arch_irq();
struct irq_desc *desc = irq_desc + i;
if (desc->cpu == cpu) {
- unsigned int newcpu = any_online_cpu(desc->affinity);
-
- if (newcpu == NR_CPUS) {
+ unsigned int newcpu = cpumask_any_and(desc->affinity,
+ cpu_online_mask);
+ if (newcpu >= nr_cpu_ids) {
if (printk_ratelimit())
printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n",
i, cpu);
- cpus_setall(desc->affinity);
- newcpu = any_online_cpu(desc->affinity);
+ cpumask_setall(desc->affinity);
+ newcpu = cpumask_any_and(desc->affinity,
+ cpu_online_mask);
}
route_irq(desc, i, newcpu);
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/trace.h>
+ #include <asm/pda.h>
static atomic_t irq_err_count;
static spinlock_t irq_controller_lock;
#endif
};
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* We are not allocating a variable-sized bad_irq_desc.affinity */
+#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK."
+#endif
+
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, j;
seq_putc(p, '\n');
skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS)
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
+ seq_printf(p, " CORE Non Maskable Interrupt\n");
seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
+ }
return 0;
}
seq_putc(p, '\n');
skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).__nmi_count);
+ seq_printf(p, " Non-maskable interrupts\n");
}
return 0;
}
#ifdef CONFIG_SMP
static int irq_choose_cpu(unsigned int virt_irq)
{
- cpumask_t mask = irq_desc[virt_irq].affinity;
+ cpumask_t mask;
int cpuid;
+ cpumask_copy(&mask, irq_desc[virt_irq].affinity);
if (cpus_equal(mask, CPU_MASK_ALL)) {
static int irq_rover;
static DEFINE_SPINLOCK(irq_rover_lock);
local_irq_restore(flags);
}
- static void unhandled_perf_irq(struct pt_regs *regs)
- {
- unsigned long pcr, pic;
-
- read_pcr(pcr);
- read_pic(pic);
-
- write_pcr(0);
-
- printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
- smp_processor_id());
- printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
- smp_processor_id(), pcr, pic);
- }
-
- /* Almost a direct copy of the powerpc PMC code. */
- static DEFINE_SPINLOCK(perf_irq_lock);
- static void *perf_irq_owner_caller; /* mostly for debugging */
- static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
-
- /* Invoked from level 15 PIL handler in trap table. */
- void perfctr_irq(int irq, struct pt_regs *regs)
- {
- clear_softint(1 << irq);
- perf_irq(regs);
- }
-
- int register_perfctr_intr(void (*handler)(struct pt_regs *))
- {
- int ret;
-
- if (!handler)
- return -EINVAL;
-
- spin_lock(&perf_irq_lock);
- if (perf_irq != unhandled_perf_irq) {
- printk(KERN_WARNING "register_perfctr_intr: "
- "perf IRQ busy (reserved by caller %p)\n",
- perf_irq_owner_caller);
- ret = -EBUSY;
- goto out;
- }
-
- perf_irq_owner_caller = __builtin_return_address(0);
- perf_irq = handler;
-
- ret = 0;
- out:
- spin_unlock(&perf_irq_lock);
-
- return ret;
- }
- EXPORT_SYMBOL_GPL(register_perfctr_intr);
-
- void release_perfctr_intr(void (*handler)(struct pt_regs *))
- {
- spin_lock(&perf_irq_lock);
- perf_irq_owner_caller = NULL;
- perf_irq = unhandled_perf_irq;
- spin_unlock(&perf_irq_lock);
- }
- EXPORT_SYMBOL_GPL(release_perfctr_intr);
-
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
{
!(irq_desc[irq].status & IRQ_PER_CPU)) {
if (irq_desc[irq].chip->set_affinity)
irq_desc[irq].chip->set_affinity(irq,
- &irq_desc[irq].affinity);
+ irq_desc[irq].affinity);
}
spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
}
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
- movq %gs:pda_kernelstack, %rsp
- addq $(PDA_STACKOFFSET),%rsp
+ movq PER_CPU_VAR(kernel_stack), %rsp
+ addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
- movq %gs:pda_kernelstack,%rsp
+ movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- ia32_do_syscall:
cmpl $(IA32_NR_syscalls-1),%eax
- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
+ ja ia32_badsys
+ ia32_do_call:
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- jmp ia32_do_syscall
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
+ jmp ia32_do_call
END(ia32_syscall)
ia32_badsys:
#define CLBR_EAX (1 << 0)
#define CLBR_ECX (1 << 1)
#define CLBR_EDX (1 << 2)
+#define CLBR_EDI (1 << 3)
-#ifdef CONFIG_X86_64
-#define CLBR_RSI (1 << 3)
-#define CLBR_RDI (1 << 4)
+#ifdef CONFIG_X86_32
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY ((1 << 4) - 1)
+
+#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
+#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
+#define CLBR_SCRATCH (0)
+#else
+#define CLBR_RAX CLBR_EAX
+#define CLBR_RCX CLBR_ECX
+#define CLBR_RDX CLBR_EDX
+#define CLBR_RDI CLBR_EDI
+#define CLBR_RSI (1 << 4)
#define CLBR_R8 (1 << 5)
#define CLBR_R9 (1 << 6)
#define CLBR_R10 (1 << 7)
#define CLBR_R11 (1 << 8)
+
#define CLBR_ANY ((1 << 9) - 1)
+
+#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+ CLBR_RCX | CLBR_R8 | CLBR_R9)
+#define CLBR_RET_REG (CLBR_RAX)
+#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
+
#include <asm/desc_defs.h>
-#else
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY ((1 << 3) - 1)
#endif /* X86_64 */
+#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
struct mm_struct;
struct desc_struct;
+/*
+ * Wrapper type for pointers to code which uses the non-standard
+ * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
+ */
+struct paravirt_callee_save {
+ void *func;
+};
+
/* general info */
struct pv_info {
unsigned int kernel_rpl;
* expected to use X86_EFLAGS_IF; all other bits
* returned from save_fl are undefined, and may be ignored by
* restore_fl.
+ *
+ * NOTE: These functions callers expect the callee to preserve
+ * more registers than the standard C calling convention.
*/
- unsigned long (*save_fl)(void);
- void (*restore_fl)(unsigned long);
- void (*irq_disable)(void);
- void (*irq_enable)(void);
+ struct paravirt_callee_save save_fl;
+ struct paravirt_callee_save restore_fl;
+ struct paravirt_callee_save irq_disable;
+ struct paravirt_callee_save irq_enable;
+
void (*safe_halt)(void);
void (*halt)(void);
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_single)(unsigned long addr);
- void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+ void (*flush_tlb_others)(const struct cpumask *cpus,
+ struct mm_struct *mm,
unsigned long va);
/* Hooks for allocating and freeing a pagetable top-level */
void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
- pteval_t (*pte_val)(pte_t);
- pteval_t (*pte_flags)(pte_t);
- pte_t (*make_pte)(pteval_t pte);
+ struct paravirt_callee_save pte_val;
+ struct paravirt_callee_save make_pte;
- pgdval_t (*pgd_val)(pgd_t);
- pgd_t (*make_pgd)(pgdval_t pgd);
+ struct paravirt_callee_save pgd_val;
+ struct paravirt_callee_save make_pgd;
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
void (*set_pud)(pud_t *pudp, pud_t pudval);
- pmdval_t (*pmd_val)(pmd_t);
- pmd_t (*make_pmd)(pmdval_t pmd);
+ struct paravirt_callee_save pmd_val;
+ struct paravirt_callee_save make_pmd;
#if PAGETABLE_LEVELS == 4
- pudval_t (*pud_val)(pud_t);
- pud_t (*make_pud)(pudval_t pud);
+ struct paravirt_callee_save pud_val;
+ struct paravirt_callee_save make_pud;
void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
#endif /* PAGETABLE_LEVELS == 4 */
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
unsigned paravirt_patch_nop(void);
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
const void *target, u16 tgt_clobbers,
* makes sure the incoming and outgoing types are always correct.
*/
#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx
+#define PVOP_VCALL_ARGS \
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
+
+#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
+
#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
+
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
-#else
-#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx
+#else /* CONFIG_X86_64 */
+#define PVOP_VCALL_ARGS \
+ unsigned long __edi = __edi, __esi = __esi, \
+ __edx = __edx, __ecx = __ecx
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+
+#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
+
#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
"=S" (__esi), "=d" (__edx), \
"=c" (__ecx)
-
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
-#endif
+#endif /* CONFIG_X86_32 */
#ifdef CONFIG_PARAVIRT_DEBUG
#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
#define PVOP_TEST_NULL(op) ((void)op)
#endif
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
+#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
+ pre, post, ...) \
({ \
rettype __ret; \
- PVOP_CALL_ARGS; \
+ PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
/* This is 32-bit specific, but is okay in 64-bit */ \
/* since this condition will never hold */ \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_CALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" EXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_CALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" EXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
__ret = (rettype)__eax; \
} \
__ret; \
})
-#define __PVOP_VCALL(op, pre, post, ...) \
+
+#define __PVOP_CALL(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
+ EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_CALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
({ \
PVOP_VCALL_ARGS; \
PVOP_TEST_NULL(op); \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_VCALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" VEXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
})
+#define __PVOP_VCALL(op, pre, post, ...) \
+ ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, \
+ pre, post, ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+
#define PVOP_CALL0(rettype, op) \
__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op) \
__PVOP_VCALL(op, "", "")
+#define PVOP_CALLEE0(rettype, op) \
+ __PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_VCALLEE0(op) \
+ __PVOP_VCALLEESAVE(op, "", "")
+
+
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+
+#define PVOP_CALLEE1(rettype, op, arg1) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALLEE1(op, arg1) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1" ((unsigned long)(arg2)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1" ((unsigned long)(arg2)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
+#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALLEE2(op, arg1, arg2) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
/* This is the only difference in x86_64. We can make it much simpler */
#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
__PVOP_VCALL(op, \
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
"2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#else
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
- "3"((unsigned long)(arg4)))
+ __PVOP_CALL(rettype, op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
- "3"((unsigned long)(arg4)))
+ __PVOP_VCALL(op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#endif
static inline int paravirt_enabled(void)
PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
unsigned long va)
{
- PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+ PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t,
- pv_mmu_ops.make_pte,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pteval_t,
+ pv_mmu_ops.make_pte,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pteval_t,
- pv_mmu_ops.make_pte,
- val);
+ ret = PVOP_CALLEE1(pteval_t,
+ pv_mmu_ops.make_pte,
+ val);
return (pte_t) { .pte = ret };
}
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
- pte.pte, (u64)pte.pte >> 32);
- else
- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
- pte.pte);
-
- return ret;
-}
-
-static inline pteval_t pte_flags(pte_t pte)
-{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
- pte.pte, (u64)pte.pte >> 32);
+ ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte, (u64)pte.pte >> 32);
else
- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
- pte.pte);
+ ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte);
-#ifdef CONFIG_PARAVIRT_DEBUG
- BUG_ON(ret & PTE_PFN_MASK);
-#endif
return ret;
}
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
- val);
+ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
+ val);
return (pgd_t) { ret };
}
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
- pgd.pgd, (u64)pgd.pgd >> 32);
+ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd, (u64)pgd.pgd >> 32);
else
- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
- pgd.pgd);
+ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd);
return ret;
}
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
- val);
+ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
+ val);
return (pmd_t) { ret };
}
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
- pmd.pmd, (u64)pmd.pmd >> 32);
+ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd, (u64)pmd.pmd >> 32);
else
- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
- pmd.pmd);
+ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd);
return ret;
}
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
- val);
+ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
+ val);
return (pud_t) { ret };
}
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
- pud.pud, (u64)pud.pud >> 32);
+ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud, (u64)pud.pud >> 32);
else
- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
- pud.pud);
+ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud);
return ret;
}
}
void _paravirt_nop(void);
-#define paravirt_nop ((void *)_paravirt_nop)
+u32 _paravirt_ident_32(u32);
+u64 _paravirt_ident_64(u64);
-void paravirt_use_bytelocks(void);
+#define paravirt_nop ((void *)_paravirt_nop)
#ifdef CONFIG_SMP
{
return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}
+ #define __raw_spin_is_contended __raw_spin_is_contended
static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
{
__parainstructions_end[];
#ifdef CONFIG_X86_32
-#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
-#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
+#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
+#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
+
#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS \
+ "push %rcx;" \
+ "push %rdx;" \
+ "push %rsi;" \
+ "push %rdi;" \
+ "push %r8;" \
+ "push %r9;" \
+ "push %r10;" \
+ "push %r11;"
+#define PV_RESTORE_ALL_CALLER_REGS \
+ "pop %r11;" \
+ "pop %r10;" \
+ "pop %r9;" \
+ "pop %r8;" \
+ "pop %rdi;" \
+ "pop %rsi;" \
+ "pop %rdx;" \
+ "pop %rcx;"
+
/* We save some registers, but all of them, that's too much. We clobber all
* caller saved registers but the argument parameter */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_FLAGS_ARG "D"
#endif
+/*
+ * Generate a thunk around a function which saves all caller-save
+ * registers except for the return value. This allows C functions to
+ * be called from assembler code where fewer than normal registers are
+ * available. It may also help code generation around calls from C
+ * code if the common case doesn't use many registers.
+ *
+ * When a callee is wrapped in a thunk, the caller can assume that all
+ * arg regs and all scratch registers are preserved across the
+ * call. The return value in rax/eax will not be saved, even for void
+ * functions.
+ */
+#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+ extern typeof(func) __raw_callee_save_##func; \
+ static void *__##func##__ __used = func; \
+ \
+ asm(".pushsection .text;" \
+ "__raw_callee_save_" #func ": " \
+ PV_SAVE_ALL_CALLER_REGS \
+ "call " #func ";" \
+ PV_RESTORE_ALL_CALLER_REGS \
+ "ret;" \
+ ".popsection")
+
+/* Get a reference to a callee-save function */
+#define PV_CALLEE_SAVE(func) \
+ ((struct paravirt_callee_save) { __raw_callee_save_##func })
+
+/* Promise that "func" already uses the right calling convention */
+#define __PV_IS_CALLEE_SAVE(func) \
+ ((struct paravirt_callee_save) { func })
+
static inline unsigned long __raw_local_save_flags(void)
{
unsigned long f;
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
: "=a"(f)
: paravirt_type(pv_irq_ops.save_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc" PV_VEXTRA_CLOBBERS);
+ : "memory", "cc");
return f;
}
static inline void raw_local_irq_restore(unsigned long f)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
: "=a"(f)
: PV_FLAGS_ARG(f),
paravirt_type(pv_irq_ops.restore_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "cc");
}
static inline void raw_local_irq_disable(void)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
:
: paravirt_type(pv_irq_ops.irq_disable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "eax", "cc");
}
static inline void raw_local_irq_enable(void)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
:
: paravirt_type(pv_irq_ops.irq_enable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "eax", "cc");
}
static inline unsigned long __raw_local_irq_save(void)
.popsection
+#define COND_PUSH(set, mask, reg) \
+ .if ((~(set)) & mask); push %reg; .endif
+#define COND_POP(set, mask, reg) \
+ .if ((~(set)) & mask); pop %reg; .endif
+
#ifdef CONFIG_X86_64
-#define PV_SAVE_REGS \
- push %rax; \
- push %rcx; \
- push %rdx; \
- push %rsi; \
- push %rdi; \
- push %r8; \
- push %r9; \
- push %r10; \
- push %r11
-#define PV_RESTORE_REGS \
- pop %r11; \
- pop %r10; \
- pop %r9; \
- pop %r8; \
- pop %rdi; \
- pop %rsi; \
- pop %rdx; \
- pop %rcx; \
- pop %rax
+
+#define PV_SAVE_REGS(set) \
+ COND_PUSH(set, CLBR_RAX, rax); \
+ COND_PUSH(set, CLBR_RCX, rcx); \
+ COND_PUSH(set, CLBR_RDX, rdx); \
+ COND_PUSH(set, CLBR_RSI, rsi); \
+ COND_PUSH(set, CLBR_RDI, rdi); \
+ COND_PUSH(set, CLBR_R8, r8); \
+ COND_PUSH(set, CLBR_R9, r9); \
+ COND_PUSH(set, CLBR_R10, r10); \
+ COND_PUSH(set, CLBR_R11, r11)
+#define PV_RESTORE_REGS(set) \
+ COND_POP(set, CLBR_R11, r11); \
+ COND_POP(set, CLBR_R10, r10); \
+ COND_POP(set, CLBR_R9, r9); \
+ COND_POP(set, CLBR_R8, r8); \
+ COND_POP(set, CLBR_RDI, rdi); \
+ COND_POP(set, CLBR_RSI, rsi); \
+ COND_POP(set, CLBR_RDX, rdx); \
+ COND_POP(set, CLBR_RCX, rcx); \
+ COND_POP(set, CLBR_RAX, rax)
+
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
#else
-#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
-#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PV_SAVE_REGS(set) \
+ COND_PUSH(set, CLBR_EAX, eax); \
+ COND_PUSH(set, CLBR_EDI, edi); \
+ COND_PUSH(set, CLBR_ECX, ecx); \
+ COND_PUSH(set, CLBR_EDX, edx)
+#define PV_RESTORE_REGS(set) \
+ COND_POP(set, CLBR_EDX, edx); \
+ COND_POP(set, CLBR_ECX, ecx); \
+ COND_POP(set, CLBR_EDI, edi); \
+ COND_POP(set, CLBR_EAX, eax)
+
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr) *%cs:addr
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
- PV_SAVE_REGS; \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
- PV_RESTORE_REGS;) \
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
- PV_SAVE_REGS; \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
- PV_RESTORE_REGS;)
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define USERGS_SYSRET32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
swapgs)
+/*
+ * Note: swapgs is very special, and in practise is either going to be
+ * implemented with a single "swapgs" instruction or something very
+ * special. Either way, we don't need to save any registers for
+ * it.
+ */
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- PV_SAVE_REGS; \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
- PV_RESTORE_REGS \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
)
#define GET_CR2_INTO_RCX \
char pad0;
#else
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
- int x86_tlbsize;
+ int x86_tlbsize;
__u8 x86_virt_bits;
__u8 x86_phys_bits;
#endif
u8 no_update;
u8 rm;
u8 alimit;
- struct info *info;
+ struct math_emu_info *info;
u32 entry_eip;
};
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+union irq_stack_union {
+ char irq_stack[IRQ_STACK_SIZE];
+ /*
+ * GCC hardcodes the stack canary as %gs:40. Since the
+ * irq_stack is the object at %gs:0, we reserve the bottom
+ * 48 bytes of the irq stack for the canary.
+ */
+ struct {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
-extern void switch_to_new_gdt(void);
+extern void switch_to_new_gdt(int);
+extern void load_percpu_segment(int);
extern void cpu_init(void);
-extern void init_gdt(int cpu);
static inline unsigned long get_debugctlmsr(void)
{
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
-#ifdef CONFIG_PARAVIRT
-/*
- * Define virtualization-friendly old-style lock byte lock, for use in
- * pv_lock_ops if desired.
- *
- * This differs from the pre-2.6.24 spinlock by always using xchgb
- * rather than decb to take the lock; this allows it to use a
- * zero-initialized lock structure. It also maintains a 1-byte
- * contention counter, so that we can implement
- * __byte_spin_is_contended.
- */
-struct __byte_spinlock {
- s8 lock;
- s8 spinners;
-};
-
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- return bl->lock != 0;
-}
-
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- return bl->spinners != 0;
-}
-
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- s8 val = 1;
-
- asm("1: xchgb %1, %0\n"
- " test %1,%1\n"
- " jz 3f\n"
- " " LOCK_PREFIX "incb %2\n"
- "2: rep;nop\n"
- " cmpb $1, %0\n"
- " je 2b\n"
- " " LOCK_PREFIX "decb %2\n"
- " jmp 1b\n"
- "3:"
- : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
-}
-
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- u8 old = 1;
-
- asm("xchgb %1,%0"
- : "+m" (bl->lock), "+q" (old) : : "memory");
+#ifndef CONFIG_PARAVIRT
- return old == 0;
-}
-
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- smp_wmb();
- bl->lock = 0;
-}
-#else /* !CONFIG_PARAVIRT */
static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
{
return __ticket_spin_is_locked(lock);
{
return __ticket_spin_is_contended(lock);
}
+ #define __raw_spin_is_contended __raw_spin_is_contended
static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
{
__raw_spin_lock(lock);
}
-#endif /* CONFIG_PARAVIRT */
+#endif
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
{
{
atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
+ if (atomic_dec_return(count) >= 0)
return 1;
atomic_inc(count);
return 0;
stack_start.sp = temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_table(smp_processor_id());
+ initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0;
#ifdef CONFIG_HIBERNATION
if (strncmp(str, "s4_nohwsig", 10) == 0)
acpi_no_s4_hw_signature();
+ if (strncmp(str, "s4_nonvs", 8) == 0)
+ acpi_s4_no_nvs();
#endif
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
- if (strncmp(str, "s4_nonvs", 8) == 0)
- acpi_s4_no_nvs();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
-#include <mach_apic.h>
+#include <asm/genapic.h>
#endif
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
}
+ /*
+ * There is a known erratum on Pentium III and Core Solo
+ * and Core Duo CPUs.
+ * " Page with PAT set to WC while associated MTRR is UC
+ * may consolidate to UC "
+ * Because of this erratum, it is better to stick with
+ * setting WC in MTRR rather than using PAT on these CPUs.
+ *
+ * Enable PAT WC only on P4, Core 2 or later CPUs.
+ */
+ if (c->x86 == 6 && c->x86_model < 15)
+ clear_cpu_cap(c, X86_FEATURE_PAT);
}
#ifdef CONFIG_X86_32
ds_init_intel(c);
}
+ if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+ set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
+
#ifdef CONFIG_X86_64
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
+ if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)¤t_thread_info()->flags);
+
__monitor((void *)¤t_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
struct power_trace it;
if (!need_resched()) {
trace_power_start(&it, POWER_CSTATE, 1);
+ if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)¤t_thread_info()->flags);
+
__monitor((void *)¤t_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
#include <asm/desc.h>
#include <asm/i387.h>
-#include <mach_traps.h>
+#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pda.h>
#else
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
EXPORT_SYMBOL_GPL(math_state_restore);
#ifndef CONFIG_MATH_EMULATION
- asmlinkage void math_emulate(long arg)
+ void math_emulate(struct math_emu_info *info)
{
printk(KERN_EMERG
"math-emulation not enabled and no coprocessor found.\n");
}
#endif /* CONFIG_MATH_EMULATION */
- dotraplinkage void __kprobes
- do_device_not_available(struct pt_regs *regs, long error)
+ dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
{
#ifdef CONFIG_X86_32
if (read_cr0() & X86_CR0_EM) {
- conditional_sti(regs);
- math_emulate(0);
+ struct math_emu_info info = { };
+
+ conditional_sti(®s);
+
+ info.regs = ®s;
+ math_emulate(&info);
} else {
math_state_restore(); /* interrupts still off */
- conditional_sti(regs);
+ conditional_sti(®s);
}
#else
math_state_restore();
vmi_ops.release_page(pfn, VMI_PAGE_L2);
}
+ /*
+ * We use the pgd_free hook for releasing the pgd page:
+ */
+ static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+ {
+ unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
+
+ vmi_ops.release_page(pfn, VMI_PAGE_L2);
+ }
+
/*
* Helper macros for MMU update flags. We can defer updates until a flush
* or page invalidation only if the update is to the current address space
para_fill(pv_mmu_ops.write_cr2, SetCR2);
para_fill(pv_mmu_ops.write_cr3, SetCR3);
para_fill(pv_cpu_ops.write_cr4, SetCR4);
- para_fill(pv_irq_ops.save_fl, GetInterruptMask);
- para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
- para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
- para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
+
+ para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
+ para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
+ para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
+ para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
para_fill(pv_cpu_ops.wbinvd, WBINVD);
para_fill(pv_cpu_ops.read_tsc, RDTSC);
if (vmi_ops.release_page) {
pv_mmu_ops.release_pte = vmi_release_pte;
pv_mmu_ops.release_pmd = vmi_release_pmd;
+ pv_mmu_ops.pgd_free = vmi_pgd_free;
}
/* Set linear is needed in all cases */
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
+#include <linux/magic.h>
#include <asm/system.h>
#include <asm/desc.h>
*
* Opcode checker based on code by Richard Brunner
*/
-static int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
+static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
+ unsigned long addr)
{
unsigned char *instr;
int scan_more = 1;
}
#ifdef CONFIG_X86_64
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
+static noinline void pgtable_bad(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
{
unsigned long flags = oops_begin();
int sig = SIGKILL;
- struct task_struct *tsk;
+ struct task_struct *tsk = current;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- current->comm, address);
+ tsk->comm, address);
dump_pagetable(address);
- tsk = current;
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
}
#endif
+static noinline void no_context(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ struct task_struct *tsk = current;
+ unsigned long *stackend;
+
+#ifdef CONFIG_X86_64
+ unsigned long flags;
+ int sig;
+#endif
+
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * X86_32
+ * Valid to do another page fault here, because if this fault
+ * had been triggered by is_prefetch fixup_exception would have
+ * handled it.
+ *
+ * X86_64
+ * Hall of shame of CPU/BIOS bugs.
+ */
+ if (is_prefetch(regs, error_code, address))
+ return;
+
+ if (is_errata93(regs, address))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+#ifdef CONFIG_X86_32
+ bust_spinlocks(1);
+#else
+ flags = oops_begin();
+#endif
+
+ show_fault_oops(regs, error_code, address);
+
+ stackend = end_of_stack(tsk);
+ if (*stackend != STACK_END_MAGIC)
+ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+
+#ifdef CONFIG_X86_32
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+#else
+ sig = SIGKILL;
+ if (__die("Oops", regs, error_code))
+ sig = 0;
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+ oops_end(flags, regs, sig);
+#endif
+}
+
+static void __bad_area_nosemaphore(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address,
+ int si_code)
+{
+ struct task_struct *tsk = current;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & PF_USER) {
+ /*
+ * It's possible to have interrupts off here.
+ */
+ local_irq_enable();
+
+ /*
+ * Valid to do another page fault here because this one came
+ * from user space.
+ */
+ if (is_prefetch(regs, error_code, address))
+ return;
+
+ if (is_errata100(regs, address))
+ return;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(
+ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, task_pid_nr(tsk), address,
+ (void *) regs->ip, (void *) regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+
+ tsk->thread.cr2 = address;
+ /* Kernel addresses are always protection faults */
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ return;
+ }
+
+ if (is_f00f_bug(regs, address))
+ return;
+
+ no_context(regs, error_code, address);
+}
+
+static noinline void bad_area_nosemaphore(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+}
+
+static void __bad_area(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address,
+ int si_code)
+{
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ up_read(&mm->mmap_sem);
+
+ __bad_area_nosemaphore(regs, error_code, address, si_code);
+}
+
+static noinline void bad_area(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_MAPERR);
+}
+
+static noinline void bad_area_access_error(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_ACCERR);
+}
+
+/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
+static void out_of_memory(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
+ up_read(¤t->mm->mmap_sem);
+ pagefault_out_of_memory();
+}
+
+static void do_sigbus(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & PF_USER))
+ no_context(regs, error_code, address);
+#ifdef CONFIG_X86_32
+ /* User space => ok to do another page fault */
+ if (is_prefetch(regs, error_code, address))
+ return;
+#endif
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+static noinline void mm_fault_error(struct pt_regs *regs,
+ unsigned long error_code, unsigned long address, unsigned int fault)
+{
+ if (fault & VM_FAULT_OOM)
+ out_of_memory(regs, error_code, address);
+ else if (fault & VM_FAULT_SIGBUS)
+ do_sigbus(regs, error_code, address);
+ else
+ BUG();
+}
+
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static int spurious_fault(unsigned long address,
- unsigned long error_code)
+static noinline int spurious_fault(unsigned long error_code,
+ unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
*
* This assumes no large pages in there.
*/
-static int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
unsigned long pgd_paddr;
int show_unhandled_signals = 1;
+static inline int access_error(unsigned long error_code, int write,
+ struct vm_area_struct *vma)
+{
+ if (write) {
+ /* write, present and write, not present */
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return 1;
+ } else if (unlikely(error_code & PF_PROT)) {
+ /* read, present */
+ return 1;
+ } else {
+ /* read, not present */
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
#endif
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
+ unsigned long address;
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
- unsigned long address;
- int write, si_code;
+ int write;
int fault;
-#ifdef CONFIG_X86_64
- unsigned long flags;
- int sig;
-#endif
tsk = current;
mm = tsk->mm;
/* get the address */
address = read_cr2();
- if (unlikely(notify_page_fault(regs)))
- return;
- si_code = SEGV_MAPERR;
-
if (unlikely(kmmio_fault(regs, address)))
return;
return;
/* Can handle a stale RO->RW TLB */
- if (spurious_fault(address, error_code))
+ if (spurious_fault(error_code, address))
return;
+ /* kprobes don't want to hook the spurious faults. */
+ if (notify_page_fault(regs))
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
*/
- goto bad_area_nosemaphore;
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
}
- /* kprobes don't want to hook the spurious faults. */
- if (notify_page_fault(regs))
++ if (unlikely(notify_page_fault(regs)))
+ return;
-
/*
* It's safe to allow irq's after cr2 has been saved and the
* vmalloc fault has been handled.
#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
- pgtable_bad(address, regs, error_code);
+ pgtable_bad(regs, error_code, address);
#endif
/*
* If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault.
*/
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
+ if (unlikely(in_atomic() || !mm)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
/*
* When running in the kernel we expect faults to occur only to
* source. If this is invalid we can skip the address space check,
* thus avoiding the deadlock.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
+ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if ((error_code & PF_USER) == 0 &&
- !search_exception_tables(regs->ip))
- goto bad_area_nosemaphore;
+ !search_exception_tables(regs->ip)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
down_read(&mm->mmap_sem);
}
vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
+ if (unlikely(!vma)) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+ if (likely(vma->vm_start <= address))
goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
if (error_code & PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* and pusha to work. ("enter $65535,$31" pushes
* 32 pointers and then decrements %sp by 65535.)
*/
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
- goto bad_area;
+ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
+ bad_area(regs, error_code, address);
+ return;
+ }
}
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
+ if (unlikely(expand_stack(vma, address))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
good_area:
- si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & (PF_PROT|PF_WRITE)) {
- default: /* 3: write, present */
- /* fall through */
- case PF_WRITE: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case PF_PROT: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
+ write = error_code & PF_WRITE;
+ if (unlikely(access_error(error_code, write, vma))) {
+ bad_area_access_error(regs, error_code, address);
+ return;
}
/*
*/
fault = handle_mm_fault(mm, vma, address, write);
if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
+ mm_fault_error(regs, error_code, address, fault);
+ return;
}
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
}
#endif
up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
- /*
- * It's possible to have interrupts off here.
- */
- local_irq_enable();
-
- /*
- * Valid to do another page fault here because this one came
- * from user space.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata100(regs, address))
- return;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(
- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
- (void *) regs->ip, (void *) regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
- return;
- }
-
- if (is_f00f_bug(regs, address))
- return;
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * X86_32
- * Valid to do another page fault here, because if this fault
- * had been triggered by is_prefetch fixup_exception would have
- * handled it.
- *
- * X86_64
- * Hall of shame of CPU/BIOS bugs.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata93(regs, address))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-#ifdef CONFIG_X86_32
- bust_spinlocks(1);
-#else
- flags = oops_begin();
-#endif
-
- show_fault_oops(regs, error_code, address);
-
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
-
-#ifdef CONFIG_X86_32
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-#else
- sig = SIGKILL;
- if (__die("Oops", regs, error_code))
- sig = 0;
- /* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags, regs, sig);
-#endif
-
-out_of_memory:
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
- */
- up_read(&mm->mmap_sem);
- pagefault_out_of_memory();
- return;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & PF_USER))
- goto no_context;
-#ifdef CONFIG_X86_32
- /* User space => ok to do another page fault */
- if (is_prefetch(regs, address, error_code))
- return;
-#endif
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
DEFINE_SPINLOCK(pgd_lock);
config SGI_XP
tristate "Support communication between SGI SSIs"
depends on NET
- depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
+ depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
config SGI_GRU
tristate "SGI GRU driver"
- depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
default n
select MMU_NOTIFIER
---help---
depends on EXPERIMENTAL
depends on BACKLIGHT_CLASS_DEVICE
depends on RFKILL
+ depends on POWER_SUPPLY
default n
---help---
This driver adds support for rfkill and backlight control to Dell
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
- spinlock_t lock;
};
/* Alternate field names when used to cache expirations. */
#define prof_exp stime
#define virt_exp utime
#define sched_exp sum_exec_runtime
+ #define INIT_CPUTIME \
+ (struct task_cputime) { \
+ .utime = cputime_zero, \
+ .stime = cputime_zero, \
+ .sum_exec_runtime = 0, \
+ }
+
/**
- * struct thread_group_cputime - thread group interval timer counts
- * @totals: thread group interval timers; substructure for
- * uniprocessor kernel, per-cpu for SMP kernel.
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime: thread group interval timers.
+ * @running: non-zero when there are timers running and
+ * @cputime receives updates.
+ * @lock: lock for fields in this struct.
*
* This structure contains the version of task_cputime, above, that is
- * used for thread group CPU clock calculations.
+ * used for thread group CPU timer calculations.
*/
- struct thread_group_cputime {
- struct task_cputime totals;
+ struct thread_group_cputimer {
+ struct task_cputime cputime;
+ int running;
+ spinlock_t lock;
};
/*
cputime_t it_prof_incr, it_virt_incr;
/*
- * Thread group totals for process CPU clocks.
- * See thread_group_cputime(), et al, for details.
+ * Thread group totals for process CPU timers.
+ * See thread_group_cputimer(), et al, for details.
*/
- struct thread_group_cputime cputime;
+ struct thread_group_cputimer cputimer;
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;
* Live threads maintain their own counters and add to these
* in __exit_signal, except for the group leader.
*/
- cputime_t cutime, cstime;
+ cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long inblock, oublock, cinblock, coublock;
struct task_io_accounting ioac;
+ /*
+ * Cumulative ns of schedule CPU time fo dead threads in the
+ * group, not including a zombie group leader, (This only differs
+ * from jiffies_to_ns(utime + stime) if sched_clock uses something
+ * other than jiffies.)
+ */
+ unsigned long long sum_sched_runtime;
+
/*
* We don't bother to synchronize most readers of this at all,
* because there is no reader checking a limit that actually needs
pid_t pid;
pid_t tgid;
-#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
-#endif
+
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
extern void thread_info_cache_init(void);
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+ unsigned long *n = end_of_stack(p);
+
+ do { /* Skip over canary */
+ n++;
+ } while (!*n);
+
+ return (unsigned long)n - (unsigned long)end_of_stack(p);
+}
+#endif
+
/* set thread flags in other task's structures
* - see asm/thread_info.h for TIF_xxxx flags available
*/
/*
* Thread group CPU time accounting.
*/
-
- static inline
- void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
- {
- struct task_cputime *totals = &tsk->signal->cputime.totals;
- unsigned long flags;
-
- spin_lock_irqsave(&totals->lock, flags);
- *times = *totals;
- spin_unlock_irqrestore(&totals->lock, flags);
- }
+ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
static inline void thread_group_cputime_init(struct signal_struct *sig)
{
- sig->cputime.totals = (struct task_cputime){
- .utime = cputime_zero,
- .stime = cputime_zero,
- .sum_exec_runtime = 0,
- };
-
- spin_lock_init(&sig->cputime.totals.lock);
+ sig->cputimer.cputime = INIT_CPUTIME;
+ spin_lock_init(&sig->cputimer.lock);
+ sig->cputimer.running = 0;
}
static inline void thread_group_cputime_free(struct signal_struct *sig)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
+ sig->utime = cputime_add(sig->utime, task_utime(tsk));
+ sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
{
static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE;
- unsigned long *n = end_of_stack(current);
unsigned long free;
- while (*n == 0)
- n++;
- free = (unsigned long)n - (unsigned long)end_of_stack(current);
+ free = stack_not_used(current);
if (free >= lowest_to_date)
return;
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <trace/sched.h>
+#include <linux/magic.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
{
struct task_struct *tsk;
struct thread_info *ti;
+ unsigned long *stackend;
+
int err;
prepare_to_copy(orig);
goto out;
setup_thread_stack(tsk, orig);
+ stackend = end_of_stack(tsk);
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
#ifdef CONFIG_CC_STACKPROTECTOR
tsk->stack_canary = get_random_int();
sig->tty_old_pgrp = NULL;
sig->tty = NULL;
- sig->cutime = sig->cstime = cputime_zero;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
sig->cgtime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
task_io_accounting_init(&sig->ioac);
+ sig->sum_sched_runtime = 0;
taskstats_tgid_init(sig);
task_lock(current->group_leader);
* triggers too late. This doesn't hurt, the check is only there
* to stop root fork bombs.
*/
+ retval = -EAGAIN;
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
- if (unlikely(ptrace_reparented(current)))
+ if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);
/* Perform scheduler related setup. Assign this task to a CPU. */
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
- if (!sync) {
- if (current->se.avg_overlap < sysctl_sched_migration_cost &&
- p->se.avg_overlap < sysctl_sched_migration_cost)
- sync = 1;
- } else {
- if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
- p->se.avg_overlap >= sysctl_sched_migration_cost)
- sync = 0;
- }
-
#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
int cpu = smp_processor_id();
if (stop_tick) {
- cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
- /*
- * If we are going offline and still the leader, give up!
- */
- if (!cpu_active(cpu) &&
- atomic_read(&nohz.load_balancer) == cpu) {
+ if (!cpu_active(cpu)) {
+ if (atomic_read(&nohz.load_balancer) != cpu)
+ return 0;
+
+ /*
+ * If we are going offline and still the leader,
+ * give up!
+ */
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
+
return 0;
}
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
+
/* time for ilb owner also to sleep */
if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
- static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int sync, void *key)
+ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
printk(KERN_CONT " %016lx ", thread_saved_pc(p));
#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
- {
- unsigned long *n = end_of_stack(p);
- while (!*n)
- n++;
- free = (unsigned long)n - (unsigned long)end_of_stack(p);
- }
+ free = stack_not_used(p);
#endif
printk(KERN_CONT "%5lu %5d %6d\n", free,
task_pid_nr(p), task_pid_nr(p->real_parent));