int (*set_wallclock)(unsigned long);
unsigned long long (*sched_clock)(void);
- unsigned long (*get_cpu_khz)(void);
+ unsigned long (*get_tsc_khz)(void);
};
struct pv_cpu_ops {
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+#ifdef CONFIG_X86_64
+ void (*load_gs_index)(unsigned int idx);
+#endif
void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
const void *desc);
void (*write_gdt_entry)(struct desc_struct *,
u64 (*read_pmc)(int counter);
unsigned long long (*read_tscp)(unsigned int *aux);
- /* These two are jmp to, not actually called. */
- void (*irq_enable_syscall_ret)(void);
+ /*
+ * Atomically enable interrupts and return to userspace. This
+ * is only ever used to return to 32-bit processes; in a
+ * 64-bit kernel, it's used for 32-on-64 compat processes, but
+ * never native 64-bit processes. (Jump, not call.)
+ */
+ void (*irq_enable_sysexit)(void);
+
+ /*
+ * Switch to usermode gs and return to 64-bit usermode using
+ * sysret. Only used in 64-bit kernels to return to 64-bit
+ * processes. Usermode register state, including %rsp, must
+ * already be restored.
+ */
+ void (*usergs_sysret64)(void);
+
+ /*
+ * Switch to usermode gs and return to 32-bit usermode using
+ * sysret. Used to return to 32-on-64 compat processes.
+ * Other usermode register state, including %esp, must already
+ * be restored.
+ */
+ void (*usergs_sysret32)(void);
+
+ /* Normal iret. Jump to this with the standard iret stack
+ frame set up. */
void (*iret)(void);
void (*swapgs)(void);
void (*irq_enable)(void);
void (*safe_halt)(void);
void (*halt)(void);
+
+#ifdef CONFIG_X86_64
+ void (*adjust_exception_frame)(void);
+#endif
};
struct pv_apic_ops {
* these shouldn't be in this interface.
*/
void (*apic_write)(unsigned long reg, u32 v);
- void (*apic_write_atomic)(unsigned long reg, u32 v);
u32 (*apic_read)(unsigned long reg);
void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void);
unsigned long phys, pgprot_t flags);
};
+struct raw_spinlock; /* forward declaration; only pointers to it are used below */
+struct pv_lock_ops { /* spinlock hooks called by the __raw_spin_*() wrappers */
+ int (*spin_is_locked)(struct raw_spinlock *lock);
+ int (*spin_is_contended)(struct raw_spinlock *lock);
+ void (*spin_lock)(struct raw_spinlock *lock);
+ int (*spin_trylock)(struct raw_spinlock *lock);
+ void (*spin_unlock)(struct raw_spinlock *lock);
+};
+
/* This contains all the paravirt structures: we get a convenient
* number for each function using the offset which we use to indicate
* what to patch. */
struct pv_irq_ops pv_irq_ops;
struct pv_apic_ops pv_apic_ops;
struct pv_mmu_ops pv_mmu_ops;
+ struct pv_lock_ops pv_lock_ops;
};
extern struct pv_info pv_info;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
+extern struct pv_lock_ops pv_lock_ops;
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
{
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
-#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
+#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
static inline unsigned long long paravirt_read_pmc(int counter)
{
PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}
+#ifdef CONFIG_X86_64 /* same guard as the pv_cpu_ops.load_gs_index member */
+static inline void load_gs_index(unsigned int gs)
+{
+ PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); /* gs is passed through; no result is used */
+}
+#endif /* CONFIG_X86_64 */
+
static inline void write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}
-static inline void apic_write_atomic(unsigned long reg, u32 v)
-{
- PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
-}
-
static inline u32 apic_read(unsigned long reg)
{
return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
pte.pte);
+#ifdef CONFIG_PARAVIRT_DEBUG
+ BUG_ON(ret & PTE_MASK);
+#endif
return ret;
}
void _paravirt_nop(void);
#define paravirt_nop ((void *)_paravirt_nop)
+void paravirt_use_bytelocks(void);
+
+#ifdef CONFIG_SMP
+/*
+ * Raw spinlock entry points, compiled only when CONFIG_SMP is set.
+ * Each wrapper hands its lock pointer to the pv_lock_ops hook of the
+ * same name: PVOP_CALL1 is used where an int result is returned to the
+ * caller, PVOP_VCALL1 where the wrapper returns nothing.
+ */
+static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+{
+ return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+}
+
+static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+{
+ return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
+}
+
+static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+{
+ PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+}
+
+static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+{
+ return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+}
+
+static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+{
+ PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+}
+
+#endif /* CONFIG_SMP */
+
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
u8 *instr; /* original instructions */
* caller saved registers but the argument parameter */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
-#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
-#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
+#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG "D"
#endif
return f;
}
+
/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#ifdef CONFIG_X86_64
-#define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
-#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+#define PV_SAVE_REGS /* nine pushes; PV_RESTORE_REGS pops them in reverse order */ \
+ push %rax; \
+ push %rcx; \
+ push %rdx; \
+ push %rsi; \
+ push %rdi; \
+ push %r8; \
+ push %r9; \
+ push %r10; \
+ push %r11
+#define PV_RESTORE_REGS /* pops in the reverse of PV_SAVE_REGS' push order */ \
+ pop %r11; \
+ pop %r10; \
+ pop %r9; \
+ pop %r8; \
+ pop %rdi; \
+ pop %rsi; \
+ pop %rdx; \
+ pop %rcx; \
+ pop %rax
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#define PARA_INDIRECT(addr) *addr(%rip) /* 64-bit: indirect operand addressed relative to %rip */
#else
#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#define PARA_INDIRECT(addr) *%cs:addr /* 32-bit: indirect operand with a %cs segment override */
#endif
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
- jmp *%cs:pv_cpu_ops+PV_CPU_iret)
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
- PV_SAVE_REGS; \
- call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \
+ PV_SAVE_REGS; \
+ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS;) \
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
- PV_SAVE_REGS; \
- call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \
+ PV_SAVE_REGS; \
+ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS;)
-#define ENABLE_INTERRUPTS_SYSCALL_RET \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+#define USERGS_SYSRET32 \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
CLBR_NONE, \
- jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
-
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
#ifdef CONFIG_X86_32
-#define GET_CR0_INTO_EAX \
- push %ecx; push %edx; \
- call *pv_cpu_ops+PV_CPU_read_cr0; \
+#define GET_CR0_INTO_EAX \
+ push %ecx; push %edx; \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
-#else
+/*
+ * CONFIG_X86_32 builds only: PARA_SITE entry, registered with
+ * CLBR_NONE, whose body jumps (rather than calls) indirectly through
+ * pv_cpu_ops+PV_CPU_irq_enable_sysexit.
+ */
+#define ENABLE_INTERRUPTS_SYSEXIT \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
+ CLBR_NONE, \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+
+
+#else /* !CONFIG_X86_32 */
+
+/*
+ * If swapgs is used while the userspace stack is still current,
+ * there's no way to call a pvop. The PV replacement *must* be
+ * inlined, or the swapgs instruction must be trapped and emulated.
+ *
+ * Accordingly this site contains only a bare swapgs, with no register
+ * save/restore and no indirect call, unlike SWAPGS.
+ */
+#define SWAPGS_UNSAFE_STACK \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
+ swapgs)
+
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
PV_SAVE_REGS; \
- call *pv_cpu_ops+PV_CPU_swapgs; \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
PV_RESTORE_REGS \
)
-#define GET_CR2_INTO_RCX \
- call *pv_mmu_ops+PV_MMU_read_cr2; \
- movq %rax, %rcx; \
+#define GET_CR2_INTO_RCX \
+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
+ movq %rax, %rcx; \
xorq %rax, %rax;
-#endif
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* indirect call (not jmp) via pv_irq_ops */ \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
+ CLBR_NONE, \
+ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
+/*
+ * Non-32-bit builds only: PARA_SITE entry, registered with CLBR_NONE,
+ * whose body jumps (rather than calls) indirectly through
+ * pv_cpu_ops+PV_CPU_usergs_sysret64.
+ */
+#define USERGS_SYSRET64 \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
+ CLBR_NONE, \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+/*
+ * Non-32-bit builds only: PARA_SITE entry, registered with CLBR_NONE,
+ * whose body jumps (rather than calls) indirectly through
+ * pv_cpu_ops+PV_CPU_irq_enable_sysexit.
+ */
+#define ENABLE_INTERRUPTS_SYSEXIT32 \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
+ CLBR_NONE, \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */