www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - arch/x86/xen/enlighten.c
xen: fix allocation and use of large ldts
[linux-2.6-omap-h63xx.git] / arch / x86 / xen / enlighten.c
index 87d36044054d7f90976bb6c53ef0d45adc75dacf..b011e4a5dbbe6c1fed33053a077a094ded4313af 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -56,6 +57,18 @@ EXPORT_SYMBOL_GPL(hypercall_page);
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+/*
+ * Identity map, in addition to the plain kernel map.  It must cover
+ * enough memory to allocate the page table pages that map the rest.
+ * Each page of ptes maps 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
+
 /*
  * Note about cr3 (pagetable base) values:
  *
@@ -167,10 +180,14 @@ void xen_vcpu_restore(void)
 
 static void __init xen_banner(void)
 {
+       unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+       struct xen_extraversion extra;  /* filled in by the call below */
+       HYPERVISOR_xen_version(XENVER_extraversion, &extra);
+
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
-       printk(KERN_INFO "Hypervisor signature: %s%s\n",
-              xen_start_info->magic,
+       printk(KERN_INFO "Xen version: %d.%d%s%s\n",
+              version >> 16, version & 0xffff, extra.extraversion,
               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
@@ -308,6 +325,57 @@ static unsigned long xen_store_tr(void)
        return 0;
 }
 
+/*
+ * Set the protection of the mapping at 'v'; if the page also has a
+ * distinct linear-map address (v was e.g. a vmalloc address), match it:
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
+{
+       unsigned long pfn;
+       void *linear;
+       pte_t *ptep;
+       pte_t new_pte;
+       int level;
+
+       ptep = lookup_address((unsigned long)v, &level);
+       BUG_ON(ptep == NULL);
+
+       pfn = pte_pfn(*ptep);
+       new_pte = pfn_pte(pfn, prot);
+
+       if (HYPERVISOR_update_va_mapping((unsigned long)v, new_pte, 0))
+               BUG();
+
+       /* Highmem: skip the linear alias, call kmap_flush_unused() instead. */
+       if (PageHighMem(pfn_to_page(pfn))) {
+               kmap_flush_unused();
+               return;
+       }
+
+       linear = __va(PFN_PHYS(pfn));
+       if (linear != v &&
+           HYPERVISOR_update_va_mapping((unsigned long)linear, new_pte, 0))
+               BUG();
+}
+
+/* Apply PAGE_KERNEL_RO to each page backing 'entries' descriptors at 'ldt'. */
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+       unsigned i;     /* unsigned, to match the type of 'entries' */
+
+       for (i = 0; i < entries; i += PAGE_SIZE / LDT_ENTRY_SIZE)
+               set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
+}
+
+/* Apply PAGE_KERNEL to each page backing 'entries' descriptors at 'ldt'. */
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+       unsigned i;     /* unsigned, to match the type of 'entries' */
+
+       for (i = 0; i < entries; i += PAGE_SIZE / LDT_ENTRY_SIZE)
+               set_aliased_prot(ldt + i, PAGE_KERNEL);
+}
+
 static void xen_set_ldt(const void *addr, unsigned entries)
 {
        struct mmuext_op *op;
@@ -409,7 +477,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
                                const void *ptr)
 {
        unsigned long lp = (unsigned long)&dt[entrynum];
-       xmaddr_t mach_lp = virt_to_machine(lp);
+       xmaddr_t mach_lp = arbitrary_virt_to_machine(lp);
        u64 entry = *(u64 *)ptr;
 
        preempt_disable();
@@ -542,7 +610,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 }
 
 static void xen_load_sp0(struct tss_struct *tss,
-                         struct thread_struct *thread)
+                        struct thread_struct *thread)
 {
        struct multicall_space mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@@ -765,6 +833,34 @@ static void xen_write_cr3(unsigned long cr3)
        xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
+/* With CONFIG_X86_64, FS/GS base MSR writes go through a hypercall. */
+static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+{
+#ifdef CONFIG_X86_64
+       unsigned which;
+
+       switch (msr) {
+       case MSR_FS_BASE:
+               which = SEGBASE_FS;
+               break;
+       case MSR_KERNEL_GS_BASE:
+               which = SEGBASE_GS_USER;
+               break;
+       case MSR_GS_BASE:
+               which = SEGBASE_GS_KERNEL;
+               break;
+       default:
+               return native_write_msr_safe(msr, low, high);
+       }
+
+       if (HYPERVISOR_set_segment_base(which, ((u64)high << 32) | low) != 0)
+               return -EFAULT;
+       return 0;
+#else
+       return native_write_msr_safe(msr, low, high);
+#endif
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@ -831,12 +927,20 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 #ifdef CONFIG_X86_64
        {
                struct page *page = virt_to_page(pgd);
+               pgd_t *user_pgd;
 
                BUG_ON(page->private != 0);
 
-               page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
-               if (page->private == 0)
-                       ret = -ENOMEM;
+               ret = -ENOMEM;
+
+               user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+               page->private = (unsigned long)user_pgd;
+
+               if (user_pgd != NULL) {
+                       user_pgd[pgd_index(VSYSCALL_START)] =
+                               __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+                       ret = 0;
+               }
 
                BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
        }
@@ -977,6 +1081,9 @@ static __init void xen_post_allocator_init(void)
        pv_mmu_ops.release_pud = xen_release_pud;
 #endif
 
+#ifdef CONFIG_X86_64
+       SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
        xen_mark_init_mm_pinned();
 }
 
@@ -1072,7 +1179,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
        case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
        case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
 #else
        case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
 #endif
@@ -1088,6 +1197,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
        }
 
        __native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+       /* Replicate changes to map the vsyscall page into the user
+          pagetable vsyscall mapping. */
+       if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+               unsigned long vaddr = __fix_to_virt(idx);
+               set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+       }
+#endif
 }
 
 static const struct pv_info xen_info __initdata = {
@@ -1133,7 +1251,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .wbinvd = native_wbinvd,
 
        .read_msr = native_read_msr_safe,
-       .write_msr = native_write_msr_safe,
+       .write_msr = xen_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
 
@@ -1153,6 +1271,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .load_gs_index = xen_load_gs_index,
 #endif
 
+       .alloc_ldt = xen_alloc_ldt,
+       .free_ldt = xen_free_ldt,
+
        .store_gdt = native_store_gdt,
        .store_idt = native_store_idt,
        .store_tr = xen_store_tr,
@@ -1207,7 +1328,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = xen_apic_write,
-       .apic_write_atomic = xen_apic_write,
        .apic_read = xen_apic_read,
        .setup_boot_clock = paravirt_nop,
        .setup_secondary_clock = paravirt_nop,
@@ -1258,7 +1378,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
        .ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
        .pte_val = xen_pte_val,
-       .pte_flags = native_pte_val,
+       .pte_flags = native_pte_flags,
        .pgd_val = xen_pgd_val,
 
        .make_pte = xen_make_pte,
@@ -1369,7 +1489,7 @@ static unsigned long m2p(phys_addr_t maddr)
 {
        phys_addr_t paddr;
 
-       maddr &= PTE_MASK;
+       maddr &= PTE_PFN_MASK;
        paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
 
        return paddr;
@@ -1427,13 +1547,6 @@ static void set_page_prot(void *addr, pgprot_t prot)
                BUG();
 }
 
-/*
- * Identity map, in addition to plain kernel map.  This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
 static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
        unsigned pmdidx, pteidx;
@@ -1533,6 +1646,7 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
        set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
        set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
        set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+       set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
        set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
        set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);