#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/paravirt.h>
+#include <asm/linkage.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include "multicalls.h"
#include "mmu.h"
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
+#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
+ { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
+ { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
+
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+ __page_aligned_bss;
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+ BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+ return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+ return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+ unsigned pfn, idx;
+
+ for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+ unsigned topidx = p2m_top_index(pfn);
+
+ p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+ }
+
+ for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+ unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+ p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+ }
+
+ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+ virt_to_mfn(p2m_top_mfn_list);
+ HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
{
+ unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+ unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+ unsigned pfn;
+
+ for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+ unsigned topidx = p2m_top_index(pfn);
+
+ p2m_top[topidx] = &mfn_list[pfn];
+ }
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+ unsigned topidx, idx;
+
+ if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+ return INVALID_P2M_ENTRY;
+
+ topidx = p2m_top_index(pfn);
+ idx = p2m_index(pfn);
+ return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+ unsigned long *p;
+ unsigned i;
+
+ p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+ BUG_ON(p == NULL);
+
+ for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+ p[i] = INVALID_P2M_ENTRY;
+
+ if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+ free_page((unsigned long)p);
+ else
+ *mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ unsigned topidx, idx;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return;
+ }
+
+ if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+ BUG_ON(mfn != INVALID_P2M_ENTRY);
+ return;
+ }
+
+ topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_missing) {
+ /* no need to allocate a page to store an invalid entry */
+ if (mfn == INVALID_P2M_ENTRY)
+ return;
+ alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+ }
+
+ idx = p2m_index(pfn);
+ p2m_top[topidx][idx] = mfn;
+}
+
+xmaddr_t arbitrary_virt_to_machine(void *vaddr)
+{
+ unsigned long address = (unsigned long)vaddr;
unsigned int level;
pte_t *pte = lookup_address(address, &level);
unsigned offset = address & ~PAGE_MASK;
BUG_ON(pte == NULL);
- return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
+ return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
void make_lowmem_page_readonly(void *vaddr)
}
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+ struct page *page = virt_to_page(ptr);
+
+ return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
struct mmu_update *u;
- preempt_disable();
+ mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+ if (mcs.mc != NULL)
+ mcs.mc->args[1]++;
+ else {
+ mcs = __xen_mc_entry(sizeof(*u));
+ MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+ }
- mcs = xen_mc_entry(sizeof(*u));
u = mcs.args;
- u->ptr = virt_to_machine(ptr).maddr;
- u->val = pmd_val_ma(val);
- MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+ *u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+ struct mmu_update u;
+
+ preempt_disable();
+
+ xen_mc_batch();
+
+ /* ptr may be ioremapped for 64-bit pagetable setup */
+ u.ptr = arbitrary_virt_to_machine(ptr).maddr;
+ u.val = pmd_val_ma(val);
+ extend_mmu_update(&u);
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+ /* If page is not pinned, we can just update the entry
+ directly */
+ if (!page_pinned(ptr)) {
+ *ptr = val;
+ return;
+ }
+
+ xen_set_pmd_hyper(ptr, val);
+}
+
/*
* Associate a virtual page frame with a given physical page frame
* and protection flags for that frame.
*/
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- /* <mfn,flags> stored as-is, to permit clearing entries */
- xen_set_pte(pte, mfn_pte(mfn, flags));
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
+ set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
preempt_enable();
}
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ /* Just return the pte as-is. We preserve the bits on commit */
+ return *ptep;
+}
+
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ struct mmu_update u;
+
+ xen_mc_batch();
+
+ u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+ u.val = pte_val_ma(pte);
+ extend_mmu_update(&u);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
/* Assume pteval_t is equivalent to all the other *val_t types. */
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
if (val & _PAGE_PRESENT) {
unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
pteval_t flags = val & ~PTE_MASK;
- val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
}
return val;
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
pteval_t flags = val & ~PTE_MASK;
- val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
}
return val;
return pte_mfn_to_pfn(pmd.pmd);
}
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
- struct multicall_space mcs;
- struct mmu_update *u;
+ struct mmu_update u;
preempt_disable();
- mcs = xen_mc_entry(sizeof(*u));
- u = mcs.args;
- u->ptr = virt_to_machine(ptr).maddr;
- u->val = pud_val_ma(val);
- MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+ xen_mc_batch();
+
+ /* ptr may be ioremapped for 64-bit pagetable setup */
+ u.ptr = arbitrary_virt_to_machine(ptr).maddr;
+ u.val = pud_val_ma(val);
+ extend_mmu_update(&u);
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+ /* If page is not pinned, we can just update the entry
+ directly */
+ if (!page_pinned(ptr)) {
+ *ptr = val;
+ return;
+ }
+
+ xen_set_pud_hyper(ptr, val);
+}
+
void xen_set_pte(pte_t *ptep, pte_t pte)
{
+#ifdef CONFIG_X86_PAE
ptep->pte_high = pte.pte_high;
smp_wmb();
ptep->pte_low = pte.pte_low;
+#else
+ *ptep = pte;
+#endif
}
+#ifdef CONFIG_X86_PAE
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
- set_64bit((u64 *)ptep, pte_val_ma(pte));
+ set_64bit((u64 *)ptep, native_pte_val(pte));
}
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
void xen_pmd_clear(pmd_t *pmdp)
{
- xen_set_pmd(pmdp, __pmd(0));
+ set_pmd(pmdp, __pmd(0));
}
+#endif /* CONFIG_X86_PAE */
pmd_t xen_make_pmd(pmdval_t pmd)
{
return native_make_pmd(pmd);
}
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud)
+{
+ return pte_mfn_to_pfn(pud.pud);
+}
+
+pud_t xen_make_pud(pudval_t pud)
+{
+ pud = pte_pfn_to_mfn(pud);
+
+ return native_make_pud(pud);
+}
+
+void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+ struct mmu_update u;
+
+ preempt_disable();
+
+ xen_mc_batch();
+
+ u.ptr = virt_to_machine(ptr).maddr;
+ u.val = pgd_val_ma(val);
+ extend_mmu_update(&u);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
+
+void xen_set_pgd(pgd_t *ptr, pgd_t val)
+{
+ /* If page is not pinned, we can just update the entry
+ directly */
+ if (!page_pinned(ptr)) {
+ *ptr = val;
+ return;
+ }
+
+ xen_set_pgd_hyper(ptr, val);
+}
+#endif /* PAGETABLE_LEVELS == 4 */
+
/*
(Yet another) pagetable walker. This one is intended for pinning a
pagetable. This means that it walks a pagetable and calls the
xen_mc_issue(0);
}
-/* The init_mm pagetable is really pinned as soon as its created, but
- that's before we have page structures to store the bits. So do all
- the book-keeping now. */
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns. Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!PagePinned(page)) {
+ xen_pgd_pin((pgd_t *)page_address(page));
+ SetPageSavePinned(page);
+ }
+ }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits. So do all
+ * the book-keeping now.
+ */
static __init int mark_pinned(struct page *page, enum pt_level level)
{
SetPagePinned(page);
xen_mc_issue(0);
}
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (PageSavePinned(page)) {
+ BUG_ON(!PagePinned(page));
+ printk("unpinning pinned %p\n", page_address(page));
+ xen_pgd_unpin((pgd_t *)page_address(page));
+ ClearPageSavePinned(page);
+ }
+ }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
static void drop_other_mm_ref(void *info)
{
struct mm_struct *mm = info;
+ struct mm_struct *active_mm;
+
+#ifdef CONFIG_X86_64
+ active_mm = read_pda(active_mm);
+#else
+ active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
+#endif
- if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+ if (active_mm == mm)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
}
if (!cpus_empty(mask))
- xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+ smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/* pgd may not be pinned in the error exit path of execve */
- if (PagePinned(virt_to_page(mm->pgd)))
+ if (page_pinned(mm->pgd))
xen_pgd_unpin(mm->pgd);
spin_unlock(&mm->page_table_lock);