Merge branch 'linus' into x86/pat2

author Ingo Molnar <mingo@elte.hu>
Fri, 10 Oct 2008 17:30:08 +0000 (19:30 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 10 Oct 2008 17:30:08 +0000 (19:30 +0200)

diff --combined arch/x86/mm/init_32.c

index 74780800e7e7a027c51e30d5e2458f4ec02b89bc,6b9a9358b3308e9fc4972275b60f7e3b35b12d63..c3789bb193087d99c2946700d14c2474845528c4
--- 1/arch/x86/mm/init_32.c
--- 2/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@@ -47,6 -47,7 +47,7 @@@
   #include <asm/paravirt.h>
   #include <asm/setup.h>
   #include <asm/cacheflush.h>
+ #include <asm/smp.h>
   
   unsigned int __VMALLOC_RESERVE = 128 << 20;
   
@@@ -194,30 -195,11 +195,30 @@@ static void __init kernel_physical_mapp
         pgd_t *pgd;
         pmd_t *pmd;
         pte_t *pte;
- -      unsigned pages_2m = 0, pages_4k = 0;
+ +      unsigned pages_2m, pages_4k;
+ +      int mapping_iter;
+ +
+ +      /*
+ +       * First iteration will set up identity mapping using large/small pages
+ +       * based on use_pse, with other attributes the same as set by
+ +       * the early code in head_32.S.
+ +       *
+ +       * Second iteration will set up the appropriate attributes (NX, GLOBAL..)
+ +       * as desired for the kernel identity mapping.
+ +       *
+ +       * This two pass mechanism conforms to the TLB app note which says:
+ +       *
+ +       *     "Software should not write to a paging-structure entry in a way
+ +       *      that would change, for any linear address, both the page size
+ +       *      and either the page frame or attributes."
+ +       */
+ +      mapping_iter = 1;
   
         if (!cpu_has_pse)
                 use_pse = 0;
   
+ +repeat:
+ +      pages_2m = pages_4k = 0;
         pfn = start_pfn;
         pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
         pgd = pgd_base + pgd_idx;
@@@ -243,13 -225,6 +244,13 @@@
                         if (use_pse) {
                                 unsigned int addr2;
                                 pgprot_t prot = PAGE_KERNEL_LARGE;
+ +                              /*
+ +                               * first pass will use the same initial
+ +                               * identity mapping attribute + _PAGE_PSE.
+ +                               */
+ +                              pgprot_t init_prot =
+ +                                      __pgprot(PTE_IDENT_ATTR |
+ +                                               _PAGE_PSE);
   
                                 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
                                         PAGE_OFFSET + PAGE_SIZE-1;
@@@ -259,10 -234,7 +260,10 @@@
                                         prot = PAGE_KERNEL_LARGE_EXEC;
   
                                 pages_2m++;
- -                              set_pmd(pmd, pfn_pmd(pfn, prot));
+ +                              if (mapping_iter == 1)
+ +                                      set_pmd(pmd, pfn_pmd(pfn, init_prot));
+ +                              else
+ +                                      set_pmd(pmd, pfn_pmd(pfn, prot));
   
                                 pfn += PTRS_PER_PTE;
                                 continue;
@@@ -274,43 -246,17 +275,43 @@@
                         for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
                              pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
                                 pgprot_t prot = PAGE_KERNEL;
+ +                              /*
+ +                               * first pass will use the same initial
+ +                               * identity mapping attribute.
+ +                               */
+ +                              pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
   
                                 if (is_kernel_text(addr))
                                         prot = PAGE_KERNEL_EXEC;
   
                                 pages_4k++;
- -                              set_pte(pte, pfn_pte(pfn, prot));
+ +                              if (mapping_iter == 1)
+ +                                      set_pte(pte, pfn_pte(pfn, init_prot));
+ +                              else
+ +                                      set_pte(pte, pfn_pte(pfn, prot));
                         }
                 }
         }
- -      update_page_count(PG_LEVEL_2M, pages_2m);
- -      update_page_count(PG_LEVEL_4K, pages_4k);
+ +      if (mapping_iter == 1) {
+ +              /*
+ +               * update direct mapping page count only in the first
+ +               * iteration.
+ +               */
+ +              update_page_count(PG_LEVEL_2M, pages_2m);
+ +              update_page_count(PG_LEVEL_4K, pages_4k);
+ +
+ +              /*
+ +               * Local global TLB flush, which flushes the previous
+ +               * mappings present in both the small- and large-page TLBs.
+ +               */
+ +              __flush_tlb_all();
+ +
+ +              /*
+ +               * Second iteration will set the actual desired PTE attributes.
+ +               */
+ +              mapping_iter = 2;
+ +              goto repeat;
+ +      }
   }
   
   /*
@@@ -513,11 -459,7 +514,7 @@@ static void __init pagetable_init(void
   {
         pgd_t *pgd_base = swapper_pg_dir;
   
-       paravirt_pagetable_setup_start(pgd_base);
- 
         permanent_kmaps_init(pgd_base);
- 
-       paravirt_pagetable_setup_done(pgd_base);
   }
   
   #ifdef CONFIG_ACPI_SLEEP
@@@ -777,7 -719,7 +774,7 @@@ void __init setup_bootmem_allocator(voi
         after_init_bootmem = 1;
   }
   
- -static void __init find_early_table_space(unsigned long end)
+ +static void __init find_early_table_space(unsigned long end, int use_pse)
   {
         unsigned long puds, pmds, ptes, tables, start;
   
@@@ -787,7 -729,7 +784,7 @@@
         pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
         tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
   
- -      if (cpu_has_pse) {
+ +      if (use_pse) {
                 unsigned long extra;
   
                 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
@@@ -827,22 -769,12 +824,22 @@@ unsigned long __init_refok init_memory_
         pgd_t *pgd_base = swapper_pg_dir;
         unsigned long start_pfn, end_pfn;
         unsigned long big_page_start;
+ +#ifdef CONFIG_DEBUG_PAGEALLOC
+ +      /*
+ +       * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ +       * This will simplify cpa(), which otherwise needs to support splitting
+ +       * large pages into small in interrupt context, etc.
+ +       */
+ +      int use_pse = 0;
+ +#else
+ +      int use_pse = cpu_has_pse;
+ +#endif
   
         /*
          * Find space for the kernel direct mapping tables.
          */
         if (!after_init_bootmem)
- -              find_early_table_space(end);
+ +              find_early_table_space(end, use_pse);
   
   #ifdef CONFIG_X86_PAE
         set_nx();
@@@ -888,7 -820,7 +885,7 @@@
         end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
         if (start_pfn < end_pfn)
                 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
- -                                              cpu_has_pse);
+ +                                           use_pse);
   
         /* tail is not big page alignment ? */
         start_pfn = end_pfn;
@@@ -1051,6 -983,7 +1048,6 @@@ void __init mem_init(void
         if (boot_cpu_data.wp_works_ok < 0)
                 test_wp_bit();
   
- -      cpa_init();
         save_pg_dir();
         zap_low_mappings();
   }
diff --combined arch/x86/mm/init_64.c

index 8c7eae490a2cb384af9a4d5b239bb0c7274d581f,770536ebf7e95c6360629d1931b910b4fc2a42db..fb30486c82f7f7c21e036feebb9cb58f957d8a36
--- 1/arch/x86/mm/init_64.c
--- 2/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@@ -225,7 -225,7 +225,7 @@@ void __init init_extra_mapping_uc(unsig
   void __init cleanup_highmap(void)
   {
         unsigned long vaddr = __START_KERNEL_map;
-       unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+       unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
         pmd_t *pmd = level2_kernel_pgt;
         pmd_t *last_pmd = pmd + PTRS_PER_PMD;
   
@@@ -271,8 -271,7 +271,8 @@@ static __ref void unmap_low_page(void *
   }
   
   static unsigned long __meminit
- -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+ +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+ +            pgprot_t prot)
   {
         unsigned pages = 0;
         unsigned long last_map_addr = end;
@@@ -290,43 -289,36 +290,43 @@@
                         break;
                 }
   
+ +              /*
+ +               * We will re-use the existing mapping.
+ +               * Xen for example has some special requirements, like mapping
+ +               * pagetable pages as RO. So assume that whoever pre-set up
+ +               * these mappings knew what they were doing.
+ +               */
                 if (pte_val(*pte))
                         continue;
   
                 if (0)
                         printk("   pte=%p addr=%lx pte=%016lx\n",
                                pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
- -              set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
- -              last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
                 pages++;
+ +              set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
+ +              last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
         }
+ +
         update_page_count(PG_LEVEL_4K, pages);
   
         return last_map_addr;
   }
   
   static unsigned long __meminit
- -phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+ +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+ +              pgprot_t prot)
   {
         pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
   
- -      return phys_pte_init(pte, address, end);
+ +      return phys_pte_init(pte, address, end, prot);
   }
   
   static unsigned long __meminit
   phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
- -                       unsigned long page_size_mask)
+ +            unsigned long page_size_mask, pgprot_t prot)
   {
         unsigned long pages = 0;
         unsigned long last_map_addr = end;
- -      unsigned long start = address;
   
         int i = pmd_index(address);
   
@@@ -334,7 -326,6 +334,7 @@@
                 unsigned long pte_phys;
                 pmd_t *pmd = pmd_page + pmd_index(address);
                 pte_t *pte;
+ +              pgprot_t new_prot = prot;
   
                 if (address >= end) {
                         if (!after_bootmem) {
@@@ -348,40 -339,27 +348,40 @@@
                         if (!pmd_large(*pmd)) {
                                 spin_lock(&init_mm.page_table_lock);
                                 last_map_addr = phys_pte_update(pmd, address,
- -                                                              end);
+ +                                                              end, prot);
                                 spin_unlock(&init_mm.page_table_lock);
+ +                              continue;
                         }
- -                      /* Count entries we're using from level2_ident_pgt */
- -                      if (start == 0)
- -                              pages++;
- -                      continue;
+ +                      /*
+ +                       * If we are ok with PG_LEVEL_2M mapping, then we will
+ +                       * use the existing mapping,
+ +                       *
+ +                       * Otherwise, we will split the large page mapping but
+ +                       * use the same existing protection bits except for
+ +                       * large page, so that we don't violate Intel's TLB
+ +                       * Application note (317080) which says, while changing
+ +                       * the page sizes, new and old translations should
+ +                       * not differ with respect to page frame and
+ +                       * attributes.
+ +                       */
+ +                      if (page_size_mask & (1 << PG_LEVEL_2M))
+ +                              continue;
+ +                      new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
                 }
   
                 if (page_size_mask & (1<<PG_LEVEL_2M)) {
                         pages++;
                         spin_lock(&init_mm.page_table_lock);
                         set_pte((pte_t *)pmd,
- -                              pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ +                              pfn_pte(address >> PAGE_SHIFT,
+ +                                      __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                         spin_unlock(&init_mm.page_table_lock);
                         last_map_addr = (address & PMD_MASK) + PMD_SIZE;
                         continue;
                 }
   
                 pte = alloc_low_page(&pte_phys);
- -              last_map_addr = phys_pte_init(pte, address, end);
+ +              last_map_addr = phys_pte_init(pte, address, end, new_prot);
                 unmap_low_page(pte);
   
                 spin_lock(&init_mm.page_table_lock);
@@@ -394,12 -372,12 +394,12 @@@
   
   static unsigned long __meminit
   phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- -                       unsigned long page_size_mask)
+ +              unsigned long page_size_mask, pgprot_t prot)
   {
         pmd_t *pmd = pmd_offset(pud, 0);
         unsigned long last_map_addr;
   
- -      last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
+ +      last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
         __flush_tlb_all();
         return last_map_addr;
   }
@@@ -416,7 -394,6 +416,7 @@@ phys_pud_init(pud_t *pud_page, unsigne
                 unsigned long pmd_phys;
                 pud_t *pud = pud_page + pud_index(addr);
                 pmd_t *pmd;
+ +              pgprot_t prot = PAGE_KERNEL;
   
                 if (addr >= end)
                         break;
@@@ -428,26 -405,10 +428,26 @@@
                 }
   
                 if (pud_val(*pud)) {
- -                      if (!pud_large(*pud))
+ +                      if (!pud_large(*pud)) {
                                 last_map_addr = phys_pmd_update(pud, addr, end,
- -                                                       page_size_mask);
- -                      continue;
+ +                                                       page_size_mask, prot);
+ +                              continue;
+ +                      }
+ +                      /*
+ +                       * If we are ok with PG_LEVEL_1G mapping, then we will
+ +                       * use the existing mapping.
+ +                       *
+ +                       * Otherwise, we will split the gbpage mapping but use
+ +                       * the same existing protection  bits except for large
+ +                       * page, so that we don't violate Intel's TLB
+ +                       * Application note (317080) which says, while changing
+ +                       * the page sizes, new and old translations should
+ +                       * not differ with respect to page frame and
+ +                       * attributes.
+ +                       */
+ +                      if (page_size_mask & (1 << PG_LEVEL_1G))
+ +                              continue;
+ +                      prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
                 }
   
                 if (page_size_mask & (1<<PG_LEVEL_1G)) {
@@@ -461,8 -422,7 +461,8 @@@
                 }
   
                 pmd = alloc_low_page(&pmd_phys);
- -              last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
+ +              last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
+ +                                            prot);
                 unmap_low_page(pmd);
   
                 spin_lock(&init_mm.page_table_lock);
@@@ -470,7 -430,6 +470,7 @@@
                 spin_unlock(&init_mm.page_table_lock);
         }
         __flush_tlb_all();
+ +
         update_page_count(PG_LEVEL_1G, pages);
   
         return last_map_addr;
@@@ -487,28 -446,27 +487,28 @@@ phys_pud_update(pgd_t *pgd, unsigned lo
         return phys_pud_init(pud, addr, end, page_size_mask);
   }
   
- -static void __init find_early_table_space(unsigned long end)
+ +static void __init find_early_table_space(unsigned long end, int use_pse,
+ +                                        int use_gbpages)
   {
         unsigned long puds, pmds, ptes, tables, start;
   
         puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
-       tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+       tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
- -      if (direct_gbpages) {
+ +      if (use_gbpages) {
                 unsigned long extra;
                 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
                 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
         } else
                 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-       tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+       tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
   
- -      if (cpu_has_pse) {
+ +      if (use_pse) {
                 unsigned long extra;
                 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
                 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
         } else
                 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+       tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
   
         /*
          * RED-PEN putting page tables only on node 0 could
@@@ -570,7 -528,6 +570,7 @@@ static unsigned long __init kernel_phys
                 pgd_populate(&init_mm, pgd, __va(pud_phys));
                 spin_unlock(&init_mm.page_table_lock);
         }
+ +      __flush_tlb_all();
   
         return last_map_addr;
   }
@@@ -614,7 -571,6 +614,7 @@@ unsigned long __init_refok init_memory_
   
         struct map_range mr[NR_RANGE_MR];
         int nr_range, i;
+ +      int use_pse, use_gbpages;
   
         printk(KERN_INFO "init_memory_mapping\n");
   
@@@ -628,21 -584,9 +628,21 @@@
         if (!after_bootmem)
                 init_gbpages();
   
- -      if (direct_gbpages)
+ +#ifdef CONFIG_DEBUG_PAGEALLOC
+ +      /*
+ +       * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ +       * This will simplify cpa(), which otherwise needs to support splitting
+ +       * large pages into small in interrupt context, etc.
+ +       */
+ +      use_pse = use_gbpages = 0;
+ +#else
+ +      use_pse = cpu_has_pse;
+ +      use_gbpages = direct_gbpages;
+ +#endif
+ +
+ +      if (use_gbpages)
                 page_size_mask |= 1 << PG_LEVEL_1G;
- -      if (cpu_has_pse)
+ +      if (use_pse)
                 page_size_mask |= 1 << PG_LEVEL_2M;
   
         memset(mr, 0, sizeof(mr));
@@@ -703,7 -647,7 +703,7 @@@
                          (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
   
         if (!after_bootmem)
- -              find_early_table_space(end);
+ +              find_early_table_space(end, use_pse, use_gbpages);
   
         for (i = 0; i < nr_range; i++)
                 last_map_addr = kernel_physical_mapping_init(
@@@ -862,6 -806,8 +862,6 @@@ void __init mem_init(void
                 reservedpages << (PAGE_SHIFT-10),
                 datasize >> 10,
                 initsize >> 10);
- -
- -      cpa_init();
   }
   
   void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --combined arch/x86/mm/ioremap.c

index d03c461e045e7402135c2bb086e6e5d040362f3f,cac6da54203bfdee878d61c2debb71104ce4f0b6..6ab3196d12b412979cbfba55467968b513670a49
--- 1/arch/x86/mm/ioremap.c
--- 2/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@@ -83,25 -83,6 +83,25 @@@ int page_is_ram(unsigned long pagenr
         return 0;
   }
   
+ +int pagerange_is_ram(unsigned long start, unsigned long end)
+ +{
+ +      int ram_page = 0, not_rampage = 0;
+ +      unsigned long page_nr;
+ +
+ +      for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+ +           ++page_nr) {
+ +              if (page_is_ram(page_nr))
+ +                      ram_page = 1;
+ +              else
+ +                      not_rampage = 1;
+ +
+ +              if (ram_page == not_rampage)
+ +                      return -1;
+ +      }
+ +
+ +      return ram_page;
+ +}
+ +
   /*
    * Fix up the linear direct mapping of the kernel to avoid cache attribute
    * conflicts.
@@@ -440,7 -421,7 +440,7 @@@ void unxlate_dev_mem_ptr(unsigned long 
         return;
   }
   
- int __initdata early_ioremap_debug;
+ static int __initdata early_ioremap_debug;
   
   static int __init early_ioremap_debug_setup(char *str)
   {
@@@ -566,7 -547,7 +566,7 @@@ static inline void __init early_clear_f
   }
   
   
- int __initdata early_ioremap_nested;
+ static int __initdata early_ioremap_nested;
   
   static int __init check_early_ioremap_leak(void)
   {
diff --combined arch/x86/mm/pageattr.c

index b6374d653d06d01a5bfe347f8f502aeee95b7e44,898fad617abe3dd25847f0a86348d0c8d1df5150..a9ec89c3fbca32c3fda0e65d8da8846c8f96ed2a
--- 1/arch/x86/mm/pageattr.c
--- 2/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@@ -25,27 -25,15 +25,27 @@@
    * The current flushing context - we pass it instead of 5 arguments:
    */
   struct cpa_data {
- -      unsigned long   vaddr;
+ +      unsigned long   *vaddr;
         pgprot_t        mask_set;
         pgprot_t        mask_clr;
         int             numpages;
- -      int             flushtlb;
+ +      int             flags;
         unsigned long   pfn;
         unsigned        force_split : 1;
+ +      int             curpage;
   };
   
+ +/*
+ + * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
+ + * using cpa_lock, so that no other CPU with stale large-page TLB entries
+ + * can change the page attribute in parallel with another CPU that is
+ + * splitting a large page entry and changing its attribute.
+ + */
+ +static DEFINE_SPINLOCK(cpa_lock);
+ +
+ +#define CPA_FLUSHTLB 1
+ +#define CPA_ARRAY 2
+ +
   #ifdef CONFIG_PROC_FS
   static unsigned long direct_pages_count[PG_LEVEL_NUM];
   
@@@ -96,7 -84,7 +96,7 @@@ static inline unsigned long highmap_sta
   
   static inline unsigned long highmap_end_pfn(void)
   {
-       return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+       return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
   }
   
   #endif
@@@ -202,41 -190,6 +202,41 @@@ static void cpa_flush_range(unsigned lo
         }
   }
   
+ +static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+ +{
+ +      unsigned int i, level;
+ +      unsigned long *addr;
+ +
+ +      BUG_ON(irqs_disabled());
+ +
+ +      on_each_cpu(__cpa_flush_range, NULL, 1);
+ +
+ +      if (!cache)
+ +              return;
+ +
+ +      /* 4M threshold */
+ +      if (numpages >= 1024) {
+ +              if (boot_cpu_data.x86_model >= 4)
+ +                      wbinvd();
+ +              return;
+ +      }
+ +      /*
+ +       * We only need to flush on one CPU,
+ +       * clflush is a MESI-coherent instruction that
+ +       * will cause all other CPUs to flush the same
+ +       * cachelines:
+ +       */
+ +      for (i = 0, addr = start; i < numpages; i++, addr++) {
+ +              pte_t *pte = lookup_address(*addr, &level);
+ +
+ +              /*
+ +               * Only flush present addresses:
+ +               */
+ +              if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+ +                      clflush_cache_range((void *) *addr, PAGE_SIZE);
+ +      }
+ +}
+ +
   /*
    * Certain areas of memory on x86 require very specific protection flags,
    * for example the BIOS area or kernel text. Callers don't always get this
@@@ -445,7 -398,7 +445,7 @@@ try_preserve_large_page(pte_t *kpte, un
                  */
                 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
                 __set_pmd_pte(kpte, address, new_pte);
- -              cpa->flushtlb = 1;
+ +              cpa->flags |= CPA_FLUSHTLB;
                 do_split = 0;
         }
   
@@@ -455,6 -408,84 +455,6 @@@ out_unlock
         return do_split;
   }
   
- -static LIST_HEAD(page_pool);
- -static unsigned long pool_size, pool_pages, pool_low;
- -static unsigned long pool_used, pool_failed;
- -
- -static void cpa_fill_pool(struct page **ret)
- -{
- -      gfp_t gfp = GFP_KERNEL;
- -      unsigned long flags;
- -      struct page *p;
- -
- -      /*
- -       * Avoid recursion (on debug-pagealloc) and also signal
- -       * our priority to get to these pagetables:
- -       */
- -      if (current->flags & PF_MEMALLOC)
- -              return;
- -      current->flags |= PF_MEMALLOC;
- -
- -      /*
- -       * Allocate atomically from atomic contexts:
- -       */
- -      if (in_atomic() || irqs_disabled() || debug_pagealloc)
- -              gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- -
- -      while (pool_pages < pool_size || (ret && !*ret)) {
- -              p = alloc_pages(gfp, 0);
- -              if (!p) {
- -                      pool_failed++;
- -                      break;
- -              }
- -              /*
- -               * If the call site needs a page right now, provide it:
- -               */
- -              if (ret && !*ret) {
- -                      *ret = p;
- -                      continue;
- -              }
- -              spin_lock_irqsave(&pgd_lock, flags);
- -              list_add(&p->lru, &page_pool);
- -              pool_pages++;
- -              spin_unlock_irqrestore(&pgd_lock, flags);
- -      }
- -
- -      current->flags &= ~PF_MEMALLOC;
- -}
- -
- -#define SHIFT_MB              (20 - PAGE_SHIFT)
- -#define ROUND_MB_GB           ((1 << 10) - 1)
- -#define SHIFT_MB_GB           10
- -#define POOL_PAGES_PER_GB     16
- -
- -void __init cpa_init(void)
- -{
- -      struct sysinfo si;
- -      unsigned long gb;
- -
- -      si_meminfo(&si);
- -      /*
- -       * Calculate the number of pool pages:
- -       *
- -       * Convert totalram (nr of pages) to MiB and round to the next
- -       * GiB. Shift MiB to Gib and multiply the result by
- -       * POOL_PAGES_PER_GB:
- -       */
- -      if (debug_pagealloc) {
- -              gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- -              pool_size = POOL_PAGES_PER_GB * gb;
- -      } else {
- -              pool_size = 1;
- -      }
- -      pool_low = pool_size;
- -
- -      cpa_fill_pool(NULL);
- -      printk(KERN_DEBUG
- -             "CPA: page pool initialized %lu of %lu pages preallocated\n",
- -             pool_pages, pool_size);
- -}
- -
   static int split_large_page(pte_t *kpte, unsigned long address)
   {
         unsigned long flags, pfn, pfninc = 1;
@@@ -463,15 -494,28 +463,15 @@@
         pgprot_t ref_prot;
         struct page *base;
   
- -      /*
- -       * Get a page from the pool. The pool list is protected by the
- -       * pgd_lock, which we have to take anyway for the split
- -       * operation:
- -       */
- -      spin_lock_irqsave(&pgd_lock, flags);
- -      if (list_empty(&page_pool)) {
- -              spin_unlock_irqrestore(&pgd_lock, flags);
- -              base = NULL;
- -              cpa_fill_pool(&base);
- -              if (!base)
- -                      return -ENOMEM;
- -              spin_lock_irqsave(&pgd_lock, flags);
- -      } else {
- -              base = list_first_entry(&page_pool, struct page, lru);
- -              list_del(&base->lru);
- -              pool_pages--;
- -
- -              if (pool_pages < pool_low)
- -                      pool_low = pool_pages;
- -      }
+ +      if (!debug_pagealloc)
+ +              spin_unlock(&cpa_lock);
+ +      base = alloc_pages(GFP_KERNEL, 0);
+ +      if (!debug_pagealloc)
+ +              spin_lock(&cpa_lock);
+ +      if (!base)
+ +              return -ENOMEM;
   
+ +      spin_lock_irqsave(&pgd_lock, flags);
         /*
          * Check for races, another CPU might have split this page
          * up for us already:
@@@ -528,8 -572,11 +528,8 @@@ out_unlock
          * If we dropped out via the lookup_address check under
          * pgd_lock then stick the page back into the pool:
          */
- -      if (base) {
- -              list_add(&base->lru, &page_pool);
- -              pool_pages++;
- -      } else
- -              pool_used++;
+ +      if (base)
+ +              __free_page(base);
         spin_unlock_irqrestore(&pgd_lock, flags);
   
         return 0;
@@@ -537,16 -584,11 +537,16 @@@
   
   static int __change_page_attr(struct cpa_data *cpa, int primary)
   {
- -      unsigned long address = cpa->vaddr;
+ +      unsigned long address;
         int do_split, err;
         unsigned int level;
         pte_t *kpte, old_pte;
   
+ +      if (cpa->flags & CPA_ARRAY)
+ +              address = cpa->vaddr[cpa->curpage];
+ +      else
+ +              address = *cpa->vaddr;
+ +
   repeat:
         kpte = lookup_address(address, &level);
         if (!kpte)
@@@ -558,7 -600,7 +558,7 @@@
                         return 0;
                 WARN(1, KERN_WARNING "CPA: called for zero pte. "
                        "vaddr = %lx cpa->vaddr = %lx\n", address,
- -                     cpa->vaddr);
+ +                     *cpa->vaddr);
                 return -EINVAL;
         }
   
@@@ -584,7 -626,7 +584,7 @@@
                  */
                 if (pte_val(old_pte) != pte_val(new_pte)) {
                         set_pte_atomic(kpte, new_pte);
- -                      cpa->flushtlb = 1;
+ +                      cpa->flags |= CPA_FLUSHTLB;
                 }
                 cpa->numpages = 1;
                 return 0;
@@@ -608,25 -650,7 +608,25 @@@
          */
         err = split_large_page(kpte, address);
         if (!err) {
- -              cpa->flushtlb = 1;
+ +              /*
+ +               * Do a global flush tlb after splitting the large page
+ +               * and before we do the actual change page attribute in the PTE.
+ +               *
+ +               * Without this, we violate the TLB application note, which says
+ +               * "The TLBs may contain both ordinary and large-page
+ +               *  translations for a 4-KByte range of linear addresses. This
+ +               *  may occur if software modifies the paging structures so that
+ +               *  the page size used for the address range changes. If the two
+ +               *  translations differ with respect to page frame or attributes
+ +               *  (e.g., permissions), processor behavior is undefined and may
+ +               *  be implementation-specific."
+ +               *
+ +               * We do this global TLB flush while holding cpa_lock, so that
+ +               * no other CPU with stale TLB entries can change, in parallel,
+ +               * the attribute of a page that also falls into the just-split
+ +               * large page entry.
+ +               */
+ +              flush_tlb_all();
                 goto repeat;
         }
   
@@@ -639,7 -663,6 +639,7 @@@ static int cpa_process_alias(struct cpa
   {
         struct cpa_data alias_cpa;
         int ret = 0;
+ +      unsigned long temp_cpa_vaddr, vaddr;
   
         if (cpa->pfn >= max_pfn_mapped)
                 return 0;
@@@ -652,24 -675,16 +652,24 @@@
          * No need to redo, when the primary call touched the direct
          * mapping already:
          */
- -      if (!(within(cpa->vaddr, PAGE_OFFSET,
+ +      if (cpa->flags & CPA_ARRAY)
+ +              vaddr = cpa->vaddr[cpa->curpage];
+ +      else
+ +              vaddr = *cpa->vaddr;
+ +
+ +      if (!(within(vaddr, PAGE_OFFSET,
                     PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
   #ifdef CONFIG_X86_64
- -              || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+ +              || within(vaddr, PAGE_OFFSET + (1UL<<32),
                     PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
   #endif
         )) {
   
                 alias_cpa = *cpa;
- -              alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+ +              temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+ +              alias_cpa.vaddr = &temp_cpa_vaddr;
+ +              alias_cpa.flags &= ~CPA_ARRAY;
+ +
   
                 ret = __change_page_attr_set_clr(&alias_cpa, 0);
         }
@@@ -681,7 -696,7 +681,7 @@@
          * No need to redo, when the primary call touched the high
          * mapping already:
          */
- -      if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+ +      if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
                 return 0;
   
         /*
@@@ -692,9 -707,8 +692,9 @@@
                 return 0;
   
         alias_cpa = *cpa;
- -      alias_cpa.vaddr =
- -              (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+ +      temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+ +      alias_cpa.vaddr = &temp_cpa_vaddr;
+ +      alias_cpa.flags &= ~CPA_ARRAY;
   
         /*
          * The high mapping range is imprecise, so ignore the return value.
@@@ -714,15 -728,8 +714,15 @@@ static int __change_page_attr_set_clr(s
                  * preservation check.
                  */
                 cpa->numpages = numpages;
+ +              /* for array changes, we can't use large page */
+ +              if (cpa->flags & CPA_ARRAY)
+ +                      cpa->numpages = 1;
   
+ +              if (!debug_pagealloc)
+ +                      spin_lock(&cpa_lock);
                 ret = __change_page_attr(cpa, checkalias);
+ +              if (!debug_pagealloc)
+ +                      spin_unlock(&cpa_lock);
                 if (ret)
                         return ret;
   
@@@ -739,11 -746,7 +739,11 @@@
                  */
                 BUG_ON(cpa->numpages > numpages);
                 numpages -= cpa->numpages;
- -              cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ +              if (cpa->flags & CPA_ARRAY)
+ +                      cpa->curpage++;
+ +              else
+ +                      *cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ +
         }
         return 0;
   }
@@@ -754,9 -757,9 +754,9 @@@ static inline int cache_attr(pgprot_t a
                 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
   }
   
- -static int change_page_attr_set_clr(unsigned long addr, int numpages,
+ +static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                     pgprot_t mask_set, pgprot_t mask_clr,
- -                                  int force_split)
+ +                                  int force_split, int array)
   {
         struct cpa_data cpa;
         int ret, cache, checkalias;
@@@ -771,38 -774,21 +771,38 @@@
                 return 0;
   
         /* Ensure we are PAGE_SIZE aligned */
- -      if (addr & ~PAGE_MASK) {
- -              addr &= PAGE_MASK;
- -              /*
- -               * People should not be passing in unaligned addresses:
- -               */
- -              WARN_ON_ONCE(1);
+ +      if (!array) {
+ +              if (*addr & ~PAGE_MASK) {
+ +                      *addr &= PAGE_MASK;
+ +                      /*
+ +                       * People should not be passing in unaligned addresses:
+ +                       */
+ +                      WARN_ON_ONCE(1);
+ +              }
+ +      } else {
+ +              int i;
+ +              for (i = 0; i < numpages; i++) {
+ +                      if (addr[i] & ~PAGE_MASK) {
+ +                              addr[i] &= PAGE_MASK;
+ +                              WARN_ON_ONCE(1);
+ +                      }
+ +              }
         }
   
+ +      /* Must avoid aliasing mappings in the highmem code */
+ +      kmap_flush_unused();
+ +
         cpa.vaddr = addr;
         cpa.numpages = numpages;
         cpa.mask_set = mask_set;
         cpa.mask_clr = mask_clr;
- -      cpa.flushtlb = 0;
+ +      cpa.flags = 0;
+ +      cpa.curpage = 0;
         cpa.force_split = force_split;
   
+ +      if (array)
+ +              cpa.flags |= CPA_ARRAY;
+ +
         /* No alias checking for _NX bit modifications */
         checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
   
@@@ -811,7 -797,7 +811,7 @@@
         /*
          * Check whether we really changed something:
          */
- -      if (!cpa.flushtlb)
+ +      if (!(cpa.flags & CPA_FLUSHTLB))
                 goto out;
   
         /*
@@@ -826,30 -812,27 +826,30 @@@
          * error case we fall back to cpa_flush_all (which uses
          * wbinvd):
          */
- -      if (!ret && cpu_has_clflush)
- -              cpa_flush_range(addr, numpages, cache);
- -      else
+ +      if (!ret && cpu_has_clflush) {
+ +              if (cpa.flags & CPA_ARRAY)
+ +                      cpa_flush_array(addr, numpages, cache);
+ +              else
+ +                      cpa_flush_range(*addr, numpages, cache);
+ +      } else
                 cpa_flush_all(cache);
   
   out:
- -      cpa_fill_pool(NULL);
- -
         return ret;
   }
   
- -static inline int change_page_attr_set(unsigned long addr, int numpages,
- -                                     pgprot_t mask)
+ +static inline int change_page_attr_set(unsigned long *addr, int numpages,
+ +                                     pgprot_t mask, int array)
   {
- -      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+ +      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+ +              array);
   }
   
- -static inline int change_page_attr_clear(unsigned long addr, int numpages,
- -                                       pgprot_t mask)
+ +static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+ +                                       pgprot_t mask, int array)
   {
- -      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+ +      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+ +              array);
   }
   
   int _set_memory_uc(unsigned long addr, int numpages)
@@@ -857,8 -840,8 +857,8 @@@
         /*
          * for now UC MINUS. see comments in ioremap_nocache()
          */
- -      return change_page_attr_set(addr, numpages,
- -                                  __pgprot(_PAGE_CACHE_UC_MINUS));
+ +      return change_page_attr_set(&addr, numpages,
+ +                                  __pgprot(_PAGE_CACHE_UC_MINUS), 0);
   }
   
   int set_memory_uc(unsigned long addr, int numpages)
@@@ -874,48 -857,10 +874,48 @@@
   }
   EXPORT_SYMBOL(set_memory_uc);
   
+ +int set_memory_array_uc(unsigned long *addr, int addrinarray)
+ +{
+ +      unsigned long start;
+ +      unsigned long end;
+ +      int i;
+ +      /*
+ +       * for now UC MINUS. see comments in ioremap_nocache()
+ +       */
+ +      for (i = 0; i < addrinarray; i++) {
+ +              start = __pa(addr[i]);
+ +              for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ +                      if (end != __pa(addr[i + 1]))
+ +                              break;
+ +                      i++;
+ +              }
+ +              if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+ +                      goto out;
+ +      }
+ +
+ +      return change_page_attr_set(addr, addrinarray,
+ +                                  __pgprot(_PAGE_CACHE_UC_MINUS), 1);
+ +out:
+ +      for (i = 0; i < addrinarray; i++) {
+ +              unsigned long tmp = __pa(addr[i]);
+ +
+ +              if (tmp == start)
+ +                      break;
+ +              for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ +                      if (end != __pa(addr[i + 1]))
+ +                              break;
+ +                      i++;
+ +              }
+ +              free_memtype(tmp, end);
+ +      }
+ +      return -EINVAL;
+ +}
+ +EXPORT_SYMBOL(set_memory_array_uc);
+ +
   int _set_memory_wc(unsigned long addr, int numpages)
   {
- -      return change_page_attr_set(addr, numpages,
- -                                  __pgprot(_PAGE_CACHE_WC));
+ +      return change_page_attr_set(&addr, numpages,
+ +                                  __pgprot(_PAGE_CACHE_WC), 0);
   }
   
   int set_memory_wc(unsigned long addr, int numpages)
@@@ -933,8 -878,8 +933,8 @@@ EXPORT_SYMBOL(set_memory_wc)
   
   int _set_memory_wb(unsigned long addr, int numpages)
   {
- -      return change_page_attr_clear(addr, numpages,
- -                                    __pgprot(_PAGE_CACHE_MASK));
+ +      return change_page_attr_clear(&addr, numpages,
+ +                                    __pgprot(_PAGE_CACHE_MASK), 0);
   }
   
   int set_memory_wb(unsigned long addr, int numpages)
@@@ -945,57 -890,39 +945,59 @@@
   }
   EXPORT_SYMBOL(set_memory_wb);
   
+ +int set_memory_array_wb(unsigned long *addr, int addrinarray)
+ +{
+ +      int i;
+ +
+ +      for (i = 0; i < addrinarray; i++) {
+ +              unsigned long start = __pa(addr[i]);
+ +              unsigned long end;
+ +
+ +              for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ +                      if (end != __pa(addr[i + 1]))
+ +                              break;
+ +                      i++;
+ +              }
+ +              free_memtype(start, end);
+ +      }
+ +      return change_page_attr_clear(addr, addrinarray,
+ +                                    __pgprot(_PAGE_CACHE_MASK), 1);
+ +}
+ +EXPORT_SYMBOL(set_memory_array_wb);
+ +
   int set_memory_x(unsigned long addr, int numpages)
   {
- -      return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+ +      return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
   }
   EXPORT_SYMBOL(set_memory_x);
   
   int set_memory_nx(unsigned long addr, int numpages)
   {
- -      return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+ +      return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
   }
   EXPORT_SYMBOL(set_memory_nx);
   
   int set_memory_ro(unsigned long addr, int numpages)
   {
- -      return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+ +      return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
   }
+ EXPORT_SYMBOL_GPL(set_memory_ro);
   
   int set_memory_rw(unsigned long addr, int numpages)
   {
- -      return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+ +      return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
   }
+ EXPORT_SYMBOL_GPL(set_memory_rw);
   
   int set_memory_np(unsigned long addr, int numpages)
   {
- -      return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+ +      return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
   }
   
   int set_memory_4k(unsigned long addr, int numpages)
   {
- -      return change_page_attr_set_clr(addr, numpages, __pgprot(0),
- -                                      __pgprot(0), 1);
+ +      return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+ +                                      __pgprot(0), 1, 0);
   }
   
   int set_pages_uc(struct page *page, int numpages)
@@@ -1048,38 -975,22 +1050,38 @@@ int set_pages_rw(struct page *page, in
   
   static int __set_pages_p(struct page *page, int numpages)
   {
- -      struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ +      unsigned long tempaddr = (unsigned long) page_address(page);
+ +      struct cpa_data cpa = { .vaddr = &tempaddr,
                                 .numpages = numpages,
                                 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
- -                              .mask_clr = __pgprot(0)};
+ +                              .mask_clr = __pgprot(0),
+ +                              .flags = 0};
   
- -      return __change_page_attr_set_clr(&cpa, 1);
+ +      /*
+ +       * No alias checking needed for setting present flag. Otherwise,
+ +       * we may need to break large pages for 64-bit kernel text
+ +       * mappings (this adds to complexity if we want to do this from
+ +       * atomic context especially). Let's keep it simple!
+ +       */
+ +      return __change_page_attr_set_clr(&cpa, 0);
   }
   
   static int __set_pages_np(struct page *page, int numpages)
   {
- -      struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ +      unsigned long tempaddr = (unsigned long) page_address(page);
+ +      struct cpa_data cpa = { .vaddr = &tempaddr,
                                 .numpages = numpages,
                                 .mask_set = __pgprot(0),
- -                              .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+ +                              .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ +                              .flags = 0};
   
- -      return __change_page_attr_set_clr(&cpa, 1);
+ +      /*
+ +       * No alias checking needed for setting not present flag. Otherwise,
+ +       * we may need to break large pages for 64-bit kernel text
+ +       * mappings (this adds to complexity if we want to do this from
+ +       * atomic context especially). Let's keep it simple!
+ +       */
+ +      return __change_page_attr_set_clr(&cpa, 0);
   }
   
   void kernel_map_pages(struct page *page, int numpages, int enable)
@@@ -1099,8 -1010,11 +1101,8 @@@
   
         /*
          * The return value is ignored as the calls cannot fail.
- -       * Large pages are kept enabled at boot time, and are
- -       * split up quickly with DEBUG_PAGEALLOC. If a splitup
- -       * fails here (due to temporary memory shortage) no damage
- -       * is done because we just keep the largepage intact up
- -       * to the next attempt when it will likely be split up:
+ +       * Large pages for identity mappings are not used at boot time
+ +       * and hence no memory allocations during large page split.
          */
         if (enable)
                 __set_pages_p(page, numpages);
@@@ -1112,8 -1026,53 +1114,8 @@@
          * but that can deadlock->flush only current cpu:
          */
         __flush_tlb_all();
- -
- -      /*
- -       * Try to refill the page pool here. We can do this only after
- -       * the tlb flush.
- -       */
- -      cpa_fill_pool(NULL);
   }
   
- -#ifdef CONFIG_DEBUG_FS
- -static int dpa_show(struct seq_file *m, void *v)
- -{
- -      seq_puts(m, "DEBUG_PAGEALLOC\n");
- -      seq_printf(m, "pool_size     : %lu\n", pool_size);
- -      seq_printf(m, "pool_pages    : %lu\n", pool_pages);
- -      seq_printf(m, "pool_low      : %lu\n", pool_low);
- -      seq_printf(m, "pool_used     : %lu\n", pool_used);
- -      seq_printf(m, "pool_failed   : %lu\n", pool_failed);
- -
- -      return 0;
- -}
- -
- -static int dpa_open(struct inode *inode, struct file *filp)
- -{
- -      return single_open(filp, dpa_show, NULL);
- -}
- -
- -static const struct file_operations dpa_fops = {
- -      .open           = dpa_open,
- -      .read           = seq_read,
- -      .llseek         = seq_lseek,
- -      .release        = single_release,
- -};
- -
- -static int __init debug_pagealloc_proc_init(void)
- -{
- -      struct dentry *de;
- -
- -      de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
- -                               &dpa_fops);
- -      if (!de)
- -              return -ENOMEM;
- -
- -      return 0;
- -}
- -__initcall(debug_pagealloc_proc_init);
- -#endif
- -
   #ifdef CONFIG_HIBERNATION
   
   bool kernel_page_present(struct page *page)
diff --combined include/asm-x86/cacheflush.h

index 092b9b4eb00301d765aa385c3f21c74c5eb80807,59859cb28a36ce486478f147f79c7bdfa26ae5f7..68840ef1b35a01c01fa7e23e45f3b5c0415d39e8
--- 1/include/asm-x86/cacheflush.h
--- 2/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@@ -1,5 -1,5 +1,5 @@@
- #ifndef _ASM_X86_CACHEFLUSH_H
- #define _ASM_X86_CACHEFLUSH_H
+ #ifndef ASM_X86__CACHEFLUSH_H
+ #define ASM_X86__CACHEFLUSH_H
   
   /* Keep includes the same across arches.  */
   #include <linux/mm.h>
@@@ -24,8 -24,6 +24,8 @@@
   #define copy_from_user_page(vma, page, vaddr, dst, src, len)  \
         memcpy((dst), (src), (len))
   
+ +#define PG_non_WB                             PG_arch_1
+ +PAGEFLAG(NonWB, non_WB)
   
   /*
    * The set_memory_* API can be used to change various attributes of a virtual
@@@ -68,9 -66,6 +68,9 @@@ int set_memory_rw(unsigned long addr, i
   int set_memory_np(unsigned long addr, int numpages);
   int set_memory_4k(unsigned long addr, int numpages);
   
+ +int set_memory_array_uc(unsigned long *addr, int addrinarray);
+ +int set_memory_array_wb(unsigned long *addr, int addrinarray);
+ +
   /*
    * For legacy compatibility with the old APIs, a few functions
    * are provided that work on a "struct page".
@@@ -101,6 -96,8 +101,6 @@@ int set_pages_rw(struct page *page, in
   
   void clflush_cache_range(void *addr, unsigned int size);
   
- -void cpa_init(void);
- -
   #ifdef CONFIG_DEBUG_RODATA
   void mark_rodata_ro(void);
   extern const int rodata_test_data;
@@@ -115,4 -112,4 +115,4 @@@ static inline int rodata_test(void
   }
   #endif
   
- #endif
+ #endif /* ASM_X86__CACHEFLUSH_H */
diff --combined include/asm-x86/page.h

index 3407ac12ba34e6f20cfeddb3aff796f91aabe578,79544e6ffb8b8b06060ddb93988972b618e3ef02..c91574776751396207f1aa2ce903e15a82239d47
--- 1/include/asm-x86/page.h
--- 2/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@@ -1,5 -1,5 +1,5 @@@
- #ifndef _ASM_X86_PAGE_H
- #define _ASM_X86_PAGE_H
+ #ifndef ASM_X86__PAGE_H
+ #define ASM_X86__PAGE_H
   
   #include <linux/const.h>
   
@@@ -57,7 -57,6 +57,7 @@@ typedef struct { pgdval_t pgd; } pgd_t
   typedef struct { pgprotval_t pgprot; } pgprot_t;
   
   extern int page_is_ram(unsigned long pagenr);
+ +extern int pagerange_is_ram(unsigned long start, unsigned long end);
   extern int devmem_is_allowed(unsigned long pagenr);
   extern void map_devmem(unsigned long pfn, unsigned long size,
                        pgprot_t vma_prot);
@@@ -200,4 -199,4 +200,4 @@@ static inline pteval_t native_pte_flags
   #define __HAVE_ARCH_GATE_AREA 1
   
   #endif        /* __KERNEL__ */
- #endif        /* _ASM_X86_PAGE_H */
+ #endif /* ASM_X86__PAGE_H */
diff --combined include/asm-x86/pgtable.h

index bbf0f591d1b89d9414be5faeeb70484fc5c668a9,888add7b088233ab53eaf0a252f32d658de53128..ed932453ef26a3538ce3fd2c7670dfc78c18d9b9
--- 1/include/asm-x86/pgtable.h
--- 2/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@@ -1,5 -1,5 +1,5 @@@
- #ifndef _ASM_X86_PGTABLE_H
- #define _ASM_X86_PGTABLE_H
+ #ifndef ASM_X86__PGTABLE_H
+ #define ASM_X86__PGTABLE_H
   
   #define FIRST_USER_ADDRESS    0
   
@@@ -19,7 -19,6 +19,7 @@@
   #define _PAGE_BIT_UNUSED3     11
   #define _PAGE_BIT_PAT_LARGE   12      /* On 2MB or 1GB pages */
   #define _PAGE_BIT_SPECIAL     _PAGE_BIT_UNUSED1
+ +#define _PAGE_BIT_CPA_TEST    _PAGE_BIT_UNUSED1
   #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
   
   #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
@@@ -37,7 -36,6 +37,7 @@@
   #define _PAGE_PAT     (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
   #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
   #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
+ +#define _PAGE_CPA_TEST        (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
   #define __HAVE_ARCH_PTE_SPECIAL
   
   #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
@@@ -132,17 -130,6 +132,17 @@@
   #define __S110        PAGE_SHARED_EXEC
   #define __S111        PAGE_SHARED_EXEC
   
+ +/*
+ + * early identity mapping  pte attrib macros.
+ + */
+ +#ifdef CONFIG_X86_64
+ +#define __PAGE_KERNEL_IDENT_LARGE_EXEC        __PAGE_KERNEL_LARGE_EXEC
+ +#else
+ +#define PTE_IDENT_ATTR         0x003          /* PRESENT+RW */
+ +#define PDE_IDENT_ATTR         0x063          /* PRESENT+RW+DIRTY+ACCESSED */
+ +#define PGD_IDENT_ATTR         0x001          /* PRESENT (no other attributes) */
+ +#endif
+ +
   #ifndef __ASSEMBLY__
   
   /*
@@@ -199,6 -186,13 +199,13 @@@ static inline int pte_special(pte_t pte
         return pte_val(pte) & _PAGE_SPECIAL;
   }
   
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+       return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+ 
+ #define pte_page(pte) pfn_to_page(pte_pfn(pte))
+ 
   static inline int pmd_large(pmd_t pte)
   {
         return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@@ -326,6 -320,8 +333,8 @@@ static inline void native_pagetable_set
   static inline void native_pagetable_setup_done(pgd_t *base) {}
   #endif
   
+ extern int arch_report_meminfo(char *page);
+ 
   #ifdef CONFIG_PARAVIRT
   #include <asm/paravirt.h>
   #else  /* !CONFIG_PARAVIRT */
@@@ -534,4 -530,4 +543,4 @@@ static inline void clone_pgd_range(pgd_
   #include <asm-generic/pgtable.h>
   #endif        /* __ASSEMBLY__ */
   
- #endif        /* _ASM_X86_PGTABLE_H */
+ #endif /* ASM_X86__PGTABLE_H */
author	Ingo Molnar <mingo@elte.hu>
	Fri, 10 Oct 2008 17:30:08 +0000 (19:30 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 10 Oct 2008 17:30:08 +0000 (19:30 +0200)
		1	2
arch/x86/mm/init_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/init_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/ioremap.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/pageattr.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/cacheflush.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/page.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history