linux-2.6-omap-h63xx.git / arch/x86/mm/pageattr_32.c
x86: c_p_a() make it more robust against use of PAT bits
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

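/*
 * lookup_address - walk the kernel page tables for a virtual address.
 *
 * Returns a pointer to the pte mapping @address, or NULL if the address
 * is not mapped.  *level is set to 2 if the mapping is a large (2/4MB)
 * pmd entry and to 3 if it is a regular 4K pte.
 */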
pte_t *lookup_address(unsigned long address, int *level)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;
        *level = 2;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;
        *level = 3;

        return pte_offset_kernel(pmd, address);
}

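/*
 * Split one large (2/4MB) kernel mapping into a page of 4K ptes.  The
 * pte covering @address gets @prot, all other ptes in the range get
 * @ref_prot.  Returns the newly allocated pte page, or NULL on failure.
 */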
static struct page *
split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
{
        unsigned long addr;
        struct page *base;
        pte_t *pbase;
        int i;

        spin_unlock_irq(&cpa_lock);
        base = alloc_pages(GFP_KERNEL, 0);
        spin_lock_irq(&cpa_lock);
        if (!base)
                return NULL;

        /*
         * page_private is used to track the number of entries in
         * the page table page that have non-standard attributes.
         */
        SetPagePrivate(base);
        page_private(base) = 0;

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));

        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
                                           addr == address ? prot : ref_prot));
        }
        return base;
}

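/* Flush a single page from the CPU caches, one cache line at a time. */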
static void cache_flush_page(struct page *p)
{
        void *addr = page_address(p);
        int i;

        for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
                clflush(addr + i);
}

static void flush_kernel_map(void *arg)
{
        struct list_head *lh = (struct list_head *)arg;
        struct page *p;

        /*
         * Flush everything to work around an erratum in early Athlons
         * regarding large page flushing.
         */
        __flush_tlb_all();

        /* High level code is not ready for clflush yet */
        if (0 && cpu_has_clflush) {
                list_for_each_entry(p, lh, lru)
                        cache_flush_page(p);
        } else {
                if (boot_cpu_data.x86_model >= 4)
                        wbinvd();
        }
}

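/*
 * Set a kernel pmd/pte entry in init_mm.  Without shared kernel pmds the
 * kernel part of every pgd carries its own copy of the pmd, so the change
 * must also be replicated into every pgd on pgd_list.
 */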
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        unsigned long flags;
        struct page *page;

        /* change init_mm */
        set_pte_atomic(kpte, pte);
        if (SHARED_KERNEL_PMD)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index) {
                pgd_t *pgd;
                pud_t *pud;
                pmd_t *pmd;

                pgd = (pgd_t *)page_address(page) + pgd_index(address);
                pud = pud_offset(pgd, address);
                pmd = pmd_offset(pud, address);
                set_pte_atomic((pte_t *)pmd, pte);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a large
 * page again.
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
        pgprot_t ref_prot;
        pte_t *linear;

        ref_prot =
        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

        linear = (pte_t *)
                pmd_offset(pud_offset(pgd_offset_k(address), address), address);
        set_pmd_pte(linear, address,
                    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
                            ref_prot));
}

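/*
 * Queue a pte page on df_list exactly once (PG_arch_1 marks "already
 * queued") so that global_flush_tlb() can flush it and possibly free it.
 */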
static inline void save_page(struct page *kpte_page)
{
        if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
                list_add(&kpte_page->lru, &df_list);
}

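/*
 * Change the attributes of a single kernel page.  If a non-default
 * protection is requested for an address currently covered by a large
 * page, the large page is split first; page_private() of the pte page
 * counts how many of its entries still carry non-default attributes,
 * and once that count drops back to zero (and the pte page was not
 * created at boot) the mapping is reverted to a large page.
 */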
static int __change_page_attr(struct page *page, pgprot_t prot)
{
        struct page *kpte_page;
        unsigned long address;
        pte_t *kpte;
        int level;

        BUG_ON(PageHighMem(page));
        address = (unsigned long)page_address(page);

        kpte = lookup_address(address, &level);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
                if (level == 3) {
                        set_pte_atomic(kpte, mk_pte(page, prot));
                } else {
                        struct page *split;
                        pgprot_t ref_prot;

                        ref_prot =
                        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                                ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
                        split = split_large_page(address, prot, ref_prot);
                        if (!split)
                                return -ENOMEM;

                        set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
                        kpte_page = split;
                }
                page_private(kpte_page)++;
        } else {
                if (level == 3) {
                        set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
                        BUG_ON(page_private(kpte_page) == 0);
                        page_private(kpte_page)--;
                } else
                        BUG();
        }

        /*
         * If the pte was reserved, it means it was created at boot
         * time (not via split_large_page) and in turn we must not
         * replace it with a large page.
         */

        save_page(kpte_page);
        if (!PageReserved(kpte_page)) {
                if (cpu_has_pse && (page_private(kpte_page) == 0)) {
                        paravirt_release_pt(page_to_pfn(kpte_page));
                        revert_page(kpte_page, address);
                }
        }
        return 0;
}

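/* Run flush_kernel_map() on every CPU for the queued pte pages. */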
static inline void flush_map(struct list_head *l)
{
        on_each_cpu(flush_kernel_map, l, 1, 1);
}

/*
 * Change the page attributes of a page in the linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist.  This also changes the page attributes
 * in the kernel linear mapping.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this (see the usage sketch
 * following global_flush_tlb() below).
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        unsigned long flags;
        int err = 0, i;

        spin_lock_irqsave(&cpa_lock, flags);
        for (i = 0; i < numpages; i++, page++) {
                err = __change_page_attr(page, prot);
                if (err)
                        break;
        }
        spin_unlock_irqrestore(&cpa_lock, flags);

        return err;
}
EXPORT_SYMBOL(change_page_attr);

void global_flush_tlb(void)
{
        struct page *pg, *next;
        struct list_head l;

        BUG_ON(irqs_disabled());

        spin_lock_irq(&cpa_lock);
        list_replace_init(&df_list, &l);
        spin_unlock_irq(&cpa_lock);
        flush_map(&l);
        list_for_each_entry_safe(pg, next, &l, lru) {
                list_del(&pg->lru);
                clear_bit(PG_arch_1, &pg->flags);
                if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
                        continue;
                ClearPagePrivate(pg);
                __free_page(pg);
        }
}
EXPORT_SYMBOL(global_flush_tlb);
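
/*
 * Illustrative usage sketch (not part of the original file): a caller
 * that wants a page uncached in the kernel linear map would change its
 * attributes and then flush.  The helper name, the single-page count and
 * the PAGE_KERNEL_NOCACHE protection are assumptions for the example only.
 *
 *	int make_page_uncached(struct page *page)
 *	{
 *		int err = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *
 *		if (err)
 *			return err;
 *		global_flush_tlb();
 *		return 0;
 *	}
 */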

#ifdef CONFIG_DEBUG_PAGEALLOC
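/*
 * With DEBUG_PAGEALLOC, pages are unmapped from the kernel linear
 * mapping while they are free (enable == 0 maps them with __pgprot(0)),
 * so any use-after-free access faults immediately.
 */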
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * The return value is ignored - the calls cannot fail, since
         * large pages are disabled at boot time.
         */
        change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

        /*
         * We should send an IPI and flush all TLBs, but that can
         * deadlock, so flush only the current CPU.
         */
        __flush_tlb_all();
}
#endif