hugetlb: pull gigantic page initialisation out of the default path
[linux-2.6-omap-h63xx.git] / mm / page_alloc.c
index 9eb9eb92828510efbc7100addea5fa766c8f48ea..54069e64e3a8a4b0c9a0450aba08b7f0fa21c5f9 100644 (file)
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-       void *pc = page_get_page_cgroup(page);
-
        printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
                "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
                current->comm, page, (int)(2*sizeof(unsigned long)),
                (unsigned long)page->flags, page->mapping,
                page_mapcount(page), page_count(page));
-       if (pc) {
-               printk(KERN_EMERG "cgroup:%p\n", pc);
-               page_reset_bad_cgroup(page);
-       }
+
        printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
                KERN_EMERG "Backtrace:\n");
        dump_stack();
@@ -265,6 +260,23 @@ static void free_compound_page(struct page *page)
 }
 
 void prep_compound_page(struct page *page, unsigned long order)
+{
+       int i;
+       int nr_pages = 1 << order;
+
+       set_compound_page_dtor(page, free_compound_page);
+       set_compound_order(page, order);
+       __SetPageHead(page);
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
+
+               __SetPageTail(p);
+               p->first_page = page;
+       }
+}
+
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
 {
        int i;
        int nr_pages = 1 << order;
@@ -273,19 +285,17 @@ void prep_compound_page(struct page *page, unsigned long order)
        set_compound_page_dtor(page, free_compound_page);
        set_compound_order(page, order);
        __SetPageHead(page);
-       for (i = 1; i < nr_pages; i++, p++) {
-               if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-                       p = pfn_to_page(page_to_pfn(page) + i);
+       for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                __SetPageTail(p);
                p->first_page = page;
        }
 }
+#endif
 
 static void destroy_compound_page(struct page *page, unsigned long order)
 {
        int i;
        int nr_pages = 1 << order;
-       struct page *p = page + 1;
 
        if (unlikely(compound_order(page) != order))
                bad_page(page);
@@ -293,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
        if (unlikely(!PageHead(page)))
                        bad_page(page);
        __ClearPageHead(page);
-       for (i = 1; i < nr_pages; i++, p++) {
-               if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-                       p = pfn_to_page(page_to_pfn(page) + i);
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
 
                if (unlikely(!PageTail(p) |
                                (p->first_page != page)))
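
The boundary test removed in the two hunks above exists because mem_map is only guaranteed to be contiguous within a MAX_ORDER block, so the tail-page pointer had to be re-derived from the pfn whenever the index crossed that boundary; ordinary compound pages fit inside one block and can use plain page + i, while the gigantic-page variant keeps the boundary handling via mem_map_next(). Below is a minimal standalone sketch of that chunk-boundary pattern; every name in it (CHUNK_NR, struct elem, chunk_elem) is invented for illustration and it makes no claim about the kernel's actual memory-map layout.

#include <stdio.h>
#include <stdlib.h>

#define CHUNK_NR 4                       /* stand-in for MAX_ORDER_NR_PAGES */

struct elem { int tail; };

static struct elem *chunks[8];           /* elements live in separate chunks */

static struct elem *chunk_elem(unsigned long i)  /* stand-in for pfn_to_page() */
{
        return &chunks[i / CHUNK_NR][i % CHUNK_NR];
}

int main(void)
{
        unsigned long i, nr = 32;        /* 8 chunks of 4 elements */
        struct elem *p;

        for (i = 0; i < nr / CHUNK_NR; i++)
                chunks[i] = calloc(CHUNK_NR, sizeof(struct elem));

        /* Same shape as the removed loop: p++ is fine inside a chunk, but the
         * pointer must be re-derived whenever the index crosses a boundary. */
        p = chunk_elem(0) + 1;
        for (i = 1; i < nr; i++, p++) {
                if ((i & (CHUNK_NR - 1)) == 0)
                        p = chunk_elem(i);
                p->tail = 1;
        }

        printf("marked %lu tail elements\n", nr - 1);
        return 0;
}
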
@@ -454,14 +463,16 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+       free_page_mlock(page);
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
                (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
                bad_page(page);
        if (PageDirty(page))
                __ClearPageDirty(page);
+       if (PageSwapBacked(page))
+               __ClearPageSwapBacked(page);
        /*
         * For now, we report if PG_reserved was found set, but do not
         * clear it, and do not free the page.  But we shall soon need
@@ -600,7 +611,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
                (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
                bad_page(page);
@@ -614,7 +624,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
                        1 << PG_referenced | 1 << PG_arch_1 |
-                       1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+                       1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       | 1 << PG_mlocked
+#endif
+                       );
        set_page_private(page, 0);
        set_page_refcounted(page);
 
@@ -1862,10 +1876,21 @@ void show_free_areas(void)
                }
        }
 
-       printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+       printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+               " inactive_file:%lu"
+//TODO:  check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+               " unevictable:%lu"
+#endif
+               " dirty:%lu writeback:%lu unstable:%lu\n"
                " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-               global_page_state(NR_ACTIVE),
-               global_page_state(NR_INACTIVE),
+               global_page_state(NR_ACTIVE_ANON),
+               global_page_state(NR_ACTIVE_FILE),
+               global_page_state(NR_INACTIVE_ANON),
+               global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+               global_page_state(NR_UNEVICTABLE),
+#endif
                global_page_state(NR_FILE_DIRTY),
                global_page_state(NR_WRITEBACK),
                global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1913,13 @@ void show_free_areas(void)
                        " min:%lukB"
                        " low:%lukB"
                        " high:%lukB"
-                       " active:%lukB"
-                       " inactive:%lukB"
+                       " active_anon:%lukB"
+                       " inactive_anon:%lukB"
+                       " active_file:%lukB"
+                       " inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       " unevictable:%lukB"
+#endif
                        " present:%lukB"
                        " pages_scanned:%lu"
                        " all_unreclaimable? %s"
@@ -1899,8 +1929,13 @@ void show_free_areas(void)
                        K(zone->pages_min),
                        K(zone->pages_low),
                        K(zone->pages_high),
-                       K(zone_page_state(zone, NR_ACTIVE)),
-                       K(zone_page_state(zone, NR_INACTIVE)),
+                       K(zone_page_state(zone, NR_ACTIVE_ANON)),
+                       K(zone_page_state(zone, NR_INACTIVE_ANON)),
+                       K(zone_page_state(zone, NR_ACTIVE_FILE)),
+                       K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
                        K(zone->present_pages),
                        zone->pages_scanned,
                        (zone_is_all_unreclaimable(zone) ? "yes" : "no")
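
Both show_free_areas() hunks rely on the compiler concatenating adjacent string literals, so an optional field such as " unevictable:%lu" and its matching argument can be wrapped in the same #ifdef and disappear together when the option is disabled. A minimal standalone sketch of that pattern follows; the DEMO_UNEVICTABLE switch and the counter values are made up for the example rather than taken from the kernel's CONFIG_UNEVICTABLE_LRU and vmstat counters.

#include <stdio.h>

#define DEMO_UNEVICTABLE        /* comment this out to drop the field */

int main(void)
{
        unsigned long active_file = 120, inactive_file = 80;
#ifdef DEMO_UNEVICTABLE
        unsigned long unevictable = 5;
#endif

        /* Adjacent literals form one format string; the optional field and
         * its argument are guarded by the same preprocessor switch. */
        printf("active_file:%lu inactive_file:%lu"
#ifdef DEMO_UNEVICTABLE
               " unevictable:%lu"
#endif
               "\n",
               active_file, inactive_file
#ifdef DEMO_UNEVICTABLE
               , unevictable
#endif
               );
        return 0;
}
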
@@ -3410,10 +3445,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait);
        pgdat->kswapd_max_order = 0;
+       pgdat_page_cgroup_init(pgdat);
        
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, memmap_pages;
+               enum lru_list l;
 
                size = zone_spanned_pages_in_node(nid, j, zones_size);
                realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3465,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                        PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                if (realsize >= memmap_pages) {
                        realsize -= memmap_pages;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                               "%s zone: %lu pages used for memmap\n",
+                       printk(KERN_DEBUG
+                               "  %s zone: %lu pages used for memmap\n",
                                zone_names[j], memmap_pages);
                } else
                        printk(KERN_WARNING
@@ -3439,8 +3476,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                /* Account for reserved pages */
                if (j == 0 && realsize > dma_reserve) {
                        realsize -= dma_reserve;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                                       "%s zone: %lu pages reserved\n",
+                       printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                                        zone_names[0], dma_reserve);
                }
 
@@ -3465,10 +3501,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                zone->prev_priority = DEF_PRIORITY;
 
                zone_pcp_init(zone);
-               INIT_LIST_HEAD(&zone->active_list);
-               INIT_LIST_HEAD(&zone->inactive_list);
-               zone->nr_scan_active = 0;
-               zone->nr_scan_inactive = 0;
+               for_each_lru(l) {
+                       INIT_LIST_HEAD(&zone->lru[l].list);
+                       zone->lru[l].nr_scan = 0;
+               }
+               zone->recent_rotated[0] = 0;
+               zone->recent_rotated[1] = 0;
+               zone->recent_scanned[0] = 0;
+               zone->recent_scanned[1] = 0;
                zap_zone_vm_stats(zone);
                zone->flags = 0;
                if (!size)
@@ -4210,7 +4250,7 @@ void setup_per_zone_pages_min(void)
        for_each_zone(zone) {
                u64 tmp;
 
-               spin_lock_irqsave(&zone->lru_lock, flags);
+               spin_lock_irqsave(&zone->lock, flags);
                tmp = (u64)pages_min * zone->present_pages;
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
@@ -4242,13 +4282,53 @@ void setup_per_zone_pages_min(void)
                zone->pages_low   = zone->pages_min + (tmp >> 2);
                zone->pages_high  = zone->pages_min + (tmp >> 1);
                setup_zone_migrate_reserve(zone);
-               spin_unlock_irqrestore(&zone->lru_lock, flags);
+               spin_unlock_irqrestore(&zone->lock, flags);
        }
 
        /* update totalreserve_pages */
        calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+       struct zone *zone;
+
+       for_each_zone(zone) {
+               unsigned int gb, ratio;
+
+               /* Zone size in gigabytes */
+               gb = zone->present_pages >> (30 - PAGE_SHIFT);
+               ratio = int_sqrt(10 * gb);
+               if (!ratio)
+                       ratio = 1;
+
+               zone->inactive_ratio = ratio;
+       }
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4286,6 +4366,7 @@ static int __init init_per_zone_pages_min(void)
                min_free_kbytes = 65536;
        setup_per_zone_pages_min();
        setup_per_zone_lowmem_reserve();
+       setup_per_zone_inactive_ratio();
        return 0;
 }
 module_init(init_per_zone_pages_min)
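
The table in the setup_per_zone_inactive_ratio() comment follows directly from the formula in the function: the zone size is converted to gigabytes, the ratio is int_sqrt(10 * gb) with a floor of 1, and the inactive share is then roughly 1/(ratio + 1) of the anonymous pages. The standalone sketch below reproduces those numbers, with a plain open-coded integer square root standing in for the kernel-internal int_sqrt().

#include <stdio.h>

static unsigned long isqrt(unsigned long x)
{
        unsigned long r = 0;

        while ((r + 1) * (r + 1) <= x)
                r++;
        return r;
}

int main(void)
{
        /* zone sizes in megabytes, matching the table in the comment */
        unsigned long sizes_mb[] = { 10, 100, 1024, 10240, 102400,
                                     1048576, 10485760 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes_mb) / sizeof(sizes_mb[0]); i++) {
                /* like present_pages >> (30 - PAGE_SHIFT): size in GB */
                unsigned long gb = sizes_mb[i] >> 10;
                unsigned long ratio = isqrt(10 * gb);

                if (!ratio)
                        ratio = 1;
                /* with ratio N, roughly 1/(N+1) of the anon pages stay inactive */
                printf("%8luMB  ratio %3lu  max inactive anon %luMB\n",
                       sizes_mb[i], ratio, sizes_mb[i] / (ratio + 1));
        }
        return 0;
}
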