slub: Add kmalloc_large_node() to support kmalloc_node fallback

[linux-2.6-omap-h63xx.git] / mm / slub.c
diff --git a/mm/slub.c b/mm/slub.c

index 0a5a1001590ba67b610cc755cfdda08e3cbf6572..ecacacdce9d7050faa2598e49d1e2992dc70507d 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
  #endif
  }
  
+/* Verify that a pointer has an address that is valid within a slab page */
  static inline int check_valid_pointer(struct kmem_cache *s,
                                 struct page *page, const void *object)
  {
@@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
   *     A. Free pointer (if we cannot overwrite object on free)
   *     B. Tracking data for SLAB_STORE_USER
   *     C. Padding to reach required alignment boundary or at mininum
- *             one word if debuggin is on to be able to detect writes
+ *             one word if debugging is on to be able to detect writes
   *             before the word boundary.
   *
   *     Padding is done using 0x5a (POISON_INUSE)
@@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
          * may return off node objects because partial slabs are obtained
          * from other nodes and filled up.
          *
-        * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
+        * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
          * defrag_ratio = 1000) then every (well almost) allocation will
          * first attempt to defrag slab caches on other nodes. This means
          * scanning over all nodes to look for partial slabs which may be
@@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
                          * Adding an empty slab to the partial slabs in order
                          * to avoid page allocator overhead. This slab needs
                          * to come after the other slabs with objects in
-                        * order to fill them up. That way the size of the
-                        * partial list stays small. kmem_cache_shrink can
-                        * reclaim empty slabs from the partial list.
+                        * so that the others get filled first. That way the
+                        * size of the partial list stays small.
+                        *
+                        * kmem_cache_shrink can reclaim any empty slabs from the
+                        * partial list.
                          */
                         add_partial(n, page, 1);
                         slab_unlock(page);
@@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
         if (c->freelist)
                 stat(c, DEACTIVATE_REMOTE_FREES);
         /*
-        * Merge cpu freelist into freelist. Typically we get here
+        * Merge cpu freelist into slab freelist. Typically we get here
          * because both freelists are empty. So this is unlikely
          * to occur.
          */
@@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  
  /*
   * Flush cpu slab.
+ *
   * Called from IPI handler with interrupts disabled.
   */
  static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
   * rest of the freelist to the lockless freelist.
   *
   * And if we were unable to get a new slab from the partial slab lists then
- * we need to allocate a new slab. This is slowest path since we may sleep.
+ * we need to allocate a new slab. This is the slowest path since it involves
+ * a call to the page allocator and the setup of a new slab.
   */
  static void *__slab_alloc(struct kmem_cache *s,
                 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
@@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s,
         slab_lock(c->page);
         if (unlikely(!node_match(c, node)))
                 goto another_slab;
+
         stat(c, ALLOC_REFILL);
+
  load_freelist:
         object = c->page->freelist;
         if (unlikely(!object))
@@ -1479,7 +1486,6 @@ load_freelist:
         if (unlikely(SlabDebug(c->page)))
                 goto debug;
  
-       object = c->page->freelist;
         c->freelist = object[c->offset];
         c->page->inuse = s->objects;
         c->page->freelist = NULL;
@@ -1535,7 +1541,6 @@ new_slab:
  
         return NULL;
  debug:
-       object = c->page->freelist;
         if (!alloc_debug_processing(s, c->page, object, addr))
                 goto another_slab;
  
@@ -1616,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  
         if (unlikely(SlabDebug(page)))
                 goto debug;
+
  checks_ok:
         prior = object[offset] = page->freelist;
         page->freelist = object;
@@ -1630,8 +1636,7 @@ checks_ok:
                 goto slab_empty;
  
         /*
-        * Objects left in the slab. If it
-        * was not on the partial list before
+        * Objects left in the slab. If it was not on the partial list before
          * then add it.
          */
         if (unlikely(!prior)) {
@@ -1681,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
         unsigned long flags;
  
         local_irq_save(flags);
-       debug_check_no_locks_freed(object, s->objsize);
         c = get_cpu_slab(s, smp_processor_id());
+       debug_check_no_locks_freed(object, c->objsize);
         if (likely(page == c->page && c->node >= 0)) {
                 object[c->offset] = c->freelist;
                 c->freelist = object;
@@ -1845,13 +1850,11 @@ static unsigned long calculate_alignment(unsigned long flags,
                 unsigned long align, unsigned long size)
  {
         /*
-        * If the user wants hardware cache aligned objects then
-        * follow that suggestion if the object is sufficiently
-        * large.
+        * If the user wants hardware cache aligned objects then follow that
+        * suggestion if the object is sufficiently large.
          *
-        * The hardware cache alignment cannot override the
-        * specified alignment though. If that is greater
-        * then use it.
+        * The hardware cache alignment cannot override the specified
+        * alignment though. If that is greater then use it.
          */
         if ((flags & SLAB_HWCACHE_ALIGN) &&
                         size > cache_line_size() / 2)
@@ -2049,6 +2052,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
  #endif
         init_kmem_cache_node(n);
         atomic_long_inc(&n->nr_slabs);
+
         /*
          * lockdep requires consistent irq usage for each lock
          * so even though there cannot be a race this early in
@@ -2129,6 +2133,14 @@ static int calculate_sizes(struct kmem_cache *s)
         unsigned long size = s->objsize;
         unsigned long align = s->align;
  
+       /*
+        * Round up object size to the next word boundary. We can only
+        * place the free pointer at word boundaries and this determines
+        * the possible location of the free pointer.
+        */
+       size = ALIGN(size, sizeof(void *));
+
+#ifdef CONFIG_SLUB_DEBUG
         /*
          * Determine if we can poison the object itself. If the user of
          * the slab may touch the object after free or before allocation
@@ -2140,14 +2152,7 @@ static int calculate_sizes(struct kmem_cache *s)
         else
                 s->flags &= ~__OBJECT_POISON;
  
-       /*
-        * Round up object size to the next word boundary. We can only
-        * place the free pointer at word boundaries and this determines
-        * the possible location of the free pointer.
-        */
-       size = ALIGN(size, sizeof(void *));
  
-#ifdef CONFIG_SLUB_DEBUG
         /*
          * If we are Redzoning then check if there is some space between the
          * end of the object and the free pointer. If not then add an
@@ -2300,7 +2305,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
         /*
          * We could also check if the object is on the slabs freelist.
          * But this would be too expensive and it seems that the main
-        * purpose of kmem_ptr_valid is to check if the object belongs
+        * purpose of kmem_ptr_validate() is to check if the object belongs
          * to a certain slab.
          */
         return 1;
@@ -2587,13 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags)
  }
  EXPORT_SYMBOL(__kmalloc);
  
+static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+{
+       struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
+                                               get_order(size));
+       /* Pages are requested for @node with __GFP_COMP; no page means NULL. */
+       if (page)
+               return page_address(page);
+       else
+               return NULL;
+}
+
  #ifdef CONFIG_NUMA
  void *__kmalloc_node(size_t size, gfp_t flags, int node)
  {
         struct kmem_cache *s;
  
         if (unlikely(size > PAGE_SIZE))
-               return kmalloc_large(size, flags);
+               return kmalloc_large_node(size, flags, node);
  
         s = get_slab(size, flags);
  
@@ -2610,19 +2626,17 @@ size_t ksize(const void *object)
         struct page *page;
         struct kmem_cache *s;
  
-       BUG_ON(!object);
         if (unlikely(object == ZERO_SIZE_PTR))
                 return 0;
  
         page = virt_to_head_page(object);
-       BUG_ON(!page);
  
         if (unlikely(!PageSlab(page)))
                 return PAGE_SIZE << compound_order(page);
  
         s = page->slab;
-       BUG_ON(!s);
  
+#ifdef CONFIG_SLUB_DEBUG
         /*
          * Debugging requires use of the padding between object
          * and whatever may come after it.
@@ -2630,6 +2644,7 @@ size_t ksize(const void *object)
         if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
                 return s->objsize;
  
+#endif
         /*
          * If we have the need to store the freelist pointer
          * back there or track user information then we can
@@ -2637,7 +2652,6 @@ size_t ksize(const void *object)
          */
         if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
                 return s->inuse;
-
         /*
          * Else we can use all the padding etc for the allocation
          */
@@ -2914,7 +2928,7 @@ void __init kmem_cache_init(void)
         /*
          * Patch up the size_index table if we have strange large alignment
          * requirements for the kmalloc array. This is only the case for
-        * mips it seems. The standard arches will not generate any code here.
+        * MIPS it seems. The standard arches will not generate any code here.
          *
          * Largest permitted alignment is 256 bytes due to the way we
          * handle the index determination for the smaller caches.
@@ -2943,7 +2957,6 @@ void __init kmem_cache_init(void)
         kmem_size = sizeof(struct kmem_cache);
  #endif
  
-
         printk(KERN_INFO
                 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
                 " CPUs=%d, Nodes=%d\n",
@@ -3040,12 +3053,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
                  */
                 for_each_online_cpu(cpu)
                         get_cpu_slab(s, cpu)->objsize = s->objsize;
+
                 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
                 up_write(&slub_lock);
+
                 if (sysfs_slab_alias(s, name))
                         goto err;
                 return s;
         }
+
         s = kmalloc(kmem_size, GFP_KERNEL);
         if (s) {
                 if (kmem_cache_open(s, GFP_KERNEL, name,
@@ -3141,7 +3157,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
         struct kmem_cache *s;
  
         if (unlikely(size > PAGE_SIZE))
-               return kmalloc_large(size, gfpflags);
+               return kmalloc_large_node(size, gfpflags, node);
  
         s = get_slab(size, gfpflags);
  
@@ -3928,7 +3944,6 @@ SLAB_ATTR(remote_node_defrag_ratio);
  #endif
  
  #ifdef CONFIG_SLUB_STATS
-
  static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
  {
         unsigned long sum  = 0;
@@ -4112,8 +4127,8 @@ static struct kset *slab_kset;
  #define ID_STR_LENGTH 64
  
  /* Create a unique string id for a slab cache:
- * format
- * :[flags-]size:[memory address of kmemcache]
+ *
+ * Format      :[flags-]size
   */
  static char *create_unique_id(struct kmem_cache *s)
  {