* All object allocations for a node occur from node specific slab lists.
*/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
int node);
static int enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
+static void cache_reap(struct work_struct *unused);
/*
* This function must be completely optimized away if a constant is passed to
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
-/*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
- *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
- */
-atomic_t slab_reclaim_pages;
-
/*
* chicken and egg problem: delay the per-cpu array allocation
* until the general caches are up.
return g_cpucache_up == FULL;
}
-static DEFINE_PER_CPU(struct work_struct, reap_work);
+static DEFINE_PER_CPU(struct delayed_work, reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
if (node == MAX_NUMNODES)
node = first_node(node_online_map);
- __get_cpu_var(reap_node) = node;
+ per_cpu(reap_node, cpu) = node;
}
static void next_reap_node(void)
*/
static void __devinit start_cpu_timer(int cpu)
{
- struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+ struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
/*
* When this gets called from do_initcalls via cpucache_init(),
* init_workqueues() has already run, so keventd will be setup
* at that time.
*/
- if (keventd_up() && reap_work->func == NULL) {
+ if (keventd_up() && reap_work->work.func == NULL) {
init_reap_node(cpu);
- INIT_WORK(reap_work, cache_reap, NULL);
+ INIT_DELAYED_WORK(reap_work, cache_reap);
schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
}
}
return nr;
}
-#ifdef CONFIG_NUMA
+#ifndef CONFIG_NUMA
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+ return (struct array_cache **)BAD_ALIEN_MAGIC;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+ return 0;
+}
+
+static inline void *alternate_node_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
+{
+ return NULL;
+}
+
+static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ return NULL;
+}
+
+#else /* CONFIG_NUMA */
+
static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
int nodeid = slabp->nodeid;
struct kmem_list3 *l3;
struct array_cache *alien = NULL;
+ int node;
+
+ node = numa_node_id();
/*
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(slabp->nodeid == numa_node_id()))
+ if (likely(slabp->nodeid == node))
return 0;
- l3 = cachep->nodelists[numa_node_id()];
+ l3 = cachep->nodelists[node];
STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
}
return 1;
}
-
-#else
-
-#define drain_alien_cache(cachep, alien) do { } while (0)
-#define reap_alien(cachep, l3) do { } while (0)
-
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
-{
- return (struct array_cache **)BAD_ALIEN_MAGIC;
-}
-
-static inline void free_alien_cache(struct array_cache **ac_ptr)
-{
-}
-
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
-{
- return 0;
-}
-
#endif
static int __cpuinit cpuup_callback(struct notifier_block *nfb,
{
struct kmem_list3 *ptr;
- BUG_ON(cachep->nodelists[nodeid] != list);
ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
BUG_ON(!ptr);
struct cache_names *names;
int i;
int order;
+ int node;
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/
+ node = numa_node_id();
+
/* 1) create the cache_cache */
INIT_LIST_HEAD(&cache_chain);
list_add(&cache_cache.next, &cache_chain);
cache_cache.colour_off = cache_line_size();
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
- cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
+ cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
}
/* 5) Replace the bootstrap kmem_list3's */
{
- int node;
+ int nid;
+
/* Replace the static kmem_list3 structures for the boot cpu */
- init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
- numa_node_id());
+ init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
- for_each_online_node(node) {
+ for_each_online_node(nid) {
init_list(malloc_sizes[INDEX_AC].cs_cachep,
- &initkmem_list3[SIZE_AC + node], node);
+ &initkmem_list3[SIZE_AC + nid], nid);
if (INDEX_AC != INDEX_L3) {
init_list(malloc_sizes[INDEX_L3].cs_cachep,
- &initkmem_list3[SIZE_L3 + node],
- node);
+ &initkmem_list3[SIZE_L3 + nid], nid);
}
}
}
*/
flags |= __GFP_COMP;
#endif
- flags |= cachep->gfpflags;
+
+ /*
+ * Under NUMA we want memory on the indicated node. We will handle
+ * the needed fallback ourselves since we want to serve from our
+ * per node object lists first for other nodes.
+ */
+ flags |= cachep->gfpflags | GFP_THISNODE;
page = alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page)
nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_add(nr_pages, &slab_reclaim_pages);
- add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
+ add_zone_page_state(page_zone(page),
+ NR_SLAB_RECLAIMABLE, nr_pages);
+ else
+ add_zone_page_state(page_zone(page),
+ NR_SLAB_UNRECLAIMABLE, nr_pages);
for (i = 0; i < nr_pages; i++)
__SetPageSlab(page + i);
return page_address(page);
struct page *page = virt_to_page(addr);
const unsigned long nr_freed = i;
- sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+ sub_zone_page_state(page_zone(page),
+ NR_SLAB_RECLAIMABLE, nr_freed);
+ else
+ sub_zone_page_state(page_zone(page),
+ NR_SLAB_UNRECLAIMABLE, nr_freed);
while (i--) {
BUG_ON(!PageSlab(page));
__ClearPageSlab(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
free_pages((unsigned long)addr, cachep->gfporder);
- if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
}
static void kmem_rcu_free(struct rcu_head *head)
static void dump_line(char *data, int offset, int limit)
{
int i;
+ unsigned char error = 0;
+ int bad_count = 0;
+
printk(KERN_ERR "%03x:", offset);
- for (i = 0; i < limit; i++)
+ for (i = 0; i < limit; i++) {
+ if (data[offset + i] != POISON_FREE) {
+ error = data[offset + i];
+ bad_count++;
+ }
printk(" %02x", (unsigned char)data[offset + i]);
+ }
printk("\n");
+
+ if (bad_count == 1) {
+ error ^= POISON_FREE;
+ if (!(error & (error - 1))) {
+ printk(KERN_ERR "Single bit error detected. Probably "
+ "bad RAM.\n");
+#ifdef CONFIG_X86
+ printk(KERN_ERR "Run memtest86+ or a similar memory "
+ "test tool.\n");
+#else
+ printk(KERN_ERR "Run a memory test tool.\n");
+#endif
+ }
+ }
}
#endif
if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
ralign = BYTES_PER_WORD;
- /* 2) arch mandated alignment: disables debug if necessary */
+ /* 2) arch mandated alignment */
if (ralign < ARCH_SLAB_MINALIGN) {
ralign = ARCH_SLAB_MINALIGN;
- if (ralign > BYTES_PER_WORD)
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
}
- /* 3) caller mandated alignment: disables debug if necessary */
+ /* 3) caller mandated alignment */
if (ralign < align) {
ralign = align;
- if (ralign > BYTES_PER_WORD)
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
}
+ /* disable debug if necessary */
+ if (ralign > BYTES_PER_WORD)
+ flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/*
* 4) Store it.
*/
* @cachep: the cache to destroy
*
* Remove a struct kmem_cache object from the slab cache.
- * Returns 0 on success.
*
* It is expected this function will be called by a module when it is
* unloaded. This will remove the cache completely, and avoid a duplicate
* The caller must guarantee that noone will allocate memory from the cache
* during the kmem_cache_destroy().
*/
-int kmem_cache_destroy(struct kmem_cache *cachep)
+void kmem_cache_destroy(struct kmem_cache *cachep)
{
BUG_ON(!cachep || in_interrupt());
list_add(&cachep->next, &cache_chain);
mutex_unlock(&cache_chain_mutex);
unlock_cpu_hotplug();
- return 1;
+ return;
}
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
__kmem_cache_destroy(cachep);
unlock_cpu_hotplug();
- return 0;
}
EXPORT_SYMBOL(kmem_cache_destroy);
int batchcount;
struct kmem_list3 *l3;
struct array_cache *ac;
+ int node;
+
+ node = numa_node_id();
check_irq_off();
ac = cpu_cache_get(cachep);
*/
batchcount = BATCHREFILL_LIMIT;
}
- l3 = cachep->nodelists[numa_node_id()];
+ l3 = cachep->nodelists[node];
BUG_ON(ac->avail > 0 || !l3);
spin_lock(&l3->list_lock);
STATS_SET_HIGH(cachep);
ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
- numa_node_id());
+ node);
}
check_slabp(cachep, slabp);
if (unlikely(!ac->avail)) {
int x;
- x = cache_grow(cachep, flags, numa_node_id());
+ x = cache_grow(cachep, flags, node);
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
cachep->ctor(objp, cachep, ctor_flags);
}
+#if ARCH_SLAB_MINALIGN
+ if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+ printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
+ objp, ARCH_SLAB_MINALIGN);
+ }
+#endif
return objp;
}
#else
void *objp;
struct array_cache *ac;
-#ifdef CONFIG_NUMA
- if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
- objp = alternate_node_alloc(cachep, flags);
- if (objp != NULL)
- return objp;
- }
-#endif
-
check_irq_off();
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
gfp_t flags, void *caller)
{
unsigned long save_flags;
- void *objp;
+ void *objp = NULL;
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
- objp = ____cache_alloc(cachep, flags);
+
+ if (unlikely(NUMA_BUILD &&
+ current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
+ objp = alternate_node_alloc(cachep, flags);
+
+ if (!objp)
+ objp = ____cache_alloc(cachep, flags);
+ /*
+ * We may just have run out of memory on the local node.
+ * __cache_alloc_node() knows how to locate memory on other nodes
+ */
+ if (NUMA_BUILD && !objp)
+ objp = __cache_alloc_node(cachep, flags, numa_node_id());
local_irq_restore(save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp,
caller);
{
int nid_alloc, nid_here;
- if (in_interrupt())
+ if (in_interrupt() || (flags & __GFP_THISNODE))
return NULL;
nid_alloc = nid_here = numa_node_id();
if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
return NULL;
}
+/*
+ * Fallback function if there was no memory available and no objects on a
+ * certain node and we are allowed to fall back. We mimick the behavior of
+ * the page allocator. We fall back according to a zonelist determined by
+ * the policy layer while obeying cpuset constraints.
+ */
+void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+ struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
+ ->node_zonelists[gfp_zone(flags)];
+ struct zone **z;
+ void *obj = NULL;
+
+ for (z = zonelist->zones; *z && !obj; z++) {
+ int nid = zone_to_nid(*z);
+
+ if (zone_idx(*z) <= ZONE_NORMAL &&
+ cpuset_zone_allowed(*z, flags) &&
+ cache->nodelists[nid])
+ obj = __cache_alloc_node(cache,
+ flags | __GFP_THISNODE, nid);
+ }
+ return obj;
+}
+
/*
* A interface to enable slab creation on nodeid
*/
must_grow:
spin_unlock(&l3->list_lock);
x = cache_grow(cachep, flags, nodeid);
+ if (x)
+ goto retry;
- if (!x)
- return NULL;
+ if (!(flags & __GFP_THISNODE))
+ /* Unable to grow the cache. Fall back to other nodes. */
+ return fallback_alloc(cachep, flags);
+
+ return NULL;
- goto retry;
done:
return obj;
}
}
+#ifdef CONFIG_DEBUG_SLAB
void *__kmalloc(size_t size, gfp_t flags)
{
-#ifndef CONFIG_DEBUG_SLAB
- return __do_kmalloc(size, flags, NULL);
-#else
return __do_kmalloc(size, flags, __builtin_return_address(0));
-#endif
}
EXPORT_SYMBOL(__kmalloc);
-#ifdef CONFIG_DEBUG_SLAB
void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
{
return __do_kmalloc(size, flags, caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);
+
+#else
+void *__kmalloc(size_t size, gfp_t flags)
+{
+ return __do_kmalloc(size, flags, NULL);
+}
+EXPORT_SYMBOL(__kmalloc);
#endif
/**
* If we cannot acquire the cache chain mutex then just give up - we'll try
* again on the next iteration.
*/
-static void cache_reap(void *unused)
+static void cache_reap(struct work_struct *unused)
{
struct kmem_cache *searchp;
struct kmem_list3 *l3;