author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-03-19 12:51:03 +1100
---|---|---
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-03-19 12:51:03 +1100
commit | 69e68a47aa271ec49c080c1f427e498a25a4bf49 (patch)
tree | 7a5fdfbbfdb0d4ee7b173cbdc57572b7d24bf581
parent | 2fc2e8d8a0a8e9fa4c562cc2394abf439b902bdf (diff)
parent | 339dfb05a7f7b5bf15a017deb430e69d90811c8d (diff)
Merge remote branch 'slab/for-next'
-rw-r--r-- | mm/slab.c | 202
1 file changed, 170 insertions, 32 deletions
```diff
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..4bff9e57a0a8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemcheck.h>
+#include <linux/memory.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -1102,6 +1103,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 }
 #endif
 
+/*
+ * Allocates and initializes nodelists for a node on each slab cache, used for
+ * either memory or cpu hotplug.  If memory is being hot-added, the kmem_list3
+ * will be allocated off-node since memory is not yet online for the new node.
+ * When hotplugging memory or a cpu, existing nodelists are not replaced if
+ * already in use.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int init_cache_nodelists_node(int node)
+{
+	struct kmem_cache *cachep;
+	struct kmem_list3 *l3;
+	const int memsize = sizeof(struct kmem_list3);
+
+	list_for_each_entry(cachep, &cache_chain, next) {
+		/*
+		 * Set up the size64 kmemlist for cpu before we can
+		 * begin anything. Make sure some other cpu on this
+		 * node has not already allocated this
+		 */
+		if (!cachep->nodelists[node]) {
+			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+			if (!l3)
+				return -ENOMEM;
+			kmem_list3_init(l3);
+			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
+			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+
+			/*
+			 * The l3s don't come and go as CPUs come and
+			 * go.  cache_chain_mutex is sufficient
+			 * protection here.
+			 */
+			cachep->nodelists[node] = l3;
+		}
+
+		spin_lock_irq(&cachep->nodelists[node]->list_lock);
+		cachep->nodelists[node]->free_limit =
+			(1 + nr_cpus_node(node)) *
+			cachep->batchcount + cachep->num;
+		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+	}
+	return 0;
+}
+
 static void __cpuinit cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
@@ -1172,7 +1219,7 @@ static int __cpuinit cpuup_prepare(long cpu)
 	struct kmem_cache *cachep;
 	struct kmem_list3 *l3 = NULL;
 	int node = cpu_to_node(cpu);
-	const int memsize = sizeof(struct kmem_list3);
+	int err;
 
 	/*
 	 * We need to do this right in the beginning since
@@ -1180,35 +1227,9 @@ static int __cpuinit cpuup_prepare(long cpu)
 	 * kmalloc_node allows us to add the slab to the right
 	 * kmem_list3 and not this cpu's kmem_list3
 	 */
-
-	list_for_each_entry(cachep, &cache_chain, next) {
-		/*
-		 * Set up the size64 kmemlist for cpu before we can
-		 * begin anything. Make sure some other cpu on this
-		 * node has not already allocated this
-		 */
-		if (!cachep->nodelists[node]) {
-			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
-			if (!l3)
-				goto bad;
-			kmem_list3_init(l3);
-			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
-			/*
-			 * The l3s don't come and go as CPUs come and
-			 * go.  cache_chain_mutex is sufficient
-			 * protection here.
-			 */
-			cachep->nodelists[node] = l3;
-		}
-
-		spin_lock_irq(&cachep->nodelists[node]->list_lock);
-		cachep->nodelists[node]->free_limit =
-			(1 + nr_cpus_node(node)) *
-			cachep->batchcount + cachep->num;
-		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
-	}
+	err = init_cache_nodelists_node(node);
+	if (err < 0)
+		goto bad;
 
 	/*
 	 * Now we can go ahead with allocating the shared arrays and
@@ -1331,11 +1352,120 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
 	&cpuup_callback, NULL, 0
 };
 
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * Drains and frees nodelists for a node on each slab cache, used for memory
+ * hotplug.  Returns -EBUSY if all objects cannot be drained on memory
+ * hot-remove so that the node is not removed.  When used because memory
+ * hot-add is canceled, the only result is the freed kmem_list3.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int __meminit free_cache_nodelists_node(int node)
+{
+	struct kmem_cache *cachep;
+	int ret = 0;
+
+	list_for_each_entry(cachep, &cache_chain, next) {
+		struct array_cache *shared;
+		struct array_cache **alien;
+		struct kmem_list3 *l3;
+
+		l3 = cachep->nodelists[node];
+		if (!l3)
+			continue;
+
+		spin_lock_irq(&l3->list_lock);
+		shared = l3->shared;
+		if (shared) {
+			free_block(cachep, shared->entry, shared->avail, node);
+			l3->shared = NULL;
+		}
+		alien = l3->alien;
+		l3->alien = NULL;
+		spin_unlock_irq(&l3->list_lock);
+
+		if (alien) {
+			drain_alien_cache(cachep, alien);
+			free_alien_cache(alien);
+		}
+		kfree(shared);
+
+		drain_freelist(cachep, l3, l3->free_objects);
+		if (!list_empty(&l3->slabs_full) ||
+		    !list_empty(&l3->slabs_partial)) {
+			/*
+			 * Continue to iterate through each slab cache to free
+			 * as many nodelists as possible even though the
+			 * offline will be canceled.
+			 */
+			ret = -EBUSY;
+			continue;
+		}
+		kfree(l3);
+		cachep->nodelists[node] = NULL;
+	}
+	return ret;
+}
+
+/*
+ * Onlines nid either as the result of memory hot-add or canceled hot-remove.
+ */
+static int __meminit slab_node_online(int nid)
+{
+	int ret;
+	mutex_lock(&cache_chain_mutex);
+	ret = init_cache_nodelists_node(nid);
+	mutex_unlock(&cache_chain_mutex);
+	return ret;
+}
+
+/*
+ * Offlines nid either as the result of memory hot-remove or canceled hot-add.
+ */
+static int __meminit slab_node_offline(int nid)
+{
+	int ret;
+	mutex_lock(&cache_chain_mutex);
+	ret = free_cache_nodelists_node(nid);
+	mutex_unlock(&cache_chain_mutex);
+	return ret;
+}
+
+static int __meminit slab_memory_callback(struct notifier_block *self,
+					unsigned long action, void *arg)
+{
+	struct memory_notify *mnb = arg;
+	int ret = 0;
+	int nid;
+
+	nid = mnb->status_change_nid;
+	if (nid < 0)
+		goto out;
+
+	switch (action) {
+	case MEM_GOING_ONLINE:
+	case MEM_CANCEL_OFFLINE:
+		ret = slab_node_online(nid);
+		break;
+	case MEM_GOING_OFFLINE:
+	case MEM_CANCEL_ONLINE:
+		ret = slab_node_offline(nid);
+		break;
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		break;
+	}
+out:
+	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+}
+#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+
 /*
  * swap the static kmem_list3 with kmalloced memory
  */
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
-			int nodeid)
+static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+				int nodeid)
 {
 	struct kmem_list3 *ptr;
 
@@ -1580,6 +1710,14 @@ void __init kmem_cache_init_late(void)
 	 */
 	register_cpu_notifier(&cpucache_notifier);
 
+#ifdef CONFIG_NUMA
+	/*
+	 * Register a memory hotplug callback that initializes and frees
+	 * nodelists.
+	 */
+	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
+#endif
+
 	/*
 	 * The reap timers are started later, with a module init call: That part
 	 * of the kernel is not yet operational.
```
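The change being merged hooks the slab allocator into the memory hotplug notifier chain: nodelists are set up on MEM_GOING_ONLINE and drained or freed on MEM_GOING_OFFLINE, with the transition vetoed via notifier_from_errno() on failure. For readers unfamiliar with that interface, the sketch below shows the general pattern a subsystem follows to register such a callback. It is not part of this commit; the module and function names are made up for illustration, and the per-node work is left as comments.

```c
/*
 * Illustrative sketch only -- not from this commit.  It mirrors the
 * memory-hotplug notifier pattern that mm/slab.c adopts above: register a
 * callback, look at the node whose status is changing, and either set up
 * or tear down per-node state, cancelling the transition on failure.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/notifier.h>

static int example_memory_callback(struct notifier_block *self,
				   unsigned long action, void *arg)
{
	struct memory_notify *mnb = arg;
	int nid = mnb->status_change_nid;
	int ret = 0;

	/* Nothing to do if no node is gaining or losing memory. */
	if (nid < 0)
		return NOTIFY_OK;

	switch (action) {
	case MEM_GOING_ONLINE:
	case MEM_CANCEL_OFFLINE:
		/* Allocate per-node state for nid here; setting ret to a
		 * negative errno (e.g. -ENOMEM) cancels the online. */
		break;
	case MEM_GOING_OFFLINE:
	case MEM_CANCEL_ONLINE:
		/* Drain and free per-node state here; setting ret to -EBUSY
		 * cancels the offline, as free_cache_nodelists_node() does. */
		break;
	default:
		break;
	}
	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
}

static int __init example_init(void)
{
	/* Priority 0 is arbitrary here; slab registers with SLAB_CALLBACK_PRI. */
	hotplug_memory_notifier(example_memory_callback, 0);
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");
```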