summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/slab.h3
-rw-r--r--mm/memcontrol.c50
-rw-r--r--mm/slab.h44
-rw-r--r--mm/slab_common.c78
4 files changed, 96 insertions, 79 deletions
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 6008d884e621..bc189a43e680 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -16,6 +16,7 @@
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <linux/percpu-refcount.h>
/*
@@ -152,7 +153,6 @@ int kmem_cache_shrink(struct kmem_cache *);
void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
void memcg_deactivate_kmem_caches(struct mem_cgroup *);
-void memcg_destroy_kmem_caches(struct mem_cgroup *);
/*
* Please use this macro to create slab caches. Simply specify the
@@ -642,6 +642,7 @@ struct memcg_cache_params {
struct mem_cgroup *memcg;
struct list_head children_node;
struct list_head kmem_caches_node;
+ struct percpu_ref refcnt;
void (*work_fn)(struct kmem_cache *);
union {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 25e35a8b8ba2..ce4ce5e7937b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2667,12 +2667,13 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
{
struct memcg_kmem_cache_create_work *cw;
+ if (!css_tryget_online(&memcg->css))
+ return;
+
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
if (!cw)
return;
- css_get(&memcg->css);
-
cw->memcg = memcg;
cw->cachep = cachep;
INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
@@ -2707,6 +2708,7 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
{
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep;
+ struct memcg_cache_array *arr;
int kmemcg_id;
VM_BUG_ON(!is_root_cache(cachep));
@@ -2714,14 +2716,28 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
if (memcg_kmem_bypass())
return cachep;
- memcg = get_mem_cgroup_from_current();
+ rcu_read_lock();
+
+ if (unlikely(current->active_memcg))
+ memcg = current->active_memcg;
+ else
+ memcg = mem_cgroup_from_task(current);
+
+ if (!memcg || memcg == root_mem_cgroup)
+ goto out_unlock;
+
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
- goto out;
+ goto out_unlock;
+
+ arr = rcu_dereference(cachep->memcg_params.memcg_caches);
- memcg_cachep = cache_from_memcg_idx(cachep, kmemcg_id);
- if (likely(memcg_cachep))
- return memcg_cachep;
+ /*
+ * Make sure we will access the up-to-date value. The code updating
+ * memcg_caches issues a write barrier to match the data dependency
+ * barrier inside READ_ONCE() (see memcg_create_kmem_cache()).
+ */
+ memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]);
/*
* If we are in a safe context (can wait, and not in interrupt
@@ -2734,10 +2750,20 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
* memcg_create_kmem_cache, this means no further allocation
* could happen with the slab_mutex held. So it's better to
* defer everything.
+ *
+ * If the memcg is dying or memcg_cache is about to be released,
+ * don't bother creating new kmem_caches. Because memcg_cachep
+ * is ZEROed as the first step of kmem offlining, we don't need
+ * percpu_ref_tryget_live() here. css_tryget_online() check in
+ * memcg_schedule_kmem_cache_create() will prevent us from
+ * creating a new kmem_cache.
*/
- memcg_schedule_kmem_cache_create(memcg, cachep);
-out:
- css_put(&memcg->css);
+ if (unlikely(!memcg_cachep))
+ memcg_schedule_kmem_cache_create(memcg, cachep);
+ else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt))
+ cachep = memcg_cachep;
+out_unlock:
+ rcu_read_unlock();
return cachep;
}
@@ -2748,7 +2774,7 @@ out:
void memcg_kmem_put_cache(struct kmem_cache *cachep)
{
if (!is_root_cache(cachep))
- css_put(&cachep->memcg_params.memcg->css);
+ percpu_ref_put(&cachep->memcg_params.refcnt);
}
/**
@@ -3295,7 +3321,7 @@ static void memcg_free_kmem(struct mem_cgroup *memcg)
memcg_offline_kmem(memcg);
if (memcg->kmem_state == KMEM_ALLOCATED) {
- memcg_destroy_kmem_caches(memcg);
+ WARN_ON(!list_empty(&memcg->kmem_caches));
static_branch_dec(&memcg_kmem_enabled_key);
WARN_ON(page_counter_read(&memcg->kmem));
}
diff --git a/mm/slab.h b/mm/slab.h
index 46623a576a3c..5d2b8511e6fb 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -248,31 +248,6 @@ static inline const char *cache_name(struct kmem_cache *s)
return s->name;
}
-/*
- * Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
- * That said the caller must assure the memcg's cache won't go away by either
- * taking a css reference to the owner cgroup, or holding the slab_mutex.
- */
-static inline struct kmem_cache *
-cache_from_memcg_idx(struct kmem_cache *s, int idx)
-{
- struct kmem_cache *cachep;
- struct memcg_cache_array *arr;
-
- rcu_read_lock();
- arr = rcu_dereference(s->memcg_params.memcg_caches);
-
- /*
- * Make sure we will access the up-to-date value. The code updating
- * memcg_caches issues a write barrier to match this (see
- * memcg_create_kmem_cache()).
- */
- cachep = READ_ONCE(arr->entries[idx]);
- rcu_read_unlock();
-
- return cachep;
-}
-
static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
if (is_root_cache(s))
@@ -284,14 +259,25 @@ static __always_inline int memcg_charge_slab(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
+ int ret;
+
if (is_root_cache(s))
return 0;
- return memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+
+ ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+ if (ret)
+ return ret;
+
+ percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+
+ return 0;
}
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
+ if (!is_root_cache(s))
+ percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
memcg_kmem_uncharge(page, order);
}
@@ -323,12 +309,6 @@ static inline const char *cache_name(struct kmem_cache *s)
return s->name;
}
-static inline struct kmem_cache *
-cache_from_memcg_idx(struct kmem_cache *s, int idx)
-{
- return NULL;
-}
-
static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
return s;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a15557776d7d..ee3971f7fabc 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -132,6 +132,8 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
LIST_HEAD(slab_root_caches);
static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
+static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref);
+
void slab_init_memcg_params(struct kmem_cache *s)
{
s->memcg_params.root_cache = NULL;
@@ -146,6 +148,12 @@ static int init_memcg_params(struct kmem_cache *s,
struct memcg_cache_array *arr;
if (root_cache) {
+ int ret = percpu_ref_init(&s->memcg_params.refcnt,
+ kmemcg_cache_shutdown,
+ 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+
s->memcg_params.root_cache = root_cache;
INIT_LIST_HEAD(&s->memcg_params.children_node);
INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
@@ -171,6 +179,8 @@ static void destroy_memcg_params(struct kmem_cache *s)
{
if (is_root_cache(s))
kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+ else
+ percpu_ref_exit(&s->memcg_params.refcnt);
}
static void free_memcg_params(struct rcu_head *rcu)
@@ -226,6 +236,7 @@ void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg)
if (is_root_cache(s)) {
list_add(&s->root_caches_node, &slab_root_caches);
} else {
+ css_get(&memcg->css);
s->memcg_params.memcg = memcg;
list_add(&s->memcg_params.children_node,
&s->memcg_params.root_cache->memcg_params.children);
@@ -241,6 +252,7 @@ static void memcg_unlink_cache(struct kmem_cache *s)
} else {
list_del(&s->memcg_params.children_node);
list_del(&s->memcg_params.kmem_caches_node);
+ css_put(&s->memcg_params.memcg->css);
}
}
#else
@@ -678,7 +690,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
}
/*
- * Since readers won't lock (see cache_from_memcg_idx()), we need a
+ * Since readers won't lock (see memcg_kmem_get_cache()), we need a
* barrier here to ensure nobody will see the kmem_cache partially
* initialized.
*/
@@ -701,16 +713,11 @@ static void kmemcg_workfn(struct work_struct *work)
get_online_mems();
mutex_lock(&slab_mutex);
-
s->memcg_params.work_fn(s);
-
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
-
- /* done, put the ref from kmemcg_cache_deactivate() */
- css_put(&s->memcg_params.memcg->css);
}
static void kmemcg_rcufn(struct rcu_head *head)
@@ -727,10 +734,38 @@ static void kmemcg_rcufn(struct rcu_head *head)
queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
}
+static void kmemcg_cache_shutdown_fn(struct kmem_cache *s)
+{
+ WARN_ON(shutdown_cache(s));
+}
+
+static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref)
+{
+ struct kmem_cache *s = container_of(percpu_ref, struct kmem_cache,
+ memcg_params.refcnt);
+ unsigned long flags;
+
+ spin_lock_irqsave(&memcg_kmem_wq_lock, flags);
+ if (s->memcg_params.root_cache->memcg_params.dying)
+ goto unlock;
+
+ s->memcg_params.work_fn = kmemcg_cache_shutdown_fn;
+ INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
+ queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
+
+unlock:
+ spin_unlock_irqrestore(&memcg_kmem_wq_lock, flags);
+}
+
+static void kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
+{
+ __kmemcg_cache_deactivate_after_rcu(s);
+ percpu_ref_kill(&s->memcg_params.refcnt);
+}
+
static void kmemcg_cache_deactivate(struct kmem_cache *s)
{
- if (WARN_ON_ONCE(is_root_cache(s)) ||
- WARN_ON_ONCE(s->memcg_params.work_fn))
+ if (WARN_ON_ONCE(is_root_cache(s)))
return;
__kmemcg_cache_deactivate(s);
@@ -744,10 +779,7 @@ static void kmemcg_cache_deactivate(struct kmem_cache *s)
if (s->memcg_params.root_cache->memcg_params.dying)
goto unlock;
- /* pin memcg so that @s doesn't get destroyed in the middle */
- css_get(&s->memcg_params.memcg->css);
-
- s->memcg_params.work_fn = __kmemcg_cache_deactivate_after_rcu;
+ s->memcg_params.work_fn = kmemcg_cache_deactivate_after_rcu;
call_rcu(&s->memcg_params.rcu_head, kmemcg_rcufn);
unlock:
spin_unlock_irq(&memcg_kmem_wq_lock);
@@ -781,28 +813,6 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
put_online_cpus();
}
-void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
-{
- struct kmem_cache *s, *s2;
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
- list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
- memcg_params.kmem_caches_node) {
- /*
- * The cgroup is about to be freed and therefore has no charges
- * left. Hence, all its caches must be empty by now.
- */
- BUG_ON(shutdown_cache(s));
- }
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-}
-
static int shutdown_memcg_caches(struct kmem_cache *s)
{
struct memcg_cache_array *arr;