summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorChris Down <chris@chrisdown.name>2019-05-31 22:30:22 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-06-09 09:16:13 +0200
commitf96109988a8edd15c668d521af49f51797e4621e (patch)
tree80eb42c18c9664ee5d6e2fa73819f2e82e5d6f0f /include
parent15ff2c6a8838bef403adc538f5a2984e848d1afd (diff)
mm, memcg: consider subtrees in memory.events
commit 9852ae3fe5293264f01c49f2571ef7688f7823ce upstream. memory.stat and other files already consider subtrees in their output, and we should too in order to not present an inconsistent interface. The current situation is fairly confusing, because people interacting with cgroups expect hierarchical behaviour in the vein of memory.stat, cgroup.events, and other files. For example, this causes confusion when debugging reclaim events under low, as currently these always read "0" at non-leaf memcg nodes, which frequently causes people to misdiagnose breach behaviour. The same confusion applies to other counters in this file when debugging issues. Aggregation is done at write time instead of at read-time since these counters aren't hot (unlike memory.stat which is per-page, so it does it at read time), and it makes sense to bundle this with the file notifications. After this patch, events are propagated up the hierarchy: [root@ktst ~]# cat /sys/fs/cgroup/system.slice/memory.events low 0 high 0 max 0 oom 0 oom_kill 0 [root@ktst ~]# systemd-run -p MemoryMax=1 true Running as unit: run-r251162a189fb4562b9dabfdc9b0422f5.service [root@ktst ~]# cat /sys/fs/cgroup/system.slice/memory.events low 0 high 0 max 7 oom 1 oom_kill 1 As this is a change in behaviour, this can be reverted to the old behaviour by mounting with the `memory_localevents' flag set. However, we use the new behaviour by default as there's a lack of evidence that there are any current users of memory.events that would find this change undesirable. akpm: this is a behaviour change, so Cc:stable. THis is so that forthcoming distros which use cgroup v2 are more likely to pick up the revised behaviour. Link: http://lkml.kernel.org/r/20190208224419.GA24772@chrisdown.name Signed-off-by: Chris Down <chris@chrisdown.name> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Shakeel Butt <shakeelb@google.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Tejun Heo <tj@kernel.org> Cc: Roman Gushchin <guro@fb.com> Cc: Dennis Zhou <dennis@kernel.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/cgroup-defs.h5
-rw-r--r--include/linux/memcontrol.h10
2 files changed, 13 insertions, 2 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7d57890cec67..96398d8d4fd8 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -83,6 +83,11 @@ enum {
* Enable cpuset controller in v1 cgroup to use v2 behavior.
*/
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
+
+ /*
+ * Enable legacy local memory.events.
+ */
+ CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
};
/* cftype->flags */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dbb6118370c1..28525c7a8ab0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -777,8 +777,14 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
- atomic_long_inc(&memcg->memory_events[event]);
- cgroup_file_notify(&memcg->events_file);
+ do {
+ atomic_long_inc(&memcg->memory_events[event]);
+ cgroup_file_notify(&memcg->events_file);
+
+ if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
+ break;
+ } while ((memcg = parent_mem_cgroup(memcg)) &&
+ !mem_cgroup_is_root(memcg));
}
static inline void memcg_memory_event_mm(struct mm_struct *mm,