Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 64 insertions(+), 31 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 425aa0caa712..e09741af816f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -631,7 +631,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	val = __this_cpu_read(memcg->stat->nr_page_events);
 	next = __this_cpu_read(memcg->stat->targets[target]);
 	/* from time_after() in jiffies.h */
-	if ((long)next - (long)val < 0) {
+	if ((long)(next - val) < 0) {
 		switch (target) {
 		case MEM_CGROUP_TARGET_THRESH:
 			next = val + THRESHOLDS_EVENTS_TARGET;
@@ -1611,9 +1611,13 @@ cleanup:
  * @page: the page
  *
  * This function protects unlocked LRU pages from being moved to
- * another cgroup and stabilizes their page->mem_cgroup binding.
+ * another cgroup.
+ *
+ * It ensures lifetime of the returned memcg. Caller is responsible
+ * for the lifetime of the page; __unlock_page_memcg() is available
+ * when @page might get freed inside the locked section.
  */
-void lock_page_memcg(struct page *page)
+struct mem_cgroup *lock_page_memcg(struct page *page)
 {
 	struct mem_cgroup *memcg;
 	unsigned long flags;
@@ -1622,18 +1626,24 @@ void lock_page_memcg(struct page *page)
 	 * The RCU lock is held throughout the transaction. The fast
 	 * path can get away without acquiring the memcg->move_lock
 	 * because page moving starts with an RCU grace period.
-	 */
+	 *
+	 * The RCU lock also protects the memcg from being freed when
+	 * the page state that is going to change is the only thing
+	 * preventing the page itself from being freed. E.g. writeback
+	 * doesn't hold a page reference and relies on PG_writeback to
+	 * keep off truncation, migration and so forth.
+	 */
 	rcu_read_lock();
 
 	if (mem_cgroup_disabled())
-		return;
+		return NULL;
again:
 	memcg = page->mem_cgroup;
 	if (unlikely(!memcg))
-		return;
+		return NULL;
 
 	if (atomic_read(&memcg->moving_account) <= 0)
-		return;
+		return memcg;
 
 	spin_lock_irqsave(&memcg->move_lock, flags);
 	if (memcg != page->mem_cgroup) {
@@ -1649,18 +1659,18 @@ again:
 	memcg->move_lock_task = current;
 	memcg->move_lock_flags = flags;
 
-	return;
+	return memcg;
 }
 EXPORT_SYMBOL(lock_page_memcg);
 
 /**
- * unlock_page_memcg - unlock a page->mem_cgroup binding
- * @page: the page
+ * __unlock_page_memcg - unlock and unpin a memcg
+ * @memcg: the memcg
+ *
+ * Unlock and unpin a memcg returned by lock_page_memcg().
  */
-void unlock_page_memcg(struct page *page)
+void __unlock_page_memcg(struct mem_cgroup *memcg)
 {
-	struct mem_cgroup *memcg = page->mem_cgroup;
-
 	if (memcg && memcg->move_lock_task == current) {
 		unsigned long flags = memcg->move_lock_flags;
 
@@ -1672,6 +1682,15 @@ void unlock_page_memcg(struct page *page)
 
 	rcu_read_unlock();
 }
+
+/**
+ * unlock_page_memcg - unlock a page->mem_cgroup binding
+ * @page: the page
+ */
+void unlock_page_memcg(struct page *page)
+{
+	__unlock_page_memcg(page->mem_cgroup);
+}
 EXPORT_SYMBOL(unlock_page_memcg);
 
 /*
@@ -5317,38 +5336,52 @@ struct cgroup_subsys memory_cgrp_subsys = {
 
 /**
  * mem_cgroup_low - check if memory consumption is below the normal range
- * @root: the highest ancestor to consider
+ * @root: the top ancestor of the sub-tree being checked
  * @memcg: the memory cgroup to check
  *
  * Returns %true if memory consumption of @memcg, and that of all
- * configurable ancestors up to @root, is below the normal range.
+ * ancestors up to (but not including) @root, is below the normal range.
+ *
+ * @root is exclusive; it is never low when looked at directly and isn't
+ * checked when traversing the hierarchy.
+ *
+ * Excluding @root enables using memory.low to prioritize memory usage
+ * between cgroups within a subtree of the hierarchy that is limited by
+ * memory.high or memory.max.
+ *
+ * For example, given cgroup A with children B and C:
+ *
+ *    A
+ *   / \
+ *  B   C
+ *
+ * and
+ *
+ *  1. A/memory.current > A/memory.high
+ *  2. A/B/memory.current < A/B/memory.low
+ *  3. A/C/memory.current >= A/C/memory.low
+ *
+ * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we
+ * should reclaim from 'C' until 'A' is no longer high or until we can
+ * no longer reclaim from 'C'. If 'A', i.e. @root, isn't excluded by
+ * mem_cgroup_low when reclaiming from 'A', then 'B' won't be considered
+ * low and we will reclaim indiscriminately from both 'B' and 'C'.
  */
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
 	if (mem_cgroup_disabled())
 		return false;
 
-	/*
-	 * The toplevel group doesn't have a configurable range, so
-	 * it's never low when looked at directly, and it is not
-	 * considered an ancestor when assessing the hierarchy.
-	 */
-
-	if (memcg == root_mem_cgroup)
-		return false;
-
-	if (page_counter_read(&memcg->memory) >= memcg->low)
+	if (!root)
+		root = root_mem_cgroup;
+	if (memcg == root)
 		return false;
 
-	while (memcg != root) {
-		memcg = parent_mem_cgroup(memcg);
-
-		if (memcg == root_mem_cgroup)
-			break;
-
+	for (; memcg != root; memcg = parent_mem_cgroup(memcg)) {
 		if (page_counter_read(&memcg->memory) >= memcg->low)
 			return false;
 	}
+
 	return true;
 }
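For illustration, here is a standalone userspace sketch (not kernel code) of the time_after() idiom the first hunk adopts. Subtracting in unsigned arithmetic first is well-defined across wraparound, and the cast reads the distance as signed; the old "(long)next - (long)val" form subtracts in signed arithmetic, which can overflow, and signed overflow is undefined behavior in C. The helper name is made up for the demo.

#include <stdio.h>
#include <limits.h>

/* Wraparound-safe "val has moved past next", as in time_after(). */
static int target_passed(unsigned long val, unsigned long next)
{
	/*
	 * The unsigned subtraction wraps with defined semantics; the
	 * cast interprets the distance as a signed offset from next.
	 */
	return (long)(next - val) < 0;
}

int main(void)
{
	/* the event counter has wrapped past a target set near ULONG_MAX */
	printf("%d\n", target_passed(7UL, ULONG_MAX - 2));	/* prints 1 */
	/* target still ahead of the counter */
	printf("%d\n", target_passed(3UL, 1000UL));		/* prints 0 */
	return 0;
}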
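The new kernel-doc names writeback as the case where the page may be freed inside the locked section. Below is a minimal sketch of that caller pattern, not part of this diff: the function is hypothetical, TestClearPageWriteback() is the stock page-flag helper, and the writeback-statistics bookkeeping a real end-of-writeback path would do is omitted.

#include <linux/memcontrol.h>
#include <linux/page-flags.h>

/* Hypothetical, simplified end-of-writeback path. */
static int example_end_writeback(struct page *page)
{
	struct mem_cgroup *memcg;
	int ret;

	memcg = lock_page_memcg(page);	/* pins memcg under RCU */
	ret = TestClearPageWriteback(page);
	/*
	 * From here on, truncation may free @page. Only @memcg, kept
	 * alive by the RCU read lock taken above, is safe to touch.
	 */
	__unlock_page_memcg(memcg);	/* not unlock_page_memcg(page) */
	return ret;
}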
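And a hedged sketch of how a reclaim loop consumes the exclusive-@root semantics of mem_cgroup_low(), loosely modeled on the vmscan iteration; the function name is illustrative, while mem_cgroup_iter() and mem_cgroup_low() are the real interfaces. Because @root itself is excluded, 'B' in the example above keeps its memory.low protection even when reclaim is triggered by 'A' breaching memory.high.

#include <linux/memcontrol.h>

/* Illustrative subtree reclaim walk. */
static void example_shrink_subtree(struct mem_cgroup *root)
{
	struct mem_cgroup *memcg = NULL;

	/* walk @root and its descendants; iteration ends at NULL */
	while ((memcg = mem_cgroup_iter(root, memcg, NULL))) {
		if (mem_cgroup_low(root, memcg))
			continue;	/* protected: reclaim siblings instead */
		/* ... shrink the LRU lists of @memcg ... */
	}
}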