From a636b327f731143ccc544b966cfd8de6cb6d72c6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:08 -0800 Subject: memcg: avoid unnecessary system-wide-oom-killer Current mmtom has new oom function as pagefault_out_of_memory(). It's added for select bad process rathar than killing current. When memcg hit limit and calls OOM at page_fault, this handler called and system-wide-oom handling happens. (means kernel panics if panic_on_oom is true....) To avoid overkill, check memcg's recent behavior before starting system-wide-oom. And this patch also fixes to guarantee "don't accnout against process with TIF_MEMDIE". This is necessary for smooth OOM. [akpm@linux-foundation.org: build fix] Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Badari Pulavarty Cc: Jan Blunck Cc: Hirokazu Takahashi Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6b9e758c98a5..fd150e3a2567 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -560,6 +560,13 @@ void pagefault_out_of_memory(void) /* Got some memory back in the last second. */ return; + /* + * If this is from memcg, oom-killer is already invoked. + * and not worth to go system-wide-oom. + */ + if (mem_cgroup_oom_called(current)) + goto rest_and_return; + if (sysctl_panic_on_oom) panic("out of memory from page fault. panic_on_oom is selected.\n"); @@ -571,6 +578,7 @@ void pagefault_out_of_memory(void) * Give "p" a good chance of killing itself before we * retry to allocate memory. */ +rest_and_return: if (!test_thread_flag(TIF_MEMDIE)) schedule_timeout_uninterruptible(1); } -- cgit v1.2.3 From 7f4d454dee2e0bdd21bafd413d1c53e443a26540 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 7 Jan 2009 18:08:29 -0800 Subject: memcg: avoid deadlock caused by race between oom and cpuset_attach mpol_rebind_mm(), which can be called from cpuset_attach(), does down_write(mm->mmap_sem). This means down_write(mm->mmap_sem) can be called under cgroup_mutex. OTOH, page fault path does down_read(mm->mmap_sem) and calls mem_cgroup_try_charge_xxx(), which may eventually calls mem_cgroup_out_of_memory(). And mem_cgroup_out_of_memory() calls cgroup_lock(). This means cgroup_lock() can be called under down_read(mm->mmap_sem). If those two paths race, deadlock can happen. This patch avoid this deadlock by: - remove cgroup_lock() from mem_cgroup_out_of_memory(). - define new mutex (memcg_tasklist) and serialize mem_cgroup_move_task() (->attach handler of memory cgroup) and mem_cgroup_out_of_memory. Signed-off-by: Daisuke Nishimura Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 5 +++++ mm/oom_kill.c | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'mm/oom_kill.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 435f08dac8bf..861037070f66 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -51,6 +51,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ #define do_swap_account (0) #endif +static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ /* * Statistics for memory cgroup. @@ -827,7 +828,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (!nr_retries--) { if (oom) { + mutex_lock(&memcg_tasklist); mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); + mutex_unlock(&memcg_tasklist); mem_over_limit->last_oom_jiffies = jiffies; } goto nomem; @@ -2211,10 +2214,12 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *old_cont, struct task_struct *p) { + mutex_lock(&memcg_tasklist); /* * FIXME: It's better to move charges of this process from old * memcg to new memcg. But it's just on TODO-List now. */ + mutex_unlock(&memcg_tasklist); } struct cgroup_subsys mem_cgroup_subsys = { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index fd150e3a2567..40ba05061a4f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -429,7 +429,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) unsigned long points = 0; struct task_struct *p; - cgroup_lock(); read_lock(&tasklist_lock); retry: p = select_bad_process(&points, mem); @@ -444,7 +443,6 @@ retry: goto retry; out: read_unlock(&tasklist_lock); - cgroup_unlock(); } #endif -- cgit v1.2.3