Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 52
1 file changed, 38 insertions, 14 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143a8625..dee0f75c3013 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -40,6 +40,7 @@
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/init.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -490,20 +491,40 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
 	/*
-	 * increase mm_users only after we know we will reap something so
-	 * that the mmput_async is called only when we have reaped something
-	 * and delayed __mmput doesn't matter that much
+	 * If the mm has notifiers then we would need to invalidate them around
+	 * unmap_page_range and that is risky because notifiers can sleep and
+	 * what they do is basically undeterministic. So let's have a short
+	 * sleep to give the oom victim some more time.
+	 * TODO: we really want to get rid of this ugly hack and make sure that
+	 * notifiers cannot block for unbounded amount of time and add
+	 * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
 	 */
-	if (!mmget_not_zero(mm)) {
+	if (mm_has_notifiers(mm)) {
 		up_read(&mm->mmap_sem);
+		schedule_timeout_idle(HZ);
 		goto unlock_oom;
 	}
 
 	/*
+	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
+	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
+	 * under mmap_sem for reading because it serializes against the
+	 * down_write();up_write() cycle in exit_mmap().
+	 */
+	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
+		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
+		goto unlock_oom;
+	}
+
+	trace_start_task_reaping(tsk->pid);
+
+	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
 	 * should imply barriers already and the reader would hit a page fault
@@ -538,12 +559,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 			K(get_mm_counter(mm, MM_SHMEMPAGES)));
 	up_read(&mm->mmap_sem);
 
-	/*
-	 * Drop our reference but make sure the mmput slow path is called from a
-	 * different context because we shouldn't risk we get stuck there and
-	 * put the oom_reaper out of the way.
-	 */
-	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +631,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +683,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**
@@ -817,7 +835,8 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
-	 * its children or threads, just set TIF_MEMDIE so it can die quickly
+	 * its children or threads, just give it access to memory reserves
+	 * so it can die quickly
	 */
 	task_lock(p);
 	if (task_will_free_mem(p)) {
@@ -876,10 +895,15 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	/* Get a reference to safely compare mm after task_unlock(victim) */
 	mm = victim->mm;
 	mmgrab(mm);
+
+	/* Raise event before sending signal: task reaper must see this */
+	count_vm_event(OOM_KILL);
+	count_memcg_event_mm(mm, OOM_KILL);
+
 	/*
-	 * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
-	 * the OOM victim from depleting the memory reserves from the user
-	 * space under its control.
+	 * We should send SIGKILL before granting access to memory reserves
+	 * in order to prevent the OOM victim from depleting the memory
+	 * reserves from the user space under its control.
 	 */
 	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
 	mark_oom_victim(victim);
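
The new MMF_OOM_SKIP check above relies on a small locking handshake: exit_mmap() sets the flag and there is a down_write();up_write() cycle on mmap_sem, so the reaper may only trust the flag while it holds mmap_sem for reading. Below is a minimal user-space sketch of that handshake, not the kernel code itself: a pthread rwlock stands in for mmap_sem, an atomic_bool stands in for MMF_OOM_SKIP, and the names fake_mm, exit_mmap_path and reaper_path are invented for illustration. In this sketch the flag is published before the write-side cycle, so a reader that already saw it clear is drained by the wrlock before the exit path proceeds.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_mm {
	pthread_rwlock_t mmap_sem;	/* stands in for mm->mmap_sem   */
	atomic_bool oom_skip;		/* stands in for MMF_OOM_SKIP   */
};

/* Exit path: publish the skip flag, then cycle the write side so any
 * reaper that already holds the read side finishes before we go on. */
static void *exit_mmap_path(void *arg)
{
	struct fake_mm *mm = arg;

	atomic_store(&mm->oom_skip, true);
	pthread_rwlock_wrlock(&mm->mmap_sem);	/* down_write() */
	pthread_rwlock_unlock(&mm->mmap_sem);	/* up_write()   */
	/* from here on, any reader taking the lock will see oom_skip set */
	return NULL;
}

/* Reaper path: only act on the mm if the flag is still clear while we
 * hold the lock for reading; back off in every other case. */
static void *reaper_path(void *arg)
{
	struct fake_mm *mm = arg;

	if (pthread_rwlock_tryrdlock(&mm->mmap_sem))	/* down_read_trylock() */
		return NULL;				/* contended, retry later */
	if (atomic_load(&mm->oom_skip)) {
		pthread_rwlock_unlock(&mm->mmap_sem);
		return NULL;				/* exit path won the race */
	}
	puts("reaping");	/* safe: the exit path cannot pass its wrlock
				 * while we hold the read side */
	pthread_rwlock_unlock(&mm->mmap_sem);
	return NULL;
}

int main(void)
{
	struct fake_mm mm;
	pthread_t a, b;

	pthread_rwlock_init(&mm.mmap_sem, NULL);
	atomic_store(&mm.oom_skip, false);
	pthread_create(&a, NULL, exit_mmap_path, &mm);
	pthread_create(&b, NULL, reaper_path, &mm);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	pthread_rwlock_destroy(&mm.mmap_sem);
	return 0;
}

Build with "cc -pthread". Whichever thread loses the race simply backs off, which mirrors how the reaper gives up on an mm once exit_mmap() has marked it with MMF_OOM_SKIP.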