From 1e11ad8dc42975d5c2bab7d478f6cd875602eda4 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Fri, 8 Jun 2012 13:21:26 -0700
Subject: mm, oom: fix badness score underflow

If the privileges given to root threads (3% of allowable memory) or a
negative value of /proc/pid/oom_score_adj happen to exceed the amount of
rss of a thread, its badness score overflows as a result of commit
a7f638f999ff ("mm, oom: normalize oom scores to oom_score_adj scale only
for userspace").

Fix this by making the type signed and return 1, meaning the thread is
still eligible for kill, if the value is negative.

Reported-by: Dave Jones
Acked-by: Oleg Nesterov
Signed-off-by: David Rientjes
Signed-off-by: Linus Torvalds
---
 mm/oom_kill.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm/oom_kill.c')

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ed0e19677360..416637f0e924 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -183,7 +183,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                           const nodemask_t *nodemask, unsigned long totalpages)
 {
-        unsigned long points;
+        long points;

         if (oom_unkillable_task(p, memcg, nodemask))
                 return 0;
@@ -223,7 +223,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         * Never return 0 for an eligible task regardless of the root bonus and
         * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
         */
-        return points ? points : 1;
+        return points > 0 ? points : 1;
 }

 /*
-- cgit v1.2.3
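The wraparound this patch addresses is easy to reproduce in a standalone
program. The sketch below is not kernel code: the page counts are made up,
and the real oom_badness() also folds swap and page-table pages into the
score. It only shows why an unsigned points value turns a small root-owned
task into the top kill candidate, while a signed value clamped to 1 does not:

/*
 * Standalone illustration, not kernel code; the values are invented.
 */
#include <stdio.h>

int main(void)
{
        unsigned long totalpages = 1000000;     /* hypothetical pages + swap */
        unsigned long rss = 100;                /* tiny root-owned task */

        /* Old code: unsigned, so a root bonus larger than rss wraps around. */
        unsigned long upoints = rss;
        upoints -= 30 * totalpages / 1000;      /* 3% root bonus */
        printf("unsigned points: %lu\n", upoints);      /* enormous bogus score */

        /* Patched code: signed, and a negative result is clamped to 1. */
        long points = (long)rss;
        points -= 30 * (long)totalpages / 1000;
        printf("signed points:   %ld\n", points > 0 ? points : 1);
        return 0;
}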
From 61eafb00d55dfbccdfce543c6b60e369ff4f8f18 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Wed, 20 Jun 2012 12:52:58 -0700
Subject: mm, oom: fix and cleanup oom score calculations

The divide in p->signal->oom_score_adj * totalpages / 1000 within
oom_badness() was causing an overflow of the signed long data type.

This adds both the root bias and p->signal->oom_score_adj before doing the
normalization which fixes the issue and also cleans up the calculation.

Tested-by: Dave Jones
Signed-off-by: David Rientjes
Cc: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/oom_kill.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'mm/oom_kill.c')

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 416637f0e924..77775138e930 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -184,6 +184,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                           const nodemask_t *nodemask, unsigned long totalpages)
 {
         long points;
+        long adj;

         if (oom_unkillable_task(p, memcg, nodemask))
                 return 0;
@@ -192,7 +193,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         if (!p)
                 return 0;

-        if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+        adj = p->signal->oom_score_adj;
+        if (adj == OOM_SCORE_ADJ_MIN) {
                 task_unlock(p);
                 return 0;
         }
@@ -210,14 +212,11 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
-                points -= 30 * totalpages / 1000;
+                adj -= 30;

-        /*
-         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
-         * either completely disable oom killing or always prefer a certain
-         * task.
-         */
-        points += p->signal->oom_score_adj * totalpages / 1000;
+        /* Normalize to oom_score_adj units */
+        adj *= totalpages / 1000;
+        points += adj;

         /*
         * Never return 0 for an eligible task regardless of the root bonus and
-- cgit v1.2.3
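The reordering matters because of C's usual arithmetic conversions: in the
old expression the signed oom_score_adj is converted to unsigned long before
the multiply, so a negative adjustment can end up as a huge positive bias
instead of a penalty. The sketch below is not kernel code and the values are
made up; it shows one way the old expression misbehaves and how folding the
biases together before normalizing keeps the arithmetic in range:

/*
 * Standalone sketch, not kernel code; the values are invented.
 */
#include <stdio.h>

int main(void)
{
        int oom_score_adj = -500;               /* user biased this task away from the OOM killer */
        unsigned long totalpages = 4000000;     /* hypothetical pages + swap */
        long points = 100000;                   /* pretend rss + swap + page tables */

        /* Old order: int * unsigned long is evaluated as unsigned long,
         * so the negative adjustment is lost. */
        long old_points = points + oom_score_adj * totalpages / 1000;

        /* Patched order: combine the biases first, then scale by
         * totalpages / 1000 in signed arithmetic. */
        long adj = oom_score_adj;
        adj *= (long)(totalpages / 1000);
        long new_points = points + adj;

        printf("old: %ld\n", old_points);       /* bogus: the negative bias got lost */
        printf("new: %ld\n", new_points);       /* negative; oom_badness() returns 1 instead */
        return 0;
}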
From dad7557eb705688040aac134efa5418b66d5ed92 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Wed, 20 Jun 2012 12:53:01 -0700
Subject: mm: fix kernel-doc warnings

Fix kernel-doc warnings such as

  Warning(../mm/page_cgroup.c:432): No description found for parameter 'id'
  Warning(../mm/page_cgroup.c:432): Excess function parameter 'mem' description in 'swap_cgroup_record'

Signed-off-by: Wanpeng Li
Cc: Randy Dunlap
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memblock.c    | 12 ++++++------
 mm/memcontrol.c  |  4 ++--
 mm/oom_kill.c    |  2 +-
 mm/page_cgroup.c |  4 ++--
 mm/pagewalk.c    |  1 -
 mm/percpu-vm.c   |  1 -
 6 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'mm/oom_kill.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index 32a0a5e4d79d..b65b687e2362 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -540,9 +540,9 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
  * __next_free_mem_range - next function for for_each_free_mem_range()
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
  *
  * Find the first free area from *@idx which matches @nid, fill the out
  * parameters, and update *@idx for the next iteration.  The lower 32bit of
@@ -616,9 +616,9 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
  * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
  *
  * Reverse of __next_free_mem_range().
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f8517cd28feb..f72b5e52451a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1234,7 +1234,7 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)

 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
- * @mem: the memory cgroup
+ * @memcg: the memory cgroup
  *
  * Returns the maximum amount of memory @mem can be charged with, in
  * pages.
@@ -1508,7 +1508,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,

 /**
  * test_mem_cgroup_node_reclaimable
- * @mem: the target memcg
+ * @memcg: the target memcg
  * @nid: the node ID to be checked.
  * @noswap : specify true here if the user wants flle only information.
  *
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 77775138e930..ac300c99baf6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -365,7 +365,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,

 /**
  * dump_tasks - dump current memory state of all system tasks
- * @mem: current's memory controller, if constrained
+ * @memcg: current's memory controller, if constrained
  * @nodemask: nodemask passed to page allocator for mempolicy ooms
  *
  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 1ccbd714059c..eb750f851395 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -392,7 +392,7 @@ static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,

 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
- * @end: swap entry to be cmpxchged
+ * @ent: swap entry to be cmpxchged
  * @old: old id
  * @new: new id
  *
@@ -422,7 +422,7 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 /**
  * swap_cgroup_record - record mem_cgroup for this swp_entry.
  * @ent: swap entry to be recorded into
- * @mem: mem_cgroup to be recorded
+ * @id: mem_cgroup to be recorded
  *
  * Returns old value at success, 0 at failure.
  * (Of course, old value can be 0.)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index aa9701e12714..6c118d012bb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -162,7 +162,6 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,

 /**
  * walk_page_range - walk a memory map's page tables with a callback
- * @mm: memory map to walk
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 405d331804c3..3707c71ae4cd 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -360,7 +360,6 @@ err_free:
  * @chunk: chunk to depopulate
  * @off: offset to the area to depopulate
  * @size: size of the area to depopulate in bytes
- * @flush: whether to flush cache and tlb or not
  *
  * For each cpu, depopulate and unmap pages [@page_start,@page_end)
  * from @chunk.  If @flush is true, vcache is flushed before unmapping
-- cgit v1.2.3
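kernel-doc matches each @tag against the C parameter names, so a renamed
parameter whose tag was not updated produces exactly the pair of warnings
quoted above: one for the missing new name and one for the excess old one.
A minimal, hypothetical illustration of the convention (not taken from the
patch; widget_lookup does not exist in the kernel):

/* Hypothetical function, for illustration only. */
struct widget;

/**
 * widget_lookup - find a widget by id
 * @id: identifier to look up
 * @flags: lookup behaviour flags
 *
 * Each parameter must have a matching @name line above, and only those
 * lines; scripts/kernel-doc warns about missing or excess descriptions.
 */
struct widget *widget_lookup(unsigned int id, unsigned long flags);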