From 64d37a2baf5e5c0f1009c0ef290a9027de721d66 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 15 Apr 2015 16:13:05 -0700 Subject: mm/memory-failure.c: define page types for action_result() in one place This cleanup patch moves all strings passed to action_result() into a singl= e array action_page_type so that a reader can easily find which kind of actio= n results are possible. And this patch also fixes the odd lines to be printed out, like "unknown page state page" or "free buddy, 2nd try page". [akpm@linux-foundation.org: rename messages, per David] [akpm@linux-foundation.org: s/DIRTY_UNEVICTABLE_LRU/CLEAN_UNEVICTABLE_LRU', per Andi] Signed-off-by: Naoya Horiguchi Reviewed-by: Andi Kleen Cc: Tony Luck Cc: "Xie XiuQi" Cc: Steven Rostedt Cc: Chen Gong Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 108 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 31 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d487f8dc6d39..5fd8931d8c31 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -521,6 +521,52 @@ static const char *action_name[] = { [RECOVERED] = "Recovered", }; +enum action_page_type { + MSG_KERNEL, + MSG_KERNEL_HIGH_ORDER, + MSG_SLAB, + MSG_DIFFERENT_COMPOUND, + MSG_POISONED_HUGE, + MSG_HUGE, + MSG_FREE_HUGE, + MSG_UNMAP_FAILED, + MSG_DIRTY_SWAPCACHE, + MSG_CLEAN_SWAPCACHE, + MSG_DIRTY_MLOCKED_LRU, + MSG_CLEAN_MLOCKED_LRU, + MSG_DIRTY_UNEVICTABLE_LRU, + MSG_CLEAN_UNEVICTABLE_LRU, + MSG_DIRTY_LRU, + MSG_CLEAN_LRU, + MSG_TRUNCATED_LRU, + MSG_BUDDY, + MSG_BUDDY_2ND, + MSG_UNKNOWN, +}; + +static const char * const action_page_types[] = { + [MSG_KERNEL] = "reserved kernel page", + [MSG_KERNEL_HIGH_ORDER] = "high-order kernel page", + [MSG_SLAB] = "kernel slab page", + [MSG_DIFFERENT_COMPOUND] = "different compound page after locking", + [MSG_POISONED_HUGE] = "huge page already hardware poisoned", + [MSG_HUGE] = "huge page", + [MSG_FREE_HUGE] = "free huge page", + [MSG_UNMAP_FAILED] = "unmapping failed page", + [MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", + [MSG_CLEAN_SWAPCACHE] = "clean swapcache page", + [MSG_DIRTY_MLOCKED_LRU] = "dirty mlocked LRU page", + [MSG_CLEAN_MLOCKED_LRU] = "clean mlocked LRU page", + [MSG_DIRTY_UNEVICTABLE_LRU] = "dirty unevictable LRU page", + [MSG_CLEAN_UNEVICTABLE_LRU] = "clean unevictable LRU page", + [MSG_DIRTY_LRU] = "dirty LRU page", + [MSG_CLEAN_LRU] = "clean LRU page", + [MSG_TRUNCATED_LRU] = "already truncated LRU page", + [MSG_BUDDY] = "free buddy page", + [MSG_BUDDY_2ND] = "free buddy page (2nd try)", + [MSG_UNKNOWN] = "unknown page", +}; + /* * XXX: It is possible that a page is isolated from LRU cache, * and then kept in swap cache or failed to remove from page cache. @@ -777,10 +823,10 @@ static int me_huge_page(struct page *p, unsigned long pfn) static struct page_state { unsigned long mask; unsigned long res; - char *msg; + enum action_page_type type; int (*action)(struct page *p, unsigned long pfn); } error_states[] = { - { reserved, reserved, "reserved kernel", me_kernel }, + { reserved, reserved, MSG_KERNEL, me_kernel }, /* * free pages are specially detected outside this table: * PG_buddy pages only make a small fraction of all free pages. @@ -791,31 +837,31 @@ static struct page_state { * currently unused objects without touching them. But just * treat it as standard kernel for now. */ - { slab, slab, "kernel slab", me_kernel }, + { slab, slab, MSG_SLAB, me_kernel }, #ifdef CONFIG_PAGEFLAGS_EXTENDED - { head, head, "huge", me_huge_page }, - { tail, tail, "huge", me_huge_page }, + { head, head, MSG_HUGE, me_huge_page }, + { tail, tail, MSG_HUGE, me_huge_page }, #else - { compound, compound, "huge", me_huge_page }, + { compound, compound, MSG_HUGE, me_huge_page }, #endif - { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, - { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, + { sc|dirty, sc|dirty, MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, + { sc|dirty, sc, MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, - { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, - { mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean }, + { mlock|dirty, mlock|dirty, MSG_DIRTY_MLOCKED_LRU, me_pagecache_dirty }, + { mlock|dirty, mlock, MSG_CLEAN_MLOCKED_LRU, me_pagecache_clean }, - { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, - { unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean }, + { unevict|dirty, unevict|dirty, MSG_DIRTY_UNEVICTABLE_LRU, me_pagecache_dirty }, + { unevict|dirty, unevict, MSG_CLEAN_UNEVICTABLE_LRU, me_pagecache_clean }, - { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, - { lru|dirty, lru, "clean LRU", me_pagecache_clean }, + { lru|dirty, lru|dirty, MSG_DIRTY_LRU, me_pagecache_dirty }, + { lru|dirty, lru, MSG_CLEAN_LRU, me_pagecache_clean }, /* * Catchall entry: must be at end. */ - { 0, 0, "unknown page state", me_unknown }, + { 0, 0, MSG_UNKNOWN, me_unknown }, }; #undef dirty @@ -835,10 +881,10 @@ static struct page_state { * "Dirty/Clean" indication is not 100% accurate due to the possibility of * setting PG_dirty outside page lock. See also comment above set_page_dirty(). */ -static void action_result(unsigned long pfn, char *msg, int result) +static void action_result(unsigned long pfn, enum action_page_type type, int result) { - pr_err("MCE %#lx: %s page recovery: %s\n", - pfn, msg, action_name[result]); + pr_err("MCE %#lx: recovery action for %s: %s\n", + pfn, action_page_types[type], action_name[result]); } static int page_action(struct page_state *ps, struct page *p, @@ -854,11 +900,11 @@ static int page_action(struct page_state *ps, struct page *p, count--; if (count != 0) { printk(KERN_ERR - "MCE %#lx: %s page still referenced by %d users\n", - pfn, ps->msg, count); + "MCE %#lx: %s still referenced by %d users\n", + pfn, action_page_types[ps->type], count); result = FAILED; } - action_result(pfn, ps->msg, result); + action_result(pfn, ps->type, result); /* Could do more checks here if page looks ok */ /* @@ -1106,7 +1152,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) if (!(flags & MF_COUNT_INCREASED) && !get_page_unless_zero(hpage)) { if (is_free_buddy_page(p)) { - action_result(pfn, "free buddy", DELAYED); + action_result(pfn, MSG_BUDDY, DELAYED); return 0; } else if (PageHuge(hpage)) { /* @@ -1123,12 +1169,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } set_page_hwpoison_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage); - action_result(pfn, "free huge", + action_result(pfn, MSG_FREE_HUGE, res ? IGNORED : DELAYED); unlock_page(hpage); return res; } else { - action_result(pfn, "high order kernel", IGNORED); + action_result(pfn, MSG_KERNEL_HIGH_ORDER, IGNORED); return -EBUSY; } } @@ -1150,9 +1196,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (is_free_buddy_page(p)) { if (flags & MF_COUNT_INCREASED) - action_result(pfn, "free buddy", DELAYED); + action_result(pfn, MSG_BUDDY, DELAYED); else - action_result(pfn, "free buddy, 2nd try", DELAYED); + action_result(pfn, MSG_BUDDY_2ND, + DELAYED); return 0; } } @@ -1165,7 +1212,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * If this happens just bail out. */ if (compound_head(p) != hpage) { - action_result(pfn, "different compound page after locking", IGNORED); + action_result(pfn, MSG_DIFFERENT_COMPOUND, IGNORED); res = -EBUSY; goto out; } @@ -1205,8 +1252,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * on the head page to show that the hugepage is hwpoisoned */ if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { - action_result(pfn, "hugepage already hardware poisoned", - IGNORED); + action_result(pfn, MSG_POISONED_HUGE, IGNORED); unlock_page(hpage); put_page(hpage); return 0; @@ -1235,7 +1281,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) != SWAP_SUCCESS) { - action_result(pfn, "unmapping failed", IGNORED); + action_result(pfn, MSG_UNMAP_FAILED, IGNORED); res = -EBUSY; goto out; } @@ -1244,7 +1290,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * Torn down by someone else? */ if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { - action_result(pfn, "already truncated LRU", IGNORED); + action_result(pfn, MSG_TRUNCATED_LRU, IGNORED); res = -EBUSY; goto out; } -- cgit v1.2.3 From bcc54222309c70ebcb6c69c156fba4a13dee0a3b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 15 Apr 2015 16:14:38 -0700 Subject: mm: hugetlb: introduce page_huge_active We are not safe from calling isolate_huge_page() on a hugepage concurrently, which can make the victim hugepage in invalid state and results in BUG_ON(). The root problem of this is that we don't have any information on struct page (so easily accessible) about hugepages' activeness. Note that hugepages' activeness means just being linked to hstate->hugepage_activelist, which is not the same as normal pages' activeness represented by PageActive flag. Normal pages are isolated by isolate_lru_page() which prechecks PageLRU before isolation, so let's do similarly for hugetlb with a new paeg_huge_active(). set/clear_page_huge_active() should be called within hugetlb_lock. But hugetlb_cow() and hugetlb_no_page() don't do this, being justified because in these functions set_page_huge_active() is called right after the hugepage is allocated and no other thread tries to isolate it. [akpm@linux-foundation.org: s/PageHugeActive/page_huge_active/, make it return bool] [fengguang.wu@intel.com: set_page_huge_active() can be static] Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Reviewed-by: Michal Hocko Cc: Mel Gorman Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 41 ++++++++++++++++++++++++++++++++++++++--- mm/memory-failure.c | 14 ++++++++++++-- 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 995c8d65a95c..05407831016b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -924,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size) return NULL; } +/* + * Test to determine whether the hugepage is "active/in-use" (i.e. being linked + * to hstate->hugepage_activelist.) + * + * This function can be called for tail pages, but never returns true for them. + */ +bool page_huge_active(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHuge(page), page); + return PageHead(page) && PagePrivate(&page[1]); +} + +/* never called for tail page */ +static void set_page_huge_active(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHeadHuge(page), page); + SetPagePrivate(&page[1]); +} + +static void clear_page_huge_active(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHeadHuge(page), page); + ClearPagePrivate(&page[1]); +} + void free_huge_page(struct page *page) { /* @@ -952,6 +977,7 @@ void free_huge_page(struct page *page) restore_reserve = true; spin_lock(&hugetlb_lock); + clear_page_huge_active(page); hugetlb_cgroup_uncharge_page(hstate_index(h), pages_per_huge_page(h), page); if (restore_reserve) @@ -2972,6 +2998,7 @@ retry_avoidcopy: copy_user_huge_page(new_page, old_page, address, vma, pages_per_huge_page(h)); __SetPageUptodate(new_page); + set_page_huge_active(new_page); mmun_start = address & huge_page_mask(h); mmun_end = mmun_start + huge_page_size(h); @@ -3084,6 +3111,7 @@ retry: } clear_huge_page(page, address, pages_per_huge_page(h)); __SetPageUptodate(page); + set_page_huge_active(page); if (vma->vm_flags & VM_MAYSHARE) { int err; @@ -3913,19 +3941,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage) bool isolate_huge_page(struct page *page, struct list_head *list) { + bool ret = true; + VM_BUG_ON_PAGE(!PageHead(page), page); - if (!get_page_unless_zero(page)) - return false; spin_lock(&hugetlb_lock); + if (!page_huge_active(page) || !get_page_unless_zero(page)) { + ret = false; + goto unlock; + } + clear_page_huge_active(page); list_move_tail(&page->lru, list); +unlock: spin_unlock(&hugetlb_lock); - return true; + return ret; } void putback_active_hugepage(struct page *page) { VM_BUG_ON_PAGE(!PageHead(page), page); spin_lock(&hugetlb_lock); + set_page_huge_active(page); list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); spin_unlock(&hugetlb_lock); put_page(page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5fd8931d8c31..d9359b770cd9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1586,8 +1586,18 @@ static int soft_offline_huge_page(struct page *page, int flags) } unlock_page(hpage); - /* Keep page count to indicate a given hugepage is isolated. */ - list_move(&hpage->lru, &pagelist); + ret = isolate_huge_page(hpage, &pagelist); + if (ret) { + /* + * get_any_page() and isolate_huge_page() takes a refcount each, + * so need to drop one here. + */ + put_page(hpage); + } else { + pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn); + return -EBUSY; + } + ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { -- cgit v1.2.3