From e0742ccf517f272c4617f4f871db52008e7ae359 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 27 Sep 2010 15:51:55 +1000 Subject: Revert "Merge remote branch 'hwpoison/hwpoison'" This reverts commit 4d55136a4ba430a172d9ae2c8520021976950d46, reversing changes made to 7a574be37ed9b93f5cba4583095bf88359c54c5e. --- Documentation/vm/page-types.c | 2 +- fs/hugetlbfs/inode.c | 15 --- include/linux/hugetlb.h | 11 +- include/linux/migrate.h | 16 --- mm/hugetlb.c | 228 +++++++++++++---------------------------- mm/memory-failure.c | 102 ++----------------- mm/migrate.c | 232 ++++-------------------------------------- mm/rmap.c | 25 +++-- 8 files changed, 116 insertions(+), 515 deletions(-) diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index cc96ee2666f2..ccd951fa94ee 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -478,7 +478,7 @@ static void prepare_hwpoison_fd(void) } if (opt_unpoison && !hwpoison_forget_fd) { - sprintf(buf, "%s/unpoison-pfn", hwpoison_debug_fs); + sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs); hwpoison_forget_fd = checked_open(buf, O_WRONLY); } } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a14328d270e8..113eba3d3c38 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -574,19 +573,6 @@ static int hugetlbfs_set_page_dirty(struct page *page) return 0; } -static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) -{ - int rc; - - rc = migrate_huge_page_move_mapping(mapping, newpage, page); - if (rc) - return rc; - migrate_page_copy(newpage, page); - - return 0; -} - static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); @@ -673,7 +659,6 @@ static const struct address_space_operations hugetlbfs_aops = { .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, - .migratepage = hugetlbfs_migrate_page, }; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 796f30e00806..f479700df61b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -43,8 +43,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, int acctflags); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); -int dequeue_hwpoisoned_huge_page(struct page *page); -void copy_huge_page(struct page *dst, struct page *src); +void __isolate_hwpoisoned_huge_page(struct page *page); extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; @@ -102,10 +101,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define huge_pte_offset(mm, address) 0 -#define dequeue_hwpoisoned_huge_page(page) 0 -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} +#define __isolate_hwpoisoned_huge_page(page) 0 #define hugetlb_change_protection(vma, address, end, newprot) @@ -232,8 +228,6 @@ struct huge_bootmem_page { struct hstate *hstate; }; -struct page *alloc_huge_page_node(struct hstate *h, int nid); - /* arch callback */ int __init alloc_bootmem_huge_page(struct hstate *h); @@ -309,7 +303,6 @@ static inline struct hstate *page_hstate(struct page *page) #else struct hstate {}; 
-#define alloc_huge_page_node(h, nid) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL #define hstate_vma(v) NULL diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3c1941e40e61..7238231b8dd4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -14,8 +14,6 @@ extern int migrate_page(struct address_space *, struct page *, struct page *); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, int offlining); -extern int migrate_huge_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -25,17 +23,12 @@ extern int migrate_prep_local(void); extern int migrate_vmas(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, unsigned long flags); -extern void migrate_page_copy(struct page *newpage, struct page *page); -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page); #else #define PAGE_MIGRATION 0 static inline void putback_lru_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, int offlining) { return -ENOSYS; } -static inline int migrate_huge_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } @@ -47,15 +40,6 @@ static inline int migrate_vmas(struct mm_struct *mm, return -ENOSYS; } -static inline void migrate_page_copy(struct page *newpage, - struct page *page) {} - -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) -{ - return -ENOSYS; -} - /* Possible settings for the migrate_page() method in address_operations */ #define migrate_page NULL #define fail_migrate_page NULL diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7123270bfb38..c03273807182 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page, } } -static void copy_user_gigantic_page(struct page *dst, struct page *src, +static void copy_gigantic_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); struct page *dst_base = dst; struct page *src_base = src; - + might_sleep(); for (i = 0; i < pages_per_huge_page(h); ) { cond_resched(); copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); @@ -440,15 +440,14 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src, src = mem_map_next(src, src_base, i); } } - -static void copy_user_huge_page(struct page *dst, struct page *src, +static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { - copy_user_gigantic_page(dst, src, addr, vma); + copy_gigantic_page(dst, src, addr, vma); return; } @@ -459,40 +458,6 @@ static void copy_user_huge_page(struct page *dst, struct page *src, } } -static void copy_gigantic_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < pages_per_huge_page(h); ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = 
mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - - if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { - copy_gigantic_page(dst, src); - return; - } - - might_sleep(); - for (i = 0; i < pages_per_huge_page(h); i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); @@ -501,24 +466,11 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) h->free_huge_pages_node[nid]++; } -static struct page *dequeue_huge_page_node(struct hstate *h, int nid) -{ - struct page *page; - - if (list_empty(&h->hugepage_freelists[nid])) - return NULL; - page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); - list_del(&page->lru); - set_page_refcounted(page); - h->free_huge_pages--; - h->free_huge_pages_node[nid]--; - return page; -} - static struct page *dequeue_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address, int avoid_reserve) { + int nid; struct page *page = NULL; struct mempolicy *mpol; nodemask_t *nodemask; @@ -544,13 +496,19 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, for_each_zone_zonelist_nodemask(zone, z, zonelist, MAX_NR_ZONES - 1, nodemask) { - if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) { - page = dequeue_huge_page_node(h, zone_to_nid(zone)); - if (page) { - if (!avoid_reserve) - decrement_hugepage_resv_vma(h, vma); - break; - } + nid = zone_to_nid(zone); + if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && + !list_empty(&h->hugepage_freelists[nid])) { + page = list_entry(h->hugepage_freelists[nid].next, + struct page, lru); + list_del(&page->lru); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; + + if (!avoid_reserve) + decrement_hugepage_resv_vma(h, vma); + + break; } } err: @@ -812,10 +770,11 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, return ret; } -static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) +static struct page *alloc_buddy_huge_page(struct hstate *h, + struct vm_area_struct *vma, unsigned long address) { struct page *page; - unsigned int r_nid; + unsigned int nid; if (h->order >= MAX_ORDER) return NULL; @@ -853,14 +812,9 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) } spin_unlock(&hugetlb_lock); - if (nid == NUMA_NO_NODE) - page = alloc_pages(htlb_alloc_mask|__GFP_COMP| - __GFP_REPEAT|__GFP_NOWARN, - huge_page_order(h)); - else - page = alloc_pages_exact_node(nid, - htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| - __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); + page = alloc_pages(htlb_alloc_mask|__GFP_COMP| + __GFP_REPEAT|__GFP_NOWARN, + huge_page_order(h)); if (page && arch_prepare_hugepage(page)) { __free_pages(page, huge_page_order(h)); @@ -869,13 +823,19 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) spin_lock(&hugetlb_lock); if (page) { - r_nid = page_to_nid(page); + /* + * This page is now managed by the hugetlb allocator and has + * no users -- drop the buddy allocator's reference. 
+ */ + put_page_testzero(page); + VM_BUG_ON(page_count(page)); + nid = page_to_nid(page); set_compound_page_dtor(page, free_huge_page); /* * We incremented the global counters already */ - h->nr_huge_pages_node[r_nid]++; - h->surplus_huge_pages_node[r_nid]++; + h->nr_huge_pages_node[nid]++; + h->surplus_huge_pages_node[nid]++; __count_vm_event(HTLB_BUDDY_PGALLOC); } else { h->nr_huge_pages--; @@ -887,25 +847,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) return page; } -/* - * This allocation function is useful in the context where vma is irrelevant. - * E.g. soft-offlining uses this function because it only cares physical - * address of error page. - */ -struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - struct page *page; - - spin_lock(&hugetlb_lock); - page = dequeue_huge_page_node(h, nid); - spin_unlock(&hugetlb_lock); - - if (!page) - page = alloc_buddy_huge_page(h, nid); - - return page; -} - /* * Increase the hugetlb pool such that it can accomodate a reservation * of size 'delta'. @@ -930,14 +871,17 @@ static int gather_surplus_pages(struct hstate *h, int delta) retry: spin_unlock(&hugetlb_lock); for (i = 0; i < needed; i++) { - page = alloc_buddy_huge_page(h, NUMA_NO_NODE); - if (!page) + page = alloc_buddy_huge_page(h, NULL, 0); + if (!page) { /* * We were not able to allocate enough pages to * satisfy the entire reservation so we free what * we've allocated so far. */ + spin_lock(&hugetlb_lock); + needed = 0; goto free; + } list_add(&page->lru, &surplus_list); } @@ -964,31 +908,31 @@ retry: needed += allocated; h->resv_huge_pages += delta; ret = 0; - - spin_unlock(&hugetlb_lock); +free: /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { if ((--needed) < 0) break; list_del(&page->lru); - /* - * This page is now managed by the hugetlb allocator and has - * no users -- drop the buddy allocator's reference. - */ - put_page_testzero(page); - VM_BUG_ON(page_count(page)); enqueue_huge_page(h, page); } /* Free unnecessary surplus pages to the buddy allocator */ -free: if (!list_empty(&surplus_list)) { + spin_unlock(&hugetlb_lock); list_for_each_entry_safe(page, tmp, &surplus_list, lru) { list_del(&page->lru); - put_page(page); + /* + * The page has a reference count of zero already, so + * call free_huge_page directly instead of using + * put_page. This must be done with hugetlb_lock + * unlocked which is safe because free_huge_page takes + * hugetlb_lock before deciding how to free the page. 
+ */ + free_huge_page(page); } + spin_lock(&hugetlb_lock); } - spin_lock(&hugetlb_lock); return ret; } @@ -1108,13 +1052,14 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, spin_unlock(&hugetlb_lock); if (!page) { - page = alloc_buddy_huge_page(h, NUMA_NO_NODE); + page = alloc_buddy_huge_page(h, vma, addr); if (!page) { hugetlb_put_quota(inode->i_mapping, chg); return ERR_PTR(-VM_FAULT_SIGBUS); } } + set_page_refcounted(page); set_page_private(page, (unsigned long) mapping); vma_commit_reservation(h, vma, addr); @@ -2208,19 +2153,6 @@ nomem: return -ENOMEM; } -static int is_hugetlb_entry_migration(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return 0; - swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_migration_entry(swp)) { - return 1; - } else - return 0; -} - static int is_hugetlb_entry_hwpoisoned(pte_t pte) { swp_entry_t swp; @@ -2451,7 +2383,7 @@ retry_avoidcopy: if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - copy_user_huge_page(new_page, old_page, address, vma); + copy_huge_page(new_page, old_page, address, vma); __SetPageUptodate(new_page); /* @@ -2583,18 +2515,21 @@ retry: hugepage_add_new_anon_rmap(page, vma, address); } } else { - /* - * If memory error occurs between mmap() and fault, some process - * don't have hwpoisoned swap entry for errored virtual address. - * So we need to block hugepage fault by PG_hwpoison bit check. - */ - if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON; - goto backout_unlocked; - } page_dup_rmap(page); } + /* + * Since memory error handler replaces pte into hwpoison swap entry + * at the time of error handling, a process which reserved but not have + * the mapping to the error hugepage does not have hwpoison swap entry. + * So we need to block accesses from such a process by checking + * PG_hwpoison bit here. + */ + if (unlikely(PageHWPoison(page))) { + ret = VM_FAULT_HWPOISON; + goto backout_unlocked; + } + /* * If we are going to COW a private mapping later, we examine the * pending reservations for this page now. This will ensure that @@ -2652,10 +2587,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, ptep = huge_pte_offset(mm, address); if (ptep) { entry = huge_ptep_get(ptep); - if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait(mm, (pmd_t *)ptep, address); - return 0; - } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) + if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON; } @@ -2946,40 +2878,18 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) hugetlb_acct_memory(h, -(chg - freed)); } -/* Should be called in hugetlb_lock */ -static int is_hugepage_on_freelist(struct page *hpage) -{ - struct page *page; - struct page *tmp; - struct hstate *h = page_hstate(hpage); - int nid = page_to_nid(hpage); - - list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru) - if (page == hpage) - return 1; - return 0; -} - -#ifdef CONFIG_MEMORY_FAILURE /* * This function is called from memory failure code. * Assume the caller holds page lock of the head page. 
*/ -int dequeue_hwpoisoned_huge_page(struct page *hpage) +void __isolate_hwpoisoned_huge_page(struct page *hpage) { struct hstate *h = page_hstate(hpage); int nid = page_to_nid(hpage); - int ret = -EBUSY; spin_lock(&hugetlb_lock); - if (is_hugepage_on_freelist(hpage)) { - list_del(&hpage->lru); - set_page_refcounted(hpage); - h->free_huge_pages--; - h->free_huge_pages_node[nid]--; - ret = 0; - } + list_del(&hpage->lru); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; spin_unlock(&hugetlb_lock); - return ret; } -#endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 483a59fbff25..9c26eeca1342 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -693,10 +693,11 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) * Issues: * - Error on hugepage is contained in hugepage unit (not in raw page unit.) * To narrow down kill region to one page, we need to break up pmd. + * - To support soft-offlining for hugepage, we need to support hugepage + * migration. */ static int me_huge_page(struct page *p, unsigned long pfn) { - int res = 0; struct page *hpage = compound_head(p); /* * We can safely recover from error on free or reserved (i.e. @@ -709,9 +710,8 @@ static int me_huge_page(struct page *p, unsigned long pfn) * so there is no race between isolation and mapping/unmapping. */ if (!(page_mapping(hpage) || PageAnon(hpage))) { - res = dequeue_hwpoisoned_huge_page(hpage); - if (!res) - return RECOVERED; + __isolate_hwpoisoned_huge_page(hpage); + return RECOVERED; } return DELAYED; } @@ -981,10 +981,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) * We need/can do nothing about count=0 pages. * 1) it's a free page, and therefore in safe hand: * prep_new_page() will be the gate keeper. - * 2) it's a free hugepage, which is also safe: - * an affected hugepage will be dequeued from hugepage freelist, - * so there's no concern about reusing it ever after. - * 3) it's part of a non-compound high order page. + * 2) it's part of a non-compound high order page. * Implies some kernel user: cannot stop them from * R/W the page; let's pray that the page has been * used and will be freed some time later. @@ -996,24 +993,6 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) if (is_free_buddy_page(p)) { action_result(pfn, "free buddy", DELAYED); return 0; - } else if (PageHuge(hpage)) { - /* - * Check "just unpoisoned", "filter hit", and - * "race with other subpage." - */ - lock_page_nosync(hpage); - if (!PageHWPoison(hpage) - || (hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &mce_bad_pages); - return 0; - } - set_page_hwpoison_huge_page(hpage); - res = dequeue_hwpoisoned_huge_page(hpage); - action_result(pfn, "free huge", - res ? IGNORED : DELAYED); - unlock_page(hpage); - return res; } else { action_result(pfn, "high order kernel", IGNORED); return -EBUSY; @@ -1175,16 +1154,6 @@ int unpoison_memory(unsigned long pfn) nr_pages = 1 << compound_order(page); if (!get_page_unless_zero(page)) { - /* - * Since HWPoisoned hugepage should have non-zero refcount, - * race between memory failure and unpoison seems to happen. - * In such case unpoison fails and memory failure runs - * to the end. 
- */ - if (PageHuge(page)) { - pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn); - return 0; - } if (TestClearPageHWPoison(p)) atomic_long_sub(nr_pages, &mce_bad_pages); pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn); @@ -1202,9 +1171,9 @@ int unpoison_memory(unsigned long pfn) pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn); atomic_long_sub(nr_pages, &mce_bad_pages); freeit = 1; - if (PageHuge(page)) - clear_page_hwpoison_huge_page(page); } + if (PageHuge(p)) + clear_page_hwpoison_huge_page(page); unlock_page(page); put_page(page); @@ -1218,11 +1187,7 @@ EXPORT_SYMBOL(unpoison_memory); static struct page *new_page(struct page *p, unsigned long private, int **x) { int nid = page_to_nid(p); - if (PageHuge(p)) - return alloc_huge_page_node(page_hstate(compound_head(p)), - nid); - else - return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); + return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); } /* @@ -1250,15 +1215,8 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) * was free. */ set_migratetype_isolate(p); - /* - * When the target page is a free hugepage, just remove it - * from free hugepage list. - */ if (!get_page_unless_zero(compound_head(p))) { - if (PageHuge(p)) { - pr_debug("get_any_page: %#lx free huge page\n", pfn); - ret = dequeue_hwpoisoned_huge_page(compound_head(p)); - } else if (is_free_buddy_page(p)) { + if (is_free_buddy_page(p)) { pr_debug("get_any_page: %#lx free buddy page\n", pfn); /* Set hwpoison bit while page is still isolated */ SetPageHWPoison(p); @@ -1277,45 +1235,6 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) return ret; } -static int soft_offline_huge_page(struct page *page, int flags) -{ - int ret; - unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_head(page); - LIST_HEAD(pagelist); - - ret = get_any_page(page, pfn, flags); - if (ret < 0) - return ret; - if (ret == 0) - goto done; - - if (PageHWPoison(hpage)) { - put_page(hpage); - pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn); - return -EBUSY; - } - - /* Keep page count to indicate a given hugepage is isolated. */ - - list_add(&hpage->lru, &pagelist); - ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); - if (ret) { - pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", - pfn, ret, page->flags); - if (ret > 0) - ret = -EIO; - return ret; - } -done: - if (!PageHWPoison(hpage)) - atomic_long_add(1 << compound_order(hpage), &mce_bad_pages); - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - /* keep elevated page count for bad page */ - return ret; -} - /** * soft_offline_page - Soft offline a page. 
* @page: page to offline @@ -1343,9 +1262,6 @@ int soft_offline_page(struct page *page, int flags) int ret; unsigned long pfn = page_to_pfn(page); - if (PageHuge(page)) - return soft_offline_huge_page(page, flags); - ret = get_any_page(page, pfn, flags); if (ret < 0) return ret; diff --git a/mm/migrate.c b/mm/migrate.c index 55dbc45880c6..38e7cad782f4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include "internal.h" @@ -96,34 +95,26 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, pte_t *ptep, pte; spinlock_t *ptl; - if (unlikely(PageHuge(new))) { - ptep = huge_pte_offset(mm, addr); - if (!ptep) - goto out; - ptl = &mm->page_table_lock; - } else { - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) - goto out; + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + goto out; - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - goto out; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + goto out; - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - goto out; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + goto out; - ptep = pte_offset_map(pmd, addr); + ptep = pte_offset_map(pmd, addr); - if (!is_swap_pte(*ptep)) { - pte_unmap(ptep); - goto out; - } - - ptl = pte_lockptr(mm, pmd); - } + if (!is_swap_pte(*ptep)) { + pte_unmap(ptep); + goto out; + } + ptl = pte_lockptr(mm, pmd); spin_lock(ptl); pte = *ptep; if (!is_swap_pte(pte)) @@ -139,17 +130,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); if (is_write_migration_entry(entry)) pte = pte_mkwrite(pte); - if (PageHuge(new)) - pte = pte_mkhuge(pte); flush_cache_page(vma, addr, pte_pfn(pte)); set_pte_at(mm, addr, ptep, pte); - if (PageHuge(new)) { - if (PageAnon(new)) - hugepage_add_anon_rmap(new, vma, addr); - else - page_dup_rmap(new); - } else if (PageAnon(new)) + if (PageAnon(new)) page_add_anon_rmap(new, vma, addr); else page_add_file_rmap(new); @@ -291,60 +275,12 @@ static int migrate_page_move_mapping(struct address_space *mapping, return 0; } -/* - * The expected number of remaining references is the same as that - * of migrate_page_move_mapping(). 
- */ -int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) -{ - int expected_count; - void **pslot; - - if (!mapping) { - if (page_count(page) != 1) - return -EAGAIN; - return 0; - } - - spin_lock_irq(&mapping->tree_lock); - - pslot = radix_tree_lookup_slot(&mapping->page_tree, - page_index(page)); - - expected_count = 2 + page_has_private(page); - if (page_count(page) != expected_count || - (struct page *)radix_tree_deref_slot(pslot) != page) { - spin_unlock_irq(&mapping->tree_lock); - return -EAGAIN; - } - - if (!page_freeze_refs(page, expected_count)) { - spin_unlock_irq(&mapping->tree_lock); - return -EAGAIN; - } - - get_page(newpage); - - radix_tree_replace_slot(pslot, newpage); - - page_unfreeze_refs(page, expected_count); - - __put_page(page); - - spin_unlock_irq(&mapping->tree_lock); - return 0; -} - /* * Copy the page to its new location */ -void migrate_page_copy(struct page *newpage, struct page *page) +static void migrate_page_copy(struct page *newpage, struct page *page) { - if (PageHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); + copy_highpage(newpage, page); if (PageError(page)) SetPageError(newpage); @@ -787,92 +723,6 @@ move_newpage: return rc; } -/* - * Counterpart of unmap_and_move_page() for hugepage migration. - * - * This function doesn't wait the completion of hugepage I/O - * because there is no race between I/O and migration for hugepage. - * Note that currently hugepage I/O occurs only in direct I/O - * where no lock is held and PG_writeback is irrelevant, - * and writeback status of all subpages are counted in the reference - * count of the head page (i.e. if all subpages of a 2MB hugepage are - * under direct I/O, the reference of the head page is 512 and a bit more.) - * This means that when we try to migrate hugepage whose subpages are - * doing direct I/O, some references remain after try_to_unmap() and - * hugepage migration fails without data corruption. - * - * There is also no race when direct I/O is issued on the page under migration, - * because then pte is replaced with migration swap entry and direct I/O code - * will wait in the page fault for migration to complete. 
- */ -static int unmap_and_move_huge_page(new_page_t get_new_page, - unsigned long private, struct page *hpage, - int force, int offlining) -{ - int rc = 0; - int *result = NULL; - struct page *new_hpage = get_new_page(hpage, private, &result); - int rcu_locked = 0; - struct anon_vma *anon_vma = NULL; - - if (!new_hpage) - return -ENOMEM; - - rc = -EAGAIN; - - if (!trylock_page(hpage)) { - if (!force) - goto out; - lock_page(hpage); - } - - if (PageAnon(hpage)) { - rcu_read_lock(); - rcu_locked = 1; - - if (page_mapped(hpage)) { - anon_vma = page_anon_vma(hpage); - atomic_inc(&anon_vma->external_refcount); - } - } - - try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); - - if (!page_mapped(hpage)) - rc = move_to_new_page(new_hpage, hpage, 1); - - if (rc) - remove_migration_ptes(hpage, hpage); - - if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, - &anon_vma->lock)) { - int empty = list_empty(&anon_vma->head); - spin_unlock(&anon_vma->lock); - if (empty) - anon_vma_free(anon_vma); - } - - if (rcu_locked) - rcu_read_unlock(); -out: - unlock_page(hpage); - - if (rc != -EAGAIN) { - list_del(&hpage->lru); - put_page(hpage); - } - - put_page(new_hpage); - - if (result) { - if (rc) - *result = rc; - else - *result = page_to_nid(new_hpage); - } - return rc; -} - /* * migrate_pages * @@ -938,52 +788,6 @@ out: return nr_failed + retry; } -int migrate_huge_pages(struct list_head *from, - new_page_t get_new_page, unsigned long private, int offlining) -{ - int retry = 1; - int nr_failed = 0; - int pass = 0; - struct page *page; - struct page *page2; - int rc; - - for (pass = 0; pass < 10 && retry; pass++) { - retry = 0; - - list_for_each_entry_safe(page, page2, from, lru) { - cond_resched(); - - rc = unmap_and_move_huge_page(get_new_page, - private, page, pass > 2, offlining); - - switch(rc) { - case -ENOMEM: - goto out; - case -EAGAIN: - retry++; - break; - case 0: - break; - default: - /* Permanent failure */ - nr_failed++; - break; - } - } - } - rc = 0; -out: - - list_for_each_entry_safe(page, page2, from, lru) - put_page(page); - - if (rc) - return rc; - - return nr_failed + retry; -} - #ifdef CONFIG_NUMA /* * Move a list of individual pages diff --git a/mm/rmap.c b/mm/rmap.c index 1ad201707d05..87343122f390 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -774,10 +774,10 @@ void page_move_anon_rmap(struct page *page, } /** - * __page_set_anon_rmap - set up new anonymous rmap - * @page: Page to add to rmap - * @vma: VM area to add page to. - * @address: User virtual address of the mapping + * __page_set_anon_rmap - setup new anonymous rmap + * @page: the page to add the mapping to + * @vma: the vm area in which the mapping is added + * @address: the user virtual address mapped * @exclusive: the page is exclusively owned by the current process */ static void __page_set_anon_rmap(struct page *page, @@ -787,16 +787,25 @@ static void __page_set_anon_rmap(struct page *page, BUG_ON(!anon_vma); - if (PageAnon(page)) - return; - /* * If the page isn't exclusively mapped into this vma, * we must use the _oldest_ possible anon_vma for the * page mapping! */ - if (!exclusive) + if (!exclusive) { + if (PageAnon(page)) + return; anon_vma = anon_vma->root; + } else { + /* + * In this case, swapped-out-but-not-discarded swap-cache + * is remapped. So, no need to update page->mapping here. + * We convice anon_vma poitned by page->mapping is not obsolete + * because vma->anon_vma is necessary to be a family of it. 
+	 */
+	if (PageAnon(page))
+		return;
+	}
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;