summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2010-08-02 20:41:17 +0200
committerAndrea Arcangeli <aarcange@redhat.com>2010-08-02 18:43:12 +0000
commit114214fdcf0bdebee70103f1c1807ffe8f04ad56 (patch)
tree127104b90b85a25cd5801728b702c0178c0d61a9
parentbd0e18c73a2af88d782144f3db5078b123ede569 (diff)
split_huge_page paging
Paging logic that splits the page before it is unmapped and added to swap to ensure backwards compatibility with the legacy swap code. Eventually swap should natively pageout the hugepages to increase performance and decrease seeking and fragmentation of swap space. swapoff can just skip over huge pmd as they cannot be part of swap yet. In add_to_swap be careful to split the page only if we got a valid swap entry so we don't split hugepages with a full swap. In theory we could split pages before isolating them during the lru scan, but for khugepaged to be safe, I'm relying on either mmap_sem write mode, or PG_lock taken, so split_huge_page has to run either with mmap_sem read/write mode or PG_lock taken. Calling it from isolate_lru_page would make locking more complicated, in addition to that split_huge_page would deadlock if called by __isolate_lru_page because it has to take the lru lock to add the tail pages. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Rik van Riel <riel@redhat.com>
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/rmap.c1
-rw-r--r--mm/swap_state.c6
-rw-r--r--mm/swapfile.c2
4 files changed, 11 insertions, 0 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 620b0b461593..be8fb2e44944 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -379,6 +379,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
struct task_struct *tsk;
struct anon_vma *av;
+ if (unlikely(split_huge_page(page)))
+ return;
read_lock(&tasklist_lock);
av = page_lock_anon_vma(page);
if (av == NULL) /* Not actually mapped anymore */
diff --git a/mm/rmap.c b/mm/rmap.c
index 680bdc933d0f..ae4ff0269bb3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1329,6 +1329,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
int ret;
BUG_ON(!PageLocked(page));
+ BUG_ON(PageTransHuge(page));
if (unlikely(PageKsm(page)))
ret = try_to_unmap_ksm(page, flags);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e10f5833167f..5c8cfabbc9bc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -157,6 +157,12 @@ int add_to_swap(struct page *page)
if (!entry.val)
return 0;
+ if (unlikely(PageTransHuge(page)))
+ if (unlikely(split_huge_page(page))) {
+ swapcache_free(entry, NULL);
+ return 0;
+ }
+
/*
* Radix-tree node allocations from PF_MEMALLOC contexts could
* completely exhaust the page allocator. __GFP_NOMEMALLOC
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 03aa2d55f1a2..0cb815c6bd5d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -944,6 +944,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (unlikely(pmd_trans_huge(*pmd)))
+ continue;
if (pmd_none_or_clear_bad(pmd))
continue;
ret = unuse_pte_range(vma, pmd, addr, next, entry, page);