author     Matthew Wilcox (Oracle) <willy@infradead.org>   2020-06-27 22:19:08 -0400
committer  Matthew Wilcox (Oracle) <willy@infradead.org>   2020-10-26 14:17:18 -0400
commit     d7b0d86188965c9152eed8edc55027479bd722e9 (patch)
tree       8fcd7b4eb7ee1b13e4b493ea69994dd992d31881 /mm
parent     b1cbfaa28309a0033d3d81a763cbd4911ef58b50 (diff)
mm: Use multi-index entries in the page cache
We currently store order-N THPs as 2^N consecutive entries. While this consumes rather more memory than necessary, it also turns out to be buggy. A writeback operation which starts in the middle of a dirty THP will not notice, as the dirty bit is only set on the head index. With multi-index entries, the dirty bit will be found no matter where in the THP the iteration starts.

This does end up simplifying the page cache slightly, although not as much as I had hoped.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
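For readers unfamiliar with multi-index entries: a single XArray entry can cover 2^order consecutive indices, so a lookup (or a mark such as the dirty bit) at any index inside a THP resolves to the same entry. The following is a minimal illustrative sketch, not part of this patch, using the kernel's XArray API; the helper names store_thp_entry() and lookup_any_index() are hypothetical.

/* Illustrative sketch only; assumes a kernel build with <linux/xarray.h>. */
#include <linux/xarray.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical helper: store one order-N entry covering 2^N indices. */
static int store_thp_entry(struct xarray *xa, struct page *head,
                           unsigned long index, unsigned int order)
{
        XA_STATE_ORDER(xas, xa, index, order);

        do {
                xas_lock_irq(&xas);
                xas_store(&xas, head);
                xas_unlock_irq(&xas);
        } while (xas_nomem(&xas, GFP_KERNEL));

        return xas_error(&xas);
}

/* Hypothetical helper: any index inside the THP finds the same entry. */
static struct page *lookup_any_index(struct xarray *xa, unsigned long index)
{
        return xa_load(xa, index);
}

Compare this with the previous scheme, where the page had to be stored at each of the 2^N indices individually, as the loops removed from mm/migrate.c and mm/shmem.c below show.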
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c      68
-rw-r--r--  mm/huge_memory.c  20
-rw-r--r--  mm/khugepaged.c   12
-rw-r--r--  mm/migrate.c       8
-rw-r--r--  mm/shmem.c        11
-rw-r--r--  mm/truncate.c      2
6 files changed, 74 insertions, 47 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 00de12d42bc4..2985408019fe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -127,13 +127,12 @@ static void page_cache_delete(struct address_space *mapping,
/* hugetlb pages are represented by a single entry in the xarray */
if (!PageHuge(page)) {
- xas_set_order(&xas, page->index, compound_order(page));
- nr = compound_nr(page);
+ xas_set_order(&xas, page->index, thp_order(page));
+ nr = thp_nr_pages(page);
}
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(nr != 1 && shadow, page);
xas_store(&xas, shadow);
xas_init_marks(&xas);
@@ -311,19 +310,12 @@ static void page_cache_delete_batch(struct address_space *mapping,
WARN_ON_ONCE(!PageLocked(page));
- if (page->index == xas.xa_index)
- page->mapping = NULL;
+ page->mapping = NULL;
/* Leave page->index set: truncation lookup relies on it */
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + compound_nr(page) - 1 == xas.xa_index)
- i++;
+ i++;
xas_store(&xas, NULL);
- total_pages++;
+ total_pages += thp_nr_pages(page);
}
mapping->nrpages -= total_pages;
}
@@ -1956,20 +1948,24 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
indices[pvec->nr] = xas.xa_index;
if (!pagevec_add(pvec, page))
break;
- goto next;
+ continue;
unlock:
unlock_page(page);
put:
put_page(page);
-next:
- if (!xa_is_value(page) && PageTransHuge(page))
- xas_set(&xas, page->index + thp_nr_pages(page));
}
rcu_read_unlock();
return pagevec_count(pvec);
}
+static inline bool thp_last_tail(struct page *head, pgoff_t index)
+{
+ if (!PageTransCompound(head) || PageHuge(head))
+ return true;
+ return index == head->index + thp_nr_pages(head) - 1;
+}
+
/**
* find_get_pages_range - gang pagecache lookup
* @mapping: The address_space to search
@@ -2008,11 +2004,17 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
if (xa_is_value(page))
continue;
+again:
pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
+ if (!thp_last_tail(page, xas.xa_index)) {
+ xas.xa_index++;
+ page_ref_inc(page);
+ goto again;
+ }
}
/*
@@ -2071,9 +2073,15 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
if (unlikely(page != xas_reload(&xas)))
goto put_page;
+again:
pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages)
break;
+ if (!thp_last_tail(page, xas.xa_index)) {
+ xas.xa_index++;
+ page_ref_inc(page);
+ goto again;
+ }
continue;
put_page:
put_page(page);
@@ -2905,6 +2913,12 @@ void filemap_map_pages(struct vm_fault *vmf,
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
+ max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+ if (max_idx == 0)
+ return;
+ if (end_pgoff >= max_idx)
+ end_pgoff = max_idx - 1;
+
rcu_read_lock();
xas_for_each(&xas, head, end_pgoff) {
if (xas_retry(&xas, head))
@@ -2924,20 +2938,16 @@ void filemap_map_pages(struct vm_fault *vmf,
/* Has the page moved or been split? */
if (unlikely(head != xas_reload(&xas)))
goto skip;
- page = find_subpage(head, xas.xa_index);
-
- if (!PageUptodate(head) ||
- PageReadahead(page) ||
- PageHWPoison(page))
+ if (!PageUptodate(head) || PageReadahead(head))
goto skip;
if (!trylock_page(head))
goto skip;
-
if (head->mapping != mapping || !PageUptodate(head))
goto unlock;
- max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
- if (xas.xa_index >= max_idx)
+ page = find_subpage(head, xas.xa_index);
+again:
+ if (PageHWPoison(page))
goto unlock;
if (mmap_miss > 0)
@@ -2949,6 +2959,14 @@ void filemap_map_pages(struct vm_fault *vmf,
last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, page))
goto unlock;
+ if (!thp_last_tail(head, xas.xa_index)) {
+ xas.xa_index++;
+ page++;
+ page_ref_inc(head);
+ if (xas.xa_index >= end_pgoff)
+ goto unlock;
+ goto again;
+ }
unlock_page(head);
goto next;
unlock:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4448b9cb4327..87460610b750 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2624,6 +2624,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct page *head = compound_head(page);
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct deferred_split *ds_queue = get_deferred_split_queue(head);
+ XA_STATE(xas, &head->mapping->i_pages, head->index);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int count, mapcount, extra_pins, ret;
@@ -2688,19 +2689,28 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);
+ if (mapping) {
+ xas_split_alloc(&xas, head, thp_order(head),
+ mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+ if (xas_error(&xas)) {
+ ret = xas_error(&xas);
+ goto out_unlock;
+ }
+ }
+
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irqsave(&pgdata->lru_lock, flags);
if (mapping) {
- XA_STATE(xas, &mapping->i_pages, page_index(head));
-
/*
* Check if the head page is present in page cache.
* We assume all tail are present too, if head is there.
*/
- xa_lock(&mapping->i_pages);
+ xas_lock(&xas);
+ xas_reset(&xas);
if (xas_load(&xas) != head)
goto fail;
+ xas_split(&xas, head, thp_order(head));
}
/* Prevent deferred_split_scan() touching ->_refcount */
@@ -2733,7 +2743,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
spin_unlock(&ds_queue->split_queue_lock);
fail: if (mapping)
- xa_unlock(&mapping->i_pages);
+ xas_unlock(&xas);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
remap_page(head, thp_nr_pages(head));
ret = -EBUSY;
@@ -2747,6 +2757,8 @@ out_unlock:
if (mapping)
i_mmap_unlock_read(mapping);
out:
+ /* Free any memory we didn't use */
+ xas_nomem(&xas, 0);
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
return ret;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3b09c7e4ae3a..c674ead8cdc0 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1645,7 +1645,10 @@ static void collapse_file(struct mm_struct *mm,
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
- /* This will be less messy when we use multi-index entries */
+ /*
+ * Ensure we have slots for all the pages in the range. This is
+ * almost certainly a no-op because most of the pages must be present
+ */
do {
xas_lock_irq(&xas);
xas_create_range(&xas);
@@ -1851,6 +1854,9 @@ out_unlock:
__mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
}
+ /* Join all the small entries into a single multi-index entry */
+ xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+ xas_store(&xas, new_page);
xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:
@@ -1972,6 +1978,10 @@ static void khugepaged_scan_file(struct mm_struct *mm,
continue;
}
+ /*
+ * XXX: khugepaged should compact smaller compound pages
+ * into a PMD sized page
+ */
if (PageTransCompound(page)) {
result = SCAN_PAGE_COMPOUND;
break;
diff --git a/mm/migrate.c b/mm/migrate.c
index 262c91038c41..2dabb6e25e7c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -460,14 +460,6 @@ int migrate_page_move_mapping(struct address_space *mapping,
}
xas_store(&xas, newpage);
- if (PageTransHuge(page)) {
- int i;
-
- for (i = 1; i < HPAGE_PMD_NR; i++) {
- xas_next(&xas);
- xas_store(&xas, newpage);
- }
- }
/*
* Drop cache reference from old page by unfreezing
diff --git a/mm/shmem.c b/mm/shmem.c
index c10f8ecf85ce..a1b18bee77b4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -670,7 +670,6 @@ static int shmem_add_to_page_cache(struct page *page,
struct mm_struct *charge_mm)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
- unsigned long i = 0;
unsigned long nr = compound_nr(page);
int error;
@@ -700,17 +699,11 @@ static int shmem_add_to_page_cache(struct page *page,
void *entry;
xas_lock_irq(&xas);
entry = xas_find_conflict(&xas);
- if (entry != expected)
+ if (entry != expected) {
xas_set_err(&xas, -EEXIST);
- xas_create_range(&xas);
- if (xas_error(&xas))
goto unlock;
-next:
- xas_store(&xas, page);
- if (++i < nr) {
- xas_next(&xas);
- goto next;
}
+ xas_store(&xas, page);
if (PageTransHuge(page)) {
count_vm_event(THP_FILE_ALLOC);
__inc_node_page_state(page, NR_SHMEM_THPS);
diff --git a/mm/truncate.c b/mm/truncate.c
index bed24857d1d2..e15eb39e3bf8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -35,10 +35,12 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
XA_STATE(xas, &mapping->i_pages, index);
+ unsigned int order;
xas_set_update(&xas, workingset_update_node);
if (xas_load(&xas) != entry)
return;
+ order = xa_get_order(&mapping->i_pages, index);
xas_store(&xas, NULL);
}