Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  273
1 file changed, 169 insertions(+), 104 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c7fe2f16503f..82f26cde830c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -132,44 +132,28 @@ static int page_cache_tree_insert(struct address_space *mapping,
if (!dax_mapping(mapping)) {
if (shadowp)
*shadowp = p;
- if (node)
- workingset_node_shadows_dec(node);
} else {
/* DAX can replace empty locked entry with a hole */
WARN_ON_ONCE(p !=
- (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
- RADIX_DAX_ENTRY_LOCK));
- /* DAX accounts exceptional entries as normal pages */
- if (node)
- workingset_node_pages_dec(node);
+ dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
/* Wakeup waiters for exceptional entry lock */
- dax_wake_mapping_entry_waiter(mapping, page->index,
+ dax_wake_mapping_entry_waiter(mapping, page->index, p,
false);
}
}
- radix_tree_replace_slot(slot, page);
+ __radix_tree_replace(&mapping->page_tree, node, slot, page,
+ workingset_update_node, mapping);
mapping->nrpages++;
- if (node) {
- workingset_node_pages_inc(node);
- /*
- * Don't track node that contains actual pages.
- *
- * Avoid acquiring the list_lru lock if already
- * untracked. The list_empty() test is safe as
- * node->private_list is protected by
- * mapping->tree_lock.
- */
- if (!list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- }
return 0;
}
static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
- int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
+ int i, nr;
+
+ /* hugetlb pages are represented by one entry in the radix tree */
+ nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -182,44 +166,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
__radix_tree_lookup(&mapping->page_tree, page->index + i,
&node, &slot);
- radix_tree_clear_tags(&mapping->page_tree, node, slot);
-
- if (!node) {
- VM_BUG_ON_PAGE(nr != 1, page);
- /*
- * We need a node to properly account shadow
- * entries. Don't plant any without. XXX
- */
- shadow = NULL;
- }
-
- radix_tree_replace_slot(slot, shadow);
+ VM_BUG_ON_PAGE(!node && nr != 1, page);
- if (!node)
- break;
-
- workingset_node_pages_dec(node);
- if (shadow)
- workingset_node_shadows_inc(node);
- else
- if (__radix_tree_delete_node(&mapping->page_tree, node))
- continue;
-
- /*
- * Track node that only contains shadow entries. DAX mappings
- * contain no shadow entries and may contain other exceptional
- * entries so skip those.
- *
- * Avoid acquiring the list_lru lock if already tracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
- list_empty(&node->private_list)) {
- node->private_data = mapping;
- list_lru_add(&workingset_shadow_nodes,
- &node->private_list);
- }
+ radix_tree_clear_tags(&mapping->page_tree, node, slot);
+ __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+ workingset_update_node, mapping);
}
if (shadow) {
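For illustration, here is a minimal userspace sketch of the pattern the two hunks above converge on: a single slot-replacement helper that calls a per-node update callback, instead of every caller open-coding the workingset node accounting around radix_tree_replace_slot(). All names below (node_t, replace_slot, track_node) are invented for the example and are not the kernel API; in the kernel the callback role is played by workingset_update_node(), which decides whether the radix tree node belongs on the shadow-node LRU.

/* Minimal sketch (not kernel code): replace a slot through one helper
 * that re-accounts the node via a callback, rather than callers doing
 * the accounting by hand. node_t, replace_slot, track_node are invented. */
#include <stdio.h>

#define SLOTS 4

typedef struct node {
	void *slots[SLOTS];
	int count;			/* live (non-NULL) slots */
} node_t;

typedef void (*update_fn)(node_t *node, void *private);

/* Replace one slot and let the callback re-evaluate the node. */
static void replace_slot(node_t *node, int idx, void *item,
			 update_fn update, void *private)
{
	void *old = node->slots[idx];

	node->slots[idx] = item;
	if (old && !item)
		node->count--;
	else if (!old && item)
		node->count++;
	if (update)
		update(node, private);
}

/* Example callback: report how many live entries remain; roughly the
 * decision point where the real callback tracks or untracks a
 * shadow-only node. */
static void track_node(node_t *node, void *private)
{
	printf("%s: node has %d live slots\n",
	       (const char *)private, node->count);
}

int main(void)
{
	node_t n = { { 0 }, 0 };
	int page = 42;

	replace_slot(&n, 0, &page, track_node, "insert");	/* page inserted */
	replace_slot(&n, 0, NULL, track_node, "delete");	/* page deleted */
	return 0;
}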
@@ -788,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
* at a cost of "thundering herd" phenomena during rare hash
* collisions.
*/
-wait_queue_head_t *page_waitqueue(struct page *page)
+#define PAGE_WAIT_TABLE_BITS 8
+#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
+static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+static wait_queue_head_t *page_waitqueue(struct page *page)
{
- return bit_waitqueue(page, 0);
+ return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}
-EXPORT_SYMBOL(page_waitqueue);
-void wait_on_page_bit(struct page *page, int bit_nr)
+void __init pagecache_init(void)
{
- DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+ int i;
+
+ for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
+ init_waitqueue_head(&page_wait_table[i]);
- if (test_bit(bit_nr, &page->flags))
- __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
- TASK_UNINTERRUPTIBLE);
+ page_writeback_init();
}
-EXPORT_SYMBOL(wait_on_page_bit);
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+struct wait_page_key {
+ struct page *page;
+ int bit_nr;
+ int page_match;
+};
+
+struct wait_page_queue {
+ struct page *page;
+ int bit_nr;
+ wait_queue_t wait;
+};
+
+static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
- DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+ struct wait_page_key *key = arg;
+ struct wait_page_queue *wait_page
+ = container_of(wait, struct wait_page_queue, wait);
+
+ if (wait_page->page != key->page)
+ return 0;
+ key->page_match = 1;
- if (!test_bit(bit_nr, &page->flags))
+ if (wait_page->bit_nr != key->bit_nr)
+ return 0;
+ if (test_bit(key->bit_nr, &key->page->flags))
return 0;
- return __wait_on_bit(page_waitqueue(page), &wait,
- bit_wait_io, TASK_KILLABLE);
+ return autoremove_wake_function(wait, mode, sync, key);
}
-int wait_on_page_bit_killable_timeout(struct page *page,
- int bit_nr, unsigned long timeout)
+void wake_up_page_bit(struct page *page, int bit_nr)
{
- DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+ wait_queue_head_t *q = page_waitqueue(page);
+ struct wait_page_key key;
+ unsigned long flags;
- wait.key.timeout = jiffies + timeout;
- if (!test_bit(bit_nr, &page->flags))
- return 0;
- return __wait_on_bit(page_waitqueue(page), &wait,
- bit_wait_io_timeout, TASK_KILLABLE);
+ key.page = page;
+ key.bit_nr = bit_nr;
+ key.page_match = 0;
+
+ spin_lock_irqsave(&q->lock, flags);
+ __wake_up_locked_key(q, TASK_NORMAL, &key);
+ /*
+ * It is possible for other pages to have collided on the waitqueue
+ * hash, so in that case check for a page match. That prevents a long-
+ * term waiter on another page from keeping PageWaiters set here forever.
+ *
+ * It is still possible to miss a case here, when we woke page waiters
+ * and removed them from the waitqueue, but there are still other
+ * page waiters.
+ */
+ if (!waitqueue_active(q) || !key.page_match) {
+ ClearPageWaiters(page);
+ /*
+ * It's possible to miss clearing Waiters here, when we woke
+ * our page waiters, but the hashed waitqueue has waiters for
+ * other pages on it.
+ *
+ * That's okay, it's a rare case. The next waker will clear it.
+ */
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(wake_up_page_bit);
+
+static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+ struct page *page, int bit_nr, int state, bool lock)
+{
+ struct wait_page_queue wait_page;
+ wait_queue_t *wait = &wait_page.wait;
+ int ret = 0;
+
+ init_wait(wait);
+ wait->func = wake_page_function;
+ wait_page.page = page;
+ wait_page.bit_nr = bit_nr;
+
+ for (;;) {
+ spin_lock_irq(&q->lock);
+
+ if (likely(list_empty(&wait->task_list))) {
+ if (lock)
+ __add_wait_queue_tail_exclusive(q, wait);
+ else
+ __add_wait_queue(q, wait);
+ SetPageWaiters(page);
+ }
+
+ set_current_state(state);
+
+ spin_unlock_irq(&q->lock);
+
+ if (likely(test_bit(bit_nr, &page->flags))) {
+ io_schedule();
+ if (unlikely(signal_pending_state(state, current))) {
+ ret = -EINTR;
+ break;
+ }
+ }
+
+ if (lock) {
+ if (!test_and_set_bit_lock(bit_nr, &page->flags))
+ break;
+ } else {
+ if (!test_bit(bit_nr, &page->flags))
+ break;
+ }
+ }
+
+ finish_wait(q, wait);
+
+ /*
+ * A signal could leave PageWaiters set. Clearing it here if
+ * !waitqueue_active would be possible (by open-coding finish_wait),
+ * but still fail to catch it in the case of wait hash collision. We
+ * already can fail to clear wait hash collision cases, so don't
+ * bother with signals either.
+ */
+
+ return ret;
+}
+
+void wait_on_page_bit(struct page *page, int bit_nr)
+{
+ wait_queue_head_t *q = page_waitqueue(page);
+ wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
+}
+EXPORT_SYMBOL(wait_on_page_bit);
+
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+ wait_queue_head_t *q = page_waitqueue(page);
+ return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
}
-EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
/**
* add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
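As a rough userspace sketch of the waitqueue scheme introduced above, assuming simplified stand-in types: pages hash into a small fixed table of buckets, every waiter records which (page, bit) pair it waits for, and a wake pass only wakes matching entries, so unrelated pages that collide on the hash merely share a bucket. hash_page() below stands in for the kernel's hash_ptr(); none of these names are the kernel API.

/* Userspace sketch (not the kernel API): a small hashed table of wait
 * buckets, with wakeups filtered by (page, bit) so a hash collision only
 * costs a shared bucket, never a wrong wakeup target. */
#include <stdio.h>
#include <stdint.h>

#define WAIT_TABLE_BITS	8
#define WAIT_TABLE_SIZE	(1u << WAIT_TABLE_BITS)
#define MAX_WAITERS	8

struct fake_page { unsigned long flags; };

struct waiter {
	struct fake_page *page;
	int bit_nr;
	int woken;
};

struct bucket {
	struct waiter entries[MAX_WAITERS];
	int nr;
};

static struct bucket wait_table[WAIT_TABLE_SIZE];

/* Stand-in for hash_ptr(): multiply by a 64-bit constant and keep the
 * top WAIT_TABLE_BITS bits. */
static unsigned int hash_page(const struct fake_page *page)
{
	uint64_t v = (uint64_t)(uintptr_t)page * 0x9E3779B97F4A7C15ull;

	return (unsigned int)(v >> (64 - WAIT_TABLE_BITS));
}

static void add_waiter(struct fake_page *page, int bit_nr)
{
	struct bucket *b = &wait_table[hash_page(page)];

	if (b->nr < MAX_WAITERS)
		b->entries[b->nr++] = (struct waiter){ page, bit_nr, 0 };
}

/* Wake only waiters matching this page and bit; other pages sharing the
 * bucket are left alone, as wake_page_function() does above. */
static void wake_page_bit(struct fake_page *page, int bit_nr)
{
	struct bucket *b = &wait_table[hash_page(page)];

	for (int i = 0; i < b->nr; i++)
		if (b->entries[i].page == page && b->entries[i].bit_nr == bit_nr)
			b->entries[i].woken = 1;
}

int main(void)
{
	struct fake_page a, b;

	add_waiter(&a, 0);		/* e.g. waiting on PG_locked */
	add_waiter(&b, 0);		/* may or may not share a's bucket */
	wake_page_bit(&a, 0);

	printf("a -> bucket %u, b -> bucket %u\n", hash_page(&a), hash_page(&b));
	printf("waiter on a woken: %d\n",
	       wait_table[hash_page(&a)].entries[0].woken);
	return 0;
}

The kernel version additionally queues lock waiters exclusively (__add_wait_queue_tail_exclusive) and tracks whether a page has any waiters at all via the PageWaiters bit set and cleared above.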
@@ -842,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, waiter);
+ SetPageWaiters(page);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -923,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
* __lock_page - get a lock on the page, assuming we need to sleep to get it
* @page: the page to lock
*/
-void __lock_page(struct page *page)
+void __lock_page(struct page *__page)
{
- struct page *page_head = compound_head(page);
- DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
- __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
- TASK_UNINTERRUPTIBLE);
+ struct page *page = compound_head(__page);
+ wait_queue_head_t *q = page_waitqueue(page);
+ wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
}
EXPORT_SYMBOL(__lock_page);
-int __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *__page)
{
- struct page *page_head = compound_head(page);
- DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
- return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
- bit_wait_io, TASK_KILLABLE);
+ struct page *page = compound_head(__page);
+ wait_queue_head_t *q = page_waitqueue(page);
+ return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
@@ -1686,7 +1748,7 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
int error = 0;
if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
- return -EINVAL;
+ return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
index = *ppos >> PAGE_SHIFT;
@@ -1732,6 +1794,9 @@ find_page:
if (inode->i_blkbits == PAGE_SHIFT ||
!mapping->a_ops->is_partially_uptodate)
goto page_not_up_to_date;
+ /* pipes can't handle partially uptodate pages */
+ if (unlikely(iter->type & ITER_PIPE))
+ goto page_not_up_to_date;
if (!trylock_page(page))
goto page_not_up_to_date;
/* Did it get truncated before we got the lock? */
@@ -2210,12 +2275,12 @@ page_not_uptodate:
}
EXPORT_SYMBOL(filemap_fault);
-void filemap_map_pages(struct fault_env *fe,
+void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct radix_tree_iter iter;
void **slot;
- struct file *file = fe->vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
loff_t size;
@@ -2271,11 +2336,11 @@ repeat:
if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--;
- fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
- if (fe->pte)
- fe->pte += iter.index - last_pgoff;
+ vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+ if (vmf->pte)
+ vmf->pte += iter.index - last_pgoff;
last_pgoff = iter.index;
- if (alloc_set_pte(fe, NULL, page))
+ if (alloc_set_pte(vmf, NULL, page))
goto unlock;
unlock_page(page);
goto next;
@@ -2285,7 +2350,7 @@ skip:
put_page(page);
next:
/* Huge page is mapped? No need to proceed. */
- if (pmd_trans_huge(*fe->pmd))
+ if (pmd_trans_huge(*vmf->pmd))
break;
if (iter.index == end_pgoff)
break;