summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2011-06-24 11:23:46 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2011-06-24 11:23:46 +1000
commitbe7cf21d611caac462a8bb085a8519dc8f30e1e8 (patch)
tree596905aae6f459b5ab25c63858e0b10e517b2bee /mm
parentbccaeafd7c117acee36e90d37c7e05c19be9e7bf (diff)
parent3376ae3a3a4c0dd01cef09663218b3d697053558 (diff)
Merge remote-tracking branch 'fixes/fixes'
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c14
-rw-r--r--mm/memcontrol.c1
-rw-r--r--mm/memory-failure.c21
-rw-r--r--mm/memory.c24
-rw-r--r--mm/nommu.c1
-rw-r--r--mm/rmap.c5
-rw-r--r--mm/shmem.c88
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c164
9 files changed, 167 insertions, 153 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d3457..cfac89d95ec1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -131,6 +131,7 @@ void __delete_from_page_cache(struct page *page)
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
+ /* Leave page->index set: truncation lookup relies upon it */
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
if (PageSwapBacked(page))
@@ -486,6 +487,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
+ /* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
page_cache_release(page);
@@ -1795,7 +1797,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
static struct page *__read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data,
gfp_t gfp)
{
@@ -1826,7 +1828,7 @@ repeat:
static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data,
gfp_t gfp)
@@ -1866,7 +1868,7 @@ out:
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
- * @data: destination for read data
+ * @data: first arg to filler(data, page) function, often left as NULL
*
* Same as read_cache_page, but don't wait for page to become unlocked
* after submitting it to the filler.
@@ -1878,7 +1880,7 @@ out:
*/
struct page *read_cache_page_async(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data)
{
return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1926,7 +1928,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
- * @data: destination for read data
+ * @data: first arg to filler(data, page) function, often left as NULL
*
* Read into the page cache. If a page already exists, and PageUptodate() is
* not set, try to fill the page then wait for it to become unlocked.
@@ -1935,7 +1937,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
*/
struct page *read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *,struct page*),
+ int (*filler)(void *, struct page *),
void *data)
{
return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cf7d027a8844..ddffc74cdebe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,6 +35,7 @@
#include <linux/limits.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index eac0ba561491..740c4f52059c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -391,10 +391,11 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
struct task_struct *tsk;
struct anon_vma *av;
- read_lock(&tasklist_lock);
av = page_lock_anon_vma(page);
if (av == NULL) /* Not actually mapped anymore */
- goto out;
+ return;
+
+ read_lock(&tasklist_lock);
for_each_process (tsk) {
struct anon_vma_chain *vmac;
@@ -408,9 +409,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
add_to_kill(tsk, page, vma, to_kill, tkc);
}
}
- page_unlock_anon_vma(av);
-out:
read_unlock(&tasklist_lock);
+ page_unlock_anon_vma(av);
}
/*
@@ -424,17 +424,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
struct prio_tree_iter iter;
struct address_space *mapping = page->mapping;
- /*
- * A note on the locking order between the two locks.
- * We don't rely on this particular order.
- * If you have some other code that needs a different order
- * feel free to switch them around. Or add a reverse link
- * from mm_struct to task_struct, then this could be all
- * done without taking tasklist_lock and looping over all tasks.
- */
-
- read_lock(&tasklist_lock);
mutex_lock(&mapping->i_mmap_mutex);
+ read_lock(&tasklist_lock);
for_each_process(tsk) {
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -454,8 +445,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
add_to_kill(tsk, page, vma, to_kill, tkc);
}
}
- mutex_unlock(&mapping->i_mmap_mutex);
read_unlock(&tasklist_lock);
+ mutex_unlock(&mapping->i_mmap_mutex);
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 87d935333f0d..40b7531ee8ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2798,30 +2798,6 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
-int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
-{
- struct address_space *mapping = inode->i_mapping;
-
- /*
- * If the underlying filesystem is not going to provide
- * a way to truncate a range of blocks (punch a hole) -
- * we should return failure right now.
- */
- if (!inode->i_op->truncate_range)
- return -ENOSYS;
-
- mutex_lock(&inode->i_mutex);
- down_write(&inode->i_alloc_sem);
- unmap_mapping_range(mapping, offset, (end - offset), 1);
- truncate_inode_pages_range(mapping, offset, end);
- unmap_mapping_range(mapping, offset, (end - offset), 1);
- inode->i_op->truncate_range(inode, offset, end);
- up_write(&inode->i_alloc_sem);
- mutex_unlock(&inode->i_mutex);
-
- return 0;
-}
-
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
diff --git a/mm/nommu.c b/mm/nommu.c
index 1fd0c51b10a6..829848aba51c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1817,6 +1817,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long to, unsigned long size, pgprot_t prot)
{
vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
+ vma->vm_end = vma->vm_start + size;
return 0;
}
EXPORT_SYMBOL(remap_pfn_range);
diff --git a/mm/rmap.c b/mm/rmap.c
index 27dfd3b82b0f..23295f65ae43 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -38,9 +38,8 @@
* in arch-dependent flush_dcache_mmap_lock,
* within inode_wb_list_lock in __sync_single_inode)
*
- * (code doesn't rely on that order so it could be switched around)
- * ->tasklist_lock
- * anon_vma->mutex (memory_failure, collect_procs_anon)
+ * anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon)
+ * ->tasklist_lock
* pte map lock
*/
diff --git a/mm/shmem.c b/mm/shmem.c
index d221a1cfd7b1..c1db11cf220d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -241,9 +241,7 @@ static void shmem_free_blocks(struct inode *inode, long pages)
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks) {
percpu_counter_add(&sbinfo->used_blocks, -pages);
- spin_lock(&inode->i_lock);
inode->i_blocks -= pages*BLOCKS_PER_PAGE;
- spin_unlock(&inode->i_lock);
}
}
@@ -432,9 +430,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
sbinfo->max_blocks - 1) >= 0)
return ERR_PTR(-ENOSPC);
percpu_counter_inc(&sbinfo->used_blocks);
- spin_lock(&inode->i_lock);
inode->i_blocks += BLOCKS_PER_PAGE;
- spin_unlock(&inode->i_lock);
}
spin_unlock(&info->lock);
@@ -539,7 +535,7 @@ static void shmem_free_pages(struct list_head *next)
} while (next);
}
-static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
{
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long idx;
@@ -562,6 +558,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
spinlock_t *punch_lock;
unsigned long upper_limit;
+ truncate_inode_pages_range(inode->i_mapping, start, end);
+
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (idx >= info->next_index)
@@ -738,16 +736,8 @@ done2:
* lowered next_index. Also, though shmem_getpage checks
* i_size before adding to cache, no recheck after: so fix the
* narrow window there too.
- *
- * Recalling truncate_inode_pages_range and unmap_mapping_range
- * every time for punch_hole (which never got a chance to clear
- * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
- * yet hardly ever necessary: try to optimize them out later.
*/
truncate_inode_pages_range(inode->i_mapping, start, end);
- if (punch_hole)
- unmap_mapping_range(inode->i_mapping, start,
- end - start, 1);
}
spin_lock(&info->lock);
@@ -766,22 +756,23 @@ done2:
shmem_free_pages(pages_to_free.next);
}
}
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
-static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
- loff_t newsize = attr->ia_size;
int error;
error = inode_change_ok(inode, attr);
if (error)
return error;
- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
- && newsize != inode->i_size) {
+ if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+ loff_t oldsize = inode->i_size;
+ loff_t newsize = attr->ia_size;
struct page *page = NULL;
- if (newsize < inode->i_size) {
+ if (newsize < oldsize) {
/*
* If truncating down to a partial page, then
* if that page is already allocated, hold it
@@ -810,12 +801,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
spin_unlock(&info->lock);
}
}
-
- /* XXX(truncate): truncate_setsize should be called last */
- truncate_setsize(inode, newsize);
+ if (newsize != oldsize) {
+ i_size_write(inode, newsize);
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ }
+ if (newsize < oldsize) {
+ loff_t holebegin = round_up(newsize, PAGE_SIZE);
+ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+ shmem_truncate_range(inode, newsize, (loff_t)-1);
+ /* unmap again to remove racily COWed private pages */
+ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+ }
if (page)
page_cache_release(page);
- shmem_truncate_range(inode, newsize, (loff_t)-1);
}
setattr_copy(inode, attr);
@@ -832,7 +830,6 @@ static void shmem_evict_inode(struct inode *inode)
struct shmem_xattr *xattr, *nxattr;
if (inode->i_mapping->a_ops == &shmem_aops) {
- truncate_inode_pages(inode->i_mapping, 0);
shmem_unacct_size(info->flags, inode->i_size);
inode->i_size = 0;
shmem_truncate_range(inode, 0, (loff_t)-1);
@@ -1420,9 +1417,7 @@ repeat:
shmem_acct_block(info->flags))
goto nospace;
percpu_counter_inc(&sbinfo->used_blocks);
- spin_lock(&inode->i_lock);
inode->i_blocks += BLOCKS_PER_PAGE;
- spin_unlock(&inode->i_lock);
} else if (shmem_acct_block(info->flags))
goto nospace;
@@ -1433,8 +1428,10 @@ repeat:
spin_unlock(&info->lock);
filepage = shmem_alloc_page(gfp, info, idx);
if (!filepage) {
+ spin_lock(&info->lock);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
+ spin_unlock(&info->lock);
error = -ENOMEM;
goto failed;
}
@@ -1448,8 +1445,10 @@ repeat:
current->mm, GFP_KERNEL);
if (error) {
page_cache_release(filepage);
+ spin_lock(&info->lock);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
+ spin_unlock(&info->lock);
filepage = NULL;
goto failed;
}
@@ -1479,10 +1478,10 @@ repeat:
* be done automatically.
*/
if (ret) {
- spin_unlock(&info->lock);
- page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
+ spin_unlock(&info->lock);
+ page_cache_release(filepage);
filepage = NULL;
if (error)
goto failed;
@@ -2706,7 +2705,7 @@ static const struct file_operations shmem_file_operations = {
};
static const struct inode_operations shmem_inode_operations = {
- .setattr = shmem_notify_change,
+ .setattr = shmem_setattr,
.truncate_range = shmem_truncate_range,
#ifdef CONFIG_TMPFS_XATTR
.setxattr = shmem_setxattr,
@@ -2739,7 +2738,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
.removexattr = shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
- .setattr = shmem_notify_change,
+ .setattr = shmem_setattr,
.check_acl = generic_check_acl,
#endif
};
@@ -2752,7 +2751,7 @@ static const struct inode_operations shmem_special_inode_operations = {
.removexattr = shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
- .setattr = shmem_notify_change,
+ .setattr = shmem_setattr,
.check_acl = generic_check_acl,
#endif
};
@@ -2908,6 +2907,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
return 0;
}
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+{
+ truncate_inode_pages_range(inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/**
* mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
@@ -3028,3 +3033,26 @@ int shmem_zero_setup(struct vm_area_struct *vma)
vma->vm_flags |= VM_CAN_NONLINEAR;
return 0;
}
+
+/**
+ * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @gfp: the page allocator flags to use if allocating
+ *
+ * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
+ * with any new page allocations done using the specified allocation flags.
+ * But read_cache_page_gfp() uses the ->readpage() method: which does not
+ * suit tmpfs, since it may have pages in swapcache, and needs to find those
+ * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
+ *
+ * Provide a stub for those callers to start using now, then later
+ * flesh it out to call shmem_getpage() with additional gfp mask, when
+ * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ */
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+ pgoff_t index, gfp_t gfp)
+{
+ return read_cache_page_gfp(mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d537d29e9b7b..ff8dc1a18cb4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -14,7 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
-#include <linux/shm.h>
+#include <linux/shmem_fs.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/writeback.h>
diff --git a/mm/truncate.c b/mm/truncate.c
index 3a29a6180212..0f78f8461773 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -199,9 +199,6 @@ int invalidate_inode_page(struct page *page)
* The first pass will remove most pages, so the search cost of the second pass
* is low.
*
- * When looking at page->index outside the page lock we need to be careful to
- * copy it into a local to avoid races (it could change at any time).
- *
* We pass down the cache-hot hint to the page freeing code. Even if the
* mapping is large, it is probably the case that the final pages are the most
* recently touched, and freeing happens in ascending file offset order.
@@ -210,10 +207,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
loff_t lstart, loff_t lend)
{
const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
- pgoff_t end;
const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
struct pagevec pvec;
- pgoff_t next;
+ pgoff_t index;
+ pgoff_t end;
int i;
cleancache_flush_inode(mapping);
@@ -224,24 +221,21 @@ void truncate_inode_pages_range(struct address_space *mapping,
end = (lend >> PAGE_CACHE_SHIFT);
pagevec_init(&pvec, 0);
- next = start;
- while (next <= end &&
- pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ index = start;
+ while (index <= end && pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- pgoff_t page_index = page->index;
- if (page_index > end) {
- next = page_index;
+ /* We rely upon deletion not changing page->index */
+ index = page->index;
+ if (index > end)
break;
- }
- if (page_index > next)
- next = page_index;
- next++;
if (!trylock_page(page))
continue;
+ WARN_ON(page->index != index);
if (PageWriteback(page)) {
unlock_page(page);
continue;
@@ -252,6 +246,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
+ index++;
}
if (partial) {
@@ -264,16 +259,17 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
}
- next = start;
+ index = start;
for ( ; ; ) {
cond_resched();
- if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
- if (next == start)
+ if (!pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ if (index == start)
break;
- next = start;
+ index = start;
continue;
}
- if (pvec.pages[0]->index > end) {
+ if (index == start && pvec.pages[0]->index > end) {
pagevec_release(&pvec);
break;
}
@@ -281,18 +277,20 @@ void truncate_inode_pages_range(struct address_space *mapping,
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
+ /* We rely upon deletion not changing page->index */
+ index = page->index;
+ if (index > end)
break;
+
lock_page(page);
+ WARN_ON(page->index != index);
wait_on_page_writeback(page);
truncate_inode_page(mapping, page);
- if (page->index > next)
- next = page->index;
- next++;
unlock_page(page);
}
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
+ index++;
}
cleancache_flush_inode(mapping);
}
@@ -304,6 +302,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
* @lstart: offset from which to truncate
*
* Called under (and serialised by) inode->i_mutex.
+ *
+ * Note: When this function returns, there can be a page in the process of
+ * deletion (inside __delete_from_page_cache()) in the specified range. Thus
+ * mapping->nrpages can be non-zero when this function returns even after
+ * truncation of the whole mapping.
*/
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
@@ -328,35 +331,26 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
struct pagevec pvec;
- pgoff_t next = start;
+ pgoff_t index = start;
unsigned long ret;
unsigned long count = 0;
int i;
pagevec_init(&pvec, 0);
- while (next <= end &&
- pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ while (index <= end && pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- pgoff_t index;
- int lock_failed;
- lock_failed = !trylock_page(page);
-
- /*
- * We really shouldn't be looking at the ->index of an
- * unlocked page. But we're not allowed to lock these
- * pages. So we rely upon nobody altering the ->index
- * of this (pinned-by-us) page.
- */
+ /* We rely upon deletion not changing page->index */
index = page->index;
- if (index > next)
- next = index;
- next++;
- if (lock_failed)
- continue;
+ if (index > end)
+ break;
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
ret = invalidate_inode_page(page);
unlock_page(page);
/*
@@ -366,12 +360,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
if (!ret)
deactivate_page(page);
count += ret;
- if (next > end)
- break;
}
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
+ index++;
}
return count;
}
@@ -437,37 +430,32 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
struct pagevec pvec;
- pgoff_t next;
+ pgoff_t index;
int i;
int ret = 0;
int ret2 = 0;
int did_range_unmap = 0;
- int wrapped = 0;
cleancache_flush_inode(mapping);
pagevec_init(&pvec, 0);
- next = start;
- while (next <= end && !wrapped &&
- pagevec_lookup(&pvec, mapping, next,
- min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ index = start;
+ while (index <= end && pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- pgoff_t page_index;
+
+ /* We rely upon deletion not changing page->index */
+ index = page->index;
+ if (index > end)
+ break;
lock_page(page);
+ WARN_ON(page->index != index);
if (page->mapping != mapping) {
unlock_page(page);
continue;
}
- page_index = page->index;
- next = page_index + 1;
- if (next == 0)
- wrapped = 1;
- if (page_index > end) {
- unlock_page(page);
- break;
- }
wait_on_page_writeback(page);
if (page_mapped(page)) {
if (!did_range_unmap) {
@@ -475,9 +463,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* Zap the rest of the file in one hit.
*/
unmap_mapping_range(mapping,
- (loff_t)page_index<<PAGE_CACHE_SHIFT,
- (loff_t)(end - page_index + 1)
- << PAGE_CACHE_SHIFT,
+ (loff_t)index << PAGE_CACHE_SHIFT,
+ (loff_t)(1 + end - index)
+ << PAGE_CACHE_SHIFT,
0);
did_range_unmap = 1;
} else {
@@ -485,8 +473,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* Just zap this page
*/
unmap_mapping_range(mapping,
- (loff_t)page_index<<PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
+ (loff_t)index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
}
}
BUG_ON(page_mapped(page));
@@ -502,6 +490,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
+ index++;
}
cleancache_flush_inode(mapping);
return ret;
@@ -526,8 +515,8 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
/**
* truncate_pagecache - unmap and remove pagecache that has been truncated
* @inode: inode
- * @old: old file offset
- * @new: new file offset
+ * @oldsize: old file size
+ * @newsize: new file size
*
* inode's new i_size must already be written before truncate_pagecache
* is called.
@@ -539,9 +528,10 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
* situations such as writepage being called for a page that has already
* had its underlying blocks deallocated.
*/
-void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
{
struct address_space *mapping = inode->i_mapping;
+ loff_t holebegin = round_up(newsize, PAGE_SIZE);
/*
* unmap_mapping_range is called twice, first simply for
@@ -552,9 +542,9 @@ void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
- unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, new);
- unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ unmap_mapping_range(mapping, holebegin, 0, 1);
+ truncate_inode_pages(mapping, newsize);
+ unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);
@@ -584,22 +574,48 @@ EXPORT_SYMBOL(truncate_setsize);
/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
- * @offset: file offset to start truncating
+ * @newsize: file offset to start truncating
*
* This function is deprecated and truncate_setsize or truncate_pagecache
* should be used instead, together with filesystem specific block truncation.
*/
-int vmtruncate(struct inode *inode, loff_t offset)
+int vmtruncate(struct inode *inode, loff_t newsize)
{
int error;
- error = inode_newsize_ok(inode, offset);
+ error = inode_newsize_ok(inode, newsize);
if (error)
return error;
- truncate_setsize(inode, offset);
+ truncate_setsize(inode, newsize);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
}
EXPORT_SYMBOL(vmtruncate);
+
+int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+ struct address_space *mapping = inode->i_mapping;
+ loff_t holebegin = round_up(lstart, PAGE_SIZE);
+ loff_t holelen = 1 + lend - holebegin;
+
+ /*
+ * If the underlying filesystem is not going to provide
+ * a way to truncate a range of blocks (punch a hole) -
+ * we should return failure right now.
+ */
+ if (!inode->i_op->truncate_range)
+ return -ENOSYS;
+
+ mutex_lock(&inode->i_mutex);
+ down_write(&inode->i_alloc_sem);
+ unmap_mapping_range(mapping, holebegin, holelen, 1);
+ inode->i_op->truncate_range(inode, lstart, lend);
+ /* unmap again to remove racily COWed private pages */
+ unmap_mapping_range(mapping, holebegin, holelen, 1);
+ up_write(&inode->i_alloc_sem);
+ mutex_unlock(&inode->i_mutex);
+
+ return 0;
+}