From 5b8ba10198a109f8a02380648c5d29000caa9c55 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:01 -0700 Subject: mm: move vmtruncate_range to truncate.c You would expect to find vmtruncate_range() next to vmtruncate() in mm/truncate.c: move it there. Signed-off-by: Hugh Dickins Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/truncate.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'mm/truncate.c') diff --git a/mm/truncate.c b/mm/truncate.c index 3a29a6180212..5b4c3a4847e9 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -603,3 +603,27 @@ int vmtruncate(struct inode *inode, loff_t offset) return 0; } EXPORT_SYMBOL(vmtruncate); + +int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) +{ + struct address_space *mapping = inode->i_mapping; + + /* + * If the underlying filesystem is not going to provide + * a way to truncate a range of blocks (punch a hole) - + * we should return failure right now. + */ + if (!inode->i_op->truncate_range) + return -ENOSYS; + + mutex_lock(&inode->i_mutex); + down_write(&inode->i_alloc_sem); + unmap_mapping_range(mapping, offset, (end - offset), 1); + truncate_inode_pages_range(mapping, offset, end); + unmap_mapping_range(mapping, offset, (end - offset), 1); + inode->i_op->truncate_range(inode, offset, end); + up_write(&inode->i_alloc_sem); + mutex_unlock(&inode->i_mutex); + + return 0; +} -- cgit v1.2.3 From 94c1e62df4494b79782cb9c7279f827212d1de70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:03 -0700 Subject: tmpfs: take control of its truncate_range 2.6.35's new truncate convention gave tmpfs the opportunity to control its file truncation, no longer enforced from outside by vmtruncate(). We shall want to build upon that, to handle pagecache and swap together. Slightly redefine the ->truncate_range interface: let it now be called between the unmap_mapping_range()s, with the filesystem responsible for doing the truncate_inode_pages_range() from it - just as the filesystem is nowadays responsible for doing that from its ->setattr. Let's rename shmem_notify_change() to shmem_setattr(). Instead of calling the generic truncate_setsize(), bring that code in so we can call shmem_truncate_range() - which will later be updated to perform its own variant of truncate_inode_pages_range(). Remove the punch_hole unmap_mapping_range() from shmem_truncate_range(): now that the COW's unmap_mapping_range() comes after ->truncate_range, there is no need to call it a third time. Export shmem_truncate_range() and add it to the list in shmem_fs.h, so that i915_gem_object_truncate() can call it explicitly in future; get this patch in first, then update drm/i915 once this is available (until then, i915 will just be doing the truncate_inode_pages() twice). Though introduced five years ago, no other filesystem is implementing ->truncate_range, and its only other user is madvise(,,MADV_REMOVE): we expect to convert it to fallocate(,FALLOC_FL_PUNCH_HOLE,,) shortly, whereupon ->truncate_range can be removed from inode_operations - shmem_truncate_range() will help i915 across that transition too. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + mm/shmem.c | 51 +++++++++++++++++++++++++++--------------------- mm/truncate.c | 4 ++-- 3 files changed, 32 insertions(+), 24 deletions(-) (limited to 'mm/truncate.c') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index cae65dc42bcc..22a20af4d785 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -61,6 +61,7 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); diff --git a/mm/shmem.c b/mm/shmem.c index d221a1cfd7b1..f1714758ea96 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -539,7 +539,7 @@ static void shmem_free_pages(struct list_head *next) } while (next); } -static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { struct shmem_inode_info *info = SHMEM_I(inode); unsigned long idx; @@ -562,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) spinlock_t *punch_lock; unsigned long upper_limit; + truncate_inode_pages_range(inode->i_mapping, start, end); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (idx >= info->next_index) @@ -738,16 +740,8 @@ done2: * lowered next_index. Also, though shmem_getpage checks * i_size before adding to cache, no recheck after: so fix the * narrow window there too. - * - * Recalling truncate_inode_pages_range and unmap_mapping_range - * every time for punch_hole (which never got a chance to clear - * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, - * yet hardly ever necessary: try to optimize them out later. */ truncate_inode_pages_range(inode->i_mapping, start, end); - if (punch_hole) - unmap_mapping_range(inode->i_mapping, start, - end - start, 1); } spin_lock(&info->lock); @@ -766,22 +760,23 @@ done2: shmem_free_pages(pages_to_free.next); } } +EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) +static int shmem_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; - loff_t newsize = attr->ia_size; int error; error = inode_change_ok(inode, attr); if (error) return error; - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE) - && newsize != inode->i_size) { + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { + loff_t oldsize = inode->i_size; + loff_t newsize = attr->ia_size; struct page *page = NULL; - if (newsize < inode->i_size) { + if (newsize < oldsize) { /* * If truncating down to a partial page, then * if that page is already allocated, hold it @@ -810,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) spin_unlock(&info->lock); } } - - /* XXX(truncate): truncate_setsize should be called last */ - truncate_setsize(inode, newsize); + if (newsize != oldsize) { + i_size_write(inode, newsize); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + } + if (newsize < oldsize) { + loff_t holebegin = round_up(newsize, PAGE_SIZE); + unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); + shmem_truncate_range(inode, newsize, (loff_t)-1); + /* unmap again to remove racily COWed private pages */ + unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); + } if (page) page_cache_release(page); - shmem_truncate_range(inode, newsize, (loff_t)-1); } setattr_copy(inode, attr); @@ -832,7 +834,6 @@ static void shmem_evict_inode(struct inode *inode) struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { - truncate_inode_pages(inode->i_mapping, 0); shmem_unacct_size(info->flags, inode->i_size); inode->i_size = 0; shmem_truncate_range(inode, 0, (loff_t)-1); @@ -2706,7 +2707,7 @@ static const struct file_operations shmem_file_operations = { }; static const struct inode_operations shmem_inode_operations = { - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .truncate_range = shmem_truncate_range, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, @@ -2739,7 +2740,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .check_acl = generic_check_acl, #endif }; @@ -2752,7 +2753,7 @@ static const struct inode_operations shmem_special_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .check_acl = generic_check_acl, #endif }; @@ -2908,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +{ + truncate_inode_pages_range(inode->i_mapping, start, end); +} +EXPORT_SYMBOL_GPL(shmem_truncate_range); + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /** * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file diff --git a/mm/truncate.c b/mm/truncate.c index 5b4c3a4847e9..29a9b8a5a31a 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -619,9 +619,9 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) mutex_lock(&inode->i_mutex); down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); - truncate_inode_pages_range(mapping, offset, end); - unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); + /* unmap again to remove racily COWed private pages */ + unmap_mapping_range(mapping, offset, (end - offset), 1); up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From 08142579b6ca35883c1ed066a2681de6f6917062 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Jun 2011 16:18:10 -0700 Subject: mm: fix assertion mapping->nrpages == 0 in end_writeback() Under heavy memory and filesystem load, users observe the assertion mapping->nrpages == 0 in end_writeback() trigger. This can be caused by page reclaim reclaiming the last page from a mapping in the following race: CPU0 CPU1 ... shrink_page_list() __remove_mapping() __delete_from_page_cache() radix_tree_delete() evict_inode() truncate_inode_pages() truncate_inode_pages_range() pagevec_lookup() - finds nothing end_writeback() mapping->nrpages != 0 -> BUG page->mapping = NULL mapping->nrpages-- Fix the problem by doing a reliable check of mapping->nrpages under mapping->tree_lock in end_writeback(). Analyzed by Jay , lost in LKML, and dug out by Miklos Szeredi . Cc: Jay Cc: Miklos Szeredi Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 7 +++++++ include/linux/fs.h | 1 + mm/truncate.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'mm/truncate.c') diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803f..43566d17d1b8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e73e2e9ae33..b5b979247863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,6 +639,7 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ + /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ diff --git a/mm/truncate.c b/mm/truncate.c index 29a9b8a5a31a..e13f22efaad7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range); * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_mutex. + * + * Note: When this function returns, there can be a page in the process of + * deletion (inside __delete_from_page_cache()) in the specified range. Thus + * mapping->nrpages can be non-zero when this function returns even after + * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { -- cgit v1.2.3