From 9b83a6a8523a8a96b6353174b193c5c93e16c6c3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Wed, 28 Feb 2007 20:11:03 -0800
Subject: [PATCH] mm/{,tiny-}shmem.c cleanups

shmem_{nopage,mmap} are no longer used in ipc/shm.c

Signed-off-by: Adrian Bunk
Cc: "Eric W. Biederman"
Cc: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/shmem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'mm/shmem.c')

diff --git a/mm/shmem.c b/mm/shmem.c
index 882053031aa0..fcb07882c8e0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1228,7 +1228,8 @@ failed:
         return error;
 }
 
-struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
+static struct page *shmem_nopage(struct vm_area_struct *vma,
+                                unsigned long address, int *type)
 {
         struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
         struct page *page = NULL;
@@ -1335,7 +1336,7 @@ out_nomem:
         return retval;
 }
 
-int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
         file_accessed(file);
         vma->vm_ops = &shmem_vm_ops;
--
cgit v1.2.3

From 759b9775c25f5e69aaea8a75c3914019e2dc5539 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Mon, 5 Mar 2007 00:30:28 -0800
Subject: [PATCH] shmem and simple const super_operations

shmem's super_operations were missed from the recent const-ification;
and simple_fill_super()'s, which can share with get_sb_pseudo()'s.

Signed-off-by: Hugh Dickins
Acked-by: Josef 'Jeff' Sipek
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/libfs.c | 10 ++++++----
 mm/shmem.c |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'mm/shmem.c')

diff --git a/fs/libfs.c b/fs/libfs.c
index cf79196535ec..d93842d3c0a0 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -190,6 +190,10 @@ const struct inode_operations simple_dir_inode_operations = {
         .lookup         = simple_lookup,
 };
 
+static const struct super_operations simple_super_operations = {
+        .statfs         = simple_statfs,
+};
+
 /*
  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  * will never be mountable)
  */
@@ -199,7 +203,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
         struct vfsmount *mnt)
 {
         struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
-        static const struct super_operations default_ops = {.statfs = simple_statfs};
         struct dentry *dentry;
         struct inode *root;
         struct qstr d_name = {.name = name, .len = strlen(name)};
@@ -212,7 +215,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
         s->s_blocksize = 1024;
         s->s_blocksize_bits = 10;
         s->s_magic = magic;
-        s->s_op = ops ? ops : &default_ops;
+        s->s_op = ops ? ops : &simple_super_operations;
         s->s_time_gran = 1;
         root = new_inode(s);
         if (!root)
@@ -359,7 +362,6 @@ int simple_commit_write(struct file *file, struct page *page,
 
 int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
 {
-        static struct super_operations s_ops = {.statfs = simple_statfs};
         struct inode *inode;
         struct dentry *root;
         struct dentry *dentry;
@@ -368,7 +370,7 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
         s->s_blocksize = PAGE_CACHE_SIZE;
         s->s_blocksize_bits = PAGE_CACHE_SHIFT;
         s->s_magic = magic;
-        s->s_op = &s_ops;
+        s->s_op = &simple_super_operations;
         s->s_time_gran = 1;
 
         inode = new_inode(s);
diff --git a/mm/shmem.c b/mm/shmem.c
index fcb07882c8e0..b8c429a2d271 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -175,7 +175,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
         vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
 }
 
-static struct super_operations shmem_ops;
+static const struct super_operations shmem_ops;
 static const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
 static const struct inode_operations shmem_inode_operations;
@@ -2383,7 +2383,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 #endif
 };
 
-static struct super_operations shmem_ops = {
+static const struct super_operations shmem_ops = {
         .alloc_inode    = shmem_alloc_inode,
         .destroy_inode  = shmem_destroy_inode,
 #ifdef CONFIG_TMPFS
--
cgit v1.2.3

From a2646d1e6c8d2239d8054a7d342eb9775a1d273a Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Thu, 29 Mar 2007 01:20:35 -0700
Subject: [PATCH] holepunch: fix shmem_truncate_range punching too far

Miklos Szeredi observes BUG_ON(!entry) in shmem_writepage() triggered
in rare circumstances, because shmem_truncate_range() erroneously
removes partially truncated directory pages at the end of the range:
later reclaim on pages pointing to these removed directories triggers
the BUG.  Indeed, and it can also cause data loss beyond the hole.

Fix this as in the patch proposed by Miklos, but distinguish between
"limit" (how far we need to search: ignore truncation's next_index
optimization in the holepunch case - if there are races it's more
consistent to act on the whole range specified) and "upper_limit" (how
far we can free directory pages: generally we must be careful to keep
partially punched pages, but can relax at end of file - i_size being
held stable by i_mutex).

Signed-off-by: Hugh Dickins
Cc: Miklos Szeredi
Cc: Badari Pulavarty
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/shmem.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'mm/shmem.c')

diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..1077b1d903d2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -481,7 +481,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         long nr_swaps_freed = 0;
         int offset;
         int freed;
-        int punch_hole = 0;
+        int punch_hole;
+        unsigned long upper_limit;
 
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
         idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +493,18 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         info->flags |= SHMEM_TRUNCATE;
         if (likely(end == (loff_t) -1)) {
                 limit = info->next_index;
+                upper_limit = SHMEM_MAX_INDEX;
                 info->next_index = idx;
+                punch_hole = 0;
         } else {
-                limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-                if (limit > info->next_index)
-                        limit = info->next_index;
+                if (end + 1 >= inode->i_size) { /* we may free a little more */
+                        limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+                                                        PAGE_CACHE_SHIFT;
+                        upper_limit = SHMEM_MAX_INDEX;
+                } else {
+                        limit = (end + 1) >> PAGE_CACHE_SHIFT;
+                        upper_limit = limit;
+                }
                 punch_hole = 1;
         }
 
@@ -520,10 +528,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
          * If there are no indirect blocks or we are punching a hole
          * below indirect blocks, nothing to be done.
          */
-        if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+        if (!topdir || limit <= SHMEM_NR_DIRECT)
                 goto done2;
 
-        BUG_ON(limit <= SHMEM_NR_DIRECT);
+        upper_limit -= SHMEM_NR_DIRECT;
         limit -= SHMEM_NR_DIRECT;
         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
         offset = idx % ENTRIES_PER_PAGE;
@@ -543,7 +551,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 if (*dir) {
                         diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-                        if (!diroff && !offset) {
+                        if (!diroff && !offset && upper_limit >= stage) {
                                 *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
@@ -570,9 +578,11 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         }
                         stage = idx + ENTRIES_PER_PAGEPAGE;
                         middir = *dir;
-                        *dir = NULL;
-                        nr_pages_to_free++;
-                        list_add(&middir->lru, &pages_to_free);
+                        if (upper_limit >= stage) {
+                                *dir = NULL;
+                                nr_pages_to_free++;
+                                list_add(&middir->lru, &pages_to_free);
+                        }
                         shmem_dir_unmap(dir);
                         cond_resched();
                         dir = shmem_dir_map(middir);
@@ -598,7 +608,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 }
                 if (offset)
                         offset = 0;
-                else if (subdir && !page_private(subdir)) {
+                else if (subdir && upper_limit - idx >= ENTRIES_PER_PAGE) {
                         dir[diroff] = NULL;
                         nr_pages_to_free++;
                         list_add(&subdir->lru, &pages_to_free);
--
cgit v1.2.3
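
To make the "limit"/"upper_limit" distinction above concrete, here is a small
worked example (illustrative only: it assumes PAGE_CACHE_SIZE is 4096, and the
byte offsets are invented, not taken from the patch).  Punching a hole over
bytes 6000..19000 of a 100000-byte tmpfs file gives:

        idx   = (6000 + 4096 - 1) >> 12 = 2   /* first page wholly inside the hole   */
        limit = (19000 + 1) >> 12       = 4   /* scan swap entries for pages 2 and 3 */

Since end + 1 (19001) is still below i_size (100000), upper_limit = limit = 4:
pages 1 and 4 straddle the misaligned edges of the hole and are kept, and a
directory page may only be freed if every entry it covers lies below index 4.
In the truncation case (end == -1), upper_limit is SHMEM_MAX_INDEX instead, so
whole directory pages beyond the new end of file may always be freed.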

From 1ae7000630e3c05b6f7e3dfc76472f1bca6c1788 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Thu, 29 Mar 2007 01:20:36 -0700
Subject: [PATCH] holepunch: fix shmem_truncate_range punch locking

Miklos Szeredi observes that during truncation of shmem page directories,
info->lock is released to improve latency (after lowering i_size and
next_index to exclude races); but this is quite wrong for holepunching,
which receives no such protection from i_size or next_index, and is left
vulnerable to races with shmem_unuse, shmem_getpage and shmem_writepage.

Hold info->lock throughout when holepunching?  No, any user could prevent
rescheduling for far too long.  Instead take info->lock just when needed:
in shmem_free_swp when removing the swap entries, and whenever removing
a directory page from the level above.  But so long as we remove before
scanning, we can safely skip taking the lock at the lower levels, except
at misaligned start and end of the hole.

Signed-off-by: Hugh Dickins
Cc: Miklos Szeredi
Cc: Badari Pulavarty
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/shmem.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 23 deletions(-)

(limited to 'mm/shmem.c')

diff --git a/mm/shmem.c b/mm/shmem.c
index 1077b1d903d2..578eceafba4a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
- * @dir:   pointer to the directory
- * @edir:  pointer after last entry of the directory
+ * @dir:        pointer to the directory
+ * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+                                                spinlock_t *punch_lock)
 {
+        spinlock_t *punch_unlock = NULL;
         swp_entry_t *ptr;
         int freed = 0;
 
         for (ptr = dir; ptr < edir; ptr++) {
                 if (ptr->val) {
+                        if (unlikely(punch_lock)) {
+                                punch_unlock = punch_lock;
+                                punch_lock = NULL;
+                                spin_lock(punch_unlock);
+                                if (!ptr->val)
+                                        continue;
+                        }
                         free_swap_and_cache(*ptr);
                         *ptr = (swp_entry_t){0};
                         freed++;
                 }
         }
+        if (punch_unlock)
+                spin_unlock(punch_unlock);
         return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-                int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+                int limit, struct page ***dir, spinlock_t *punch_lock)
 {
         swp_entry_t *ptr;
         int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
                 int size = limit - offset;
                 if (size > LATENCY_LIMIT)
                         size = LATENCY_LIMIT;
-                freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+                freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+                                                        punch_lock);
                 if (need_resched()) {
                         shmem_swp_unmap(ptr);
                         if (*dir) {
@@ -482,6 +495,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         int offset;
         int freed;
         int punch_hole;
+        spinlock_t *needs_lock;
+        spinlock_t *punch_lock;
         unsigned long upper_limit;
 
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -495,6 +510,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 limit = info->next_index;
                 upper_limit = SHMEM_MAX_INDEX;
                 info->next_index = idx;
+                needs_lock = NULL;
                 punch_hole = 0;
         } else {
                 if (end + 1 >= inode->i_size) { /* we may free a little more */
@@ -505,6 +521,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         limit = (end + 1) >> PAGE_CACHE_SHIFT;
                         upper_limit = limit;
                 }
+                needs_lock = &info->lock;
                 punch_hole = 1;
         }
 
@@ -521,7 +538,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 size = limit;
                 if (size > SHMEM_NR_DIRECT)
                         size = SHMEM_NR_DIRECT;
-                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+                nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
         }
 
         /*
@@ -531,6 +548,19 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         if (!topdir || limit <= SHMEM_NR_DIRECT)
                 goto done2;
 
+        /*
+         * The truncation case has already dropped info->lock, and we're safe
+         * because i_size and next_index have already been lowered, preventing
+         * access beyond.  But in the punch_hole case, we still need to take
+         * the lock when updating the swap directory, because there might be
+         * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+         * shmem_writepage.  However, whenever we find we can remove a whole
+         * directory page (not at the misaligned start or end of the range),
+         * we first NULLify its pointer in the level above, and then have no
+         * need to take the lock when updating its contents: needs_lock and
+         * punch_lock (either pointing to info->lock or NULL) manage this.
+         */
+
         upper_limit -= SHMEM_NR_DIRECT;
         limit -= SHMEM_NR_DIRECT;
         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
@@ -552,7 +582,13 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
                         if (!diroff && !offset && upper_limit >= stage) {
-                                *dir = NULL;
+                                if (needs_lock) {
+                                        spin_lock(needs_lock);
+                                        *dir = NULL;
+                                        spin_unlock(needs_lock);
+                                        needs_lock = NULL;
+                                } else
+                                        *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
                         }
@@ -578,8 +614,16 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         }
                         stage = idx + ENTRIES_PER_PAGEPAGE;
                         middir = *dir;
+                        if (punch_hole)
+                                needs_lock = &info->lock;
                         if (upper_limit >= stage) {
-                                *dir = NULL;
+                                if (needs_lock) {
+                                        spin_lock(needs_lock);
+                                        *dir = NULL;
+                                        spin_unlock(needs_lock);
+                                        needs_lock = NULL;
+                                } else
+                                        *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
                         }
@@ -588,31 +632,37 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         dir = shmem_dir_map(middir);
                         diroff = 0;
                 }
+                punch_lock = needs_lock;
                 subdir = dir[diroff];
-                if (subdir && page_private(subdir)) {
+                if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+                        if (needs_lock) {
+                                spin_lock(needs_lock);
+                                dir[diroff] = NULL;
+                                spin_unlock(needs_lock);
+                                punch_lock = NULL;
+                        } else
+                                dir[diroff] = NULL;
+                        nr_pages_to_free++;
+                        list_add(&subdir->lru, &pages_to_free);
+                }
+                if (subdir && page_private(subdir) /* has swap entries */) {
                         size = limit - idx;
                         if (size > ENTRIES_PER_PAGE)
                                 size = ENTRIES_PER_PAGE;
                         freed = shmem_map_and_free_swp(subdir,
-                                                offset, size, &dir);
+                                        offset, size, &dir, punch_lock);
                         if (!dir)
                                 dir = shmem_dir_map(middir);
                         nr_swaps_freed += freed;
-                        if (offset)
+                        if (offset || punch_lock) {
                                 spin_lock(&info->lock);
-                        set_page_private(subdir, page_private(subdir) - freed);
-                        if (offset)
+                                set_page_private(subdir,
+                                        page_private(subdir) - freed);
                                 spin_unlock(&info->lock);
-                        if (!punch_hole)
-                                BUG_ON(page_private(subdir) > offset);
-                }
-                if (offset)
-                        offset = 0;
-                else if (subdir && upper_limit - idx >= ENTRIES_PER_PAGE) {
-                        dir[diroff] = NULL;
-                        nr_pages_to_free++;
-                        list_add(&subdir->lru, &pages_to_free);
+                        } else
+                                BUG_ON(page_private(subdir) != freed);
                 }
+                offset = 0;
         }
 done1:
         shmem_dir_unmap(dir);
--
cgit v1.2.3
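
The ordering the message above relies on - detach a whole directory page from
its parent slot under info->lock, then free its contents without the lock -
can be seen in isolation in the following sketch (illustrative only; the
helper name is hypothetical and does not exist in mm/shmem.c):

        /*
         * Illustrative sketch, not a helper in mm/shmem.c: once the parent
         * slot is cleared under the lock, no racing shmem_getpage/
         * shmem_unuse/shmem_writepage can reach the detached directory page,
         * so its swap entries can then be freed without holding the lock.
         */
        static struct page *shmem_detach_subdir(struct page **slot, spinlock_t *lock)
        {
                struct page *subdir;

                spin_lock(lock);
                subdir = *slot;
                *slot = NULL;
                spin_unlock(lock);
                return subdir;
        }

Only at the misaligned edges of the hole, where a directory page must survive
because it also covers entries outside the range, does shmem_free_swp() take
the lock itself (the punch_lock argument) and recheck each entry under it.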

From 16a100190d39592d1d56ff5a0b978b20288c3427 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Thu, 29 Mar 2007 01:20:37 -0700
Subject: [PATCH] holepunch: fix disconnected pages after second truncate

shmem_truncate_range has its own truncate_inode_pages_range, to free any
pages racily instantiated while it was in progress: a SHMEM_PAGEIN flag
is set when this might have happened.

But holepunching gets no chance to clear that flag at the start of
vmtruncate_range, so it's always set (unless a truncate came just
before), so holepunch almost always does this second
truncate_inode_pages_range.

shmem holepunch has unlikely swap<->file races hereabouts whatever we do
(without a fuller rework than is fit for this release): I was going to
skip the second truncate in the punch_hole case, but Miklos points out
that would make holepunch correctness more vulnerable to swapoff.  So
keep the second truncate, but follow it by an unmap_mapping_range to
eliminate the disconnected pages (freed from pagecache while still
mapped in userspace) that it might have left behind.

Signed-off-by: Hugh Dickins
Cc: Miklos Szeredi
Cc: Badari Pulavarty
Cc: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/shmem.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'mm/shmem.c')

diff --git a/mm/shmem.c b/mm/shmem.c
index 578eceafba4a..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -674,8 +674,16 @@ done2:
                  * generic_delete_inode did it, before we lowered next_index.
                  * Also, though shmem_getpage checks i_size before adding to
                  * cache, no recheck after: so fix the narrow window there too.
+                 *
+                 * Recalling truncate_inode_pages_range and unmap_mapping_range
+                 * every time for punch_hole (which never got a chance to clear
+                 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+                 * yet hardly ever necessary: try to optimize them out later.
                  */
                 truncate_inode_pages_range(inode->i_mapping, start, end);
+                if (punch_hole)
+                        unmap_mapping_range(inode->i_mapping, start,
+                                                end - start, 1);
         }
 
         spin_lock(&info->lock);
--
cgit v1.2.3
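
For context, the holepunch path that this series repairs is reached from
userspace via madvise(MADV_REMOVE) on a mapped tmpfs file, which goes through
vmtruncate_range() and hence shmem_truncate_range().  A minimal sketch follows
(assumptions: a tmpfs mount at /dev/shm and a 2.6.16 or later kernel where
MADV_REMOVE is available; the file name and sizes are arbitrary):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/mman.h>
        #include <unistd.h>

        int main(void)
        {
                size_t len = 16 * 4096;
                char *p;
                int fd;

                fd = open("/dev/shm/holepunch-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
                if (fd < 0 || ftruncate(fd, len) < 0)
                        return 1;

                p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
                if (p == MAP_FAILED)
                        return 1;

                memset(p, 0xaa, len);   /* instantiate all sixteen pages */

                /* Punch out pages 4..7: tmpfs frees them in shmem_truncate_range(). */
                if (madvise(p + 4 * 4096, 4 * 4096, MADV_REMOVE) != 0)
                        perror("madvise(MADV_REMOVE)");

                munmap(p, len);
                close(fd);
                unlink("/dev/shm/holepunch-demo");
                return 0;
        }

On much later kernels, fallocate(FALLOC_FL_PUNCH_HOLE) offers the same
operation through the file descriptor, but in the 2.6.21 timeframe of these
patches madvise(MADV_REMOVE) was the holepunch entry point for tmpfs.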