From 193cf4b99113a4550598ba9e8343e591fc062e23 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 12 Jan 2010 16:18:08 +0200 Subject: libfs: Unexport and kill simple_prepare_write Remove the EXPORT_UNUSED_SYMBOL of simple_prepare_write Collapse simple_prepare_write into its only caller, thereby making it simpler and clearer to understand. Signed-off-by: Boaz Harrosh Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index ebb1cd5bc241..2b124c825e38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2340,8 +2340,6 @@ extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct extern int simple_sync_file(struct file *, struct dentry *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); -extern int simple_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); -- cgit v1.2.3 From 270ba5f7c5dac0bfb564aa35a536fb31ad4075bd Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 26 Jan 2010 14:12:43 +0000 Subject: fs: re-order super_block to remove 16 bytes of padding on 64bit builds re-order structure super_block to remove 16 bytes of alignment padding on 64bit builds. This shrinks the size of super_block from 712 to 696 bytes, so it requires one fewer 64-byte cache line. Signed-off-by: Richard Kennedy ----- patch against 2.6.33-rc5 compiled & tested on x86_64 AMDX2 desktop machine. I've been running with this patch applied for several weeks with no problems. 
regards Richard Signed-off-by: Al Viro --- include/linux/fs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b124c825e38..aa76dae673eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1314,9 +1314,9 @@ extern spinlock_t sb_lock; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned long s_blocksize; - unsigned char s_blocksize_bits; unsigned char s_dirt; + unsigned char s_blocksize_bits; + unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; @@ -1357,16 +1357,16 @@ struct super_block { void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; + /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ struct mutex s_vfs_rename_mutex; /* Kludge */ - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" -- cgit v1.2.3 From 2ecdc82ef0b03e67ce5ecee79d0d108177a704df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Jan 2010 17:27:20 +0100 Subject: kill unused invalidate_inode_pages helper No one is calling this anymore as everyone switched to invalidate_mapping_pages a long time ago. Also update a few references to it in comments. nfs has two more, but I can't easily figure what they are actually referring to, so I left them as-is. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/usb/gadget/f_mass_storage.c | 2 +- drivers/usb/gadget/file_storage.c | 2 +- include/linux/fs.h | 6 ------ mm/filemap.c | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux/fs.h') diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c index a37640eba434..77fcd1b697e8 100644 --- a/drivers/usb/gadget/f_mass_storage.c +++ b/drivers/usb/gadget/f_mass_storage.c @@ -1041,7 +1041,7 @@ static void invalidate_sub(struct fsg_lun *curlun) unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); - VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc); + VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc); } static int do_verify(struct fsg_common *common) diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 29dfb0277ffb..7dcdbda49cac 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -1448,7 +1448,7 @@ static void invalidate_sub(struct fsg_lun *curlun) unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); - VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc); + VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc); } static int do_verify(struct fsg_dev *fsg) diff --git a/include/linux/fs.h b/include/linux/fs.h index aa76dae673eb..d443c9dd3caa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2058,12 +2058,6 @@ extern int invalidate_inodes(struct super_block *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -static inline unsigned long __deprecated -invalidate_inode_pages(struct address_space *mapping) -{ - return invalidate_mapping_pages(mapping, 0, ~0UL); -} - static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/mm/filemap.c b/mm/filemap.c index 698ea80f2102..148b52a5bb7e 100644 --- 
a/mm/filemap.c +++ b/mm/filemap.c @@ -1117,7 +1117,7 @@ readpage: if (!PageUptodate(page)) { if (page->mapping == NULL) { /* - * invalidate_inode_pages got it + * invalidate_mapping_pages got it */ unlock_page(page); page_cache_release(page); -- cgit v1.2.3 From 2096f759abcb42200a81d776f597362fd9265024 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:16:21 -0500 Subject: New helper: path_is_under(path1, path2) Analog of is_subdir for vfsmount,dentry pairs, moved from audit_tree.c Signed-off-by: Al Viro --- fs/dcache.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 1 + kernel/audit_tree.c | 51 ++++++++++++--------------------------------------- 3 files changed, 37 insertions(+), 39 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/dcache.c b/fs/dcache.c index 4365998b8df4..74da947b160b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2191,6 +2191,30 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) return result; } +int path_is_under(struct path *path1, struct path *path2) +{ + struct vfsmount *mnt = path1->mnt; + struct dentry *dentry = path1->dentry; + int res; + spin_lock(&vfsmount_lock); + if (mnt != path2->mnt) { + for (;;) { + if (mnt->mnt_parent == mnt) { + spin_unlock(&vfsmount_lock); + return 0; + } + if (mnt->mnt_parent == path2->mnt) + break; + mnt = mnt->mnt_parent; + } + dentry = mnt->mnt_mountpoint; + } + res = is_subdir(dentry, path2->dentry); + spin_unlock(&vfsmount_lock); + return res; +} +EXPORT_SYMBOL(path_is_under); + void d_genocide(struct dentry *root) { struct dentry *this_parent = root; diff --git a/include/linux/fs.h b/include/linux/fs.h index d443c9dd3caa..8d53bc17f93f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2126,6 +2126,7 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); +extern int path_is_under(struct path *, struct path *); extern ino_t find_inode_number(struct 
dentry *, struct qstr *); #include diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 4b05bd9479db..f09b42d9c32d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -603,22 +603,6 @@ skip_it: mutex_unlock(&audit_filter_mutex); } -static int is_under(struct vfsmount *mnt, struct dentry *dentry, - struct path *path) -{ - if (mnt != path->mnt) { - for (;;) { - if (mnt->mnt_parent == mnt) - return 0; - if (mnt->mnt_parent == path->mnt) - break; - mnt = mnt->mnt_parent; - } - dentry = mnt->mnt_mountpoint; - } - return is_subdir(dentry, path->dentry); -} - int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) { @@ -714,29 +698,24 @@ int audit_tag_tree(char *old, char *new) { struct list_head cursor, barrier; int failed = 0; - struct path path; + struct path path1, path2; struct vfsmount *tagged; struct list_head list; - struct vfsmount *mnt; - struct dentry *dentry; int err; - err = kern_path(new, 0, &path); + err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path); - path_put(&path); + tagged = collect_mounts(&path2); + path_put(&path2); if (!tagged) return -ENOMEM; - err = kern_path(old, 0, &path); + err = kern_path(old, 0, &path1); if (err) { drop_collected_mounts(tagged); return err; } - mnt = mntget(path.mnt); - dentry = dget(path.dentry); - path_put(&path); list_add_tail(&list, &tagged->mnt_list); @@ -747,6 +726,7 @@ int audit_tag_tree(char *old, char *new) while (cursor.next != &tree_list) { struct audit_tree *tree; struct vfsmount *p; + int good_one = 0; tree = container_of(cursor.next, struct audit_tree, list); get_tree(tree); @@ -754,23 +734,17 @@ int audit_tag_tree(char *old, char *new) list_add(&cursor, &tree->list); mutex_unlock(&audit_filter_mutex); - err = kern_path(tree->pathname, 0, &path); - if (err) { - put_tree(tree); - mutex_lock(&audit_filter_mutex); - continue; + err = kern_path(tree->pathname, 0, &path2); + if (!err) { + good_one = path_is_under(&path1, &path2); + path_put(&path2); 
} - spin_lock(&vfsmount_lock); - if (!is_under(mnt, dentry, &path)) { - spin_unlock(&vfsmount_lock); - path_put(&path); + if (!good_one) { put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - spin_unlock(&vfsmount_lock); - path_put(&path); list_for_each_entry(p, &list, mnt_list) { failed = tag_chunk(p->mnt_root->d_inode, tree); @@ -820,8 +794,7 @@ int audit_tag_tree(char *old, char *new) list_del(&cursor); list_del(&list); mutex_unlock(&audit_filter_mutex); - dput(dentry); - mntput(mnt); + path_put(&path1); drop_collected_mounts(tagged); return failed; } -- cgit v1.2.3 From 1f707137b55764740981d022d29c622832a61880 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 22:51:25 -0500 Subject: new helper: iterate_mounts() apply function to vfsmounts in set returned by collect_mounts(), stop if it returns non-zero. Signed-off-by: Al Viro --- fs/namespace.c | 15 +++++++++++++++ include/linux/fs.h | 3 ++- kernel/audit_tree.c | 49 ++++++++++++++++--------------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namespace.c b/fs/namespace.c index d25d4602ab50..d5906c19e08e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1246,6 +1246,21 @@ void drop_collected_mounts(struct vfsmount *mnt) release_mounts(&umount_list); } +int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, + struct vfsmount *root) +{ + struct vfsmount *mnt; + int res = f(root, arg); + if (res) + return res; + list_for_each_entry(mnt, &root->mnt_list, mnt_list) { + res = f(mnt, arg); + if (res) + return res; + } + return 0; +} + static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) { struct vfsmount *p; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d53bc17f93f..e764f247d0ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1794,7 +1794,8 @@ extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); extern 
struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); - +extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, + struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); extern int current_umask(void); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f09b42d9c32d..028e85663f27 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -548,6 +548,11 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } +static int compare_root(struct vfsmount *mnt, void *arg) +{ + return mnt->mnt_root->d_inode == arg; +} + void audit_trim_trees(void) { struct list_head cursor; @@ -559,7 +564,6 @@ void audit_trim_trees(void) struct path path; struct vfsmount *root_mnt; struct node *node; - struct list_head list; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -577,24 +581,16 @@ void audit_trim_trees(void) if (!root_mnt) goto skip_it; - list_add_tail(&list, &root_mnt->mnt_list); spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) { - struct audit_chunk *chunk = find_chunk(node); - struct inode *inode = chunk->watch.inode; - struct vfsmount *mnt; + struct inode *inode = find_chunk(node)->watch.inode; node->index |= 1U<<31; - list_for_each_entry(mnt, &list, mnt_list) { - if (mnt->mnt_root->d_inode == inode) { - node->index &= ~(1U<<31); - break; - } - } + if (iterate_mounts(compare_root, inode, root_mnt)) + node->index &= ~(1U<<31); } spin_unlock(&hash_lock); trim_marked(tree); put_tree(tree); - list_del_init(&list); drop_collected_mounts(root_mnt); skip_it: mutex_lock(&audit_filter_mutex); @@ -622,13 +618,17 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } +static int tag_mount(struct vfsmount *mnt, void *arg) +{ + return tag_chunk(mnt->mnt_root->d_inode, arg); +} + /* called with audit_filter_mutex */ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; 
- struct vfsmount *mnt, *p; - struct list_head list; + struct vfsmount *mnt; int err; list_for_each_entry(tree, &tree_list, list) { @@ -654,16 +654,9 @@ int audit_add_tree_rule(struct audit_krule *rule) err = -ENOMEM; goto Err; } - list_add_tail(&list, &mnt->mnt_list); get_tree(tree); - list_for_each_entry(p, &list, mnt_list) { - err = tag_chunk(p->mnt_root->d_inode, tree); - if (err) - break; - } - - list_del(&list); + err = iterate_mounts(tag_mount, tree, mnt); drop_collected_mounts(mnt); if (!err) { @@ -700,7 +693,6 @@ int audit_tag_tree(char *old, char *new) int failed = 0; struct path path1, path2; struct vfsmount *tagged; - struct list_head list; int err; err = kern_path(new, 0, &path2); @@ -717,15 +709,12 @@ int audit_tag_tree(char *old, char *new) return err; } - list_add_tail(&list, &tagged->mnt_list); - mutex_lock(&audit_filter_mutex); list_add(&barrier, &tree_list); list_add(&cursor, &barrier); while (cursor.next != &tree_list) { struct audit_tree *tree; - struct vfsmount *p; int good_one = 0; tree = container_of(cursor.next, struct audit_tree, list); @@ -746,12 +735,7 @@ int audit_tag_tree(char *old, char *new) continue; } - list_for_each_entry(p, &list, mnt_list) { - failed = tag_chunk(p->mnt_root->d_inode, tree); - if (failed) - break; - } - + failed = iterate_mounts(tag_mount, tree, tagged); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -792,7 +776,6 @@ int audit_tag_tree(char *old, char *new) } list_del(&barrier); list_del(&cursor); - list_del(&list); mutex_unlock(&audit_filter_mutex); path_put(&path1); drop_collected_mounts(tagged); -- cgit v1.2.3 From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 10 Feb 2010 12:15:53 +0100 Subject: vfs: add NOFOLLOW flag to umount(2) Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent symlink attacks in unprivileged unmounts (fuse, samba, ncpfs). 
Additionally, return -EINVAL if an unknown flag is used (and specify an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for the caller to determine if a flag is supported or not. CC: Eugene Teo CC: Michael Kerrisk Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namespace.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/fs/namespace.c b/fs/namespace.c index ffa3843404e0..8174c8ab5c70 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1136,8 +1136,15 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { struct path path; int retval; + int lookup_flags = 0; - retval = user_path(name, &path); + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; retval = -EINVAL; diff --git a/include/linux/fs.h b/include/linux/fs.h index e764f247d0ab..5b3182c7eb5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1305,6 +1305,8 @@ extern int send_sigurg(struct fown_struct *fown); #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ +#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ +#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ extern struct list_head super_blocks; extern spinlock_t sb_lock; -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. 
Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by being able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/adfs.h | 2 +- fs/adfs/inode.c | 5 +++-- fs/affs/affs.h | 3 ++- fs/affs/inode.c | 2 +- fs/bfs/inode.c | 5 +++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/exofs/exofs.h | 2 +- fs/exofs/inode.c | 4 ++-- fs/ext2/ext2.h | 2 +- fs/ext2/inode.c | 11 +++++++++-- fs/ext3/inode.c | 4 ++-- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 6 +++--- fs/fat/inode.c | 9 +++++++-- fs/fs-writeback.c | 11 +++++------ fs/gfs2/super.c | 5 +++-- fs/hfs/hfs_fs.h | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/super.c | 3 ++- fs/jfs/inode.c | 5 ++++- fs/jfs/jfs_inode.h | 2 +- fs/minix/inode.c | 8 +++++--- fs/nfs/inode.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/inode.c | 2 +- fs/ntfs/inode.h | 4 ++-- fs/ntfs/super.c | 8 ++++++++ fs/omfs/inode.c | 10 ++++++++-- fs/reiserfs/inode.c | 4 ++-- fs/sysv/inode.c | 10 ++++++++-- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 2 +- fs/ubifs/file.c | 8 ++++---- fs/ubifs/super.c | 2 +- fs/udf/inode.c | 4 ++-- fs/udf/udfdecl.h | 2 +- fs/ufs/inode.c | 5 +++-- fs/ufs/ufs.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 4 ++-- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- include/linux/reiserfs_fs.h | 2 +- 45 files changed, 115 insertions(+), 72 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 9cc18775b832..2ff622f6f547 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -121,7 +121,7 @@ struct adfs_discmap { /* Inode stuff */ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); -int adfs_write_inode(struct inode *inode,int unused); +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git 
a/fs/adfs/inode.c b/fs/adfs/inode.c index 3f57ce4bee5d..0f5e30978135 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -9,6 +9,7 @@ */ #include #include +#include #include "adfs.h" /* @@ -360,7 +361,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int wait) +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj, wait); + ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e40caaba456..861dae68ac12 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -175,7 +175,8 @@ extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, unsigned long ino); -extern int affs_write_inode(struct inode *inode, int); +extern int affs_write_inode(struct inode *inode, + struct writeback_control *wbc); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); /* file.c */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3c4ec7d864c4..c9744d771d98 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -166,7 +166,7 @@ bad_inode: } int -affs_write_inode(struct inode *inode, int unused) +affs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8f3d9fd89604..f22a7d3dc362 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "bfs.h" @@ -98,7 +99,7 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int 
wait) +static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; @@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) err = -EIO; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2aa8ec6a0981..8b5cfdd4bfc1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); -int btrfs_write_inode(struct inode *inode, int wait); +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f8969..c41db6d45ab6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3968,7 +3968,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 59b8bf2825c7..8442e353309f 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -261,7 +261,7 @@ int 
exofs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); extern struct inode *exofs_iget(struct super_block *, unsigned long); struct inode *exofs_new_inode(struct inode *, int); -extern int exofs_write_inode(struct inode *, int); +extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); extern void exofs_delete_inode(struct inode *); /* dir.c: */ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5514f3c2c2f4..a17e4b733e35 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1280,9 +1280,9 @@ out: return ret; } -int exofs_write_inode(struct inode *inode, int wait) +int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) { - return exofs_update_inode(inode, wait); + return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } /* diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 061914add3cf..0b038e47ad2f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ extern struct inode *ext2_iget (struct super_block *, unsigned long); -extern int ext2_write_inode (struct inode *, int); +extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71b032c65a02..36ae1cac767c 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +static int __ext2_write_inode(struct inode *inode, int do_sync); + /* * Test whether an inode is a fast symlink. 
*/ @@ -64,7 +66,7 @@ void ext2_delete_inode (struct inode * inode) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_write_inode(inode, inode_needs_sync(inode)); + __ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1335,7 +1337,7 @@ bad_inode: return ERR_PTR(ret); } -int ext2_write_inode(struct inode *inode, int do_sync) +static int __ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1440,6 +1442,11 @@ int ext2_write_inode(struct inode *inode, int do_sync) return err; } +int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..7aca55fcc976 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3096,7 +3096,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. 
*/ -int ext3_write_inode(struct inode *inode, int wait) +int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) { if (current->flags & PF_MEMALLOC) return 0; @@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; return ext3_force_commit(inode->i_sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cedc91ec59d..50af1a2c65e7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1416,7 +1416,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); +extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..d01a6cdbf854 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5177,7 +5177,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. 
*/ -int ext4_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; @@ -5191,7 +5191,7 @@ int ext4_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; err = ext4_force_commit(inode->i_sb); @@ -5201,7 +5201,7 @@ int ext4_write_inode(struct inode *inode, int wait) err = ext4_get_inode_loc(inode, &iloc); if (err) return err; - if (wait) + if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { ext4_error(inode->i_sb, __func__, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 14da530b05ca..fbeecdc194dc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, return i_pos; } -static int fat_write_inode(struct inode *inode, int wait) +static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -634,9 +634,14 @@ retry: return err; } +static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int fat_sync_inode(struct inode *inode) { - return fat_write_inode(inode, 1); + return __fat_write_inode(inode, 1); } EXPORT_SYMBOL_GPL(fat_sync_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2721b1e4be..76fc4d594acb 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } -static int write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, struct writeback_control *wbc) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, sync); + return 
inode->i_sb->s_op->write_inode(inode, wbc); return 0; } @@ -421,7 +421,6 @@ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; - int wait = wbc->sync_mode == WB_SYNC_ALL; unsigned dirty; int ret; @@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * We'll have another go at writing back this inode when we * completed a full scan of b_io. */ - if (!wait) { + if (wbc->sync_mode != WB_SYNC_ALL) { requeue_io(inode); return 0; } @@ -466,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * This is important for filesystems that modify metadata on data * I/O completion. */ - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; @@ -474,7 +473,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + int err = write_inode(inode, wbc); if (ret == 0) ret = err; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5e22629da67..ca87598ead7f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) * Returns: errno */ -static int gfs2_write_inode(struct inode *inode, int sync) +static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) do_unlock: gfs2_glock_dq_uninit(&gh); do_flush: - if (sync != 0) + if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); return ret; } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 
052387e11671..fe35e3b626c4 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops; extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); -extern int hfs_write_inode(struct inode *, int); +extern int hfs_write_inode(struct inode *, struct writeback_control *); extern int hfs_inode_setattr(struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a1cbff2b4d99..14f5cb1b9fdc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext, HFS_SB(inode->i_sb)->alloc_blksz); } -int hfs_write_inode(struct inode *inode, int unused) +int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct inode *main_inode = inode; struct hfs_find_data fd; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 43022f3d5148..74b473a8ef92 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -87,7 +87,8 @@ bad_inode: return ERR_PTR(err); } -static int hfsplus_write_inode(struct inode *inode, int unused) +static int hfsplus_write_inode(struct inode *inode, + struct writeback_control *wbc) { struct hfsplus_vh *vhdr; int ret = 0; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b2ae190a77ba..182b78cc3e62 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait) return rc; } -int jfs_write_inode(struct inode *inode, int wait) +int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { + int wait = wbc->sync_mode == WB_SYNC_ALL; + if 
(test_cflag(COMMIT_Nolink, inode)) return 0; /* diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 1eff7db34d63..15902b03c2a7 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode*, int); +extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_delete_inode(struct inode *); extern void jfs_dirty_inode(struct inode *); extern void jfs_truncate(struct inode *); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 74ea82d72164..756f8c93780c 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -17,8 +17,10 @@ #include #include #include +#include -static int minix_write_inode(struct inode * inode, int wait); +static int minix_write_inode(struct inode *inode, + struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static int minix_write_inode(struct inode *inode, int wait) +static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct buffer_head *bh; @@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait) bh = V2_minix_update_inode(inode); if (!bh) return -EIO; - if (wait && buffer_dirty(bh)) { + if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ecd952cae1d..7f9ecc46f3fb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,11 +97,12 @@ u64 
nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b32..11f82f03c5de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 5a9e34475e37..9173e82a45d1 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, write_inode_now(bmp_vi, !datasync); iput(bmp_vi); } - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 43179ddd336f..b681c71d7069 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(S_ISDIR(vi->i_mode)); if (!datasync || !NInoNonResident(NTFS_I(vi))) - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); /* * NOTE: If we were to use mapping->private_list (see ext2 and diff --git a/fs/ntfs/inode.c 
b/fs/ntfs/inode.c index dc2505abb6d7..4b57fb1eac2a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2957,7 +2957,7 @@ out: * * Return 0 on success and -errno on error. */ -int ntfs_write_inode(struct inode *vi, int sync) +int __ntfs_write_inode(struct inode *vi, int sync) { sle64 nt; ntfs_inode *ni = NTFS_I(vi); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 117eaf8032a3..9a113544605d 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi); extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -extern int ntfs_write_inode(struct inode *vi, int sync); +extern int __ntfs_write_inode(struct inode *vi, int sync); static inline void ntfs_commit_inode(struct inode *vi) { if (!is_bad_inode(vi)) - ntfs_write_inode(vi, 1); + __ntfs_write_inode(vi, 1); return; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 80b04770e8e9..1cf39dfaee7a 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -39,6 +39,7 @@ #include "dir.h" #include "debug.h" #include "index.h" +#include "inode.h" #include "aops.h" #include "layout.h" #include "malloc.h" @@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) return 0; } +#ifdef NTFS_RW +static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) +{ + return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL); +} +#endif + /** * The complete super operations. 
*/ diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index f3b7c1541f3a..75d9b5ba1d45 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "omfs.h" @@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi) oi->i_head.h_check_xor = xor; } -static int omfs_write_inode(struct inode *inode, int wait) +static int __omfs_write_inode(struct inode *inode, int wait) { struct omfs_inode *oi; struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); @@ -162,9 +163,14 @@ out: return ret; } +static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int omfs_sync_inode(struct inode *inode) { - return omfs_write_inode(inode, 1); + return __omfs_write_inode(inode, 1); } /* diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2df0f5c7c60b..0d651f980a8d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1615,7 +1615,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, ** to properly mark inodes for datasync and such, but only actually ** does something when called for a synchronous update. */ -int reiserfs_write_inode(struct inode *inode, int do_sync) +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct reiserfs_transaction_handle th; int jbegin_count = 1; @@ -1627,7 +1627,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync) ** inode needs to reach disk for safety, and they can safely be ** ignored because the altered inode has already been logged. 
*/ - if (do_sync && !(current->flags & PF_MEMALLOC)) { + if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { reiserfs_write_lock(inode->i_sb); if (!journal_begin(&th, inode->i_sb, jbegin_count)) { reiserfs_update_sd(&th, inode); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9824743832a7..4573734d723d 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "sysv.h" @@ -246,7 +247,7 @@ bad_inode: return ERR_PTR(-EIO); } -int sysv_write_inode(struct inode *inode, int wait) +static int __sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait) return 0; } +int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int sysv_sync_inode(struct inode *inode) { - return sysv_write_inode(inode, 1); + return __sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 53786eb5cf60..94cb9b4d76c2 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, /* inode.c */ extern struct inode *sysv_iget(struct super_block *, unsigned int); -extern int sysv_write_inode(struct inode *, int); +extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 552fb0111fff..401e503d44a1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, 
&ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); return err; out_cancel: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 16a6444330ec..e26c02ab6cd5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) kunmap_atomic(kaddr, KM_USER0); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; out: @@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 43f9d19a6f33..4d2f2157dd3f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode) /* * Note, Linux write-back code calls this without 'i_mutex'. 
*/ -static int ubifs_write_inode(struct inode *inode, int wait) +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 378a7592257c..b02089247296 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1373,12 +1373,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) return mode; } -int udf_write_inode(struct inode *inode, int sync) +int udf_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = udf_update_inode(inode, sync); + ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8d46f4294ee7..4223ac855da9 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); -extern int udf_write_inode(struct inode *, int); +extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); extern int udf_extend_file(struct inode *, struct extent_position *, struct kernel_long_ad *, sector_t); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cf33379fd46..0a627e08610b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -890,11 +891,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -int ufs_write_inode (struct inode * inode, int wait) +int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = ufs_update_inode (inode, wait); + ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 
01d0e2a3b230..43f9f5d5670e 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern int ufs_write_inode (struct inode *, int); +extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8f117db6070e..71345a370d9f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1063,7 +1063,7 @@ xfs_log_inode( STATIC int xfs_fs_write_inode( struct inode *inode, - int sync) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1074,7 +1074,7 @@ xfs_fs_write_inode( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (sync) { + if (wbc->sync_mode == WB_SYNC_ALL) { /* * Make sure the inode has hit stable storage. 
By using the * log and the fsync transactions we reduce the IOs we have diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 6b049030fbe6..deac2566450e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -877,7 +877,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, int create); extern struct inode *ext3_iget(struct super_block *, unsigned long); -extern int ext3_write_inode (struct inode *, int); +extern int ext3_write_inode (struct inode *, struct writeback_control *); extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_delete_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b3182c7eb5f..45689621a851 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1557,7 +1557,7 @@ struct super_operations { void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, int); + int (*write_inode) (struct inode *, struct writeback_control *wbc); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 1ba3cf6edfbb..3b603f474186 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2034,7 +2034,7 @@ void reiserfs_read_locked_inode(struct inode *inode, int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); void reiserfs_delete_inode(struct inode *inode); -int reiserfs_write_inode(struct inode *inode, int); +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, -- cgit v1.2.3 From 0141450f66c3c12a3aaa869748caa64241885cdf Mon Sep 17 
00:00:00 2001 From: Wu Fengguang Date: Fri, 5 Mar 2010 13:42:03 -0800 Subject: readahead: introduce FMODE_RANDOM for POSIX_FADV_RANDOM This fixes inefficient page-by-page reads on POSIX_FADV_RANDOM. POSIX_FADV_RANDOM used to set ra_pages=0, which leads to poor performance: a 16K read will be carried out in 4 _sync_ 1-page reads. In other places, ra_pages==0 means - it's ramfs/tmpfs/hugetlbfs/sysfs/configfs - some IO error happened where multi-page read IO won't help or should be avoided. POSIX_FADV_RANDOM actually wants different semantics: to disable the *heuristic* readahead algorithm, and to use a dumb one which faithfully submits read IO for whatever the application requests. So introduce a flag FMODE_RANDOM for POSIX_FADV_RANDOM. Note that the random hint is not likely to help random read performance noticeably. And it may be too permissive on huge request size (its IO size is not limited by read_ahead_kb). In Quentin's report (http://lkml.org/lkml/2009/12/24/145), the overall (NFS read) performance of the application increased by 313%! Tested-by: Quentin Barnes Signed-off-by: Wu Fengguang Cc: Nick Piggin Cc: Andi Kleen Cc: Steven Whitehouse Cc: David Howells Cc: Jonathan Corbet Cc: Al Viro Cc: Christoph Hellwig Cc: Trond Myklebust Cc: Chuck Lever Cc: [2.6.33.x] Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 +++ mm/fadvise.c | 10 +++++++++- mm/readahead.c | 6 ++++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 45689621a851..be87edcaba06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,9 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* Expect random access pattern */ +#define FMODE_RANDOM ((__force fmode_t)4096) + /* * The below are the various read and write types that we support.
Some of * them include behavioral modifiers that send information down to the diff --git a/mm/fadvise.c b/mm/fadvise.c index e43359214f6f..8d723c9e8b75 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -77,12 +77,20 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) switch (advice) { case POSIX_FADV_NORMAL: file->f_ra.ra_pages = bdi->ra_pages; + spin_lock(&file->f_lock); + file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_RANDOM: - file->f_ra.ra_pages = 0; + spin_lock(&file->f_lock); + file->f_mode |= FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_SEQUENTIAL: file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&file->f_lock); + file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_WILLNEED: if (!mapping->a_ops->readpage) { diff --git a/mm/readahead.c b/mm/readahead.c index 033bc135a41f..337b20e946f6 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -501,6 +501,12 @@ void page_cache_sync_readahead(struct address_space *mapping, if (!ra->ra_pages) return; + /* be dumb */ + if (filp->f_mode & FMODE_RANDOM) { + force_page_cache_readahead(mapping, filp, offset, req_size); + return; + } + /* do read-ahead */ ondemand_readahead(mapping, ra, filp, false, offset, req_size); } -- cgit v1.2.3 From 19adf9c5d5793657118f2002237c0ee49c3b6185 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 5 Mar 2010 13:42:03 -0800 Subject: include/linux/fs.h: convert FMODE_* constants to hex It was tolerable until Eric went and added 8388608. 
Cc: Eric Paris Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index be87edcaba06..10b8dedcd18b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -60,24 +60,24 @@ struct inodes_stat_t { */ /* file is open for reading */ -#define FMODE_READ ((__force fmode_t)1) +#define FMODE_READ ((__force fmode_t)0x1) /* file is open for writing */ -#define FMODE_WRITE ((__force fmode_t)2) +#define FMODE_WRITE ((__force fmode_t)0x2) /* file is seekable */ -#define FMODE_LSEEK ((__force fmode_t)4) +#define FMODE_LSEEK ((__force fmode_t)0x4) /* file can be accessed using pread */ -#define FMODE_PREAD ((__force fmode_t)8) +#define FMODE_PREAD ((__force fmode_t)0x8) /* file can be accessed using pwrite */ -#define FMODE_PWRITE ((__force fmode_t)16) +#define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)32) +#define FMODE_EXEC ((__force fmode_t)0x20) /* File is opened with O_NDELAY (only set for block devices) */ -#define FMODE_NDELAY ((__force fmode_t)64) +#define FMODE_NDELAY ((__force fmode_t)0x40) /* File is opened with O_EXCL (only set for block devices) */ -#define FMODE_EXCL ((__force fmode_t)128) +#define FMODE_EXCL ((__force fmode_t)0x80) /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ -#define FMODE_WRITE_IOCTL ((__force fmode_t)256) +#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) /* * Don't update ctime and mtime. @@ -85,10 +85,10 @@ struct inodes_stat_t { * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 
*/ -#define FMODE_NOCMTIME ((__force fmode_t)2048) +#define FMODE_NOCMTIME ((__force fmode_t)0x800) /* Expect random access pattern */ -#define FMODE_RANDOM ((__force fmode_t)4096) +#define FMODE_RANDOM ((__force fmode_t)0x1000) /* * The below are the various read and write types that we support. Some of -- cgit v1.2.3