From a30427d92d0bc152b833088e4a305bbeb1a0c162 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 18 May 2008 15:39:11 -0600 Subject: Add a comment in chrdev_open() I stared at this code for a while and almost deleted it before understanding crept into my slow brain. Hopefully this makes life easier for the next person to happen on it. Signed-off-by: Jonathan Corbet --- fs/char_dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index 68e510b88457..a54d69369b2f 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -373,6 +373,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) return -ENXIO; new = container_of(kobj, struct cdev, kobj); spin_lock(&cdev_lock); + /* Check i_cdev again in case somebody beat us to it while + we dropped the lock. */ p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; -- cgit v1.2.3 From 9514dff918b947ae43b66517dc90d0e05548bd6a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 18 May 2008 15:40:00 -0600 Subject: Remove the lock_kernel() call from chrdev_open() All in-kernel char device open() functions now either have their own lock_kernel() calls or clearly do not need one. Signed-off-by: Jonathan Corbet --- fs/char_dev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index a54d69369b2f..3cb7cda3d780 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -394,11 +394,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) cdev_put(p); return -ENXIO; } - if (filp->f_op->open) { - lock_kernel(); + if (filp->f_op->open) ret = filp->f_op->open(inode,filp); - unlock_kernel(); - } if (ret) cdev_put(p); return ret; -- cgit v1.2.3 From 8f5934278d1d86590244c2791b28f77d67466007 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 May 2008 19:53:01 -0700 Subject: Replace BKL with superblock lock in fat/msdos/vfat This replaces the use of the BKL in the FAT family of filesystems with the existing superblock lock instead. The code already appears to do mostly proper locking with its own private spinlocks (and mutexes), but while the BKL could possibly have been dropped entirely, converting it to use the superblock lock (which is just a regular mutex) is the conservative thing to do. As a per-filesystem mutex, it not only won't have any of the possible latency issues related to the BKL, but the lock is obviously private to the particular filesystem instance and will thus not cause problems for entirely unrelated users like the BKL can. Signed-off-by: Linus Torvalds Cc: OGAWA Hirofumi Signed-off-by: Jonathan Corbet --- fs/fat/cache.c | 2 +- fs/fat/dir.c | 4 ++-- fs/fat/file.c | 12 +++++++----- fs/fat/inode.c | 26 ++++++++++++++++---------- fs/msdos/namei.c | 35 +++++++++++++++++++---------------- fs/vfat/namei.c | 35 +++++++++++++++++++---------------- 6 files changed, 64 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/fat/cache.c b/fs/fat/cache.c index fda25479af26..3a9ecac8d61f 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -61,7 +61,7 @@ void fat_cache_destroy(void) static inline struct fat_cache *fat_cache_alloc(struct inode *inode) { - return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); + return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS); } static inline void fat_cache_free(struct fat_cache *cache) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 486725ee99ae..34541d06e626 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -472,7 +472,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, loff_t cpos; int ret = 0; - lock_kernel(); + lock_super(sb); cpos = filp->f_pos; /* Fake . and .. for the root directory. */ @@ -654,7 +654,7 @@ FillFailed: if (unicode) __putname(unicode); out: - unlock_kernel(); + unlock_super(sb); return ret; } diff --git a/fs/fat/file.c b/fs/fat/file.c index 27cc1164ec36..7059928fb351 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -229,7 +229,8 @@ static int fat_free(struct inode *inode, int skip) void fat_truncate(struct inode *inode) { - struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); const unsigned int cluster_size = sbi->cluster_size; int nr_clusters; @@ -242,9 +243,9 @@ void fat_truncate(struct inode *inode) nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; - lock_kernel(); + lock_super(sb); fat_free(inode, nr_clusters); - unlock_kernel(); + unlock_super(sb); fat_flush_inodes(inode->i_sb, inode, NULL); } @@ -297,12 +298,13 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) int fat_setattr(struct dentry *dentry, struct iattr *attr) { + struct super_block *sb = dentry->d_sb; struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; int mask, error = 0; unsigned int ia_valid; - lock_kernel(); + lock_super(sb); /* * Expand the file. Since inode_setattr() updates ->i_size @@ -356,7 +358,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) mask = sbi->options.fs_fmask; inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); out: - unlock_kernel(); + unlock_super(sb); return error; } EXPORT_SYMBOL_GPL(fat_setattr); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4e0a3dd9d677..46a4508ffd2e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -440,14 +440,13 @@ static void fat_delete_inode(struct inode *inode) static void fat_clear_inode(struct inode *inode) { - struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); - lock_kernel(); spin_lock(&sbi->inode_hash_lock); fat_cache_inval_inode(inode); hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); - unlock_kernel(); } static void fat_write_super(struct super_block *sb) @@ -485,7 +484,7 @@ static struct kmem_cache *fat_inode_cachep; static struct inode *fat_alloc_inode(struct super_block *sb) { struct msdos_inode_info *ei; - ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS); if (!ei) return NULL; return &ei->vfs_inode; @@ -567,7 +566,7 @@ retry: if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) return 0; - lock_kernel(); + lock_super(sb); bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); if (!bh) { printk(KERN_ERR "FAT: unable to read inode block " @@ -579,7 +578,7 @@ retry: if (i_pos != MSDOS_I(inode)->i_pos) { spin_unlock(&sbi->inode_hash_lock); brelse(bh); - unlock_kernel(); + unlock_super(sb); goto retry; } @@ -606,7 +605,7 @@ retry: err = sync_dirty_buffer(bh); brelse(bh); out: - unlock_kernel(); + unlock_super(sb); return err; } @@ -736,6 +735,7 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) static struct dentry *fat_get_parent(struct dentry *child) { + struct super_block *sb = child->d_sb; struct buffer_head *bh; struct msdos_dir_entry *de; loff_t i_pos; @@ -743,14 +743,14 @@ static struct dentry *fat_get_parent(struct dentry *child) struct inode *inode; int err; - lock_kernel(); + lock_super(sb); err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); if (err) { parent = ERR_PTR(err); goto out; } - inode = fat_build_inode(child->d_sb, de, i_pos); + inode = fat_build_inode(sb, de, i_pos); brelse(bh); if (IS_ERR(inode)) { parent = ERR_CAST(inode); @@ -762,7 +762,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = ERR_PTR(-ENOMEM); } out: - unlock_kernel(); + unlock_super(sb); return parent; } @@ -1172,6 +1172,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, long error; char buf[50]; + /* + * GFP_KERNEL is ok here, because while we do hold the + * supeblock lock, memory pressure can't call back into + * the filesystem, since we're only just about to mount + * it and have no inodes etc active! + */ sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 05ff4f1d7026..1f7f2956412a 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -214,7 +214,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &msdos_dentry_operations; - lock_kernel(); + lock_super(sb); res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); if (res == -ENOENT) goto add; @@ -232,7 +232,7 @@ add: if (dentry) dentry->d_op = &msdos_dentry_operations; out: - unlock_kernel(); + unlock_super(sb); if (!res) return dentry; return ERR_PTR(res); @@ -286,7 +286,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, unsigned char msdos_name[MSDOS_NAME]; int err, is_hid; - lock_kernel(); + lock_super(sb); err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options); @@ -315,7 +315,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, d_instantiate(dentry, inode); out: - unlock_kernel(); + unlock_super(sb); if (!err) err = fat_flush_inodes(sb, dir, inode); return err; @@ -324,11 +324,12 @@ out: /***** Remove a directory */ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) { + struct super_block *sb = dir->i_sb; struct inode *inode = dentry->d_inode; struct fat_slot_info sinfo; int err; - lock_kernel(); + lock_super(sb); /* * Check whether the directory is not in use, then check * whether it is empty. @@ -349,9 +350,9 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ctime = CURRENT_TIME_SEC; fat_detach(inode); out: - unlock_kernel(); + unlock_super(sb); if (!err) - err = fat_flush_inodes(inode->i_sb, dir, inode); + err = fat_flush_inodes(sb, dir, inode); return err; } @@ -366,7 +367,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct timespec ts; int err, is_hid, cluster; - lock_kernel(); + lock_super(sb); err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options); @@ -404,14 +405,14 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); - unlock_kernel(); + unlock_super(sb); fat_flush_inodes(sb, dir, inode); return 0; out_free: fat_free_clusters(dir, cluster); out: - unlock_kernel(); + unlock_super(sb); return err; } @@ -419,10 +420,11 @@ out: static int msdos_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + struct super_block *sb= inode->i_sb; struct fat_slot_info sinfo; int err; - lock_kernel(); + lock_super(sb); err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); if (err) goto out; @@ -434,9 +436,9 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) inode->i_ctime = CURRENT_TIME_SEC; fat_detach(inode); out: - unlock_kernel(); + unlock_super(sb); if (!err) - err = fat_flush_inodes(inode->i_sb, dir, inode); + err = fat_flush_inodes(sb, dir, inode); return err; } @@ -618,10 +620,11 @@ error_inode: static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { + struct super_block *sb = old_dir->i_sb; unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; int err, is_hid; - lock_kernel(); + lock_super(sb); err = msdos_format_name(old_dentry->d_name.name, old_dentry->d_name.len, old_msdos_name, @@ -640,9 +643,9 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, new_dir, new_msdos_name, new_dentry, is_hid); out: - unlock_kernel(); + unlock_super(sb); if (!err) - err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); + err = fat_flush_inodes(sb, old_dir, new_dir); return err; } diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index a3522727ea5b..b546ba69be82 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -645,7 +645,7 @@ static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir, if (len == 0) return -ENOENT; - slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_KERNEL); + slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS); if (slots == NULL) return -ENOMEM; @@ -687,7 +687,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, struct dentry *alias; int err, table; - lock_kernel(); + lock_super(sb); table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; dentry->d_op = &vfat_dentry_ops[table]; @@ -699,7 +699,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - unlock_kernel(); + unlock_super(sb); return ERR_CAST(inode); } alias = d_find_alias(inode); @@ -708,13 +708,13 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, dput(alias); else { iput(inode); - unlock_kernel(); + unlock_super(sb); return alias; } } error: - unlock_kernel(); + unlock_super(sb); dentry->d_op = &vfat_dentry_ops[table]; dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); @@ -734,7 +734,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, struct timespec ts; int err; - lock_kernel(); + lock_super(sb); ts = CURRENT_TIME_SEC; err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); @@ -755,17 +755,18 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); out: - unlock_kernel(); + unlock_super(sb); return err; } static int vfat_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; int err; - lock_kernel(); + lock_super(sb); err = fat_dir_empty(inode); if (err) @@ -783,7 +784,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); out: - unlock_kernel(); + unlock_super(sb); return err; } @@ -791,10 +792,11 @@ out: static int vfat_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; int err; - lock_kernel(); + lock_super(sb); err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) @@ -807,7 +809,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); out: - unlock_kernel(); + unlock_super(sb); return err; } @@ -820,7 +822,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct timespec ts; int err, cluster; - lock_kernel(); + lock_super(sb); ts = CURRENT_TIME_SEC; cluster = fat_alloc_new_dir(dir, &ts); @@ -849,13 +851,13 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); - unlock_kernel(); + unlock_super(sb); return 0; out_free: fat_free_clusters(dir, cluster); out: - unlock_kernel(); + unlock_super(sb); return err; } @@ -869,11 +871,12 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, struct timespec ts; loff_t dotdot_i_pos, new_i_pos; int err, is_dir, update_dotdot, corrupt = 0; + struct super_block *sb = old_dir->i_sb; old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; - lock_kernel(); + lock_super(sb); err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); if (err) goto out; @@ -951,7 +954,7 @@ out: brelse(sinfo.bh); brelse(dotdot_bh); brelse(old_sinfo.bh); - unlock_kernel(); + unlock_super(sb); return err; -- cgit v1.2.3 From 514bcc66d4072a221a8dfd341a4006385a441918 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 May 2008 19:15:48 +0200 Subject: dlm-user: BKL pushdown Signed-off-by: Arnd Bergmann --- fs/dlm/user.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dlm/user.c b/fs/dlm/user.c index ebbcf38fd33b..f976f303c196 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -618,13 +619,17 @@ static int device_open(struct inode *inode, struct file *file) struct dlm_user_proc *proc; struct dlm_ls *ls; + lock_kernel(); ls = dlm_find_lockspace_device(iminor(inode)); - if (!ls) + if (!ls) { + unlock_kernel(); return -ENOENT; + } proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); if (!proc) { dlm_put_lockspace(ls); + unlock_kernel(); return -ENOMEM; } @@ -636,6 +641,7 @@ static int device_open(struct inode *inode, struct file *file) spin_lock_init(&proc->locks_spin); init_waitqueue_head(&proc->wait); file->private_data = proc; + unlock_kernel(); return 0; } @@ -870,6 +876,7 @@ static unsigned int device_poll(struct file *file, poll_table *wait) static int ctl_device_open(struct inode *inode, struct file *file) { + cycle_kernel_lock(); file->private_data = NULL; return 0; } -- cgit v1.2.3 From daed1fe5581d294651c62707bd5a2de668d25412 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 May 2008 19:16:28 +0200 Subject: ocfs2-stack_user: BKL pushdown Signed-off-by: Arnd Bergmann --- fs/ocfs2/stack_user.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index b503772cd0ec..cd120011104d 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -619,10 +620,12 @@ static int ocfs2_control_open(struct inode *inode, struct file *file) return -ENOMEM; p->op_this_node = -1; + lock_kernel(); mutex_lock(&ocfs2_control_lock); file->private_data = p; list_add(&p->op_list, &ocfs2_control_private_list); mutex_unlock(&ocfs2_control_lock); + unlock_kernel(); return 0; } -- cgit v1.2.3 From e57eae43136302f6a19d04b12797ba0780c0d347 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 29 May 2008 17:14:05 -0600 Subject: Make FAT users happier by not deadlocking The FAT BKL removal patch can cause deadlocks. It turns out that the new lock_super() calls are unneeded, remove them (as directed by Linus). Reported-by: "Tony Luck" Signed-off-by: Jonathan Corbet --- fs/fat/file.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fat/file.c b/fs/fat/file.c index 7059928fb351..bdf91e97397d 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -229,8 +228,7 @@ static int fat_free(struct inode *inode, int skip) void fat_truncate(struct inode *inode) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); const unsigned int cluster_size = sbi->cluster_size; int nr_clusters; @@ -243,9 +241,7 @@ void fat_truncate(struct inode *inode) nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; - lock_super(sb); fat_free(inode, nr_clusters); - unlock_super(sb); fat_flush_inodes(inode->i_sb, inode, NULL); } @@ -298,14 +294,11 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) int fat_setattr(struct dentry *dentry, struct iattr *attr) { - struct super_block *sb = dentry->d_sb; struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; int mask, error = 0; unsigned int ia_valid; - lock_super(sb); - /* * Expand the file. Since inode_setattr() updates ->i_size * before calling the ->truncate(), but FAT needs to fill the @@ -358,7 +351,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) mask = sbi->options.fs_fmask; inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); out: - unlock_super(sb); return error; } EXPORT_SYMBOL_GPL(fat_setattr); -- cgit v1.2.3 From a9f88f0c4fed5d1ed6fb4a0389891d327a147130 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Jun 2008 11:05:24 +0200 Subject: Remove BKL from remote_llseek v2 - Replace remote_llseek with generic_file_llseek_unlocked (to force compilation failures in all users) - Change all users to either use generic_file_llseek_unlocked directly or take the BKL around. I changed the file systems who don't use the BKL for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS take the BKL, but explicitely in their own source now. I moved them all over in a single patch to avoid unbisectable sections. Open problem: 32bit kernels can corrupt fpos because its modification is not atomic, but they can do that anyways because there's other paths who modify it without BKL. Do we need a special lock for the pos/f_version = 0 checks? Trond says the NFS BKL is likely not needed, but keep it for now until his full audit. v2: Use generic_file_llseek_unlocked instead of remote_llseek_unlocked and factor duplicated code (suggested by hch) Cc: Trond.Myklebust@netapp.com Cc: swhiteho@redhat.com Cc: sfrench@samba.org Cc: vandrove@vc.cvut.cz Signed-off-by: Andi Kleen Signed-off-by: Andi Kleen Signed-off-by: Jonathan Corbet --- fs/cifs/cifsfs.c | 2 +- fs/gfs2/ops_file.c | 4 ++-- fs/ncpfs/file.c | 12 +++++++++++- fs/nfs/file.c | 6 +++++- fs/read_write.c | 38 +++++++++++--------------------------- fs/smbfs/file.c | 11 ++++++++++- include/linux/fs.h | 3 ++- 7 files changed, 42 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 86b4d5f405ae..22857c639df5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -612,7 +612,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) if (retval < 0) return (loff_t)retval; } - return remote_llseek(file, offset, origin); + return generic_file_llseek_unlocked(file, offset, origin); } struct file_system_type cifs_fs_type = { diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e1b7d525a066..24dd59450088 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -62,11 +62,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (!error) { - error = remote_llseek(file, offset, origin); + error = generic_file_llseek_unlocked(file, offset, origin); gfs2_glock_dq_uninit(&i_gh); } } else - error = remote_llseek(file, offset, origin); + error = generic_file_llseek_unlocked(file, offset, origin); return error; } diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 2b145de45b39..6a7d901f1936 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "ncplib_kernel.h" @@ -281,9 +282,18 @@ static int ncp_release(struct inode *inode, struct file *file) { return 0; } +static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t ret; + lock_kernel(); + ret = generic_file_llseek_unlocked(file, offset, origin); + unlock_kernel(); + return ret; +} + const struct file_operations ncp_file_operations = { - .llseek = remote_llseek, + .llseek = ncp_remote_llseek, .read = ncp_file_read, .write = ncp_file_write, .ioctl = ncp_ioctl, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d84a3d8f32af..4e98a56a1777 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,6 +170,7 @@ force_reval: static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { + loff_t loff; /* origin == SEEK_END => we must revalidate the cached file length */ if (origin == SEEK_END) { struct inode *inode = filp->f_mapping->host; @@ -177,7 +178,10 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) if (retval < 0) return (loff_t)retval; } - return remote_llseek(filp, offset, origin); + lock_kernel(); /* BKL needed? */ + loff = generic_file_llseek_unlocked(filp, offset, origin); + unlock_kernel(); + return loff; } /* diff --git a/fs/read_write.c b/fs/read_write.c index f0d1240a5c69..9ba495d5a29b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -31,12 +31,12 @@ const struct file_operations generic_ro_fops = { EXPORT_SYMBOL(generic_ro_fops); -loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) +loff_t +generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) { loff_t retval; struct inode *inode = file->f_mapping->host; - mutex_lock(&inode->i_mutex); switch (origin) { case SEEK_END: offset += inode->i_size; @@ -46,42 +46,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) } retval = -EINVAL; if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { + /* Special lock needed here? */ if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; } retval = offset; } - mutex_unlock(&inode->i_mutex); return retval; } +EXPORT_SYMBOL(generic_file_llseek_unlocked); -EXPORT_SYMBOL(generic_file_llseek); - -loff_t remote_llseek(struct file *file, loff_t offset, int origin) +loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) { - loff_t retval; - - lock_kernel(); - switch (origin) { - case SEEK_END: - offset += i_size_read(file->f_path.dentry->d_inode); - break; - case SEEK_CUR: - offset += file->f_pos; - } - retval = -EINVAL; - if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) { - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - retval = offset; - } - unlock_kernel(); - return retval; + loff_t n; + mutex_lock(&file->f_dentry->d_inode->i_mutex); + n = generic_file_llseek_unlocked(file, offset, origin); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); + return n; } -EXPORT_SYMBOL(remote_llseek); +EXPORT_SYMBOL(generic_file_llseek); loff_t no_llseek(struct file *file, loff_t offset, int origin) { diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index efbe29af3d7a..2294783320cb 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) return error; } +static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t ret; + lock_kernel(); + ret = generic_file_llseek_unlocked(file, offset, origin); + unlock_kernel(); + return ret; +} + const struct file_operations smb_file_operations = { - .llseek = remote_llseek, + .llseek = smb_remote_llseek, .read = do_sync_read, .aio_read = smb_file_aio_read, .write = do_sync_write, diff --git a/include/linux/fs.h b/include/linux/fs.h index d490779f18d9..0965cbfff87b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1871,7 +1871,8 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, + int origin); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From 100a1c591c95f6fee49fde98322251a3f76ea636 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 19 Jun 2008 16:18:25 -0600 Subject: ecryptfs: fasync BKL pushdown Signed-off-by: Jonathan Corbet --- fs/ecryptfs/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2258b8f654a6..24749bf0668f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ecryptfs_kernel.h" /** @@ -277,9 +278,11 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) int rc = 0; struct file *lower_file = NULL; + lock_kernel(); lower_file = ecryptfs_file_to_lower(file); if (lower_file->f_op && lower_file->f_op->fasync) rc = lower_file->f_op->fasync(fd, lower_file, flag); + unlock_kernel(); return rc; } -- cgit v1.2.3 From e845d44b29e3bf7cfa981c19ad0fb9979d6051fc Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 20 Jun 2008 09:12:01 -0600 Subject: Call fasync() functions without the BKL lock_kernel() calls have been pushed down into code which needs it, so there is no need to take the BKL at this level anymore. This work inspired and aided by Andi Kleen's unlocked_fasync() patches. Acked-by: Andi Kleen Signed-off-by: Jonathan Corbet --- fs/fcntl.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index bfd776509a72..330a7d782591 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -227,7 +226,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (error) return error; - lock_kernel(); if ((arg ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); @@ -238,7 +236,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); out: - unlock_kernel(); return error; } -- cgit v1.2.3