From 3be25f49b9d6a97eae9bcb96d3292072b7658bd8 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:26 -0400 Subject: fsnotify: add marks to inodes so groups can interpret how to handle those inodes This patch creates a way for fsnotify groups to attach marks to inodes. These marks have little meaning to the generic fsnotify infrastructure and thus their meaning should be interpreted by the group that attached them to the inode's list. dnotify and inotify will make use of these markings to indicate which inodes are of interest to their respective groups. But this implementation has the useful property that in the future other listeners could actually use the marks for the exact opposite reason, aka to indicate which inodes it had NO interest in. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index bca0c618fdb3..54c63ce3de25 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_private = NULL; inode->i_mapping = mapping; +#ifdef CONFIG_FSNOTIFY + inode->i_fsnotify_mask = 0; +#endif + return inode; out_free_security: @@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode) BUG_ON(inode_has_buffers(inode)); ima_inode_free(inode); security_inode_free(inode); + fsnotify_inode_delete(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->inotify_watches); mutex_init(&inode->inotify_mutex); #endif +#ifdef CONFIG_FSNOTIFY + INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); +#endif } EXPORT_SYMBOL(inode_init_once); -- cgit v1.2.3 From 164bc6195139047faaf5ada1278332e99494803b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:58 -0400 Subject: fsnotify: handle filesystem unmounts with fsnotify marks When an fs is unmounted with an fsnotify mark entry attached to one of its inodes we need to destroy that mark entry and we also (like inotify) send an unmount event. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/inode.c | 1 + fs/notify/inode_mark.c | 72 ++++++++++++++++++++++++++++++++++++++++ include/linux/fsnotify_backend.h | 4 +++ 3 files changed, 77 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 54c63ce3de25..ca337014ae29 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -407,6 +407,7 @@ int invalidate_inodes(struct super_block *sb) mutex_lock(&iprune_mutex); spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); + fsnotify_unmount_inodes(&sb->s_inodes); busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 282150f74cfa..0a499d2c6191 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -89,6 +89,7 @@ #include #include #include +#include /* for inode_lock */ #include @@ -351,3 +352,74 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry, return ret; } + +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @list: list of inodes being unmounted (sb->s_inodes) + * + * Called with inode_lock held, protecting the unmounting super block's list + * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. + * We temporarily drop inode_lock, however, and CAN block. + */ +void fsnotify_unmount_inodes(struct list_head *list) +{ + struct inode *inode, *next_i, *need_iput = NULL; + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inode *need_iput_tmp; + + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + continue; + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) + continue; + + need_iput_tmp = need_iput; + need_iput = NULL; + + /* In case fsnotify_inode_delete() drops a reference. */ + if (inode != need_iput_tmp) + __iget(inode); + else + need_iput_tmp = NULL; + + /* In case the dropping of a reference would nuke next_i. */ + if ((&next_i->i_sb_list != list) && + atomic_read(&next_i->i_count) && + !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + + /* + * We can safely drop inode_lock here because we hold + * references on both inode and next_i. Also no new inodes + * will be added since the umount has begun. Finally, + * iprune_mutex keeps shrink_icache_memory() away. + */ + spin_unlock(&inode_lock); + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput(inode); + + spin_lock(&inode_lock); + } +} diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index efdf9e442d86..d2c0ee30e618 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -336,6 +336,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -365,6 +366,9 @@ static inline u32 fsnotify_get_cookie(void) return 0; } +static inline void fsnotify_unmount_inodes(struct list_head *list) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3 From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:55 +1000 Subject: fs: introduce mnt_clone_write This patch speeds up lmbench lat_mmap test by about another 2% after the first patch. Before: avg = 462.286 std = 5.46106 After: avg = 453.12 std = 9.58257 (50 runs of each, stddev gives a reasonable confidence) It does this by introducing mnt_clone_write, which avoids some heavyweight operations of mnt_want_write if called on a vfsmount which we know already has a write count; and mnt_want_write_file, which can call mnt_clone_write if the file is open for write. After these two patches, mnt_want_write and mnt_drop_write go from 7% on the profile down to 1.3% (including mnt_clone_write). [AV: mnt_want_write_file() should take file alone and derive mnt from it; not only all callers have that form, but that's the only mnt about which we know that it's already held for write if file is opened for write] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/open.c | 4 ++-- fs/xattr.c | 4 ++-- include/linux/mount.h | 4 ++++ 6 files changed, 50 insertions(+), 6 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe48840..3d66dbcebef6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { file_take_write(file); - error = mnt_want_write(mnt); + error = mnt_clone_write(mnt); WARN_ON(error); } return error; diff --git a/fs/inode.c b/fs/inode.c index ca337014ae29..a88baebf77cf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1422,7 +1422,7 @@ void file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return; - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return; diff --git a/fs/namespace.c b/fs/namespace.c index 22ae06ad751d..120b8a6b99ed 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -264,6 +264,46 @@ out: } EXPORT_SYMBOL_GPL(mnt_want_write); +/** + * mnt_clone_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This is effectively like mnt_want_write, except + * it must only be used to take an extra write reference + * on a mountpoint that we already know has a write reference + * on it. This allows some optimisation. + * + * After finished, mnt_drop_write must be called as usual to + * drop the reference. + */ +int mnt_clone_write(struct vfsmount *mnt) +{ + /* superblock may be r/o */ + if (__mnt_is_readonly(mnt)) + return -EROFS; + preempt_disable(); + inc_mnt_writers(mnt); + preempt_enable(); + return 0; +} +EXPORT_SYMBOL_GPL(mnt_clone_write); + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + */ +int mnt_want_write_file(struct file *file) +{ + if (!(file->f_mode & FMODE_WRITE)) + return mnt_want_write(file->f_path.mnt); + else + return mnt_clone_write(file->f_path.mnt); +} +EXPORT_SYMBOL_GPL(mnt_want_write_file); + /** * mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access diff --git a/fs/open.c b/fs/open.c index bdfbf03615a4..7200e23d9258 100644 --- a/fs/open.c +++ b/fs/open.c @@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) audit_inode(NULL, dentry); - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out_putf; mutex_lock(&inode->i_mutex); @@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) if (!file) goto out; - error = mnt_want_write(file->f_path.mnt); + error = mnt_want_write_file(file); if (error) goto out_fput; dentry = file->f_path.dentry; diff --git a/fs/xattr.c b/fs/xattr.c index d51b8f9db921..1c3d0af59ddf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = setxattr(dentry, name, value, size, flags); mnt_drop_write(f->f_path.mnt); @@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = removexattr(dentry, name); mnt_drop_write(f->f_path.mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index ac49c1f8e5c0..5d5275364867 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); -- cgit v1.2.3 From 2eadfc0ed68690075dcff08b30d87831388a1663 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 2 Apr 2009 15:23:37 +0200 Subject: trivial: fs/inode: Fix typo in file_update_time nanodoc The advertised flag for not updating the time was wrong. Signed-off-by: Wolfram Sang Signed-off-by: Jiri Kosina --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index a88baebf77cf..f643be565df8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime); * for writeback. Note that this function is meant exclusively for * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the - * S_NOCTIME inode flag, e.g. for network filesystem where these + * S_NOCMTIME inode flag, e.g. for network filesystem where these * timestamps are handled by the server. */ -- cgit v1.2.3 From 9a7aa12f3911853a3574d47d567b81a2a5df7208 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jun 2009 15:26:49 +0200 Subject: vfs: Set special lockdep map for dirs only if not set by fs Some filesystems need to set lockdep map for i_mutex differently for different directories. For example OCFS2 has system directories (for orphan inode tracking and for gathering all system files like journal or quota files into a single place) which have different locking locking rules than standard directories. For a filesystem setting lockdep map is naturaly done when the inode is read but we have to modify unlock_new_inode() not to overwrite the lockdep map the filesystem has set. Acked-by: peterz@infradead.org CC: mingo@redhat.com Signed-off-by: Jan Kara Signed-off-by: Joel Becker --- fs/inode.c | 17 +++++++++++------ include/linux/lockdep.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f643be565df8..04c785bb63c3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -665,12 +665,17 @@ void unlock_new_inode(struct inode *inode) if (inode->i_mode & S_IFDIR) { struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); + /* Set new key only if filesystem hasn't already changed it */ + if (!lockdep_match_class(&inode->i_mutex, + &type->i_mutex_key)) { + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, + &type->i_mutex_dir_key); + } } #endif /* diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index da5a5a1f4cd2..b25d1b53df0d 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -258,6 +258,16 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, #define lockdep_set_subclass(lock, sub) \ lockdep_init_map(&(lock)->dep_map, #lock, \ (lock)->dep_map.key, sub) +/* + * Compare locking classes + */ +#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key) + +static inline int lockdep_match_key(struct lockdep_map *lock, + struct lock_class_key *key) +{ + return lock->key == key; +} /* * Acquire a lock. @@ -326,6 +336,11 @@ static inline void lockdep_on(void) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) +/* + * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP + * case since the result is not well defined and the caller should rather + * #ifdef the call himself. + */ # define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) -- cgit v1.2.3 From f19d4a8fa6f9b6ccf54df0971c97ffcaa390b7b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Jun 2009 19:50:45 -0400 Subject: add caching of ACLs in struct inode No helpers, no conversions yet. Signed-off-by: Al Viro --- fs/inode.c | 10 ++++++++++ include/linux/fs.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f643be565df8..e193cd592fa8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) } inode->i_private = NULL; inode->i_mapping = mapping; +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; +#endif #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; @@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode) ima_inode_free(inode); security_inode_free(inode); fsnotify_inode_delete(inode); +#ifdef CONFIG_FS_POSIX_ACL + if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_acl); + if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_default_acl); +#endif if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else diff --git a/include/linux/fs.h b/include/linux/fs.h index 79e302ddde0f..0872372184fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -710,6 +710,9 @@ static inline int mapping_writably_mapped(struct address_space *mapping) #define i_size_ordered_init(inode) do { } while (0) #endif +struct posix_acl; +#define ACL_NOT_CACHED ((void *)(-1)) + struct inode { struct hlist_node i_hash; struct list_head i_list; @@ -772,6 +775,10 @@ struct inode { atomic_t i_writecount; #ifdef CONFIG_SECURITY void *i_security; +#endif +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; #endif void *i_private; /* fs or device private pointer */ }; -- cgit v1.2.3