From b1eaa950f7e905aaffca0454aa05101ce4f6446a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 11 Jan 2017 12:07:46 +0200 Subject: ovl: lockdep annotate of nested stacked overlayfs inode lock An overlayfs instance can be the lower layer of another overlayfs instance. This setup triggers a lockdep splat of possible recursive locking of sb->s_type->i_mutex_key in iterate_dir(). Trimmed snip: [ INFO: possible recursive locking detected ] bash/2468 is trying to acquire lock: &sb->s_type->i_mutex_key#14, at: iterate_dir+0x7d/0x15c but task is already holding lock: &sb->s_type->i_mutex_key#14, at: iterate_dir+0x7d/0x15c One problem observed with this splat is that ovl_new_inode() does not call lockdep_annotate_inode_mutex_key() to annotate the dir inode lock as &sb->s_type->i_mutex_dir_key like other fs do. The other problem is that the 2 nested levels of overlayfs inode lock are annotated using the same key, which is the cause of the false positive lockdep warning. Fix this by annotating overlayfs inode lock in ovl_fill_inode() according to stack level of the super block instance and use different key for dir vs. non-dir like other fs do. Here is an edited snip from /proc/lockdep_chains after iterate_dir() of nested overlayfs: [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) [...] &type->i_mutex_dir_key (stack_depth=0) Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'fs/overlayfs/inode.c') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f8fe6bf2036d..17b8418358ed 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -303,6 +303,41 @@ static const struct inode_operations ovl_symlink_inode_operations = { .update_time = ovl_update_time, }; +/* + * It is possible to stack overlayfs instance on top of another + * overlayfs instance as lower layer. We need to annonate the + * stackable i_mutex locks according to stack level of the super + * block instance. An overlayfs instance can never be in stack + * depth 0 (there is always a real fs below it). An overlayfs + * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. + * + * For example, here is a snip from /proc/lockdep_chains after + * dir_iterate of nested overlayfs: + * + * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) + * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) + * [...] &type->i_mutex_dir_key (stack_depth=0) + */ +#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH + +static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; + static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; + + int depth = inode->i_sb->s_stack_depth - 1; + + if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) + depth = 0; + + if (S_ISDIR(inode->i_mode)) + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); + else + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); +#endif +} + static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_ino = get_next_ino(); @@ -312,6 +347,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif + ovl_lockdep_annotate_inode_mutex_key(inode); + switch (mode & S_IFMT) { case S_IFREG: inode->i_op = &ovl_file_inode_operations; -- cgit v1.2.3 From 72b608f08528458334218a809d66ea94d924c378 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 24 Apr 2017 00:25:49 +0300 Subject: ovl: constant st_ino/st_dev across copy up When all layers are on the same underlying filesystem, let stat(2) return st_dev/st_ino values of the copy up origin inode if it is known. This results in constant st_ino/st_dev representation of files in an overlay mount before and after copy up. When the underlying filesystem support NFS exportfs, the result is also persistent st_ino/st_dev representation before and after mount cycle. Lower hardlinks are broken on copy up to different upper files, so we cannot use the lower origin st_ino for those different files, even for the same fs case. When all overlay layers are on the same fs, use overlay st_dev for non-dirs to get the correct result from du -x. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'fs/overlayfs/inode.c') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 17b8418358ed..3dc693a78de2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -61,14 +61,51 @@ static int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; + enum ovl_path_type type; struct path realpath; const struct cred *old_cred; int err; - ovl_path_real(dentry, &realpath); + type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getattr(&realpath, stat, request_mask, flags); + if (err) + goto out; + + /* + * When all layers are on the same fs, all real inode number are + * unique, so we use the overlay st_dev, which is friendly to du -x. + * + * We also use st_ino of the copy up origin, if we know it. + * This guaranties constant st_dev/st_ino across copy up. + * + * If filesystem supports NFS export ops, this also guaranties + * persistent st_ino across mount cycle. + */ + if (ovl_same_sb(dentry->d_sb)) { + if (OVL_TYPE_ORIGIN(type)) { + struct kstat lowerstat; + + ovl_path_lower(dentry, &realpath); + err = vfs_getattr(&realpath, &lowerstat, + STATX_INO | STATX_NLINK, flags); + if (err) + goto out; + + WARN_ON_ONCE(stat->dev != lowerstat.dev); + /* + * Lower hardlinks are broken on copy up to different + * upper files, so we cannot use the lower origin st_ino + * for those different files, even for the same fs case. + */ + if (lowerstat.nlink == 1) + stat->ino = lowerstat.ino; + } + stat->dev = dentry->d_sb->s_dev; + } +out: revert_creds(old_cred); + return err; } -- cgit v1.2.3 From 5b712091a3a3904b0ae8311e18e6b540a070d464 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 5 May 2017 11:38:58 +0200 Subject: ovl: merge getattr for dir and nondir Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 61 +----------------------------------------------- fs/overlayfs/inode.c | 31 ++++++++++++++++++++---- fs/overlayfs/overlayfs.h | 2 ++ 3 files changed, 30 insertions(+), 64 deletions(-) (limited to 'fs/overlayfs/inode.c') diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 0c5e79966957..f4cf2928cf8e 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,65 +138,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return err; } -static int ovl_dir_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) -{ - struct dentry *dentry = path->dentry; - int err; - enum ovl_path_type type; - struct path realpath; - const struct cred *old_cred; - - type = ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); - if (err) - goto out; - - /* - * When all layers are on the same fs, use the copy-up-origin st_ino, - * which is persistent, unique and constant across copy up. - * - * Otherwise the pair {real st_ino; overlay st_dev} is not unique, so - * use the non persistent overlay st_ino. - */ - if (ovl_same_sb(dentry->d_sb)) { - if (OVL_TYPE_ORIGIN(type)) { - struct kstat lowerstat; - - ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - STATX_INO, flags); - if (err) - goto out; - - WARN_ON_ONCE(stat->dev != lowerstat.dev); - stat->ino = lowerstat.ino; - } - } else { - stat->ino = dentry->d_inode->i_ino; - } - - /* - * Always use the overlay st_dev for directories, so 'find -xdev' will - * scan the entire overlay mount and won't cross the overlay mount - * boundaries. - */ - stat->dev = dentry->d_sb->s_dev; - - /* - * It's probably not worth it to count subdirs to get the - * correct link count. nlink=1 seems to pacify 'find' and - * other utilities. - */ - if (OVL_TYPE_MERGE(type)) - stat->nlink = 1; -out: - revert_creds(old_cred); - - return err; -} - /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -1099,7 +1040,7 @@ const struct inode_operations ovl_dir_inode_operations = { .create = ovl_create, .mknod = ovl_mknod, .permission = ovl_permission, - .getattr = ovl_dir_getattr, + .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3dc693a78de2..ad9547f82da5 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -57,13 +57,14 @@ out: return err; } -static int ovl_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; enum ovl_path_type type; struct path realpath; const struct cred *old_cred; + bool is_dir = S_ISDIR(dentry->d_inode->i_mode); int err; type = ovl_path_real(dentry, &realpath); @@ -85,10 +86,11 @@ static int ovl_getattr(const struct path *path, struct kstat *stat, if (ovl_same_sb(dentry->d_sb)) { if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; + u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); err = vfs_getattr(&realpath, &lowerstat, - STATX_INO | STATX_NLINK, flags); + lowermask, flags); if (err) goto out; @@ -98,11 +100,32 @@ static int ovl_getattr(const struct path *path, struct kstat *stat, * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. */ - if (lowerstat.nlink == 1) + if (is_dir || lowerstat.nlink == 1) stat->ino = lowerstat.ino; } stat->dev = dentry->d_sb->s_dev; + } else if (is_dir) { + /* + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino. + * + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; } + + /* + * It's probably not worth it to count subdirs to get the + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. + */ + if (is_dir && OVL_TYPE_MERGE(type)) + stat->nlink = 1; + out: revert_creds(old_cred); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 98af19828695..caa36cb9c46d 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -236,6 +236,8 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -- cgit v1.2.3