From 88903daecacf03b1e5636e1b5f18bda5b07030fc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 20 Nov 2023 18:51:06 -0500 Subject: eventfs: Remove expectation that ei->is_freed means ei->dentry == NULL The logic to free the eventfs_inode (ei) used to set is_freed and clear the "dentry" field under the eventfs_mutex. But that changed when a race was found where the ei->dentry needed to be cleared when the last dput() was called on it. But there was still logic that checked if ei->dentry was not NULL and is_freed is set, and would warn if it was. But since that situation was changed and the ei->dentry isn't cleared until the last dput() is called on it while the ei->is_freed is set, do not test for that condition anymore, and change the comments to reflect that. Link: https://lkml.kernel.org/r/20231120235154.265826243@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 020010fbfa20 ("eventfs: Delete eventfs_inode when the last dentry is freed") Reported-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f8a594a50ae6..f239b2b507a4 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -27,16 +27,16 @@ /* * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access * to the ei->dentry must be done under this mutex and after checking - * if ei->is_freed is not set. The ei->dentry is released under the - * mutex at the same time ei->is_freed is set. If ei->is_freed is set - * then the ei->dentry is invalid. + * if ei->is_freed is not set. When ei->is_freed is set, the dentry + * is on its way to being freed after the last dput() is made on it. */ static DEFINE_MUTEX(eventfs_mutex); /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from * its parent's list and will have is_freed set (under eventfs_mutex). 
- * After the SRCU grace period is over, the ei may be freed. + * After the SRCU grace period is over and the last dput() is called + * the ei is freed. */ DEFINE_STATIC_SRCU(eventfs_srcu); @@ -365,12 +365,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed, don't return it. If e_dentry is NULL + * it means it was already freed. */ - dentry = *e_dentry; - if (WARN_ON_ONCE(dentry && ei->is_freed)) + if (ei->is_freed) dentry = NULL; + else + dentry = *e_dentry; /* The lookup does not need to up the dentry refcount */ if (dentry && !lookup) dget(dentry); @@ -473,8 +475,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed. */ dentry = ei->dentry; if (dentry && !lookup) -- cgit v1.2.3 From 71cade82f2b553a74d046c015c986f2df165696f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 20 Nov 2023 18:51:07 -0500 Subject: eventfs: Do not invalidate dentry in create_file/dir_dentry() With the call to simple_recursive_removal() on the entire eventfs sub system when the directory is removed, it performs the d_invalidate on all the dentries when it is removed. There's no need to do clean ups when a dentry is being created while the directory is being deleted. As dentries are cleaned up by the simple_recursive_removal(), trying to do d_invalidate() in these functions will cause the dentry to be invalidated twice, and crash the kernel. 
Link: https://lore.kernel.org/all/20231116123016.140576-1-naresh.kamboju@linaro.org/ Link: https://lkml.kernel.org/r/20231120235154.422970988@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 407c6726ca71 ("eventfs: Use simple_recursive_removal() to clean up dentries") Reported-by: Mark Rutland Reported-by: Naresh Kamboju Reported-by: Linux Kernel Functional Testing Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f239b2b507a4..3eb6c622a74d 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -326,7 +326,6 @@ create_file_dentry(struct eventfs_inode *ei, int idx, struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; struct dentry *dentry; - bool invalidate = false; mutex_lock(&eventfs_mutex); if (ei->is_freed) { @@ -389,17 +388,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? NULL : dentry; + return dentry; } /** @@ -439,7 +435,6 @@ static struct dentry * create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, struct dentry *parent, bool lookup) { - bool invalidate = false; struct dentry *dentry = NULL; mutex_lock(&eventfs_mutex); @@ -495,16 +490,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? 
NULL : dentry; + return dentry; } /** -- cgit v1.2.3 From 4763d635c907baed212664dc579dde1663bb2676 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:04 -0500 Subject: eventfs: Use GFP_NOFS for allocation when eventfs_mutex is held If memory reclaim happens, it can reclaim file system pages. The file system pages from eventfs may take the eventfs_mutex on reclaim. This means that allocation while holding the eventfs_mutex must not call into filesystem reclaim. A lockdep splat uncovered this. Link: https://lkml.kernel.org/r/20231121231112.373501894@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode") Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: Mark Rutland Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 3eb6c622a74d..56d192f0ead8 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, if (!(dentry->d_inode->i_mode & S_IFDIR)) { if (!ei->entry_attrs) { ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, - GFP_KERNEL); + GFP_NOFS); if (!ei->entry_attrs) { ret = -ENOMEM; goto out; @@ -627,7 +627,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) { struct dentry **tmp; - tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); if (!tmp) return -1; tmp[cnt] = d; -- cgit v1.2.3 From bcae32c5632fc0a0dbce46fa731cd23403117e66 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:05 -0500 Subject: eventfs: Move taking of inode_lock into dcache_dir_open_wrapper() Both create_file_dentry() and create_dir_dentry() 
take a boolean parameter "lookup", as on lookup the inode_lock should already be taken, but for dcache_dir_open_wrapper() it is not taken. There's no reason that the dcache_dir_open_wrapper() can't take the inode_lock before calling these functions. In fact, it's better if it does, as the lock can be held throughout both directory and file creations. This also simplifies the code, and possibly prevents unexpected race conditions when the lock is released. Link: https://lkml.kernel.org/r/20231121231112.528544825@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 56d192f0ead8..590e8176449b 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -347,15 +347,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_file(name, mode, attr, parent, data, fops); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry)) { @@ -453,15 +446,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, } mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_dir(ei, parent); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { @@ -693,6 +679,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) return -ENOMEM; } + inode_lock(parent->d_inode); list_for_each_entry_srcu(ei_child, &ei->children, list, 
srcu_read_lock_held(&eventfs_srcu)) { d = create_dir_dentry(ei, ei_child, parent, false); @@ -725,6 +712,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) cnt++; } } + inode_unlock(parent->d_inode); srcu_read_unlock(&eventfs_srcu, idx); ret = dcache_dir_open(inode, file); -- cgit v1.2.3 From fc4561226feaad5fcdcb55646c348d77b8ee69c5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:06 -0500 Subject: eventfs: Do not allow NULL parent to eventfs_start_creating() The eventfs directory is dynamically created via the meta data supplied by the existing trace events. All files and directories in eventfs have a parent. Do not allow NULL to be passed into eventfs_start_creating() as the parent because that should never happen. Warn if it does. Link: https://lkml.kernel.org/r/20231121231112.693841807@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5b54948514fe..ae648deed019 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; + /* Must always have a parent. */ + if (WARN_ON_ONCE(!parent)) + return ERR_PTR(-EINVAL); + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); if (error) return ERR_PTR(error); - /* - * If the parent is not specified, we create it in the root. - * We need the root dentry to do this, which is in the super - * block. A pointer to that is in the struct vfsmount that we - * have around. 
- */ - if (!parent) - parent = tracefs_mount->mnt_root; - if (unlikely(IS_DEADDIR(parent->d_inode))) dentry = ERR_PTR(-ENOENT); else -- cgit v1.2.3 From f49f950c217bfb40f11662bab39cb388d41e4cfb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:07 -0500 Subject: eventfs: Make sure that parent->d_inode is locked in creating files/dirs Since the locking of the parent->d_inode has been moved outside the creation of the files and directories (as it used to be locked via a conditional), add a WARN_ON_ONCE() to the case that it's not locked. Link: https://lkml.kernel.org/r/20231121231112.853962542@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 590e8176449b..0b90869fd805 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -327,6 +327,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, struct dentry **e_dentry = &ei->d_children[idx]; struct dentry *dentry; + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); + mutex_lock(&eventfs_mutex); if (ei->is_freed) { mutex_unlock(&eventfs_mutex); @@ -430,6 +432,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, { struct dentry *dentry = NULL; + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); + mutex_lock(&eventfs_mutex); if (pei->is_freed || ei->is_freed) { mutex_unlock(&eventfs_mutex); -- cgit v1.2.3 From 76d9eafff4484547ed9e606c8227ac9799a9f2da Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 15 Nov 2023 10:50:18 -0500 Subject: MAINTAINERS: TRACING: Add Mathieu Desnoyers as Reviewer In order to make sure I get CC'd on tracing changes for which my input would be relevant, add my name as reviewer of the TRACING subsystem. 
Link: https://lore.kernel.org/linux-trace-kernel/20231115155018.8236-1-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea790149af79..a2d4ef4d90f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22078,6 +22078,7 @@ F: drivers/watchdog/tqmx86_wdt.c TRACING M: Steven Rostedt M: Masami Hiramatsu +R: Mathieu Desnoyers L: linux-kernel@vger.kernel.org L: linux-trace-kernel@vger.kernel.org S: Maintained -- cgit v1.2.3