--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:35 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:57 -0500
+Subject: eventfs: Clean up dentry ops and add revalidate function
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.845943821@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 8dce06e98c70a7fcbb4bca7d90faf40522e65c58 upstream.
+
+In order for the dentries to stay up-to-date with the eventfs changes,
+just add a 'd_revalidate' function that checks the 'is_freed' bit.
+
+Also, clean up the dentry release to actually use d_release() rather
+than the slightly odd d_iput() function. We don't care about the inode,
+all we want to do is to get rid of the refcount to the eventfs data
+added by dentry->d_fsdata.
+
+It would probably be cleaner to make eventfs its own filesystem, or at
+least set its own dentry ops when looking up eventfs files. But as it
+is, only eventfs dentries use d_fsdata, so we don't really need to split
+these things up by use.
+
+Another thing that might be worth doing is to make all eventfs lookups
+mark their dentries as not worth caching. We could do that with
+d_delete(), but the DCACHE_DONTCACHE flag would likely be even better.
+
+As it is, the dentries are all freeable, but they only tend to get freed
+at memory pressure rather than more proactively. But that's a separate
+issue.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.124644253@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 5 ++---
+ fs/tracefs/inode.c | 27 ++++++++++++++++++---------
+ fs/tracefs/internal.h | 3 ++-
+ 3 files changed, 22 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -378,13 +378,12 @@ static void free_ei(struct eventfs_inode
+ }
+
+ /**
+- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+- * @ti: the tracefs_inode of the dentry
++ * eventfs_d_release - dentry is going away
+ * @dentry: dentry which has the reference to remove.
+ *
+ * Remove the association between a dentry from an eventfs_inode.
+ */
+-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
++void eventfs_d_release(struct dentry *dentry)
+ {
+ struct eventfs_inode *ei;
+ int i;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -377,21 +377,30 @@ static const struct super_operations tra
+ .show_options = tracefs_show_options,
+ };
+
+-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
++/*
++ * It would be cleaner if eventfs had its own dentry ops.
++ *
++ * Note that d_revalidate is called potentially under RCU,
++ * so it can't take the eventfs mutex etc. It's fine - if
++ * we open a file just as it's marked dead, things will
++ * still work just fine, and just see the old stale case.
++ */
++static void tracefs_d_release(struct dentry *dentry)
+ {
+- struct tracefs_inode *ti;
++ if (dentry->d_fsdata)
++ eventfs_d_release(dentry);
++}
+
+- if (!dentry || !inode)
+- return;
++static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
++{
++ struct eventfs_inode *ei = dentry->d_fsdata;
+
+- ti = get_tracefs(inode);
+- if (ti && ti->flags & TRACEFS_EVENT_INODE)
+- eventfs_set_ei_status_free(ti, dentry);
+- iput(inode);
++ return !(ei && ei->is_freed);
+ }
+
+ static const struct dentry_operations tracefs_dentry_operations = {
+- .d_iput = tracefs_dentry_iput,
++ .d_revalidate = tracefs_d_revalidate,
++ .d_release = tracefs_d_release,
+ };
+
+ static int trace_fill_super(struct super_block *sb, void *data, int silent)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -78,6 +78,7 @@ struct dentry *tracefs_start_creating(co
+ struct dentry *tracefs_end_creating(struct dentry *dentry);
+ struct dentry *tracefs_failed_creating(struct dentry *dentry);
+ struct inode *tracefs_get_inode(struct super_block *sb);
+-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
++
++void eventfs_d_release(struct dentry *dentry);
+
+ #endif /* _TRACEFS_INTERNAL_H */
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:54 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:26 -0500
+Subject: eventfs: Delete eventfs_inode when the last dentry is freed
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120949.792406858@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 020010fbfa202aa528a52743eba4ab0da3400a4e upstream.
+
+There exists a race between holding a reference of an eventfs_inode dentry
+and the freeing of the eventfs_inode. If user space has a dentry held long
+enough, it may still be able to access the dentry's eventfs_inode after it
+has been freed.
+
+To prevent this, have the eventfs_inode freed via the last dput() (or via
+RCU if the eventfs_inode does not have a dentry).
+
+This means reintroducing the eventfs_inode del_list field as a temporary
+place to put the eventfs_inode. It needs to mark it as freed (via the
+list) but also must invalidate the dentry immediately as the return from
+eventfs_remove_dir() expects that they are. But the dentry invalidation
+must not be called under the eventfs_mutex, so it must be done after the
+eventfs_inode is marked as free (put on a deletion list).
+
+Link: https://lkml.kernel.org/r/20231101172650.123479767@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 146 +++++++++++++++++++++--------------------------
+ fs/tracefs/internal.h | 2
+ 2 files changed, 69 insertions(+), 79 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -85,8 +85,7 @@ static int eventfs_set_attr(struct mnt_i
+
+ mutex_lock(&eventfs_mutex);
+ ei = dentry->d_fsdata;
+- /* The LSB is set when the eventfs_inode is being freed */
+- if (((unsigned long)ei & 1UL) || ei->is_freed) {
++ if (ei->is_freed) {
+ /* Do not allow changes if the event is about to be removed. */
+ mutex_unlock(&eventfs_mutex);
+ return -ENODEV;
+@@ -276,35 +275,17 @@ static void free_ei(struct eventfs_inode
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+ struct tracefs_inode *ti_parent;
+- struct eventfs_inode *ei_child, *tmp;
+ struct eventfs_inode *ei;
+ int i;
+
+ /* The top level events directory may be freed by this */
+ if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+- LIST_HEAD(ef_del_list);
+-
+ mutex_lock(&eventfs_mutex);
+-
+ ei = ti->private;
+-
+- /* Record all the top level files */
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- lockdep_is_held(&eventfs_mutex)) {
+- list_add_tail(&ei_child->del_list, &ef_del_list);
+- }
+-
+ /* Nothing should access this, but just in case! */
+ ti->private = NULL;
+-
+ mutex_unlock(&eventfs_mutex);
+
+- /* Now safely free the top level files and their children */
+- list_for_each_entry_safe(ei_child, tmp, &ef_del_list, del_list) {
+- list_del(&ei_child->del_list);
+- eventfs_remove_dir(ei_child);
+- }
+-
+ free_ei(ei);
+ return;
+ }
+@@ -319,14 +300,6 @@ void eventfs_set_ei_status_free(struct t
+ if (!ei)
+ goto out;
+
+- /*
+- * If ei was freed, then the LSB bit is set for d_fsdata.
+- * But this should not happen, as it should still have a
+- * ref count that prevents it. Warn in case it does.
+- */
+- if (WARN_ON_ONCE((unsigned long)ei & 1))
+- goto out;
+-
+ /* This could belong to one of the files of the ei */
+ if (ei->dentry != dentry) {
+ for (i = 0; i < ei->nr_entries; i++) {
+@@ -336,6 +309,8 @@ void eventfs_set_ei_status_free(struct t
+ if (WARN_ON_ONCE(i == ei->nr_entries))
+ goto out;
+ ei->d_children[i] = NULL;
++ } else if (ei->is_freed) {
++ free_ei(ei);
+ } else {
+ ei->dentry = NULL;
+ }
+@@ -962,13 +937,65 @@ struct eventfs_inode *eventfs_create_eve
+ return ERR_PTR(-ENOMEM);
+ }
+
++static LLIST_HEAD(free_list);
++
++static void eventfs_workfn(struct work_struct *work)
++{
++ struct eventfs_inode *ei, *tmp;
++ struct llist_node *llnode;
++
++ llnode = llist_del_all(&free_list);
++ llist_for_each_entry_safe(ei, tmp, llnode, llist) {
++ /* This dput() matches the dget() from unhook_dentry() */
++ for (int i = 0; i < ei->nr_entries; i++) {
++ if (ei->d_children[i])
++ dput(ei->d_children[i]);
++ }
++ /* This should only get here if it had a dentry */
++ if (!WARN_ON_ONCE(!ei->dentry))
++ dput(ei->dentry);
++ }
++}
++
++static DECLARE_WORK(eventfs_work, eventfs_workfn);
++
+ static void free_rcu_ei(struct rcu_head *head)
+ {
+ struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+
++ if (ei->dentry) {
++ /* Do not free the ei until all references of dentry are gone */
++ if (llist_add(&ei->llist, &free_list))
++ queue_work(system_unbound_wq, &eventfs_work);
++ return;
++ }
++
++ /* If the ei doesn't have a dentry, neither should its children */
++ for (int i = 0; i < ei->nr_entries; i++) {
++ WARN_ON_ONCE(ei->d_children[i]);
++ }
++
+ free_ei(ei);
+ }
+
++static void unhook_dentry(struct dentry *dentry)
++{
++ if (!dentry)
++ return;
++
++ /* Keep the dentry from being freed yet (see eventfs_workfn()) */
++ dget(dentry);
++
++ dentry->d_fsdata = NULL;
++ d_invalidate(dentry);
++ mutex_lock(&eventfs_mutex);
++ /* dentry should now have at least a single reference */
++ WARN_ONCE((int)d_count(dentry) < 1,
++ "dentry %px (%s) less than one reference (%d) after invalidate\n",
++ dentry, dentry->d_name.name, d_count(dentry));
++ mutex_unlock(&eventfs_mutex);
++}
++
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
+@@ -1006,33 +1033,6 @@ static void eventfs_remove_rec(struct ev
+ list_add_tail(&ei->del_list, head);
+ }
+
+-static void unhook_dentry(struct dentry **dentry, struct dentry **list)
+-{
+- if (*dentry) {
+- unsigned long ptr = (unsigned long)*list;
+-
+- /* Keep the dentry from being freed yet */
+- dget(*dentry);
+-
+- /*
+- * Paranoid: The dget() above should prevent the dentry
+- * from being freed and calling eventfs_set_ei_status_free().
+- * But just in case, set the link list LSB pointer to 1
+- * and have eventfs_set_ei_status_free() check that to
+- * make sure that if it does happen, it will not think
+- * the d_fsdata is an eventfs_inode.
+- *
+- * For this to work, no eventfs_inode should be allocated
+- * on a odd space, as the ef should always be allocated
+- * to be at least word aligned. Check for that too.
+- */
+- WARN_ON_ONCE(ptr & 1);
+-
+- (*dentry)->d_fsdata = (void *)(ptr | 1);
+- *list = *dentry;
+- *dentry = NULL;
+- }
+-}
+ /**
+ * eventfs_remove_dir - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
+@@ -1043,40 +1043,28 @@ void eventfs_remove_dir(struct eventfs_i
+ {
+ struct eventfs_inode *tmp;
+ LIST_HEAD(ei_del_list);
+- struct dentry *dentry_list = NULL;
+- struct dentry *dentry;
+- int i;
+
+ if (!ei)
+ return;
+
++ /*
++ * Move the deleted eventfs_inodes onto the ei_del_list
++ * which will also set the is_freed value. Note, this has to be
++ * done under the eventfs_mutex, but the deletions of
++ * the dentries must be done outside the eventfs_mutex.
++ * Hence moving them to this temporary list.
++ */
+ mutex_lock(&eventfs_mutex);
+ eventfs_remove_rec(ei, &ei_del_list, 0);
++ mutex_unlock(&eventfs_mutex);
+
+ list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
+- for (i = 0; i < ei->nr_entries; i++)
+- unhook_dentry(&ei->d_children[i], &dentry_list);
+- unhook_dentry(&ei->dentry, &dentry_list);
++ for (int i = 0; i < ei->nr_entries; i++)
++ unhook_dentry(ei->d_children[i]);
++ unhook_dentry(ei->dentry);
++ list_del(&ei->del_list);
+ call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+ }
+- mutex_unlock(&eventfs_mutex);
+-
+- while (dentry_list) {
+- unsigned long ptr;
+-
+- dentry = dentry_list;
+- ptr = (unsigned long)dentry->d_fsdata & ~1UL;
+- dentry_list = (struct dentry *)ptr;
+- dentry->d_fsdata = NULL;
+- d_invalidate(dentry);
+- mutex_lock(&eventfs_mutex);
+- /* dentry should now have at least a single reference */
+- WARN_ONCE((int)d_count(dentry) < 1,
+- "dentry %px (%s) less than one reference (%d) after invalidate\n",
+- dentry, dentry->d_name.name, d_count(dentry));
+- mutex_unlock(&eventfs_mutex);
+- dput(dentry);
+- }
+ }
+
+ /**
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -54,10 +54,12 @@ struct eventfs_inode {
+ void *data;
+ /*
+ * Union - used for deletion
++ * @llist: for calling dput() if needed after RCU
+ * @del_list: list of eventfs_inode to delete
+ * @rcu: eventfs_inode to delete in RCU
+ */
+ union {
++ struct llist_node llist;
+ struct list_head del_list;
+ struct rcu_head rcu;
+ };
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:53 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:44 -0500
+Subject: eventfs: Do ctx->pos update for all iterations in eventfs_iterate()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.722064231@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 1e4624eb5a0ecaae0d2c4e3019bece119725bb98 upstream.
+
+The ctx->pos was only updated when it added an entry, but the "skip to
+current pos" check (c--) happened for every loop regardless of if the
+entry was added or not. This inconsistency caused readdir to be incorrect.
+
+It was due to:
+
+ for (i = 0; i < ei->nr_entries; i++) {
+
+ if (c > 0) {
+ c--;
+ continue;
+ }
+
+ mutex_lock(&eventfs_mutex);
+ /* If ei->is_freed then just bail here, nothing more to do */
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+ goto out;
+ }
+ r = entry->callback(name, &mode, &cdata, &fops);
+ mutex_unlock(&eventfs_mutex);
+
+ [..]
+ ctx->pos++;
+ }
+
+But this can cause the iterator to return a file that was already read.
+That's because of the way the callback() works. Some events may not have
+all files, and the callback can return 0 to tell eventfs to skip the file
+for this directory.
+
+For instance, we have:
+
+ # ls /sys/kernel/tracing/events/ftrace/function
+format hist hist_debug id inject
+
+and
+
+ # ls /sys/kernel/tracing/events/sched/sched_switch/
+enable filter format hist hist_debug id inject trigger
+
+Where the function directory is missing "enable", "filter" and
+"trigger". That's because the callback() for events has:
+
+static int event_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ struct trace_event_file *file = *data;
+ struct trace_event_call *call = file->event_call;
+
+[..]
+
+ /*
+ * Only event directories that can be enabled should have
+ * triggers or filters, with the exception of the "print"
+ * event that can have a "trigger" file.
+ */
+ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+ if (call->class->reg && strcmp(name, "enable") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &ftrace_enable_fops;
+ return 1;
+ }
+
+ if (strcmp(name, "filter") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &ftrace_event_filter_fops;
+ return 1;
+ }
+ }
+
+ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ strcmp(trace_event_name(call), "print") == 0) {
+ if (strcmp(name, "trigger") == 0) {
+ *mode = TRACE_MODE_WRITE;
+ *fops = &event_trigger_fops;
+ return 1;
+ }
+ }
+[..]
+ return 0;
+}
+
+Where the function event has the TRACE_EVENT_FL_IGNORE_ENABLE set.
+
+This means that the entries array elements for "enable", "filter" and
+"trigger" when called on the function event will have the callback return
+0 and not 1, to tell eventfs to skip these files for it.
+
+Because the "skip to current ctx->pos" check happened for all entries, but
+the ctx->pos++ only happened to entries that exist, it would confuse the
+reading of a directory. Which would cause:
+
+ # ls /sys/kernel/tracing/events/ftrace/function/
+format hist hist hist_debug hist_debug id inject inject
+
+The missing "enable", "filter" and "trigger" caused ls to show "hist",
+"hist_debug" and "inject" twice.
+
+Update the ctx->pos for every iteration to keep its update and the "skip"
+update consistent. This also means that on error, the ctx->pos needs to be
+decremented if it was incremented without adding something.
+
+Link: https://lore.kernel.org/all/20240104150500.38b15a62@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.172295263@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 493ec81a8fb8e ("eventfs: Stop using dcache_readdir() for getdents()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -760,6 +760,8 @@ static int eventfs_iterate(struct file *
+ continue;
+ }
+
++ ctx->pos++;
++
+ if (ei_child->is_freed)
+ continue;
+
+@@ -767,13 +769,12 @@ static int eventfs_iterate(struct file *
+
+ dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+ if (!dentry)
+- goto out;
++ goto out_dec;
+ ino = dentry->d_inode->i_ino;
+ dput(dentry);
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+- goto out;
+- ctx->pos++;
++ goto out_dec;
+ }
+
+ for (i = 0; i < ei->nr_entries; i++) {
+@@ -784,6 +785,8 @@ static int eventfs_iterate(struct file *
+ continue;
+ }
+
++ ctx->pos++;
++
+ entry = &ei->entries[i];
+ name = entry->name;
+
+@@ -791,7 +794,7 @@ static int eventfs_iterate(struct file *
+ /* If ei->is_freed then just bail here, nothing more to do */
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+- goto out;
++ goto out_dec;
+ }
+ r = entry->callback(name, &mode, &cdata, &fops);
+ mutex_unlock(&eventfs_mutex);
+@@ -800,19 +803,23 @@ static int eventfs_iterate(struct file *
+
+ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+ if (!dentry)
+- goto out;
++ goto out_dec;
+ ino = dentry->d_inode->i_ino;
+ dput(dentry);
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+- goto out;
+- ctx->pos++;
++ goto out_dec;
+ }
+ ret = 1;
+ out:
+ srcu_read_unlock(&eventfs_srcu, idx);
+
+ return ret;
++
++ out_dec:
++ /* Incremented ctx->pos without adding something, reset it */
++ ctx->pos--;
++ goto out;
+ }
+
+ /**
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:53 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:33 -0500
+Subject: eventfs: Do not allow NULL parent to eventfs_start_creating()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.930155940@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit fc4561226feaad5fcdcb55646c348d77b8ee69c5 upstream.
+
+The eventfs directory is dynamically created via the meta data supplied by
+the existing trace events. All files and directories in eventfs have a
+parent. Do not allow NULL to be passed into eventfs_start_creating() as
+the parent because that should never happen. Warn if it does.
+
+Link: https://lkml.kernel.org/r/20231121231112.693841807@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(co
+ struct dentry *dentry;
+ int error;
+
++ /* Must always have a parent. */
++ if (WARN_ON_ONCE(!parent))
++ return ERR_PTR(-EINVAL);
++
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+- /*
+- * If the parent is not specified, we create it in the root.
+- * We need the root dentry to do this, which is in the super
+- * block. A pointer to that is in the struct vfsmount that we
+- * have around.
+- */
+- if (!parent)
+- parent = tracefs_mount->mnt_root;
+-
+ if (unlikely(IS_DEADDIR(parent->d_inode)))
+ dentry = ERR_PTR(-ENOENT);
+ else
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:48 -0500
+Subject: eventfs: Do not create dentries nor inodes in iterate_shared
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120953.380140896@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 852e46e239ee6db3cd220614cf8bce96e79227c2 upstream.
+
+The original eventfs code added a wrapper around the dcache_readdir open
+callback and created all the dentries and inodes at open, and incremented
+their ref count. A wrapper was added around the dcache_readdir release
+function to decrement all the ref counts of those created inodes and
+dentries. But this proved to be buggy[1] for when a kprobe was created
+during a dir read, it would create a dentry between the open and the
+release, and because the release would decrement all ref counts of all
+files and directories, that would include the kprobe directory that was
+not there to have its ref count incremented in open. This would cause the
+ref count to go to negative and later crash the kernel.
+
+To solve this, the dentries and inodes that were created and had their ref
+count upped in open needed to be saved. That list needed to be passed from
+the open to the release, so that the release would only decrement the ref
+counts of the entries that were incremented in the open.
+
+Unfortunately, the dcache_readdir logic was already using the
+file->private_data, which is the only field that can be used to pass
+information from the open to the release. What was done was the eventfs
+created another descriptor that had a void pointer to save the
+dcache_readdir pointer, and it wrapped all the callbacks, so that it could
+save the list of entries that had their ref counts incremented in the
+open, and pass it to the release. The wrapped callbacks would just put
+back the dcache_readdir pointer and call the functions it used so it could
+still use its data[2].
+
+But Linus had an issue with the "hijacking" of the file->private_data
+(unfortunately this discussion was on a security list, so no public link).
+Which we finally agreed on doing everything within the iterate_shared
+callback and leave the dcache_readdir out of it[3]. All the information
+needed for the getdents() could be created then.
+
+But this ended up being buggy too[4]. The iterate_shared callback was not
+the right place to create the dentries and inodes. Even Christian Brauner
+had issues with that[5].
+
+An attempt was made to go back to creating the inodes and dentries at
+the open, create an array to store the information in the
+file->private_data, and pass that information to the other callbacks.[6]
+
+The difference between that and the original method, is that it does not
+use dcache_readdir. It also does not up the ref counts of the dentries and
+pass them. Instead, it creates an array of a structure that saves the
+dentry's name and inode number. That information is used in the
+iterate_shared callback, and the array is freed in the dir release. The
+dentries and inodes created in the open are not used for the iterate_shared
+or release callbacks. Just their names and inode numbers.
+
+Linus did not like that either[7] and just wanted to remove the dentries
+being created in iterate_shared and use the hard coded inode numbers.
+
+[ All this while Linus enjoyed an unexpected vacation during the merge
+ window due to lack of power. ]
+
+[1] https://lore.kernel.org/linux-trace-kernel/20230919211804.230edf1e@gandalf.local.home/
+[2] https://lore.kernel.org/linux-trace-kernel/20230922163446.1431d4fa@gandalf.local.home/
+[3] https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org/
+[4] https://lore.kernel.org/all/202401152142.bfc28861-oliver.sang@intel.com/
+[5] https://lore.kernel.org/all/20240111-unzahl-gefegt-433acb8a841d@brauner/
+[6] https://lore.kernel.org/all/20240116114711.7e8637be@gandalf.local.home/
+[7] https://lore.kernel.org/all/20240116170154.5bf0a250@gandalf.local.home/
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.573784051@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: 493ec81a8fb8 ("eventfs: Stop using dcache_readdir() for getdents()")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401152142.bfc28861-oliver.sang@intel.com
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 20 +++++---------------
+ 1 file changed, 5 insertions(+), 15 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -727,8 +727,6 @@ static int eventfs_iterate(struct file *
+ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+- struct dentry *ei_dentry = NULL;
+- struct dentry *dentry;
+ const char *name;
+ umode_t mode;
+ int idx;
+@@ -749,11 +747,11 @@ static int eventfs_iterate(struct file *
+
+ mutex_lock(&eventfs_mutex);
+ ei = READ_ONCE(ti->private);
+- if (ei && !ei->is_freed)
+- ei_dentry = READ_ONCE(ei->dentry);
++ if (ei && ei->is_freed)
++ ei = NULL;
+ mutex_unlock(&eventfs_mutex);
+
+- if (!ei || !ei_dentry)
++ if (!ei)
+ goto out;
+
+ /*
+@@ -780,11 +778,7 @@ static int eventfs_iterate(struct file *
+ if (r <= 0)
+ continue;
+
+- dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+- if (!dentry)
+- goto out;
+- ino = dentry->d_inode->i_ino;
+- dput(dentry);
++ ino = EVENTFS_FILE_INODE_INO;
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+ goto out;
+@@ -808,11 +802,7 @@ static int eventfs_iterate(struct file *
+
+ name = ei_child->name;
+
+- dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+- if (!dentry)
+- goto out_dec;
+- ino = dentry->d_inode->i_ino;
+- dput(dentry);
++ ino = EVENTFS_DIR_INODE_INO;
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+ goto out_dec;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:46 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:30 -0500
+Subject: eventfs: Do not invalidate dentry in create_file/dir_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Naresh Kamboju <naresh.kamboju@linaro.org>, Linux Kernel Functional Testing <lkft@linaro.org>
+Message-ID: <20240206120950.446963304@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 71cade82f2b553a74d046c015c986f2df165696f upstream.
+
+With the call to simple_recursive_removal() on the entire eventfs sub
+system when the directory is removed, it performs the d_invalidate on all
+the dentries when it is removed. There's no need to do clean ups when a
+dentry is being created while the directory is being deleted.
+
+As dentries are cleaned up by the simple_recursive_removal(), trying to
+do d_invalidate() in these functions will cause the dentry to be
+invalidated twice, and crash the kernel.
+
+Link: https://lore.kernel.org/all/20231116123016.140576-1-naresh.kamboju@linaro.org/
+Link: https://lkml.kernel.org/r/20231120235154.422970988@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 407c6726ca71 ("eventfs: Use simple_recursive_removal() to clean up dentries")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 19 ++++++-------------
+ 1 file changed, 6 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -326,7 +326,6 @@ create_file_dentry(struct eventfs_inode
+ struct eventfs_attr *attr = NULL;
+ struct dentry **e_dentry = &ei->d_children[idx];
+ struct dentry *dentry;
+- bool invalidate = false;
+
+ mutex_lock(&eventfs_mutex);
+ if (ei->is_freed) {
+@@ -389,17 +388,14 @@ create_file_dentry(struct eventfs_inode
+ * Otherwise it means two dentries exist with the same name.
+ */
+ WARN_ON_ONCE(!ei->is_freed);
+- invalidate = true;
++ dentry = NULL;
+ }
+ mutex_unlock(&eventfs_mutex);
+
+- if (invalidate)
+- d_invalidate(dentry);
+-
+- if (lookup || invalidate)
++ if (lookup)
+ dput(dentry);
+
+- return invalidate ? NULL : dentry;
++ return dentry;
+ }
+
+ /**
+@@ -439,7 +435,6 @@ static struct dentry *
+ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+ struct dentry *parent, bool lookup)
+ {
+- bool invalidate = false;
+ struct dentry *dentry = NULL;
+
+ mutex_lock(&eventfs_mutex);
+@@ -495,16 +490,14 @@ create_dir_dentry(struct eventfs_inode *
+ * Otherwise it means two dentries exist with the same name.
+ */
+ WARN_ON_ONCE(!ei->is_freed);
+- invalidate = true;
++ dentry = NULL;
+ }
+ mutex_unlock(&eventfs_mutex);
+- if (invalidate)
+- d_invalidate(dentry);
+
+- if (lookup || invalidate)
++ if (lookup)
+ dput(dentry);
+
+- return invalidate ? NULL : dentry;
++ return dentry;
+ }
+
+ /**
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:54 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:39 -0500
+Subject: eventfs: Fix bitwise fields for "is_events"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120951.901001747@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit fd56cd5f6d76e93356d9520cf9dabffe1e3d1aa0 upstream.
+
+A flag was needed to denote which eventfs_inode was the "events"
+directory, so a bit was taken from the "nr_entries" field, as there's not
+that many entries, and 2^30 is plenty. But the bit number for nr_entries
+was not updated to reflect the bit taken from it, which would add an
+unnecessary integer to the structure.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240102151832.7ca87275@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 7e8358edf503e ("eventfs: Fix file and directory uid and gid ownership")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -63,7 +63,7 @@ struct eventfs_inode {
+ };
+ unsigned int is_freed:1;
+ unsigned int is_events:1;
+- unsigned int nr_entries:31;
++ unsigned int nr_entries:30;
+ };
+
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:23 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:35 -0500
+Subject: eventfs: Fix events beyond NAME_MAX blocking tasks
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Beau Belgrave <beaub@linux.microsoft.com>
+Message-ID: <20240206120951.255146556@rostedt.homelinux.com>
+
+From: Beau Belgrave <beaub@linux.microsoft.com>
+
+commit 5eaf7f0589c0d88178f0fbeebe0e0b7108258707 upstream.
+
+Eventfs uses simple_lookup(), however, it will fail if the name of the
+entry is beyond NAME_MAX length. When this error is encountered, eventfs
+still tries to create dentries instead of skipping the dentry creation.
+When the dentry is attempted to be created in this state d_wait_lookup()
+will loop forever, waiting for the lookup to be removed.
+
+Fix eventfs to return the error in simple_lookup() back to the caller
+instead of continuing to try to create the dentry.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231210213534.497-1-beaub@linux.microsoft.com
+
+Fixes: 63940449555e ("eventfs: Implement eventfs lookup, read, open functions")
+Link: https://lore.kernel.org/linux-trace-kernel/20231208183601.GA46-beaub@linux.microsoft.com/
+Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -546,6 +546,8 @@ static struct dentry *eventfs_root_looku
+ if (strcmp(ei_child->name, name) != 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
++ if (IS_ERR(ret))
++ goto out;
+ create_dir_dentry(ei, ei_child, ei_dentry, true);
+ created = true;
+ break;
+@@ -568,6 +570,8 @@ static struct dentry *eventfs_root_looku
+ if (r <= 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
++ if (IS_ERR(ret))
++ goto out;
+ create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+ fops, true);
+ break;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:15 -0500
+Subject: eventfs: Fix failure path in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Julia Lawall <julia.lawall@inria.fr>
+Message-ID: <20240206120948.003109160@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 7e8ad67c9b5c11e990c320ed7e7563f2301672a7 upstream.
+
+The failure path of allocating ei goes to a path that dereferences ei.
+Add another label that skips over the ei dereferences to do the rest of
+the clean up.
+
+Link: https://lore.kernel.org/all/70e7bace-561c-95f-1117-706c2c220bc@inria.fr/
+Link: https://lore.kernel.org/linux-trace-kernel/20231019204132.6662fef0@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -735,7 +735,7 @@ struct eventfs_inode *eventfs_create_eve
+
+ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ if (!ei)
+- goto fail;
++ goto fail_ei;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+@@ -781,6 +781,7 @@ struct eventfs_inode *eventfs_create_eve
+ fail:
+ kfree(ei->d_children);
+ kfree(ei);
++ fail_ei:
+ tracefs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:44 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:37 -0500
+Subject: eventfs: Fix file and directory uid and gid ownership
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Dongliang Cui <cuidongliang390@gmail.com>, Hongyu Jin <hongyu.jin@unisoc.com>
+Message-ID: <20240206120951.578630343@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 7e8358edf503e87236c8d07f69ef0ed846dd5112 upstream.
+
+It was reported that when mounting the tracefs file system with a gid
+other than root, the ownership did not carry down to the eventfs directory
+due to the dynamic nature of it.
+
+A fix was done to solve this, but it had two issues.
+
+(a) if the attr passed into update_inode_attr() was NULL, it didn't do
+ anything. This is true for files that have not had a chown or chgrp
+ done to itself or any of its sibling files, as the attr is allocated
+ for all children when any one needs it.
+
+ # umount /sys/kernel/tracing
+ # mount -o rw,seclabel,relatime,gid=1000 -t tracefs nodev /mnt
+
+ # ls -ld /mnt/events/sched
+drwxr-xr-x 28 root rostedt 0 Dec 21 13:12 /mnt/events/sched/
+
+ # ls -ld /mnt/events/sched/sched_switch
+drwxr-xr-x 2 root rostedt 0 Dec 21 13:12 /mnt/events/sched/sched_switch/
+
+But when checking the files:
+
+ # ls -l /mnt/events/sched/sched_switch
+total 0
+-rw-r----- 1 root root 0 Dec 21 13:12 enable
+-rw-r----- 1 root root 0 Dec 21 13:12 filter
+-r--r----- 1 root root 0 Dec 21 13:12 format
+-r--r----- 1 root root 0 Dec 21 13:12 hist
+-r--r----- 1 root root 0 Dec 21 13:12 id
+-rw-r----- 1 root root 0 Dec 21 13:12 trigger
+
+(b) When the attr does not denote the UID or GID, it defaulted to using
+ the parent uid or gid. This is incorrect as changing the parent
+ uid or gid will automatically change all its children.
+
+ # chgrp tracing /mnt/events/timer
+
+ # ls -ld /mnt/events/timer
+drwxr-xr-x 2 root tracing 0 Dec 21 14:34 /mnt/events/timer
+
+ # ls -l /mnt/events/timer
+total 0
+-rw-r----- 1 root root 0 Dec 21 14:35 enable
+-rw-r----- 1 root root 0 Dec 21 14:35 filter
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_cancel
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_expire_entry
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_expire_exit
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_init
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_start
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 itimer_expire
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 itimer_state
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 tick_stop
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_cancel
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_expire_entry
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_expire_exit
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_init
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_start
+
+At first it was thought that this could be easily fixed by just making the
+default ownership of the superblock when it was mounted. But this does not
+handle the case of:
+
+ # chgrp tracing instances
+ # mkdir instances/foo
+
+If the superblock was used, then the group ownership would be that of what
+it was when it was mounted, when it should instead be "tracing".
+
+Instead, set a flag for the top level eventfs directory ("events") to flag
+which eventfs_inode belongs to it.
+
+Since the "events" directory's dentry and inode are never freed, it does
+not need to use its attr field to restore its mode and ownership. Use
+this eventfs_inode's attr as the default ownership for all the files and
+directories underneath it.
+
+When the events eventfs_inode is created, it sets its ownership to its
+parent uid and gid. As the events directory is created at boot up before
+it gets mounted, this will always be uid=0 and gid=0. If it's created via
+an instance, then it will take the ownership of the instance directory.
+
+When the file system is mounted, it will update all the gids if one is
+specified. This will have a callback to update the events eventfs_inode's
+default entries.
+
+When a file or directory is created under the events directory, it will
+walk the ei->dentry parents until it finds the eventfs_inode that belongs
+to the events directory to retrieve the default uid and gid values.
+
+Link: https://lore.kernel.org/all/CAHk-=wiwQtUHvzwyZucDq8=Gtw+AnwScyLhpFswrQ84PjhoGsg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20231221190757.7eddbca9@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Dongliang Cui <cuidongliang390@gmail.com>
+Cc: Hongyu Jin <hongyu.jin@unisoc.com>
+Fixes: 0dfc852b6fe3 ("eventfs: Have event files and directories default to parent uid and gid")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 105 ++++++++++++++++++++++++++++++++++++++++++-----
+ fs/tracefs/inode.c | 6 ++
+ fs/tracefs/internal.h | 2
+ 3 files changed, 103 insertions(+), 10 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -113,7 +113,14 @@ static int eventfs_set_attr(struct mnt_i
+ * determined by the parent directory.
+ */
+ if (dentry->d_inode->i_mode & S_IFDIR) {
+- update_attr(&ei->attr, iattr);
++ /*
++ * The events directory dentry is never freed, unless its
++ * part of an instance that is deleted. It's attr is the
++ * default for its child files and directories.
++ * Do not update it. It's not used for its own mode or ownership
++ */
++ if (!ei->is_events)
++ update_attr(&ei->attr, iattr);
+
+ } else {
+ name = dentry->d_name.name;
+@@ -148,28 +155,93 @@ static const struct file_operations even
+ .release = eventfs_release,
+ };
+
++/* Return the evenfs_inode of the "events" directory */
++static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
++{
++ struct eventfs_inode *ei;
++
++ mutex_lock(&eventfs_mutex);
++ do {
++ /* The parent always has an ei, except for events itself */
++ ei = dentry->d_parent->d_fsdata;
++
++ /*
++ * If the ei is being freed, the ownership of the children
++ * doesn't matter.
++ */
++ if (ei->is_freed) {
++ ei = NULL;
++ break;
++ }
++
++ dentry = ei->dentry;
++ } while (!ei->is_events);
++ mutex_unlock(&eventfs_mutex);
++
++ return ei;
++}
++
+ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
+ struct eventfs_attr *attr, umode_t mode)
+ {
+- if (!attr) {
+- inode->i_mode = mode;
++ struct eventfs_inode *events_ei = eventfs_find_events(dentry);
++
++ if (!events_ei)
++ return;
++
++ inode->i_mode = mode;
++ inode->i_uid = events_ei->attr.uid;
++ inode->i_gid = events_ei->attr.gid;
++
++ if (!attr)
+ return;
+- }
+
+ if (attr->mode & EVENTFS_SAVE_MODE)
+ inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
+- else
+- inode->i_mode = mode;
+
+ if (attr->mode & EVENTFS_SAVE_UID)
+ inode->i_uid = attr->uid;
+- else
+- inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+
+ if (attr->mode & EVENTFS_SAVE_GID)
+ inode->i_gid = attr->gid;
+- else
+- inode->i_gid = d_inode(dentry->d_parent)->i_gid;
++}
++
++static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
++{
++ struct eventfs_inode *ei_child;
++
++ /* at most we have events/system/event */
++ if (WARN_ON_ONCE(level > 3))
++ return;
++
++ ei->attr.gid = gid;
++
++ if (ei->entry_attrs) {
++ for (int i = 0; i < ei->nr_entries; i++) {
++ ei->entry_attrs[i].gid = gid;
++ }
++ }
++
++ /*
++ * Only eventfs_inode with dentries are updated, make sure
++ * all eventfs_inodes are updated. If one of the children
++ * do not have a dentry, this function must traverse it.
++ */
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
++ srcu_read_lock_held(&eventfs_srcu)) {
++ if (!ei_child->dentry)
++ update_gid(ei_child, gid, level + 1);
++ }
++}
++
++void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
++{
++ struct eventfs_inode *ei = dentry->d_fsdata;
++ int idx;
++
++ idx = srcu_read_lock(&eventfs_srcu);
++ update_gid(ei, gid, 0);
++ srcu_read_unlock(&eventfs_srcu, idx);
+ }
+
+ /**
+@@ -860,6 +932,8 @@ struct eventfs_inode *eventfs_create_eve
+ struct eventfs_inode *ei;
+ struct tracefs_inode *ti;
+ struct inode *inode;
++ kuid_t uid;
++ kgid_t gid;
+
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+@@ -884,11 +958,20 @@ struct eventfs_inode *eventfs_create_eve
+ ei->dentry = dentry;
+ ei->entries = entries;
+ ei->nr_entries = size;
++ ei->is_events = 1;
+ ei->data = data;
+ ei->name = kstrdup_const(name, GFP_KERNEL);
+ if (!ei->name)
+ goto fail;
+
++ /* Save the ownership of this directory */
++ uid = d_inode(dentry->d_parent)->i_uid;
++ gid = d_inode(dentry->d_parent)->i_gid;
++
++ /* This is used as the default ownership of the files and directories */
++ ei->attr.uid = uid;
++ ei->attr.gid = gid;
++
+ INIT_LIST_HEAD(&ei->children);
+ INIT_LIST_HEAD(&ei->list);
+
+@@ -897,6 +980,8 @@ struct eventfs_inode *eventfs_create_eve
+ ti->private = ei;
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++ inode->i_uid = uid;
++ inode->i_gid = gid;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -210,6 +210,7 @@ repeat:
+ next = this_parent->d_subdirs.next;
+ resume:
+ while (next != &this_parent->d_subdirs) {
++ struct tracefs_inode *ti;
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+@@ -218,6 +219,11 @@ resume:
+
+ change_gid(dentry, gid);
+
++ /* If this is the events directory, update that too */
++ ti = get_tracefs(dentry->d_inode);
++ if (ti && (ti->flags & TRACEFS_EVENT_INODE))
++ eventfs_update_gid(dentry, gid);
++
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -62,6 +62,7 @@ struct eventfs_inode {
+ struct rcu_head rcu;
+ };
+ unsigned int is_freed:1;
++ unsigned int is_events:1;
+ unsigned int nr_entries:31;
+ };
+
+@@ -77,6 +78,7 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
++void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+ #endif /* _TRACEFS_INTERNAL_H */
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:20 -0500
+Subject: eventfs: Fix kerneldoc of eventfs_remove_rec()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, kernel test robot <lkp@intel.com>
+Message-ID: <20240206120948.816153049@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 9037caa09ed345b35325200f0e4acf5a94ae0a65 upstream.
+
+The eventfs_remove_rec() had some missing parameters in the kerneldoc
+comment above it. Also, rephrase the description a bit more to have a bit
+more correct grammar.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231030121523.0b2225a7@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202310052216.4SgqasWo-lkp@intel.com/
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -796,9 +796,11 @@ static void free_ei(struct rcu_head *hea
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
++ * @head: the list head to place the deleted @ei and children
++ * @level: prevent recursion from going more than 3 levels deep.
+ *
+- * This function recursively remove eventfs_inode which
+- * contains info of file or dir.
++ * This function recursively removes eventfs_inodes which
++ * contains info of files and/or directories.
+ */
+ static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
+ {
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:18 -0500
+Subject: eventfs: Fix typo in eventfs_inode union comment
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.496559787@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 29e06c10702e81a7d0b75020ca514d2f2962704a upstream.
+
+It's eventfs_inode not eventfs_indoe. There's no deer involved!
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231024131024.5634c743@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -37,7 +37,7 @@ struct eventfs_inode {
+ /*
+ * Union - used for deletion
+ * @del_list: list of eventfs_inode to delete
+- * @rcu: eventfs_indoe to delete in RCU
++ * @rcu: eventfs_inode to delete in RCU
+ * @is_freed: node is freed if one of the above is set
+ */
+ union {
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:17 -0500
+Subject: eventfs: Fix WARN_ON() in create_file_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.331907337@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit a9de4eb15ad430fe45747c211e367da745a90093 upstream.
+
+As the comment right above a WARN_ON() in create_file_dentry() states:
+
+ * Note, with the mutex held, the e_dentry cannot have content
+ * and the ei->is_freed be true at the same time.
+
+But the WARN_ON() only has:
+
+   WARN_ON_ONCE(ei->is_freed);
+
+Where to match the comment (and what it should actually do) is:
+
+ dentry = *e_dentry;
+   WARN_ON_ONCE(dentry && ei->is_freed)
+
+Also in that case, set dentry to NULL (although it should never happen).
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231024123628.62b88755@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -264,8 +264,9 @@ create_file_dentry(struct eventfs_inode
+ * Note, with the mutex held, the e_dentry cannot have content
+ * and the ei->is_freed be true at the same time.
+ */
+- WARN_ON_ONCE(ei->is_freed);
+ dentry = *e_dentry;
++ if (WARN_ON_ONCE(dentry && ei->is_freed))
++ dentry = NULL;
+ /* The lookup does not need to up the dentry refcount */
+ if (dentry && !lookup)
+ dget(dentry);
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:47 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:58 -0500
+Subject: eventfs: Get rid of dentry pointers without refcounts
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.006367557@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 43aa6f97c2d03a52c1ddb86768575fc84344bdbb upstream.
+
+The eventfs inode had pointers to dentries (and child dentries) without
+actually holding a refcount on said pointer. That is fundamentally
+broken, and while eventfs tried to then maintain coherence with dentries
+going away by hooking into the '.d_iput' callback, that doesn't actually
+work since it's not ordered wrt lookups.
+
+There were two reasons why eventfs tried to keep a pointer to a dentry:
+
+ - the creation of a 'events' directory would actually have a stable
+ dentry pointer that it created with tracefs_start_creating().
+
+ And it needed that dentry when tearing it all down again in
+ eventfs_remove_events_dir().
+
+ This use is actually ok, because the special top-level events
+ directory dentries are actually stable, not just a temporary cache of
+ the eventfs data structures.
+
+ - the 'eventfs_inode' (aka ei) needs to stay around as long as there
+ are dentries that refer to it.
+
+ It then used these dentry pointers as a replacement for doing
+ reference counting: it would try to make sure that there was only
+ ever one dentry associated with an event_inode, and keep a child
+ dentry array around to see which dentries might still refer to the
+ parent ei.
+
+This gets rid of the invalid dentry pointer use, and renames the one
+valid case to a different name to make it clear that it's not just any
+random dentry.
+
+The magic child dentry array that is kind of a "reverse reference list"
+is simply replaced by having child dentries take a ref to the ei. As
+does the directory dentries. That makes the broken use case go away.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.280463000@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 248 ++++++++++++++---------------------------------
+ fs/tracefs/internal.h | 7 -
+ 2 files changed, 78 insertions(+), 177 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -62,6 +62,35 @@ enum {
+
+ #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
+
++/*
++ * eventfs_inode reference count management.
++ *
++ * NOTE! We count only references from dentries, in the
++ * form 'dentry->d_fsdata'. There are also references from
++ * directory inodes ('ti->private'), but the dentry reference
++ * count is always a superset of the inode reference count.
++ */
++static void release_ei(struct kref *ref)
++{
++ struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
++ kfree(ei->entry_attrs);
++ kfree_const(ei->name);
++ kfree_rcu(ei, rcu);
++}
++
++static inline void put_ei(struct eventfs_inode *ei)
++{
++ if (ei)
++ kref_put(&ei->kref, release_ei);
++}
++
++static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
++{
++ if (ei)
++ kref_get(&ei->kref);
++ return ei;
++}
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+@@ -289,7 +318,8 @@ static void update_inode_attr(struct den
+ * directory. The inode.i_private pointer will point to @data in the open()
+ * call.
+ */
+-static struct dentry *lookup_file(struct dentry *dentry,
++static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
++ struct dentry *dentry,
+ umode_t mode,
+ struct eventfs_attr *attr,
+ void *data,
+@@ -302,7 +332,7 @@ static struct dentry *lookup_file(struct
+ mode |= S_IFREG;
+
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
+- return NULL;
++ return ERR_PTR(-EIO);
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+@@ -321,9 +351,12 @@ static struct dentry *lookup_file(struct
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+
++ // Files have their parent's ei as their fsdata
++ dentry->d_fsdata = get_ei(parent_ei);
++
+ d_add(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+- return dentry;
++ return NULL;
+ };
+
+ /**
+@@ -359,22 +392,29 @@ static struct dentry *lookup_dir_entry(s
+ /* Only directories have ti->private set to an ei, not files */
+ ti->private = ei;
+
+- dentry->d_fsdata = ei;
+- ei->dentry = dentry; // Remove me!
++ dentry->d_fsdata = get_ei(ei);
+
+ inc_nlink(inode);
+ d_add(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+- return dentry;
++ return NULL;
+ }
+
+-static void free_ei(struct eventfs_inode *ei)
++static inline struct eventfs_inode *alloc_ei(const char *name)
+ {
+- kfree_const(ei->name);
+- kfree(ei->d_children);
+- kfree(ei->entry_attrs);
+- kfree(ei);
++ struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++
++ if (!ei)
++ return NULL;
++
++ ei->name = kstrdup_const(name, GFP_KERNEL);
++ if (!ei->name) {
++ kfree(ei);
++ return NULL;
++ }
++ kref_init(&ei->kref);
++ return ei;
+ }
+
+ /**
+@@ -385,39 +425,13 @@ static void free_ei(struct eventfs_inode
+ */
+ void eventfs_d_release(struct dentry *dentry)
+ {
+- struct eventfs_inode *ei;
+- int i;
+-
+- mutex_lock(&eventfs_mutex);
+-
+- ei = dentry->d_fsdata;
+- if (!ei)
+- goto out;
+-
+- /* This could belong to one of the files of the ei */
+- if (ei->dentry != dentry) {
+- for (i = 0; i < ei->nr_entries; i++) {
+- if (ei->d_children[i] == dentry)
+- break;
+- }
+- if (WARN_ON_ONCE(i == ei->nr_entries))
+- goto out;
+- ei->d_children[i] = NULL;
+- } else if (ei->is_freed) {
+- free_ei(ei);
+- } else {
+- ei->dentry = NULL;
+- }
+-
+- dentry->d_fsdata = NULL;
+- out:
+- mutex_unlock(&eventfs_mutex);
++ put_ei(dentry->d_fsdata);
+ }
+
+ /**
+ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @ei: the eventfs_inode that the file will be created under
+- * @idx: the index into the d_children[] of the @ei
++ * @idx: the index into the entry_attrs[] of the @ei
+ * @parent: The parent dentry of the created file.
+ * @name: The name of the file to create
+ * @mode: The mode of the file.
+@@ -434,17 +448,11 @@ lookup_file_dentry(struct dentry *dentry
+ const struct file_operations *fops)
+ {
+ struct eventfs_attr *attr = NULL;
+- struct dentry **e_dentry = &ei->d_children[idx];
+
+ if (ei->entry_attrs)
+ attr = &ei->entry_attrs[idx];
+
+- dentry->d_fsdata = ei; // NOTE: ei of _parent_
+- lookup_file(dentry, mode, attr, data, fops);
+-
+- *e_dentry = dentry; // Remove me
+-
+- return dentry;
++ return lookup_file(ei, dentry, mode, attr, data, fops);
+ }
+
+ /**
+@@ -465,6 +473,7 @@ static struct dentry *eventfs_root_looku
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ const char *name = dentry->d_name.name;
++ struct dentry *result = NULL;
+
+ ti = get_tracefs(dir);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+@@ -481,7 +490,7 @@ static struct dentry *eventfs_root_looku
+ continue;
+ if (ei_child->is_freed)
+ goto out;
+- lookup_dir_entry(dentry, ei, ei_child);
++ result = lookup_dir_entry(dentry, ei, ei_child);
+ goto out;
+ }
+
+@@ -498,12 +507,12 @@ static struct dentry *eventfs_root_looku
+ if (entry->callback(name, &mode, &data, &fops) <= 0)
+ goto out;
+
+- lookup_file_dentry(dentry, ei, i, mode, data, fops);
++ result = lookup_file_dentry(dentry, ei, i, mode, data, fops);
+ goto out;
+ }
+ out:
+ mutex_unlock(&eventfs_mutex);
+- return NULL;
++ return result;
+ }
+
+ /*
+@@ -653,25 +662,10 @@ struct eventfs_inode *eventfs_create_dir
+ if (!parent)
+ return ERR_PTR(-EINVAL);
+
+- ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++ ei = alloc_ei(name);
+ if (!ei)
+ return ERR_PTR(-ENOMEM);
+
+- ei->name = kstrdup_const(name, GFP_KERNEL);
+- if (!ei->name) {
+- kfree(ei);
+- return ERR_PTR(-ENOMEM);
+- }
+-
+- if (size) {
+- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+- if (!ei->d_children) {
+- kfree_const(ei->name);
+- kfree(ei);
+- return ERR_PTR(-ENOMEM);
+- }
+- }
+-
+ ei->entries = entries;
+ ei->nr_entries = size;
+ ei->data = data;
+@@ -685,7 +679,7 @@ struct eventfs_inode *eventfs_create_dir
+
+ /* Was the parent freed? */
+ if (list_empty(&ei->list)) {
+- free_ei(ei);
++ put_ei(ei);
+ ei = NULL;
+ }
+ return ei;
+@@ -720,28 +714,20 @@ struct eventfs_inode *eventfs_create_eve
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+- ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++ ei = alloc_ei(name);
+ if (!ei)
+- goto fail_ei;
++ goto fail;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ goto fail;
+
+- if (size) {
+- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+- if (!ei->d_children)
+- goto fail;
+- }
+-
+- ei->dentry = dentry;
++ // Note: we have a ref to the dentry from tracefs_start_creating()
++ ei->events_dir = dentry;
+ ei->entries = entries;
+ ei->nr_entries = size;
+ ei->is_events = 1;
+ ei->data = data;
+- ei->name = kstrdup_const(name, GFP_KERNEL);
+- if (!ei->name)
+- goto fail;
+
+ /* Save the ownership of this directory */
+ uid = d_inode(dentry->d_parent)->i_uid;
+@@ -772,7 +758,7 @@ struct eventfs_inode *eventfs_create_eve
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
+- dentry->d_fsdata = ei;
++ dentry->d_fsdata = get_ei(ei);
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+@@ -784,72 +770,11 @@ struct eventfs_inode *eventfs_create_eve
+ return ei;
+
+ fail:
+- kfree(ei->d_children);
+- kfree(ei);
+- fail_ei:
++ put_ei(ei);
+ tracefs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+-static LLIST_HEAD(free_list);
+-
+-static void eventfs_workfn(struct work_struct *work)
+-{
+- struct eventfs_inode *ei, *tmp;
+- struct llist_node *llnode;
+-
+- llnode = llist_del_all(&free_list);
+- llist_for_each_entry_safe(ei, tmp, llnode, llist) {
+- /* This dput() matches the dget() from unhook_dentry() */
+- for (int i = 0; i < ei->nr_entries; i++) {
+- if (ei->d_children[i])
+- dput(ei->d_children[i]);
+- }
+- /* This should only get here if it had a dentry */
+- if (!WARN_ON_ONCE(!ei->dentry))
+- dput(ei->dentry);
+- }
+-}
+-
+-static DECLARE_WORK(eventfs_work, eventfs_workfn);
+-
+-static void free_rcu_ei(struct rcu_head *head)
+-{
+- struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+-
+- if (ei->dentry) {
+- /* Do not free the ei until all references of dentry are gone */
+- if (llist_add(&ei->llist, &free_list))
+- queue_work(system_unbound_wq, &eventfs_work);
+- return;
+- }
+-
+- /* If the ei doesn't have a dentry, neither should its children */
+- for (int i = 0; i < ei->nr_entries; i++) {
+- WARN_ON_ONCE(ei->d_children[i]);
+- }
+-
+- free_ei(ei);
+-}
+-
+-static void unhook_dentry(struct dentry *dentry)
+-{
+- if (!dentry)
+- return;
+- /*
+- * Need to add a reference to the dentry that is expected by
+- * simple_recursive_removal(), which will include a dput().
+- */
+- dget(dentry);
+-
+- /*
+- * Also add a reference for the dput() in eventfs_workfn().
+- * That is required as that dput() will free the ei after
+- * the SRCU grace period is over.
+- */
+- dget(dentry);
+-}
+-
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
+@@ -862,8 +787,6 @@ static void eventfs_remove_rec(struct ev
+ {
+ struct eventfs_inode *ei_child;
+
+- if (!ei)
+- return;
+ /*
+ * Check recursion depth. It should never be greater than 3:
+ * 0 - events/
+@@ -875,28 +798,12 @@ static void eventfs_remove_rec(struct ev
+ return;
+
+ /* search for nested folders or files */
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- lockdep_is_held(&eventfs_mutex)) {
+- /* Children only have dentry if parent does */
+- WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
++ list_for_each_entry(ei_child, &ei->children, list)
+ eventfs_remove_rec(ei_child, level + 1);
+- }
+-
+
+ ei->is_freed = 1;
+-
+- for (int i = 0; i < ei->nr_entries; i++) {
+- if (ei->d_children[i]) {
+- /* Children only have dentry if parent does */
+- WARN_ON_ONCE(!ei->dentry);
+- unhook_dentry(ei->d_children[i]);
+- }
+- }
+-
+- unhook_dentry(ei->dentry);
+-
+- list_del_rcu(&ei->list);
+- call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
++ list_del(&ei->list);
++ put_ei(ei);
+ }
+
+ /**
+@@ -907,22 +814,12 @@ static void eventfs_remove_rec(struct ev
+ */
+ void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+- struct dentry *dentry;
+-
+ if (!ei)
+ return;
+
+ mutex_lock(&eventfs_mutex);
+- dentry = ei->dentry;
+ eventfs_remove_rec(ei, 0);
+ mutex_unlock(&eventfs_mutex);
+-
+- /*
+- * If any of the ei children has a dentry, then the ei itself
+- * must have a dentry.
+- */
+- if (dentry)
+- simple_recursive_removal(dentry, NULL);
+ }
+
+ /**
+@@ -935,7 +832,11 @@ void eventfs_remove_events_dir(struct ev
+ {
+ struct dentry *dentry;
+
+- dentry = ei->dentry;
++ dentry = ei->events_dir;
++ if (!dentry)
++ return;
++
++ ei->events_dir = NULL;
+ eventfs_remove_dir(ei);
+
+ /*
+@@ -945,5 +846,6 @@ void eventfs_remove_events_dir(struct ev
+ * sticks around while the other ei->dentry are created
+ * and destroyed dynamically.
+ */
++ d_invalidate(dentry);
+ dput(dentry);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -35,8 +35,7 @@ struct eventfs_attr {
+ * @entries: the array of entries representing the files in the directory
+ * @name: the name of the directory to create
+ * @children: link list into the child eventfs_inode
+- * @dentry: the dentry of the directory
+- * @d_children: The array of dentries to represent the files when created
++ * @events_dir: the dentry of the events directory
+ * @entry_attrs: Saved mode and ownership of the @d_children
+ * @attr: Saved mode and ownership of eventfs_inode itself
+ * @data: The private data to pass to the callbacks
+@@ -45,12 +44,12 @@ struct eventfs_attr {
+ * @nr_entries: The number of items in @entries
+ */
+ struct eventfs_inode {
++ struct kref kref;
+ struct list_head list;
+ const struct eventfs_entry *entries;
+ const char *name;
+ struct list_head children;
+- struct dentry *dentry; /* Check is_freed to access */
+- struct dentry **d_children;
++ struct dentry *events_dir;
+ struct eventfs_attr *entry_attrs;
+ struct eventfs_attr attr;
+ void *data;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:22 -0500
+Subject: eventfs: Have a free_ei() that just frees the eventfs_inode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.141151788@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit db3a397209b00d2e4e0a068608e5c546fc064b82 upstream.
+
+As the eventfs_inode is freed in two different locations, make a helper
+function free_ei() to make sure all the allocated fields of the
+eventfs_inode are freed.
+
+This requires renaming the existing free_ei() which is called by the srcu
+handler to free_rcu_ei() and have free_ei() just do the freeing, where
+free_rcu_ei() will call it.
+
+Link: https://lkml.kernel.org/r/20231101172649.265214087@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -129,6 +129,13 @@ static struct dentry *create_dir(const c
+ return eventfs_end_creating(dentry);
+ }
+
++static void free_ei(struct eventfs_inode *ei)
++{
++ kfree_const(ei->name);
++ kfree(ei->d_children);
++ kfree(ei);
++}
++
+ /**
+ * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+ * @ti: the tracefs_inode of the dentry
+@@ -168,9 +175,7 @@ void eventfs_set_ei_status_free(struct t
+ eventfs_remove_dir(ei_child);
+ }
+
+- kfree_const(ei->name);
+- kfree(ei->d_children);
+- kfree(ei);
++ free_ei(ei);
+ return;
+ }
+
+@@ -784,13 +789,11 @@ struct eventfs_inode *eventfs_create_eve
+ return ERR_PTR(-ENOMEM);
+ }
+
+-static void free_ei(struct rcu_head *head)
++static void free_rcu_ei(struct rcu_head *head)
+ {
+ struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+
+- kfree_const(ei->name);
+- kfree(ei->d_children);
+- kfree(ei);
++ free_ei(ei);
+ }
+
+ /**
+@@ -881,7 +884,7 @@ void eventfs_remove_dir(struct eventfs_i
+ for (i = 0; i < ei->nr_entries; i++)
+ unhook_dentry(&ei->d_children[i], &dentry_list);
+ unhook_dentry(&ei->dentry, &dentry_list);
+- call_srcu(&eventfs_srcu, &ei->rcu, free_ei);
++ call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+ }
+ mutex_unlock(&eventfs_mutex);
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:23 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:36 -0500
+Subject: eventfs: Have event files and directories default to parent uid and gid
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Hongyu Jin <hongyu.jin@unisoc.com>, Dongliang Cui <cuidongliang390@gmail.com>
+Message-ID: <20240206120951.415245327@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 0dfc852b6fe3cbecbea67332a0dce2bebeba540d upstream.
+
+Dongliang reported:
+
+ I found that in the latest version, the nodes of tracefs have been
+ changed to dynamically created.
+
+ This has caused me to encounter a problem where the gid I specified in
+ the mounting parameters cannot apply to all files, as in the following
+ situation:
+
+ /data/tmp/events # mount | grep tracefs
+ tracefs on /data/tmp type tracefs (rw,seclabel,relatime,gid=3012)
+
+ gid 3012 = readtracefs
+
+ /data/tmp # ls -lh
+ total 0
+ -r--r----- 1 root readtracefs 0 1970-01-01 08:00 README
+ -r--r----- 1 root readtracefs 0 1970-01-01 08:00 available_events
+
+ ums9621_1h10:/data/tmp/events # ls -lh
+ total 0
+ drwxr-xr-x 2 root root 0 2023-12-19 00:56 alarmtimer
+ drwxr-xr-x 2 root root 0 2023-12-19 00:56 asoc
+
+ It will prevent certain applications from accessing tracefs properly, I
+ try to avoid this issue by making the following modifications.
+
+To fix this, have the files created default to taking the ownership of
+the parent dentry unless the ownership was previously set by the user.
+
+Link: https://lore.kernel.org/linux-trace-kernel/1703063706-30539-1-git-send-email-dongliang.cui@unisoc.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20231220105017.1489d790@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Hongyu Jin <hongyu.jin@unisoc.com>
+Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode")
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reported-by: Dongliang Cui <cuidongliang390@gmail.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -148,7 +148,8 @@ static const struct file_operations even
+ .release = eventfs_release,
+ };
+
+-static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
++static void update_inode_attr(struct dentry *dentry, struct inode *inode,
++ struct eventfs_attr *attr, umode_t mode)
+ {
+ if (!attr) {
+ inode->i_mode = mode;
+@@ -162,9 +163,13 @@ static void update_inode_attr(struct ino
+
+ if (attr->mode & EVENTFS_SAVE_UID)
+ inode->i_uid = attr->uid;
++ else
++ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+
+ if (attr->mode & EVENTFS_SAVE_GID)
+ inode->i_gid = attr->gid;
++ else
++ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
+ }
+
+ /**
+@@ -206,7 +211,7 @@ static struct dentry *create_file(const
+ return eventfs_failed_creating(dentry);
+
+ /* If the user updated the directory's attributes, use them */
+- update_inode_attr(inode, attr, mode);
++ update_inode_attr(dentry, inode, attr, mode);
+
+ inode->i_op = &eventfs_file_inode_operations;
+ inode->i_fop = fop;
+@@ -242,7 +247,8 @@ static struct dentry *create_dir(struct
+ return eventfs_failed_creating(dentry);
+
+ /* If the user updated the directory's attributes, use them */
+- update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
++ update_inode_attr(dentry, inode, &ei->attr,
++ S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:32 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:43 -0500
+Subject: eventfs: Have eventfs_iterate() stop immediately if ei->is_freed is set
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.562520394@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit e109deadb73318cf4a3bd61287d969f705df278f upstream.
+
+If ei->is_freed is set in eventfs_iterate(), it means that the directory
+that is being iterated on is in the process of being freed. Just exit the
+loop immediately when that is ever detected, and separate out the return
+of the entry->callback() from ei->is_freed.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.016261289@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -788,11 +788,12 @@ static int eventfs_iterate(struct file *
+ name = entry->name;
+
+ mutex_lock(&eventfs_mutex);
+- /* If ei->is_freed, then the event itself may be too */
+- if (!ei->is_freed)
+- r = entry->callback(name, &mode, &cdata, &fops);
+- else
+- r = -1;
++ /* If ei->is_freed then just bail here, nothing more to do */
++ if (ei->is_freed) {
++ mutex_unlock(&eventfs_mutex);
++ goto out;
++ }
++ r = entry->callback(name, &mode, &cdata, &fops);
+ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:10 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:47 -0500
+Subject: eventfs: Have the inodes all for files and directories all be the same
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120953.213007763@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 53c41052ba3121761e6f62a813961164532a214f upstream.
+
+The dentries and inodes are created in the readdir for the sole purpose of
+getting a consistent inode number. Linus stated that is unnecessary, and
+that all inodes can have the same inode number. For a virtual file system
+they are pretty meaningless.
+
+Instead use a single unique inode number for all files and one for all
+directories.
+
+Link: https://lore.kernel.org/all/20240116133753.2808d45e@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.412180363@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -32,6 +32,10 @@
+ */
+ static DEFINE_MUTEX(eventfs_mutex);
+
++/* Choose something "unique" ;-) */
++#define EVENTFS_FILE_INODE_INO 0x12c4e37
++#define EVENTFS_DIR_INODE_INO 0x134b2f5
++
+ /*
+ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+ * its parent's list and will have is_freed set (under eventfs_mutex).
+@@ -352,6 +356,9 @@ static struct dentry *create_file(const
+ inode->i_fop = fop;
+ inode->i_private = data;
+
++ /* All files will have the same inode number */
++ inode->i_ino = EVENTFS_FILE_INODE_INO;
++
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+ d_instantiate(dentry, inode);
+@@ -388,6 +395,9 @@ static struct dentry *create_dir(struct
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
++ /* All directories will have the same inode number */
++ inode->i_ino = EVENTFS_DIR_INODE_INO;
++
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:57 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:25 -0500
+Subject: eventfs: Hold eventfs_mutex when calling callback functions
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>, Linux Kernel Functional Testing <lkft@linaro.org>, Naresh Kamboju <naresh.kamboju@linaro.org>
+Message-ID: <20240206120949.629213120@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 44365329f8219fc379097c2c9a75ff53f123764f upstream.
+
+The callback function that is used to create inodes and dentries is not
+protected by anything and the data that is passed to it could become
+stale. After eventfs_remove_dir() is called by the tracing system, it is
+free to remove the events that are associated to that directory.
+Unfortunately, that means the callbacks must not be called after that.
+
+ CPU0 CPU1
+ ---- ----
+ eventfs_root_lookup() {
+ eventfs_remove_dir() {
+ mutex_lock(&event_mutex);
+ ei->is_freed = set;
+ mutex_unlock(&event_mutex);
+ }
+ kfree(event_call);
+
+ for (...) {
+ entry = &ei->entries[i];
+ r = entry->callback() {
+ call = data; // call == event_call above
+ if (call->flags ...)
+
+ [ USE AFTER FREE BUG ]
+
+The safest way to protect this is to wrap the callback with:
+
+ mutex_lock(&eventfs_mutex);
+ if (!ei->is_freed)
+ r = entry->callback();
+ else
+ r = -1;
+ mutex_unlock(&eventfs_mutex);
+
+This will make sure that the callback will not be called after it is
+freed. But now it needs to be known that the callback is called while
+holding internal eventfs locks, and that it must not call back into the
+eventfs / tracefs system. There's no reason it should anyway, but document
+that as well.
+
+Link: https://lore.kernel.org/all/CA+G9fYu9GOEbD=rR5eMR-=HJ8H6rMsbzDC2ZY5=Y50WpWAE7_Q@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20231101172649.906696613@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 22 ++++++++++++++++++++--
+ include/linux/tracefs.h | 43 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 63 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -615,7 +615,13 @@ static struct dentry *eventfs_root_looku
+ entry = &ei->entries[i];
+ if (strcmp(name, entry->name) == 0) {
+ void *cdata = data;
+- r = entry->callback(name, &mode, &cdata, &fops);
++ mutex_lock(&eventfs_mutex);
++ /* If ei->is_freed, then the event itself may be too */
++ if (!ei->is_freed)
++ r = entry->callback(name, &mode, &cdata, &fops);
++ else
++ r = -1;
++ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+@@ -749,7 +755,13 @@ static int dcache_dir_open_wrapper(struc
+ void *cdata = data;
+ entry = &ei->entries[i];
+ name = entry->name;
+- r = entry->callback(name, &mode, &cdata, &fops);
++ mutex_lock(&eventfs_mutex);
++ /* If ei->is_freed, then the event itself may be too */
++ if (!ei->is_freed)
++ r = entry->callback(name, &mode, &cdata, &fops);
++ else
++ r = -1;
++ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;
+ d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
+@@ -819,6 +831,10 @@ static int dcache_readdir_wrapper(struct
+ * data = A pointer to @data, and the callback may replace it, which will
+ * cause the file created to pass the new data to the open() call.
+ * fops = the fops to use for the created file.
++ *
++ * NB. @callback is called while holding internal locks of the eventfs
++ * system. The callback must not call any code that might also call into
++ * the tracefs or eventfs system or it will risk creating a deadlock.
+ */
+ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+ const struct eventfs_entry *entries,
+@@ -878,6 +894,8 @@ struct eventfs_inode *eventfs_create_dir
+ * @data: The default data to pass to the files (an entry may override it).
+ *
+ * This function creates the top of the trace event directory.
++ *
++ * See eventfs_create_dir() for use of @entries.
+ */
+ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+ const struct eventfs_entry *entries,
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -23,9 +23,52 @@ struct file_operations;
+
+ struct eventfs_file;
+
++/**
++ * eventfs_callback - A callback function to create dynamic files in eventfs
++ * @name: The name of the file that is to be created
++ * @mode: return the file mode for the file (RW access, etc)
++ * @data: data to pass to the created file ops
++ * @fops: the file operations of the created file
++ *
++ * The evetnfs files are dynamically created. The struct eventfs_entry array
++ * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will
++ * be used to create the files within those directories. When a lookup
++ * or access to a file within the directory is made, the struct eventfs_entry
++ * array is used to find a callback() with the matching name that is being
++ * referenced (for lookups, the entire array is iterated and each callback
++ * will be called).
++ *
++ * The callback will be called with @name for the name of the file to create.
++ * The callback can return less than 1 to indicate that no file should be
++ * created.
++ *
++ * If a file is to be created, then @mode should be populated with the file
++ * mode (permissions) for which the file is created for. This would be
++ * used to set the created inode i_mode field.
++ *
++ * The @data should be set to the data passed to the other file operations
++ * (read, write, etc). Note, @data will also point to the data passed in
++ * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback
++ * can replace the data if it chooses to. Otherwise, the original data
++ * will be used for the file operation functions.
++ *
++ * The @fops should be set to the file operations that will be used to create
++ * the inode.
++ *
++ * NB. This callback is called while holding internal locks of the eventfs
++ * system. The callback must not call any code that might also call into
++ * the tracefs or eventfs system or it will risk creating a deadlock.
++ */
+ typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops);
+
++/**
++ * struct eventfs_entry - dynamically created eventfs file call back handler
++ * @name: Then name of the dynamic file in an eventfs directory
++ * @callback: The callback to get the fops of the file when it is created
++ *
++ * See evenfs_callback() typedef for how to set up @callback.
++ */
+ struct eventfs_entry {
+ const char *name;
+ eventfs_callback callback;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:11 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:53 -0500
+Subject: eventfs: Initialize the tracefs inode properly
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120954.203316263@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 4fa4b010b83fb2f837b5ef79e38072a79e96e4f1 upstream.
+
+The tracefs-specific fields in the inode were not initialized before the
+inode was exposed to others through the dentry with 'd_instantiate()'.
+
+Move the field initializations up to before the d_instantiate.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.478449628@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -370,6 +370,8 @@ static struct dentry *create_dir(struct
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
++ /* Only directories have ti->private set to an ei, not files */
++ ti->private = ei;
+
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+@@ -515,7 +517,6 @@ create_file_dentry(struct eventfs_inode
+ static void eventfs_post_create_dir(struct eventfs_inode *ei)
+ {
+ struct eventfs_inode *ei_child;
+- struct tracefs_inode *ti;
+
+ lockdep_assert_held(&eventfs_mutex);
+
+@@ -525,9 +526,6 @@ static void eventfs_post_create_dir(stru
+ srcu_read_lock_held(&eventfs_srcu)) {
+ ei_child->d_parent = ei->dentry;
+ }
+-
+- ti = get_tracefs(ei->dentry->d_inode);
+- ti->private = ei;
+ }
+
+ /**
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:22 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:02 -0500
+Subject: eventfs: Keep all directory links at 1
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120955.665411833@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit ca185770db914869ff9fe773bac5e0e5e4165b83 upstream.
+
+The directory link count in eventfs was somewhat bogus. It was only being
+updated when a directory child was being looked up and not on creation.
+
+One solution would be to update in get_attr() the link count by iterating
+the ei->children list and then adding 2. But that could slow down simple
+stat() calls, especially if it's done on all directories in eventfs.
+
+Another solution would be to add a parent pointer to the eventfs_inode
+and keep track of the number of sub directories it has on creation. But
+this adds overhead for something not really worthwhile.
+
+The solution decided upon is to keep all directory links in eventfs as 1.
+This tells user space not to rely on the hard links of directories. Which
+in this case it shouldn't.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.339968298@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -404,9 +404,7 @@ static struct dentry *lookup_dir_entry(s
+
+ dentry->d_fsdata = get_ei(ei);
+
+- inc_nlink(inode);
+ d_add(dentry, inode);
+- inc_nlink(dentry->d_parent->d_inode);
+ return NULL;
+ }
+
+@@ -769,9 +767,17 @@ struct eventfs_inode *eventfs_create_eve
+
+ dentry->d_fsdata = get_ei(ei);
+
+- /* directory inodes start off with i_nlink == 2 (for "." entry) */
+- inc_nlink(inode);
++ /*
++ * Keep all eventfs directories with i_nlink == 1.
++ * Due to the dynamic nature of the dentry creations and not
++ * wanting to add a pointer to the parent eventfs_inode in the
++ * eventfs_inode structure, keeping the i_nlink in sync with the
++ * number of directories would cause too much complexity for
++ * something not worth much. Keeping directory links at 1
++ * tells userspace not to trust the link number.
++ */
+ d_instantiate(dentry, inode);
++ /* The dentry of the "events" parent does keep track though */
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ tracefs_end_creating(dentry);
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:06 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:34 -0500
+Subject: eventfs: Make sure that parent->d_inode is locked in creating files/dirs
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120951.095364893@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit f49f950c217bfb40f11662bab39cb388d41e4cfb upstream.
+
+Since the locking of the parent->d_inode has been moved outside the
+creation of the files and directories (as it used to be locked via a
+conditional), add a WARN_ON_ONCE() to the case that it's not locked.
+
+Link: https://lkml.kernel.org/r/20231121231112.853962542@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -327,6 +327,8 @@ create_file_dentry(struct eventfs_inode
+ struct dentry **e_dentry = &ei->d_children[idx];
+ struct dentry *dentry;
+
++ WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
++
+ mutex_lock(&eventfs_mutex);
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+@@ -430,6 +432,8 @@ create_dir_dentry(struct eventfs_inode *
+ {
+ struct dentry *dentry = NULL;
+
++ WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
++
+ mutex_lock(&eventfs_mutex);
+ if (pei->is_freed || ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:32 -0500
+Subject: eventfs: Move taking of inode_lock into dcache_dir_open_wrapper()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.772179769@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit bcae32c5632fc0a0dbce46fa731cd23403117e66 upstream.
+
+Both create_file_dentry() and create_dir_dentry() take a boolean
+parameter "lookup", as on lookup the inode_lock should already be taken,
+but for dcache_dir_open_wrapper() it is not taken.
+
+There's no reason that the dcache_dir_open_wrapper() can't take the
+inode_lock before calling these functions. In fact, it's better if it
+does, as the lock can be held throughout both directory and file
+creations.
+
+This also simplifies the code, and possibly prevents unexpected race
+conditions when the lock is released.
+
+Link: https://lkml.kernel.org/r/20231121231112.528544825@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 16 ++--------------
+ 1 file changed, 2 insertions(+), 14 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -347,15 +347,8 @@ create_file_dentry(struct eventfs_inode
+
+ mutex_unlock(&eventfs_mutex);
+
+- /* The lookup already has the parent->d_inode locked */
+- if (!lookup)
+- inode_lock(parent->d_inode);
+-
+ dentry = create_file(name, mode, attr, parent, data, fops);
+
+- if (!lookup)
+- inode_unlock(parent->d_inode);
+-
+ mutex_lock(&eventfs_mutex);
+
+ if (IS_ERR_OR_NULL(dentry)) {
+@@ -453,15 +446,8 @@ create_dir_dentry(struct eventfs_inode *
+ }
+ mutex_unlock(&eventfs_mutex);
+
+- /* The lookup already has the parent->d_inode locked */
+- if (!lookup)
+- inode_lock(parent->d_inode);
+-
+ dentry = create_dir(ei, parent);
+
+- if (!lookup)
+- inode_unlock(parent->d_inode);
+-
+ mutex_lock(&eventfs_mutex);
+
+ if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
+@@ -693,6 +679,7 @@ static int dcache_dir_open_wrapper(struc
+ return -ENOMEM;
+ }
+
++ inode_lock(parent->d_inode);
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+ d = create_dir_dentry(ei, ei_child, parent, false);
+@@ -725,6 +712,7 @@ static int dcache_dir_open_wrapper(struc
+ cnt++;
+ }
+ }
++ inode_unlock(parent->d_inode);
+ srcu_read_unlock(&eventfs_srcu, idx);
+ ret = dcache_dir_open(inode, file);
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:56 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:45 -0500
+Subject: eventfs: Read ei->entries before ei->children in eventfs_iterate()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.892527913@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 704f960dbee2f1634f4b4e16f208cb16eaf41c1e upstream.
+
+In order to apply a shortcut to skip over the current ctx->pos
+immediately, by using the ei->entries array, the reading of that array
+should be first. Moving the array reading before the linked list reading
+will make the shortcut change diff nicer to read.
+
+Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.333115095@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -752,8 +752,8 @@ static int eventfs_iterate(struct file *
+ * Need to create the dentries and inodes to have a consistent
+ * inode number.
+ */
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- srcu_read_lock_held(&eventfs_srcu)) {
++ for (i = 0; i < ei->nr_entries; i++) {
++ void *cdata = ei->data;
+
+ if (c > 0) {
+ c--;
+@@ -762,23 +762,32 @@ static int eventfs_iterate(struct file *
+
+ ctx->pos++;
+
+- if (ei_child->is_freed)
+- continue;
++ entry = &ei->entries[i];
++ name = entry->name;
+
+- name = ei_child->name;
++ mutex_lock(&eventfs_mutex);
++ /* If ei->is_freed then just bail here, nothing more to do */
++ if (ei->is_freed) {
++ mutex_unlock(&eventfs_mutex);
++ goto out_dec;
++ }
++ r = entry->callback(name, &mode, &cdata, &fops);
++ mutex_unlock(&eventfs_mutex);
++ if (r <= 0)
++ continue;
+
+- dentry = create_dir_dentry(ei, ei_child, ei_dentry);
++ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+ if (!dentry)
+ goto out_dec;
+ ino = dentry->d_inode->i_ino;
+ dput(dentry);
+
+- if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
++ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+ goto out_dec;
+ }
+
+- for (i = 0; i < ei->nr_entries; i++) {
+- void *cdata = ei->data;
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
++ srcu_read_lock_held(&eventfs_srcu)) {
+
+ if (c > 0) {
+ c--;
+@@ -787,27 +796,18 @@ static int eventfs_iterate(struct file *
+
+ ctx->pos++;
+
+- entry = &ei->entries[i];
+- name = entry->name;
+-
+- mutex_lock(&eventfs_mutex);
+- /* If ei->is_freed then just bail here, nothing more to do */
+- if (ei->is_freed) {
+- mutex_unlock(&eventfs_mutex);
+- goto out_dec;
+- }
+- r = entry->callback(name, &mode, &cdata, &fops);
+- mutex_unlock(&eventfs_mutex);
+- if (r <= 0)
++ if (ei_child->is_freed)
+ continue;
+
+- dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++ name = ei_child->name;
++
++ dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+ if (!dentry)
+ goto out_dec;
+ ino = dentry->d_inode->i_ino;
+ dput(dentry);
+
+- if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
++ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+ goto out_dec;
+ }
+ ret = 1;
--- /dev/null
+From stable+bounces-18939-greg=kroah.com@vger.kernel.org Tue Feb 6 13:14:41 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:12 -0500
+Subject: eventfs: Remove eventfs_file and just use eventfs_inode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120947.516739239@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 5790b1fb3d672d9a1fe3881a7181dfdbe741568f upstream.
+
+Instead of having a descriptor for every file represented in the eventfs
+directory, only have the directory itself represented. Change the API to
+send in a list of entries that represent all the files in the directory
+(but not other directories). The entry list contains a name and a callback
+function that will be used to create the files when they are accessed.
+
+struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);
+
+is used for the top level eventfs directory, and returns an eventfs_inode
+that will be used by:
+
+struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);
+
+where both of the above take an array of struct eventfs_entry entries for
+every file that is in the directory.
+
+The entries are defined by:
+
+typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops);
+
+struct eventfs_entry {
+ const char *name;
+ eventfs_callback callback;
+};
+
+Where the name is the name of the file and the callback gets called when
+the file is being created. The callback passes in the name (in case the
+same callback is used for multiple files), a pointer to the mode, data and
+fops. The data will be pointing to the data that was passed in
+eventfs_create_dir() or eventfs_create_events_dir() but may be overridden
+to point to something else, as it will be used to point to the
+inode->i_private that is created. The information passed back from the
+callback is used to create the dentry/inode.
+
+If the callback fills the data and the file should be created, it must
+return a positive number. On zero or negative, the file is ignored.
+
+This logic may also be used as a prototype to convert entire pseudo file
+systems into just-in-time allocation.
+
+The "show_events_dentry" file has been updated to show the directories,
+and any files they have.
+
+With just the eventfs_file allocations:
+
+ Before after deltas for meminfo (in kB):
+
+ MemFree: -14360
+ MemAvailable: -14260
+ Buffers: 40
+ Cached: 24
+ Active: 44
+ Inactive: 48
+ Inactive(anon): 28
+ Active(file): 44
+ Inactive(file): 20
+ Dirty: -4
+ AnonPages: 28
+ Mapped: 4
+ KReclaimable: 132
+ Slab: 1604
+ SReclaimable: 132
+ SUnreclaim: 1472
+ Committed_AS: 12
+
+ Before after deltas for slabinfo:
+
+ <slab>: <objects> [ * <size> = <total>]
+
+ ext4_inode_cache 27 [* 1184 = 31968 ]
+ extent_status 102 [* 40 = 4080 ]
+ tracefs_inode_cache 144 [* 656 = 94464 ]
+ buffer_head 39 [* 104 = 4056 ]
+ shmem_inode_cache 49 [* 800 = 39200 ]
+ filp -53 [* 256 = -13568 ]
+ dentry 251 [* 192 = 48192 ]
+ lsm_file_cache 277 [* 32 = 8864 ]
+ vm_area_struct -14 [* 184 = -2576 ]
+ trace_event_file 1748 [* 88 = 153824 ]
+ kmalloc-1k 35 [* 1024 = 35840 ]
+ kmalloc-256 49 [* 256 = 12544 ]
+ kmalloc-192 -28 [* 192 = -5376 ]
+ kmalloc-128 -30 [* 128 = -3840 ]
+ kmalloc-96 10581 [* 96 = 1015776 ]
+ kmalloc-64 3056 [* 64 = 195584 ]
+ kmalloc-32 1291 [* 32 = 41312 ]
+ kmalloc-16 2310 [* 16 = 36960 ]
+ kmalloc-8 9216 [* 8 = 73728 ]
+
+ Free memory dropped by 14,360 kB
+ Available memory dropped by 14,260 kB
+ Total slab additions in size: 1,771,032 bytes
+
+With this change:
+
+ Before after deltas for meminfo (in kB):
+
+ MemFree: -12084
+ MemAvailable: -11976
+ Buffers: 32
+ Cached: 32
+ Active: 72
+ Inactive: 168
+ Inactive(anon): 176
+ Active(file): 72
+ Inactive(file): -8
+ Dirty: 24
+ AnonPages: 196
+ Mapped: 8
+ KReclaimable: 148
+ Slab: 836
+ SReclaimable: 148
+ SUnreclaim: 688
+ Committed_AS: 324
+
+ Before after deltas for slabinfo:
+
+ <slab>: <objects> [ * <size> = <total>]
+
+ tracefs_inode_cache 144 [* 656 = 94464 ]
+ shmem_inode_cache -23 [* 800 = -18400 ]
+ filp -92 [* 256 = -23552 ]
+ dentry 179 [* 192 = 34368 ]
+ lsm_file_cache -3 [* 32 = -96 ]
+ vm_area_struct -13 [* 184 = -2392 ]
+ trace_event_file 1748 [* 88 = 153824 ]
+ kmalloc-1k -49 [* 1024 = -50176 ]
+ kmalloc-256 -27 [* 256 = -6912 ]
+ kmalloc-128 1864 [* 128 = 238592 ]
+ kmalloc-64 4685 [* 64 = 299840 ]
+ kmalloc-32 -72 [* 32 = -2304 ]
+ kmalloc-16 256 [* 16 = 4096 ]
+ total = 721352
+
+ Free memory dropped by 12,084 kB
+ Available memory dropped by 11,976 kB
+ Total slab additions in size: 721,352 bytes
+
+That's over 2 MB in savings per instance for free and available memory,
+and over 1 MB in savings per instance of slab memory.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231003184059.4924468e@gandalf.local.home
+Link: https://lore.kernel.org/linux-trace-kernel/20231004165007.43d79161@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 853 +++++++++++++++++++++----------------------
+ fs/tracefs/inode.c | 2
+ fs/tracefs/internal.h | 37 +
+ include/linux/trace_events.h | 2
+ include/linux/tracefs.h | 29 -
+ kernel/trace/trace.c | 7
+ kernel/trace/trace.h | 4
+ kernel/trace/trace_events.c | 313 ++++++++++-----
+ 8 files changed, 708 insertions(+), 539 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -2,8 +2,9 @@
+ /*
+ * event_inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+- * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) <rostedt@goodmis.org>
++ * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
+ * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
++ * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
+ *
+ * eventfs is used to dynamically create inodes and dentries based on the
+ * meta data provided by the tracing system.
+@@ -23,46 +24,6 @@
+ #include <linux/delay.h>
+ #include "internal.h"
+
+-struct eventfs_inode {
+- struct list_head e_top_files;
+-};
+-
+-/*
+- * struct eventfs_file - hold the properties of the eventfs files and
+- * directories.
+- * @name: the name of the file or directory to create
+- * @d_parent: holds parent's dentry
+- * @dentry: once accessed holds dentry
+- * @list: file or directory to be added to parent directory
+- * @ei: list of files and directories within directory
+- * @fop: file_operations for file or directory
+- * @iop: inode_operations for file or directory
+- * @data: something that the caller will want to get to later on
+- * @mode: the permission that the file or directory should have
+- */
+-struct eventfs_file {
+- const char *name;
+- struct dentry *d_parent;
+- struct dentry *dentry;
+- struct list_head list;
+- struct eventfs_inode *ei;
+- const struct file_operations *fop;
+- const struct inode_operations *iop;
+- /*
+- * Union - used for deletion
+- * @del_list: list of eventfs_file to delete
+- * @rcu: eventfs_file to delete in RCU
+- * @is_freed: node is freed if one of the above is set
+- */
+- union {
+- struct list_head del_list;
+- struct rcu_head rcu;
+- unsigned long is_freed;
+- };
+- void *data;
+- umode_t mode;
+-};
+-
+ static DEFINE_MUTEX(eventfs_mutex);
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+
+@@ -93,16 +54,9 @@ static const struct file_operations even
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+- * This is the basic "create a file" function for tracefs. It allows for a
+- * wide range of flexibility in creating a file.
+- *
+- * This function will return a pointer to a dentry if it succeeds. This
+- * pointer must be passed to the tracefs_remove() function when the file is
+- * to be removed (no automatic cleanup happens if your module is unloaded,
+- * you are responsible here.) If an error occurs, %NULL will be returned.
+- *
+- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+- * returned.
++ * This function creates a dentry that represents a file in the eventsfs_inode
++ * directory. The inode.i_private pointer will point to @data in the open()
++ * call.
+ */
+ static struct dentry *create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+@@ -118,6 +72,7 @@ static struct dentry *create_file(const
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
+ return NULL;
+
++ WARN_ON_ONCE(!parent);
+ dentry = eventfs_start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+@@ -142,20 +97,11 @@ static struct dentry *create_file(const
+ * create_dir - create a dir in the tracefs filesystem
+ * @name: the name of the file to create.
+ * @parent: parent dentry for this file.
+- * @data: something that the caller will want to get to later on.
+- *
+- * This is the basic "create a dir" function for eventfs. It allows for a
+- * wide range of flexibility in creating a dir.
+- *
+- * This function will return a pointer to a dentry if it succeeds. This
+- * pointer must be passed to the tracefs_remove() function when the file is
+- * to be removed (no automatic cleanup happens if your module is unloaded,
+- * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+- * returned.
++ * This function will create a dentry for a directory represented by
++ * a eventfs_inode.
+ */
+-static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
++static struct dentry *create_dir(const char *name, struct dentry *parent)
+ {
+ struct tracefs_inode *ti;
+ struct dentry *dentry;
+@@ -172,7 +118,6 @@ static struct dentry *create_dir(const c
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+- inode->i_private = data;
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+@@ -185,18 +130,18 @@ static struct dentry *create_dir(const c
+ }
+
+ /**
+- * eventfs_set_ef_status_free - set the ef->status to free
++ * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+ * @ti: the tracefs_inode of the dentry
+- * @dentry: dentry who's status to be freed
++ * @dentry: dentry which has the reference to remove.
+ *
+- * eventfs_set_ef_status_free will be called if no more
+- * references remain
++ * Remove the association between a dentry from an eventfs_inode.
+ */
+-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
++void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+ struct tracefs_inode *ti_parent;
++ struct eventfs_inode *ei_child, *tmp;
+ struct eventfs_inode *ei;
+- struct eventfs_file *ef, *tmp;
++ int i;
+
+ /* The top level events directory may be freed by this */
+ if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+@@ -207,9 +152,9 @@ void eventfs_set_ef_status_free(struct t
+ ei = ti->private;
+
+ /* Record all the top level files */
+- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ lockdep_is_held(&eventfs_mutex)) {
+- list_add_tail(&ef->del_list, &ef_del_list);
++ list_add_tail(&ei_child->del_list, &ef_del_list);
+ }
+
+ /* Nothing should access this, but just in case! */
+@@ -218,11 +163,13 @@ void eventfs_set_ef_status_free(struct t
+ mutex_unlock(&eventfs_mutex);
+
+ /* Now safely free the top level files and their children */
+- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+- list_del(&ef->del_list);
+- eventfs_remove(ef);
++ list_for_each_entry_safe(ei_child, tmp, &ef_del_list, del_list) {
++ list_del(&ei_child->del_list);
++ eventfs_remove_dir(ei_child);
+ }
+
++ kfree_const(ei->name);
++ kfree(ei->d_children);
+ kfree(ei);
+ return;
+ }
+@@ -233,68 +180,162 @@ void eventfs_set_ef_status_free(struct t
+ if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+ goto out;
+
+- ef = dentry->d_fsdata;
+- if (!ef)
++ ei = dentry->d_fsdata;
++ if (!ei)
+ goto out;
+
+ /*
+- * If ef was freed, then the LSB bit is set for d_fsdata.
++ * If ei was freed, then the LSB bit is set for d_fsdata.
+ * But this should not happen, as it should still have a
+ * ref count that prevents it. Warn in case it does.
+ */
+- if (WARN_ON_ONCE((unsigned long)ef & 1))
++ if (WARN_ON_ONCE((unsigned long)ei & 1))
+ goto out;
+
++ /* This could belong to one of the files of the ei */
++ if (ei->dentry != dentry) {
++ for (i = 0; i < ei->nr_entries; i++) {
++ if (ei->d_children[i] == dentry)
++ break;
++ }
++ if (WARN_ON_ONCE(i == ei->nr_entries))
++ goto out;
++ ei->d_children[i] = NULL;
++ } else {
++ ei->dentry = NULL;
++ }
++
+ dentry->d_fsdata = NULL;
+- ef->dentry = NULL;
+-out:
++ out:
+ mutex_unlock(&eventfs_mutex);
+ }
+
+ /**
++ * create_file_dentry - create a dentry for a file of an eventfs_inode
++ * @ei: the eventfs_inode that the file will be created under
++ * @e_dentry: a pointer to the d_children[] of the @ei
++ * @parent: The parent dentry of the created file.
++ * @name: The name of the file to create
++ * @mode: The mode of the file.
++ * @data: The data to use to set the inode of the file with on open()
++ * @fops: The fops of the file to be created.
++ * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
++ *
++ * Create a dentry for a file of an eventfs_inode @ei and place it into the
++ * address located at @e_dentry. If the @e_dentry already has a dentry, then
++ * just do a dget() on it and return. Otherwise create the dentry and attach it.
++ */
++static struct dentry *
++create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
++ struct dentry *parent, const char *name, umode_t mode, void *data,
++ const struct file_operations *fops, bool lookup)
++{
++ struct dentry *dentry;
++ bool invalidate = false;
++
++ mutex_lock(&eventfs_mutex);
++ /* If the e_dentry already has a dentry, use it */
++ if (*e_dentry) {
++ /* lookup does not need to up the ref count */
++ if (!lookup)
++ dget(*e_dentry);
++ mutex_unlock(&eventfs_mutex);
++ return *e_dentry;
++ }
++ mutex_unlock(&eventfs_mutex);
++
++ /* The lookup already has the parent->d_inode locked */
++ if (!lookup)
++ inode_lock(parent->d_inode);
++
++ dentry = create_file(name, mode, parent, data, fops);
++
++ if (!lookup)
++ inode_unlock(parent->d_inode);
++
++ mutex_lock(&eventfs_mutex);
++
++ if (IS_ERR_OR_NULL(dentry)) {
++ /*
++ * When the mutex was released, something else could have
++ * created the dentry for this e_dentry. In which case
++ * use that one.
++ *
++ * Note, with the mutex held, the e_dentry cannot have content
++ * and the ei->is_freed be true at the same time.
++ */
++ WARN_ON_ONCE(ei->is_freed);
++ dentry = *e_dentry;
++ /* The lookup does not need to up the dentry refcount */
++ if (dentry && !lookup)
++ dget(dentry);
++ mutex_unlock(&eventfs_mutex);
++ return dentry;
++ }
++
++ if (!*e_dentry && !ei->is_freed) {
++ *e_dentry = dentry;
++ dentry->d_fsdata = ei;
++ } else {
++ /*
++ * Should never happen unless we get here due to being freed.
++ * Otherwise it means two dentries exist with the same name.
++ */
++ WARN_ON_ONCE(!ei->is_freed);
++ invalidate = true;
++ }
++ mutex_unlock(&eventfs_mutex);
++
++ if (invalidate)
++ d_invalidate(dentry);
++
++ if (lookup || invalidate)
++ dput(dentry);
++
++ return invalidate ? NULL : dentry;
++}
++
++/**
+ * eventfs_post_create_dir - post create dir routine
+- * @ef: eventfs_file of recently created dir
++ * @ei: eventfs_inode of recently created dir
+ *
+ * Map the meta-data of files within an eventfs dir to their parent dentry
+ */
+-static void eventfs_post_create_dir(struct eventfs_file *ef)
++static void eventfs_post_create_dir(struct eventfs_inode *ei)
+ {
+- struct eventfs_file *ef_child;
++ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+
+ /* srcu lock already held */
+ /* fill parent-child relation */
+- list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- ef_child->d_parent = ef->dentry;
++ ei_child->d_parent = ei->dentry;
+ }
+
+- ti = get_tracefs(ef->dentry->d_inode);
+- ti->private = ef->ei;
++ ti = get_tracefs(ei->dentry->d_inode);
++ ti->private = ei;
+ }
+
+ /**
+- * create_dentry - helper function to create dentry
+- * @ef: eventfs_file of file or directory to create
+- * @parent: parent dentry
+- * @lookup: true if called from lookup routine
++ * create_dir_dentry - Create a directory dentry for the eventfs_inode
++ * @ei: The eventfs_inode to create the directory for
++ * @parent: The dentry of the parent of this directory
++ * @lookup: True if this is called by the lookup code
+ *
+- * Used to create a dentry for file/dir, executes post dentry creation routine
++ * This creates and attaches a directory dentry to the eventfs_inode @ei.
+ */
+ static struct dentry *
+-create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
++create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool lookup)
+ {
+ bool invalidate = false;
+- struct dentry *dentry;
++ struct dentry *dentry = NULL;
+
+ mutex_lock(&eventfs_mutex);
+- if (ef->is_freed) {
+- mutex_unlock(&eventfs_mutex);
+- return NULL;
+- }
+- if (ef->dentry) {
+- dentry = ef->dentry;
+- /* On dir open, up the ref count */
++ if (ei->dentry) {
++ /* If the dentry already has a dentry, use it */
++ dentry = ei->dentry;
++ /* lookup does not need to up the ref count */
+ if (!lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+@@ -302,42 +343,44 @@ create_dentry(struct eventfs_file *ef, s
+ }
+ mutex_unlock(&eventfs_mutex);
+
++ /* The lookup already has the parent->d_inode locked */
+ if (!lookup)
+ inode_lock(parent->d_inode);
+
+- if (ef->ei)
+- dentry = create_dir(ef->name, parent, ef->data);
+- else
+- dentry = create_file(ef->name, ef->mode, parent,
+- ef->data, ef->fop);
++ dentry = create_dir(ei->name, parent);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+
+ mutex_lock(&eventfs_mutex);
+- if (IS_ERR_OR_NULL(dentry)) {
+- /* If the ef was already updated get it */
+- dentry = ef->dentry;
++
++ if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
++ /*
++ * When the mutex was released, something else could have
++ * created the dentry for this e_dentry. In which case
++ * use that one.
++ *
++ * Note, with the mutex held, the e_dentry cannot have content
++ * and the ei->is_freed be true at the same time.
++ */
++ dentry = ei->dentry;
+ if (dentry && !lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+
+- if (!ef->dentry && !ef->is_freed) {
+- ef->dentry = dentry;
+- if (ef->ei)
+- eventfs_post_create_dir(ef);
+- dentry->d_fsdata = ef;
++ if (!ei->dentry && !ei->is_freed) {
++ ei->dentry = dentry;
++ eventfs_post_create_dir(ei);
++ dentry->d_fsdata = ei;
+ } else {
+- /* A race here, should try again (unless freed) */
+- invalidate = true;
+-
+ /*
+ * Should never happen unless we get here due to being freed.
+ * Otherwise it means two dentries exist with the same name.
+ */
+- WARN_ON_ONCE(!ef->is_freed);
++ WARN_ON_ONCE(!ei->is_freed);
++ invalidate = true;
+ }
+ mutex_unlock(&eventfs_mutex);
+ if (invalidate)
+@@ -349,50 +392,85 @@ create_dentry(struct eventfs_file *ef, s
+ return invalidate ? NULL : dentry;
+ }
+
+-static bool match_event_file(struct eventfs_file *ef, const char *name)
+-{
+- bool ret;
+-
+- mutex_lock(&eventfs_mutex);
+- ret = !ef->is_freed && strcmp(ef->name, name) == 0;
+- mutex_unlock(&eventfs_mutex);
+-
+- return ret;
+-}
+-
+ /**
+ * eventfs_root_lookup - lookup routine to create file/dir
+ * @dir: in which a lookup is being done
+ * @dentry: file/dir dentry
+- * @flags: to pass as flags parameter to simple lookup
++ * @flags: Just passed to simple_lookup()
+ *
+- * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
+- * list of meta data to find the information needed to create the file/dir.
++ * Used to dynamically create a file/dir within @dir. If the name of @dentry
++ * is found in the eventfs_inode of @dir, the file/dir is created.
+ */
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+ {
++ const struct file_operations *fops;
++ const struct eventfs_entry *entry;
++ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+- struct eventfs_file *ef;
++ struct dentry *ei_dentry = NULL;
+ struct dentry *ret = NULL;
++ const char *name = dentry->d_name.name;
++ bool created = false;
++ umode_t mode;
++ void *data;
+ int idx;
++ int i;
++ int r;
+
+ ti = get_tracefs(dir);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return NULL;
+
+- ei = ti->private;
++ /* Grab srcu to prevent the ei from going away */
+ idx = srcu_read_lock(&eventfs_srcu);
+- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++
++ /*
++ * Grab the eventfs_mutex to get a consistent value of ti->private
++ * and of its dentry.
++ */
++ mutex_lock(&eventfs_mutex);
++ ei = READ_ONCE(ti->private);
++ if (ei)
++ ei_dentry = READ_ONCE(ei->dentry);
++ mutex_unlock(&eventfs_mutex);
++
++ if (!ei || !ei_dentry)
++ goto out;
++
++ data = ei->data;
++
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- if (!match_event_file(ef, dentry->d_name.name))
++ if (strcmp(ei_child->name, name) != 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+- create_dentry(ef, ef->d_parent, true);
++ create_dir_dentry(ei_child, ei_dentry, true);
++ created = true;
+ break;
+ }
++
++ if (created)
++ goto out;
++
++ for (i = 0; i < ei->nr_entries; i++) {
++ entry = &ei->entries[i];
++ if (strcmp(name, entry->name) == 0) {
++ void *cdata = data;
++ r = entry->callback(name, &mode, &cdata, &fops);
++ if (r <= 0)
++ continue;
++ ret = simple_lookup(dir, dentry, flags);
++ create_file_dentry(ei, &ei->d_children[i],
++ ei_dentry, name, mode, cdata,
++ fops, true);
++ break;
++ }
++ }
++ out:
+ srcu_read_unlock(&eventfs_srcu, idx);
+ return ret;
+ }
+@@ -432,29 +510,48 @@ static int eventfs_release(struct inode
+ return dcache_dir_close(inode, file);
+ }
+
++static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
++{
++ struct dentry **tmp;
++
++ tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
++ if (!tmp)
++ return -1;
++ tmp[cnt] = d;
++ tmp[cnt + 1] = NULL;
++ *dentries = tmp;
++ return 0;
++}
++
+ /**
+ * dcache_dir_open_wrapper - eventfs open wrapper
+ * @inode: not used
+- * @file: dir to be opened (to create its child)
++ * @file: dir to be opened (to create its children)
+ *
+- * Used to dynamically create the file/dir within @file. @file is really a
+- * directory and all the files/dirs of the children within @file will be
+- * created. If any of the files/dirs have already been created, their
+- * reference count will be incremented.
++ * Used to dynamically create the files/dirs within @file; all of
++ * them will be created. If one was already created, then its
++ * reference count will be increased.
+ */
+ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
+ {
++ const struct file_operations *fops;
++ const struct eventfs_entry *entry;
++ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+- struct eventfs_file *ef;
+ struct dentry_list *dlist;
+ struct dentry **dentries = NULL;
+- struct dentry *dentry = file_dentry(file);
++ struct dentry *parent = file_dentry(file);
+ struct dentry *d;
+ struct inode *f_inode = file_inode(file);
++ const char *name = parent->d_name.name;
++ umode_t mode;
++ void *data;
+ int cnt = 0;
+ int idx;
+ int ret;
++ int i;
++ int r;
+
+ ti = get_tracefs(f_inode);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+@@ -463,25 +560,51 @@ static int dcache_dir_open_wrapper(struc
+ if (WARN_ON_ONCE(file->private_data))
+ return -EINVAL;
+
++ idx = srcu_read_lock(&eventfs_srcu);
++
++ mutex_lock(&eventfs_mutex);
++ ei = READ_ONCE(ti->private);
++ mutex_unlock(&eventfs_mutex);
++
++ if (!ei) {
++ srcu_read_unlock(&eventfs_srcu, idx);
++ return -EINVAL;
++ }
++
++
++ data = ei->data;
++
+ dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
+- if (!dlist)
++ if (!dlist) {
++ srcu_read_unlock(&eventfs_srcu, idx);
+ return -ENOMEM;
++ }
+
+- ei = ti->private;
+- idx = srcu_read_lock(&eventfs_srcu);
+- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- d = create_dentry(ef, dentry, false);
++ d = create_dir_dentry(ei_child, parent, false);
+ if (d) {
+- struct dentry **tmp;
++ ret = add_dentries(&dentries, d, cnt);
++ if (ret < 0)
++ break;
++ cnt++;
++ }
++ }
+
+- tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+- if (!tmp)
++ for (i = 0; i < ei->nr_entries; i++) {
++ void *cdata = data;
++ entry = &ei->entries[i];
++ name = entry->name;
++ r = entry->callback(name, &mode, &cdata, &fops);
++ if (r <= 0)
++ continue;
++ d = create_file_dentry(ei, &ei->d_children[i],
++ parent, name, mode, cdata, fops, false);
++ if (d) {
++ ret = add_dentries(&dentries, d, cnt);
++ if (ret < 0)
+ break;
+- tmp[cnt] = d;
+- tmp[cnt + 1] = NULL;
+ cnt++;
+- dentries = tmp;
+ }
+ }
+ srcu_read_unlock(&eventfs_srcu, idx);
+@@ -514,63 +637,90 @@ static int dcache_readdir_wrapper(struct
+ }
+
+ /**
+- * eventfs_prepare_ef - helper function to prepare eventfs_file
+- * @name: the name of the file/directory to create.
+- * @mode: the permission that the file should have.
+- * @fop: struct file_operations that should be used for this file/directory.
+- * @iop: struct inode_operations that should be used for this file/directory.
+- * @data: something that the caller will want to get to later on. The
+- * inode.i_private pointer will point to this value on the open() call.
+- *
+- * This function allocates and fills the eventfs_file structure.
+- */
+-static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode,
+- const struct file_operations *fop,
+- const struct inode_operations *iop,
+- void *data)
++ * eventfs_create_dir - Create the eventfs_inode for this directory
++ * @name: The name of the directory to create.
++ * @parent: The eventfs_inode of the parent directory.
++ * @entries: A list of entries that represent the files under this directory
++ * @size: The number of @entries
++ * @data: The default data to pass to the files (an entry may override it).
++ *
++ * This function creates the descriptor to represent a directory in the
++ * eventfs. This descriptor is an eventfs_inode, and it is returned to be
++ * used to create other children underneath.
++ *
++ * The @entries is an array of eventfs_entry structures which has:
++ * const char *name
++ * eventfs_callback callback;
++ *
++ * The name is the name of the file, and the callback is a pointer to a function
++ * that will be called when the file is referenced (either by lookup or by
++ * reading a directory). The callback is of the prototype:
++ *
++ * int callback(const char *name, umode_t *mode, void **data,
++ * const struct file_operations **fops);
++ *
++ * When a file needs to be created, this callback will be called with
++ * name = the name of the file being created (so that the same callback
++ * may be used for multiple files).
++ * mode = a place to set the file's mode
++ * data = A pointer to @data, and the callback may replace it, which will
++ * cause the file created to pass the new data to the open() call.
++ * fops = the fops to use for the created file.
++ */
++struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
++ const struct eventfs_entry *entries,
++ int size, void *data)
+ {
+- struct eventfs_file *ef;
++ struct eventfs_inode *ei;
++
++ if (!parent)
++ return ERR_PTR(-EINVAL);
+
+- ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+- if (!ef)
++ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++ if (!ei)
+ return ERR_PTR(-ENOMEM);
+
+- ef->name = kstrdup(name, GFP_KERNEL);
+- if (!ef->name) {
+- kfree(ef);
++ ei->name = kstrdup_const(name, GFP_KERNEL);
++ if (!ei->name) {
++ kfree(ei);
+ return ERR_PTR(-ENOMEM);
+ }
+
+- if (S_ISDIR(mode)) {
+- ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL);
+- if (!ef->ei) {
+- kfree(ef->name);
+- kfree(ef);
++ if (size) {
++ ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++ if (!ei->d_children) {
++ kfree_const(ei->name);
++ kfree(ei);
+ return ERR_PTR(-ENOMEM);
+ }
+- INIT_LIST_HEAD(&ef->ei->e_top_files);
+- } else {
+- ef->ei = NULL;
+ }
+
+- ef->iop = iop;
+- ef->fop = fop;
+- ef->mode = mode;
+- ef->data = data;
+- return ef;
++ ei->entries = entries;
++ ei->nr_entries = size;
++ ei->data = data;
++ INIT_LIST_HEAD(&ei->children);
++
++ mutex_lock(&eventfs_mutex);
++ list_add_tail(&ei->list, &parent->children);
++ ei->d_parent = parent->dentry;
++ mutex_unlock(&eventfs_mutex);
++
++ return ei;
+ }
+
+ /**
+- * eventfs_create_events_dir - create the trace event structure
+- * @name: the name of the directory to create.
+- * @parent: parent dentry for this file. This should be a directory dentry
+- * if set. If this parameter is NULL, then the directory will be
+- * created in the root of the tracefs filesystem.
++ * eventfs_create_events_dir - create the top level events directory
++ * @name: The name of the top level directory to create.
++ * @parent: Parent dentry for this file in the tracefs directory.
++ * @entries: A list of entries that represent the files under this directory
++ * @size: The number of @entries
++ * @data: The default data to pass to the files (an entry may override it).
+ *
+ * This function creates the top of the trace event directory.
+ */
+-struct dentry *eventfs_create_events_dir(const char *name,
+- struct dentry *parent)
++struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
++ const struct eventfs_entry *entries,
++ int size, void *data)
+ {
+ struct dentry *dentry = tracefs_start_creating(name, parent);
+ struct eventfs_inode *ei;
+@@ -581,19 +731,32 @@ struct dentry *eventfs_create_events_dir
+ return NULL;
+
+ if (IS_ERR(dentry))
+- return dentry;
++ return (struct eventfs_inode *)dentry;
+
+ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ if (!ei)
+- return ERR_PTR(-ENOMEM);
++ goto fail;
++
+ inode = tracefs_get_inode(dentry->d_sb);
+- if (unlikely(!inode)) {
+- kfree(ei);
+- tracefs_failed_creating(dentry);
+- return ERR_PTR(-ENOMEM);
+- }
++ if (unlikely(!inode))
++ goto fail;
++
++ if (size) {
++ ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++ if (!ei->d_children)
++ goto fail;
++ }
++
++ ei->dentry = dentry;
++ ei->entries = entries;
++ ei->nr_entries = size;
++ ei->data = data;
++ ei->name = kstrdup_const(name, GFP_KERNEL);
++ if (!ei->name)
++ goto fail;
+
+- INIT_LIST_HEAD(&ei->e_top_files);
++ INIT_LIST_HEAD(&ei->children);
++ INIT_LIST_HEAD(&ei->list);
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
+@@ -608,193 +771,41 @@ struct dentry *eventfs_create_events_dir
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+- return tracefs_end_creating(dentry);
+-}
++ tracefs_end_creating(dentry);
+
+-/**
+- * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
+- * @name: the name of the file to create.
+- * @parent: parent dentry for this dir.
+- *
+- * This function adds eventfs subsystem dir to list.
+- * And all these dirs are created on the fly when they are looked up,
+- * and the dentry and inodes will be removed when they are done.
+- */
+-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+- struct dentry *parent)
+-{
+- struct tracefs_inode *ti_parent;
+- struct eventfs_inode *ei_parent;
+- struct eventfs_file *ef;
++ /* Will call dput when the directory is removed */
++ dget(dentry);
+
+- if (security_locked_down(LOCKDOWN_TRACEFS))
+- return NULL;
+-
+- if (!parent)
+- return ERR_PTR(-EINVAL);
++ return ei;
+
+- ti_parent = get_tracefs(parent->d_inode);
+- ei_parent = ti_parent->private;
+-
+- ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+- if (IS_ERR(ef))
+- return ef;
+-
+- mutex_lock(&eventfs_mutex);
+- list_add_tail(&ef->list, &ei_parent->e_top_files);
+- ef->d_parent = parent;
+- mutex_unlock(&eventfs_mutex);
+- return ef;
++ fail:
++ kfree(ei->d_children);
++ kfree(ei);
++ tracefs_failed_creating(dentry);
++ return ERR_PTR(-ENOMEM);
+ }
+
+-/**
+- * eventfs_add_dir - add eventfs dir to list to create later
+- * @name: the name of the file to create.
+- * @ef_parent: parent eventfs_file for this dir.
+- *
+- * This function adds eventfs dir to list.
+- * And all these dirs are created on the fly when they are looked up,
+- * and the dentry and inodes will be removed when they are done.
+- */
+-struct eventfs_file *eventfs_add_dir(const char *name,
+- struct eventfs_file *ef_parent)
++static void free_ei(struct rcu_head *head)
+ {
+- struct eventfs_file *ef;
++ struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+
+- if (security_locked_down(LOCKDOWN_TRACEFS))
+- return NULL;
+-
+- if (!ef_parent)
+- return ERR_PTR(-EINVAL);
+-
+- ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+- if (IS_ERR(ef))
+- return ef;
+-
+- mutex_lock(&eventfs_mutex);
+- list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+- ef->d_parent = ef_parent->dentry;
+- mutex_unlock(&eventfs_mutex);
+- return ef;
+-}
+-
+-/**
+- * eventfs_add_events_file - add the data needed to create a file for later reference
+- * @name: the name of the file to create.
+- * @mode: the permission that the file should have.
+- * @parent: parent dentry for this file.
+- * @data: something that the caller will want to get to later on.
+- * @fop: struct file_operations that should be used for this file.
+- *
+- * This function is used to add the information needed to create a
+- * dentry/inode within the top level events directory. The file created
+- * will have the @mode permissions. The @data will be used to fill the
+- * inode.i_private when the open() call is done. The dentry and inodes are
+- * all created when they are referenced, and removed when they are no
+- * longer referenced.
+- */
+-int eventfs_add_events_file(const char *name, umode_t mode,
+- struct dentry *parent, void *data,
+- const struct file_operations *fop)
+-{
+- struct tracefs_inode *ti;
+- struct eventfs_inode *ei;
+- struct eventfs_file *ef;
+-
+- if (security_locked_down(LOCKDOWN_TRACEFS))
+- return -ENODEV;
+-
+- if (!parent)
+- return -EINVAL;
+-
+- if (!(mode & S_IFMT))
+- mode |= S_IFREG;
+-
+- if (!parent->d_inode)
+- return -EINVAL;
+-
+- ti = get_tracefs(parent->d_inode);
+- if (!(ti->flags & TRACEFS_EVENT_INODE))
+- return -EINVAL;
+-
+- ei = ti->private;
+- ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+-
+- if (IS_ERR(ef))
+- return -ENOMEM;
+-
+- mutex_lock(&eventfs_mutex);
+- list_add_tail(&ef->list, &ei->e_top_files);
+- ef->d_parent = parent;
+- mutex_unlock(&eventfs_mutex);
+- return 0;
+-}
+-
+-/**
+- * eventfs_add_file - add eventfs file to list to create later
+- * @name: the name of the file to create.
+- * @mode: the permission that the file should have.
+- * @ef_parent: parent eventfs_file for this file.
+- * @data: something that the caller will want to get to later on.
+- * @fop: struct file_operations that should be used for this file.
+- *
+- * This function is used to add the information needed to create a
+- * file within a subdirectory of the events directory. The file created
+- * will have the @mode permissions. The @data will be used to fill the
+- * inode.i_private when the open() call is done. The dentry and inodes are
+- * all created when they are referenced, and removed when they are no
+- * longer referenced.
+- */
+-int eventfs_add_file(const char *name, umode_t mode,
+- struct eventfs_file *ef_parent,
+- void *data,
+- const struct file_operations *fop)
+-{
+- struct eventfs_file *ef;
+-
+- if (security_locked_down(LOCKDOWN_TRACEFS))
+- return -ENODEV;
+-
+- if (!ef_parent)
+- return -EINVAL;
+-
+- if (!(mode & S_IFMT))
+- mode |= S_IFREG;
+-
+- ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+- if (IS_ERR(ef))
+- return -ENOMEM;
+-
+- mutex_lock(&eventfs_mutex);
+- list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+- ef->d_parent = ef_parent->dentry;
+- mutex_unlock(&eventfs_mutex);
+- return 0;
+-}
+-
+-static void free_ef(struct rcu_head *head)
+-{
+- struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+-
+- kfree(ef->name);
+- kfree(ef->ei);
+- kfree(ef);
++ kfree_const(ei->name);
++ kfree(ei->d_children);
++ kfree(ei);
+ }
+
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+- * @ef: eventfs_file to be removed.
+- * @head: to create list of eventfs_file to be deleted
+- * @level: to check recursion depth
++ * @ei: eventfs_inode to be removed.
+ *
+- * The helper function eventfs_remove_rec() is used to clean up and free the
+- * associated data from eventfs for both of the added functions.
++ * This function recursively unlinks @ei and its children from their
++ * parents' lists and queues them on @head for deletion.
+ */
+-static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
++static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
+ {
+- struct eventfs_file *ef_child;
++ struct eventfs_inode *ei_child;
+
+- if (!ef)
++ if (!ei)
+ return;
+ /*
+ * Check recursion depth. It should never be greater than 3:
+@@ -806,62 +817,68 @@ static void eventfs_remove_rec(struct ev
+ if (WARN_ON_ONCE(level > 3))
+ return;
+
+- if (ef->ei) {
+- /* search for nested folders or files */
+- list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+- lockdep_is_held(&eventfs_mutex)) {
+- eventfs_remove_rec(ef_child, head, level + 1);
+- }
++ /* search for nested folders or files */
++ list_for_each_entry_srcu(ei_child, &ei->children, list,
++ lockdep_is_held(&eventfs_mutex)) {
++ eventfs_remove_rec(ei_child, head, level + 1);
+ }
+
+- list_del_rcu(&ef->list);
+- list_add_tail(&ef->del_list, head);
++ list_del_rcu(&ei->list);
++ list_add_tail(&ei->del_list, head);
+ }
+
++static void unhook_dentry(struct dentry **dentry, struct dentry **list)
++{
++ if (*dentry) {
++ unsigned long ptr = (unsigned long)*list;
++
++ /* Keep the dentry from being freed yet */
++ dget(*dentry);
++
++ /*
++ * Paranoid: The dget() above should prevent the dentry
++ * from being freed and calling eventfs_set_ei_status_free().
++ * But just in case, set the link list LSB pointer to 1
++ * and have eventfs_set_ei_status_free() check that to
++ * make sure that if it does happen, it will not think
++ * the d_fsdata is an eventfs_inode.
++ *
++ * For this to work, no eventfs_inode should be allocated
++ * at an odd address, as the ei should always be allocated
++ * to be at least word aligned. Check for that too.
++ */
++ WARN_ON_ONCE(ptr & 1);
++
++ (*dentry)->d_fsdata = (void *)(ptr | 1);
++ *list = *dentry;
++ *dentry = NULL;
++ }
++}
+ /**
+ * eventfs_remove - remove eventfs dir or file from list
+- * @ef: eventfs_file to be removed.
++ * @ei: eventfs_inode to be removed.
+ *
+ * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
+ */
+-void eventfs_remove(struct eventfs_file *ef)
++void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+- struct eventfs_file *tmp;
+- LIST_HEAD(ef_del_list);
++ struct eventfs_inode *tmp;
++ LIST_HEAD(ei_del_list);
+ struct dentry *dentry_list = NULL;
+ struct dentry *dentry;
++ int i;
+
+- if (!ef)
++ if (!ei)
+ return;
+
+ mutex_lock(&eventfs_mutex);
+- eventfs_remove_rec(ef, &ef_del_list, 0);
+- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+- if (ef->dentry) {
+- unsigned long ptr = (unsigned long)dentry_list;
+-
+- /* Keep the dentry from being freed yet */
+- dget(ef->dentry);
+-
+- /*
+- * Paranoid: The dget() above should prevent the dentry
+- * from being freed and calling eventfs_set_ef_status_free().
+- * But just in case, set the link list LSB pointer to 1
+- * and have eventfs_set_ef_status_free() check that to
+- * make sure that if it does happen, it will not think
+- * the d_fsdata is an event_file.
+- *
+- * For this to work, no event_file should be allocated
+- * on a odd space, as the ef should always be allocated
+- * to be at least word aligned. Check for that too.
+- */
+- WARN_ON_ONCE(ptr & 1);
+-
+- ef->dentry->d_fsdata = (void *)(ptr | 1);
+- dentry_list = ef->dentry;
+- ef->dentry = NULL;
+- }
+- call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
++ eventfs_remove_rec(ei, &ei_del_list, 0);
++
++ list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
++ for (i = 0; i < ei->nr_entries; i++)
++ unhook_dentry(&ei->d_children[i], &dentry_list);
++ unhook_dentry(&ei->dentry, &dentry_list);
++ call_srcu(&eventfs_srcu, &ei->rcu, free_ei);
+ }
+ mutex_unlock(&eventfs_mutex);
+
+@@ -876,8 +893,8 @@ void eventfs_remove(struct eventfs_file
+ mutex_lock(&eventfs_mutex);
+ /* dentry should now have at least a single reference */
+ WARN_ONCE((int)d_count(dentry) < 1,
+- "dentry %p less than one reference (%d) after invalidate\n",
+- dentry, d_count(dentry));
++ "dentry %px (%s) less than one reference (%d) after invalidate\n",
++ dentry, dentry->d_name.name, d_count(dentry));
+ mutex_unlock(&eventfs_mutex);
+ dput(dentry);
+ }
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -385,7 +385,7 @@ static void tracefs_dentry_iput(struct d
+
+ ti = get_tracefs(inode);
+ if (ti && ti->flags & TRACEFS_EVENT_INODE)
+- eventfs_set_ef_status_free(ti, dentry);
++ eventfs_set_ei_status_free(ti, dentry);
+ iput(inode);
+ }
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -13,6 +13,41 @@ struct tracefs_inode {
+ struct inode vfs_inode;
+ };
+
++/*
++ * struct eventfs_inode - hold the properties of the eventfs directories.
++ * @list: link list into the parent directory
++ * @entries: the array of entries representing the files in the directory
++ * @name: the name of the directory to create
++ * @children: link list into the child eventfs_inode
++ * @dentry: the dentry of the directory
++ * @d_parent: pointer to the parent's dentry
++ * @d_children: The array of dentries to represent the files when created
++ * @data: The private data to pass to the callbacks
++ * @nr_entries: The number of items in @entries
++ */
++struct eventfs_inode {
++ struct list_head list;
++ const struct eventfs_entry *entries;
++ const char *name;
++ struct list_head children;
++ struct dentry *dentry;
++ struct dentry *d_parent;
++ struct dentry **d_children;
++ void *data;
++ /*
++ * Union - used for deletion
++ * @del_list: list of eventfs_inode to delete
++ * @rcu: eventfs_inode to delete in RCU
++ * @is_freed: node is freed if one of the above is set
++ */
++ union {
++ struct list_head del_list;
++ struct rcu_head rcu;
++ unsigned long is_freed;
++ };
++ int nr_entries;
++};
++
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
+ {
+ return container_of(inode, struct tracefs_inode, vfs_inode);
+@@ -25,6 +60,6 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
+-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry);
++void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+ #endif /* _TRACEFS_INTERNAL_H */
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -652,7 +652,7 @@ struct trace_event_file {
+ struct list_head list;
+ struct trace_event_call *event_call;
+ struct event_filter __rcu *filter;
+- struct eventfs_file *ef;
++ struct eventfs_inode *ei;
+ struct trace_array *tr;
+ struct trace_subsystem_dir *system;
+ struct list_head triggers;
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -23,26 +23,25 @@ struct file_operations;
+
+ struct eventfs_file;
+
+-struct dentry *eventfs_create_events_dir(const char *name,
+- struct dentry *parent);
++typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
++ const struct file_operations **fops);
+
+-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+- struct dentry *parent);
++struct eventfs_entry {
++ const char *name;
++ eventfs_callback callback;
++};
+
+-struct eventfs_file *eventfs_add_dir(const char *name,
+- struct eventfs_file *ef_parent);
++struct eventfs_inode;
+
+-int eventfs_add_file(const char *name, umode_t mode,
+- struct eventfs_file *ef_parent, void *data,
+- const struct file_operations *fops);
++struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
++ const struct eventfs_entry *entries,
++ int size, void *data);
+
+-int eventfs_add_events_file(const char *name, umode_t mode,
+- struct dentry *parent, void *data,
+- const struct file_operations *fops);
++struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
++ const struct eventfs_entry *entries,
++ int size, void *data);
+
+-void eventfs_remove(struct eventfs_file *ef);
+-
+-void eventfs_remove_events_dir(struct dentry *dentry);
++void eventfs_remove_dir(struct eventfs_inode *ei);
+
+ struct dentry *tracefs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -9760,7 +9760,6 @@ static __init void create_trace_instance
+ static void
+ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
+ {
+- struct trace_event_file *file;
+ int cpu;
+
+ trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
+@@ -9793,11 +9792,7 @@ init_tracer_tracefs(struct trace_array *
+ trace_create_file("trace_marker", 0220, d_tracer,
+ tr, &tracing_mark_fops);
+
+- file = __find_event_file(tr, "ftrace", "print");
+- if (file && file->ef)
+- eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
+- file, &event_trigger_fops);
+- tr->trace_marker_file = file;
++ tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
+
+ trace_create_file("trace_marker_raw", 0220, d_tracer,
+ tr, &tracing_mark_raw_fops);
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -381,7 +381,7 @@ struct trace_array {
+ struct dentry *dir;
+ struct dentry *options;
+ struct dentry *percpu_dir;
+- struct dentry *event_dir;
++ struct eventfs_inode *event_dir;
+ struct trace_options *topts;
+ struct list_head systems;
+ struct list_head events;
+@@ -1345,7 +1345,7 @@ struct trace_subsystem_dir {
+ struct list_head list;
+ struct event_subsystem *subsystem;
+ struct trace_array *tr;
+- struct eventfs_file *ef;
++ struct eventfs_inode *ei;
+ int ref_count;
+ int nr_events;
+ };
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -984,7 +984,7 @@ static void remove_subsystem(struct trac
+ return;
+
+ if (!--dir->nr_events) {
+- eventfs_remove(dir->ef);
++ eventfs_remove_dir(dir->ei);
+ list_del(&dir->list);
+ __put_system_dir(dir);
+ }
+@@ -1013,7 +1013,7 @@ void event_file_put(struct trace_event_f
+
+ static void remove_event_file_dir(struct trace_event_file *file)
+ {
+- eventfs_remove(file->ef);
++ eventfs_remove_dir(file->ei);
+ list_del(&file->list);
+ remove_subsystem(file->system);
+ free_event_filter(file->filter);
+@@ -2302,14 +2302,40 @@ create_new_subsystem(const char *name)
+ return NULL;
+ }
+
+-static struct eventfs_file *
++int system_callback(const char *name, umode_t *mode, void **data,
++ const struct file_operations **fops)
++{
++ if (strcmp(name, "filter") == 0)
++ *fops = &ftrace_subsystem_filter_fops;
++
++ else if (strcmp(name, "enable") == 0)
++ *fops = &ftrace_system_enable_fops;
++
++ else
++ return 0;
++
++ *mode = TRACE_MODE_WRITE;
++ return 1;
++}
++
++static struct eventfs_inode *
+ event_subsystem_dir(struct trace_array *tr, const char *name,
+- struct trace_event_file *file, struct dentry *parent)
++ struct trace_event_file *file, struct eventfs_inode *parent)
+ {
+ struct event_subsystem *system, *iter;
+ struct trace_subsystem_dir *dir;
+- struct eventfs_file *ef;
+- int res;
++ struct eventfs_inode *ei;
++ int nr_entries;
++ static struct eventfs_entry system_entries[] = {
++ {
++ .name = "filter",
++ .callback = system_callback,
++ },
++ {
++ .name = "enable",
++ .callback = system_callback,
++ }
++ };
+
+ /* First see if we did not already create this dir */
+ list_for_each_entry(dir, &tr->systems, list) {
+@@ -2317,7 +2343,7 @@ event_subsystem_dir(struct trace_array *
+ if (strcmp(system->name, name) == 0) {
+ dir->nr_events++;
+ file->system = dir;
+- return dir->ef;
++ return dir->ei;
+ }
+ }
+
+@@ -2341,39 +2367,29 @@ event_subsystem_dir(struct trace_array *
+ } else
+ __get_system(system);
+
+- ef = eventfs_add_subsystem_dir(name, parent);
+- if (IS_ERR(ef)) {
++ /* ftrace only has directories no files */
++ if (strcmp(name, "ftrace") == 0)
++ nr_entries = 0;
++ else
++ nr_entries = ARRAY_SIZE(system_entries);
++
++ ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
++ if (!ei) {
+ pr_warn("Failed to create system directory %s\n", name);
+ __put_system(system);
+ goto out_free;
+ }
+
+- dir->ef = ef;
++ dir->ei = ei;
+ dir->tr = tr;
+ dir->ref_count = 1;
+ dir->nr_events = 1;
+ dir->subsystem = system;
+ file->system = dir;
+
+- /* the ftrace system is special, do not create enable or filter files */
+- if (strcmp(name, "ftrace") != 0) {
+-
+- res = eventfs_add_file("filter", TRACE_MODE_WRITE,
+- dir->ef, dir,
+- &ftrace_subsystem_filter_fops);
+- if (res) {
+- kfree(system->filter);
+- system->filter = NULL;
+- pr_warn("Could not create tracefs '%s/filter' entry\n", name);
+- }
+-
+- eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
+- &ftrace_system_enable_fops);
+- }
+-
+ list_add(&dir->list, &tr->systems);
+
+- return dir->ef;
++ return dir->ei;
+
+ out_free:
+ kfree(dir);
+@@ -2422,15 +2438,134 @@ event_define_fields(struct trace_event_c
+ return ret;
+ }
+
++static int event_callback(const char *name, umode_t *mode, void **data,
++ const struct file_operations **fops)
++{
++ struct trace_event_file *file = *data;
++ struct trace_event_call *call = file->event_call;
++
++ if (strcmp(name, "format") == 0) {
++ *mode = TRACE_MODE_READ;
++ *fops = &ftrace_event_format_fops;
++ *data = call;
++ return 1;
++ }
++
++ /*
++ * Only event directories that can be enabled should have
++ * triggers or filters, with the exception of the "print"
++ * event that can have a "trigger" file.
++ */
++ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
++ if (call->class->reg && strcmp(name, "enable") == 0) {
++ *mode = TRACE_MODE_WRITE;
++ *fops = &ftrace_enable_fops;
++ return 1;
++ }
++
++ if (strcmp(name, "filter") == 0) {
++ *mode = TRACE_MODE_WRITE;
++ *fops = &ftrace_event_filter_fops;
++ return 1;
++ }
++ }
++
++ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
++ strcmp(trace_event_name(call), "print") == 0) {
++ if (strcmp(name, "trigger") == 0) {
++ *mode = TRACE_MODE_WRITE;
++ *fops = &event_trigger_fops;
++ return 1;
++ }
++ }
++
++#ifdef CONFIG_PERF_EVENTS
++ if (call->event.type && call->class->reg &&
++ strcmp(name, "id") == 0) {
++ *mode = TRACE_MODE_READ;
++ *data = (void *)(long)call->event.type;
++ *fops = &ftrace_event_id_fops;
++ return 1;
++ }
++#endif
++
++#ifdef CONFIG_HIST_TRIGGERS
++ if (strcmp(name, "hist") == 0) {
++ *mode = TRACE_MODE_READ;
++ *fops = &event_hist_fops;
++ return 1;
++ }
++#endif
++#ifdef CONFIG_HIST_TRIGGERS_DEBUG
++ if (strcmp(name, "hist_debug") == 0) {
++ *mode = TRACE_MODE_READ;
++ *fops = &event_hist_debug_fops;
++ return 1;
++ }
++#endif
++#ifdef CONFIG_TRACE_EVENT_INJECT
++ if (call->event.type && call->class->reg &&
++ strcmp(name, "inject") == 0) {
++ *mode = 0200;
++ *fops = &event_inject_fops;
++ return 1;
++ }
++#endif
++ return 0;
++}
++
+ static int
+-event_create_dir(struct dentry *parent, struct trace_event_file *file)
++event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
+ {
+ struct trace_event_call *call = file->event_call;
+- struct eventfs_file *ef_subsystem = NULL;
+ struct trace_array *tr = file->tr;
+- struct eventfs_file *ef;
++ struct eventfs_inode *e_events;
++ struct eventfs_inode *ei;
+ const char *name;
++ int nr_entries;
+ int ret;
++ static struct eventfs_entry event_entries[] = {
++ {
++ .name = "enable",
++ .callback = event_callback,
++ },
++ {
++ .name = "filter",
++ .callback = event_callback,
++ },
++ {
++ .name = "trigger",
++ .callback = event_callback,
++ },
++ {
++ .name = "format",
++ .callback = event_callback,
++ },
++#ifdef CONFIG_PERF_EVENTS
++ {
++ .name = "id",
++ .callback = event_callback,
++ },
++#endif
++#ifdef CONFIG_HIST_TRIGGERS
++ {
++ .name = "hist",
++ .callback = event_callback,
++ },
++#endif
++#ifdef CONFIG_HIST_TRIGGERS_DEBUG
++ {
++ .name = "hist_debug",
++ .callback = event_callback,
++ },
++#endif
++#ifdef CONFIG_TRACE_EVENT_INJECT
++ {
++ .name = "inject",
++ .callback = event_callback,
++ },
++#endif
++ };
+
+ /*
+ * If the trace point header did not define TRACE_SYSTEM
+@@ -2440,29 +2575,20 @@ event_create_dir(struct dentry *parent,
+ if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
+ return -ENODEV;
+
+- ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
+- if (!ef_subsystem)
++ e_events = event_subsystem_dir(tr, call->class->system, file, parent);
++ if (!e_events)
+ return -ENOMEM;
+
++ nr_entries = ARRAY_SIZE(event_entries);
++
+ name = trace_event_name(call);
+- ef = eventfs_add_dir(name, ef_subsystem);
+- if (IS_ERR(ef)) {
++ ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
++ if (IS_ERR(ei)) {
+ pr_warn("Could not create tracefs '%s' directory\n", name);
+ return -1;
+ }
+
+- file->ef = ef;
+-
+- if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+- eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
+- &ftrace_enable_fops);
+-
+-#ifdef CONFIG_PERF_EVENTS
+- if (call->event.type && call->class->reg)
+- eventfs_add_file("id", TRACE_MODE_READ, file->ef,
+- (void *)(long)call->event.type,
+- &ftrace_event_id_fops);
+-#endif
++ file->ei = ei;
+
+ ret = event_define_fields(call);
+ if (ret < 0) {
+@@ -2470,35 +2596,6 @@ event_create_dir(struct dentry *parent,
+ return ret;
+ }
+
+- /*
+- * Only event directories that can be enabled should have
+- * triggers or filters.
+- */
+- if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+- eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
+- file, &ftrace_event_filter_fops);
+-
+- eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
+- file, &event_trigger_fops);
+- }
+-
+-#ifdef CONFIG_HIST_TRIGGERS
+- eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
+- &event_hist_fops);
+-#endif
+-#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+- eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
+- &event_hist_debug_fops);
+-#endif
+- eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
+- &ftrace_event_format_fops);
+-
+-#ifdef CONFIG_TRACE_EVENT_INJECT
+- if (call->event.type && call->class->reg)
+- eventfs_add_file("inject", 0200, file->ef, file,
+- &event_inject_fops);
+-#endif
+-
+ return 0;
+ }
+
+@@ -3644,30 +3741,65 @@ static __init int setup_trace_event(char
+ }
+ __setup("trace_event=", setup_trace_event);
+
++static int events_callback(const char *name, umode_t *mode, void **data,
++ const struct file_operations **fops)
++{
++ if (strcmp(name, "enable") == 0) {
++ *mode = TRACE_MODE_WRITE;
++ *fops = &ftrace_tr_enable_fops;
++ return 1;
++ }
++
++ if (strcmp(name, "header_page") == 0)
++ *data = ring_buffer_print_page_header;
++
++ else if (strcmp(name, "header_event") == 0)
++ *data = ring_buffer_print_entry_header;
++
++ else
++ return 0;
++
++ *mode = TRACE_MODE_READ;
++ *fops = &ftrace_show_header_fops;
++ return 1;
++}
++
+ /* Expects to have event_mutex held when called */
+ static int
+ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+ {
+- struct dentry *d_events;
++ struct eventfs_inode *e_events;
+ struct dentry *entry;
+- int error = 0;
++ int nr_entries;
++ static struct eventfs_entry events_entries[] = {
++ {
++ .name = "enable",
++ .callback = events_callback,
++ },
++ {
++ .name = "header_page",
++ .callback = events_callback,
++ },
++ {
++ .name = "header_event",
++ .callback = events_callback,
++ },
++ };
+
+ entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_fops);
+ if (!entry)
+ return -ENOMEM;
+
+- d_events = eventfs_create_events_dir("events", parent);
+- if (IS_ERR(d_events)) {
++ nr_entries = ARRAY_SIZE(events_entries);
++
++ e_events = eventfs_create_events_dir("events", parent, events_entries,
++ nr_entries, tr);
++ if (IS_ERR(e_events)) {
+ pr_warn("Could not create tracefs 'events' directory\n");
+ return -ENOMEM;
+ }
+
+- error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
+- tr, &ftrace_tr_enable_fops);
+- if (error)
+- return -ENOMEM;
+-
+ /* There are not as crucial, just warn if they are not created */
+
+ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+@@ -3677,16 +3809,7 @@ create_event_toplevel_files(struct dentr
+ TRACE_MODE_WRITE, parent, tr,
+ &ftrace_set_event_notrace_pid_fops);
+
+- /* ring buffer internal formats */
+- eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
+- ring_buffer_print_page_header,
+- &ftrace_show_header_fops);
+-
+- eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
+- ring_buffer_print_entry_header,
+- &ftrace_show_header_fops);
+-
+- tr->event_dir = d_events;
++ tr->event_dir = e_events;
+
+ return 0;
+ }
+@@ -3770,7 +3893,7 @@ int event_trace_del_tracer(struct trace_
+
+ down_write(&trace_event_sem);
+ __trace_remove_event_dirs(tr);
+- eventfs_remove_events_dir(tr->event_dir);
++ eventfs_remove_dir(tr->event_dir);
+ up_write(&trace_event_sem);
+
+ tr->event_dir = NULL;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:18 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:29 -0500
+Subject: eventfs: Remove expectation that ei->is_freed means ei->dentry == NULL
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120950.284520771@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 88903daecacf03b1e5636e1b5f18bda5b07030fc upstream.
+
+The logic to free the eventfs_inode (ei) used to set is_freed and clear the
+"dentry" field under the eventfs_mutex. But that changed when a race was
+found where the ei->dentry needed to be cleared when the last dput() was
+called on it. But there was still logic that checked if ei->dentry was not
+NULL and is_freed is set, and would warn if it was.
+
+But since that situation was changed and the ei->dentry isn't cleared
+until the last dput() is called on it while the ei->is_freed is set, do
+not test for that condition anymore, and change the comments to reflect
+that.
+
+Link: https://lkml.kernel.org/r/20231120235154.265826243@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 020010fbfa20 ("eventfs: Delete eventfs_inode when the last dentry is freed")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -27,16 +27,16 @@
+ /*
+ * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
+ * to the ei->dentry must be done under this mutex and after checking
+- * if ei->is_freed is not set. The ei->dentry is released under the
+- * mutex at the same time ei->is_freed is set. If ei->is_freed is set
+- * then the ei->dentry is invalid.
++ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
++ * is on its way to being freed after the last dput() is made on it.
+ */
+ static DEFINE_MUTEX(eventfs_mutex);
+
+ /*
+ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+ * its parent's list and will have is_freed set (under eventfs_mutex).
+- * After the SRCU grace period is over, the ei may be freed.
++ * After the SRCU grace period is over and the last dput() is called
++ * the ei is freed.
+ */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+
+@@ -365,12 +365,14 @@ create_file_dentry(struct eventfs_inode
+ * created the dentry for this e_dentry. In which case
+ * use that one.
+ *
+- * Note, with the mutex held, the e_dentry cannot have content
+- * and the ei->is_freed be true at the same time.
++ * If ei->is_freed is set, the e_dentry is currently on its
++ * way to being freed, don't return it. If e_dentry is NULL
++ * it means it was already freed.
+ */
+- dentry = *e_dentry;
+- if (WARN_ON_ONCE(dentry && ei->is_freed))
++ if (ei->is_freed)
+ dentry = NULL;
++ else
++ dentry = *e_dentry;
+ /* The lookup does not need to up the dentry refcount */
+ if (dentry && !lookup)
+ dget(dentry);
+@@ -473,8 +475,8 @@ create_dir_dentry(struct eventfs_inode *
+ * created the dentry for this e_dentry. In which case
+ * use that one.
+ *
+- * Note, with the mutex held, the e_dentry cannot have content
+- * and the ei->is_freed be true at the same time.
++ * If ei->is_freed is set, the e_dentry is currently on its
++ * way to being freed.
+ */
+ dentry = ei->dentry;
+ if (dentry && !lookup)
--- /dev/null
+From stable+bounces-18947-greg=kroah.com@vger.kernel.org Tue Feb 6 13:15:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:19 -0500
+Subject: eventfs: Remove extra dget() in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.657072999@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 77bc4d4921bd3497678ba8e7f4e480de35692f05 upstream.
+
+The creation of the top events directory does a dget() at the end of the
+creation in eventfs_create_events_dir() with a comment saying the final
+dput() will happen when it is removed. The problem is that a dget() is
+already done on the dentry when it was created with tracefs_start_creating()!
+The dget() now just causes a memory leak of that dentry.
+
+Remove the extra dget() as the final dput() in the deletion of the events
+directory actually matches the one in tracefs_start_creating().
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231031124229.4f2e3fa1@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -774,9 +774,6 @@ struct eventfs_inode *eventfs_create_eve
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ tracefs_end_creating(dentry);
+
+- /* Will call dput when the directory is removed */
+- dget(dentry);
+-
+ return ei;
+
+ fail:
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:28 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:01 -0500
+Subject: eventfs: Remove fsnotify*() functions from lookup()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120955.500466790@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 12d823b31fadf47c8f36ecada7abac5f903cac33 upstream.
+
+The dentries and inodes are created when referenced in the lookup code.
+There's no reason to call fsnotify_*() functions when they are created by
+a reference. It doesn't make any sense.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.166973329@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: a376007917776 ("eventfs: Implement functions to create files and dirs when accessed");
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -366,7 +366,6 @@ static struct dentry *lookup_file(struct
+ dentry->d_fsdata = get_ei(parent_ei);
+
+ d_add(dentry, inode);
+- fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return NULL;
+ };
+
+@@ -408,7 +407,6 @@ static struct dentry *lookup_dir_entry(s
+ inc_nlink(inode);
+ d_add(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return NULL;
+ }
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:21 -0500
+Subject: eventfs: Remove "is_freed" union with rcu head
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120948.980929088@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit f2f496370afcbc5227d7002da28c74b91fed12ff upstream.
+
+The eventfs_inode->is_freed was a union with the rcu_head with the
+assumption that when it was on the srcu list the head would contain a
+pointer which would make "is_freed" true. But that was a wrong assumption
+as the rcu head is a singly linked list where the last element is NULL.
+
+Instead, split the nr_entries integer so that "is_freed" is one bit and
+the nr_entries is the next 31 bits. As there shouldn't be more than 10
+(currently there's at most 5 to 7 depending on the config), this should
+not be a problem.
+
+Link: https://lkml.kernel.org/r/20231101172649.049758712@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 2 ++
+ fs/tracefs/internal.h | 6 +++---
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -824,6 +824,8 @@ static void eventfs_remove_rec(struct ev
+ eventfs_remove_rec(ei_child, head, level + 1);
+ }
+
++ ei->is_freed = 1;
++
+ list_del_rcu(&ei->list);
+ list_add_tail(&ei->del_list, head);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -23,6 +23,7 @@ struct tracefs_inode {
+ * @d_parent: pointer to the parent's dentry
+ * @d_children: The array of dentries to represent the files when created
+ * @data: The private data to pass to the callbacks
++ * @is_freed: Flag set if the eventfs is on its way to be freed
+ * @nr_entries: The number of items in @entries
+ */
+ struct eventfs_inode {
+@@ -38,14 +39,13 @@ struct eventfs_inode {
+ * Union - used for deletion
+ * @del_list: list of eventfs_inode to delete
+ * @rcu: eventfs_inode to delete in RCU
+- * @is_freed: node is freed if one of the above is set
+ */
+ union {
+ struct list_head del_list;
+ struct rcu_head rcu;
+- unsigned long is_freed;
+ };
+- int nr_entries;
++ unsigned int is_freed:1;
++ unsigned int nr_entries:31;
+ };
+
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:10 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:40 -0500
+Subject: eventfs: Remove "lookup" parameter from create_dir/file_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.069546514@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit b0f7e2d739b4aac131ea1662d086a07775097b05 upstream.
+
+The "lookup" parameter is a way to differentiate the call to
+create_file/dir_dentry() from when it's just a lookup (no need to up the
+dentry refcount) and accessed via a readdir (need to up the refcount).
+
+But in reality, it just makes the code more complex. Just up the refcount and
+let the caller decide to dput() the result or not.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240103102553.17a19cea@gandalf.local.home
+Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.517502710@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 55 +++++++++++++++++------------------------------
+ 1 file changed, 20 insertions(+), 35 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -390,16 +390,14 @@ void eventfs_set_ei_status_free(struct t
+ * @mode: The mode of the file.
+ * @data: The data to use to set the inode of the file with on open()
+ * @fops: The fops of the file to be created.
+- * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
+ *
+ * Create a dentry for a file of an eventfs_inode @ei and place it into the
+- * address located at @e_dentry. If the @e_dentry already has a dentry, then
+- * just do a dget() on it and return. Otherwise create the dentry and attach it.
++ * address located at @e_dentry.
+ */
+ static struct dentry *
+ create_file_dentry(struct eventfs_inode *ei, int idx,
+ struct dentry *parent, const char *name, umode_t mode, void *data,
+- const struct file_operations *fops, bool lookup)
++ const struct file_operations *fops)
+ {
+ struct eventfs_attr *attr = NULL;
+ struct dentry **e_dentry = &ei->d_children[idx];
+@@ -414,9 +412,7 @@ create_file_dentry(struct eventfs_inode
+ }
+ /* If the e_dentry already has a dentry, use it */
+ if (*e_dentry) {
+- /* lookup does not need to up the ref count */
+- if (!lookup)
+- dget(*e_dentry);
++ dget(*e_dentry);
+ mutex_unlock(&eventfs_mutex);
+ return *e_dentry;
+ }
+@@ -441,13 +437,12 @@ create_file_dentry(struct eventfs_inode
+ * way to being freed, don't return it. If e_dentry is NULL
+ * it means it was already freed.
+ */
+- if (ei->is_freed)
++ if (ei->is_freed) {
+ dentry = NULL;
+- else
++ } else {
+ dentry = *e_dentry;
+- /* The lookup does not need to up the dentry refcount */
+- if (dentry && !lookup)
+ dget(dentry);
++ }
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+@@ -465,9 +460,6 @@ create_file_dentry(struct eventfs_inode
+ }
+ mutex_unlock(&eventfs_mutex);
+
+- if (lookup)
+- dput(dentry);
+-
+ return dentry;
+ }
+
+@@ -500,13 +492,12 @@ static void eventfs_post_create_dir(stru
+ * @pei: The eventfs_inode parent of ei.
+ * @ei: The eventfs_inode to create the directory for
+ * @parent: The dentry of the parent of this directory
+- * @lookup: True if this is called by the lookup code
+ *
+ * This creates and attaches a directory dentry to the eventfs_inode @ei.
+ */
+ static struct dentry *
+ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+- struct dentry *parent, bool lookup)
++ struct dentry *parent)
+ {
+ struct dentry *dentry = NULL;
+
+@@ -518,11 +509,9 @@ create_dir_dentry(struct eventfs_inode *
+ return NULL;
+ }
+ if (ei->dentry) {
+- /* If the dentry already has a dentry, use it */
++ /* If the eventfs_inode already has a dentry, use it */
+ dentry = ei->dentry;
+- /* lookup does not need to up the ref count */
+- if (!lookup)
+- dget(dentry);
++ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+@@ -542,7 +531,7 @@ create_dir_dentry(struct eventfs_inode *
+ * way to being freed.
+ */
+ dentry = ei->dentry;
+- if (dentry && !lookup)
++ if (dentry)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+@@ -562,9 +551,6 @@ create_dir_dentry(struct eventfs_inode *
+ }
+ mutex_unlock(&eventfs_mutex);
+
+- if (lookup)
+- dput(dentry);
+-
+ return dentry;
+ }
+
+@@ -589,8 +575,8 @@ static struct dentry *eventfs_root_looku
+ struct eventfs_inode *ei;
+ struct dentry *ei_dentry = NULL;
+ struct dentry *ret = NULL;
++ struct dentry *d;
+ const char *name = dentry->d_name.name;
+- bool created = false;
+ umode_t mode;
+ void *data;
+ int idx;
+@@ -626,13 +612,10 @@ static struct dentry *eventfs_root_looku
+ ret = simple_lookup(dir, dentry, flags);
+ if (IS_ERR(ret))
+ goto out;
+- create_dir_dentry(ei, ei_child, ei_dentry, true);
+- created = true;
+- break;
+- }
+-
+- if (created)
++ d = create_dir_dentry(ei, ei_child, ei_dentry);
++ dput(d);
+ goto out;
++ }
+
+ for (i = 0; i < ei->nr_entries; i++) {
+ entry = &ei->entries[i];
+@@ -650,8 +633,8 @@ static struct dentry *eventfs_root_looku
+ ret = simple_lookup(dir, dentry, flags);
+ if (IS_ERR(ret))
+ goto out;
+- create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+- fops, true);
++ d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++ dput(d);
+ break;
+ }
+ }
+@@ -768,9 +751,10 @@ static int dcache_dir_open_wrapper(struc
+ inode_lock(parent->d_inode);
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- d = create_dir_dentry(ei, ei_child, parent, false);
++ d = create_dir_dentry(ei, ei_child, parent);
+ if (d) {
+ ret = add_dentries(&dentries, d, cnt);
++ dput(d);
+ if (ret < 0)
+ break;
+ cnt++;
+@@ -790,9 +774,10 @@ static int dcache_dir_open_wrapper(struc
+ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;
+- d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
++ d = create_file_dentry(ei, i, parent, name, mode, cdata, fops);
+ if (d) {
+ ret = add_dentries(&dentries, d, cnt);
++ dput(d);
+ if (ret < 0)
+ break;
+ cnt++;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:02 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:27 -0500
+Subject: eventfs: Remove special processing of dput() of events directory
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.956372816@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 62d65cac119d08d39f751b4e3e2063ed996edc05 upstream.
+
+The top level events directory is no longer special with regard to how it
+should be deleted. Remove the extra processing for it in
+eventfs_set_ei_status_free().
+
+Link: https://lkml.kernel.org/r/20231101172650.340876747@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 19 ++-----------------
+ 1 file changed, 2 insertions(+), 17 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -274,28 +274,11 @@ static void free_ei(struct eventfs_inode
+ */
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+- struct tracefs_inode *ti_parent;
+ struct eventfs_inode *ei;
+ int i;
+
+- /* The top level events directory may be freed by this */
+- if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+- mutex_lock(&eventfs_mutex);
+- ei = ti->private;
+- /* Nothing should access this, but just in case! */
+- ti->private = NULL;
+- mutex_unlock(&eventfs_mutex);
+-
+- free_ei(ei);
+- return;
+- }
+-
+ mutex_lock(&eventfs_mutex);
+
+- ti_parent = get_tracefs(dentry->d_parent->d_inode);
+- if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+- goto out;
+-
+ ei = dentry->d_fsdata;
+ if (!ei)
+ goto out;
+@@ -920,6 +903,8 @@ struct eventfs_inode *eventfs_create_eve
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
++ dentry->d_fsdata = ei;
++
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:26 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:56 -0500
+Subject: eventfs: Remove unused d_parent pointer field
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.681339731@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 408600be78cdb8c650a97ecc7ff411cb216811b5 upstream.
+
+It's never used
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.961772428@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 4 +---
+ fs/tracefs/internal.h | 2 --
+ 2 files changed, 1 insertion(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -680,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir
+ INIT_LIST_HEAD(&ei->list);
+
+ mutex_lock(&eventfs_mutex);
+- if (!parent->is_freed) {
++ if (!parent->is_freed)
+ list_add_tail(&ei->list, &parent->children);
+- ei->d_parent = parent->dentry;
+- }
+ mutex_unlock(&eventfs_mutex);
+
+ /* Was the parent freed? */
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -36,7 +36,6 @@ struct eventfs_attr {
+ * @name: the name of the directory to create
+ * @children: link list into the child eventfs_inode
+ * @dentry: the dentry of the directory
+- * @d_parent: pointer to the parent's dentry
+ * @d_children: The array of dentries to represent the files when created
+ * @entry_attrs: Saved mode and ownership of the @d_children
+ * @attr: Saved mode and ownership of eventfs_inode itself
+@@ -51,7 +50,6 @@ struct eventfs_inode {
+ const char *name;
+ struct list_head children;
+ struct dentry *dentry; /* Check is_freed to access */
+- struct dentry *d_parent;
+ struct dentry **d_children;
+ struct eventfs_attr *entry_attrs;
+ struct eventfs_attr attr;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:09 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:00 -0500
+Subject: eventfs: Restructure eventfs_inode structure to be more condensed
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.335266477@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 264424dfdd5cbd92bc5b5ddf93944929fc877fac upstream.
+
+The eventfs_inode structure has some holes in it. Rework the structure
+to be a bit more condensed, and also remove the no longer used llist
+field.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.002321438@goodmis.org
+
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h | 27 ++++++++++++---------------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -32,40 +32,37 @@ struct eventfs_attr {
+ /*
+ * struct eventfs_inode - hold the properties of the eventfs directories.
+ * @list: link list into the parent directory
++ * @rcu: Union with @list for freeing
++ * @children: link list into the child eventfs_inode
+ * @entries: the array of entries representing the files in the directory
+ * @name: the name of the directory to create
+- * @children: link list into the child eventfs_inode
+ * @events_dir: the dentry of the events directory
+ * @entry_attrs: Saved mode and ownership of the @d_children
+- * @attr: Saved mode and ownership of eventfs_inode itself
+ * @data: The private data to pass to the callbacks
++ * @attr: Saved mode and ownership of eventfs_inode itself
+ * @is_freed: Flag set if the eventfs is on its way to be freed
+ * Note if is_freed is set, then dentry is corrupted.
++ * @is_events: Flag set for only the top level "events" directory
+ * @nr_entries: The number of items in @entries
++ * @ino: The saved inode number
+ */
+ struct eventfs_inode {
+- struct kref kref;
+- struct list_head list;
++ union {
++ struct list_head list;
++ struct rcu_head rcu;
++ };
++ struct list_head children;
+ const struct eventfs_entry *entries;
+ const char *name;
+- struct list_head children;
+ struct dentry *events_dir;
+ struct eventfs_attr *entry_attrs;
+- struct eventfs_attr attr;
+ void *data;
++ struct eventfs_attr attr;
++ struct kref kref;
+ unsigned int is_freed:1;
+ unsigned int is_events:1;
+ unsigned int nr_entries:30;
+ unsigned int ino;
+- /*
+- * Union - used for deletion
+- * @llist: for calling dput() if needed after RCU
+- * @rcu: eventfs_inode to delete in RCU
+- */
+- union {
+- struct llist_node llist;
+- struct rcu_head rcu;
+- };
+ };
+
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:50 -0500
+Subject: eventfs: Save directory inodes in the eventfs_inode structure
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Geert Uytterhoeven <geert@linux-m68k.org>, Geert Uytterhoeven <geert+renesas@glider.be>, Kees Cook <keescook@chromium.org>
+Message-ID: <20240206120953.708915826@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 834bf76add3e6168038150f162cbccf1fd492a67 upstream.
+
+The eventfs inodes and directories are allocated when referenced. But this
+leaves the issue of keeping inode numbers consistent, because the number is
+only saved in the inode structure itself. When the inode is no longer
+referenced, it can be freed. When the file that the inode was representing
+is referenced again, the inode is once again created, but the inode number
+needs to be the same as it was before.
+
+Just making the inode numbers the same for all files is fine, but that
+does not work with directories. The find command will check for loops via
+the inode number and having the same inode number for directories triggers:
+
+ # find /sys/kernel/tracing
+find: File system loop detected;
+'/sys/kernel/debug/tracing/events/initcall/initcall_finish' is part of the same file system loop as
+'/sys/kernel/debug/tracing/events/initcall'.
+[..]
+
+Linus pointed out that the eventfs_inode structure ends with a single
+32bit int, and on 64 bit machines, there's likely a 4 byte hole due to
+alignment. We can use this hole to store the inode number for the
+eventfs_inode. All directories in eventfs are represented by an
+eventfs_inode and that data structure can hold its inode number.
+
+That last int was also purposely placed at the end of the structure to
+prevent holes from within. Now that there's a 4 byte number to hold the
+inode, both the inode number and the last integer can be moved up in the
+structure for better cache locality, where the llist and rcu fields can be
+moved to the end as they are only used when the eventfs_inode is being
+deleted.
+
+Link: https://lore.kernel.org/all/CAMuHMdXKiorg-jiuKoZpfZyDJ3Ynrfb8=X+c7x0Eewxn-YRdCA@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240122152748.46897388@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Fixes: 53c41052ba31 ("eventfs: Have the inodes all for files and directories all be the same")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 14 +++++++++++---
+ fs/tracefs/internal.h | 7 ++++---
+ 2 files changed, 15 insertions(+), 6 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex);
+
+ /* Choose something "unique" ;-) */
+ #define EVENTFS_FILE_INODE_INO 0x12c4e37
+-#define EVENTFS_DIR_INODE_INO 0x134b2f5
++
++/* Just try to make something consistent and unique */
++static int eventfs_dir_ino(struct eventfs_inode *ei)
++{
++ if (!ei->ino)
++ ei->ino = get_next_ino();
++
++ return ei->ino;
++}
+
+ /*
+ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+@@ -396,7 +404,7 @@ static struct dentry *create_dir(struct
+ inode->i_fop = &eventfs_file_operations;
+
+ /* All directories will have the same inode number */
+- inode->i_ino = EVENTFS_DIR_INODE_INO;
++ inode->i_ino = eventfs_dir_ino(ei);
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+@@ -802,7 +810,7 @@ static int eventfs_iterate(struct file *
+
+ name = ei_child->name;
+
+- ino = EVENTFS_DIR_INODE_INO;
++ ino = eventfs_dir_ino(ei_child);
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+ goto out_dec;
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -55,6 +55,10 @@ struct eventfs_inode {
+ struct eventfs_attr *entry_attrs;
+ struct eventfs_attr attr;
+ void *data;
++ unsigned int is_freed:1;
++ unsigned int is_events:1;
++ unsigned int nr_entries:30;
++ unsigned int ino;
+ /*
+ * Union - used for deletion
+ * @llist: for calling dput() if needed after RCU
+@@ -64,9 +68,6 @@ struct eventfs_inode {
+ struct llist_node llist;
+ struct rcu_head rcu;
+ };
+- unsigned int is_freed:1;
+- unsigned int is_events:1;
+- unsigned int nr_entries:30;
+ };
+
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:13 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:24 -0500
+Subject: eventfs: Save ownership and mode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.464245650@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 28e12c09f5aa081b2d13d1340e3610070b6c624d upstream.
+
+Now that inodes and dentries are created on the fly, they are also
+reclaimed on memory pressure. Since the ownership and file mode are saved
+in the inode, if they are freed, any changes to the ownership and mode
+will be lost.
+
+To counter this, if the user changes the permissions or ownership, save
+them, and when creating the inodes again, restore those changes.
+
+Link: https://lkml.kernel.org/r/20231101172649.691841445@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 148 ++++++++++++++++++++++++++++++++++++++++++-----
+ fs/tracefs/internal.h | 16 +++++
+ 2 files changed, 151 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -40,6 +40,15 @@ static DEFINE_MUTEX(eventfs_mutex);
+ */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+
++/* Mode is unsigned short, use the upper bits for flags */
++enum {
++ EVENTFS_SAVE_MODE = BIT(16),
++ EVENTFS_SAVE_UID = BIT(17),
++ EVENTFS_SAVE_GID = BIT(18),
++};
++
++#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+@@ -47,8 +56,89 @@ static int dcache_dir_open_wrapper(struc
+ static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+ static int eventfs_release(struct inode *inode, struct file *file);
+
++static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
++{
++ unsigned int ia_valid = iattr->ia_valid;
++
++ if (ia_valid & ATTR_MODE) {
++ attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
++ (iattr->ia_mode & EVENTFS_MODE_MASK) |
++ EVENTFS_SAVE_MODE;
++ }
++ if (ia_valid & ATTR_UID) {
++ attr->mode |= EVENTFS_SAVE_UID;
++ attr->uid = iattr->ia_uid;
++ }
++ if (ia_valid & ATTR_GID) {
++ attr->mode |= EVENTFS_SAVE_GID;
++ attr->gid = iattr->ia_gid;
++ }
++}
++
++static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
++ struct iattr *iattr)
++{
++ const struct eventfs_entry *entry;
++ struct eventfs_inode *ei;
++ const char *name;
++ int ret;
++
++ mutex_lock(&eventfs_mutex);
++ ei = dentry->d_fsdata;
++ /* The LSB is set when the eventfs_inode is being freed */
++ if (((unsigned long)ei & 1UL) || ei->is_freed) {
++ /* Do not allow changes if the event is about to be removed. */
++ mutex_unlock(&eventfs_mutex);
++ return -ENODEV;
++ }
++
++ /* Preallocate the children mode array if necessary */
++ if (!(dentry->d_inode->i_mode & S_IFDIR)) {
++ if (!ei->entry_attrs) {
++ ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
++ GFP_KERNEL);
++ if (!ei->entry_attrs) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ }
++ }
++
++ ret = simple_setattr(idmap, dentry, iattr);
++ if (ret < 0)
++ goto out;
++
++ /*
++ * If this is a dir, then update the ei cache, only the file
++ * mode is saved in the ei->m_children, and the ownership is
++ * determined by the parent directory.
++ */
++ if (dentry->d_inode->i_mode & S_IFDIR) {
++ update_attr(&ei->attr, iattr);
++
++ } else {
++ name = dentry->d_name.name;
++
++ for (int i = 0; i < ei->nr_entries; i++) {
++ entry = &ei->entries[i];
++ if (strcmp(name, entry->name) == 0) {
++ update_attr(&ei->entry_attrs[i], iattr);
++ break;
++ }
++ }
++ }
++ out:
++ mutex_unlock(&eventfs_mutex);
++ return ret;
++}
++
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+ .lookup = eventfs_root_lookup,
++ .setattr = eventfs_set_attr,
++};
++
++static const struct inode_operations eventfs_file_inode_operations = {
++ .setattr = eventfs_set_attr,
+ };
+
+ static const struct file_operations eventfs_file_operations = {
+@@ -59,10 +149,30 @@ static const struct file_operations even
+ .release = eventfs_release,
+ };
+
++static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
++{
++ if (!attr) {
++ inode->i_mode = mode;
++ return;
++ }
++
++ if (attr->mode & EVENTFS_SAVE_MODE)
++ inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
++ else
++ inode->i_mode = mode;
++
++ if (attr->mode & EVENTFS_SAVE_UID)
++ inode->i_uid = attr->uid;
++
++ if (attr->mode & EVENTFS_SAVE_GID)
++ inode->i_gid = attr->gid;
++}
++
+ /**
+ * create_file - create a file in the tracefs filesystem
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have.
++ * @attr: saved attributes changed by user
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+@@ -72,6 +182,7 @@ static const struct file_operations even
+ * call.
+ */
+ static struct dentry *create_file(const char *name, umode_t mode,
++ struct eventfs_attr *attr,
+ struct dentry *parent, void *data,
+ const struct file_operations *fop)
+ {
+@@ -95,7 +206,10 @@ static struct dentry *create_file(const
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+- inode->i_mode = mode;
++ /* If the user updated the directory's attributes, use them */
++ update_inode_attr(inode, attr, mode);
++
++ inode->i_op = &eventfs_file_inode_operations;
+ inode->i_fop = fop;
+ inode->i_private = data;
+
+@@ -108,19 +222,19 @@ static struct dentry *create_file(const
+
+ /**
+ * create_dir - create a dir in the tracefs filesystem
+- * @name: the name of the file to create.
++ * @ei: the eventfs_inode that represents the directory to create
+ * @parent: parent dentry for this file.
+ *
+ * This function will create a dentry for a directory represented by
+ * a eventfs_inode.
+ */
+-static struct dentry *create_dir(const char *name, struct dentry *parent)
++static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
+ {
+ struct tracefs_inode *ti;
+ struct dentry *dentry;
+ struct inode *inode;
+
+- dentry = eventfs_start_creating(name, parent);
++ dentry = eventfs_start_creating(ei->name, parent);
+ if (IS_ERR(dentry))
+ return dentry;
+
+@@ -128,7 +242,9 @@ static struct dentry *create_dir(const c
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++ /* If the user updated the directory's attributes, use them */
++ update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
++
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+
+@@ -146,6 +262,7 @@ static void free_ei(struct eventfs_inode
+ {
+ kfree_const(ei->name);
+ kfree(ei->d_children);
++ kfree(ei->entry_attrs);
+ kfree(ei);
+ }
+
+@@ -231,7 +348,7 @@ void eventfs_set_ei_status_free(struct t
+ /**
+ * create_file_dentry - create a dentry for a file of an eventfs_inode
+ * @ei: the eventfs_inode that the file will be created under
+- * @e_dentry: a pointer to the d_children[] of the @ei
++ * @idx: the index into the d_children[] of the @ei
+ * @parent: The parent dentry of the created file.
+ * @name: The name of the file to create
+ * @mode: The mode of the file.
+@@ -244,10 +361,12 @@ void eventfs_set_ei_status_free(struct t
+ * just do a dget() on it and return. Otherwise create the dentry and attach it.
+ */
+ static struct dentry *
+-create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
++create_file_dentry(struct eventfs_inode *ei, int idx,
+ struct dentry *parent, const char *name, umode_t mode, void *data,
+ const struct file_operations *fops, bool lookup)
+ {
++ struct eventfs_attr *attr = NULL;
++ struct dentry **e_dentry = &ei->d_children[idx];
+ struct dentry *dentry;
+ bool invalidate = false;
+
+@@ -264,13 +383,18 @@ create_file_dentry(struct eventfs_inode
+ mutex_unlock(&eventfs_mutex);
+ return *e_dentry;
+ }
++
++ /* ei->entry_attrs are protected by SRCU */
++ if (ei->entry_attrs)
++ attr = &ei->entry_attrs[idx];
++
+ mutex_unlock(&eventfs_mutex);
+
+ /* The lookup already has the parent->d_inode locked */
+ if (!lookup)
+ inode_lock(parent->d_inode);
+
+- dentry = create_file(name, mode, parent, data, fops);
++ dentry = create_file(name, mode, attr, parent, data, fops);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+@@ -378,7 +502,7 @@ create_dir_dentry(struct eventfs_inode *
+ if (!lookup)
+ inode_lock(parent->d_inode);
+
+- dentry = create_dir(ei->name, parent);
++ dentry = create_dir(ei, parent);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+@@ -495,8 +619,7 @@ static struct dentry *eventfs_root_looku
+ if (r <= 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+- create_file_dentry(ei, &ei->d_children[i],
+- ei_dentry, name, mode, cdata,
++ create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+ fops, true);
+ break;
+ }
+@@ -629,8 +752,7 @@ static int dcache_dir_open_wrapper(struc
+ r = entry->callback(name, &mode, &cdata, &fops);
+ if (r <= 0)
+ continue;
+- d = create_file_dentry(ei, &ei->d_children[i],
+- parent, name, mode, cdata, fops, false);
++ d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
+ if (d) {
+ ret = add_dentries(&dentries, d, cnt);
+ if (ret < 0)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -14,6 +14,18 @@ struct tracefs_inode {
+ };
+
+ /*
++ * struct eventfs_attr - cache the mode and ownership of a eventfs entry
++ * @mode: saved mode plus flags of what is saved
++ * @uid: saved uid if changed
++ * @gid: saved gid if changed
++ */
++struct eventfs_attr {
++ int mode;
++ kuid_t uid;
++ kgid_t gid;
++};
++
++/*
+ * struct eventfs_inode - hold the properties of the eventfs directories.
+ * @list: link list into the parent directory
+ * @entries: the array of entries representing the files in the directory
+@@ -22,6 +34,8 @@ struct tracefs_inode {
+ * @dentry: the dentry of the directory
+ * @d_parent: pointer to the parent's dentry
+ * @d_children: The array of dentries to represent the files when created
++ * @entry_attrs: Saved mode and ownership of the @d_children
++ * @attr: Saved mode and ownership of eventfs_inode itself
+ * @data: The private data to pass to the callbacks
+ * @is_freed: Flag set if the eventfs is on its way to be freed
+ * Note if is_freed is set, then dentry is corrupted.
+@@ -35,6 +49,8 @@ struct eventfs_inode {
+ struct dentry *dentry; /* Check is_freed to access */
+ struct dentry *d_parent;
+ struct dentry **d_children;
++ struct eventfs_attr *entry_attrs;
++ struct eventfs_attr attr;
+ void *data;
+ /*
+ * Union - used for deletion
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:48 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:46 -0500
+Subject: eventfs: Shortcut eventfs_iterate() by skipping entries already read
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120953.046426517@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 1de94b52d5e8d8b32f0252f14fad1f1edc2e71f1 upstream.
+
+As the ei->entries array is fixed for the duration of the eventfs_inode,
+it can be used to skip over already read entries in eventfs_iterate().
+
+That is, if ctx->pos is greater than zero, there's no reason to loop
+across the ei->entries array for the entries less than ctx->pos.
+Instead, start the lookup of the entries at the current ctx->pos.
+
+Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.494956957@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -746,21 +746,15 @@ static int eventfs_iterate(struct file *
+ if (!ei || !ei_dentry)
+ goto out;
+
+- ret = 0;
+-
+ /*
+ * Need to create the dentries and inodes to have a consistent
+ * inode number.
+ */
+- for (i = 0; i < ei->nr_entries; i++) {
+- void *cdata = ei->data;
+-
+- if (c > 0) {
+- c--;
+- continue;
+- }
++ ret = 0;
+
+- ctx->pos++;
++ /* Start at 'c' to jump over already read entries */
++ for (i = c; i < ei->nr_entries; i++, ctx->pos++) {
++ void *cdata = ei->data;
+
+ entry = &ei->entries[i];
+ name = entry->name;
+@@ -769,7 +763,7 @@ static int eventfs_iterate(struct file *
+ /* If ei->is_freed then just bail here, nothing more to do */
+ if (ei->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+- goto out_dec;
++ goto out;
+ }
+ r = entry->callback(name, &mode, &cdata, &fops);
+ mutex_unlock(&eventfs_mutex);
+@@ -778,14 +772,17 @@ static int eventfs_iterate(struct file *
+
+ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+ if (!dentry)
+- goto out_dec;
++ goto out;
+ ino = dentry->d_inode->i_ino;
+ dput(dentry);
+
+ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+- goto out_dec;
++ goto out;
+ }
+
++ /* Subtract the skipped entries above */
++ c -= min((unsigned int)c, (unsigned int)ei->nr_entries);
++
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:08 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:41 -0500
+Subject: eventfs: Stop using dcache_readdir() for getdents()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.237926780@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 493ec81a8fb8e4ada6f223b8b73791a1280d4774 upstream.
+
+The eventfs creates dynamically allocated dentries and inodes. Using the
+dcache_readdir() logic for its own directory lookups requires hiding the
+cursor of the dcache logic and playing games to allow the dcache_readdir()
+to still have access to the cursor while the eventfs saved what it created
+and what it needs to release.
+
+Instead, just give eventfs its own iterate_shared callback function that
+will fill in the directory entries. This simplifies the code quite a bit.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 194 +++++++++++++++--------------------------------
+ 1 file changed, 64 insertions(+), 130 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -52,9 +52,7 @@ enum {
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
+-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+-static int eventfs_release(struct inode *inode, struct file *file);
++static int eventfs_iterate(struct file *file, struct dir_context *ctx);
+
+ static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
+ {
+@@ -148,11 +146,9 @@ static const struct inode_operations eve
+ };
+
+ static const struct file_operations eventfs_file_operations = {
+- .open = dcache_dir_open_wrapper,
+ .read = generic_read_dir,
+- .iterate_shared = dcache_readdir_wrapper,
++ .iterate_shared = eventfs_iterate,
+ .llseek = generic_file_llseek,
+- .release = eventfs_release,
+ };
+
+ /* Return the evenfs_inode of the "events" directory */
+@@ -643,128 +639,87 @@ static struct dentry *eventfs_root_looku
+ return ret;
+ }
+
+-struct dentry_list {
+- void *cursor;
+- struct dentry **dentries;
+-};
+-
+-/**
+- * eventfs_release - called to release eventfs file/dir
+- * @inode: inode to be released
+- * @file: file to be released (not used)
+- */
+-static int eventfs_release(struct inode *inode, struct file *file)
+-{
+- struct tracefs_inode *ti;
+- struct dentry_list *dlist = file->private_data;
+- void *cursor;
+- int i;
+-
+- ti = get_tracefs(inode);
+- if (!(ti->flags & TRACEFS_EVENT_INODE))
+- return -EINVAL;
+-
+- if (WARN_ON_ONCE(!dlist))
+- return -EINVAL;
+-
+- for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
+- dput(dlist->dentries[i]);
+- }
+-
+- cursor = dlist->cursor;
+- kfree(dlist->dentries);
+- kfree(dlist);
+- file->private_data = cursor;
+- return dcache_dir_close(inode, file);
+-}
+-
+-static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
+-{
+- struct dentry **tmp;
+-
+- tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
+- if (!tmp)
+- return -1;
+- tmp[cnt] = d;
+- tmp[cnt + 1] = NULL;
+- *dentries = tmp;
+- return 0;
+-}
+-
+-/**
+- * dcache_dir_open_wrapper - eventfs open wrapper
+- * @inode: not used
+- * @file: dir to be opened (to create it's children)
+- *
+- * Used to dynamic create file/dir with-in @file, all the
+- * file/dir will be created. If already created then references
+- * will be increased
++/*
++ * Walk the children of a eventfs_inode to fill in getdents().
+ */
+-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
++static int eventfs_iterate(struct file *file, struct dir_context *ctx)
+ {
+ const struct file_operations *fops;
++ struct inode *f_inode = file_inode(file);
+ const struct eventfs_entry *entry;
+ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+- struct dentry_list *dlist;
+- struct dentry **dentries = NULL;
+- struct dentry *parent = file_dentry(file);
+- struct dentry *d;
+- struct inode *f_inode = file_inode(file);
+- const char *name = parent->d_name.name;
++ struct dentry *ei_dentry = NULL;
++ struct dentry *dentry;
++ const char *name;
+ umode_t mode;
+- void *data;
+- int cnt = 0;
+ int idx;
+- int ret;
+- int i;
+- int r;
++ int ret = -EINVAL;
++ int ino;
++ int i, r, c;
++
++ if (!dir_emit_dots(file, ctx))
++ return 0;
+
+ ti = get_tracefs(f_inode);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return -EINVAL;
+
+- if (WARN_ON_ONCE(file->private_data))
+- return -EINVAL;
++ c = ctx->pos - 2;
+
+ idx = srcu_read_lock(&eventfs_srcu);
+
+ mutex_lock(&eventfs_mutex);
+ ei = READ_ONCE(ti->private);
++ if (ei && !ei->is_freed)
++ ei_dentry = READ_ONCE(ei->dentry);
+ mutex_unlock(&eventfs_mutex);
+
+- if (!ei) {
+- srcu_read_unlock(&eventfs_srcu, idx);
+- return -EINVAL;
+- }
+-
+-
+- data = ei->data;
++ if (!ei || !ei_dentry)
++ goto out;
+
+- dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
+- if (!dlist) {
+- srcu_read_unlock(&eventfs_srcu, idx);
+- return -ENOMEM;
+- }
++ ret = 0;
+
+- inode_lock(parent->d_inode);
++ /*
++ * Need to create the dentries and inodes to have a consistent
++ * inode number.
++ */
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- d = create_dir_dentry(ei, ei_child, parent);
+- if (d) {
+- ret = add_dentries(&dentries, d, cnt);
+- dput(d);
+- if (ret < 0)
+- break;
+- cnt++;
++
++ if (c > 0) {
++ c--;
++ continue;
+ }
++
++ if (ei_child->is_freed)
++ continue;
++
++ name = ei_child->name;
++
++ dentry = create_dir_dentry(ei, ei_child, ei_dentry);
++ if (!dentry)
++ goto out;
++ ino = dentry->d_inode->i_ino;
++ dput(dentry);
++
++ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
++ goto out;
++ ctx->pos++;
+ }
+
+ for (i = 0; i < ei->nr_entries; i++) {
+- void *cdata = data;
++ void *cdata = ei->data;
++
++ if (c > 0) {
++ c--;
++ continue;
++ }
++
+ entry = &ei->entries[i];
+ name = entry->name;
++
+ mutex_lock(&eventfs_mutex);
+ /* If ei->is_freed, then the event itself may be too */
+ if (!ei->is_freed)
+@@ -774,42 +729,21 @@ static int dcache_dir_open_wrapper(struc
+ mutex_unlock(&eventfs_mutex);
+ if (r <= 0)
+ continue;
+- d = create_file_dentry(ei, i, parent, name, mode, cdata, fops);
+- if (d) {
+- ret = add_dentries(&dentries, d, cnt);
+- dput(d);
+- if (ret < 0)
+- break;
+- cnt++;
+- }
+- }
+- inode_unlock(parent->d_inode);
+- srcu_read_unlock(&eventfs_srcu, idx);
+- ret = dcache_dir_open(inode, file);
+
+- /*
+- * dcache_dir_open() sets file->private_data to a dentry cursor.
+- * Need to save that but also save all the dentries that were
+- * opened by this function.
+- */
+- dlist->cursor = file->private_data;
+- dlist->dentries = dentries;
+- file->private_data = dlist;
+- return ret;
+-}
++ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++ if (!dentry)
++ goto out;
++ ino = dentry->d_inode->i_ino;
++ dput(dentry);
+
+-/*
+- * This just sets the file->private_data back to the cursor and back.
+- */
+-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
+-{
+- struct dentry_list *dlist = file->private_data;
+- int ret;
++ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
++ goto out;
++ ctx->pos++;
++ }
++ ret = 1;
++ out:
++ srcu_read_unlock(&eventfs_srcu, idx);
+
+- file->private_data = dlist->cursor;
+- ret = dcache_readdir(file, ctx);
+- dlist->cursor = file->private_data;
+- file->private_data = dlist;
+ return ret;
+ }
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:41 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:23 -0500
+Subject: eventfs: Test for ei->is_freed when accessing ei->dentry
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>, Linux Kernel Functional Testing <lkft@linaro.org>, Naresh Kamboju <naresh.kamboju@linaro.org>, Beau Belgrave <beaub@linux.microsoft.com>
+Message-ID: <20240206120949.301438848@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 77a06c33a22d13f3a6e31f06f6ee6bca666e6898 upstream.
+
+The eventfs_inode (ei) is protected by SRCU, but the ei->dentry is not. It
+is protected by the eventfs_mutex. Anytime the eventfs_mutex is released,
+and access to the ei->dentry needs to be done, it should first check if
+ei->is_freed is set under the eventfs_mutex. If it is, then the ei->dentry
+is invalid and must not be used. The ei->dentry must only be accessed
+under the eventfs_mutex and after checking if ei->is_freed is set.
+
+When the ei is being freed, it will (under the eventfs_mutex) set is_freed
+and at the same time move the dentry to a free list to be cleared after
+the eventfs_mutex is released. This means that any access to the
+ei->dentry must check first if ei->is_freed is set, because if it is, then
+the dentry is on its way to be freed.
+
+Also add comments to describe this better.
+
+Link: https://lore.kernel.org/all/CA+G9fYt6pY+tMZEOg=SoEywQOe19fGP3uR15SGowkdK+_X85Cg@mail.gmail.com/
+Link: https://lore.kernel.org/all/CA+G9fYuDP3hVQ3t7FfrBAjd_WFVSurMgCepTxunSJf=MTe=6aA@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20231101172649.477608228@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Beau Belgrave <beaub@linux.microsoft.com>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Tested-by: Beau Belgrave <beaub@linux.microsoft.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 45 +++++++++++++++++++++++++++++++++++++++------
+ fs/tracefs/internal.h | 3 ++-
+ 2 files changed, 41 insertions(+), 7 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -24,7 +24,20 @@
+ #include <linux/delay.h>
+ #include "internal.h"
+
++/*
++ * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
++ * to the ei->dentry must be done under this mutex and after checking
++ * if ei->is_freed is not set. The ei->dentry is released under the
++ * mutex at the same time ei->is_freed is set. If ei->is_freed is set
++ * then the ei->dentry is invalid.
++ */
+ static DEFINE_MUTEX(eventfs_mutex);
++
++/*
++ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
++ * its parent's list and will have is_freed set (under eventfs_mutex).
++ * After the SRCU grace period is over, the ei may be freed.
++ */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+@@ -239,6 +252,10 @@ create_file_dentry(struct eventfs_inode
+ bool invalidate = false;
+
+ mutex_lock(&eventfs_mutex);
++ if (ei->is_freed) {
++ mutex_unlock(&eventfs_mutex);
++ return NULL;
++ }
+ /* If the e_dentry already has a dentry, use it */
+ if (*e_dentry) {
+ /* lookup does not need to up the ref count */
+@@ -312,6 +329,8 @@ static void eventfs_post_create_dir(stru
+ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+
++ lockdep_assert_held(&eventfs_mutex);
++
+ /* srcu lock already held */
+ /* fill parent-child relation */
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+@@ -325,6 +344,7 @@ static void eventfs_post_create_dir(stru
+
+ /**
+ * create_dir_dentry - Create a directory dentry for the eventfs_inode
++ * @pei: The eventfs_inode parent of ei.
+ * @ei: The eventfs_inode to create the directory for
+ * @parent: The dentry of the parent of this directory
+ * @lookup: True if this is called by the lookup code
+@@ -332,12 +352,17 @@ static void eventfs_post_create_dir(stru
+ * This creates and attaches a directory dentry to the eventfs_inode @ei.
+ */
+ static struct dentry *
+-create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool lookup)
++create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
++ struct dentry *parent, bool lookup)
+ {
+ bool invalidate = false;
+ struct dentry *dentry = NULL;
+
+ mutex_lock(&eventfs_mutex);
++ if (pei->is_freed || ei->is_freed) {
++ mutex_unlock(&eventfs_mutex);
++ return NULL;
++ }
+ if (ei->dentry) {
+ /* If the dentry already has a dentry, use it */
+ dentry = ei->dentry;
+@@ -440,7 +465,7 @@ static struct dentry *eventfs_root_looku
+ */
+ mutex_lock(&eventfs_mutex);
+ ei = READ_ONCE(ti->private);
+- if (ei)
++ if (ei && !ei->is_freed)
+ ei_dentry = READ_ONCE(ei->dentry);
+ mutex_unlock(&eventfs_mutex);
+
+@@ -454,7 +479,7 @@ static struct dentry *eventfs_root_looku
+ if (strcmp(ei_child->name, name) != 0)
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+- create_dir_dentry(ei_child, ei_dentry, true);
++ create_dir_dentry(ei, ei_child, ei_dentry, true);
+ created = true;
+ break;
+ }
+@@ -588,7 +613,7 @@ static int dcache_dir_open_wrapper(struc
+
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+- d = create_dir_dentry(ei_child, parent, false);
++ d = create_dir_dentry(ei, ei_child, parent, false);
+ if (d) {
+ ret = add_dentries(&dentries, d, cnt);
+ if (ret < 0)
+@@ -705,12 +730,20 @@ struct eventfs_inode *eventfs_create_dir
+ ei->nr_entries = size;
+ ei->data = data;
+ INIT_LIST_HEAD(&ei->children);
++ INIT_LIST_HEAD(&ei->list);
+
+ mutex_lock(&eventfs_mutex);
+- list_add_tail(&ei->list, &parent->children);
+- ei->d_parent = parent->dentry;
++ if (!parent->is_freed) {
++ list_add_tail(&ei->list, &parent->children);
++ ei->d_parent = parent->dentry;
++ }
+ mutex_unlock(&eventfs_mutex);
+
++ /* Was the parent freed? */
++ if (list_empty(&ei->list)) {
++ free_ei(ei);
++ ei = NULL;
++ }
+ return ei;
+ }
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -24,6 +24,7 @@ struct tracefs_inode {
+ * @d_children: The array of dentries to represent the files when created
+ * @data: The private data to pass to the callbacks
+ * @is_freed: Flag set if the eventfs is on its way to be freed
++ * Note if is_freed is set, then dentry is corrupted.
+ * @nr_entries: The number of items in @entries
+ */
+ struct eventfs_inode {
+@@ -31,7 +32,7 @@ struct eventfs_inode {
+ const struct eventfs_entry *entries;
+ const char *name;
+ struct list_head children;
+- struct dentry *dentry;
++ struct dentry *dentry; /* Check is_freed to access */
+ struct dentry *d_parent;
+ struct dentry **d_children;
+ void *data;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:14 -0500
+Subject: eventfs: Use ERR_CAST() in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Kees Cook <keescook@chromium.org>, Nathan Chancellor <nathan@kernel.org>
+Message-ID: <20240206120947.843106843@rostedt.homelinux.com>
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit b8a555dc31e5aa18d976de0bc228006e398a2e7d upstream.
+
+When building with clang and CONFIG_RANDSTRUCT_FULL=y, there is an error
+due to a cast in eventfs_create_events_dir():
+
+ fs/tracefs/event_inode.c:734:10: error: casting from randomized structure pointer type 'struct dentry *' to 'struct eventfs_inode *'
+ 734 | return (struct eventfs_inode *)dentry;
+ | ^
+ 1 error generated.
+
+Use the ERR_CAST() function to resolve the error, as it was designed for
+this exact situation (casting an error pointer to another type).
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231018-ftrace-fix-clang-randstruct-v1-1-338cb214abfb@kernel.org
+
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1947
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -731,7 +731,7 @@ struct eventfs_inode *eventfs_create_eve
+ return NULL;
+
+ if (IS_ERR(dentry))
+- return (struct eventfs_inode *)dentry;
++ return ERR_CAST(dentry);
+
+ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ if (!ei)
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:13 -0500
+Subject: eventfs: Use eventfs_remove_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, kernel test robot <lkp@intel.com>
+Message-ID: <20240206120947.686070579@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 2819f23ac12ce93ff79ca7a54597df9a4a1f6331 upstream.
+
+The update to removing the eventfs_file changed the way the events top
+level directory was handled. Instead of returning a dentry, it now returns
+the eventfs_inode. In this change, the removing of the events top level
+directory is not much different than removing any of the other
+directories. Because of this, the removal just called eventfs_remove_dir()
+instead of eventfs_remove_events_dir().
+
+Although eventfs_remove_dir() does the clean up, it misses out on the
+dget() of the ei->dentry done in eventfs_create_events_dir(). It makes
+more sense to match eventfs_create_events_dir() with a specific function
+eventfs_remove_events_dir() and this specific function can then perform
+the dput() to the dentry that had the dget() when it was created.
+
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202310051743.y9EobbUr-lkp@intel.com/
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 19 +++++++------------
+ include/linux/tracefs.h | 1 +
+ kernel/trace/trace_events.c | 2 +-
+ 3 files changed, 9 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -901,22 +901,17 @@ void eventfs_remove_dir(struct eventfs_i
+ }
+
+ /**
+- * eventfs_remove_events_dir - remove eventfs dir or file from list
+- * @dentry: events's dentry to be removed.
++ * eventfs_remove_events_dir - remove the top level eventfs directory
++ * @ei: the event_inode returned by eventfs_create_events_dir().
+ *
+- * This function remove events main directory
++ * This function removes the events main directory
+ */
+-void eventfs_remove_events_dir(struct dentry *dentry)
++void eventfs_remove_events_dir(struct eventfs_inode *ei)
+ {
+- struct tracefs_inode *ti;
++ struct dentry *dentry = ei->dentry;
+
+- if (!dentry || !dentry->d_inode)
+- return;
++ eventfs_remove_dir(ei);
+
+- ti = get_tracefs(dentry->d_inode);
+- if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+- return;
+-
+- d_invalidate(dentry);
++ /* Matches the dget() from eventfs_create_events_dir() */
+ dput(dentry);
+ }
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -41,6 +41,7 @@ struct eventfs_inode *eventfs_create_dir
+ const struct eventfs_entry *entries,
+ int size, void *data);
+
++void eventfs_remove_events_dir(struct eventfs_inode *ei);
+ void eventfs_remove_dir(struct eventfs_inode *ei);
+
+ struct dentry *tracefs_create_file(const char *name, umode_t mode,
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -3893,7 +3893,7 @@ int event_trace_del_tracer(struct trace_
+
+ down_write(&trace_event_sem);
+ __trace_remove_event_dirs(tr);
+- eventfs_remove_dir(tr->event_dir);
++ eventfs_remove_events_dir(tr->event_dir);
+ up_write(&trace_event_sem);
+
+ tr->event_dir = NULL;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:31 -0500
+Subject: eventfs: Use GFP_NOFS for allocation when eventfs_mutex is held
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.611237633@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 4763d635c907baed212664dc579dde1663bb2676 upstream.
+
+If memory reclaim happens, it can reclaim file system pages. The file
+system pages from eventfs may take the eventfs_mutex on reclaim. This
+means that allocation while holding the eventfs_mutex must not call into
+filesystem reclaim. A lockdep splat uncovered this.
+
+Link: https://lkml.kernel.org/r/20231121231112.373501894@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode")
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_i
+ if (!(dentry->d_inode->i_mode & S_IFDIR)) {
+ if (!ei->entry_attrs) {
+ ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
+- GFP_KERNEL);
++ GFP_NOFS);
+ if (!ei->entry_attrs) {
+ ret = -ENOMEM;
+ goto out;
+@@ -627,7 +627,7 @@ static int add_dentries(struct dentry **
+ {
+ struct dentry **tmp;
+
+- tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
++ tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
+ if (!tmp)
+ return -1;
+ tmp[cnt] = d;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:47 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:49 -0500
+Subject: eventfs: Use kcalloc() instead of kzalloc()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Erick Archer <erick.archer@gmx.com>, "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Message-ID: <20240206120953.546131126@rostedt.homelinux.com>
+
+From: Erick Archer <erick.archer@gmx.com>
+
+commit 1057066009c4325bb1d8430c9274894d0860e7c3 upstream.
+
+As noted in the "Deprecated Interfaces, Language Features, Attributes,
+and Conventions" documentation [1], size calculations (especially
+multiplication) should not be performed in memory allocator (or similar)
+function arguments due to the risk of them overflowing. This could lead
+to values wrapping around and a smaller allocation being made than the
+caller was expecting. Using those allocations could lead to linear
+overflows of heap memory and other misbehaviors.
+
+So, use the purpose specific kcalloc() function instead of the argument
+size * count in the kzalloc() function.
+
+[1] https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240115181658.4562-1-erick.archer@gmx.com
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Link: https://github.com/KSPP/linux/issues/162
+Signed-off-by: Erick Archer <erick.archer@gmx.com>
+Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -97,7 +97,7 @@ static int eventfs_set_attr(struct mnt_i
+ /* Preallocate the children mode array if necessary */
+ if (!(dentry->d_inode->i_mode & S_IFDIR)) {
+ if (!ei->entry_attrs) {
+- ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
++ ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs),
+ GFP_NOFS);
+ if (!ei->entry_attrs) {
+ ret = -ENOMEM;
+@@ -874,7 +874,7 @@ struct eventfs_inode *eventfs_create_dir
+ }
+
+ if (size) {
+- ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++ ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+ if (!ei->d_children) {
+ kfree_const(ei->name);
+ kfree(ei);
+@@ -941,7 +941,7 @@ struct eventfs_inode *eventfs_create_eve
+ goto fail;
+
+ if (size) {
+- ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++ ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+ if (!ei->d_children)
+ goto fail;
+ }
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:28 -0500
+Subject: eventfs: Use simple_recursive_removal() to clean up dentries
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120950.121281039@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 407c6726ca71b33330d2d6345d9ea7ebc02575e9 upstream.
+
+Looking at how dentry is removed via the tracefs system, I found that
+eventfs does not do everything that it did under tracefs. The tracefs
+removal of a dentry calls simple_recursive_removal() that does a lot more
+than a simple d_invalidate().
+
+It should be a requirement that if any eventfs_inode has a dentry, so
+does its parent. When removing an eventfs_inode, if it has a dentry, a call
+to simple_recursive_removal() on that dentry should clean up all the
+dentries underneath it.
+
+Add WARN_ON_ONCE() to check for the parent having a dentry if any children
+do.
+
+Link: https://lore.kernel.org/all/20231101022553.GE1957730@ZenIV/
+Link: https://lkml.kernel.org/r/20231101172650.552471568@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 77 ++++++++++++++++++++++++++---------------------
+ fs/tracefs/internal.h | 2 -
+ 2 files changed, 44 insertions(+), 35 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -967,30 +967,29 @@ static void unhook_dentry(struct dentry
+ {
+ if (!dentry)
+ return;
+-
+- /* Keep the dentry from being freed yet (see eventfs_workfn()) */
++ /*
++ * Need to add a reference to the dentry that is expected by
++ * simple_recursive_removal(), which will include a dput().
++ */
+ dget(dentry);
+
+- dentry->d_fsdata = NULL;
+- d_invalidate(dentry);
+- mutex_lock(&eventfs_mutex);
+- /* dentry should now have at least a single reference */
+- WARN_ONCE((int)d_count(dentry) < 1,
+- "dentry %px (%s) less than one reference (%d) after invalidate\n",
+- dentry, dentry->d_name.name, d_count(dentry));
+- mutex_unlock(&eventfs_mutex);
++ /*
++ * Also add a reference for the dput() in eventfs_workfn().
++ * That is required as that dput() will free the ei after
++ * the SRCU grace period is over.
++ */
++ dget(dentry);
+ }
+
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
+- * @head: the list head to place the deleted @ei and children
+ * @level: prevent recursion from going more than 3 levels deep.
+ *
+ * This function recursively removes eventfs_inodes which
+ * contains info of files and/or directories.
+ */
+-static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
++static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
+ {
+ struct eventfs_inode *ei_child;
+
+@@ -1009,13 +1008,26 @@ static void eventfs_remove_rec(struct ev
+ /* search for nested folders or files */
+ list_for_each_entry_srcu(ei_child, &ei->children, list,
+ lockdep_is_held(&eventfs_mutex)) {
+- eventfs_remove_rec(ei_child, head, level + 1);
++ /* Children only have dentry if parent does */
++ WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
++ eventfs_remove_rec(ei_child, level + 1);
+ }
+
++
+ ei->is_freed = 1;
+
++ for (int i = 0; i < ei->nr_entries; i++) {
++ if (ei->d_children[i]) {
++ /* Children only have dentry if parent does */
++ WARN_ON_ONCE(!ei->dentry);
++ unhook_dentry(ei->d_children[i]);
++ }
++ }
++
++ unhook_dentry(ei->dentry);
++
+ list_del_rcu(&ei->list);
+- list_add_tail(&ei->del_list, head);
++ call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+ }
+
+ /**
+@@ -1026,30 +1038,22 @@ static void eventfs_remove_rec(struct ev
+ */
+ void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+- struct eventfs_inode *tmp;
+- LIST_HEAD(ei_del_list);
++ struct dentry *dentry;
+
+ if (!ei)
+ return;
+
+- /*
+- * Move the deleted eventfs_inodes onto the ei_del_list
+- * which will also set the is_freed value. Note, this has to be
+- * done under the eventfs_mutex, but the deletions of
+- * the dentries must be done outside the eventfs_mutex.
+- * Hence moving them to this temporary list.
+- */
+ mutex_lock(&eventfs_mutex);
+- eventfs_remove_rec(ei, &ei_del_list, 0);
++ dentry = ei->dentry;
++ eventfs_remove_rec(ei, 0);
+ mutex_unlock(&eventfs_mutex);
+
+- list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
+- for (int i = 0; i < ei->nr_entries; i++)
+- unhook_dentry(ei->d_children[i]);
+- unhook_dentry(ei->dentry);
+- list_del(&ei->del_list);
+- call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+- }
++ /*
++ * If any of the ei children has a dentry, then the ei itself
++ * must have a dentry.
++ */
++ if (dentry)
++ simple_recursive_removal(dentry, NULL);
+ }
+
+ /**
+@@ -1060,10 +1064,17 @@ void eventfs_remove_dir(struct eventfs_i
+ */
+ void eventfs_remove_events_dir(struct eventfs_inode *ei)
+ {
+- struct dentry *dentry = ei->dentry;
++ struct dentry *dentry;
+
++ dentry = ei->dentry;
+ eventfs_remove_dir(ei);
+
+- /* Matches the dget() from eventfs_create_events_dir() */
++ /*
++ * Matches the dget() done by tracefs_start_creating()
++ * in eventfs_create_events_dir() when it the dentry was
++ * created. In other words, it's a normal dentry that
++ * sticks around while the other ei->dentry are created
++ * and destroyed dynamically.
++ */
+ dput(dentry);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -55,12 +55,10 @@ struct eventfs_inode {
+ /*
+ * Union - used for deletion
+ * @llist: for calling dput() if needed after RCU
+- * @del_list: list of eventfs_inode to delete
+ * @rcu: eventfs_inode to delete in RCU
+ */
+ union {
+ struct llist_node llist;
+- struct list_head del_list;
+ struct rcu_head rcu;
+ };
+ unsigned int is_freed:1;
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:01 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:59 -0500
+Subject: eventfs: Warn if an eventfs_inode is freed without is_freed being set
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.173872948@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 5a49f996046ba947466bc7461e4b19c4d1daf978 upstream.
+
+There should never be a case where an eventfs_inode is being freed without
+is_freed being set. Add a WARN_ON_ONCE() if it ever happens. That would
+mean there was one too many put_ei()s.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161616.843551963@goodmis.org
+
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -73,6 +73,9 @@ enum {
+ static void release_ei(struct kref *ref)
+ {
+ struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
++
++ WARN_ON_ONCE(!ei->is_freed);
++
+ kfree(ei->entry_attrs);
+ kfree_const(ei->name);
+ kfree_rcu(ei, rcu);
+@@ -84,6 +87,14 @@ static inline void put_ei(struct eventfs
+ kref_put(&ei->kref, release_ei);
+ }
+
++static inline void free_ei(struct eventfs_inode *ei)
++{
++ if (ei) {
++ ei->is_freed = 1;
++ put_ei(ei);
++ }
++}
++
+ static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
+ {
+ if (ei)
+@@ -679,7 +690,7 @@ struct eventfs_inode *eventfs_create_dir
+
+ /* Was the parent freed? */
+ if (list_empty(&ei->list)) {
+- put_ei(ei);
++ free_ei(ei);
+ ei = NULL;
+ }
+ return ei;
+@@ -770,7 +781,7 @@ struct eventfs_inode *eventfs_create_eve
+ return ei;
+
+ fail:
+- put_ei(ei);
++ free_ei(ei);
+ tracefs_failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+@@ -801,9 +812,8 @@ static void eventfs_remove_rec(struct ev
+ list_for_each_entry(ei_child, &ei->children, list)
+ eventfs_remove_rec(ei_child, level + 1);
+
+- ei->is_freed = 1;
+ list_del(&ei->list);
+- put_ei(ei);
++ free_ei(ei);
+ }
+
+ /**
--- /dev/null
+From 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 upstream.
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock. This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock. Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock. One is to call
+nfsd_break_one_deleg() which calls nfsd4_run_cb(). These don't act on
+the nfs4_file at all, so don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem with multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation(). The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* made under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked. i.e. while hash_delegation_locked() was running.
+As hash_delegation_locked() doesn't interact in any way with nfsd4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_deleg_cb() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test. It is still tested
+*after* vfs_setlease() has succeeded. This might be significant and as
+vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called
+under ->flc_lock this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4944,10 +4944,8 @@ nfsd_break_deleg_cb(struct file_lock *fl
+ */
+ fl->fl_break_time = 0;
+
+- spin_lock(&fp->fi_lock);
+ fp->fi_had_conflict = true;
+ nfsd_break_one_deleg(dp);
+- spin_unlock(&fp->fi_lock);
+ return false;
+ }
+
+@@ -5556,12 +5554,13 @@ nfs4_set_delegation(struct nfsd4_open *o
+ if (status)
+ goto out_unlock;
+
++ status = -EAGAIN;
++ if (fp->fi_had_conflict)
++ goto out_unlock;
++
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+- if (fp->fi_had_conflict)
+- status = -EAGAIN;
+- else
+- status = hash_delegation_locked(dp, fp);
++ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:07 -0500
+Subject: Revert "eventfs: Check for NULL ef in eventfs_set_attr()"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.700644630@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit d8f492a059728bbd397defbc9b8d2f4159d869b5.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -113,14 +113,14 @@ static int eventfs_set_attr(struct mnt_i
+
+ mutex_lock(&eventfs_mutex);
+ ef = dentry->d_fsdata;
+- if (ef && ef->is_freed) {
++ if (ef->is_freed) {
+ /* Do not allow changes if the event is about to be removed. */
+ mutex_unlock(&eventfs_mutex);
+ return -ENODEV;
+ }
+
+ ret = simple_setattr(idmap, dentry, iattr);
+- if (!ret && ef)
++ if (!ret)
+ update_attr(ef, iattr);
+ mutex_unlock(&eventfs_mutex);
+ return ret;
--- /dev/null
+From stable+bounces-18936-greg=kroah.com@vger.kernel.org Tue Feb 6 13:14:29 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:09 -0500
+Subject: Revert "eventfs: Delete eventfs_inode when the last dentry is freed"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.030353224@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit ea4c30a0a73fb5cb2604539db550f1e620bb949c.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 150 +++++++++++++++++++++++------------------------
+ 1 file changed, 76 insertions(+), 74 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -53,12 +53,10 @@ struct eventfs_file {
+ const struct inode_operations *iop;
+ /*
+ * Union - used for deletion
+- * @llist: for calling dput() if needed after RCU
+ * @del_list: list of eventfs_file to delete
+ * @rcu: eventfs_file to delete in RCU
+ */
+ union {
+- struct llist_node llist;
+ struct list_head del_list;
+ struct rcu_head rcu;
+ };
+@@ -115,7 +113,8 @@ static int eventfs_set_attr(struct mnt_i
+
+ mutex_lock(&eventfs_mutex);
+ ef = dentry->d_fsdata;
+- if (ef->is_freed) {
++ /* The LSB is set when the eventfs_inode is being freed */
++ if (((unsigned long)ef & 1UL) || ef->is_freed) {
+ /* Do not allow changes if the event is about to be removed. */
+ mutex_unlock(&eventfs_mutex);
+ return -ENODEV;
+@@ -259,13 +258,6 @@ static struct dentry *create_dir(struct
+ return eventfs_end_creating(dentry);
+ }
+
+-static void free_ef(struct eventfs_file *ef)
+-{
+- kfree(ef->name);
+- kfree(ef->ei);
+- kfree(ef);
+-}
+-
+ /**
+ * eventfs_set_ef_status_free - set the ef->status to free
+ * @ti: the tracefs_inode of the dentry
+@@ -278,20 +270,34 @@ void eventfs_set_ef_status_free(struct t
+ {
+ struct tracefs_inode *ti_parent;
+ struct eventfs_inode *ei;
+- struct eventfs_file *ef;
++ struct eventfs_file *ef, *tmp;
+
+ /* The top level events directory may be freed by this */
+ if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
++ LIST_HEAD(ef_del_list);
++
+ mutex_lock(&eventfs_mutex);
++
+ ei = ti->private;
+
++ /* Record all the top level files */
++ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++ lockdep_is_held(&eventfs_mutex)) {
++ list_add_tail(&ef->del_list, &ef_del_list);
++ }
++
+ /* Nothing should access this, but just in case! */
+ ti->private = NULL;
++
+ mutex_unlock(&eventfs_mutex);
+
+- ef = dentry->d_fsdata;
+- if (ef)
+- free_ef(ef);
++ /* Now safely free the top level files and their children */
++ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++ list_del(&ef->del_list);
++ eventfs_remove(ef);
++ }
++
++ kfree(ei);
+ return;
+ }
+
+@@ -305,13 +311,16 @@ void eventfs_set_ef_status_free(struct t
+ if (!ef)
+ goto out;
+
+- if (ef->is_freed) {
+- free_ef(ef);
+- } else {
+- ef->dentry = NULL;
+- }
++ /*
++ * If ef was freed, then the LSB bit is set for d_fsdata.
++ * But this should not happen, as it should still have a
++ * ref count that prevents it. Warn in case it does.
++ */
++ if (WARN_ON_ONCE((unsigned long)ef & 1))
++ goto out;
+
+ dentry->d_fsdata = NULL;
++ ef->dentry = NULL;
+ out:
+ mutex_unlock(&eventfs_mutex);
+ }
+@@ -838,53 +847,13 @@ int eventfs_add_file(const char *name, u
+ return 0;
+ }
+
+-static LLIST_HEAD(free_list);
+-
+-static void eventfs_workfn(struct work_struct *work)
+-{
+- struct eventfs_file *ef, *tmp;
+- struct llist_node *llnode;
+-
+- llnode = llist_del_all(&free_list);
+- llist_for_each_entry_safe(ef, tmp, llnode, llist) {
+- /* This should only get here if it had a dentry */
+- if (!WARN_ON_ONCE(!ef->dentry))
+- dput(ef->dentry);
+- }
+-}
+-
+-static DECLARE_WORK(eventfs_work, eventfs_workfn);
+-
+-static void free_rcu_ef(struct rcu_head *head)
++static void free_ef(struct rcu_head *head)
+ {
+ struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+
+- if (ef->dentry) {
+- /* Do not free the ef until all references of dentry are gone */
+- if (llist_add(&ef->llist, &free_list))
+- queue_work(system_unbound_wq, &eventfs_work);
+- return;
+- }
+-
+- free_ef(ef);
+-}
+-
+-static void unhook_dentry(struct dentry *dentry)
+-{
+- if (!dentry)
+- return;
+-
+- /* Keep the dentry from being freed yet (see eventfs_workfn()) */
+- dget(dentry);
+-
+- dentry->d_fsdata = NULL;
+- d_invalidate(dentry);
+- mutex_lock(&eventfs_mutex);
+- /* dentry should now have at least a single reference */
+- WARN_ONCE((int)d_count(dentry) < 1,
+- "dentry %px (%s) less than one reference (%d) after invalidate\n",
+- dentry, dentry->d_name.name, d_count(dentry));
+- mutex_unlock(&eventfs_mutex);
++ kfree(ef->name);
++ kfree(ef->ei);
++ kfree(ef);
+ }
+
+ /**
+@@ -936,25 +905,58 @@ void eventfs_remove(struct eventfs_file
+ {
+ struct eventfs_file *tmp;
+ LIST_HEAD(ef_del_list);
++ struct dentry *dentry_list = NULL;
++ struct dentry *dentry;
+
+ if (!ef)
+ return;
+
+- /*
+- * Move the deleted eventfs_inodes onto the ei_del_list
+- * which will also set the is_freed value. Note, this has to be
+- * done under the eventfs_mutex, but the deletions of
+- * the dentries must be done outside the eventfs_mutex.
+- * Hence moving them to this temporary list.
+- */
+ mutex_lock(&eventfs_mutex);
+ eventfs_remove_rec(ef, &ef_del_list, 0);
++ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++ if (ef->dentry) {
++ unsigned long ptr = (unsigned long)dentry_list;
++
++ /* Keep the dentry from being freed yet */
++ dget(ef->dentry);
++
++ /*
++ * Paranoid: The dget() above should prevent the dentry
++ * from being freed and calling eventfs_set_ef_status_free().
++ * But just in case, set the link list LSB pointer to 1
++ * and have eventfs_set_ef_status_free() check that to
++ * make sure that if it does happen, it will not think
++ * the d_fsdata is an event_file.
++ *
++ * For this to work, no event_file should be allocated
++ * on a odd space, as the ef should always be allocated
++ * to be at least word aligned. Check for that too.
++ */
++ WARN_ON_ONCE(ptr & 1);
++
++ ef->dentry->d_fsdata = (void *)(ptr | 1);
++ dentry_list = ef->dentry;
++ ef->dentry = NULL;
++ }
++ call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
++ }
+ mutex_unlock(&eventfs_mutex);
+
+- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+- unhook_dentry(ef->dentry);
+- list_del(&ef->del_list);
+- call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++ while (dentry_list) {
++ unsigned long ptr;
++
++ dentry = dentry_list;
++ ptr = (unsigned long)dentry->d_fsdata & ~1UL;
++ dentry_list = (struct dentry *)ptr;
++ dentry->d_fsdata = NULL;
++ d_invalidate(dentry);
++ mutex_lock(&eventfs_mutex);
++ /* dentry should now have at least a single reference */
++ WARN_ONCE((int)d_count(dentry) < 1,
++ "dentry %p less than one reference (%d) after invalidate\n",
++ dentry, d_count(dentry));
++ mutex_unlock(&eventfs_mutex);
++ dput(dentry);
+ }
+ }
+
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:06 -0500
+Subject: Revert "eventfs: Do not allow NULL parent to eventfs_start_creating()"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.536298441@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 6abb8c223ce12078a0f2c129656a13338dfe960b.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -509,15 +509,20 @@ struct dentry *eventfs_start_creating(co
+ struct dentry *dentry;
+ int error;
+
+- /* Must always have a parent. */
+- if (WARN_ON_ONCE(!parent))
+- return ERR_PTR(-EINVAL);
+-
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
++ /*
++ * If the parent is not specified, we create it in the root.
++ * We need the root dentry to do this, which is in the super
++ * block. A pointer to that is in the struct vfsmount that we
++ * have around.
++ */
++ if (!parent)
++ parent = tracefs_mount->mnt_root;
++
+ if (unlikely(IS_DEADDIR(parent->d_inode)))
+ dentry = ERR_PTR(-ENOENT);
+ else
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:11 -0500
+Subject: Revert "eventfs: Remove "is_freed" union with rcu head"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.351905829@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit fa18a8a0539b02cc621938091691f0b73f0b1288.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -38,7 +38,6 @@ struct eventfs_inode {
+ * @fop: file_operations for file or directory
+ * @iop: inode_operations for file or directory
+ * @data: something that the caller will want to get to later on
+- * @is_freed: Flag set if the eventfs is on its way to be freed
+ * @mode: the permission that the file or directory should have
+ */
+ struct eventfs_file {
+@@ -53,14 +52,15 @@ struct eventfs_file {
+ * Union - used for deletion
+ * @del_list: list of eventfs_file to delete
+ * @rcu: eventfs_file to delete in RCU
++ * @is_freed: node is freed if one of the above is set
+ */
+ union {
+ struct list_head del_list;
+ struct rcu_head rcu;
++ unsigned long is_freed;
+ };
+ void *data;
+- unsigned int is_freed:1;
+- unsigned int mode:31;
++ umode_t mode;
+ };
+
+ static DEFINE_MUTEX(eventfs_mutex);
+@@ -814,8 +814,6 @@ static void eventfs_remove_rec(struct ev
+ }
+ }
+
+- ef->is_freed = 1;
+-
+ list_del_rcu(&ef->list);
+ list_add_tail(&ef->del_list, head);
+ }
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:10 -0500
+Subject: Revert "eventfs: Save ownership and mode"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.186364236@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 9aaee3eebc91dd9ccebf6b6bc8a5f59d04ef718b.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 107 +++++++----------------------------------------
+ 1 file changed, 16 insertions(+), 91 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -40,8 +40,6 @@ struct eventfs_inode {
+ * @data: something that the caller will want to get to later on
+ * @is_freed: Flag set if the eventfs is on its way to be freed
+ * @mode: the permission that the file or directory should have
+- * @uid: saved uid if changed
+- * @gid: saved gid if changed
+ */
+ struct eventfs_file {
+ const char *name;
+@@ -63,22 +61,11 @@ struct eventfs_file {
+ void *data;
+ unsigned int is_freed:1;
+ unsigned int mode:31;
+- kuid_t uid;
+- kgid_t gid;
+ };
+
+ static DEFINE_MUTEX(eventfs_mutex);
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+
+-/* Mode is unsigned short, use the upper bits for flags */
+-enum {
+- EVENTFS_SAVE_MODE = BIT(16),
+- EVENTFS_SAVE_UID = BIT(17),
+- EVENTFS_SAVE_GID = BIT(18),
+-};
+-
+-#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
+-
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+@@ -86,54 +73,8 @@ static int dcache_dir_open_wrapper(struc
+ static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+ static int eventfs_release(struct inode *inode, struct file *file);
+
+-static void update_attr(struct eventfs_file *ef, struct iattr *iattr)
+-{
+- unsigned int ia_valid = iattr->ia_valid;
+-
+- if (ia_valid & ATTR_MODE) {
+- ef->mode = (ef->mode & ~EVENTFS_MODE_MASK) |
+- (iattr->ia_mode & EVENTFS_MODE_MASK) |
+- EVENTFS_SAVE_MODE;
+- }
+- if (ia_valid & ATTR_UID) {
+- ef->mode |= EVENTFS_SAVE_UID;
+- ef->uid = iattr->ia_uid;
+- }
+- if (ia_valid & ATTR_GID) {
+- ef->mode |= EVENTFS_SAVE_GID;
+- ef->gid = iattr->ia_gid;
+- }
+-}
+-
+-static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
+- struct iattr *iattr)
+-{
+- struct eventfs_file *ef;
+- int ret;
+-
+- mutex_lock(&eventfs_mutex);
+- ef = dentry->d_fsdata;
+- /* The LSB is set when the eventfs_inode is being freed */
+- if (((unsigned long)ef & 1UL) || ef->is_freed) {
+- /* Do not allow changes if the event is about to be removed. */
+- mutex_unlock(&eventfs_mutex);
+- return -ENODEV;
+- }
+-
+- ret = simple_setattr(idmap, dentry, iattr);
+- if (!ret)
+- update_attr(ef, iattr);
+- mutex_unlock(&eventfs_mutex);
+- return ret;
+-}
+-
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+ .lookup = eventfs_root_lookup,
+- .setattr = eventfs_set_attr,
+-};
+-
+-static const struct inode_operations eventfs_file_inode_operations = {
+- .setattr = eventfs_set_attr,
+ };
+
+ static const struct file_operations eventfs_file_operations = {
+@@ -144,20 +85,10 @@ static const struct file_operations even
+ .release = eventfs_release,
+ };
+
+-static void update_inode_attr(struct inode *inode, struct eventfs_file *ef)
+-{
+- inode->i_mode = ef->mode & EVENTFS_MODE_MASK;
+-
+- if (ef->mode & EVENTFS_SAVE_UID)
+- inode->i_uid = ef->uid;
+-
+- if (ef->mode & EVENTFS_SAVE_GID)
+- inode->i_gid = ef->gid;
+-}
+-
+ /**
+ * create_file - create a file in the tracefs filesystem
+- * @ef: the eventfs_file
++ * @name: the name of the file to create.
++ * @mode: the permission that the file should have.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+@@ -173,7 +104,7 @@ static void update_inode_attr(struct ino
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+-static struct dentry *create_file(struct eventfs_file *ef,
++static struct dentry *create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fop)
+ {
+@@ -181,13 +112,13 @@ static struct dentry *create_file(struct
+ struct dentry *dentry;
+ struct inode *inode;
+
+- if (!(ef->mode & S_IFMT))
+- ef->mode |= S_IFREG;
++ if (!(mode & S_IFMT))
++ mode |= S_IFREG;
+
+- if (WARN_ON_ONCE(!S_ISREG(ef->mode)))
++ if (WARN_ON_ONCE(!S_ISREG(mode)))
+ return NULL;
+
+- dentry = eventfs_start_creating(ef->name, parent);
++ dentry = eventfs_start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+ return dentry;
+@@ -196,10 +127,7 @@ static struct dentry *create_file(struct
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+- /* If the user updated the directory's attributes, use them */
+- update_inode_attr(inode, ef);
+-
+- inode->i_op = &eventfs_file_inode_operations;
++ inode->i_mode = mode;
+ inode->i_fop = fop;
+ inode->i_private = data;
+
+@@ -212,7 +140,7 @@ static struct dentry *create_file(struct
+
+ /**
+ * create_dir - create a dir in the tracefs filesystem
+- * @ei: the eventfs_inode that represents the directory to create
++ * @name: the name of the file to create.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ *
+@@ -227,14 +155,13 @@ static struct dentry *create_file(struct
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+-static struct dentry *create_dir(struct eventfs_file *ef,
+- struct dentry *parent, void *data)
++static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
+ {
+ struct tracefs_inode *ti;
+ struct dentry *dentry;
+ struct inode *inode;
+
+- dentry = eventfs_start_creating(ef->name, parent);
++ dentry = eventfs_start_creating(name, parent);
+ if (IS_ERR(dentry))
+ return dentry;
+
+@@ -242,8 +169,7 @@ static struct dentry *create_dir(struct
+ if (unlikely(!inode))
+ return eventfs_failed_creating(dentry);
+
+- update_inode_attr(inode, ef);
+-
++ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_fop = &eventfs_file_operations;
+ inode->i_private = data;
+@@ -380,9 +306,10 @@ create_dentry(struct eventfs_file *ef, s
+ inode_lock(parent->d_inode);
+
+ if (ef->ei)
+- dentry = create_dir(ef, parent, ef->data);
++ dentry = create_dir(ef->name, parent, ef->data);
+ else
+- dentry = create_file(ef, parent, ef->data, ef->fop);
++ dentry = create_file(ef->name, ef->mode, parent,
++ ef->data, ef->fop);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+@@ -548,7 +475,6 @@ static int dcache_dir_open_wrapper(struc
+ if (d) {
+ struct dentry **tmp;
+
+-
+ tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+ if (!tmp)
+ break;
+@@ -623,14 +549,13 @@ static struct eventfs_file *eventfs_prep
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_LIST_HEAD(&ef->ei->e_top_files);
+- ef->mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ } else {
+ ef->ei = NULL;
+- ef->mode = mode;
+ }
+
+ ef->iop = iop;
+ ef->fop = fop;
++ ef->mode = mode;
+ ef->data = data;
+ return ef;
+ }
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:08 -0500
+Subject: Revert "eventfs: Use simple_recursive_removal() to clean up dentries"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.866568635@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 055907ad2c14838c90d63297f7bab8d180a5d844.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 71 +++++++++++++++++++++++++----------------------
+ 1 file changed, 38 insertions(+), 33 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -54,10 +54,12 @@ struct eventfs_file {
+ /*
+ * Union - used for deletion
+ * @llist: for calling dput() if needed after RCU
++ * @del_list: list of eventfs_file to delete
+ * @rcu: eventfs_file to delete in RCU
+ */
+ union {
+ struct llist_node llist;
++ struct list_head del_list;
+ struct rcu_head rcu;
+ };
+ void *data;
+@@ -274,6 +276,7 @@ static void free_ef(struct eventfs_file
+ */
+ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
++ struct tracefs_inode *ti_parent;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+
+@@ -294,6 +297,10 @@ void eventfs_set_ef_status_free(struct t
+
+ mutex_lock(&eventfs_mutex);
+
++ ti_parent = get_tracefs(dentry->d_parent->d_inode);
++ if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
++ goto out;
++
+ ef = dentry->d_fsdata;
+ if (!ef)
+ goto out;
+@@ -866,29 +873,30 @@ static void unhook_dentry(struct dentry
+ {
+ if (!dentry)
+ return;
+- /*
+- * Need to add a reference to the dentry that is expected by
+- * simple_recursive_removal(), which will include a dput().
+- */
+- dget(dentry);
+
+- /*
+- * Also add a reference for the dput() in eventfs_workfn().
+- * That is required as that dput() will free the ei after
+- * the SRCU grace period is over.
+- */
++ /* Keep the dentry from being freed yet (see eventfs_workfn()) */
+ dget(dentry);
++
++ dentry->d_fsdata = NULL;
++ d_invalidate(dentry);
++ mutex_lock(&eventfs_mutex);
++ /* dentry should now have at least a single reference */
++ WARN_ONCE((int)d_count(dentry) < 1,
++ "dentry %px (%s) less than one reference (%d) after invalidate\n",
++ dentry, dentry->d_name.name, d_count(dentry));
++ mutex_unlock(&eventfs_mutex);
+ }
+
+ /**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ef: eventfs_file to be removed.
++ * @head: to create list of eventfs_file to be deleted
+ * @level: to check recursion depth
+ *
+ * The helper function eventfs_remove_rec() is used to clean up and free the
+ * associated data from eventfs for both of the added functions.
+ */
+-static void eventfs_remove_rec(struct eventfs_file *ef, int level)
++static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
+ {
+ struct eventfs_file *ef_child;
+
+@@ -908,16 +916,14 @@ static void eventfs_remove_rec(struct ev
+ /* search for nested folders or files */
+ list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+ lockdep_is_held(&eventfs_mutex)) {
+- eventfs_remove_rec(ef_child, level + 1);
++ eventfs_remove_rec(ef_child, head, level + 1);
+ }
+ }
+
+ ef->is_freed = 1;
+
+- unhook_dentry(ef->dentry);
+-
+ list_del_rcu(&ef->list);
+- call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++ list_add_tail(&ef->del_list, head);
+ }
+
+ /**
+@@ -928,22 +934,28 @@ static void eventfs_remove_rec(struct ev
+ */
+ void eventfs_remove(struct eventfs_file *ef)
+ {
+- struct dentry *dentry;
++ struct eventfs_file *tmp;
++ LIST_HEAD(ef_del_list);
+
+ if (!ef)
+ return;
+
++ /*
++ * Move the deleted eventfs_inodes onto the ei_del_list
++ * which will also set the is_freed value. Note, this has to be
++ * done under the eventfs_mutex, but the deletions of
++ * the dentries must be done outside the eventfs_mutex.
++ * Hence moving them to this temporary list.
++ */
+ mutex_lock(&eventfs_mutex);
+- dentry = ef->dentry;
+- eventfs_remove_rec(ef, 0);
++ eventfs_remove_rec(ef, &ef_del_list, 0);
+ mutex_unlock(&eventfs_mutex);
+
+- /*
+- * If any of the ei children has a dentry, then the ei itself
+- * must have a dentry.
+- */
+- if (dentry)
+- simple_recursive_removal(dentry, NULL);
++ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++ unhook_dentry(ef->dentry);
++ list_del(&ef->del_list);
++ call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++ }
+ }
+
+ /**
+@@ -954,8 +966,6 @@ void eventfs_remove(struct eventfs_file
+ */
+ void eventfs_remove_events_dir(struct dentry *dentry)
+ {
+- struct eventfs_file *ef_child;
+- struct eventfs_inode *ei;
+ struct tracefs_inode *ti;
+
+ if (!dentry || !dentry->d_inode)
+@@ -965,11 +975,6 @@ void eventfs_remove_events_dir(struct de
+ if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+ return;
+
+- mutex_lock(&eventfs_mutex);
+- ei = ti->private;
+- list_for_each_entry_srcu(ef_child, &ei->e_top_files, list,
+- lockdep_is_held(&eventfs_mutex)) {
+- eventfs_remove_rec(ef_child, 0);
+- }
+- mutex_unlock(&eventfs_mutex);
++ d_invalidate(dentry);
++ dput(dentry);
+ }
netfilter-ipset-fix-performance-regression-in-swap-operation.patch
netfilter-ipset-missing-gc-cancellations-fixed.patch
parisc-fix-random-data-corruption-from-exception-handler.patch
+revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
+revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch
+revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
+revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
+revert-eventfs-save-ownership-and-mode.patch
+revert-eventfs-remove-is_freed-union-with-rcu-head.patch
+eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch
+eventfs-use-eventfs_remove_events_dir.patch
+eventfs-use-err_cast-in-eventfs_create_events_dir.patch
+eventfs-fix-failure-path-in-eventfs_create_events_dir.patch
+tracefs-eventfs-modify-mismatched-function-name.patch
+eventfs-fix-warn_on-in-create_file_dentry.patch
+eventfs-fix-typo-in-eventfs_inode-union-comment.patch
+eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch
+eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch
+eventfs-remove-is_freed-union-with-rcu-head.patch
+eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch
+eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch
+eventfs-save-ownership-and-mode.patch
+eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch
+eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
+eventfs-remove-special-processing-of-dput-of-events-directory.patch
+eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
+eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch
+eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch
+eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch
+eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch
+eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
+eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch
+eventfs-fix-events-beyond-name_max-blocking-tasks.patch
+eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch
+eventfs-fix-file-and-directory-uid-and-gid-ownership.patch
+tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch
+eventfs-fix-bitwise-fields-for-is_events.patch
+eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch
+eventfs-stop-using-dcache_readdir-for-getdents.patch
+tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch
+eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch
+eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch
+eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch
+eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch
+eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch
+eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch
+eventfs-use-kcalloc-instead-of-kzalloc.patch
+eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch
+tracefs-remove-stale-update_gid-code.patch
+tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch
+eventfs-initialize-the-tracefs-inode-properly.patch
+tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch
+tracefs-dentry-lookup-crapectomy.patch
+eventfs-remove-unused-d_parent-pointer-field.patch
+eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch
+eventfs-get-rid-of-dentry-pointers-without-refcounts.patch
+eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch
+eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch
+eventfs-remove-fsnotify-functions-from-lookup.patch
+eventfs-keep-all-directory-links-at-1.patch
+nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:29 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:54 -0500
+Subject: tracefs: Avoid using the ei->dentry pointer unnecessarily
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.362950692@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 99c001cb617df409dac275a059d6c3f187a2da7a upstream.
+
+The eventfs_find_events() code tries to walk up the tree to find the
+event directory that a dentry belongs to, in order to then find the
+eventfs inode that is associated with that event directory.
+
+However, it uses an odd combination of walking the dentry parent,
+looking up the eventfs inode associated with that, and then looking up
+the dentry from there. Repeat.
+
+But the code shouldn't have back-pointers to dentries in the first
+place, and it should just walk the dentry parenthood chain directly.
+
+Similarly, 'set_top_events_ownership()' looks up the dentry from the
+eventfs inode, but the only reason it wants a dentry is to look up the
+superblock in order to look up the root dentry.
+
+But it already has the real filesystem inode, which has that same
+superblock pointer. So just pass in the superblock pointer using the
+information that's already there, instead of looking up extraneous data
+that is irrelevant.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.638645365@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 26 ++++++++++++--------------
+ 1 file changed, 12 insertions(+), 14 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -156,33 +156,30 @@ static int eventfs_set_attr(struct mnt_i
+ return ret;
+ }
+
+-static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
++static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
+ {
+- struct inode *inode;
++ struct inode *root;
+
+ /* Only update if the "events" was on the top level */
+ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+ return;
+
+ /* Get the tracefs root inode. */
+- inode = d_inode(dentry->d_sb->s_root);
+- ei->attr.uid = inode->i_uid;
+- ei->attr.gid = inode->i_gid;
++ root = d_inode(sb->s_root);
++ ei->attr.uid = root->i_uid;
++ ei->attr.gid = root->i_gid;
+ }
+
+ static void set_top_events_ownership(struct inode *inode)
+ {
+ struct tracefs_inode *ti = get_tracefs(inode);
+ struct eventfs_inode *ei = ti->private;
+- struct dentry *dentry;
+
+ /* The top events directory doesn't get automatically updated */
+ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+ return;
+
+- dentry = ei->dentry;
+-
+- update_top_events_attr(ei, dentry);
++ update_top_events_attr(ei, inode->i_sb);
+
+ if (!(ei->attr.mode & EVENTFS_SAVE_UID))
+ inode->i_uid = ei->attr.uid;
+@@ -235,8 +232,10 @@ static struct eventfs_inode *eventfs_fin
+
+ mutex_lock(&eventfs_mutex);
+ do {
+- /* The parent always has an ei, except for events itself */
+- ei = dentry->d_parent->d_fsdata;
++ // The parent is stable because we do not do renames
++ dentry = dentry->d_parent;
++ // ... and directories always have d_fsdata
++ ei = dentry->d_fsdata;
+
+ /*
+ * If the ei is being freed, the ownership of the children
+@@ -246,12 +245,11 @@ static struct eventfs_inode *eventfs_fin
+ ei = NULL;
+ break;
+ }
+-
+- dentry = ei->dentry;
++ // Walk upwards until you find the events inode
+ } while (!ei->is_events);
+ mutex_unlock(&eventfs_mutex);
+
+- update_top_events_attr(ei, dentry);
++ update_top_events_attr(ei, dentry->d_sb);
+
+ return ei;
+ }
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:50 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:38 -0500
+Subject: tracefs: Check for dentry->d_inode exists in set_gid()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, "Ubisectech Sirius" <bugreport@ubisectech.com>
+Message-ID: <20240206120951.738927603@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit ad579864637af46447208254719943179b69d41a upstream.
+
+If getdents() is called on the tracefs directory but does not read all
+the entries, it can leave a "cursor" dentry in the d_subdirs list of the
+tracefs dentry. This cursor dentry has no d_inode. Before looking up the
+tracefs_inode from a dentry, d_inode must first be checked for NULL. If
+it is NULL, the dentry has no tracefs_inode and can be ignored.
+
+The following caused a crash:
+
+ #define getdents64(fd, dirp, count) syscall(SYS_getdents64, fd, dirp, count)
+ #define BUF_SIZE 256
+ #define TDIR "/tmp/file0"
+
+ int main(void)
+ {
+ char buf[BUF_SIZE];
+ int fd;
+ int n;
+
+ mkdir(TDIR, 0777);
+ mount(NULL, TDIR, "tracefs", 0, NULL);
+ fd = openat(AT_FDCWD, TDIR, O_RDONLY);
+ n = getdents64(fd, buf, BUF_SIZE);
+ ret = mount(NULL, TDIR, NULL, MS_NOSUID|MS_REMOUNT|MS_RELATIME|MS_LAZYTIME,
+ "gid=1000");
+ return 0;
+ }
+
+That's because the 256 BUF_SIZE was not big enough to read all the
+dentries of the tracefs file system and it left a "cursor" dentry in the
+subdirs of the tracefs root inode. Then on remounting with "gid=1000",
+it would cause an iteration of all dentries which hit:
+
+ ti = get_tracefs(dentry->d_inode);
+ if (ti && (ti->flags & TRACEFS_EVENT_INODE))
+ eventfs_update_gid(dentry, gid);
+
+Which crashed because of the dereference of the cursor dentry which had a NULL
+d_inode.
+
+In the subdir loop of the dentry lookup of set_gid(), if a child has a
+NULL d_inode, simply skip it.
+
+Link: https://lore.kernel.org/all/20240102135637.3a21fb10@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240102151249.05da244d@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 7e8358edf503e ("eventfs: Fix file and directory uid and gid ownership")
+Reported-by: "Ubisectech Sirius" <bugreport@ubisectech.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -215,6 +215,10 @@ resume:
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+
++ /* Note, getdents() can add a cursor dentry with no inode */
++ if (!dentry->d_inode)
++ continue;
++
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+ change_gid(dentry, gid);
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:43 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:55 -0500
+Subject: tracefs: dentry lookup crapectomy
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.518365320@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 49304c2b93e4f7468b51ef717cbe637981397115 upstream.
+
+The dentry lookup for eventfs files was very broken, and had lots of
+signs of the old situation where the filesystem names were all created
+statically in the dentry tree, rather than being looked up dynamically
+based on the eventfs data structures.
+
+You could see it in the naming - how it claimed to "create" dentries
+rather than just look up the dentries that were given it.
+
+You could see it in various nonsensical and very incorrect operations,
+like using "simple_lookup()" on the dentries that were passed in, which
+only results in those dentries becoming negative dentries. Which meant
+that any other lookup would possibly return ENOENT if it saw that
+negative dentry before the data was then later filled in.
+
+You could see it in the immense amount of nonsensical code that didn't
+actually just do lookups.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131233227.73db55e1@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 275 ++++++++---------------------------------------
+ fs/tracefs/inode.c | 69 -----------
+ fs/tracefs/internal.h | 3
+ 3 files changed, 50 insertions(+), 297 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -230,7 +230,6 @@ static struct eventfs_inode *eventfs_fin
+ {
+ struct eventfs_inode *ei;
+
+- mutex_lock(&eventfs_mutex);
+ do {
+ // The parent is stable because we do not do renames
+ dentry = dentry->d_parent;
+@@ -247,7 +246,6 @@ static struct eventfs_inode *eventfs_fin
+ }
+ // Walk upwards until you find the events inode
+ } while (!ei->is_events);
+- mutex_unlock(&eventfs_mutex);
+
+ update_top_events_attr(ei, dentry->d_sb);
+
+@@ -280,11 +278,10 @@ static void update_inode_attr(struct den
+ }
+
+ /**
+- * create_file - create a file in the tracefs filesystem
+- * @name: the name of the file to create.
++ * lookup_file - look up a file in the tracefs filesystem
++ * @dentry: the dentry to look up
+ * @mode: the permission that the file should have.
+ * @attr: saved attributes changed by user
+- * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+@@ -292,13 +289,13 @@ static void update_inode_attr(struct den
+ * directory. The inode.i_private pointer will point to @data in the open()
+ * call.
+ */
+-static struct dentry *create_file(const char *name, umode_t mode,
++static struct dentry *lookup_file(struct dentry *dentry,
++ umode_t mode,
+ struct eventfs_attr *attr,
+- struct dentry *parent, void *data,
++ void *data,
+ const struct file_operations *fop)
+ {
+ struct tracefs_inode *ti;
+- struct dentry *dentry;
+ struct inode *inode;
+
+ if (!(mode & S_IFMT))
+@@ -307,15 +304,9 @@ static struct dentry *create_file(const
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
+ return NULL;
+
+- WARN_ON_ONCE(!parent);
+- dentry = eventfs_start_creating(name, parent);
+-
+- if (IS_ERR(dentry))
+- return dentry;
+-
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+- return eventfs_failed_creating(dentry);
++ return ERR_PTR(-ENOMEM);
+
+ /* If the user updated the directory's attributes, use them */
+ update_inode_attr(dentry, inode, attr, mode);
+@@ -329,32 +320,29 @@ static struct dentry *create_file(const
+
+ ti = get_tracefs(inode);
+ ti->flags |= TRACEFS_EVENT_INODE;
+- d_instantiate(dentry, inode);
++
++ d_add(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+- return eventfs_end_creating(dentry);
++ return dentry;
+ };
+
+ /**
+- * create_dir - create a dir in the tracefs filesystem
++ * lookup_dir_entry - look up a dir in the tracefs filesystem
++ * @dentry: the directory to look up
+ * @ei: the eventfs_inode that represents the directory to create
+- * @parent: parent dentry for this file.
+ *
+- * This function will create a dentry for a directory represented by
++ * This function will look up a dentry for a directory represented by
+ * a eventfs_inode.
+ */
+-static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
++static struct dentry *lookup_dir_entry(struct dentry *dentry,
++ struct eventfs_inode *pei, struct eventfs_inode *ei)
+ {
+ struct tracefs_inode *ti;
+- struct dentry *dentry;
+ struct inode *inode;
+
+- dentry = eventfs_start_creating(ei->name, parent);
+- if (IS_ERR(dentry))
+- return dentry;
+-
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+- return eventfs_failed_creating(dentry);
++ return ERR_PTR(-ENOMEM);
+
+ /* If the user updated the directory's attributes, use them */
+ update_inode_attr(dentry, inode, &ei->attr,
+@@ -371,11 +359,14 @@ static struct dentry *create_dir(struct
+ /* Only directories have ti->private set to an ei, not files */
+ ti->private = ei;
+
++ dentry->d_fsdata = ei;
++ ei->dentry = dentry; // Remove me!
++
+ inc_nlink(inode);
+- d_instantiate(dentry, inode);
++ d_add(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+- return eventfs_end_creating(dentry);
++ return dentry;
+ }
+
+ static void free_ei(struct eventfs_inode *ei)
+@@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct t
+ }
+
+ /**
+- * create_file_dentry - create a dentry for a file of an eventfs_inode
++ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @ei: the eventfs_inode that the file will be created under
+ * @idx: the index into the d_children[] of the @ei
+ * @parent: The parent dentry of the created file.
+@@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct t
+ * address located at @e_dentry.
+ */
+ static struct dentry *
+-create_file_dentry(struct eventfs_inode *ei, int idx,
+- struct dentry *parent, const char *name, umode_t mode, void *data,
++lookup_file_dentry(struct dentry *dentry,
++ struct eventfs_inode *ei, int idx,
++ umode_t mode, void *data,
+ const struct file_operations *fops)
+ {
+ struct eventfs_attr *attr = NULL;
+ struct dentry **e_dentry = &ei->d_children[idx];
+- struct dentry *dentry;
+
+- WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+-
+- mutex_lock(&eventfs_mutex);
+- if (ei->is_freed) {
+- mutex_unlock(&eventfs_mutex);
+- return NULL;
+- }
+- /* If the e_dentry already has a dentry, use it */
+- if (*e_dentry) {
+- dget(*e_dentry);
+- mutex_unlock(&eventfs_mutex);
+- return *e_dentry;
+- }
+-
+- /* ei->entry_attrs are protected by SRCU */
+ if (ei->entry_attrs)
+ attr = &ei->entry_attrs[idx];
+
+- mutex_unlock(&eventfs_mutex);
+-
+- dentry = create_file(name, mode, attr, parent, data, fops);
+-
+- mutex_lock(&eventfs_mutex);
+-
+- if (IS_ERR_OR_NULL(dentry)) {
+- /*
+- * When the mutex was released, something else could have
+- * created the dentry for this e_dentry. In which case
+- * use that one.
+- *
+- * If ei->is_freed is set, the e_dentry is currently on its
+- * way to being freed, don't return it. If e_dentry is NULL
+- * it means it was already freed.
+- */
+- if (ei->is_freed) {
+- dentry = NULL;
+- } else {
+- dentry = *e_dentry;
+- dget(dentry);
+- }
+- mutex_unlock(&eventfs_mutex);
+- return dentry;
+- }
++ dentry->d_fsdata = ei; // NOTE: ei of _parent_
++ lookup_file(dentry, mode, attr, data, fops);
+
+- if (!*e_dentry && !ei->is_freed) {
+- *e_dentry = dentry;
+- dentry->d_fsdata = ei;
+- } else {
+- /*
+- * Should never happen unless we get here due to being freed.
+- * Otherwise it means two dentries exist with the same name.
+- */
+- WARN_ON_ONCE(!ei->is_freed);
+- dentry = NULL;
+- }
+- mutex_unlock(&eventfs_mutex);
+-
+- return dentry;
+-}
+-
+-/**
+- * eventfs_post_create_dir - post create dir routine
+- * @ei: eventfs_inode of recently created dir
+- *
+- * Map the meta-data of files within an eventfs dir to their parent dentry
+- */
+-static void eventfs_post_create_dir(struct eventfs_inode *ei)
+-{
+- struct eventfs_inode *ei_child;
+-
+- lockdep_assert_held(&eventfs_mutex);
+-
+- /* srcu lock already held */
+- /* fill parent-child relation */
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- srcu_read_lock_held(&eventfs_srcu)) {
+- ei_child->d_parent = ei->dentry;
+- }
+-}
+-
+-/**
+- * create_dir_dentry - Create a directory dentry for the eventfs_inode
+- * @pei: The eventfs_inode parent of ei.
+- * @ei: The eventfs_inode to create the directory for
+- * @parent: The dentry of the parent of this directory
+- *
+- * This creates and attaches a directory dentry to the eventfs_inode @ei.
+- */
+-static struct dentry *
+-create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+- struct dentry *parent)
+-{
+- struct dentry *dentry = NULL;
+-
+- WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+-
+- mutex_lock(&eventfs_mutex);
+- if (pei->is_freed || ei->is_freed) {
+- mutex_unlock(&eventfs_mutex);
+- return NULL;
+- }
+- if (ei->dentry) {
+- /* If the eventfs_inode already has a dentry, use it */
+- dentry = ei->dentry;
+- dget(dentry);
+- mutex_unlock(&eventfs_mutex);
+- return dentry;
+- }
+- mutex_unlock(&eventfs_mutex);
+-
+- dentry = create_dir(ei, parent);
+-
+- mutex_lock(&eventfs_mutex);
+-
+- if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
+- /*
+- * When the mutex was released, something else could have
+- * created the dentry for this e_dentry. In which case
+- * use that one.
+- *
+- * If ei->is_freed is set, the e_dentry is currently on its
+- * way to being freed.
+- */
+- dentry = ei->dentry;
+- if (dentry)
+- dget(dentry);
+- mutex_unlock(&eventfs_mutex);
+- return dentry;
+- }
+-
+- if (!ei->dentry && !ei->is_freed) {
+- ei->dentry = dentry;
+- eventfs_post_create_dir(ei);
+- dentry->d_fsdata = ei;
+- } else {
+- /*
+- * Should never happen unless we get here due to being freed.
+- * Otherwise it means two dentries exist with the same name.
+- */
+- WARN_ON_ONCE(!ei->is_freed);
+- dentry = NULL;
+- }
+- mutex_unlock(&eventfs_mutex);
++ *e_dentry = dentry; // Remove me
+
+ return dentry;
+ }
+@@ -607,79 +462,49 @@ static struct dentry *eventfs_root_looku
+ struct dentry *dentry,
+ unsigned int flags)
+ {
+- const struct file_operations *fops;
+- const struct eventfs_entry *entry;
+ struct eventfs_inode *ei_child;
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+- struct dentry *ei_dentry = NULL;
+- struct dentry *ret = NULL;
+- struct dentry *d;
+ const char *name = dentry->d_name.name;
+- umode_t mode;
+- void *data;
+- int idx;
+- int i;
+- int r;
+
+ ti = get_tracefs(dir);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+- return NULL;
++ return ERR_PTR(-EIO);
+
+- /* Grab srcu to prevent the ei from going away */
+- idx = srcu_read_lock(&eventfs_srcu);
+-
+- /*
+- * Grab the eventfs_mutex to consistent value from ti->private.
+- * This s
+- */
+ mutex_lock(&eventfs_mutex);
+- ei = READ_ONCE(ti->private);
+- if (ei && !ei->is_freed)
+- ei_dentry = READ_ONCE(ei->dentry);
+- mutex_unlock(&eventfs_mutex);
+
+- if (!ei || !ei_dentry)
++ ei = ti->private;
++ if (!ei || ei->is_freed)
+ goto out;
+
+- data = ei->data;
+-
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- srcu_read_lock_held(&eventfs_srcu)) {
++ list_for_each_entry(ei_child, &ei->children, list) {
+ if (strcmp(ei_child->name, name) != 0)
+ continue;
+- ret = simple_lookup(dir, dentry, flags);
+- if (IS_ERR(ret))
++ if (ei_child->is_freed)
+ goto out;
+- d = create_dir_dentry(ei, ei_child, ei_dentry);
+- dput(d);
++ lookup_dir_entry(dentry, ei, ei_child);
+ goto out;
+ }
+
+- for (i = 0; i < ei->nr_entries; i++) {
+- entry = &ei->entries[i];
+- if (strcmp(name, entry->name) == 0) {
+- void *cdata = data;
+- mutex_lock(&eventfs_mutex);
+- /* If ei->is_freed, then the event itself may be too */
+- if (!ei->is_freed)
+- r = entry->callback(name, &mode, &cdata, &fops);
+- else
+- r = -1;
+- mutex_unlock(&eventfs_mutex);
+- if (r <= 0)
+- continue;
+- ret = simple_lookup(dir, dentry, flags);
+- if (IS_ERR(ret))
+- goto out;
+- d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+- dput(d);
+- break;
+- }
++ for (int i = 0; i < ei->nr_entries; i++) {
++ void *data;
++ umode_t mode;
++ const struct file_operations *fops;
++ const struct eventfs_entry *entry = &ei->entries[i];
++
++ if (strcmp(name, entry->name) != 0)
++ continue;
++
++ data = ei->data;
++ if (entry->callback(name, &mode, &data, &fops) <= 0)
++ goto out;
++
++ lookup_file_dentry(dentry, ei, i, mode, data, fops);
++ goto out;
+ }
+ out:
+- srcu_read_unlock(&eventfs_srcu, idx);
+- return ret;
++ mutex_unlock(&eventfs_mutex);
++ return NULL;
+ }
+
+ /*
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -495,75 +495,6 @@ struct dentry *tracefs_end_creating(stru
+ return dentry;
+ }
+
+-/**
+- * eventfs_start_creating - start the process of creating a dentry
+- * @name: Name of the file created for the dentry
+- * @parent: The parent dentry where this dentry will be created
+- *
+- * This is a simple helper function for the dynamically created eventfs
+- * files. When the directory of the eventfs files are accessed, their
+- * dentries are created on the fly. This function is used to start that
+- * process.
+- */
+-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
+-{
+- struct dentry *dentry;
+- int error;
+-
+- /* Must always have a parent. */
+- if (WARN_ON_ONCE(!parent))
+- return ERR_PTR(-EINVAL);
+-
+- error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+- &tracefs_mount_count);
+- if (error)
+- return ERR_PTR(error);
+-
+- if (unlikely(IS_DEADDIR(parent->d_inode)))
+- dentry = ERR_PTR(-ENOENT);
+- else
+- dentry = lookup_one_len(name, parent, strlen(name));
+-
+- if (!IS_ERR(dentry) && dentry->d_inode) {
+- dput(dentry);
+- dentry = ERR_PTR(-EEXIST);
+- }
+-
+- if (IS_ERR(dentry))
+- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+-
+- return dentry;
+-}
+-
+-/**
+- * eventfs_failed_creating - clean up a failed eventfs dentry creation
+- * @dentry: The dentry to clean up
+- *
+- * If after calling eventfs_start_creating(), a failure is detected, the
+- * resources created by eventfs_start_creating() needs to be cleaned up. In
+- * that case, this function should be called to perform that clean up.
+- */
+-struct dentry *eventfs_failed_creating(struct dentry *dentry)
+-{
+- dput(dentry);
+- simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+- return NULL;
+-}
+-
+-/**
+- * eventfs_end_creating - Finish the process of creating a eventfs dentry
+- * @dentry: The dentry that has successfully been created.
+- *
+- * This function is currently just a place holder to match
+- * eventfs_start_creating(). In case any synchronization needs to be added,
+- * this function will be used to implement that without having to modify
+- * the callers of eventfs_start_creating().
+- */
+-struct dentry *eventfs_end_creating(struct dentry *dentry)
+-{
+- return dentry;
+-}
+-
+ /* Find the inode that this will use for default */
+ static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
+ {
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -80,9 +80,6 @@ struct dentry *tracefs_start_creating(co
+ struct dentry *tracefs_end_creating(struct dentry *dentry);
+ struct dentry *tracefs_failed_creating(struct dentry *dentry);
+ struct inode *tracefs_get_inode(struct super_block *sb);
+-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+-struct dentry *eventfs_failed_creating(struct dentry *dentry);
+-struct dentry *eventfs_end_creating(struct dentry *dentry);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+ #endif /* _TRACEFS_INTERNAL_H */
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:16 -0500
+Subject: tracefs/eventfs: Modify mismatched function name
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Abaci Robot <abaci@linux.alibaba.com>, Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Message-ID: <20240206120948.165080330@rostedt.homelinux.com>
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+commit 64bf2f685c795e75dd855761c75a193ee5998731 upstream.
+
+No functional modification involved.
+
+fs/tracefs/event_inode.c:864: warning: expecting prototype for eventfs_remove(). Prototype was for eventfs_remove_dir() instead.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231019031353.73846-1-jiapeng.chong@linux.alibaba.com
+
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6939
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -856,7 +856,7 @@ static void unhook_dentry(struct dentry
+ }
+ }
+ /**
+- * eventfs_remove - remove eventfs dir or file from list
++ * eventfs_remove_dir - remove eventfs dir or file from list
+ * @ei: eventfs_inode to be removed.
+ *
+ * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:42 -0500
+Subject: tracefs/eventfs: Use root and instance inodes as default ownership
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.401268456@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 8186fff7ab649085e2c60d032d9a20a85af1d87c upstream.
+
+Instead of walking the dentries on mount/remount to update the gid values of
+all the dentries if a gid option is specified on mount, just update the root
+inode. Add .getattr, .setattr, and .permission on the tracefs inode
+operations to update the permissions of the files and directories.
+
+For all files and directories in the top level instance:
+
+ /sys/kernel/tracing/*
+
+It will use the root inode as the default permissions. The inode that
+represents: /sys/kernel/tracing (or wherever it is mounted).
+
+When an instance is created:
+
+ mkdir /sys/kernel/tracing/instances/foo
+
+The directory "foo" and all the files and directories underneath it will
+default to the ownership that "foo" had when it was created. A remount of
+tracefs will not affect it.
+
+If a user were to modify the permissions of any file or directory in
+tracefs, it will also no longer be modified by a change in ownership of a
+remount.
+
+The events directory, if it is in the top level instance, will use the
+tracefs root inode as the default ownership for itself and all the files and
+directories below it.
+
+For the events directory in an instance ("foo"), it will keep the ownership
+of what it was when it was created, and that will be used as the default
+ownership for the files and directories beneath it.
+
+Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wjVdGkjDXBbvLn2wbZnqP4UsH46E3gqJ9m7UG6DpX2+WA@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240103215016.1e0c9811@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 79 ++++++++++++++++++
+ fs/tracefs/inode.c | 198 ++++++++++++++++++++++++++---------------------
+ fs/tracefs/internal.h | 3
+ 3 files changed, 190 insertions(+), 90 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -45,6 +45,7 @@ enum {
+ EVENTFS_SAVE_MODE = BIT(16),
+ EVENTFS_SAVE_UID = BIT(17),
+ EVENTFS_SAVE_GID = BIT(18),
++ EVENTFS_TOPLEVEL = BIT(19),
+ };
+
+ #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
+@@ -115,10 +116,17 @@ static int eventfs_set_attr(struct mnt_i
+ * The events directory dentry is never freed, unless its
+ * part of an instance that is deleted. It's attr is the
+ * default for its child files and directories.
+- * Do not update it. It's not used for its own mode or ownership
++ * Do not update it. It's not used for its own mode or ownership.
+ */
+- if (!ei->is_events)
++ if (ei->is_events) {
++ /* But it still needs to know if it was modified */
++ if (iattr->ia_valid & ATTR_UID)
++ ei->attr.mode |= EVENTFS_SAVE_UID;
++ if (iattr->ia_valid & ATTR_GID)
++ ei->attr.mode |= EVENTFS_SAVE_GID;
++ } else {
+ update_attr(&ei->attr, iattr);
++ }
+
+ } else {
+ name = dentry->d_name.name;
+@@ -136,9 +144,66 @@ static int eventfs_set_attr(struct mnt_i
+ return ret;
+ }
+
++static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
++{
++ struct inode *inode;
++
++ /* Only update if the "events" was on the top level */
++ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
++ return;
++
++ /* Get the tracefs root inode. */
++ inode = d_inode(dentry->d_sb->s_root);
++ ei->attr.uid = inode->i_uid;
++ ei->attr.gid = inode->i_gid;
++}
++
++static void set_top_events_ownership(struct inode *inode)
++{
++ struct tracefs_inode *ti = get_tracefs(inode);
++ struct eventfs_inode *ei = ti->private;
++ struct dentry *dentry;
++
++ /* The top events directory doesn't get automatically updated */
++ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
++ return;
++
++ dentry = ei->dentry;
++
++ update_top_events_attr(ei, dentry);
++
++ if (!(ei->attr.mode & EVENTFS_SAVE_UID))
++ inode->i_uid = ei->attr.uid;
++
++ if (!(ei->attr.mode & EVENTFS_SAVE_GID))
++ inode->i_gid = ei->attr.gid;
++}
++
++static int eventfs_get_attr(struct mnt_idmap *idmap,
++ const struct path *path, struct kstat *stat,
++ u32 request_mask, unsigned int flags)
++{
++ struct dentry *dentry = path->dentry;
++ struct inode *inode = d_backing_inode(dentry);
++
++ set_top_events_ownership(inode);
++
++ generic_fillattr(idmap, request_mask, inode, stat);
++ return 0;
++}
++
++static int eventfs_permission(struct mnt_idmap *idmap,
++ struct inode *inode, int mask)
++{
++ set_top_events_ownership(inode);
++ return generic_permission(idmap, inode, mask);
++}
++
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+ .lookup = eventfs_root_lookup,
+ .setattr = eventfs_set_attr,
++ .getattr = eventfs_get_attr,
++ .permission = eventfs_permission,
+ };
+
+ static const struct inode_operations eventfs_file_inode_operations = {
+@@ -174,6 +239,8 @@ static struct eventfs_inode *eventfs_fin
+ } while (!ei->is_events);
+ mutex_unlock(&eventfs_mutex);
+
++ update_top_events_attr(ei, dentry);
++
+ return ei;
+ }
+
+@@ -887,6 +954,14 @@ struct eventfs_inode *eventfs_create_eve
+ uid = d_inode(dentry->d_parent)->i_uid;
+ gid = d_inode(dentry->d_parent)->i_gid;
+
++ /*
++ * If the events directory is of the top instance, then parent
++ * is NULL. Set the attr.mode to reflect this and its permissions will
++ * default to the tracefs root dentry.
++ */
++ if (!parent)
++ ei->attr.mode = EVENTFS_TOPLEVEL;
++
+ /* This is used as the default ownership of the files and directories */
+ ei->attr.uid = uid;
+ ei->attr.gid = gid;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -91,6 +91,7 @@ static int tracefs_syscall_mkdir(struct
+ struct inode *inode, struct dentry *dentry,
+ umode_t mode)
+ {
++ struct tracefs_inode *ti;
+ char *name;
+ int ret;
+
+@@ -99,6 +100,15 @@ static int tracefs_syscall_mkdir(struct
+ return -ENOMEM;
+
+ /*
++ * This is a new directory that does not take the default of
++ * the rootfs. It becomes the default permissions for all the
++ * files and directories underneath it.
++ */
++ ti = get_tracefs(inode);
++ ti->flags |= TRACEFS_INSTANCE_INODE;
++ ti->private = inode;
++
++ /*
+ * The mkdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * mkdir routine to handle races.
+@@ -141,10 +151,76 @@ static int tracefs_syscall_rmdir(struct
+ return ret;
+ }
+
+-static const struct inode_operations tracefs_dir_inode_operations = {
++static void set_tracefs_inode_owner(struct inode *inode)
++{
++ struct tracefs_inode *ti = get_tracefs(inode);
++ struct inode *root_inode = ti->private;
++
++ /*
++ * If this inode has never been referenced, then update
++ * the permissions to the superblock.
++ */
++ if (!(ti->flags & TRACEFS_UID_PERM_SET))
++ inode->i_uid = root_inode->i_uid;
++
++ if (!(ti->flags & TRACEFS_GID_PERM_SET))
++ inode->i_gid = root_inode->i_gid;
++}
++
++static int tracefs_permission(struct mnt_idmap *idmap,
++ struct inode *inode, int mask)
++{
++ set_tracefs_inode_owner(inode);
++ return generic_permission(idmap, inode, mask);
++}
++
++static int tracefs_getattr(struct mnt_idmap *idmap,
++ const struct path *path, struct kstat *stat,
++ u32 request_mask, unsigned int flags)
++{
++ struct inode *inode = d_backing_inode(path->dentry);
++
++ set_tracefs_inode_owner(inode);
++ generic_fillattr(idmap, request_mask, inode, stat);
++ return 0;
++}
++
++static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
++ struct iattr *attr)
++{
++ unsigned int ia_valid = attr->ia_valid;
++ struct inode *inode = d_inode(dentry);
++ struct tracefs_inode *ti = get_tracefs(inode);
++
++ if (ia_valid & ATTR_UID)
++ ti->flags |= TRACEFS_UID_PERM_SET;
++
++ if (ia_valid & ATTR_GID)
++ ti->flags |= TRACEFS_GID_PERM_SET;
++
++ return simple_setattr(idmap, dentry, attr);
++}
++
++static const struct inode_operations tracefs_instance_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .mkdir = tracefs_syscall_mkdir,
+ .rmdir = tracefs_syscall_rmdir,
++ .permission = tracefs_permission,
++ .getattr = tracefs_getattr,
++ .setattr = tracefs_setattr,
++};
++
++static const struct inode_operations tracefs_dir_inode_operations = {
++ .lookup = simple_lookup,
++ .permission = tracefs_permission,
++ .getattr = tracefs_getattr,
++ .setattr = tracefs_setattr,
++};
++
++static const struct inode_operations tracefs_file_inode_operations = {
++ .permission = tracefs_permission,
++ .getattr = tracefs_getattr,
++ .setattr = tracefs_setattr,
+ };
+
+ struct inode *tracefs_get_inode(struct super_block *sb)
+@@ -183,87 +259,6 @@ struct tracefs_fs_info {
+ struct tracefs_mount_opts mount_opts;
+ };
+
+-static void change_gid(struct dentry *dentry, kgid_t gid)
+-{
+- if (!dentry->d_inode)
+- return;
+- dentry->d_inode->i_gid = gid;
+-}
+-
+-/*
+- * Taken from d_walk, but without he need for handling renames.
+- * Nothing can be renamed while walking the list, as tracefs
+- * does not support renames. This is only called when mounting
+- * or remounting the file system, to set all the files to
+- * the given gid.
+- */
+-static void set_gid(struct dentry *parent, kgid_t gid)
+-{
+- struct dentry *this_parent;
+- struct list_head *next;
+-
+- this_parent = parent;
+- spin_lock(&this_parent->d_lock);
+-
+- change_gid(this_parent, gid);
+-repeat:
+- next = this_parent->d_subdirs.next;
+-resume:
+- while (next != &this_parent->d_subdirs) {
+- struct tracefs_inode *ti;
+- struct list_head *tmp = next;
+- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+- next = tmp->next;
+-
+- /* Note, getdents() can add a cursor dentry with no inode */
+- if (!dentry->d_inode)
+- continue;
+-
+- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+-
+- change_gid(dentry, gid);
+-
+- /* If this is the events directory, update that too */
+- ti = get_tracefs(dentry->d_inode);
+- if (ti && (ti->flags & TRACEFS_EVENT_INODE))
+- eventfs_update_gid(dentry, gid);
+-
+- if (!list_empty(&dentry->d_subdirs)) {
+- spin_unlock(&this_parent->d_lock);
+- spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+- this_parent = dentry;
+- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+- goto repeat;
+- }
+- spin_unlock(&dentry->d_lock);
+- }
+- /*
+- * All done at this level ... ascend and resume the search.
+- */
+- rcu_read_lock();
+-ascend:
+- if (this_parent != parent) {
+- struct dentry *child = this_parent;
+- this_parent = child->d_parent;
+-
+- spin_unlock(&child->d_lock);
+- spin_lock(&this_parent->d_lock);
+-
+- /* go into the first sibling still alive */
+- do {
+- next = child->d_child.next;
+- if (next == &this_parent->d_subdirs)
+- goto ascend;
+- child = list_entry(next, struct dentry, d_child);
+- } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+- rcu_read_unlock();
+- goto resume;
+- }
+- rcu_read_unlock();
+- spin_unlock(&this_parent->d_lock);
+- return;
+-}
+-
+ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+ {
+ substring_t args[MAX_OPT_ARGS];
+@@ -336,10 +331,8 @@ static int tracefs_apply_options(struct
+ if (!remount || opts->opts & BIT(Opt_uid))
+ inode->i_uid = opts->uid;
+
+- if (!remount || opts->opts & BIT(Opt_gid)) {
+- /* Set all the group ids to the mount option */
+- set_gid(sb->s_root, opts->gid);
+- }
++ if (!remount || opts->opts & BIT(Opt_gid))
++ inode->i_gid = opts->gid;
+
+ return 0;
+ }
+@@ -573,6 +566,26 @@ struct dentry *eventfs_end_creating(stru
+ return dentry;
+ }
+
++/* Find the inode that this will use for default */
++static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
++{
++ struct tracefs_inode *ti;
++
++ /* If parent is NULL then use root inode */
++ if (!parent)
++ return d_inode(inode->i_sb->s_root);
++
++ /* Find the inode that is flagged as an instance or the root inode */
++ while (!IS_ROOT(parent)) {
++ ti = get_tracefs(d_inode(parent));
++ if (ti->flags & TRACEFS_INSTANCE_INODE)
++ break;
++ parent = parent->d_parent;
++ }
++
++ return d_inode(parent);
++}
++
+ /**
+ * tracefs_create_file - create a file in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+@@ -603,6 +616,7 @@ struct dentry *tracefs_create_file(const
+ struct dentry *parent, void *data,
+ const struct file_operations *fops)
+ {
++ struct tracefs_inode *ti;
+ struct dentry *dentry;
+ struct inode *inode;
+
+@@ -621,7 +635,11 @@ struct dentry *tracefs_create_file(const
+ if (unlikely(!inode))
+ return tracefs_failed_creating(dentry);
+
++ ti = get_tracefs(inode);
++ ti->private = instance_inode(parent, inode);
++
+ inode->i_mode = mode;
++ inode->i_op = &tracefs_file_inode_operations;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
+ inode->i_private = data;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+@@ -634,6 +652,7 @@ struct dentry *tracefs_create_file(const
+ static struct dentry *__create_dir(const char *name, struct dentry *parent,
+ const struct inode_operations *ops)
+ {
++ struct tracefs_inode *ti;
+ struct dentry *dentry = tracefs_start_creating(name, parent);
+ struct inode *inode;
+
+@@ -651,6 +670,9 @@ static struct dentry *__create_dir(const
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
+
++ ti = get_tracefs(inode);
++ ti->private = instance_inode(parent, inode);
++
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+@@ -681,7 +703,7 @@ struct dentry *tracefs_create_dir(const
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
+- return __create_dir(name, parent, &simple_dir_inode_operations);
++ return __create_dir(name, parent, &tracefs_dir_inode_operations);
+ }
+
+ /**
+@@ -712,7 +734,7 @@ __init struct dentry *tracefs_create_ins
+ if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+ return NULL;
+
+- dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
++ dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations);
+ if (!dentry)
+ return NULL;
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -5,6 +5,9 @@
+ enum {
+ TRACEFS_EVENT_INODE = BIT(1),
+ TRACEFS_EVENT_TOP_INODE = BIT(2),
++ TRACEFS_GID_PERM_SET = BIT(3),
++ TRACEFS_UID_PERM_SET = BIT(4),
++ TRACEFS_INSTANCE_INODE = BIT(5),
+ };
+
+ struct tracefs_inode {
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:09:26 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:51 -0500
+Subject: tracefs: remove stale update_gid code
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120953.870617585@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 29142dc92c37d3259a33aef15b03e6ee25b0d188 upstream.
+
+The 'eventfs_update_gid()' function is no longer called, so remove it
+(and the helper function it uses).
+
+Link: https://lore.kernel.org/all/CAHk-=wj+DsZZ=2iTUkJ-Nojs9fjYMvPs1NuoM3yK7aTDtJfPYQ@mail.gmail.com/
+
+Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 38 --------------------------------------
+ fs/tracefs/internal.h | 1 -
+ 2 files changed, 39 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -281,44 +281,6 @@ static void update_inode_attr(struct den
+ inode->i_gid = attr->gid;
+ }
+
+-static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
+-{
+- struct eventfs_inode *ei_child;
+-
+- /* at most we have events/system/event */
+- if (WARN_ON_ONCE(level > 3))
+- return;
+-
+- ei->attr.gid = gid;
+-
+- if (ei->entry_attrs) {
+- for (int i = 0; i < ei->nr_entries; i++) {
+- ei->entry_attrs[i].gid = gid;
+- }
+- }
+-
+- /*
+- * Only eventfs_inode with dentries are updated, make sure
+- * all eventfs_inodes are updated. If one of the children
+- * do not have a dentry, this function must traverse it.
+- */
+- list_for_each_entry_srcu(ei_child, &ei->children, list,
+- srcu_read_lock_held(&eventfs_srcu)) {
+- if (!ei_child->dentry)
+- update_gid(ei_child, gid, level + 1);
+- }
+-}
+-
+-void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
+-{
+- struct eventfs_inode *ei = dentry->d_fsdata;
+- int idx;
+-
+- idx = srcu_read_lock(&eventfs_srcu);
+- update_gid(ei, gid, 0);
+- srcu_read_unlock(&eventfs_srcu, idx);
+-}
+-
+ /**
+ * create_file - create a file in the tracefs filesystem
+ * @name: the name of the file to create.
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
+-void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+ #endif /* _TRACEFS_INTERNAL_H */
--- /dev/null
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 13:10:43 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:52 -0500
+Subject: tracefs: Zero out the tracefs_inode when allocating it
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120954.038732037@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit d81786f53aec14fd4d56263145a0635afbc64617 upstream.
+
+eventfs uses the tracefs_inode and assumes that it's already initialized
+to zero. That is, it doesn't set fields to zero (like ti->private) after
+getting its tracefs_inode. This causes bugs due to stale values.
+
+Just initialize the entire structure to zero on allocation so there aren't
+any more surprises.
+
+This is only a partial fix for accesses to ti->private. The assignment still
+needs to be made before the dentry is instantiated.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.315825944@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c | 6 ++++--
+ fs/tracefs/internal.h | 3 ++-
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode
+ if (!ti)
+ return NULL;
+
+- ti->flags = 0;
+-
+ return &ti->vfs_inode;
+ }
+
+@@ -779,7 +777,11 @@ static void init_once(void *foo)
+ {
+ struct tracefs_inode *ti = (struct tracefs_inode *) foo;
+
++ /* inode_init_once() calls memset() on the vfs_inode portion */
+ inode_init_once(&ti->vfs_inode);
++
++ /* Zero out the rest */
++ memset_after(ti, 0, vfs_inode);
+ }
+
+ static int __init tracefs_init(void)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -11,9 +11,10 @@ enum {
+ };
+
+ struct tracefs_inode {
++ struct inode vfs_inode;
++ /* The below gets initialized with memset_after(ti, 0, vfs_inode) */
+ unsigned long flags;
+ void *private;
+- struct inode vfs_inode;
+ };
+
+ /*