git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 20 Feb 2024 15:06:53 +0000 (16:06 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 20 Feb 2024 15:06:53 +0000 (16:06 +0100)
added patches:
eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch
eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch
eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch
eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch
eventfs-fix-bitwise-fields-for-is_events.patch
eventfs-fix-events-beyond-name_max-blocking-tasks.patch
eventfs-fix-failure-path-in-eventfs_create_events_dir.patch
eventfs-fix-file-and-directory-uid-and-gid-ownership.patch
eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch
eventfs-fix-typo-in-eventfs_inode-union-comment.patch
eventfs-fix-warn_on-in-create_file_dentry.patch
eventfs-get-rid-of-dentry-pointers-without-refcounts.patch
eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch
eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch
eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch
eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch
eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch
eventfs-initialize-the-tracefs-inode-properly.patch
eventfs-keep-all-directory-links-at-1.patch
eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch
eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch
eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch
eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch
eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch
eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch
eventfs-remove-fsnotify-functions-from-lookup.patch
eventfs-remove-is_freed-union-with-rcu-head.patch
eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch
eventfs-remove-special-processing-of-dput-of-events-directory.patch
eventfs-remove-unused-d_parent-pointer-field.patch
eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch
eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch
eventfs-save-ownership-and-mode.patch
eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch
eventfs-stop-using-dcache_readdir-for-getdents.patch
eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch
eventfs-use-err_cast-in-eventfs_create_events_dir.patch
eventfs-use-eventfs_remove_events_dir.patch
eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch
eventfs-use-kcalloc-instead-of-kzalloc.patch
eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch
nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch
revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
revert-eventfs-remove-is_freed-union-with-rcu-head.patch
revert-eventfs-save-ownership-and-mode.patch
revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch
tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch
tracefs-dentry-lookup-crapectomy.patch
tracefs-eventfs-modify-mismatched-function-name.patch
tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch
tracefs-remove-stale-update_gid-code.patch
tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch

59 files changed:
queue-6.6/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch [new file with mode: 0644]
queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch [new file with mode: 0644]
queue-6.6/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch [new file with mode: 0644]
queue-6.6/eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch [new file with mode: 0644]
queue-6.6/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch [new file with mode: 0644]
queue-6.6/eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-bitwise-fields-for-is_events.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-events-beyond-name_max-blocking-tasks.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-failure-path-in-eventfs_create_events_dir.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-file-and-directory-uid-and-gid-ownership.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-typo-in-eventfs_inode-union-comment.patch [new file with mode: 0644]
queue-6.6/eventfs-fix-warn_on-in-create_file_dentry.patch [new file with mode: 0644]
queue-6.6/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch [new file with mode: 0644]
queue-6.6/eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch [new file with mode: 0644]
queue-6.6/eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch [new file with mode: 0644]
queue-6.6/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch [new file with mode: 0644]
queue-6.6/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch [new file with mode: 0644]
queue-6.6/eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch [new file with mode: 0644]
queue-6.6/eventfs-initialize-the-tracefs-inode-properly.patch [new file with mode: 0644]
queue-6.6/eventfs-keep-all-directory-links-at-1.patch [new file with mode: 0644]
queue-6.6/eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch [new file with mode: 0644]
queue-6.6/eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch [new file with mode: 0644]
queue-6.6/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-fsnotify-functions-from-lookup.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-special-processing-of-dput-of-events-directory.patch [new file with mode: 0644]
queue-6.6/eventfs-remove-unused-d_parent-pointer-field.patch [new file with mode: 0644]
queue-6.6/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch [new file with mode: 0644]
queue-6.6/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch [new file with mode: 0644]
queue-6.6/eventfs-save-ownership-and-mode.patch [new file with mode: 0644]
queue-6.6/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch [new file with mode: 0644]
queue-6.6/eventfs-stop-using-dcache_readdir-for-getdents.patch [new file with mode: 0644]
queue-6.6/eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch [new file with mode: 0644]
queue-6.6/eventfs-use-err_cast-in-eventfs_create_events_dir.patch [new file with mode: 0644]
queue-6.6/eventfs-use-eventfs_remove_events_dir.patch [new file with mode: 0644]
queue-6.6/eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch [new file with mode: 0644]
queue-6.6/eventfs-use-kcalloc-instead-of-kzalloc.patch [new file with mode: 0644]
queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch [new file with mode: 0644]
queue-6.6/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch [new file with mode: 0644]
queue-6.6/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-remove-is_freed-union-with-rcu-head.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-save-ownership-and-mode.patch [new file with mode: 0644]
queue-6.6/revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch [new file with mode: 0644]
queue-6.6/tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch [new file with mode: 0644]
queue-6.6/tracefs-dentry-lookup-crapectomy.patch [new file with mode: 0644]
queue-6.6/tracefs-eventfs-modify-mismatched-function-name.patch [new file with mode: 0644]
queue-6.6/tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch [new file with mode: 0644]
queue-6.6/tracefs-remove-stale-update_gid-code.patch [new file with mode: 0644]
queue-6.6/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch [new file with mode: 0644]

diff --git a/queue-6.6/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch b/queue-6.6/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch
new file mode 100644 (file)
index 0000000..f6bd0a9
--- /dev/null
@@ -0,0 +1,125 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:35 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:57 -0500
+Subject: eventfs: Clean up dentry ops and add revalidate function
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.845943821@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 8dce06e98c70a7fcbb4bca7d90faf40522e65c58 upstream.
+
+In order for the dentries to stay up-to-date with the eventfs changes,
+just add a 'd_revalidate' function that checks the 'is_freed' bit.
+
+Also, clean up the dentry release to actually use d_release() rather
+than the slightly odd d_iput() function.  We don't care about the inode,
+all we want to do is to get rid of the refcount to the eventfs data
+added by dentry->d_fsdata.
+
+It would probably be cleaner to make eventfs its own filesystem, or at
+least set its own dentry ops when looking up eventfs files.  But as it
+is, only eventfs dentries use d_fsdata, so we don't really need to split
+these things up by use.
+
+Another thing that might be worth doing is to make all eventfs lookups
+mark their dentries as not worth caching.  We could do that with
+d_delete(), but the DCACHE_DONTCACHE flag would likely be even better.
+
+As it is, the dentries are all freeable, but they only tend to get freed
+at memory pressure rather than more proactively.  But that's a separate
+issue.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.124644253@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    5 ++---
+ fs/tracefs/inode.c       |   27 ++++++++++++++++++---------
+ fs/tracefs/internal.h    |    3 ++-
+ 3 files changed, 22 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -378,13 +378,12 @@ static void free_ei(struct eventfs_inode
+ }
+ /**
+- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+- * @ti: the tracefs_inode of the dentry
++ * eventfs_d_release - dentry is going away
+  * @dentry: dentry which has the reference to remove.
+  *
+  * Remove the association between a dentry from an eventfs_inode.
+  */
+-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
++void eventfs_d_release(struct dentry *dentry)
+ {
+       struct eventfs_inode *ei;
+       int i;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -377,21 +377,30 @@ static const struct super_operations tra
+       .show_options   = tracefs_show_options,
+ };
+-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
++/*
++ * It would be cleaner if eventfs had its own dentry ops.
++ *
++ * Note that d_revalidate is called potentially under RCU,
++ * so it can't take the eventfs mutex etc. It's fine - if
++ * we open a file just as it's marked dead, things will
++ * still work just fine, and just see the old stale case.
++ */
++static void tracefs_d_release(struct dentry *dentry)
+ {
+-      struct tracefs_inode *ti;
++      if (dentry->d_fsdata)
++              eventfs_d_release(dentry);
++}
+-      if (!dentry || !inode)
+-              return;
++static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
++{
++      struct eventfs_inode *ei = dentry->d_fsdata;
+-      ti = get_tracefs(inode);
+-      if (ti && ti->flags & TRACEFS_EVENT_INODE)
+-              eventfs_set_ei_status_free(ti, dentry);
+-      iput(inode);
++      return !(ei && ei->is_freed);
+ }
+ static const struct dentry_operations tracefs_dentry_operations = {
+-      .d_iput = tracefs_dentry_iput,
++      .d_revalidate = tracefs_d_revalidate,
++      .d_release = tracefs_d_release,
+ };
+ static int trace_fill_super(struct super_block *sb, void *data, int silent)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -78,6 +78,7 @@ struct dentry *tracefs_start_creating(co
+ struct dentry *tracefs_end_creating(struct dentry *dentry);
+ struct dentry *tracefs_failed_creating(struct dentry *dentry);
+ struct inode *tracefs_get_inode(struct super_block *sb);
+-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
++
++void eventfs_d_release(struct dentry *dentry);
+ #endif /* _TRACEFS_INTERNAL_H */
diff --git a/queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch b/queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
new file mode 100644 (file)
index 0000000..6c32486
--- /dev/null
@@ -0,0 +1,282 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:54 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:26 -0500
+Subject: eventfs: Delete eventfs_inode when the last dentry is freed
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120949.792406858@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 020010fbfa202aa528a52743eba4ab0da3400a4e upstream.
+
+There exists a race between holding a reference of an eventfs_inode dentry
+and the freeing of the eventfs_inode. If user space has a dentry held long
+enough, it may still be able to access the dentry's eventfs_inode after it
+has been freed.
+
+To prevent this, have the eventfs_inode freed via the last dput() (or via
+RCU if the eventfs_inode does not have a dentry).
+
+This means reintroducing the eventfs_inode del_list field as a temporary
+place to put the eventfs_inode. It needs to mark it as freed (via the
+list) but also must invalidate the dentry immediately as the return from
+eventfs_remove_dir() expects that they are. But the dentry invalidation
+must not be called under the eventfs_mutex, so it must be done after the
+eventfs_inode is marked as free (put on a deletion list).
+
+Link: https://lkml.kernel.org/r/20231101172650.123479767@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  146 +++++++++++++++++++++--------------------------
+ fs/tracefs/internal.h    |    2 
+ 2 files changed, 69 insertions(+), 79 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -85,8 +85,7 @@ static int eventfs_set_attr(struct mnt_i
+       mutex_lock(&eventfs_mutex);
+       ei = dentry->d_fsdata;
+-      /* The LSB is set when the eventfs_inode is being freed */
+-      if (((unsigned long)ei & 1UL) || ei->is_freed) {
++      if (ei->is_freed) {
+               /* Do not allow changes if the event is about to be removed. */
+               mutex_unlock(&eventfs_mutex);
+               return -ENODEV;
+@@ -276,35 +275,17 @@ static void free_ei(struct eventfs_inode
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+       struct tracefs_inode *ti_parent;
+-      struct eventfs_inode *ei_child, *tmp;
+       struct eventfs_inode *ei;
+       int i;
+       /* The top level events directory may be freed by this */
+       if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+-              LIST_HEAD(ef_del_list);
+-
+               mutex_lock(&eventfs_mutex);
+-
+               ei = ti->private;
+-
+-              /* Record all the top level files */
+-              list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                                       lockdep_is_held(&eventfs_mutex)) {
+-                      list_add_tail(&ei_child->del_list, &ef_del_list);
+-              }
+-
+               /* Nothing should access this, but just in case! */
+               ti->private = NULL;
+-
+               mutex_unlock(&eventfs_mutex);
+-              /* Now safely free the top level files and their children */
+-              list_for_each_entry_safe(ei_child, tmp, &ef_del_list, del_list) {
+-                      list_del(&ei_child->del_list);
+-                      eventfs_remove_dir(ei_child);
+-              }
+-
+               free_ei(ei);
+               return;
+       }
+@@ -319,14 +300,6 @@ void eventfs_set_ei_status_free(struct t
+       if (!ei)
+               goto out;
+-      /*
+-       * If ei was freed, then the LSB bit is set for d_fsdata.
+-       * But this should not happen, as it should still have a
+-       * ref count that prevents it. Warn in case it does.
+-       */
+-      if (WARN_ON_ONCE((unsigned long)ei & 1))
+-              goto out;
+-
+       /* This could belong to one of the files of the ei */
+       if (ei->dentry != dentry) {
+               for (i = 0; i < ei->nr_entries; i++) {
+@@ -336,6 +309,8 @@ void eventfs_set_ei_status_free(struct t
+               if (WARN_ON_ONCE(i == ei->nr_entries))
+                       goto out;
+               ei->d_children[i] = NULL;
++      } else if (ei->is_freed) {
++              free_ei(ei);
+       } else {
+               ei->dentry = NULL;
+       }
+@@ -962,13 +937,65 @@ struct eventfs_inode *eventfs_create_eve
+       return ERR_PTR(-ENOMEM);
+ }
++static LLIST_HEAD(free_list);
++
++static void eventfs_workfn(struct work_struct *work)
++{
++        struct eventfs_inode *ei, *tmp;
++        struct llist_node *llnode;
++
++      llnode = llist_del_all(&free_list);
++        llist_for_each_entry_safe(ei, tmp, llnode, llist) {
++              /* This dput() matches the dget() from unhook_dentry() */
++              for (int i = 0; i < ei->nr_entries; i++) {
++                      if (ei->d_children[i])
++                              dput(ei->d_children[i]);
++              }
++              /* This should only get here if it had a dentry */
++              if (!WARN_ON_ONCE(!ei->dentry))
++                      dput(ei->dentry);
++        }
++}
++
++static DECLARE_WORK(eventfs_work, eventfs_workfn);
++
+ static void free_rcu_ei(struct rcu_head *head)
+ {
+       struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
++      if (ei->dentry) {
++              /* Do not free the ei until all references of dentry are gone */
++              if (llist_add(&ei->llist, &free_list))
++                      queue_work(system_unbound_wq, &eventfs_work);
++              return;
++      }
++
++      /* If the ei doesn't have a dentry, neither should its children */
++      for (int i = 0; i < ei->nr_entries; i++) {
++              WARN_ON_ONCE(ei->d_children[i]);
++      }
++
+       free_ei(ei);
+ }
++static void unhook_dentry(struct dentry *dentry)
++{
++      if (!dentry)
++              return;
++
++      /* Keep the dentry from being freed yet (see eventfs_workfn()) */
++      dget(dentry);
++
++      dentry->d_fsdata = NULL;
++      d_invalidate(dentry);
++      mutex_lock(&eventfs_mutex);
++      /* dentry should now have at least a single reference */
++      WARN_ONCE((int)d_count(dentry) < 1,
++                "dentry %px (%s) less than one reference (%d) after invalidate\n",
++                dentry, dentry->d_name.name, d_count(dentry));
++      mutex_unlock(&eventfs_mutex);
++}
++
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
+@@ -1006,33 +1033,6 @@ static void eventfs_remove_rec(struct ev
+       list_add_tail(&ei->del_list, head);
+ }
+-static void unhook_dentry(struct dentry **dentry, struct dentry **list)
+-{
+-      if (*dentry) {
+-              unsigned long ptr = (unsigned long)*list;
+-
+-              /* Keep the dentry from being freed yet */
+-              dget(*dentry);
+-
+-              /*
+-               * Paranoid: The dget() above should prevent the dentry
+-               * from being freed and calling eventfs_set_ei_status_free().
+-               * But just in case, set the link list LSB pointer to 1
+-               * and have eventfs_set_ei_status_free() check that to
+-               * make sure that if it does happen, it will not think
+-               * the d_fsdata is an eventfs_inode.
+-               *
+-               * For this to work, no eventfs_inode should be allocated
+-               * on a odd space, as the ef should always be allocated
+-               * to be at least word aligned. Check for that too.
+-               */
+-              WARN_ON_ONCE(ptr & 1);
+-
+-              (*dentry)->d_fsdata = (void *)(ptr | 1);
+-              *list = *dentry;
+-              *dentry = NULL;
+-      }
+-}
+ /**
+  * eventfs_remove_dir - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
+@@ -1043,40 +1043,28 @@ void eventfs_remove_dir(struct eventfs_i
+ {
+       struct eventfs_inode *tmp;
+       LIST_HEAD(ei_del_list);
+-      struct dentry *dentry_list = NULL;
+-      struct dentry *dentry;
+-      int i;
+       if (!ei)
+               return;
++      /*
++       * Move the deleted eventfs_inodes onto the ei_del_list
++       * which will also set the is_freed value. Note, this has to be
++       * done under the eventfs_mutex, but the deletions of
++       * the dentries must be done outside the eventfs_mutex.
++       * Hence moving them to this temporary list.
++       */
+       mutex_lock(&eventfs_mutex);
+       eventfs_remove_rec(ei, &ei_del_list, 0);
++      mutex_unlock(&eventfs_mutex);
+       list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
+-              for (i = 0; i < ei->nr_entries; i++)
+-                      unhook_dentry(&ei->d_children[i], &dentry_list);
+-              unhook_dentry(&ei->dentry, &dentry_list);
++              for (int i = 0; i < ei->nr_entries; i++)
++                      unhook_dentry(ei->d_children[i]);
++              unhook_dentry(ei->dentry);
++              list_del(&ei->del_list);
+               call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+       }
+-      mutex_unlock(&eventfs_mutex);
+-
+-      while (dentry_list) {
+-              unsigned long ptr;
+-
+-              dentry = dentry_list;
+-              ptr = (unsigned long)dentry->d_fsdata & ~1UL;
+-              dentry_list = (struct dentry *)ptr;
+-              dentry->d_fsdata = NULL;
+-              d_invalidate(dentry);
+-              mutex_lock(&eventfs_mutex);
+-              /* dentry should now have at least a single reference */
+-              WARN_ONCE((int)d_count(dentry) < 1,
+-                        "dentry %px (%s) less than one reference (%d) after invalidate\n",
+-                        dentry, dentry->d_name.name, d_count(dentry));
+-              mutex_unlock(&eventfs_mutex);
+-              dput(dentry);
+-      }
+ }
+ /**
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -54,10 +54,12 @@ struct eventfs_inode {
+       void                            *data;
+       /*
+        * Union - used for deletion
++       * @llist:      for calling dput() if needed after RCU
+        * @del_list:   list of eventfs_inode to delete
+        * @rcu:        eventfs_inode to delete in RCU
+        */
+       union {
++              struct llist_node       llist;
+               struct list_head        del_list;
+               struct rcu_head         rcu;
+       };
diff --git a/queue-6.6/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch b/queue-6.6/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch
new file mode 100644 (file)
index 0000000..88bfbb9
--- /dev/null
@@ -0,0 +1,205 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:53 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:44 -0500
+Subject: eventfs: Do ctx->pos update for all iterations in eventfs_iterate()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.722064231@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 1e4624eb5a0ecaae0d2c4e3019bece119725bb98 upstream.
+
+The ctx->pos was only updated when it added an entry, but the "skip to
+current pos" check (c--) happened for every loop regardless of if the
+entry was added or not. This inconsistency caused readdir to be incorrect.
+
+It was due to:
+
+       for (i = 0; i < ei->nr_entries; i++) {
+
+               if (c > 0) {
+                       c--;
+                       continue;
+               }
+
+               mutex_lock(&eventfs_mutex);
+               /* If ei->is_freed then just bail here, nothing more to do */
+               if (ei->is_freed) {
+                       mutex_unlock(&eventfs_mutex);
+                       goto out;
+               }
+               r = entry->callback(name, &mode, &cdata, &fops);
+               mutex_unlock(&eventfs_mutex);
+
+               [..]
+               ctx->pos++;
+       }
+
+But this can cause the iterator to return a file that was already read.
+That's because of the way the callback() works. Some events may not have
+all files, and the callback can return 0 to tell eventfs to skip the file
+for this directory.
+
+For instance, we have:
+
+ # ls /sys/kernel/tracing/events/ftrace/function
+format  hist  hist_debug  id  inject
+
+and
+
+ # ls /sys/kernel/tracing/events/sched/sched_switch/
+enable  filter  format  hist  hist_debug  id  inject  trigger
+
+Where the function directory is missing "enable", "filter" and
+"trigger". That's because the callback() for events has:
+
+static int event_callback(const char *name, umode_t *mode, void **data,
+                         const struct file_operations **fops)
+{
+       struct trace_event_file *file = *data;
+       struct trace_event_call *call = file->event_call;
+
+[..]
+
+       /*
+        * Only event directories that can be enabled should have
+        * triggers or filters, with the exception of the "print"
+        * event that can have a "trigger" file.
+        */
+       if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+               if (call->class->reg && strcmp(name, "enable") == 0) {
+                       *mode = TRACE_MODE_WRITE;
+                       *fops = &ftrace_enable_fops;
+                       return 1;
+               }
+
+               if (strcmp(name, "filter") == 0) {
+                       *mode = TRACE_MODE_WRITE;
+                       *fops = &ftrace_event_filter_fops;
+                       return 1;
+               }
+       }
+
+       if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+           strcmp(trace_event_name(call), "print") == 0) {
+               if (strcmp(name, "trigger") == 0) {
+                       *mode = TRACE_MODE_WRITE;
+                       *fops = &event_trigger_fops;
+                       return 1;
+               }
+       }
+[..]
+       return 0;
+}
+
+Where the function event has the TRACE_EVENT_FL_IGNORE_ENABLE set.
+
+This means that the entries array elements for "enable", "filter" and
+"trigger" when called on the function event will have the callback return
+0 and not 1, to tell eventfs to skip these files for it.
+
+Because the "skip to current ctx->pos" check happened for all entries, but
+the ctx->pos++ only happened to entries that exist, it would confuse the
+reading of a directory. Which would cause:
+
+ # ls /sys/kernel/tracing/events/ftrace/function/
+format  hist  hist  hist_debug  hist_debug  id  inject  inject
+
+The missing "enable", "filter" and "trigger" caused ls to show "hist",
+"hist_debug" and "inject" twice.
+
+Update the ctx->pos for every iteration to keep its update and the "skip"
+update consistent. This also means that on error, the ctx->pos needs to be
+decremented if it was incremented without adding something.
+
+Link: https://lore.kernel.org/all/20240104150500.38b15a62@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.172295263@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 493ec81a8fb8e ("eventfs: Stop using dcache_readdir() for getdents()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -760,6 +760,8 @@ static int eventfs_iterate(struct file *
+                       continue;
+               }
++              ctx->pos++;
++
+               if (ei_child->is_freed)
+                       continue;
+@@ -767,13 +769,12 @@ static int eventfs_iterate(struct file *
+               dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+               if (!dentry)
+-                      goto out;
++                      goto out_dec;
+               ino = dentry->d_inode->i_ino;
+               dput(dentry);
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+-                      goto out;
+-              ctx->pos++;
++                      goto out_dec;
+       }
+       for (i = 0; i < ei->nr_entries; i++) {
+@@ -784,6 +785,8 @@ static int eventfs_iterate(struct file *
+                       continue;
+               }
++              ctx->pos++;
++
+               entry = &ei->entries[i];
+               name = entry->name;
+@@ -791,7 +794,7 @@ static int eventfs_iterate(struct file *
+               /* If ei->is_freed then just bail here, nothing more to do */
+               if (ei->is_freed) {
+                       mutex_unlock(&eventfs_mutex);
+-                      goto out;
++                      goto out_dec;
+               }
+               r = entry->callback(name, &mode, &cdata, &fops);
+               mutex_unlock(&eventfs_mutex);
+@@ -800,19 +803,23 @@ static int eventfs_iterate(struct file *
+               dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+               if (!dentry)
+-                      goto out;
++                      goto out_dec;
+               ino = dentry->d_inode->i_ino;
+               dput(dentry);
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+-                      goto out;
+-              ctx->pos++;
++                      goto out_dec;
+       }
+       ret = 1;
+  out:
+       srcu_read_unlock(&eventfs_srcu, idx);
+       return ret;
++
++ out_dec:
++      /* Incremented ctx->pos without adding something, reset it */
++      ctx->pos--;
++      goto out;
+ }
+ /**
diff --git a/queue-6.6/eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch b/queue-6.6/eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
new file mode 100644 (file)
index 0000000..6fb1e40
--- /dev/null
@@ -0,0 +1,56 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:53 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:33 -0500
+Subject: eventfs: Do not allow NULL parent to eventfs_start_creating()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.930155940@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit fc4561226feaad5fcdcb55646c348d77b8ee69c5 upstream.
+
+The eventfs directory is dynamically created via the meta data supplied by
+the existing trace events. All files and directories in eventfs have a
+parent. Do not allow NULL to be passed into eventfs_start_creating() as
+the parent because that should never happen. Warn if it does.
+
+Link: https://lkml.kernel.org/r/20231121231112.693841807@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c |   13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(co
+       struct dentry *dentry;
+       int error;
++      /* Must always have a parent. */
++      if (WARN_ON_ONCE(!parent))
++              return ERR_PTR(-EINVAL);
++
+       error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+                             &tracefs_mount_count);
+       if (error)
+               return ERR_PTR(error);
+-      /*
+-       * If the parent is not specified, we create it in the root.
+-       * We need the root dentry to do this, which is in the super
+-       * block. A pointer to that is in the struct vfsmount that we
+-       * have around.
+-       */
+-      if (!parent)
+-              parent = tracefs_mount->mnt_root;
+-
+       if (unlikely(IS_DEADDIR(parent->d_inode)))
+               dentry = ERR_PTR(-ENOENT);
+       else
diff --git a/queue-6.6/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch b/queue-6.6/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch
new file mode 100644 (file)
index 0000000..3b8ad6e
--- /dev/null
@@ -0,0 +1,144 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:48 -0500
+Subject: eventfs: Do not create dentries nor inodes in iterate_shared
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al  Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120953.380140896@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 852e46e239ee6db3cd220614cf8bce96e79227c2 upstream.
+
+The original eventfs code added a wrapper around the dcache_readdir open
+callback and created all the dentries and inodes at open, and incremented
+their ref count. A wrapper was added around the dcache_readdir release
+function to decrement all the ref counts of those created inodes and
+dentries. But this proved to be buggy[1] for when a kprobe was created
+during a dir read, it would create a dentry between the open and the
+release, and because the release would decrement all ref counts of all
+files and directories, that would include the kprobe directory that was
+not there to have its ref count incremented in open. This would cause the
+ref count to go to negative and later crash the kernel.
+
+To solve this, the dentries and inodes that were created and had their ref
+count upped in open needed to be saved. That list needed to be passed from
+the open to the release, so that the release would only decrement the ref
+counts of the entries that were incremented in the open.
+
+Unfortunately, the dcache_readdir logic was already using the
+file->private_data, which is the only field that can be used to pass
+information from the open to the release. What was done was the eventfs
+created another descriptor that had a void pointer to save the
+dcache_readdir pointer, and it wrapped all the callbacks, so that it could
+save the list of entries that had their ref counts incremented in the
+open, and pass it to the release. The wrapped callbacks would just put
+back the dcache_readdir pointer and call the functions it used so it could
+still use its data[2].
+
+But Linus had an issue with the "hijacking" of the file->private_data
+(unfortunately this discussion was on a security list, so no public link).
+We finally agreed on doing everything within the iterate_shared
+callback and leaving the dcache_readdir out of it[3]. All the information
+needed for the getents() could be created then.
+
+But this ended up being buggy too[4]. The iterate_shared callback was not
+the right place to create the dentries and inodes. Even Christian Brauner
+had issues with that[5].
+
+An attempt was made to go back to creating the inodes and dentries at
+the open, create an array to store the information in the
+file->private_data, and pass that information to the other callbacks.[6]
+
+The difference between that and the original method, is that it does not
+use dcache_readdir. It also does not up the ref counts of the dentries and
+pass them. Instead, it creates an array of a structure that saves the
+dentry's name and inode number. That information is used in the
+iterate_shared callback, and the array is freed in the dir release. The
+dentries and inodes created in the open are not used for the iterate_shared
+or release callbacks. Just their names and inode numbers.
+
+Linus did not like that either[7] and just wanted to remove the dentries
+being created in iterate_shared and use the hard coded inode numbers.
+
+[ All this while Linus enjoyed an unexpected vacation during the merge
+  window due to lack of power. ]
+
+[1] https://lore.kernel.org/linux-trace-kernel/20230919211804.230edf1e@gandalf.local.home/
+[2] https://lore.kernel.org/linux-trace-kernel/20230922163446.1431d4fa@gandalf.local.home/
+[3] https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org/
+[4] https://lore.kernel.org/all/202401152142.bfc28861-oliver.sang@intel.com/
+[5] https://lore.kernel.org/all/20240111-unzahl-gefegt-433acb8a841d@brauner/
+[6] https://lore.kernel.org/all/20240116114711.7e8637be@gandalf.local.home/
+[7] https://lore.kernel.org/all/20240116170154.5bf0a250@gandalf.local.home/
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.573784051@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al  Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: 493ec81a8fb8 ("eventfs: Stop using dcache_readdir() for getdents()")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401152142.bfc28861-oliver.sang@intel.com
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   20 +++++---------------
+ 1 file changed, 5 insertions(+), 15 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -727,8 +727,6 @@ static int eventfs_iterate(struct file *
+       struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+-      struct dentry *ei_dentry = NULL;
+-      struct dentry *dentry;
+       const char *name;
+       umode_t mode;
+       int idx;
+@@ -749,11 +747,11 @@ static int eventfs_iterate(struct file *
+       mutex_lock(&eventfs_mutex);
+       ei = READ_ONCE(ti->private);
+-      if (ei && !ei->is_freed)
+-              ei_dentry = READ_ONCE(ei->dentry);
++      if (ei && ei->is_freed)
++              ei = NULL;
+       mutex_unlock(&eventfs_mutex);
+-      if (!ei || !ei_dentry)
++      if (!ei)
+               goto out;
+       /*
+@@ -780,11 +778,7 @@ static int eventfs_iterate(struct file *
+               if (r <= 0)
+                       continue;
+-              dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+-              if (!dentry)
+-                      goto out;
+-              ino = dentry->d_inode->i_ino;
+-              dput(dentry);
++              ino = EVENTFS_FILE_INODE_INO;
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+                       goto out;
+@@ -808,11 +802,7 @@ static int eventfs_iterate(struct file *
+               name = ei_child->name;
+-              dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+-              if (!dentry)
+-                      goto out_dec;
+-              ino = dentry->d_inode->i_ino;
+-              dput(dentry);
++              ino = EVENTFS_DIR_INODE_INO;
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+                       goto out_dec;
diff --git a/queue-6.6/eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch b/queue-6.6/eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch
new file mode 100644 (file)
index 0000000..b86238a
--- /dev/null
@@ -0,0 +1,95 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:46 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:30 -0500
+Subject: eventfs: Do not invalidate dentry in create_file/dir_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Naresh Kamboju <naresh.kamboju@linaro.org>, Linux Kernel Functional Testing <lkft@linaro.org>
+Message-ID: <20240206120950.446963304@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 71cade82f2b553a74d046c015c986f2df165696f upstream.
+
+With the call to simple_recursive_removal() on the entire eventfs sub
+system when the directory is removed, it performs the d_invalidate on all
+the dentries when it is removed. There's no need to do clean ups when a
+dentry is being created while the directory is being deleted.
+
+As dentries are cleaned up by the simple_recursive_removal(), trying to
+do d_invalidate() in these functions will cause the dentry to be
+invalidated twice, and crash the kernel.
+
+Link: https://lore.kernel.org/all/20231116123016.140576-1-naresh.kamboju@linaro.org/
+Link: https://lkml.kernel.org/r/20231120235154.422970988@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 407c6726ca71 ("eventfs: Use simple_recursive_removal() to clean up dentries")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   19 ++++++-------------
+ 1 file changed, 6 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -326,7 +326,6 @@ create_file_dentry(struct eventfs_inode
+       struct eventfs_attr *attr = NULL;
+       struct dentry **e_dentry = &ei->d_children[idx];
+       struct dentry *dentry;
+-      bool invalidate = false;
+       mutex_lock(&eventfs_mutex);
+       if (ei->is_freed) {
+@@ -389,17 +388,14 @@ create_file_dentry(struct eventfs_inode
+                * Otherwise it means two dentries exist with the same name.
+                */
+               WARN_ON_ONCE(!ei->is_freed);
+-              invalidate = true;
++              dentry = NULL;
+       }
+       mutex_unlock(&eventfs_mutex);
+-      if (invalidate)
+-              d_invalidate(dentry);
+-
+-      if (lookup || invalidate)
++      if (lookup)
+               dput(dentry);
+-      return invalidate ? NULL : dentry;
++      return dentry;
+ }
+ /**
+@@ -439,7 +435,6 @@ static struct dentry *
+ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+                 struct dentry *parent, bool lookup)
+ {
+-      bool invalidate = false;
+       struct dentry *dentry = NULL;
+       mutex_lock(&eventfs_mutex);
+@@ -495,16 +490,14 @@ create_dir_dentry(struct eventfs_inode *
+                * Otherwise it means two dentries exist with the same name.
+                */
+               WARN_ON_ONCE(!ei->is_freed);
+-              invalidate = true;
++              dentry = NULL;
+       }
+       mutex_unlock(&eventfs_mutex);
+-      if (invalidate)
+-              d_invalidate(dentry);
+-      if (lookup || invalidate)
++      if (lookup)
+               dput(dentry);
+-      return invalidate ? NULL : dentry;
++      return dentry;
+ }
+ /**
diff --git a/queue-6.6/eventfs-fix-bitwise-fields-for-is_events.patch b/queue-6.6/eventfs-fix-bitwise-fields-for-is_events.patch
new file mode 100644 (file)
index 0000000..e772c88
--- /dev/null
@@ -0,0 +1,41 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:54 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:39 -0500
+Subject: eventfs: Fix bitwise fields for "is_events"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120951.901001747@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit fd56cd5f6d76e93356d9520cf9dabffe1e3d1aa0 upstream.
+
+A flag was needed to denote which eventfs_inode was the "events"
+directory, so a bit was taken from the "nr_entries" field, as there's not
+that many entries, and 2^30 is plenty. But the bit number for nr_entries
+was not updated to reflect the bit taken from it, which would add an
+unnecessary integer to the structure.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240102151832.7ca87275@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 7e8358edf503e ("eventfs: Fix file and directory uid and gid ownership")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -63,7 +63,7 @@ struct eventfs_inode {
+       };
+       unsigned int                    is_freed:1;
+       unsigned int                    is_events:1;
+-      unsigned int                    nr_entries:31;
++      unsigned int                    nr_entries:30;
+ };
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
diff --git a/queue-6.6/eventfs-fix-events-beyond-name_max-blocking-tasks.patch b/queue-6.6/eventfs-fix-events-beyond-name_max-blocking-tasks.patch
new file mode 100644 (file)
index 0000000..78fe13b
--- /dev/null
@@ -0,0 +1,52 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:23 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:35 -0500
+Subject: eventfs: Fix events beyond NAME_MAX blocking tasks
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Beau Belgrave <beaub@linux.microsoft.com>
+Message-ID: <20240206120951.255146556@rostedt.homelinux.com>
+
+From: Beau Belgrave <beaub@linux.microsoft.com>
+
+commit 5eaf7f0589c0d88178f0fbeebe0e0b7108258707 upstream.
+
+Eventfs uses simple_lookup(), however, it will fail if the name of the
+entry is beyond NAME_MAX length. When this error is encountered, eventfs
+still tries to create dentries instead of skipping the dentry creation.
+When the dentry is attempted to be created in this state d_wait_lookup()
+will loop forever, waiting for the lookup to be removed.
+
+Fix eventfs to return the error in simple_lookup() back to the caller
+instead of continuing to try to create the dentry.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231210213534.497-1-beaub@linux.microsoft.com
+
+Fixes: 63940449555e ("eventfs: Implement eventfs lookup, read, open functions")
+Link: https://lore.kernel.org/linux-trace-kernel/20231208183601.GA46-beaub@linux.microsoft.com/
+Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -546,6 +546,8 @@ static struct dentry *eventfs_root_looku
+               if (strcmp(ei_child->name, name) != 0)
+                       continue;
+               ret = simple_lookup(dir, dentry, flags);
++              if (IS_ERR(ret))
++                      goto out;
+               create_dir_dentry(ei, ei_child, ei_dentry, true);
+               created = true;
+               break;
+@@ -568,6 +570,8 @@ static struct dentry *eventfs_root_looku
+                       if (r <= 0)
+                               continue;
+                       ret = simple_lookup(dir, dentry, flags);
++                      if (IS_ERR(ret))
++                              goto out;
+                       create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+                                          fops, true);
+                       break;
diff --git a/queue-6.6/eventfs-fix-failure-path-in-eventfs_create_events_dir.patch b/queue-6.6/eventfs-fix-failure-path-in-eventfs_create_events_dir.patch
new file mode 100644 (file)
index 0000000..0dbc391
--- /dev/null
@@ -0,0 +1,48 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:15 -0500
+Subject: eventfs: Fix failure path in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Julia Lawall <julia.lawall@inria.fr>
+Message-ID: <20240206120948.003109160@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 7e8ad67c9b5c11e990c320ed7e7563f2301672a7 upstream.
+
+The failure path of allocating ei goes to a path that dereferences ei.
+Add another label that skips over the ei dereferences to do the rest of
+the clean up.
+
+Link: https://lore.kernel.org/all/70e7bace-561c-95f-1117-706c2c220bc@inria.fr/
+Link: https://lore.kernel.org/linux-trace-kernel/20231019204132.6662fef0@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -735,7 +735,7 @@ struct eventfs_inode *eventfs_create_eve
+       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+       if (!ei)
+-              goto fail;
++              goto fail_ei;
+       inode = tracefs_get_inode(dentry->d_sb);
+       if (unlikely(!inode))
+@@ -781,6 +781,7 @@ struct eventfs_inode *eventfs_create_eve
+  fail:
+       kfree(ei->d_children);
+       kfree(ei);
++ fail_ei:
+       tracefs_failed_creating(dentry);
+       return ERR_PTR(-ENOMEM);
+ }
diff --git a/queue-6.6/eventfs-fix-file-and-directory-uid-and-gid-ownership.patch b/queue-6.6/eventfs-fix-file-and-directory-uid-and-gid-ownership.patch
new file mode 100644 (file)
index 0000000..ffb835c
--- /dev/null
@@ -0,0 +1,321 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:44 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:37 -0500
+Subject: eventfs: Fix file and directory uid and gid ownership
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Dongliang Cui <cuidongliang390@gmail.com>, Hongyu Jin <hongyu.jin@unisoc.com>
+Message-ID: <20240206120951.578630343@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 7e8358edf503e87236c8d07f69ef0ed846dd5112 upstream.
+
+It was reported that when mounting the tracefs file system with a gid
+other than root, the ownership did not carry down to the eventfs directory
+due to the dynamic nature of it.
+
+A fix was done to solve this, but it had two issues.
+
+(a) if the attr passed into update_inode_attr() was NULL, it didn't do
+    anything. This is true for files that have not had a chown or chgrp
+    done to itself or any of its sibling files, as the attr is allocated
+    for all children when any one needs it.
+
+ # umount /sys/kernel/tracing
+ # mount -o rw,seclabel,relatime,gid=1000 -t tracefs nodev /mnt
+
+ # ls -ld /mnt/events/sched
+drwxr-xr-x 28 root rostedt 0 Dec 21 13:12 /mnt/events/sched/
+
+ # ls -ld /mnt/events/sched/sched_switch
+drwxr-xr-x 2 root rostedt 0 Dec 21 13:12 /mnt/events/sched/sched_switch/
+
+But when checking the files:
+
+ # ls -l /mnt/events/sched/sched_switch
+total 0
+-rw-r----- 1 root root 0 Dec 21 13:12 enable
+-rw-r----- 1 root root 0 Dec 21 13:12 filter
+-r--r----- 1 root root 0 Dec 21 13:12 format
+-r--r----- 1 root root 0 Dec 21 13:12 hist
+-r--r----- 1 root root 0 Dec 21 13:12 id
+-rw-r----- 1 root root 0 Dec 21 13:12 trigger
+
+(b) When the attr does not denote the UID or GID, it defaulted to using
+    the parent uid or gid. This is incorrect as changing the parent
+    uid or gid will automatically change all its children.
+
+ # chgrp tracing /mnt/events/timer
+
+ # ls -ld /mnt/events/timer
+drwxr-xr-x 2 root tracing 0 Dec 21 14:34 /mnt/events/timer
+
+ # ls -l /mnt/events/timer
+total 0
+-rw-r----- 1 root root    0 Dec 21 14:35 enable
+-rw-r----- 1 root root    0 Dec 21 14:35 filter
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_cancel
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_expire_entry
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_expire_exit
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_init
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 hrtimer_start
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 itimer_expire
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 itimer_state
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 tick_stop
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_cancel
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_expire_entry
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_expire_exit
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_init
+drwxr-xr-x 2 root tracing 0 Dec 21 14:35 timer_start
+
+At first it was thought that this could be easily fixed by just making the
+default ownership of the superblock when it was mounted. But this does not
+handle the case of:
+
+ # chgrp tracing instances
+ # mkdir instances/foo
+
+If the superblock was used, then the group ownership would be that of what
+it was when it was mounted, when it should instead be "tracing".
+
+Instead, set a flag for the top level eventfs directory ("events") to flag
+which eventfs_inode belongs to it.
+
+Since the "events" directory's dentry and inode are never freed, it does
+not need to use its attr field to restore its mode and ownership. Use
+this eventfs_inode's attr as the default ownership for all the files and
+directories underneath it.
+
+When the events eventfs_inode is created, it sets its ownership to its
+parent uid and gid. As the events directory is created at boot up before
+it gets mounted, this will always be uid=0 and gid=0. If it's created via
+an instance, then it will take the ownership of the instance directory.
+
+When the file system is mounted, it will update all the gids if one is
+specified. This will have a callback to update the events eventfs_inode's
+default entries.
+
+When a file or directory is created under the events directory, it will
+walk the ei->dentry parents until it finds the eventfs_inode that belongs
+to the events directory to retrieve the default uid and gid values.
+
+Link: https://lore.kernel.org/all/CAHk-=wiwQtUHvzwyZucDq8=Gtw+AnwScyLhpFswrQ84PjhoGsg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20231221190757.7eddbca9@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Dongliang Cui <cuidongliang390@gmail.com>
+Cc: Hongyu Jin  <hongyu.jin@unisoc.com>
+Fixes: 0dfc852b6fe3 ("eventfs: Have event files and directories default to parent uid and gid")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  105 ++++++++++++++++++++++++++++++++++++++++++-----
+ fs/tracefs/inode.c       |    6 ++
+ fs/tracefs/internal.h    |    2 
+ 3 files changed, 103 insertions(+), 10 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -113,7 +113,14 @@ static int eventfs_set_attr(struct mnt_i
+        * determined by the parent directory.
+        */
+       if (dentry->d_inode->i_mode & S_IFDIR) {
+-              update_attr(&ei->attr, iattr);
++              /*
++               * The events directory dentry is never freed, unless its
++               * part of an instance that is deleted. It's attr is the
++               * default for its child files and directories.
++               * Do not update it. It's not used for its own mode or ownership
++               */
++              if (!ei->is_events)
++                      update_attr(&ei->attr, iattr);
+       } else {
+               name = dentry->d_name.name;
+@@ -148,28 +155,93 @@ static const struct file_operations even
+       .release        = eventfs_release,
+ };
++/* Return the evenfs_inode of the "events" directory */
++static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
++{
++      struct eventfs_inode *ei;
++
++      mutex_lock(&eventfs_mutex);
++      do {
++              /* The parent always has an ei, except for events itself */
++              ei = dentry->d_parent->d_fsdata;
++
++              /*
++               * If the ei is being freed, the ownership of the children
++               * doesn't matter.
++               */
++              if (ei->is_freed) {
++                      ei = NULL;
++                      break;
++              }
++
++              dentry = ei->dentry;
++      } while (!ei->is_events);
++      mutex_unlock(&eventfs_mutex);
++
++      return ei;
++}
++
+ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
+                             struct eventfs_attr *attr, umode_t mode)
+ {
+-      if (!attr) {
+-              inode->i_mode = mode;
++      struct eventfs_inode *events_ei = eventfs_find_events(dentry);
++
++      if (!events_ei)
++              return;
++
++      inode->i_mode = mode;
++      inode->i_uid = events_ei->attr.uid;
++      inode->i_gid = events_ei->attr.gid;
++
++      if (!attr)
+               return;
+-      }
+       if (attr->mode & EVENTFS_SAVE_MODE)
+               inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
+-      else
+-              inode->i_mode = mode;
+       if (attr->mode & EVENTFS_SAVE_UID)
+               inode->i_uid = attr->uid;
+-      else
+-              inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+       if (attr->mode & EVENTFS_SAVE_GID)
+               inode->i_gid = attr->gid;
+-      else
+-              inode->i_gid = d_inode(dentry->d_parent)->i_gid;
++}
++
++static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
++{
++      struct eventfs_inode *ei_child;
++
++      /* at most we have events/system/event */
++      if (WARN_ON_ONCE(level > 3))
++              return;
++
++      ei->attr.gid = gid;
++
++      if (ei->entry_attrs) {
++              for (int i = 0; i < ei->nr_entries; i++) {
++                      ei->entry_attrs[i].gid = gid;
++              }
++      }
++
++      /*
++       * Only eventfs_inode with dentries are updated, make sure
++       * all eventfs_inodes are updated. If one of the children
++       * do not have a dentry, this function must traverse it.
++       */
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
++                               srcu_read_lock_held(&eventfs_srcu)) {
++              if (!ei_child->dentry)
++                      update_gid(ei_child, gid, level + 1);
++      }
++}
++
++void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
++{
++      struct eventfs_inode *ei = dentry->d_fsdata;
++      int idx;
++
++      idx = srcu_read_lock(&eventfs_srcu);
++      update_gid(ei, gid, 0);
++      srcu_read_unlock(&eventfs_srcu, idx);
+ }
+ /**
+@@ -860,6 +932,8 @@ struct eventfs_inode *eventfs_create_eve
+       struct eventfs_inode *ei;
+       struct tracefs_inode *ti;
+       struct inode *inode;
++      kuid_t uid;
++      kgid_t gid;
+       if (security_locked_down(LOCKDOWN_TRACEFS))
+               return NULL;
+@@ -884,11 +958,20 @@ struct eventfs_inode *eventfs_create_eve
+       ei->dentry = dentry;
+       ei->entries = entries;
+       ei->nr_entries = size;
++      ei->is_events = 1;
+       ei->data = data;
+       ei->name = kstrdup_const(name, GFP_KERNEL);
+       if (!ei->name)
+               goto fail;
++      /* Save the ownership of this directory */
++      uid = d_inode(dentry->d_parent)->i_uid;
++      gid = d_inode(dentry->d_parent)->i_gid;
++
++      /* This is used as the default ownership of the files and directories */
++      ei->attr.uid = uid;
++      ei->attr.gid = gid;
++
+       INIT_LIST_HEAD(&ei->children);
+       INIT_LIST_HEAD(&ei->list);
+@@ -897,6 +980,8 @@ struct eventfs_inode *eventfs_create_eve
+       ti->private = ei;
+       inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++      inode->i_uid = uid;
++      inode->i_gid = gid;
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -210,6 +210,7 @@ repeat:
+       next = this_parent->d_subdirs.next;
+ resume:
+       while (next != &this_parent->d_subdirs) {
++              struct tracefs_inode *ti;
+               struct list_head *tmp = next;
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+               next = tmp->next;
+@@ -218,6 +219,11 @@ resume:
+               change_gid(dentry, gid);
++              /* If this is the events directory, update that too */
++              ti = get_tracefs(dentry->d_inode);
++              if (ti && (ti->flags & TRACEFS_EVENT_INODE))
++                      eventfs_update_gid(dentry, gid);
++
+               if (!list_empty(&dentry->d_subdirs)) {
+                       spin_unlock(&this_parent->d_lock);
+                       spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -62,6 +62,7 @@ struct eventfs_inode {
+               struct rcu_head         rcu;
+       };
+       unsigned int                    is_freed:1;
++      unsigned int                    is_events:1;
+       unsigned int                    nr_entries:31;
+ };
+@@ -77,6 +78,7 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
++void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+ #endif /* _TRACEFS_INTERNAL_H */
diff --git a/queue-6.6/eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch b/queue-6.6/eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch
new file mode 100644 (file)
index 0000000..09459e2
--- /dev/null
@@ -0,0 +1,45 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:20 -0500
+Subject: eventfs: Fix kerneldoc of eventfs_remove_rec()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, kernel test robot <lkp@intel.com>
+Message-ID: <20240206120948.816153049@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 9037caa09ed345b35325200f0e4acf5a94ae0a65 upstream.
+
+The eventfs_remove_rec() had some missing parameters in the kerneldoc
+comment above it. Also, rephrase the description a bit more to have a bit
+more correct grammar.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231030121523.0b2225a7@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202310052216.4SgqasWo-lkp@intel.com/
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -796,9 +796,11 @@ static void free_ei(struct rcu_head *hea
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
++ * @head: the list head to place the deleted @ei and children
++ * @level: prevent recursion from going more than 3 levels deep.
+  *
+- * This function recursively remove eventfs_inode which
+- * contains info of file or dir.
++ * This function recursively removes eventfs_inodes which
++ * contains info of files and/or directories.
+  */
+ static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
+ {
diff --git a/queue-6.6/eventfs-fix-typo-in-eventfs_inode-union-comment.patch b/queue-6.6/eventfs-fix-typo-in-eventfs_inode-union-comment.patch
new file mode 100644 (file)
index 0000000..8fc5946
--- /dev/null
@@ -0,0 +1,36 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:18 -0500
+Subject: eventfs: Fix typo in eventfs_inode union comment
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.496559787@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 29e06c10702e81a7d0b75020ca514d2f2962704a upstream.
+
+It's eventfs_inode not eventfs_indoe. There's no deer involved!
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231024131024.5634c743@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -37,7 +37,7 @@ struct eventfs_inode {
+       /*
+        * Union - used for deletion
+        * @del_list:   list of eventfs_inode to delete
+-       * @rcu:        eventfs_indoe to delete in RCU
++       * @rcu:        eventfs_inode to delete in RCU
+        * @is_freed:   node is freed if one of the above is set
+        */
+       union {
diff --git a/queue-6.6/eventfs-fix-warn_on-in-create_file_dentry.patch b/queue-6.6/eventfs-fix-warn_on-in-create_file_dentry.patch
new file mode 100644 (file)
index 0000000..f9acd56
--- /dev/null
@@ -0,0 +1,52 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:17 -0500
+Subject: eventfs: Fix WARN_ON() in create_file_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.331907337@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit a9de4eb15ad430fe45747c211e367da745a90093 upstream.
+
+As the comment right above a WARN_ON() in create_file_dentry() states:
+
+  * Note, with the mutex held, the e_dentry cannot have content
+  * and the ei->is_freed be true at the same time.
+
+But the WARN_ON() only has:
+
+  WARN_ON_ONCE(ei->is_freed);
+
+Where to match the comment (and what it should actually do) is:
+
+  dentry = *e_dentry;
+  WARN_ON_ONCE(dentry && ei->is_freed)
+
+Also in that case, set dentry to NULL (although it should never happen).
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231024123628.62b88755@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -264,8 +264,9 @@ create_file_dentry(struct eventfs_inode
+                * Note, with the mutex held, the e_dentry cannot have content
+                * and the ei->is_freed be true at the same time.
+                */
+-              WARN_ON_ONCE(ei->is_freed);
+               dentry = *e_dentry;
++              if (WARN_ON_ONCE(dentry && ei->is_freed))
++                      dentry = NULL;
+               /* The lookup does not need to up the dentry refcount */
+               if (dentry && !lookup)
+                       dget(dentry);
diff --git a/queue-6.6/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch b/queue-6.6/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch
new file mode 100644 (file)
index 0000000..2fe76b7
--- /dev/null
@@ -0,0 +1,532 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:47 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:58 -0500
+Subject: eventfs: Get rid of dentry pointers without refcounts
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.006367557@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 43aa6f97c2d03a52c1ddb86768575fc84344bdbb upstream.
+
+The eventfs inode had pointers to dentries (and child dentries) without
+actually holding a refcount on said pointer.  That is fundamentally
+broken, and while eventfs tried to then maintain coherence with dentries
+going away by hooking into the '.d_iput' callback, that doesn't actually
+work since it's not ordered wrt lookups.
+
+There were two reasons why eventfs tried to keep a pointer to a dentry:
+
+ - the creation of a 'events' directory would actually have a stable
+   dentry pointer that it created with tracefs_start_creating().
+
+   And it needed that dentry when tearing it all down again in
+   eventfs_remove_events_dir().
+
+   This use is actually ok, because the special top-level events
+   directory dentries are actually stable, not just a temporary cache of
+   the eventfs data structures.
+
+ - the 'eventfs_inode' (aka ei) needs to stay around as long as there
+   are dentries that refer to it.
+
+   It then used these dentry pointers as a replacement for doing
+   reference counting: it would try to make sure that there was only
+   ever one dentry associated with an event_inode, and keep a child
+   dentry array around to see which dentries might still refer to the
+   parent ei.
+
+This gets rid of the invalid dentry pointer use, and renames the one
+valid case to a different name to make it clear that it's not just any
+random dentry.
+
+The magic child dentry array that is kind of a "reverse reference list"
+is simply replaced by having child dentries take a ref to the ei.  As
+does the directory dentries.  That makes the broken use case go away.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.280463000@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  248 ++++++++++++++---------------------------------
+ fs/tracefs/internal.h    |    7 -
+ 2 files changed, 78 insertions(+), 177 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -62,6 +62,35 @@ enum {
+ #define EVENTFS_MODE_MASK     (EVENTFS_SAVE_MODE - 1)
++/*
++ * eventfs_inode reference count management.
++ *
++ * NOTE! We count only references from dentries, in the
++ * form 'dentry->d_fsdata'. There are also references from
++ * directory inodes ('ti->private'), but the dentry reference
++ * count is always a superset of the inode reference count.
++ */
++static void release_ei(struct kref *ref)
++{
++      struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
++      kfree(ei->entry_attrs);
++      kfree_const(ei->name);
++      kfree_rcu(ei, rcu);
++}
++
++static inline void put_ei(struct eventfs_inode *ei)
++{
++      if (ei)
++              kref_put(&ei->kref, release_ei);
++}
++
++static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
++{
++      if (ei)
++              kref_get(&ei->kref);
++      return ei;
++}
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+                                         struct dentry *dentry,
+                                         unsigned int flags);
+@@ -289,7 +318,8 @@ static void update_inode_attr(struct den
+  * directory. The inode.i_private pointer will point to @data in the open()
+  * call.
+  */
+-static struct dentry *lookup_file(struct dentry *dentry,
++static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
++                                struct dentry *dentry,
+                                 umode_t mode,
+                                 struct eventfs_attr *attr,
+                                 void *data,
+@@ -302,7 +332,7 @@ static struct dentry *lookup_file(struct
+               mode |= S_IFREG;
+       if (WARN_ON_ONCE(!S_ISREG(mode)))
+-              return NULL;
++              return ERR_PTR(-EIO);
+       inode = tracefs_get_inode(dentry->d_sb);
+       if (unlikely(!inode))
+@@ -321,9 +351,12 @@ static struct dentry *lookup_file(struct
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
++      // Files have their parent's ei as their fsdata
++      dentry->d_fsdata = get_ei(parent_ei);
++
+       d_add(dentry, inode);
+       fsnotify_create(dentry->d_parent->d_inode, dentry);
+-      return dentry;
++      return NULL;
+ };
+ /**
+@@ -359,22 +392,29 @@ static struct dentry *lookup_dir_entry(s
+       /* Only directories have ti->private set to an ei, not files */
+       ti->private = ei;
+-      dentry->d_fsdata = ei;
+-        ei->dentry = dentry;  // Remove me!
++      dentry->d_fsdata = get_ei(ei);
+       inc_nlink(inode);
+       d_add(dentry, inode);
+       inc_nlink(dentry->d_parent->d_inode);
+       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+-      return dentry;
++      return NULL;
+ }
+-static void free_ei(struct eventfs_inode *ei)
++static inline struct eventfs_inode *alloc_ei(const char *name)
+ {
+-      kfree_const(ei->name);
+-      kfree(ei->d_children);
+-      kfree(ei->entry_attrs);
+-      kfree(ei);
++      struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++
++      if (!ei)
++              return NULL;
++
++      ei->name = kstrdup_const(name, GFP_KERNEL);
++      if (!ei->name) {
++              kfree(ei);
++              return NULL;
++      }
++      kref_init(&ei->kref);
++      return ei;
+ }
+ /**
+@@ -385,39 +425,13 @@ static void free_ei(struct eventfs_inode
+  */
+ void eventfs_d_release(struct dentry *dentry)
+ {
+-      struct eventfs_inode *ei;
+-      int i;
+-
+-      mutex_lock(&eventfs_mutex);
+-
+-      ei = dentry->d_fsdata;
+-      if (!ei)
+-              goto out;
+-
+-      /* This could belong to one of the files of the ei */
+-      if (ei->dentry != dentry) {
+-              for (i = 0; i < ei->nr_entries; i++) {
+-                      if (ei->d_children[i] == dentry)
+-                              break;
+-              }
+-              if (WARN_ON_ONCE(i == ei->nr_entries))
+-                      goto out;
+-              ei->d_children[i] = NULL;
+-      } else if (ei->is_freed) {
+-              free_ei(ei);
+-      } else {
+-              ei->dentry = NULL;
+-      }
+-
+-      dentry->d_fsdata = NULL;
+- out:
+-      mutex_unlock(&eventfs_mutex);
++      put_ei(dentry->d_fsdata);
+ }
+ /**
+  * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+  * @ei: the eventfs_inode that the file will be created under
+- * @idx: the index into the d_children[] of the @ei
++ * @idx: the index into the entry_attrs[] of the @ei
+  * @parent: The parent dentry of the created file.
+  * @name: The name of the file to create
+  * @mode: The mode of the file.
+@@ -434,17 +448,11 @@ lookup_file_dentry(struct dentry *dentry
+                  const struct file_operations *fops)
+ {
+       struct eventfs_attr *attr = NULL;
+-      struct dentry **e_dentry = &ei->d_children[idx];
+       if (ei->entry_attrs)
+               attr = &ei->entry_attrs[idx];
+-      dentry->d_fsdata = ei;          // NOTE: ei of _parent_
+-      lookup_file(dentry, mode, attr, data, fops);
+-
+-      *e_dentry = dentry;     // Remove me
+-
+-      return dentry;
++      return lookup_file(ei, dentry, mode, attr, data, fops);
+ }
+ /**
+@@ -465,6 +473,7 @@ static struct dentry *eventfs_root_looku
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+       const char *name = dentry->d_name.name;
++      struct dentry *result = NULL;
+       ti = get_tracefs(dir);
+       if (!(ti->flags & TRACEFS_EVENT_INODE))
+@@ -481,7 +490,7 @@ static struct dentry *eventfs_root_looku
+                       continue;
+               if (ei_child->is_freed)
+                       goto out;
+-              lookup_dir_entry(dentry, ei, ei_child);
++              result = lookup_dir_entry(dentry, ei, ei_child);
+               goto out;
+       }
+@@ -498,12 +507,12 @@ static struct dentry *eventfs_root_looku
+               if (entry->callback(name, &mode, &data, &fops) <= 0)
+                       goto out;
+-              lookup_file_dentry(dentry, ei, i, mode, data, fops);
++              result = lookup_file_dentry(dentry, ei, i, mode, data, fops);
+               goto out;
+       }
+  out:
+       mutex_unlock(&eventfs_mutex);
+-      return NULL;
++      return result;
+ }
+ /*
+@@ -653,25 +662,10 @@ struct eventfs_inode *eventfs_create_dir
+       if (!parent)
+               return ERR_PTR(-EINVAL);
+-      ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++      ei = alloc_ei(name);
+       if (!ei)
+               return ERR_PTR(-ENOMEM);
+-      ei->name = kstrdup_const(name, GFP_KERNEL);
+-      if (!ei->name) {
+-              kfree(ei);
+-              return ERR_PTR(-ENOMEM);
+-      }
+-
+-      if (size) {
+-              ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+-              if (!ei->d_children) {
+-                      kfree_const(ei->name);
+-                      kfree(ei);
+-                      return ERR_PTR(-ENOMEM);
+-              }
+-      }
+-
+       ei->entries = entries;
+       ei->nr_entries = size;
+       ei->data = data;
+@@ -685,7 +679,7 @@ struct eventfs_inode *eventfs_create_dir
+       /* Was the parent freed? */
+       if (list_empty(&ei->list)) {
+-              free_ei(ei);
++              put_ei(ei);
+               ei = NULL;
+       }
+       return ei;
+@@ -720,28 +714,20 @@ struct eventfs_inode *eventfs_create_eve
+       if (IS_ERR(dentry))
+               return ERR_CAST(dentry);
+-      ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++      ei = alloc_ei(name);
+       if (!ei)
+-              goto fail_ei;
++              goto fail;
+       inode = tracefs_get_inode(dentry->d_sb);
+       if (unlikely(!inode))
+               goto fail;
+-      if (size) {
+-              ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+-              if (!ei->d_children)
+-                      goto fail;
+-      }
+-
+-      ei->dentry = dentry;
++      // Note: we have a ref to the dentry from tracefs_start_creating()
++      ei->events_dir = dentry;
+       ei->entries = entries;
+       ei->nr_entries = size;
+       ei->is_events = 1;
+       ei->data = data;
+-      ei->name = kstrdup_const(name, GFP_KERNEL);
+-      if (!ei->name)
+-              goto fail;
+       /* Save the ownership of this directory */
+       uid = d_inode(dentry->d_parent)->i_uid;
+@@ -772,7 +758,7 @@ struct eventfs_inode *eventfs_create_eve
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
+-      dentry->d_fsdata = ei;
++      dentry->d_fsdata = get_ei(ei);
+       /* directory inodes start off with i_nlink == 2 (for "." entry) */
+       inc_nlink(inode);
+@@ -784,72 +770,11 @@ struct eventfs_inode *eventfs_create_eve
+       return ei;
+  fail:
+-      kfree(ei->d_children);
+-      kfree(ei);
+- fail_ei:
++      put_ei(ei);
+       tracefs_failed_creating(dentry);
+       return ERR_PTR(-ENOMEM);
+ }
+-static LLIST_HEAD(free_list);
+-
+-static void eventfs_workfn(struct work_struct *work)
+-{
+-        struct eventfs_inode *ei, *tmp;
+-        struct llist_node *llnode;
+-
+-      llnode = llist_del_all(&free_list);
+-        llist_for_each_entry_safe(ei, tmp, llnode, llist) {
+-              /* This dput() matches the dget() from unhook_dentry() */
+-              for (int i = 0; i < ei->nr_entries; i++) {
+-                      if (ei->d_children[i])
+-                              dput(ei->d_children[i]);
+-              }
+-              /* This should only get here if it had a dentry */
+-              if (!WARN_ON_ONCE(!ei->dentry))
+-                      dput(ei->dentry);
+-        }
+-}
+-
+-static DECLARE_WORK(eventfs_work, eventfs_workfn);
+-
+-static void free_rcu_ei(struct rcu_head *head)
+-{
+-      struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+-
+-      if (ei->dentry) {
+-              /* Do not free the ei until all references of dentry are gone */
+-              if (llist_add(&ei->llist, &free_list))
+-                      queue_work(system_unbound_wq, &eventfs_work);
+-              return;
+-      }
+-
+-      /* If the ei doesn't have a dentry, neither should its children */
+-      for (int i = 0; i < ei->nr_entries; i++) {
+-              WARN_ON_ONCE(ei->d_children[i]);
+-      }
+-
+-      free_ei(ei);
+-}
+-
+-static void unhook_dentry(struct dentry *dentry)
+-{
+-      if (!dentry)
+-              return;
+-      /*
+-       * Need to add a reference to the dentry that is expected by
+-       * simple_recursive_removal(), which will include a dput().
+-       */
+-      dget(dentry);
+-
+-      /*
+-       * Also add a reference for the dput() in eventfs_workfn().
+-       * That is required as that dput() will free the ei after
+-       * the SRCU grace period is over.
+-       */
+-      dget(dentry);
+-}
+-
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
+@@ -862,8 +787,6 @@ static void eventfs_remove_rec(struct ev
+ {
+       struct eventfs_inode *ei_child;
+-      if (!ei)
+-              return;
+       /*
+        * Check recursion depth. It should never be greater than 3:
+        * 0 - events/
+@@ -875,28 +798,12 @@ static void eventfs_remove_rec(struct ev
+               return;
+       /* search for nested folders or files */
+-      list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                               lockdep_is_held(&eventfs_mutex)) {
+-              /* Children only have dentry if parent does */
+-              WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
++      list_for_each_entry(ei_child, &ei->children, list)
+               eventfs_remove_rec(ei_child, level + 1);
+-      }
+-
+       ei->is_freed = 1;
+-
+-      for (int i = 0; i < ei->nr_entries; i++) {
+-              if (ei->d_children[i]) {
+-                      /* Children only have dentry if parent does */
+-                      WARN_ON_ONCE(!ei->dentry);
+-                      unhook_dentry(ei->d_children[i]);
+-              }
+-      }
+-
+-      unhook_dentry(ei->dentry);
+-
+-      list_del_rcu(&ei->list);
+-      call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
++      list_del(&ei->list);
++      put_ei(ei);
+ }
+ /**
+@@ -907,22 +814,12 @@ static void eventfs_remove_rec(struct ev
+  */
+ void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+-      struct dentry *dentry;
+-
+       if (!ei)
+               return;
+       mutex_lock(&eventfs_mutex);
+-      dentry = ei->dentry;
+       eventfs_remove_rec(ei, 0);
+       mutex_unlock(&eventfs_mutex);
+-
+-      /*
+-       * If any of the ei children has a dentry, then the ei itself
+-       * must have a dentry.
+-       */
+-      if (dentry)
+-              simple_recursive_removal(dentry, NULL);
+ }
+ /**
+@@ -935,7 +832,11 @@ void eventfs_remove_events_dir(struct ev
+ {
+       struct dentry *dentry;
+-      dentry = ei->dentry;
++      dentry = ei->events_dir;
++      if (!dentry)
++              return;
++
++      ei->events_dir = NULL;
+       eventfs_remove_dir(ei);
+       /*
+@@ -945,5 +846,6 @@ void eventfs_remove_events_dir(struct ev
+        * sticks around while the other ei->dentry are created
+        * and destroyed dynamically.
+        */
++      d_invalidate(dentry);
+       dput(dentry);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -35,8 +35,7 @@ struct eventfs_attr {
+  * @entries:  the array of entries representing the files in the directory
+  * @name:     the name of the directory to create
+  * @children: link list into the child eventfs_inode
+- * @dentry:     the dentry of the directory
+- * @d_children: The array of dentries to represent the files when created
++ * @events_dir: the dentry of the events directory
+  * @entry_attrs: Saved mode and ownership of the @d_children
+  * @attr:     Saved mode and ownership of eventfs_inode itself
+  * @data:     The private data to pass to the callbacks
+@@ -45,12 +44,12 @@ struct eventfs_attr {
+  * @nr_entries: The number of items in @entries
+  */
+ struct eventfs_inode {
++      struct kref                     kref;
+       struct list_head                list;
+       const struct eventfs_entry      *entries;
+       const char                      *name;
+       struct list_head                children;
+-      struct dentry                   *dentry; /* Check is_freed to access */
+-      struct dentry                   **d_children;
++      struct dentry                   *events_dir;
+       struct eventfs_attr             *entry_attrs;
+       struct eventfs_attr             attr;
+       void                            *data;
diff --git a/queue-6.6/eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch b/queue-6.6/eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch
new file mode 100644 (file)
index 0000000..54dd5e1
--- /dev/null
@@ -0,0 +1,84 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:22 -0500
+Subject: eventfs: Have a free_ei() that just frees the eventfs_inode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.141151788@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit db3a397209b00d2e4e0a068608e5c546fc064b82 upstream.
+
+As the eventfs_inode is freed in two different locations, make a helper
+function free_ei() to make sure all the allocated fields of the
+eventfs_inode are freed.
+
+This requires renaming the existing free_ei() which is called by the srcu
+handler to free_rcu_ei() and have free_ei() just do the freeing, where
+free_rcu_ei() will call it.
+
+Link: https://lkml.kernel.org/r/20231101172649.265214087@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -129,6 +129,13 @@ static struct dentry *create_dir(const c
+       return eventfs_end_creating(dentry);
+ }
++static void free_ei(struct eventfs_inode *ei)
++{
++      kfree_const(ei->name);
++      kfree(ei->d_children);
++      kfree(ei);
++}
++
+ /**
+  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+  * @ti: the tracefs_inode of the dentry
+@@ -168,9 +175,7 @@ void eventfs_set_ei_status_free(struct t
+                       eventfs_remove_dir(ei_child);
+               }
+-              kfree_const(ei->name);
+-              kfree(ei->d_children);
+-              kfree(ei);
++              free_ei(ei);
+               return;
+       }
+@@ -784,13 +789,11 @@ struct eventfs_inode *eventfs_create_eve
+       return ERR_PTR(-ENOMEM);
+ }
+-static void free_ei(struct rcu_head *head)
++static void free_rcu_ei(struct rcu_head *head)
+ {
+       struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+-      kfree_const(ei->name);
+-      kfree(ei->d_children);
+-      kfree(ei);
++      free_ei(ei);
+ }
+ /**
+@@ -881,7 +884,7 @@ void eventfs_remove_dir(struct eventfs_i
+               for (i = 0; i < ei->nr_entries; i++)
+                       unhook_dentry(&ei->d_children[i], &dentry_list);
+               unhook_dentry(&ei->dentry, &dentry_list);
+-              call_srcu(&eventfs_srcu, &ei->rcu, free_ei);
++              call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+       }
+       mutex_unlock(&eventfs_mutex);
diff --git a/queue-6.6/eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch b/queue-6.6/eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch
new file mode 100644 (file)
index 0000000..90d835e
--- /dev/null
@@ -0,0 +1,102 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:23 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:36 -0500
+Subject: eventfs: Have event files and directories default to parent uid and gid
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Hongyu Jin <hongyu.jin@unisoc.com>, Dongliang Cui <cuidongliang390@gmail.com>
+Message-ID: <20240206120951.415245327@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 0dfc852b6fe3cbecbea67332a0dce2bebeba540d upstream.
+
+Dongliang reported:
+
+  I found that in the latest version, the nodes of tracefs have been
+  changed to dynamically created.
+
+  This has caused me to encounter a problem where the gid I specified in
+  the mounting parameters cannot apply to all files, as in the following
+  situation:
+
+  /data/tmp/events # mount | grep tracefs
+  tracefs on /data/tmp type tracefs (rw,seclabel,relatime,gid=3012)
+
+  gid 3012 = readtracefs
+
+  /data/tmp # ls -lh
+  total 0
+  -r--r-----   1 root readtracefs 0 1970-01-01 08:00 README
+  -r--r-----   1 root readtracefs 0 1970-01-01 08:00 available_events
+
+  ums9621_1h10:/data/tmp/events # ls -lh
+  total 0
+  drwxr-xr-x 2 root root 0 2023-12-19 00:56 alarmtimer
+  drwxr-xr-x 2 root root 0 2023-12-19 00:56 asoc
+
+  It will prevent certain applications from accessing tracefs properly, I
+  try to avoid this issue by making the following modifications.
+
+To fix this, have the files created default to taking the ownership of
+the parent dentry unless the ownership was previously set by the user.
+
+Link: https://lore.kernel.org/linux-trace-kernel/1703063706-30539-1-git-send-email-dongliang.cui@unisoc.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20231220105017.1489d790@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Hongyu Jin  <hongyu.jin@unisoc.com>
+Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode")
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reported-by: Dongliang Cui <cuidongliang390@gmail.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -148,7 +148,8 @@ static const struct file_operations even
+       .release        = eventfs_release,
+ };
+-static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
++static void update_inode_attr(struct dentry *dentry, struct inode *inode,
++                            struct eventfs_attr *attr, umode_t mode)
+ {
+       if (!attr) {
+               inode->i_mode = mode;
+@@ -162,9 +163,13 @@ static void update_inode_attr(struct ino
+       if (attr->mode & EVENTFS_SAVE_UID)
+               inode->i_uid = attr->uid;
++      else
++              inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+       if (attr->mode & EVENTFS_SAVE_GID)
+               inode->i_gid = attr->gid;
++      else
++              inode->i_gid = d_inode(dentry->d_parent)->i_gid;
+ }
+ /**
+@@ -206,7 +211,7 @@ static struct dentry *create_file(const
+               return eventfs_failed_creating(dentry);
+       /* If the user updated the directory's attributes, use them */
+-      update_inode_attr(inode, attr, mode);
++      update_inode_attr(dentry, inode, attr, mode);
+       inode->i_op = &eventfs_file_inode_operations;
+       inode->i_fop = fop;
+@@ -242,7 +247,8 @@ static struct dentry *create_dir(struct
+               return eventfs_failed_creating(dentry);
+       /* If the user updated the directory's attributes, use them */
+-      update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
++      update_inode_attr(dentry, inode, &ei->attr,
++                        S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
diff --git a/queue-6.6/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch b/queue-6.6/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch
new file mode 100644 (file)
index 0000000..f6195f8
--- /dev/null
@@ -0,0 +1,53 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:32 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:43 -0500
+Subject: eventfs: Have eventfs_iterate() stop immediately if ei->is_freed is set
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.562520394@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit e109deadb73318cf4a3bd61287d969f705df278f upstream.
+
+If ei->is_freed is set in eventfs_iterate(), it means that the directory
+that is being iterated on is in the process of being freed. Just exit the
+loop immediately when that is ever detected, and separate out the return
+of the entry->callback() from ei->is_freed.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.016261289@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -788,11 +788,12 @@ static int eventfs_iterate(struct file *
+               name = entry->name;
+               mutex_lock(&eventfs_mutex);
+-              /* If ei->is_freed, then the event itself may be too */
+-              if (!ei->is_freed)
+-                      r = entry->callback(name, &mode, &cdata, &fops);
+-              else
+-                      r = -1;
++              /* If ei->is_freed then just bail here, nothing more to do */
++              if (ei->is_freed) {
++                      mutex_unlock(&eventfs_mutex);
++                      goto out;
++              }
++              r = entry->callback(name, &mode, &cdata, &fops);
+               mutex_unlock(&eventfs_mutex);
+               if (r <= 0)
+                       continue;
diff --git a/queue-6.6/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch b/queue-6.6/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch
new file mode 100644 (file)
index 0000000..ae92125
--- /dev/null
@@ -0,0 +1,69 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:10 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:47 -0500
+Subject: eventfs: Have the inodes all for files and directories all be the same
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al  Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120953.213007763@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 53c41052ba3121761e6f62a813961164532a214f upstream.
+
+The dentries and inodes are created in the readdir for the sole purpose of
+getting a consistent inode number. Linus stated that is unnecessary, and
+that all inodes can have the same inode number. For a virtual file system
+they are pretty meaningless.
+
+Instead use a single unique inode number for all files and one for all
+directories.
+
+Link: https://lore.kernel.org/all/20240116133753.2808d45e@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.412180363@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al  Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -32,6 +32,10 @@
+  */
+ static DEFINE_MUTEX(eventfs_mutex);
++/* Choose something "unique" ;-) */
++#define EVENTFS_FILE_INODE_INO                0x12c4e37
++#define EVENTFS_DIR_INODE_INO         0x134b2f5
++
+ /*
+  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+  * its parent's list and will have is_freed set (under eventfs_mutex).
+@@ -352,6 +356,9 @@ static struct dentry *create_file(const
+       inode->i_fop = fop;
+       inode->i_private = data;
++      /* All files will have the same inode number */
++      inode->i_ino = EVENTFS_FILE_INODE_INO;
++
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
+       d_instantiate(dentry, inode);
+@@ -388,6 +395,9 @@ static struct dentry *create_dir(struct
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
++      /* All directories will have the same inode number */
++      inode->i_ino = EVENTFS_DIR_INODE_INO;
++
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
diff --git a/queue-6.6/eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch b/queue-6.6/eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch
new file mode 100644 (file)
index 0000000..56f1096
--- /dev/null
@@ -0,0 +1,177 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:57 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:25 -0500
+Subject: eventfs: Hold eventfs_mutex when calling callback functions
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>, Linux Kernel Functional Testing <lkft@linaro.org>, Naresh Kamboju <naresh.kamboju@linaro.org>
+Message-ID: <20240206120949.629213120@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 44365329f8219fc379097c2c9a75ff53f123764f upstream.
+
+The callback function that is used to create inodes and dentries is not
+protected by anything and the data that is passed to it could become
+stale. After eventfs_remove_dir() is called by the tracing system, it is
+free to remove the events that are associated to that directory.
+Unfortunately, that means the callbacks must not be called after that.
+
+     CPU0                              CPU1
+     ----                              ----
+ eventfs_root_lookup() {
+                                eventfs_remove_dir() {
+                                     mutex_lock(&event_mutex);
+                                     ei->is_freed = set;
+                                     mutex_unlock(&event_mutex);
+                                }
+                                kfree(event_call);
+
+    for (...) {
+      entry = &ei->entries[i];
+      r = entry->callback() {
+          call = data;         // call == event_call above
+          if (call->flags ...)
+
+ [ USE AFTER FREE BUG ]
+
+The safest way to protect this is to wrap the callback with:
+
+ mutex_lock(&eventfs_mutex);
+ if (!ei->is_freed)
+     r = entry->callback();
+ else
+     r = -1;
+ mutex_unlock(&eventfs_mutex);
+
+This will make sure that the callback will not be called after it is
+freed. But now it needs to be known that the callback is called while
+holding internal eventfs locks, and that it must not call back into the
+eventfs / tracefs system. There's no reason it should anyway, but document
+that as well.
+
+Link: https://lore.kernel.org/all/CA+G9fYu9GOEbD=rR5eMR-=HJ8H6rMsbzDC2ZY5=Y50WpWAE7_Q@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20231101172649.906696613@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   22 ++++++++++++++++++++--
+ include/linux/tracefs.h  |   43 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 63 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -615,7 +615,13 @@ static struct dentry *eventfs_root_looku
+               entry = &ei->entries[i];
+               if (strcmp(name, entry->name) == 0) {
+                       void *cdata = data;
+-                      r = entry->callback(name, &mode, &cdata, &fops);
++                      mutex_lock(&eventfs_mutex);
++                      /* If ei->is_freed, then the event itself may be too */
++                      if (!ei->is_freed)
++                              r = entry->callback(name, &mode, &cdata, &fops);
++                      else
++                              r = -1;
++                      mutex_unlock(&eventfs_mutex);
+                       if (r <= 0)
+                               continue;
+                       ret = simple_lookup(dir, dentry, flags);
+@@ -749,7 +755,13 @@ static int dcache_dir_open_wrapper(struc
+               void *cdata = data;
+               entry = &ei->entries[i];
+               name = entry->name;
+-              r = entry->callback(name, &mode, &cdata, &fops);
++              mutex_lock(&eventfs_mutex);
++              /* If ei->is_freed, then the event itself may be too */
++              if (!ei->is_freed)
++                      r = entry->callback(name, &mode, &cdata, &fops);
++              else
++                      r = -1;
++              mutex_unlock(&eventfs_mutex);
+               if (r <= 0)
+                       continue;
+               d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
+@@ -819,6 +831,10 @@ static int dcache_readdir_wrapper(struct
+  *   data = A pointer to @data, and the callback may replace it, which will
+  *         cause the file created to pass the new data to the open() call.
+  *   fops = the fops to use for the created file.
++ *
++ * NB. @callback is called while holding internal locks of the eventfs
++ *     system. The callback must not call any code that might also call into
++ *     the tracefs or eventfs system or it will risk creating a deadlock.
+  */
+ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+                                        const struct eventfs_entry *entries,
+@@ -878,6 +894,8 @@ struct eventfs_inode *eventfs_create_dir
+  * @data: The default data to pass to the files (an entry may override it).
+  *
+  * This function creates the top of the trace event directory.
++ *
++ * See eventfs_create_dir() for use of @entries.
+  */
+ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+                                               const struct eventfs_entry *entries,
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -23,9 +23,52 @@ struct file_operations;
+ struct eventfs_file;
++/**
++ * eventfs_callback - A callback function to create dynamic files in eventfs
++ * @name: The name of the file that is to be created
++ * @mode: return the file mode for the file (RW access, etc)
++ * @data: data to pass to the created file ops
++ * @fops: the file operations of the created file
++ *
++ * The eventfs files are dynamically created. The struct eventfs_entry array
++ * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will
++ * be used to create the files within those directories. When a lookup
++ * or access to a file within the directory is made, the struct eventfs_entry
++ * array is used to find a callback() with the matching name that is being
++ * referenced (for lookups, the entire array is iterated and each callback
++ * will be called).
++ *
++ * The callback will be called with @name for the name of the file to create.
++ * The callback can return less than 1 to indicate that no file should be
++ * created.
++ *
++ * If a file is to be created, then @mode should be populated with the file
++ * mode (permissions) with which the file is created. This would be
++ * used to set the created inode i_mode field.
++ *
++ * The @data should be set to the data passed to the other file operations
++ * (read, write, etc). Note, @data will also point to the data passed in
++ * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback
++ * can replace the data if it chooses to. Otherwise, the original data
++ * will be used for the file operation functions.
++ *
++ * The @fops should be set to the file operations that will be used to create
++ * the inode.
++ *
++ * NB. This callback is called while holding internal locks of the eventfs
++ *     system. The callback must not call any code that might also call into
++ *     the tracefs or eventfs system or it will risk creating a deadlock.
++ */
+ typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+                               const struct file_operations **fops);
++/**
++ * struct eventfs_entry - dynamically created eventfs file call back handler
++ * @name:     The name of the dynamic file in an eventfs directory
++ * @callback: The callback to get the fops of the file when it is created
++ *
++ * See eventfs_callback() typedef for how to set up @callback.
++ */
+ struct eventfs_entry {
+       const char                      *name;
+       eventfs_callback                callback;
diff --git a/queue-6.6/eventfs-initialize-the-tracefs-inode-properly.patch b/queue-6.6/eventfs-initialize-the-tracefs-inode-properly.patch
new file mode 100644 (file)
index 0000000..b9d2996
--- /dev/null
@@ -0,0 +1,66 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:11 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:53 -0500
+Subject: eventfs: Initialize the tracefs inode properly
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120954.203316263@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 4fa4b010b83fb2f837b5ef79e38072a79e96e4f1 upstream.
+
+The tracefs-specific fields in the inode were not initialized before the
+inode was exposed to others through the dentry with 'd_instantiate()'.
+
+Move the field initializations up to before the d_instantiate.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.478449628@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -370,6 +370,8 @@ static struct dentry *create_dir(struct
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
++      /* Only directories have ti->private set to an ei, not files */
++      ti->private = ei;
+       inc_nlink(inode);
+       d_instantiate(dentry, inode);
+@@ -515,7 +517,6 @@ create_file_dentry(struct eventfs_inode
+ static void eventfs_post_create_dir(struct eventfs_inode *ei)
+ {
+       struct eventfs_inode *ei_child;
+-      struct tracefs_inode *ti;
+       lockdep_assert_held(&eventfs_mutex);
+@@ -525,9 +526,6 @@ static void eventfs_post_create_dir(stru
+                                srcu_read_lock_held(&eventfs_srcu)) {
+               ei_child->d_parent = ei->dentry;
+       }
+-
+-      ti = get_tracefs(ei->dentry->d_inode);
+-      ti->private = ei;
+ }
+ /**
diff --git a/queue-6.6/eventfs-keep-all-directory-links-at-1.patch b/queue-6.6/eventfs-keep-all-directory-links-at-1.patch
new file mode 100644 (file)
index 0000000..f38eb40
--- /dev/null
@@ -0,0 +1,78 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:22 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:02 -0500
+Subject: eventfs: Keep all directory links at 1
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120955.665411833@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit ca185770db914869ff9fe773bac5e0e5e4165b83 upstream.
+
+The directory link count in eventfs was somewhat bogus. It was only being
+updated when a directory child was being looked up and not on creation.
+
+One solution would be to update in get_attr() the link count by iterating
+the ei->children list and then adding 2. But that could slow down simple
+stat() calls, especially if it's done on all directories in eventfs.
+
+Another solution would be to add a parent pointer to the eventfs_inode
+and keep track of the number of sub directories it has on creation. But
+this adds overhead for something not really worthwhile.
+
+The solution decided upon is to keep all directory links in eventfs as 1.
+This tells user space not to rely on the hard links of directories. Which
+in this case it shouldn't.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.339968298@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -404,9 +404,7 @@ static struct dentry *lookup_dir_entry(s
+       dentry->d_fsdata = get_ei(ei);
+-      inc_nlink(inode);
+       d_add(dentry, inode);
+-      inc_nlink(dentry->d_parent->d_inode);
+       return NULL;
+ }
+@@ -769,9 +767,17 @@ struct eventfs_inode *eventfs_create_eve
+       dentry->d_fsdata = get_ei(ei);
+-      /* directory inodes start off with i_nlink == 2 (for "." entry) */
+-      inc_nlink(inode);
++      /*
++       * Keep all eventfs directories with i_nlink == 1.
++       * Due to the dynamic nature of the dentry creations and not
++       * wanting to add a pointer to the parent eventfs_inode in the
++       * eventfs_inode structure, keeping the i_nlink in sync with the
++       * number of directories would cause too much complexity for
++       * something not worth much. Keeping directory links at 1
++       * tells userspace not to trust the link number.
++       */
+       d_instantiate(dentry, inode);
++      /* The dentry of the "events" parent does keep track though */
+       inc_nlink(dentry->d_parent->d_inode);
+       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+       tracefs_end_creating(dentry);
diff --git a/queue-6.6/eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch b/queue-6.6/eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch
new file mode 100644 (file)
index 0000000..c88aaf1
--- /dev/null
@@ -0,0 +1,48 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:06 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:34 -0500
+Subject: eventfs: Make sure that parent->d_inode is locked in creating files/dirs
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120951.095364893@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit f49f950c217bfb40f11662bab39cb388d41e4cfb upstream.
+
+Since the locking of the parent->d_inode has been moved outside the
+creation of the files and directories (as it used to be locked via a
+conditional), add a WARN_ON_ONCE() to the case that it's not locked.
+
+Link: https://lkml.kernel.org/r/20231121231112.853962542@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -327,6 +327,8 @@ create_file_dentry(struct eventfs_inode
+       struct dentry **e_dentry = &ei->d_children[idx];
+       struct dentry *dentry;
++      WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
++
+       mutex_lock(&eventfs_mutex);
+       if (ei->is_freed) {
+               mutex_unlock(&eventfs_mutex);
+@@ -430,6 +432,8 @@ create_dir_dentry(struct eventfs_inode *
+ {
+       struct dentry *dentry = NULL;
++      WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
++
+       mutex_lock(&eventfs_mutex);
+       if (pei->is_freed || ei->is_freed) {
+               mutex_unlock(&eventfs_mutex);
diff --git a/queue-6.6/eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch b/queue-6.6/eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch
new file mode 100644 (file)
index 0000000..9d57694
--- /dev/null
@@ -0,0 +1,87 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:32 -0500
+Subject: eventfs: Move taking of inode_lock into dcache_dir_open_wrapper()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.772179769@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit bcae32c5632fc0a0dbce46fa731cd23403117e66 upstream.
+
+Both create_file_dentry() and create_dir_dentry() take a boolean
+parameter "lookup", as on lookup the inode_lock should already be taken,
+but for dcache_dir_open_wrapper() it is not taken.
+
+There's no reason that the dcache_dir_open_wrapper() can't take the
+inode_lock before calling these functions. In fact, it's better if it
+does, as the lock can be held throughout both directory and file
+creations.
+
+This also simplifies the code, and possibly prevents unexpected race
+conditions when the lock is released.
+
+Link: https://lkml.kernel.org/r/20231121231112.528544825@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   16 ++--------------
+ 1 file changed, 2 insertions(+), 14 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -347,15 +347,8 @@ create_file_dentry(struct eventfs_inode
+       mutex_unlock(&eventfs_mutex);
+-      /* The lookup already has the parent->d_inode locked */
+-      if (!lookup)
+-              inode_lock(parent->d_inode);
+-
+       dentry = create_file(name, mode, attr, parent, data, fops);
+-      if (!lookup)
+-              inode_unlock(parent->d_inode);
+-
+       mutex_lock(&eventfs_mutex);
+       if (IS_ERR_OR_NULL(dentry)) {
+@@ -453,15 +446,8 @@ create_dir_dentry(struct eventfs_inode *
+       }
+       mutex_unlock(&eventfs_mutex);
+-      /* The lookup already has the parent->d_inode locked */
+-      if (!lookup)
+-              inode_lock(parent->d_inode);
+-
+       dentry = create_dir(ei, parent);
+-      if (!lookup)
+-              inode_unlock(parent->d_inode);
+-
+       mutex_lock(&eventfs_mutex);
+       if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
+@@ -693,6 +679,7 @@ static int dcache_dir_open_wrapper(struc
+               return -ENOMEM;
+       }
++      inode_lock(parent->d_inode);
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+               d = create_dir_dentry(ei, ei_child, parent, false);
+@@ -725,6 +712,7 @@ static int dcache_dir_open_wrapper(struc
+                       cnt++;
+               }
+       }
++      inode_unlock(parent->d_inode);
+       srcu_read_unlock(&eventfs_srcu, idx);
+       ret = dcache_dir_open(inode, file);
diff --git a/queue-6.6/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch b/queue-6.6/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch
new file mode 100644 (file)
index 0000000..73b14a1
--- /dev/null
@@ -0,0 +1,120 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:56 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:45 -0500
+Subject: eventfs: Read ei->entries before ei->children in eventfs_iterate()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.892527913@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 704f960dbee2f1634f4b4e16f208cb16eaf41c1e upstream.
+
+In order to apply a shortcut to skip over the current ctx->pos
+immediately, by using the ei->entries array, the reading of that array
+should be first. Moving the array reading before the linked list reading
+will make the shortcut change diff nicer to read.
+
+Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.333115095@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -752,8 +752,8 @@ static int eventfs_iterate(struct file *
+        * Need to create the dentries and inodes to have a consistent
+        * inode number.
+        */
+-      list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                               srcu_read_lock_held(&eventfs_srcu)) {
++      for (i = 0; i < ei->nr_entries; i++) {
++              void *cdata = ei->data;
+               if (c > 0) {
+                       c--;
+@@ -762,23 +762,32 @@ static int eventfs_iterate(struct file *
+               ctx->pos++;
+-              if (ei_child->is_freed)
+-                      continue;
++              entry = &ei->entries[i];
++              name = entry->name;
+-              name = ei_child->name;
++              mutex_lock(&eventfs_mutex);
++              /* If ei->is_freed then just bail here, nothing more to do */
++              if (ei->is_freed) {
++                      mutex_unlock(&eventfs_mutex);
++                      goto out_dec;
++              }
++              r = entry->callback(name, &mode, &cdata, &fops);
++              mutex_unlock(&eventfs_mutex);
++              if (r <= 0)
++                      continue;
+-              dentry = create_dir_dentry(ei, ei_child, ei_dentry);
++              dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+               if (!dentry)
+                       goto out_dec;
+               ino = dentry->d_inode->i_ino;
+               dput(dentry);
+-              if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
++              if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+                       goto out_dec;
+       }
+-      for (i = 0; i < ei->nr_entries; i++) {
+-              void *cdata = ei->data;
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
++                               srcu_read_lock_held(&eventfs_srcu)) {
+               if (c > 0) {
+                       c--;
+@@ -787,27 +796,18 @@ static int eventfs_iterate(struct file *
+               ctx->pos++;
+-              entry = &ei->entries[i];
+-              name = entry->name;
+-
+-              mutex_lock(&eventfs_mutex);
+-              /* If ei->is_freed then just bail here, nothing more to do */
+-              if (ei->is_freed) {
+-                      mutex_unlock(&eventfs_mutex);
+-                      goto out_dec;
+-              }
+-              r = entry->callback(name, &mode, &cdata, &fops);
+-              mutex_unlock(&eventfs_mutex);
+-              if (r <= 0)
++              if (ei_child->is_freed)
+                       continue;
+-              dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++              name = ei_child->name;
++
++              dentry = create_dir_dentry(ei, ei_child, ei_dentry);
+               if (!dentry)
+                       goto out_dec;
+               ino = dentry->d_inode->i_ino;
+               dput(dentry);
+-              if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
++              if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+                       goto out_dec;
+       }
+       ret = 1;
diff --git a/queue-6.6/eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch b/queue-6.6/eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch
new file mode 100644 (file)
index 0000000..0410e5d
--- /dev/null
@@ -0,0 +1,1931 @@
+From stable+bounces-18939-greg=kroah.com@vger.kernel.org Tue Feb  6 13:14:41 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:12 -0500
+Subject: eventfs: Remove eventfs_file and just use eventfs_inode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120947.516739239@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 5790b1fb3d672d9a1fe3881a7181dfdbe741568f upstream.
+
+Instead of having a descriptor for every file represented in the eventfs
+directory, only have the directory itself represented. Change the API to
+send in a list of entries that represent all the files in the directory
+(but not other directories). The entry list contains a name and a callback
+function that will be used to create the files when they are accessed.
+
+struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+                                               const struct eventfs_entry *entries,
+                                               int size, void *data);
+
+is used for the top level eventfs directory, and returns an eventfs_inode
+that will be used by:
+
+struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+                                        const struct eventfs_entry *entries,
+                                        int size, void *data);
+
+where both of the above take an array of struct eventfs_entry entries for
+every file that is in the directory.
+
+The entries are defined by:
+
+typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+                               const struct file_operations **fops);
+
+struct eventfs_entry {
+       const char                      *name;
+       eventfs_callback                callback;
+};
+
+Where the name is the name of the file and the callback gets called when
+the file is being created. The callback passes in the name (in case the
+same callback is used for multiple files), a pointer to the mode, data and
+fops. The data will be pointing to the data that was passed in
+eventfs_create_dir() or eventfs_create_events_dir() but may be overridden
+to point to something else, as it will be used to point to the
+inode->i_private that is created. The information passed back from the
+callback is used to create the dentry/inode.
+
+If the callback fills the data and the file should be created, it must
+return a positive number. On zero or negative, the file is ignored.
+
+This logic may also be used as a prototype to convert entire pseudo file
+systems into just-in-time allocation.
+
+The "show_events_dentry" file has been updated to show the directories,
+and any files they have.
+
+With just the eventfs_file allocations:
+
+ Before after deltas for meminfo (in kB):
+
+   MemFree:            -14360
+   MemAvailable:       -14260
+   Buffers:            40
+   Cached:             24
+   Active:             44
+   Inactive:           48
+   Inactive(anon):     28
+   Active(file):       44
+   Inactive(file):     20
+   Dirty:              -4
+   AnonPages:          28
+   Mapped:             4
+   KReclaimable:       132
+   Slab:               1604
+   SReclaimable:       132
+   SUnreclaim:         1472
+   Committed_AS:       12
+
+ Before after deltas for slabinfo:
+
+   <slab>:             <objects>       [ * <size> = <total>]
+
+   ext4_inode_cache    27              [* 1184 = 31968 ]
+   extent_status       102             [*   40 = 4080 ]
+   tracefs_inode_cache 144             [*  656 = 94464 ]
+   buffer_head         39              [*  104 = 4056 ]
+   shmem_inode_cache   49              [*  800 = 39200 ]
+   filp                        -53             [*  256 = -13568 ]
+   dentry              251             [*  192 = 48192 ]
+   lsm_file_cache      277             [*   32 = 8864 ]
+   vm_area_struct      -14             [*  184 = -2576 ]
+   trace_event_file    1748            [*   88 = 153824 ]
+   kmalloc-1k          35              [* 1024 = 35840 ]
+   kmalloc-256         49              [*  256 = 12544 ]
+   kmalloc-192         -28             [*  192 = -5376 ]
+   kmalloc-128         -30             [*  128 = -3840 ]
+   kmalloc-96          10581           [*   96 = 1015776 ]
+   kmalloc-64          3056            [*   64 = 195584 ]
+   kmalloc-32          1291            [*   32 = 41312 ]
+   kmalloc-16          2310            [*   16 = 36960 ]
+   kmalloc-8           9216            [*    8 = 73728 ]
+
+ Free memory dropped by 14,360 kB
+ Available memory dropped by 14,260 kB
+ Total slab additions in size: 1,771,032 bytes
+
+With this change:
+
+ Before after deltas for meminfo (in kB):
+
+   MemFree:            -12084
+   MemAvailable:       -11976
+   Buffers:            32
+   Cached:             32
+   Active:             72
+   Inactive:           168
+   Inactive(anon):     176
+   Active(file):       72
+   Inactive(file):     -8
+   Dirty:              24
+   AnonPages:          196
+   Mapped:             8
+   KReclaimable:       148
+   Slab:               836
+   SReclaimable:       148
+   SUnreclaim:         688
+   Committed_AS:       324
+
+ Before after deltas for slabinfo:
+
+   <slab>:             <objects>       [ * <size> = <total>]
+
+   tracefs_inode_cache 144             [* 656 = 94464 ]
+   shmem_inode_cache   -23             [* 800 = -18400 ]
+   filp                        -92             [* 256 = -23552 ]
+   dentry              179             [* 192 = 34368 ]
+   lsm_file_cache      -3              [* 32 = -96 ]
+   vm_area_struct      -13             [* 184 = -2392 ]
+   trace_event_file    1748            [* 88 = 153824 ]
+   kmalloc-1k          -49             [* 1024 = -50176 ]
+   kmalloc-256         -27             [* 256 = -6912 ]
+   kmalloc-128         1864            [* 128 = 238592 ]
+   kmalloc-64          4685            [* 64 = 299840 ]
+   kmalloc-32          -72             [* 32 = -2304 ]
+   kmalloc-16          256             [* 16 = 4096 ]
+   total = 721352
+
+ Free memory dropped by 12,084 kB
+ Available memory dropped by 11,976 kB
+ Total slab additions in size:  721,352 bytes
+
+That's over 2 MB in savings per instance for free and available memory,
+and over 1 MB in savings per instance of slab memory.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231003184059.4924468e@gandalf.local.home
+Link: https://lore.kernel.org/linux-trace-kernel/20231004165007.43d79161@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c     |  853 +++++++++++++++++++++----------------------
+ fs/tracefs/inode.c           |    2 
+ fs/tracefs/internal.h        |   37 +
+ include/linux/trace_events.h |    2 
+ include/linux/tracefs.h      |   29 -
+ kernel/trace/trace.c         |    7 
+ kernel/trace/trace.h         |    4 
+ kernel/trace/trace_events.c  |  313 ++++++++++-----
+ 8 files changed, 708 insertions(+), 539 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -2,8 +2,9 @@
+ /*
+  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
+  *
+- *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) <rostedt@goodmis.org>
++ *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
+  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
++ *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
+  *
+  *  eventfs is used to dynamically create inodes and dentries based on the
+  *  meta data provided by the tracing system.
+@@ -23,46 +24,6 @@
+ #include <linux/delay.h>
+ #include "internal.h"
+-struct eventfs_inode {
+-      struct list_head        e_top_files;
+-};
+-
+-/*
+- * struct eventfs_file - hold the properties of the eventfs files and
+- *                       directories.
+- * @name:     the name of the file or directory to create
+- * @d_parent:   holds parent's dentry
+- * @dentry:     once accessed holds dentry
+- * @list:     file or directory to be added to parent directory
+- * @ei:               list of files and directories within directory
+- * @fop:      file_operations for file or directory
+- * @iop:      inode_operations for file or directory
+- * @data:     something that the caller will want to get to later on
+- * @mode:     the permission that the file or directory should have
+- */
+-struct eventfs_file {
+-      const char                      *name;
+-      struct dentry                   *d_parent;
+-      struct dentry                   *dentry;
+-      struct list_head                list;
+-      struct eventfs_inode            *ei;
+-      const struct file_operations    *fop;
+-      const struct inode_operations   *iop;
+-      /*
+-       * Union - used for deletion
+-       * @del_list:   list of eventfs_file to delete
+-       * @rcu:        eventfs_file to delete in RCU
+-       * @is_freed:   node is freed if one of the above is set
+-       */
+-      union {
+-              struct list_head        del_list;
+-              struct rcu_head         rcu;
+-              unsigned long           is_freed;
+-      };
+-      void                            *data;
+-      umode_t                         mode;
+-};
+-
+ static DEFINE_MUTEX(eventfs_mutex);
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+@@ -93,16 +54,9 @@ static const struct file_operations even
+  * @data: something that the caller will want to get to later on.
+  * @fop: struct file_operations that should be used for this file.
+  *
+- * This is the basic "create a file" function for tracefs.  It allows for a
+- * wide range of flexibility in creating a file.
+- *
+- * This function will return a pointer to a dentry if it succeeds.  This
+- * pointer must be passed to the tracefs_remove() function when the file is
+- * to be removed (no automatic cleanup happens if your module is unloaded,
+- * you are responsible here.)  If an error occurs, %NULL will be returned.
+- *
+- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+- * returned.
++ * This function creates a dentry that represents a file in the eventfs_inode
++ * directory. The inode.i_private pointer will point to @data in the open()
++ * call.
+  */
+ static struct dentry *create_file(const char *name, umode_t mode,
+                                 struct dentry *parent, void *data,
+@@ -118,6 +72,7 @@ static struct dentry *create_file(const
+       if (WARN_ON_ONCE(!S_ISREG(mode)))
+               return NULL;
++      WARN_ON_ONCE(!parent);
+       dentry = eventfs_start_creating(name, parent);
+       if (IS_ERR(dentry))
+@@ -142,20 +97,11 @@ static struct dentry *create_file(const
+  * create_dir - create a dir in the tracefs filesystem
+  * @name: the name of the file to create.
+  * @parent: parent dentry for this file.
+- * @data: something that the caller will want to get to later on.
+- *
+- * This is the basic "create a dir" function for eventfs.  It allows for a
+- * wide range of flexibility in creating a dir.
+- *
+- * This function will return a pointer to a dentry if it succeeds.  This
+- * pointer must be passed to the tracefs_remove() function when the file is
+- * to be removed (no automatic cleanup happens if your module is unloaded,
+- * you are responsible here.)  If an error occurs, %NULL will be returned.
+  *
+- * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+- * returned.
++ * This function will create a dentry for a directory represented by
++ * an eventfs_inode.
+  */
+-static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
++static struct dentry *create_dir(const char *name, struct dentry *parent)
+ {
+       struct tracefs_inode *ti;
+       struct dentry *dentry;
+@@ -172,7 +118,6 @@ static struct dentry *create_dir(const c
+       inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
+-      inode->i_private = data;
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
+@@ -185,18 +130,18 @@ static struct dentry *create_dir(const c
+ }
+ /**
+- * eventfs_set_ef_status_free - set the ef->status to free
++ * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
+  * @ti: the tracefs_inode of the dentry
+- * @dentry: dentry who's status to be freed
++ * @dentry: dentry which has the reference to remove.
+  *
+- * eventfs_set_ef_status_free will be called if no more
+- * references remain
++ * Remove the association between a dentry and an eventfs_inode.
+  */
+-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
++void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+       struct tracefs_inode *ti_parent;
++      struct eventfs_inode *ei_child, *tmp;
+       struct eventfs_inode *ei;
+-      struct eventfs_file *ef, *tmp;
++      int i;
+       /* The top level events directory may be freed by this */
+       if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+@@ -207,9 +152,9 @@ void eventfs_set_ef_status_free(struct t
+               ei = ti->private;
+               /* Record all the top level files */
+-              list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++              list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                        lockdep_is_held(&eventfs_mutex)) {
+-                      list_add_tail(&ef->del_list, &ef_del_list);
++                      list_add_tail(&ei_child->del_list, &ef_del_list);
+               }
+               /* Nothing should access this, but just in case! */
+@@ -218,11 +163,13 @@ void eventfs_set_ef_status_free(struct t
+               mutex_unlock(&eventfs_mutex);
+               /* Now safely free the top level files and their children */
+-              list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+-                      list_del(&ef->del_list);
+-                      eventfs_remove(ef);
++              list_for_each_entry_safe(ei_child, tmp, &ef_del_list, del_list) {
++                      list_del(&ei_child->del_list);
++                      eventfs_remove_dir(ei_child);
+               }
++              kfree_const(ei->name);
++              kfree(ei->d_children);
+               kfree(ei);
+               return;
+       }
+@@ -233,68 +180,162 @@ void eventfs_set_ef_status_free(struct t
+       if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+               goto out;
+-      ef = dentry->d_fsdata;
+-      if (!ef)
++      ei = dentry->d_fsdata;
++      if (!ei)
+               goto out;
+       /*
+-       * If ef was freed, then the LSB bit is set for d_fsdata.
++       * If ei was freed, then the LSB bit is set for d_fsdata.
+        * But this should not happen, as it should still have a
+        * ref count that prevents it. Warn in case it does.
+        */
+-      if (WARN_ON_ONCE((unsigned long)ef & 1))
++      if (WARN_ON_ONCE((unsigned long)ei & 1))
+               goto out;
++      /* This could belong to one of the files of the ei */
++      if (ei->dentry != dentry) {
++              for (i = 0; i < ei->nr_entries; i++) {
++                      if (ei->d_children[i] == dentry)
++                              break;
++              }
++              if (WARN_ON_ONCE(i == ei->nr_entries))
++                      goto out;
++              ei->d_children[i] = NULL;
++      } else {
++              ei->dentry = NULL;
++      }
++
+       dentry->d_fsdata = NULL;
+-      ef->dentry = NULL;
+-out:
++ out:
+       mutex_unlock(&eventfs_mutex);
+ }
+ /**
++ * create_file_dentry - create a dentry for a file of an eventfs_inode
++ * @ei: the eventfs_inode that the file will be created under
++ * @e_dentry: a pointer to the d_children[] of the @ei
++ * @parent: The parent dentry of the created file.
++ * @name: The name of the file to create
++ * @mode: The mode of the file.
++ * @data: The data to use to set the inode of the file with on open()
++ * @fops: The fops of the file to be created.
++ * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
++ *
++ * Create a dentry for a file of an eventfs_inode @ei and place it into the
++ * address located at @e_dentry. If the @e_dentry already has a dentry, then
++ * just do a dget() on it and return. Otherwise create the dentry and attach it.
++ */
++static struct dentry *
++create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
++                 struct dentry *parent, const char *name, umode_t mode, void *data,
++                 const struct file_operations *fops, bool lookup)
++{
++      struct dentry *dentry;
++      bool invalidate = false;
++
++      mutex_lock(&eventfs_mutex);
++      /* If the e_dentry already has a dentry, use it */
++      if (*e_dentry) {
++              /* lookup does not need to up the ref count */
++              if (!lookup)
++                      dget(*e_dentry);
++              mutex_unlock(&eventfs_mutex);
++              return *e_dentry;
++      }
++      mutex_unlock(&eventfs_mutex);
++
++      /* The lookup already has the parent->d_inode locked */
++      if (!lookup)
++              inode_lock(parent->d_inode);
++
++      dentry = create_file(name, mode, parent, data, fops);
++
++      if (!lookup)
++              inode_unlock(parent->d_inode);
++
++      mutex_lock(&eventfs_mutex);
++
++      if (IS_ERR_OR_NULL(dentry)) {
++              /*
++               * When the mutex was released, something else could have
++               * created the dentry for this e_dentry. In which case
++               * use that one.
++               *
++               * Note, with the mutex held, the e_dentry cannot have content
++               * and the ei->is_freed be true at the same time.
++               */
++              WARN_ON_ONCE(ei->is_freed);
++              dentry = *e_dentry;
++              /* The lookup does not need to up the dentry refcount */
++              if (dentry && !lookup)
++                      dget(dentry);
++              mutex_unlock(&eventfs_mutex);
++              return dentry;
++      }
++
++      if (!*e_dentry && !ei->is_freed) {
++              *e_dentry = dentry;
++              dentry->d_fsdata = ei;
++      } else {
++              /*
++               * Should never happen unless we get here due to being freed.
++               * Otherwise it means two dentries exist with the same name.
++               */
++              WARN_ON_ONCE(!ei->is_freed);
++              invalidate = true;
++      }
++      mutex_unlock(&eventfs_mutex);
++
++      if (invalidate)
++              d_invalidate(dentry);
++
++      if (lookup || invalidate)
++              dput(dentry);
++
++      return invalidate ? NULL : dentry;
++}
++
++/**
+  * eventfs_post_create_dir - post create dir routine
+- * @ef: eventfs_file of recently created dir
++ * @ei: eventfs_inode of recently created dir
+  *
+  * Map the meta-data of files within an eventfs dir to their parent dentry
+  */
+-static void eventfs_post_create_dir(struct eventfs_file *ef)
++static void eventfs_post_create_dir(struct eventfs_inode *ei)
+ {
+-      struct eventfs_file *ef_child;
++      struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       /* srcu lock already held */
+       /* fill parent-child relation */
+-      list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              ef_child->d_parent = ef->dentry;
++              ei_child->d_parent = ei->dentry;
+       }
+-      ti = get_tracefs(ef->dentry->d_inode);
+-      ti->private = ef->ei;
++      ti = get_tracefs(ei->dentry->d_inode);
++      ti->private = ei;
+ }
+ /**
+- * create_dentry - helper function to create dentry
+- * @ef: eventfs_file of file or directory to create
+- * @parent: parent dentry
+- * @lookup: true if called from lookup routine
++ * create_dir_dentry - Create a directory dentry for the eventfs_inode
++ * @ei: The eventfs_inode to create the directory for
++ * @parent: The dentry of the parent of this directory
++ * @lookup: True if this is called by the lookup code
+  *
+- * Used to create a dentry for file/dir, executes post dentry creation routine
++ * This creates and attaches a directory dentry to the eventfs_inode @ei.
+  */
+ static struct dentry *
+-create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
++create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool lookup)
+ {
+       bool invalidate = false;
+-      struct dentry *dentry;
++      struct dentry *dentry = NULL;
+       mutex_lock(&eventfs_mutex);
+-      if (ef->is_freed) {
+-              mutex_unlock(&eventfs_mutex);
+-              return NULL;
+-      }
+-      if (ef->dentry) {
+-              dentry = ef->dentry;
+-              /* On dir open, up the ref count */
++      if (ei->dentry) {
++              /* If the ei already has a dentry, use it */
++              dentry = ei->dentry;
++              /* lookup does not need to up the ref count */
+               if (!lookup)
+                       dget(dentry);
+               mutex_unlock(&eventfs_mutex);
+@@ -302,42 +343,44 @@ create_dentry(struct eventfs_file *ef, s
+       }
+       mutex_unlock(&eventfs_mutex);
++      /* The lookup already has the parent->d_inode locked */
+       if (!lookup)
+               inode_lock(parent->d_inode);
+-      if (ef->ei)
+-              dentry = create_dir(ef->name, parent, ef->data);
+-      else
+-              dentry = create_file(ef->name, ef->mode, parent,
+-                                   ef->data, ef->fop);
++      dentry = create_dir(ei->name, parent);
+       if (!lookup)
+               inode_unlock(parent->d_inode);
+       mutex_lock(&eventfs_mutex);
+-      if (IS_ERR_OR_NULL(dentry)) {
+-              /* If the ef was already updated get it */
+-              dentry = ef->dentry;
++
++      if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
++              /*
++               * When the mutex was released, something else could have
++               * created the dentry for this ei. In which case
++               * use that one.
++               *
++               * Note, with the mutex held, the ei->dentry cannot have content
++               * and the ei->is_freed be true at the same time.
++               */
++              dentry = ei->dentry;
+               if (dentry && !lookup)
+                       dget(dentry);
+               mutex_unlock(&eventfs_mutex);
+               return dentry;
+       }
+-      if (!ef->dentry && !ef->is_freed) {
+-              ef->dentry = dentry;
+-              if (ef->ei)
+-                      eventfs_post_create_dir(ef);
+-              dentry->d_fsdata = ef;
++      if (!ei->dentry && !ei->is_freed) {
++              ei->dentry = dentry;
++              eventfs_post_create_dir(ei);
++              dentry->d_fsdata = ei;
+       } else {
+-              /* A race here, should try again (unless freed) */
+-              invalidate = true;
+-
+               /*
+                * Should never happen unless we get here due to being freed.
+                * Otherwise it means two dentries exist with the same name.
+                */
+-              WARN_ON_ONCE(!ef->is_freed);
++              WARN_ON_ONCE(!ei->is_freed);
++              invalidate = true;
+       }
+       mutex_unlock(&eventfs_mutex);
+       if (invalidate)
+@@ -349,50 +392,85 @@ create_dentry(struct eventfs_file *ef, s
+       return invalidate ? NULL : dentry;
+ }
+-static bool match_event_file(struct eventfs_file *ef, const char *name)
+-{
+-      bool ret;
+-
+-      mutex_lock(&eventfs_mutex);
+-      ret = !ef->is_freed && strcmp(ef->name, name) == 0;
+-      mutex_unlock(&eventfs_mutex);
+-
+-      return ret;
+-}
+-
+ /**
+  * eventfs_root_lookup - lookup routine to create file/dir
+  * @dir: in which a lookup is being done
+  * @dentry: file/dir dentry
+- * @flags: to pass as flags parameter to simple lookup
++ * @flags: Just passed to simple_lookup()
+  *
+- * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
+- * list of meta data to find the information needed to create the file/dir.
++ * Used to dynamically create a file/dir within @dir. Search the @ei lists
++ * and, if @dentry is found there, go ahead and create the file/dir.
+  */
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+                                         struct dentry *dentry,
+                                         unsigned int flags)
+ {
++      const struct file_operations *fops;
++      const struct eventfs_entry *entry;
++      struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+-      struct eventfs_file *ef;
++      struct dentry *ei_dentry = NULL;
+       struct dentry *ret = NULL;
++      const char *name = dentry->d_name.name;
++      bool created = false;
++      umode_t mode;
++      void *data;
+       int idx;
++      int i;
++      int r;
+       ti = get_tracefs(dir);
+       if (!(ti->flags & TRACEFS_EVENT_INODE))
+               return NULL;
+-      ei = ti->private;
++      /* Grab srcu to prevent the ei from going away */
+       idx = srcu_read_lock(&eventfs_srcu);
+-      list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++
++      /*
++       * Grab the eventfs_mutex to get a consistent value from ti->private.
++       * This keeps the read from racing with updates done under the mutex.
++       */
++      mutex_lock(&eventfs_mutex);
++      ei = READ_ONCE(ti->private);
++      if (ei)
++              ei_dentry = READ_ONCE(ei->dentry);
++      mutex_unlock(&eventfs_mutex);
++
++      if (!ei || !ei_dentry)
++              goto out;
++
++      data = ei->data;
++
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              if (!match_event_file(ef, dentry->d_name.name))
++              if (strcmp(ei_child->name, name) != 0)
+                       continue;
+               ret = simple_lookup(dir, dentry, flags);
+-              create_dentry(ef, ef->d_parent, true);
++              create_dir_dentry(ei_child, ei_dentry, true);
++              created = true;
+               break;
+       }
++
++      if (created)
++              goto out;
++
++      for (i = 0; i < ei->nr_entries; i++) {
++              entry = &ei->entries[i];
++              if (strcmp(name, entry->name) == 0) {
++                      void *cdata = data;
++                      r = entry->callback(name, &mode, &cdata, &fops);
++                      if (r <= 0)
++                              continue;
++                      ret = simple_lookup(dir, dentry, flags);
++                      create_file_dentry(ei, &ei->d_children[i],
++                                         ei_dentry, name, mode, cdata,
++                                         fops, true);
++                      break;
++              }
++      }
++ out:
+       srcu_read_unlock(&eventfs_srcu, idx);
+       return ret;
+ }
+@@ -432,29 +510,48 @@ static int eventfs_release(struct inode
+       return dcache_dir_close(inode, file);
+ }
++static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
++{
++      struct dentry **tmp;
++
++      tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
++      if (!tmp)
++              return -1;
++      tmp[cnt] = d;
++      tmp[cnt + 1] = NULL;
++      *dentries = tmp;
++      return 0;
++}
++
+ /**
+  * dcache_dir_open_wrapper - eventfs open wrapper
+  * @inode: not used
+- * @file: dir to be opened (to create its child)
++ * @file: dir to be opened (to create its children)
+  *
+- * Used to dynamically create the file/dir within @file. @file is really a
+- * directory and all the files/dirs of the children within @file will be
+- * created. If any of the files/dirs have already been created, their
+- * reference count will be incremented.
++ * Used to dynamically create the files/dirs within @file; all of the
++ * files/dirs will be created. If already created, then their reference
++ * counts will be incremented.
+  */
+ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
+ {
++      const struct file_operations *fops;
++      const struct eventfs_entry *entry;
++      struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+-      struct eventfs_file *ef;
+       struct dentry_list *dlist;
+       struct dentry **dentries = NULL;
+-      struct dentry *dentry = file_dentry(file);
++      struct dentry *parent = file_dentry(file);
+       struct dentry *d;
+       struct inode *f_inode = file_inode(file);
++      const char *name = parent->d_name.name;
++      umode_t mode;
++      void *data;
+       int cnt = 0;
+       int idx;
+       int ret;
++      int i;
++      int r;
+       ti = get_tracefs(f_inode);
+       if (!(ti->flags & TRACEFS_EVENT_INODE))
+@@ -463,25 +560,51 @@ static int dcache_dir_open_wrapper(struc
+       if (WARN_ON_ONCE(file->private_data))
+               return -EINVAL;
++      idx = srcu_read_lock(&eventfs_srcu);
++
++      mutex_lock(&eventfs_mutex);
++      ei = READ_ONCE(ti->private);
++      mutex_unlock(&eventfs_mutex);
++
++      if (!ei) {
++              srcu_read_unlock(&eventfs_srcu, idx);
++              return -EINVAL;
++      }
++
++
++      data = ei->data;
++
+       dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
+-      if (!dlist)
++      if (!dlist) {
++              srcu_read_unlock(&eventfs_srcu, idx);
+               return -ENOMEM;
++      }
+-      ei = ti->private;
+-      idx = srcu_read_lock(&eventfs_srcu);
+-      list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              d = create_dentry(ef, dentry, false);
++              d = create_dir_dentry(ei_child, parent, false);
+               if (d) {
+-                      struct dentry **tmp;
++                      ret = add_dentries(&dentries, d, cnt);
++                      if (ret < 0)
++                              break;
++                      cnt++;
++              }
++      }
+-                      tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+-                      if (!tmp)
++      for (i = 0; i < ei->nr_entries; i++) {
++              void *cdata = data;
++              entry = &ei->entries[i];
++              name = entry->name;
++              r = entry->callback(name, &mode, &cdata, &fops);
++              if (r <= 0)
++                      continue;
++              d = create_file_dentry(ei, &ei->d_children[i],
++                                     parent, name, mode, cdata, fops, false);
++              if (d) {
++                      ret = add_dentries(&dentries, d, cnt);
++                      if (ret < 0)
+                               break;
+-                      tmp[cnt] = d;
+-                      tmp[cnt + 1] = NULL;
+                       cnt++;
+-                      dentries = tmp;
+               }
+       }
+       srcu_read_unlock(&eventfs_srcu, idx);
+@@ -514,63 +637,90 @@ static int dcache_readdir_wrapper(struct
+ }
+ /**
+- * eventfs_prepare_ef - helper function to prepare eventfs_file
+- * @name: the name of the file/directory to create.
+- * @mode: the permission that the file should have.
+- * @fop: struct file_operations that should be used for this file/directory.
+- * @iop: struct inode_operations that should be used for this file/directory.
+- * @data: something that the caller will want to get to later on. The
+- *        inode.i_private pointer will point to this value on the open() call.
+- *
+- * This function allocates and fills the eventfs_file structure.
+- */
+-static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode,
+-                                      const struct file_operations *fop,
+-                                      const struct inode_operations *iop,
+-                                      void *data)
++ * eventfs_create_dir - Create the eventfs_inode for this directory
++ * @name: The name of the directory to create.
++ * @parent: The eventfs_inode of the parent directory.
++ * @entries: A list of entries that represent the files under this directory
++ * @size: The number of @entries
++ * @data: The default data to pass to the files (an entry may override it).
++ *
++ * This function creates the descriptor to represent a directory in the
++ * eventfs. This descriptor is an eventfs_inode, and it is returned to be
++ * used to create other children underneath.
++ *
++ * The @entries is an array of eventfs_entry structures, each of which has:
++ *    const char               *name
++ *    eventfs_callback        callback;
++ *
++ * The name is the name of the file, and the callback is a pointer to a function
++ * that will be called when the file is referenced (either by lookup or by
++ * reading a directory). The callback is of the prototype:
++ *
++ *    int callback(const char *name, umode_t *mode, void **data,
++ *               const struct file_operations **fops);
++ *
++ * When a file needs to be created, this callback will be called with
++ *   name = the name of the file being created (so that the same callback
++ *          may be used for multiple files).
++ *   mode = a place to set the file's mode
++ *   data = A pointer to @data, and the callback may replace it, which will
++ *         cause the file created to pass the new data to the open() call.
++ *   fops = the fops to use for the created file.
++ */
++struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
++                                       const struct eventfs_entry *entries,
++                                       int size, void *data)
+ {
+-      struct eventfs_file *ef;
++      struct eventfs_inode *ei;
++
++      if (!parent)
++              return ERR_PTR(-EINVAL);
+-      ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+-      if (!ef)
++      ei = kzalloc(sizeof(*ei), GFP_KERNEL);
++      if (!ei)
+               return ERR_PTR(-ENOMEM);
+-      ef->name = kstrdup(name, GFP_KERNEL);
+-      if (!ef->name) {
+-              kfree(ef);
++      ei->name = kstrdup_const(name, GFP_KERNEL);
++      if (!ei->name) {
++              kfree(ei);
+               return ERR_PTR(-ENOMEM);
+       }
+-      if (S_ISDIR(mode)) {
+-              ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL);
+-              if (!ef->ei) {
+-                      kfree(ef->name);
+-                      kfree(ef);
++      if (size) {
++              ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++              if (!ei->d_children) {
++                      kfree_const(ei->name);
++                      kfree(ei);
+                       return ERR_PTR(-ENOMEM);
+               }
+-              INIT_LIST_HEAD(&ef->ei->e_top_files);
+-      } else {
+-              ef->ei = NULL;
+       }
+-      ef->iop = iop;
+-      ef->fop = fop;
+-      ef->mode = mode;
+-      ef->data = data;
+-      return ef;
++      ei->entries = entries;
++      ei->nr_entries = size;
++      ei->data = data;
++      INIT_LIST_HEAD(&ei->children);
++
++      mutex_lock(&eventfs_mutex);
++      list_add_tail(&ei->list, &parent->children);
++      ei->d_parent = parent->dentry;
++      mutex_unlock(&eventfs_mutex);
++
++      return ei;
+ }
+ /**
+- * eventfs_create_events_dir - create the trace event structure
+- * @name: the name of the directory to create.
+- * @parent: parent dentry for this file.  This should be a directory dentry
+- *          if set.  If this parameter is NULL, then the directory will be
+- *          created in the root of the tracefs filesystem.
++ * eventfs_create_events_dir - create the top level events directory
++ * @name: The name of the top level directory to create.
++ * @parent: Parent dentry for this file in the tracefs directory.
++ * @entries: A list of entries that represent the files under this directory
++ * @size: The number of @entries
++ * @data: The default data to pass to the files (an entry may override it).
+  *
+  * This function creates the top of the trace event directory.
+  */
+-struct dentry *eventfs_create_events_dir(const char *name,
+-                                       struct dentry *parent)
++struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
++                                              const struct eventfs_entry *entries,
++                                              int size, void *data)
+ {
+       struct dentry *dentry = tracefs_start_creating(name, parent);
+       struct eventfs_inode *ei;
+@@ -581,19 +731,32 @@ struct dentry *eventfs_create_events_dir
+               return NULL;
+       if (IS_ERR(dentry))
+-              return dentry;
++              return (struct eventfs_inode *)dentry;
+       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+       if (!ei)
+-              return ERR_PTR(-ENOMEM);
++              goto fail;
++
+       inode = tracefs_get_inode(dentry->d_sb);
+-      if (unlikely(!inode)) {
+-              kfree(ei);
+-              tracefs_failed_creating(dentry);
+-              return ERR_PTR(-ENOMEM);
+-      }
++      if (unlikely(!inode))
++              goto fail;
++
++      if (size) {
++              ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++              if (!ei->d_children)
++                      goto fail;
++      }
++
++      ei->dentry = dentry;
++      ei->entries = entries;
++      ei->nr_entries = size;
++      ei->data = data;
++      ei->name = kstrdup_const(name, GFP_KERNEL);
++      if (!ei->name)
++              goto fail;
+-      INIT_LIST_HEAD(&ei->e_top_files);
++      INIT_LIST_HEAD(&ei->children);
++      INIT_LIST_HEAD(&ei->list);
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
+@@ -608,193 +771,41 @@ struct dentry *eventfs_create_events_dir
+       d_instantiate(dentry, inode);
+       inc_nlink(dentry->d_parent->d_inode);
+       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+-      return tracefs_end_creating(dentry);
+-}
++      tracefs_end_creating(dentry);
+-/**
+- * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
+- * @name: the name of the file to create.
+- * @parent: parent dentry for this dir.
+- *
+- * This function adds eventfs subsystem dir to list.
+- * And all these dirs are created on the fly when they are looked up,
+- * and the dentry and inodes will be removed when they are done.
+- */
+-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+-                                             struct dentry *parent)
+-{
+-      struct tracefs_inode *ti_parent;
+-      struct eventfs_inode *ei_parent;
+-      struct eventfs_file *ef;
++      /* Will call dput when the directory is removed */
++      dget(dentry);
+-      if (security_locked_down(LOCKDOWN_TRACEFS))
+-              return NULL;
+-
+-      if (!parent)
+-              return ERR_PTR(-EINVAL);
++      return ei;
+-      ti_parent = get_tracefs(parent->d_inode);
+-      ei_parent = ti_parent->private;
+-
+-      ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+-      if (IS_ERR(ef))
+-              return ef;
+-
+-      mutex_lock(&eventfs_mutex);
+-      list_add_tail(&ef->list, &ei_parent->e_top_files);
+-      ef->d_parent = parent;
+-      mutex_unlock(&eventfs_mutex);
+-      return ef;
++ fail:
++      kfree(ei->d_children);
++      kfree(ei);
++      tracefs_failed_creating(dentry);
++      return ERR_PTR(-ENOMEM);
+ }
+-/**
+- * eventfs_add_dir - add eventfs dir to list to create later
+- * @name: the name of the file to create.
+- * @ef_parent: parent eventfs_file for this dir.
+- *
+- * This function adds eventfs dir to list.
+- * And all these dirs are created on the fly when they are looked up,
+- * and the dentry and inodes will be removed when they are done.
+- */
+-struct eventfs_file *eventfs_add_dir(const char *name,
+-                                   struct eventfs_file *ef_parent)
++static void free_ei(struct rcu_head *head)
+ {
+-      struct eventfs_file *ef;
++      struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
+-      if (security_locked_down(LOCKDOWN_TRACEFS))
+-              return NULL;
+-
+-      if (!ef_parent)
+-              return ERR_PTR(-EINVAL);
+-
+-      ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+-      if (IS_ERR(ef))
+-              return ef;
+-
+-      mutex_lock(&eventfs_mutex);
+-      list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+-      ef->d_parent = ef_parent->dentry;
+-      mutex_unlock(&eventfs_mutex);
+-      return ef;
+-}
+-
+-/**
+- * eventfs_add_events_file - add the data needed to create a file for later reference
+- * @name: the name of the file to create.
+- * @mode: the permission that the file should have.
+- * @parent: parent dentry for this file.
+- * @data: something that the caller will want to get to later on.
+- * @fop: struct file_operations that should be used for this file.
+- *
+- * This function is used to add the information needed to create a
+- * dentry/inode within the top level events directory. The file created
+- * will have the @mode permissions. The @data will be used to fill the
+- * inode.i_private when the open() call is done. The dentry and inodes are
+- * all created when they are referenced, and removed when they are no
+- * longer referenced.
+- */
+-int eventfs_add_events_file(const char *name, umode_t mode,
+-                       struct dentry *parent, void *data,
+-                       const struct file_operations *fop)
+-{
+-      struct tracefs_inode *ti;
+-      struct eventfs_inode *ei;
+-      struct eventfs_file *ef;
+-
+-      if (security_locked_down(LOCKDOWN_TRACEFS))
+-              return -ENODEV;
+-
+-      if (!parent)
+-              return -EINVAL;
+-
+-      if (!(mode & S_IFMT))
+-              mode |= S_IFREG;
+-
+-      if (!parent->d_inode)
+-              return -EINVAL;
+-
+-      ti = get_tracefs(parent->d_inode);
+-      if (!(ti->flags & TRACEFS_EVENT_INODE))
+-              return -EINVAL;
+-
+-      ei = ti->private;
+-      ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+-
+-      if (IS_ERR(ef))
+-              return -ENOMEM;
+-
+-      mutex_lock(&eventfs_mutex);
+-      list_add_tail(&ef->list, &ei->e_top_files);
+-      ef->d_parent = parent;
+-      mutex_unlock(&eventfs_mutex);
+-      return 0;
+-}
+-
+-/**
+- * eventfs_add_file - add eventfs file to list to create later
+- * @name: the name of the file to create.
+- * @mode: the permission that the file should have.
+- * @ef_parent: parent eventfs_file for this file.
+- * @data: something that the caller will want to get to later on.
+- * @fop: struct file_operations that should be used for this file.
+- *
+- * This function is used to add the information needed to create a
+- * file within a subdirectory of the events directory. The file created
+- * will have the @mode permissions. The @data will be used to fill the
+- * inode.i_private when the open() call is done. The dentry and inodes are
+- * all created when they are referenced, and removed when they are no
+- * longer referenced.
+- */
+-int eventfs_add_file(const char *name, umode_t mode,
+-                   struct eventfs_file *ef_parent,
+-                   void *data,
+-                   const struct file_operations *fop)
+-{
+-      struct eventfs_file *ef;
+-
+-      if (security_locked_down(LOCKDOWN_TRACEFS))
+-              return -ENODEV;
+-
+-      if (!ef_parent)
+-              return -EINVAL;
+-
+-      if (!(mode & S_IFMT))
+-              mode |= S_IFREG;
+-
+-      ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+-      if (IS_ERR(ef))
+-              return -ENOMEM;
+-
+-      mutex_lock(&eventfs_mutex);
+-      list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+-      ef->d_parent = ef_parent->dentry;
+-      mutex_unlock(&eventfs_mutex);
+-      return 0;
+-}
+-
+-static void free_ef(struct rcu_head *head)
+-{
+-      struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+-
+-      kfree(ef->name);
+-      kfree(ef->ei);
+-      kfree(ef);
++      kfree_const(ei->name);
++      kfree(ei->d_children);
++      kfree(ei);
+ }
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+- * @ef: eventfs_file to be removed.
+- * @head: to create list of eventfs_file to be deleted
+- * @level: to check recursion depth
++ * @ei: eventfs_inode to be removed.
+  *
+- * The helper function eventfs_remove_rec() is used to clean up and free the
+- * associated data from eventfs for both of the added functions.
++ * This function recursively remove eventfs_inode which
++ * contains info of file or dir.
+  */
+-static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
++static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
+ {
+-      struct eventfs_file *ef_child;
++      struct eventfs_inode *ei_child;
+-      if (!ef)
++      if (!ei)
+               return;
+       /*
+        * Check recursion depth. It should never be greater than 3:
+@@ -806,62 +817,68 @@ static void eventfs_remove_rec(struct ev
+       if (WARN_ON_ONCE(level > 3))
+               return;
+-      if (ef->ei) {
+-              /* search for nested folders or files */
+-              list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+-                                       lockdep_is_held(&eventfs_mutex)) {
+-                      eventfs_remove_rec(ef_child, head, level + 1);
+-              }
++      /* search for nested folders or files */
++      list_for_each_entry_srcu(ei_child, &ei->children, list,
++                               lockdep_is_held(&eventfs_mutex)) {
++              eventfs_remove_rec(ei_child, head, level + 1);
+       }
+-      list_del_rcu(&ef->list);
+-      list_add_tail(&ef->del_list, head);
++      list_del_rcu(&ei->list);
++      list_add_tail(&ei->del_list, head);
+ }
++static void unhook_dentry(struct dentry **dentry, struct dentry **list)
++{
++      if (*dentry) {
++              unsigned long ptr = (unsigned long)*list;
++
++              /* Keep the dentry from being freed yet */
++              dget(*dentry);
++
++              /*
++               * Paranoid: The dget() above should prevent the dentry
++               * from being freed and calling eventfs_set_ei_status_free().
++               * But just in case, set the link list LSB pointer to 1
++               * and have eventfs_set_ei_status_free() check that to
++               * make sure that if it does happen, it will not think
++               * the d_fsdata is an eventfs_inode.
++               *
++               * For this to work, no eventfs_inode should be allocated
++               * on a odd space, as the ef should always be allocated
++               * to be at least word aligned. Check for that too.
++               */
++              WARN_ON_ONCE(ptr & 1);
++
++              (*dentry)->d_fsdata = (void *)(ptr | 1);
++              *list = *dentry;
++              *dentry = NULL;
++      }
++}
+ /**
+  * eventfs_remove - remove eventfs dir or file from list
+- * @ef: eventfs_file to be removed.
++ * @ei: eventfs_inode to be removed.
+  *
+  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
+  */
+-void eventfs_remove(struct eventfs_file *ef)
++void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+-      struct eventfs_file *tmp;
+-      LIST_HEAD(ef_del_list);
++      struct eventfs_inode *tmp;
++      LIST_HEAD(ei_del_list);
+       struct dentry *dentry_list = NULL;
+       struct dentry *dentry;
++      int i;
+-      if (!ef)
++      if (!ei)
+               return;
+       mutex_lock(&eventfs_mutex);
+-      eventfs_remove_rec(ef, &ef_del_list, 0);
+-      list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+-              if (ef->dentry) {
+-                      unsigned long ptr = (unsigned long)dentry_list;
+-
+-                      /* Keep the dentry from being freed yet */
+-                      dget(ef->dentry);
+-
+-                      /*
+-                       * Paranoid: The dget() above should prevent the dentry
+-                       * from being freed and calling eventfs_set_ef_status_free().
+-                       * But just in case, set the link list LSB pointer to 1
+-                       * and have eventfs_set_ef_status_free() check that to
+-                       * make sure that if it does happen, it will not think
+-                       * the d_fsdata is an event_file.
+-                       *
+-                       * For this to work, no event_file should be allocated
+-                       * on a odd space, as the ef should always be allocated
+-                       * to be at least word aligned. Check for that too.
+-                       */
+-                      WARN_ON_ONCE(ptr & 1);
+-
+-                      ef->dentry->d_fsdata = (void *)(ptr | 1);
+-                      dentry_list = ef->dentry;
+-                      ef->dentry = NULL;
+-              }
+-              call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
++      eventfs_remove_rec(ei, &ei_del_list, 0);
++
++      list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
++              for (i = 0; i < ei->nr_entries; i++)
++                      unhook_dentry(&ei->d_children[i], &dentry_list);
++              unhook_dentry(&ei->dentry, &dentry_list);
++              call_srcu(&eventfs_srcu, &ei->rcu, free_ei);
+       }
+       mutex_unlock(&eventfs_mutex);
+@@ -876,8 +893,8 @@ void eventfs_remove(struct eventfs_file
+               mutex_lock(&eventfs_mutex);
+               /* dentry should now have at least a single reference */
+               WARN_ONCE((int)d_count(dentry) < 1,
+-                        "dentry %p less than one reference (%d) after invalidate\n",
+-                        dentry, d_count(dentry));
++                        "dentry %px (%s) less than one reference (%d) after invalidate\n",
++                        dentry, dentry->d_name.name, d_count(dentry));
+               mutex_unlock(&eventfs_mutex);
+               dput(dentry);
+       }
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -385,7 +385,7 @@ static void tracefs_dentry_iput(struct d
+       ti = get_tracefs(inode);
+       if (ti && ti->flags & TRACEFS_EVENT_INODE)
+-              eventfs_set_ef_status_free(ti, dentry);
++              eventfs_set_ei_status_free(ti, dentry);
+       iput(inode);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -13,6 +13,41 @@ struct tracefs_inode {
+       struct inode            vfs_inode;
+ };
++/*
++ * struct eventfs_inode - hold the properties of the eventfs directories.
++ * @list:     link list into the parent directory
++ * @entries:  the array of entries representing the files in the directory
++ * @name:     the name of the directory to create
++ * @children: link list into the child eventfs_inode
++ * @dentry:     the dentry of the directory
++ * @d_parent:   pointer to the parent's dentry
++ * @d_children: The array of dentries to represent the files when created
++ * @data:     The private data to pass to the callbacks
++ * @nr_entries: The number of items in @entries
++ */
++struct eventfs_inode {
++      struct list_head                list;
++      const struct eventfs_entry      *entries;
++      const char                      *name;
++      struct list_head                children;
++      struct dentry                   *dentry;
++      struct dentry                   *d_parent;
++      struct dentry                   **d_children;
++      void                            *data;
++      /*
++       * Union - used for deletion
++       * @del_list:   list of eventfs_inode to delete
++       * @rcu:        eventfs_indoe to delete in RCU
++       * @is_freed:   node is freed if one of the above is set
++       */
++      union {
++              struct list_head        del_list;
++              struct rcu_head         rcu;
++              unsigned long           is_freed;
++      };
++      int                             nr_entries;
++};
++
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
+ {
+       return container_of(inode, struct tracefs_inode, vfs_inode);
+@@ -25,6 +60,6 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
+-void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry);
++void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+ #endif /* _TRACEFS_INTERNAL_H */
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -652,7 +652,7 @@ struct trace_event_file {
+       struct list_head                list;
+       struct trace_event_call         *event_call;
+       struct event_filter __rcu       *filter;
+-      struct eventfs_file             *ef;
++      struct eventfs_inode            *ei;
+       struct trace_array              *tr;
+       struct trace_subsystem_dir      *system;
+       struct list_head                triggers;
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -23,26 +23,25 @@ struct file_operations;
+ struct eventfs_file;
+-struct dentry *eventfs_create_events_dir(const char *name,
+-                                       struct dentry *parent);
++typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
++                              const struct file_operations **fops);
+-struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+-                                             struct dentry *parent);
++struct eventfs_entry {
++      const char                      *name;
++      eventfs_callback                callback;
++};
+-struct eventfs_file *eventfs_add_dir(const char *name,
+-                                   struct eventfs_file *ef_parent);
++struct eventfs_inode;
+-int eventfs_add_file(const char *name, umode_t mode,
+-                   struct eventfs_file *ef_parent, void *data,
+-                   const struct file_operations *fops);
++struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
++                                              const struct eventfs_entry *entries,
++                                              int size, void *data);
+-int eventfs_add_events_file(const char *name, umode_t mode,
+-                       struct dentry *parent, void *data,
+-                       const struct file_operations *fops);
++struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
++                                       const struct eventfs_entry *entries,
++                                       int size, void *data);
+-void eventfs_remove(struct eventfs_file *ef);
+-
+-void eventfs_remove_events_dir(struct dentry *dentry);
++void eventfs_remove_dir(struct eventfs_inode *ei);
+ struct dentry *tracefs_create_file(const char *name, umode_t mode,
+                                  struct dentry *parent, void *data,
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -9760,7 +9760,6 @@ static __init void create_trace_instance
+ static void
+ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
+ {
+-      struct trace_event_file *file;
+       int cpu;
+       trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
+@@ -9793,11 +9792,7 @@ init_tracer_tracefs(struct trace_array *
+       trace_create_file("trace_marker", 0220, d_tracer,
+                         tr, &tracing_mark_fops);
+-      file = __find_event_file(tr, "ftrace", "print");
+-      if (file && file->ef)
+-              eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
+-                                file, &event_trigger_fops);
+-      tr->trace_marker_file = file;
++      tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
+       trace_create_file("trace_marker_raw", 0220, d_tracer,
+                         tr, &tracing_mark_raw_fops);
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -381,7 +381,7 @@ struct trace_array {
+       struct dentry           *dir;
+       struct dentry           *options;
+       struct dentry           *percpu_dir;
+-      struct dentry           *event_dir;
++      struct eventfs_inode    *event_dir;
+       struct trace_options    *topts;
+       struct list_head        systems;
+       struct list_head        events;
+@@ -1345,7 +1345,7 @@ struct trace_subsystem_dir {
+       struct list_head                list;
+       struct event_subsystem          *subsystem;
+       struct trace_array              *tr;
+-      struct eventfs_file             *ef;
++      struct eventfs_inode            *ei;
+       int                             ref_count;
+       int                             nr_events;
+ };
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -984,7 +984,7 @@ static void remove_subsystem(struct trac
+               return;
+       if (!--dir->nr_events) {
+-              eventfs_remove(dir->ef);
++              eventfs_remove_dir(dir->ei);
+               list_del(&dir->list);
+               __put_system_dir(dir);
+       }
+@@ -1013,7 +1013,7 @@ void event_file_put(struct trace_event_f
+ static void remove_event_file_dir(struct trace_event_file *file)
+ {
+-      eventfs_remove(file->ef);
++      eventfs_remove_dir(file->ei);
+       list_del(&file->list);
+       remove_subsystem(file->system);
+       free_event_filter(file->filter);
+@@ -2302,14 +2302,40 @@ create_new_subsystem(const char *name)
+       return NULL;
+ }
+-static struct eventfs_file *
++int system_callback(const char *name, umode_t *mode, void **data,
++                  const struct file_operations **fops)
++{
++      if (strcmp(name, "filter") == 0)
++              *fops = &ftrace_subsystem_filter_fops;
++
++      else if (strcmp(name, "enable") == 0)
++              *fops = &ftrace_system_enable_fops;
++
++      else
++              return 0;
++
++      *mode = TRACE_MODE_WRITE;
++      return 1;
++}
++
++static struct eventfs_inode *
+ event_subsystem_dir(struct trace_array *tr, const char *name,
+-                  struct trace_event_file *file, struct dentry *parent)
++                  struct trace_event_file *file, struct eventfs_inode *parent)
+ {
+       struct event_subsystem *system, *iter;
+       struct trace_subsystem_dir *dir;
+-      struct eventfs_file *ef;
+-      int res;
++      struct eventfs_inode *ei;
++      int nr_entries;
++      static struct eventfs_entry system_entries[] = {
++              {
++                      .name           = "filter",
++                      .callback       = system_callback,
++              },
++              {
++                      .name           = "enable",
++                      .callback       = system_callback,
++              }
++      };
+       /* First see if we did not already create this dir */
+       list_for_each_entry(dir, &tr->systems, list) {
+@@ -2317,7 +2343,7 @@ event_subsystem_dir(struct trace_array *
+               if (strcmp(system->name, name) == 0) {
+                       dir->nr_events++;
+                       file->system = dir;
+-                      return dir->ef;
++                      return dir->ei;
+               }
+       }
+@@ -2341,39 +2367,29 @@ event_subsystem_dir(struct trace_array *
+       } else
+               __get_system(system);
+-      ef = eventfs_add_subsystem_dir(name, parent);
+-      if (IS_ERR(ef)) {
++      /* The ftrace system only has directories, no files */
++      if (strcmp(name, "ftrace") == 0)
++              nr_entries = 0;
++      else
++              nr_entries = ARRAY_SIZE(system_entries);
++
++      ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
++      if (!ei) {
+               pr_warn("Failed to create system directory %s\n", name);
+               __put_system(system);
+               goto out_free;
+       }
+-      dir->ef = ef;
++      dir->ei = ei;
+       dir->tr = tr;
+       dir->ref_count = 1;
+       dir->nr_events = 1;
+       dir->subsystem = system;
+       file->system = dir;
+-      /* the ftrace system is special, do not create enable or filter files */
+-      if (strcmp(name, "ftrace") != 0) {
+-
+-              res = eventfs_add_file("filter", TRACE_MODE_WRITE,
+-                                          dir->ef, dir,
+-                                          &ftrace_subsystem_filter_fops);
+-              if (res) {
+-                      kfree(system->filter);
+-                      system->filter = NULL;
+-                      pr_warn("Could not create tracefs '%s/filter' entry\n", name);
+-              }
+-
+-              eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
+-                                &ftrace_system_enable_fops);
+-      }
+-
+       list_add(&dir->list, &tr->systems);
+-      return dir->ef;
++      return dir->ei;
+  out_free:
+       kfree(dir);
+@@ -2422,15 +2438,134 @@ event_define_fields(struct trace_event_c
+       return ret;
+ }
++static int event_callback(const char *name, umode_t *mode, void **data,
++                        const struct file_operations **fops)
++{
++      struct trace_event_file *file = *data;
++      struct trace_event_call *call = file->event_call;
++
++      if (strcmp(name, "format") == 0) {
++              *mode = TRACE_MODE_READ;
++              *fops = &ftrace_event_format_fops;
++              *data = call;
++              return 1;
++      }
++
++      /*
++       * Only event directories that can be enabled should have
++       * triggers or filters, with the exception of the "print"
++       * event that can have a "trigger" file.
++       */
++      if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
++              if (call->class->reg && strcmp(name, "enable") == 0) {
++                      *mode = TRACE_MODE_WRITE;
++                      *fops = &ftrace_enable_fops;
++                      return 1;
++              }
++
++              if (strcmp(name, "filter") == 0) {
++                      *mode = TRACE_MODE_WRITE;
++                      *fops = &ftrace_event_filter_fops;
++                      return 1;
++              }
++      }
++
++      if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
++          strcmp(trace_event_name(call), "print") == 0) {
++              if (strcmp(name, "trigger") == 0) {
++                      *mode = TRACE_MODE_WRITE;
++                      *fops = &event_trigger_fops;
++                      return 1;
++              }
++      }
++
++#ifdef CONFIG_PERF_EVENTS
++      if (call->event.type && call->class->reg &&
++          strcmp(name, "id") == 0) {
++              *mode = TRACE_MODE_READ;
++              *data = (void *)(long)call->event.type;
++              *fops = &ftrace_event_id_fops;
++              return 1;
++      }
++#endif
++
++#ifdef CONFIG_HIST_TRIGGERS
++      if (strcmp(name, "hist") == 0) {
++              *mode = TRACE_MODE_READ;
++              *fops = &event_hist_fops;
++              return 1;
++      }
++#endif
++#ifdef CONFIG_HIST_TRIGGERS_DEBUG
++      if (strcmp(name, "hist_debug") == 0) {
++              *mode = TRACE_MODE_READ;
++              *fops = &event_hist_debug_fops;
++              return 1;
++      }
++#endif
++#ifdef CONFIG_TRACE_EVENT_INJECT
++      if (call->event.type && call->class->reg &&
++          strcmp(name, "inject") == 0) {
++              *mode = 0200;
++              *fops = &event_inject_fops;
++              return 1;
++      }
++#endif
++      return 0;
++}
++
+ static int
+-event_create_dir(struct dentry *parent, struct trace_event_file *file)
++event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
+ {
+       struct trace_event_call *call = file->event_call;
+-      struct eventfs_file *ef_subsystem = NULL;
+       struct trace_array *tr = file->tr;
+-      struct eventfs_file *ef;
++      struct eventfs_inode *e_events;
++      struct eventfs_inode *ei;
+       const char *name;
++      int nr_entries;
+       int ret;
++      static struct eventfs_entry event_entries[] = {
++              {
++                      .name           = "enable",
++                      .callback       = event_callback,
++              },
++              {
++                      .name           = "filter",
++                      .callback       = event_callback,
++              },
++              {
++                      .name           = "trigger",
++                      .callback       = event_callback,
++              },
++              {
++                      .name           = "format",
++                      .callback       = event_callback,
++              },
++#ifdef CONFIG_PERF_EVENTS
++              {
++                      .name           = "id",
++                      .callback       = event_callback,
++              },
++#endif
++#ifdef CONFIG_HIST_TRIGGERS
++              {
++                      .name           = "hist",
++                      .callback       = event_callback,
++              },
++#endif
++#ifdef CONFIG_HIST_TRIGGERS_DEBUG
++              {
++                      .name           = "hist_debug",
++                      .callback       = event_callback,
++              },
++#endif
++#ifdef CONFIG_TRACE_EVENT_INJECT
++              {
++                      .name           = "inject",
++                      .callback       = event_callback,
++              },
++#endif
++      };
+       /*
+        * If the trace point header did not define TRACE_SYSTEM
+@@ -2440,29 +2575,20 @@ event_create_dir(struct dentry *parent,
+       if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
+               return -ENODEV;
+-      ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
+-      if (!ef_subsystem)
++      e_events = event_subsystem_dir(tr, call->class->system, file, parent);
++      if (!e_events)
+               return -ENOMEM;
++      nr_entries = ARRAY_SIZE(event_entries);
++
+       name = trace_event_name(call);
+-      ef = eventfs_add_dir(name, ef_subsystem);
+-      if (IS_ERR(ef)) {
++      ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
++      if (IS_ERR(ei)) {
+               pr_warn("Could not create tracefs '%s' directory\n", name);
+               return -1;
+       }
+-      file->ef = ef;
+-
+-      if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+-              eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
+-                                &ftrace_enable_fops);
+-
+-#ifdef CONFIG_PERF_EVENTS
+-      if (call->event.type && call->class->reg)
+-              eventfs_add_file("id", TRACE_MODE_READ, file->ef,
+-                                (void *)(long)call->event.type,
+-                                &ftrace_event_id_fops);
+-#endif
++      file->ei = ei;
+       ret = event_define_fields(call);
+       if (ret < 0) {
+@@ -2470,35 +2596,6 @@ event_create_dir(struct dentry *parent,
+               return ret;
+       }
+-      /*
+-       * Only event directories that can be enabled should have
+-       * triggers or filters.
+-       */
+-      if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+-              eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
+-                                file, &ftrace_event_filter_fops);
+-
+-              eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
+-                                file, &event_trigger_fops);
+-      }
+-
+-#ifdef CONFIG_HIST_TRIGGERS
+-      eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
+-                        &event_hist_fops);
+-#endif
+-#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+-      eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
+-                        &event_hist_debug_fops);
+-#endif
+-      eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
+-                        &ftrace_event_format_fops);
+-
+-#ifdef CONFIG_TRACE_EVENT_INJECT
+-      if (call->event.type && call->class->reg)
+-              eventfs_add_file("inject", 0200, file->ef, file,
+-                                &event_inject_fops);
+-#endif
+-
+       return 0;
+ }
+@@ -3644,30 +3741,65 @@ static __init int setup_trace_event(char
+ }
+ __setup("trace_event=", setup_trace_event);
++static int events_callback(const char *name, umode_t *mode, void **data,
++                         const struct file_operations **fops)
++{
++      if (strcmp(name, "enable") == 0) {
++              *mode = TRACE_MODE_WRITE;
++              *fops = &ftrace_tr_enable_fops;
++              return 1;
++      }
++
++      if (strcmp(name, "header_page") == 0)
++              *data = ring_buffer_print_page_header;
++
++      else if (strcmp(name, "header_event") == 0)
++              *data = ring_buffer_print_entry_header;
++
++      else
++              return 0;
++
++      *mode = TRACE_MODE_READ;
++      *fops = &ftrace_show_header_fops;
++      return 1;
++}
++
+ /* Expects to have event_mutex held when called */
+ static int
+ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+ {
+-      struct dentry *d_events;
++      struct eventfs_inode *e_events;
+       struct dentry *entry;
+-      int error = 0;
++      int nr_entries;
++      static struct eventfs_entry events_entries[] = {
++              {
++                      .name           = "enable",
++                      .callback       = events_callback,
++              },
++              {
++                      .name           = "header_page",
++                      .callback       = events_callback,
++              },
++              {
++                      .name           = "header_event",
++                      .callback       = events_callback,
++              },
++      };
+       entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+                                 tr, &ftrace_set_event_fops);
+       if (!entry)
+               return -ENOMEM;
+-      d_events = eventfs_create_events_dir("events", parent);
+-      if (IS_ERR(d_events)) {
++      nr_entries = ARRAY_SIZE(events_entries);
++
++      e_events = eventfs_create_events_dir("events", parent, events_entries,
++                                           nr_entries, tr);
++      if (IS_ERR(e_events)) {
+               pr_warn("Could not create tracefs 'events' directory\n");
+               return -ENOMEM;
+       }
+-      error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
+-                                tr, &ftrace_tr_enable_fops);
+-      if (error)
+-              return -ENOMEM;
+-
+       /* There are not as crucial, just warn if they are not created */
+       trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+@@ -3677,16 +3809,7 @@ create_event_toplevel_files(struct dentr
+                         TRACE_MODE_WRITE, parent, tr,
+                         &ftrace_set_event_notrace_pid_fops);
+-      /* ring buffer internal formats */
+-      eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
+-                                ring_buffer_print_page_header,
+-                                &ftrace_show_header_fops);
+-
+-      eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
+-                                ring_buffer_print_entry_header,
+-                                &ftrace_show_header_fops);
+-
+-      tr->event_dir = d_events;
++      tr->event_dir = e_events;
+       return 0;
+ }
+@@ -3770,7 +3893,7 @@ int event_trace_del_tracer(struct trace_
+       down_write(&trace_event_sem);
+       __trace_remove_event_dirs(tr);
+-      eventfs_remove_events_dir(tr->event_dir);
++      eventfs_remove_dir(tr->event_dir);
+       up_write(&trace_event_sem);
+       tr->event_dir = NULL;
diff --git a/queue-6.6/eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch b/queue-6.6/eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch
new file mode 100644 (file)
index 0000000..8d1b446
--- /dev/null
@@ -0,0 +1,88 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:18 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:29 -0500
+Subject: eventfs: Remove expectation that ei->is_freed means ei->dentry == NULL
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120950.284520771@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 88903daecacf03b1e5636e1b5f18bda5b07030fc upstream.
+
+The logic to free the eventfs_inode (ei) used to set is_freed and clear the
+"dentry" field under the eventfs_mutex. But that changed when a race was
+found where the ei->dentry needed to be cleared when the last dput() was
+called on it. But there was still logic that checked if ei->dentry was not
+NULL and is_freed is set, and would warn if it was.
+
+But since that situation was changed and the ei->dentry isn't cleared
+until the last dput() is called on it while the ei->is_freed is set, do
+not test for that condition anymore, and change the comments to reflect
+that.
+
+Link: https://lkml.kernel.org/r/20231120235154.265826243@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 020010fbfa20 ("eventfs: Delete eventfs_inode when the last dentry is freed")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -27,16 +27,16 @@
+ /*
+  * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
+  * to the ei->dentry must be done under this mutex and after checking
+- * if ei->is_freed is not set. The ei->dentry is released under the
+- * mutex at the same time ei->is_freed is set. If ei->is_freed is set
+- * then the ei->dentry is invalid.
++ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
++ * is on its way to being freed after the last dput() is made on it.
+  */
+ static DEFINE_MUTEX(eventfs_mutex);
+ /*
+  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+  * its parent's list and will have is_freed set (under eventfs_mutex).
+- * After the SRCU grace period is over, the ei may be freed.
++ * After the SRCU grace period is over and the last dput() is called
++ * the ei is freed.
+  */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+@@ -365,12 +365,14 @@ create_file_dentry(struct eventfs_inode
+                * created the dentry for this e_dentry. In which case
+                * use that one.
+                *
+-               * Note, with the mutex held, the e_dentry cannot have content
+-               * and the ei->is_freed be true at the same time.
++               * If ei->is_freed is set, the e_dentry is currently on its
++               * way to being freed, don't return it. If e_dentry is NULL
++               * it means it was already freed.
+                */
+-              dentry = *e_dentry;
+-              if (WARN_ON_ONCE(dentry && ei->is_freed))
++              if (ei->is_freed)
+                       dentry = NULL;
++              else
++                      dentry = *e_dentry;
+               /* The lookup does not need to up the dentry refcount */
+               if (dentry && !lookup)
+                       dget(dentry);
+@@ -473,8 +475,8 @@ create_dir_dentry(struct eventfs_inode *
+                * created the dentry for this e_dentry. In which case
+                * use that one.
+                *
+-               * Note, with the mutex held, the e_dentry cannot have content
+-               * and the ei->is_freed be true at the same time.
++               * If ei->is_freed is set, the e_dentry is currently on its
++               * way to being freed.
+                */
+               dentry = ei->dentry;
+               if (dentry && !lookup)
diff --git a/queue-6.6/eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch b/queue-6.6/eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch
new file mode 100644 (file)
index 0000000..fca39f1
--- /dev/null
@@ -0,0 +1,44 @@
+From stable+bounces-18947-greg=kroah.com@vger.kernel.org Tue Feb  6 13:15:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:19 -0500
+Subject: eventfs: Remove extra dget() in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120948.657072999@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 77bc4d4921bd3497678ba8e7f4e480de35692f05 upstream.
+
+The creation of the top events directory does a dget() at the end of the
+creation in eventfs_create_events_dir() with a comment saying the final
+dput() will happen when it is removed. The problem is that a dget() is
+already done on the dentry when it was created with tracefs_start_creating()!
+The dget() now just causes a memory leak of that dentry.
+
+Remove the extra dget() as the final dput() in the deletion of the events
+directory actually matches the one in tracefs_start_creating().
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231031124229.4f2e3fa1@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -774,9 +774,6 @@ struct eventfs_inode *eventfs_create_eve
+       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+       tracefs_end_creating(dentry);
+-      /* Will call dput when the directory is removed */
+-      dget(dentry);
+-
+       return ei;
+  fail:
diff --git a/queue-6.6/eventfs-remove-fsnotify-functions-from-lookup.patch b/queue-6.6/eventfs-remove-fsnotify-functions-from-lookup.patch
new file mode 100644 (file)
index 0000000..fada6f9
--- /dev/null
@@ -0,0 +1,53 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:28 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:01 -0500
+Subject: eventfs: Remove fsnotify*() functions from lookup()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120955.500466790@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 12d823b31fadf47c8f36ecada7abac5f903cac33 upstream.
+
+The dentries and inodes are created when referenced in the lookup code.
+There's no reason to call fsnotify_*() functions when they are created by
+a reference. It doesn't make any sense.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.166973329@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Fixes: a376007917776 ("eventfs: Implement functions to create files and dirs when accessed");
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -366,7 +366,6 @@ static struct dentry *lookup_file(struct
+       dentry->d_fsdata = get_ei(parent_ei);
+       d_add(dentry, inode);
+-      fsnotify_create(dentry->d_parent->d_inode, dentry);
+       return NULL;
+ };
+@@ -408,7 +407,6 @@ static struct dentry *lookup_dir_entry(s
+       inc_nlink(inode);
+       d_add(dentry, inode);
+       inc_nlink(dentry->d_parent->d_inode);
+-      fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+       return NULL;
+ }
diff --git a/queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch b/queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch
new file mode 100644 (file)
index 0000000..fd23047
--- /dev/null
@@ -0,0 +1,75 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:21 -0500
+Subject: eventfs: Remove "is_freed" union with rcu head
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Ajay Kaher <akaher@vmware.com>
+Message-ID: <20240206120948.980929088@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit f2f496370afcbc5227d7002da28c74b91fed12ff upstream.
+
+The eventfs_inode->is_freed was a union with the rcu_head with the
+assumption that when it was on the srcu list the head would contain a
+pointer which would make "is_freed" true. But that was a wrong assumption
+as the rcu head is a singly linked list where the last element is NULL.
+
+Instead, split the nr_entries integer so that "is_freed" is one bit and
+the nr_entries is the next 31 bits. As there shouldn't be more than 10
+(currently there's at most 5 to 7 depending on the config), this should
+not be a problem.
+
+Link: https://lkml.kernel.org/r/20231101172649.049758712@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    2 ++
+ fs/tracefs/internal.h    |    6 +++---
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -824,6 +824,8 @@ static void eventfs_remove_rec(struct ev
+               eventfs_remove_rec(ei_child, head, level + 1);
+       }
++      ei->is_freed = 1;
++
+       list_del_rcu(&ei->list);
+       list_add_tail(&ei->del_list, head);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -23,6 +23,7 @@ struct tracefs_inode {
+  * @d_parent:   pointer to the parent's dentry
+  * @d_children: The array of dentries to represent the files when created
+  * @data:     The private data to pass to the callbacks
++ * @is_freed: Flag set if the eventfs is on its way to be freed
+  * @nr_entries: The number of items in @entries
+  */
+ struct eventfs_inode {
+@@ -38,14 +39,13 @@ struct eventfs_inode {
+        * Union - used for deletion
+        * @del_list:   list of eventfs_inode to delete
+        * @rcu:        eventfs_inode to delete in RCU
+-       * @is_freed:   node is freed if one of the above is set
+        */
+       union {
+               struct list_head        del_list;
+               struct rcu_head         rcu;
+-              unsigned long           is_freed;
+       };
+-      int                             nr_entries;
++      unsigned int                    is_freed:1;
++      unsigned int                    nr_entries:31;
+ };
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
diff --git a/queue-6.6/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch b/queue-6.6/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch
new file mode 100644 (file)
index 0000000..9ffaa80
--- /dev/null
@@ -0,0 +1,204 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:10 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:40 -0500
+Subject: eventfs: Remove "lookup" parameter from create_dir/file_dentry()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.069546514@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit b0f7e2d739b4aac131ea1662d086a07775097b05 upstream.
+
+The "lookup" parameter is a way to differentiate the call to
+create_file/dir_dentry() from when it's just a lookup (no need to up the
+dentry refcount) and accessed via a readdir (need to up the refcount).
+
+But in reality, it just makes the code more complex. Just up the refcount and
+let the caller decide to dput() the result or not.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240103102553.17a19cea@gandalf.local.home
+Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.517502710@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   55 +++++++++++++++++------------------------------
+ 1 file changed, 20 insertions(+), 35 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -390,16 +390,14 @@ void eventfs_set_ei_status_free(struct t
+  * @mode: The mode of the file.
+  * @data: The data to use to set the inode of the file with on open()
+  * @fops: The fops of the file to be created.
+- * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
+  *
+  * Create a dentry for a file of an eventfs_inode @ei and place it into the
+- * address located at @e_dentry. If the @e_dentry already has a dentry, then
+- * just do a dget() on it and return. Otherwise create the dentry and attach it.
++ * address located at @e_dentry.
+  */
+ static struct dentry *
+ create_file_dentry(struct eventfs_inode *ei, int idx,
+                  struct dentry *parent, const char *name, umode_t mode, void *data,
+-                 const struct file_operations *fops, bool lookup)
++                 const struct file_operations *fops)
+ {
+       struct eventfs_attr *attr = NULL;
+       struct dentry **e_dentry = &ei->d_children[idx];
+@@ -414,9 +412,7 @@ create_file_dentry(struct eventfs_inode
+       }
+       /* If the e_dentry already has a dentry, use it */
+       if (*e_dentry) {
+-              /* lookup does not need to up the ref count */
+-              if (!lookup)
+-                      dget(*e_dentry);
++              dget(*e_dentry);
+               mutex_unlock(&eventfs_mutex);
+               return *e_dentry;
+       }
+@@ -441,13 +437,12 @@ create_file_dentry(struct eventfs_inode
+                * way to being freed, don't return it. If e_dentry is NULL
+                * it means it was already freed.
+                */
+-              if (ei->is_freed)
++              if (ei->is_freed) {
+                       dentry = NULL;
+-              else
++              } else {
+                       dentry = *e_dentry;
+-              /* The lookup does not need to up the dentry refcount */
+-              if (dentry && !lookup)
+                       dget(dentry);
++              }
+               mutex_unlock(&eventfs_mutex);
+               return dentry;
+       }
+@@ -465,9 +460,6 @@ create_file_dentry(struct eventfs_inode
+       }
+       mutex_unlock(&eventfs_mutex);
+-      if (lookup)
+-              dput(dentry);
+-
+       return dentry;
+ }
+@@ -500,13 +492,12 @@ static void eventfs_post_create_dir(stru
+  * @pei: The eventfs_inode parent of ei.
+  * @ei: The eventfs_inode to create the directory for
+  * @parent: The dentry of the parent of this directory
+- * @lookup: True if this is called by the lookup code
+  *
+  * This creates and attaches a directory dentry to the eventfs_inode @ei.
+  */
+ static struct dentry *
+ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+-                struct dentry *parent, bool lookup)
++                struct dentry *parent)
+ {
+       struct dentry *dentry = NULL;
+@@ -518,11 +509,9 @@ create_dir_dentry(struct eventfs_inode *
+               return NULL;
+       }
+       if (ei->dentry) {
+-              /* If the dentry already has a dentry, use it */
++              /* If the eventfs_inode already has a dentry, use it */
+               dentry = ei->dentry;
+-              /* lookup does not need to up the ref count */
+-              if (!lookup)
+-                      dget(dentry);
++              dget(dentry);
+               mutex_unlock(&eventfs_mutex);
+               return dentry;
+       }
+@@ -542,7 +531,7 @@ create_dir_dentry(struct eventfs_inode *
+                * way to being freed.
+                */
+               dentry = ei->dentry;
+-              if (dentry && !lookup)
++              if (dentry)
+                       dget(dentry);
+               mutex_unlock(&eventfs_mutex);
+               return dentry;
+@@ -562,9 +551,6 @@ create_dir_dentry(struct eventfs_inode *
+       }
+       mutex_unlock(&eventfs_mutex);
+-      if (lookup)
+-              dput(dentry);
+-
+       return dentry;
+ }
+@@ -589,8 +575,8 @@ static struct dentry *eventfs_root_looku
+       struct eventfs_inode *ei;
+       struct dentry *ei_dentry = NULL;
+       struct dentry *ret = NULL;
++      struct dentry *d;
+       const char *name = dentry->d_name.name;
+-      bool created = false;
+       umode_t mode;
+       void *data;
+       int idx;
+@@ -626,13 +612,10 @@ static struct dentry *eventfs_root_looku
+               ret = simple_lookup(dir, dentry, flags);
+               if (IS_ERR(ret))
+                       goto out;
+-              create_dir_dentry(ei, ei_child, ei_dentry, true);
+-              created = true;
+-              break;
+-      }
+-
+-      if (created)
++              d = create_dir_dentry(ei, ei_child, ei_dentry);
++              dput(d);
+               goto out;
++      }
+       for (i = 0; i < ei->nr_entries; i++) {
+               entry = &ei->entries[i];
+@@ -650,8 +633,8 @@ static struct dentry *eventfs_root_looku
+                       ret = simple_lookup(dir, dentry, flags);
+                       if (IS_ERR(ret))
+                               goto out;
+-                      create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+-                                         fops, true);
++                      d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++                      dput(d);
+                       break;
+               }
+       }
+@@ -768,9 +751,10 @@ static int dcache_dir_open_wrapper(struc
+       inode_lock(parent->d_inode);
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              d = create_dir_dentry(ei, ei_child, parent, false);
++              d = create_dir_dentry(ei, ei_child, parent);
+               if (d) {
+                       ret = add_dentries(&dentries, d, cnt);
++                      dput(d);
+                       if (ret < 0)
+                               break;
+                       cnt++;
+@@ -790,9 +774,10 @@ static int dcache_dir_open_wrapper(struc
+               mutex_unlock(&eventfs_mutex);
+               if (r <= 0)
+                       continue;
+-              d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
++              d = create_file_dentry(ei, i, parent, name, mode, cdata, fops);
+               if (d) {
+                       ret = add_dentries(&dentries, d, cnt);
++                      dput(d);
+                       if (ret < 0)
+                               break;
+                       cnt++;
diff --git a/queue-6.6/eventfs-remove-special-processing-of-dput-of-events-directory.patch b/queue-6.6/eventfs-remove-special-processing-of-dput-of-events-directory.patch
new file mode 100644 (file)
index 0000000..32b44f3
--- /dev/null
@@ -0,0 +1,68 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:02 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:27 -0500
+Subject: eventfs: Remove special processing of dput() of events directory
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.956372816@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 62d65cac119d08d39f751b4e3e2063ed996edc05 upstream.
+
+The top level events directory is no longer special with regard to how it
+should be deleted. Remove the extra processing for it in
+eventfs_set_ei_status_free().
+
+Link: https://lkml.kernel.org/r/20231101172650.340876747@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   19 ++-----------------
+ 1 file changed, 2 insertions(+), 17 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -274,28 +274,11 @@ static void free_ei(struct eventfs_inode
+  */
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
+-      struct tracefs_inode *ti_parent;
+       struct eventfs_inode *ei;
+       int i;
+-      /* The top level events directory may be freed by this */
+-      if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+-              mutex_lock(&eventfs_mutex);
+-              ei = ti->private;
+-              /* Nothing should access this, but just in case! */
+-              ti->private = NULL;
+-              mutex_unlock(&eventfs_mutex);
+-
+-              free_ei(ei);
+-              return;
+-      }
+-
+       mutex_lock(&eventfs_mutex);
+-      ti_parent = get_tracefs(dentry->d_parent->d_inode);
+-      if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+-              goto out;
+-
+       ei = dentry->d_fsdata;
+       if (!ei)
+               goto out;
+@@ -920,6 +903,8 @@ struct eventfs_inode *eventfs_create_eve
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
++      dentry->d_fsdata = ei;
++
+       /* directory inodes start off with i_nlink == 2 (for "." entry) */
+       inc_nlink(inode);
+       d_instantiate(dentry, inode);
diff --git a/queue-6.6/eventfs-remove-unused-d_parent-pointer-field.patch b/queue-6.6/eventfs-remove-unused-d_parent-pointer-field.patch
new file mode 100644 (file)
index 0000000..471d4e0
--- /dev/null
@@ -0,0 +1,66 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:26 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:56 -0500
+Subject: eventfs: Remove unused d_parent pointer field
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.681339731@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 408600be78cdb8c650a97ecc7ff411cb216811b5 upstream.
+
+It's never used
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.961772428@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    4 +---
+ fs/tracefs/internal.h    |    2 --
+ 2 files changed, 1 insertion(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -680,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir
+       INIT_LIST_HEAD(&ei->list);
+       mutex_lock(&eventfs_mutex);
+-      if (!parent->is_freed) {
++      if (!parent->is_freed)
+               list_add_tail(&ei->list, &parent->children);
+-              ei->d_parent = parent->dentry;
+-      }
+       mutex_unlock(&eventfs_mutex);
+       /* Was the parent freed? */
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -36,7 +36,6 @@ struct eventfs_attr {
+  * @name:     the name of the directory to create
+  * @children: link list into the child eventfs_inode
+  * @dentry:     the dentry of the directory
+- * @d_parent:   pointer to the parent's dentry
+  * @d_children: The array of dentries to represent the files when created
+  * @entry_attrs: Saved mode and ownership of the @d_children
+  * @attr:     Saved mode and ownership of eventfs_inode itself
+@@ -51,7 +50,6 @@ struct eventfs_inode {
+       const char                      *name;
+       struct list_head                children;
+       struct dentry                   *dentry; /* Check is_freed to access */
+-      struct dentry                   *d_parent;
+       struct dentry                   **d_children;
+       struct eventfs_attr             *entry_attrs;
+       struct eventfs_attr             attr;
diff --git a/queue-6.6/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch b/queue-6.6/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch
new file mode 100644 (file)
index 0000000..48b8b0d
--- /dev/null
@@ -0,0 +1,86 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:09 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:10:00 -0500
+Subject: eventfs: Restructure eventfs_inode structure to be more condensed
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.335266477@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 264424dfdd5cbd92bc5b5ddf93944929fc877fac upstream.
+
+Some of the eventfs_inode structure has holes in it. Rework the structure
+to be a bit more condensed, and also remove the no longer used llist
+field.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.002321438@goodmis.org
+
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/internal.h |   27 ++++++++++++---------------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -32,40 +32,37 @@ struct eventfs_attr {
+ /*
+  * struct eventfs_inode - hold the properties of the eventfs directories.
+  * @list:     link list into the parent directory
++ * @rcu:      Union with @list for freeing
++ * @children: link list into the child eventfs_inode
+  * @entries:  the array of entries representing the files in the directory
+  * @name:     the name of the directory to create
+- * @children: link list into the child eventfs_inode
+  * @events_dir: the dentry of the events directory
+  * @entry_attrs: Saved mode and ownership of the @d_children
+- * @attr:     Saved mode and ownership of eventfs_inode itself
+  * @data:     The private data to pass to the callbacks
++ * @attr:     Saved mode and ownership of eventfs_inode itself
+  * @is_freed: Flag set if the eventfs is on its way to be freed
+  *                Note if is_freed is set, then dentry is corrupted.
++ * @is_events:        Flag set for only the top level "events" directory
+  * @nr_entries: The number of items in @entries
++ * @ino:      The saved inode number
+  */
+ struct eventfs_inode {
+-      struct kref                     kref;
+-      struct list_head                list;
++      union {
++              struct list_head        list;
++              struct rcu_head         rcu;
++      };
++      struct list_head                children;
+       const struct eventfs_entry      *entries;
+       const char                      *name;
+-      struct list_head                children;
+       struct dentry                   *events_dir;
+       struct eventfs_attr             *entry_attrs;
+-      struct eventfs_attr             attr;
+       void                            *data;
++      struct eventfs_attr             attr;
++      struct kref                     kref;
+       unsigned int                    is_freed:1;
+       unsigned int                    is_events:1;
+       unsigned int                    nr_entries:30;
+       unsigned int                    ino;
+-      /*
+-       * Union - used for deletion
+-       * @llist:      for calling dput() if needed after RCU
+-       * @rcu:        eventfs_inode to delete in RCU
+-       */
+-      union {
+-              struct llist_node       llist;
+-              struct rcu_head         rcu;
+-      };
+ };
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
diff --git a/queue-6.6/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch b/queue-6.6/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch
new file mode 100644 (file)
index 0000000..16db65e
--- /dev/null
@@ -0,0 +1,119 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:27 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:50 -0500
+Subject: eventfs: Save directory inodes in the eventfs_inode structure
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Geert Uytterhoeven <geert@linux-m68k.org>, Geert Uytterhoeven <geert+renesas@glider.be>, Kees Cook <keescook@chromium.org>
+Message-ID: <20240206120953.708915826@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 834bf76add3e6168038150f162cbccf1fd492a67 upstream.
+
+The eventfs inodes and directories are allocated when referenced. But this
+leaves the issue of keeping consistent inode numbers, as the number is
+only saved in the inode structure itself. When the inode is no longer
+referenced, it can be freed. When the file that the inode was representing
+is referenced again, the inode is once again created, but the inode number
+needs to be the same as it was before.
+
+Just making the inode numbers the same for all files is fine, but that
+does not work with directories. The find command will check for loops via
+the inode number and having the same inode number for directories triggers:
+
+  # find /sys/kernel/tracing
+find: File system loop detected;
+'/sys/kernel/debug/tracing/events/initcall/initcall_finish' is part of the same file system loop as
+'/sys/kernel/debug/tracing/events/initcall'.
+[..]
+
+Linus pointed out that the eventfs_inode structure ends with a single
+32bit int, and on 64 bit machines, there's likely a 4 byte hole due to
+alignment. We can use this hole to store the inode number for the
+eventfs_inode. All directories in eventfs are represented by an
+eventfs_inode and that data structure can hold its inode number.
+
+That last int was also purposely placed at the end of the structure to
+prevent holes from within. Now that there's a 4 byte number to hold the
+inode, both the inode number and the last integer can be moved up in the
+structure for better cache locality, where the llist and rcu fields can be
+moved to the end as they are only used when the eventfs_inode is being
+deleted.
+
+Link: https://lore.kernel.org/all/CAMuHMdXKiorg-jiuKoZpfZyDJ3Ynrfb8=X+c7x0Eewxn-YRdCA@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240122152748.46897388@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Fixes: 53c41052ba31 ("eventfs: Have the inodes all for files and directories all be the same")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   14 +++++++++++---
+ fs/tracefs/internal.h    |    7 ++++---
+ 2 files changed, 15 insertions(+), 6 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex);
+ /* Choose something "unique" ;-) */
+ #define EVENTFS_FILE_INODE_INO                0x12c4e37
+-#define EVENTFS_DIR_INODE_INO         0x134b2f5
++
++/* Just try to make something consistent and unique */
++static int eventfs_dir_ino(struct eventfs_inode *ei)
++{
++      if (!ei->ino)
++              ei->ino = get_next_ino();
++
++      return ei->ino;
++}
+ /*
+  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
+@@ -396,7 +404,7 @@ static struct dentry *create_dir(struct
+       inode->i_fop = &eventfs_file_operations;
+       /* All directories will have the same inode number */
+-      inode->i_ino = EVENTFS_DIR_INODE_INO;
++      inode->i_ino = eventfs_dir_ino(ei);
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
+@@ -802,7 +810,7 @@ static int eventfs_iterate(struct file *
+               name = ei_child->name;
+-              ino = EVENTFS_DIR_INODE_INO;
++              ino = eventfs_dir_ino(ei_child);
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
+                       goto out_dec;
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -55,6 +55,10 @@ struct eventfs_inode {
+       struct eventfs_attr             *entry_attrs;
+       struct eventfs_attr             attr;
+       void                            *data;
++      unsigned int                    is_freed:1;
++      unsigned int                    is_events:1;
++      unsigned int                    nr_entries:30;
++      unsigned int                    ino;
+       /*
+        * Union - used for deletion
+        * @llist:      for calling dput() if needed after RCU
+@@ -64,9 +68,6 @@ struct eventfs_inode {
+               struct llist_node       llist;
+               struct rcu_head         rcu;
+       };
+-      unsigned int                    is_freed:1;
+-      unsigned int                    is_events:1;
+-      unsigned int                    nr_entries:30;
+ };
+ static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
diff --git a/queue-6.6/eventfs-save-ownership-and-mode.patch b/queue-6.6/eventfs-save-ownership-and-mode.patch
new file mode 100644 (file)
index 0000000..91e4d29
--- /dev/null
@@ -0,0 +1,347 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:13 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:24 -0500
+Subject: eventfs: Save ownership and mode
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>
+Message-ID: <20240206120949.464245650@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 28e12c09f5aa081b2d13d1340e3610070b6c624d upstream.
+
+Now that inodes and dentries are created on the fly, they are also
+reclaimed on memory pressure. Since the ownership and file mode are saved
+in the inode, if the inode is freed, any changes to the ownership and mode
+will be lost.
+
+To counter this, if the user changes the permissions or ownership, save
+them, and when creating the inodes again, restore those changes.
+
+Link: https://lkml.kernel.org/r/20231101172649.691841445@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions")
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  148 ++++++++++++++++++++++++++++++++++++++++++-----
+ fs/tracefs/internal.h    |   16 +++++
+ 2 files changed, 151 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -40,6 +40,15 @@ static DEFINE_MUTEX(eventfs_mutex);
+  */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
++/* Mode is unsigned short, use the upper bits for flags */
++enum {
++      EVENTFS_SAVE_MODE       = BIT(16),
++      EVENTFS_SAVE_UID        = BIT(17),
++      EVENTFS_SAVE_GID        = BIT(18),
++};
++
++#define EVENTFS_MODE_MASK     (EVENTFS_SAVE_MODE - 1)
++
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+                                         struct dentry *dentry,
+                                         unsigned int flags);
+@@ -47,8 +56,89 @@ static int dcache_dir_open_wrapper(struc
+ static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+ static int eventfs_release(struct inode *inode, struct file *file);
++static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
++{
++      unsigned int ia_valid = iattr->ia_valid;
++
++      if (ia_valid & ATTR_MODE) {
++              attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
++                      (iattr->ia_mode & EVENTFS_MODE_MASK) |
++                      EVENTFS_SAVE_MODE;
++      }
++      if (ia_valid & ATTR_UID) {
++              attr->mode |= EVENTFS_SAVE_UID;
++              attr->uid = iattr->ia_uid;
++      }
++      if (ia_valid & ATTR_GID) {
++              attr->mode |= EVENTFS_SAVE_GID;
++              attr->gid = iattr->ia_gid;
++      }
++}
++
++static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
++                          struct iattr *iattr)
++{
++      const struct eventfs_entry *entry;
++      struct eventfs_inode *ei;
++      const char *name;
++      int ret;
++
++      mutex_lock(&eventfs_mutex);
++      ei = dentry->d_fsdata;
++      /* The LSB is set when the eventfs_inode is being freed */
++      if (((unsigned long)ei & 1UL) || ei->is_freed) {
++              /* Do not allow changes if the event is about to be removed. */
++              mutex_unlock(&eventfs_mutex);
++              return -ENODEV;
++      }
++
++      /* Preallocate the children mode array if necessary */
++      if (!(dentry->d_inode->i_mode & S_IFDIR)) {
++              if (!ei->entry_attrs) {
++                      ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
++                                                GFP_KERNEL);
++                      if (!ei->entry_attrs) {
++                              ret = -ENOMEM;
++                              goto out;
++                      }
++              }
++      }
++
++      ret = simple_setattr(idmap, dentry, iattr);
++      if (ret < 0)
++              goto out;
++
++      /*
++       * If this is a dir, then update the ei cache, only the file
++       * mode is saved in the ei->m_children, and the ownership is
++       * determined by the parent directory.
++       */
++      if (dentry->d_inode->i_mode & S_IFDIR) {
++              update_attr(&ei->attr, iattr);
++
++      } else {
++              name = dentry->d_name.name;
++
++              for (int i = 0; i < ei->nr_entries; i++) {
++                      entry = &ei->entries[i];
++                      if (strcmp(name, entry->name) == 0) {
++                              update_attr(&ei->entry_attrs[i], iattr);
++                              break;
++                      }
++              }
++      }
++ out:
++      mutex_unlock(&eventfs_mutex);
++      return ret;
++}
++
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+       .lookup         = eventfs_root_lookup,
++      .setattr        = eventfs_set_attr,
++};
++
++static const struct inode_operations eventfs_file_inode_operations = {
++      .setattr        = eventfs_set_attr,
+ };
+ static const struct file_operations eventfs_file_operations = {
+@@ -59,10 +149,30 @@ static const struct file_operations even
+       .release        = eventfs_release,
+ };
++static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
++{
++      if (!attr) {
++              inode->i_mode = mode;
++              return;
++      }
++
++      if (attr->mode & EVENTFS_SAVE_MODE)
++              inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
++      else
++              inode->i_mode = mode;
++
++      if (attr->mode & EVENTFS_SAVE_UID)
++              inode->i_uid = attr->uid;
++
++      if (attr->mode & EVENTFS_SAVE_GID)
++              inode->i_gid = attr->gid;
++}
++
+ /**
+  * create_file - create a file in the tracefs filesystem
+  * @name: the name of the file to create.
+  * @mode: the permission that the file should have.
++ * @attr: saved attributes changed by user
+  * @parent: parent dentry for this file.
+  * @data: something that the caller will want to get to later on.
+  * @fop: struct file_operations that should be used for this file.
+@@ -72,6 +182,7 @@ static const struct file_operations even
+  * call.
+  */
+ static struct dentry *create_file(const char *name, umode_t mode,
++                                struct eventfs_attr *attr,
+                                 struct dentry *parent, void *data,
+                                 const struct file_operations *fop)
+ {
+@@ -95,7 +206,10 @@ static struct dentry *create_file(const
+       if (unlikely(!inode))
+               return eventfs_failed_creating(dentry);
+-      inode->i_mode = mode;
++      /* If the user updated the directory's attributes, use them */
++      update_inode_attr(inode, attr, mode);
++
++      inode->i_op = &eventfs_file_inode_operations;
+       inode->i_fop = fop;
+       inode->i_private = data;
+@@ -108,19 +222,19 @@ static struct dentry *create_file(const
+ /**
+  * create_dir - create a dir in the tracefs filesystem
+- * @name: the name of the file to create.
++ * @ei: the eventfs_inode that represents the directory to create
+  * @parent: parent dentry for this file.
+  *
+  * This function will create a dentry for a directory represented by
+  * a eventfs_inode.
+  */
+-static struct dentry *create_dir(const char *name, struct dentry *parent)
++static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
+ {
+       struct tracefs_inode *ti;
+       struct dentry *dentry;
+       struct inode *inode;
+-      dentry = eventfs_start_creating(name, parent);
++      dentry = eventfs_start_creating(ei->name, parent);
+       if (IS_ERR(dentry))
+               return dentry;
+@@ -128,7 +242,9 @@ static struct dentry *create_dir(const c
+       if (unlikely(!inode))
+               return eventfs_failed_creating(dentry);
+-      inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++      /* If the user updated the directory's attributes, use them */
++      update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
++
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
+@@ -146,6 +262,7 @@ static void free_ei(struct eventfs_inode
+ {
+       kfree_const(ei->name);
+       kfree(ei->d_children);
++      kfree(ei->entry_attrs);
+       kfree(ei);
+ }
+@@ -231,7 +348,7 @@ void eventfs_set_ei_status_free(struct t
+ /**
+  * create_file_dentry - create a dentry for a file of an eventfs_inode
+  * @ei: the eventfs_inode that the file will be created under
+- * @e_dentry: a pointer to the d_children[] of the @ei
++ * @idx: the index into the d_children[] of the @ei
+  * @parent: The parent dentry of the created file.
+  * @name: The name of the file to create
+  * @mode: The mode of the file.
+@@ -244,10 +361,12 @@ void eventfs_set_ei_status_free(struct t
+  * just do a dget() on it and return. Otherwise create the dentry and attach it.
+  */
+ static struct dentry *
+-create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
++create_file_dentry(struct eventfs_inode *ei, int idx,
+                  struct dentry *parent, const char *name, umode_t mode, void *data,
+                  const struct file_operations *fops, bool lookup)
+ {
++      struct eventfs_attr *attr = NULL;
++      struct dentry **e_dentry = &ei->d_children[idx];
+       struct dentry *dentry;
+       bool invalidate = false;
+@@ -264,13 +383,18 @@ create_file_dentry(struct eventfs_inode
+               mutex_unlock(&eventfs_mutex);
+               return *e_dentry;
+       }
++
++      /* ei->entry_attrs are protected by SRCU */
++      if (ei->entry_attrs)
++              attr = &ei->entry_attrs[idx];
++
+       mutex_unlock(&eventfs_mutex);
+       /* The lookup already has the parent->d_inode locked */
+       if (!lookup)
+               inode_lock(parent->d_inode);
+-      dentry = create_file(name, mode, parent, data, fops);
++      dentry = create_file(name, mode, attr, parent, data, fops);
+       if (!lookup)
+               inode_unlock(parent->d_inode);
+@@ -378,7 +502,7 @@ create_dir_dentry(struct eventfs_inode *
+       if (!lookup)
+               inode_lock(parent->d_inode);
+-      dentry = create_dir(ei->name, parent);
++      dentry = create_dir(ei, parent);
+       if (!lookup)
+               inode_unlock(parent->d_inode);
+@@ -495,8 +619,7 @@ static struct dentry *eventfs_root_looku
+                       if (r <= 0)
+                               continue;
+                       ret = simple_lookup(dir, dentry, flags);
+-                      create_file_dentry(ei, &ei->d_children[i],
+-                                         ei_dentry, name, mode, cdata,
++                      create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
+                                          fops, true);
+                       break;
+               }
+@@ -629,8 +752,7 @@ static int dcache_dir_open_wrapper(struc
+               r = entry->callback(name, &mode, &cdata, &fops);
+               if (r <= 0)
+                       continue;
+-              d = create_file_dentry(ei, &ei->d_children[i],
+-                                     parent, name, mode, cdata, fops, false);
++              d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
+               if (d) {
+                       ret = add_dentries(&dentries, d, cnt);
+                       if (ret < 0)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -14,6 +14,18 @@ struct tracefs_inode {
+ };
+ /*
++ * struct eventfs_attr - cache the mode and ownership of a eventfs entry
++ * @mode:     saved mode plus flags of what is saved
++ * @uid:      saved uid if changed
++ * @gid:      saved gid if changed
++ */
++struct eventfs_attr {
++      int                             mode;
++      kuid_t                          uid;
++      kgid_t                          gid;
++};
++
++/*
+  * struct eventfs_inode - hold the properties of the eventfs directories.
+  * @list:     link list into the parent directory
+  * @entries:  the array of entries representing the files in the directory
+@@ -22,6 +34,8 @@ struct tracefs_inode {
+  * @dentry:     the dentry of the directory
+  * @d_parent:   pointer to the parent's dentry
+  * @d_children: The array of dentries to represent the files when created
++ * @entry_attrs: Saved mode and ownership of the @d_children
++ * @attr:     Saved mode and ownership of eventfs_inode itself
+  * @data:     The private data to pass to the callbacks
+  * @is_freed: Flag set if the eventfs is on its way to be freed
+  *                Note if is_freed is set, then dentry is corrupted.
+@@ -35,6 +49,8 @@ struct eventfs_inode {
+       struct dentry                   *dentry; /* Check is_freed to access */
+       struct dentry                   *d_parent;
+       struct dentry                   **d_children;
++      struct eventfs_attr             *entry_attrs;
++      struct eventfs_attr             attr;
+       void                            *data;
+       /*
+        * Union - used for deletion
diff --git a/queue-6.6/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch b/queue-6.6/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch
new file mode 100644 (file)
index 0000000..80e5512
--- /dev/null
@@ -0,0 +1,93 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:48 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:46 -0500
+Subject: eventfs: Shortcut eventfs_iterate() by skipping entries already read
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120953.046426517@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 1de94b52d5e8d8b32f0252f14fad1f1edc2e71f1 upstream.
+
+As the ei->entries array is fixed for the duration of the eventfs_inode,
+it can be used to skip over already read entries in eventfs_iterate().
+
+That is, if ctx->pos is greater than zero, there's no reason to do the
+loop across the ei->entries array for the entries less than ctx->pos.
+Instead, start the lookup of the entries at the current ctx->pos.
+
+Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.494956957@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -746,21 +746,15 @@ static int eventfs_iterate(struct file *
+       if (!ei || !ei_dentry)
+               goto out;
+-      ret = 0;
+-
+       /*
+        * Need to create the dentries and inodes to have a consistent
+        * inode number.
+        */
+-      for (i = 0; i < ei->nr_entries; i++) {
+-              void *cdata = ei->data;
+-
+-              if (c > 0) {
+-                      c--;
+-                      continue;
+-              }
++      ret = 0;
+-              ctx->pos++;
++      /* Start at 'c' to jump over already read entries */
++      for (i = c; i < ei->nr_entries; i++, ctx->pos++) {
++              void *cdata = ei->data;
+               entry = &ei->entries[i];
+               name = entry->name;
+@@ -769,7 +763,7 @@ static int eventfs_iterate(struct file *
+               /* If ei->is_freed then just bail here, nothing more to do */
+               if (ei->is_freed) {
+                       mutex_unlock(&eventfs_mutex);
+-                      goto out_dec;
++                      goto out;
+               }
+               r = entry->callback(name, &mode, &cdata, &fops);
+               mutex_unlock(&eventfs_mutex);
+@@ -778,14 +772,17 @@ static int eventfs_iterate(struct file *
+               dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+               if (!dentry)
+-                      goto out_dec;
++                      goto out;
+               ino = dentry->d_inode->i_ino;
+               dput(dentry);
+               if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
+-                      goto out_dec;
++                      goto out;
+       }
++      /* Subtract the skipped entries above */
++      c -= min((unsigned int)c, (unsigned int)ei->nr_entries);
++
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
diff --git a/queue-6.6/eventfs-stop-using-dcache_readdir-for-getdents.patch b/queue-6.6/eventfs-stop-using-dcache_readdir-for-getdents.patch
new file mode 100644 (file)
index 0000000..eb80b42
--- /dev/null
@@ -0,0 +1,296 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:08 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:41 -0500
+Subject: eventfs: Stop using dcache_readdir() for getdents()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.237926780@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 493ec81a8fb8e4ada6f223b8b73791a1280d4774 upstream.
+
+The eventfs creates dynamically allocated dentries and inodes. Using the
+dcache_readdir() logic for its own directory lookups requires hiding the
+cursor of the dcache logic and playing games to allow the dcache_readdir()
+to still have access to the cursor while the eventfs saved what it created
+and what it needs to release.
+
+Instead, just have eventfs have its own iterate_shared callback function
+that will fill in the dent entries. This simplifies the code quite a bit.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  194 +++++++++++++++--------------------------------
+ 1 file changed, 64 insertions(+), 130 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -52,9 +52,7 @@ enum {
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+                                         struct dentry *dentry,
+                                         unsigned int flags);
+-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
+-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+-static int eventfs_release(struct inode *inode, struct file *file);
++static int eventfs_iterate(struct file *file, struct dir_context *ctx);
+ static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
+ {
+@@ -148,11 +146,9 @@ static const struct inode_operations eve
+ };
+ static const struct file_operations eventfs_file_operations = {
+-      .open           = dcache_dir_open_wrapper,
+       .read           = generic_read_dir,
+-      .iterate_shared = dcache_readdir_wrapper,
++      .iterate_shared = eventfs_iterate,
+       .llseek         = generic_file_llseek,
+-      .release        = eventfs_release,
+ };
+ /* Return the evenfs_inode of the "events" directory */
+@@ -643,128 +639,87 @@ static struct dentry *eventfs_root_looku
+       return ret;
+ }
+-struct dentry_list {
+-      void                    *cursor;
+-      struct dentry           **dentries;
+-};
+-
+-/**
+- * eventfs_release - called to release eventfs file/dir
+- * @inode: inode to be released
+- * @file: file to be released (not used)
+- */
+-static int eventfs_release(struct inode *inode, struct file *file)
+-{
+-      struct tracefs_inode *ti;
+-      struct dentry_list *dlist = file->private_data;
+-      void *cursor;
+-      int i;
+-
+-      ti = get_tracefs(inode);
+-      if (!(ti->flags & TRACEFS_EVENT_INODE))
+-              return -EINVAL;
+-
+-      if (WARN_ON_ONCE(!dlist))
+-              return -EINVAL;
+-
+-      for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
+-              dput(dlist->dentries[i]);
+-      }
+-
+-      cursor = dlist->cursor;
+-      kfree(dlist->dentries);
+-      kfree(dlist);
+-      file->private_data = cursor;
+-      return dcache_dir_close(inode, file);
+-}
+-
+-static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
+-{
+-      struct dentry **tmp;
+-
+-      tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
+-      if (!tmp)
+-              return -1;
+-      tmp[cnt] = d;
+-      tmp[cnt + 1] = NULL;
+-      *dentries = tmp;
+-      return 0;
+-}
+-
+-/**
+- * dcache_dir_open_wrapper - eventfs open wrapper
+- * @inode: not used
+- * @file: dir to be opened (to create it's children)
+- *
+- * Used to dynamic create file/dir with-in @file, all the
+- * file/dir will be created. If already created then references
+- * will be increased
++/*
++ * Walk the children of a eventfs_inode to fill in getdents().
+  */
+-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
++static int eventfs_iterate(struct file *file, struct dir_context *ctx)
+ {
+       const struct file_operations *fops;
++      struct inode *f_inode = file_inode(file);
+       const struct eventfs_entry *entry;
+       struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+-      struct dentry_list *dlist;
+-      struct dentry **dentries = NULL;
+-      struct dentry *parent = file_dentry(file);
+-      struct dentry *d;
+-      struct inode *f_inode = file_inode(file);
+-      const char *name = parent->d_name.name;
++      struct dentry *ei_dentry = NULL;
++      struct dentry *dentry;
++      const char *name;
+       umode_t mode;
+-      void *data;
+-      int cnt = 0;
+       int idx;
+-      int ret;
+-      int i;
+-      int r;
++      int ret = -EINVAL;
++      int ino;
++      int i, r, c;
++
++      if (!dir_emit_dots(file, ctx))
++              return 0;
+       ti = get_tracefs(f_inode);
+       if (!(ti->flags & TRACEFS_EVENT_INODE))
+               return -EINVAL;
+-      if (WARN_ON_ONCE(file->private_data))
+-              return -EINVAL;
++      c = ctx->pos - 2;
+       idx = srcu_read_lock(&eventfs_srcu);
+       mutex_lock(&eventfs_mutex);
+       ei = READ_ONCE(ti->private);
++      if (ei && !ei->is_freed)
++              ei_dentry = READ_ONCE(ei->dentry);
+       mutex_unlock(&eventfs_mutex);
+-      if (!ei) {
+-              srcu_read_unlock(&eventfs_srcu, idx);
+-              return -EINVAL;
+-      }
+-
+-
+-      data = ei->data;
++      if (!ei || !ei_dentry)
++              goto out;
+-      dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
+-      if (!dlist) {
+-              srcu_read_unlock(&eventfs_srcu, idx);
+-              return -ENOMEM;
+-      }
++      ret = 0;
+-      inode_lock(parent->d_inode);
++      /*
++       * Need to create the dentries and inodes to have a consistent
++       * inode number.
++       */
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              d = create_dir_dentry(ei, ei_child, parent);
+-              if (d) {
+-                      ret = add_dentries(&dentries, d, cnt);
+-                      dput(d);
+-                      if (ret < 0)
+-                              break;
+-                      cnt++;
++
++              if (c > 0) {
++                      c--;
++                      continue;
+               }
++
++              if (ei_child->is_freed)
++                      continue;
++
++              name = ei_child->name;
++
++              dentry = create_dir_dentry(ei, ei_child, ei_dentry);
++              if (!dentry)
++                      goto out;
++              ino = dentry->d_inode->i_ino;
++              dput(dentry);
++
++              if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
++                      goto out;
++              ctx->pos++;
+       }
+       for (i = 0; i < ei->nr_entries; i++) {
+-              void *cdata = data;
++              void *cdata = ei->data;
++
++              if (c > 0) {
++                      c--;
++                      continue;
++              }
++
+               entry = &ei->entries[i];
+               name = entry->name;
++
+               mutex_lock(&eventfs_mutex);
+               /* If ei->is_freed, then the event itself may be too */
+               if (!ei->is_freed)
+@@ -774,42 +729,21 @@ static int dcache_dir_open_wrapper(struc
+               mutex_unlock(&eventfs_mutex);
+               if (r <= 0)
+                       continue;
+-              d = create_file_dentry(ei, i, parent, name, mode, cdata, fops);
+-              if (d) {
+-                      ret = add_dentries(&dentries, d, cnt);
+-                      dput(d);
+-                      if (ret < 0)
+-                              break;
+-                      cnt++;
+-              }
+-      }
+-      inode_unlock(parent->d_inode);
+-      srcu_read_unlock(&eventfs_srcu, idx);
+-      ret = dcache_dir_open(inode, file);
+-      /*
+-       * dcache_dir_open() sets file->private_data to a dentry cursor.
+-       * Need to save that but also save all the dentries that were
+-       * opened by this function.
+-       */
+-      dlist->cursor = file->private_data;
+-      dlist->dentries = dentries;
+-      file->private_data = dlist;
+-      return ret;
+-}
++              dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
++              if (!dentry)
++                      goto out;
++              ino = dentry->d_inode->i_ino;
++              dput(dentry);
+-/*
+- * This just sets the file->private_data back to the cursor and back.
+- */
+-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
+-{
+-      struct dentry_list *dlist = file->private_data;
+-      int ret;
++              if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
++                      goto out;
++              ctx->pos++;
++      }
++      ret = 1;
++ out:
++      srcu_read_unlock(&eventfs_srcu, idx);
+-      file->private_data = dlist->cursor;
+-      ret = dcache_readdir(file, ctx);
+-      dlist->cursor = file->private_data;
+-      file->private_data = dlist;
+       return ret;
+ }
diff --git a/queue-6.6/eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch b/queue-6.6/eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch
new file mode 100644 (file)
index 0000000..52c102f
--- /dev/null
@@ -0,0 +1,188 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:41 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:23 -0500
+Subject: eventfs: Test for ei->is_freed when accessing ei->dentry
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Ajay Kaher <akaher@vmware.com>, Andrew Morton <akpm@linux-foundation.org>, Linux Kernel Functional Testing <lkft@linaro.org>, Naresh Kamboju <naresh.kamboju@linaro.org>, Beau Belgrave <beaub@linux.microsoft.com>
+Message-ID: <20240206120949.301438848@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 77a06c33a22d13f3a6e31f06f6ee6bca666e6898 upstream.
+
+The eventfs_inode (ei) is protected by SRCU, but the ei->dentry is not. It
+is protected by the eventfs_mutex. Anytime the eventfs_mutex is released
+and the ei->dentry needs to be accessed, the code should first check if
+ei->is_freed is set under the eventfs_mutex. If it is, then the ei->dentry
+is invalid and must not be used. The ei->dentry must only be accessed
+under the eventfs_mutex and after checking if ei->is_freed is set.
+
+When the ei is being freed, it will (under the eventfs_mutex) set is_freed
+and at the same time move the dentry to a free list to be cleared after
+the eventfs_mutex is released. This means that any access to the
+ei->dentry must check first if ei->is_freed is set, because if it is, then
+the dentry is on its way to be freed.
+
+Also add comments to describe this better.
+
+Link: https://lore.kernel.org/all/CA+G9fYt6pY+tMZEOg=SoEywQOe19fGP3uR15SGowkdK+_X85Cg@mail.gmail.com/
+Link: https://lore.kernel.org/all/CA+G9fYuDP3hVQ3t7FfrBAjd_WFVSurMgCepTxunSJf=MTe=6aA@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20231101172649.477608228@goodmis.org
+
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Beau Belgrave <beaub@linux.microsoft.com>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Tested-by: Beau Belgrave <beaub@linux.microsoft.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   45 +++++++++++++++++++++++++++++++++++++++------
+ fs/tracefs/internal.h    |    3 ++-
+ 2 files changed, 41 insertions(+), 7 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -24,7 +24,20 @@
+ #include <linux/delay.h>
+ #include "internal.h"
++/*
++ * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
++ * to the ei->dentry must be done under this mutex and after checking
++ * if ei->is_freed is not set. The ei->dentry is released under the
++ * mutex at the same time ei->is_freed is set. If ei->is_freed is set
++ * then the ei->dentry is invalid.
++ */
+ static DEFINE_MUTEX(eventfs_mutex);
++
++/*
++ * The eventfs_inode (ei) itself is protected by SRCU. It is released from
++ * its parent's list and will have is_freed set (under eventfs_mutex).
++ * After the SRCU grace period is over, the ei may be freed.
++ */
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+@@ -239,6 +252,10 @@ create_file_dentry(struct eventfs_inode
+       bool invalidate = false;
+       mutex_lock(&eventfs_mutex);
++      if (ei->is_freed) {
++              mutex_unlock(&eventfs_mutex);
++              return NULL;
++      }
+       /* If the e_dentry already has a dentry, use it */
+       if (*e_dentry) {
+               /* lookup does not need to up the ref count */
+@@ -312,6 +329,8 @@ static void eventfs_post_create_dir(stru
+       struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
++      lockdep_assert_held(&eventfs_mutex);
++
+       /* srcu lock already held */
+       /* fill parent-child relation */
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+@@ -325,6 +344,7 @@ static void eventfs_post_create_dir(stru
+ /**
+  * create_dir_dentry - Create a directory dentry for the eventfs_inode
++ * @pei: The eventfs_inode parent of ei.
+  * @ei: The eventfs_inode to create the directory for
+  * @parent: The dentry of the parent of this directory
+  * @lookup: True if this is called by the lookup code
+@@ -332,12 +352,17 @@ static void eventfs_post_create_dir(stru
+  * This creates and attaches a directory dentry to the eventfs_inode @ei.
+  */
+ static struct dentry *
+-create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool lookup)
++create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
++                struct dentry *parent, bool lookup)
+ {
+       bool invalidate = false;
+       struct dentry *dentry = NULL;
+       mutex_lock(&eventfs_mutex);
++      if (pei->is_freed || ei->is_freed) {
++              mutex_unlock(&eventfs_mutex);
++              return NULL;
++      }
+       if (ei->dentry) {
+               /* If the dentry already has a dentry, use it */
+               dentry = ei->dentry;
+@@ -440,7 +465,7 @@ static struct dentry *eventfs_root_looku
+        */
+       mutex_lock(&eventfs_mutex);
+       ei = READ_ONCE(ti->private);
+-      if (ei)
++      if (ei && !ei->is_freed)
+               ei_dentry = READ_ONCE(ei->dentry);
+       mutex_unlock(&eventfs_mutex);
+@@ -454,7 +479,7 @@ static struct dentry *eventfs_root_looku
+               if (strcmp(ei_child->name, name) != 0)
+                       continue;
+               ret = simple_lookup(dir, dentry, flags);
+-              create_dir_dentry(ei_child, ei_dentry, true);
++              create_dir_dentry(ei, ei_child, ei_dentry, true);
+               created = true;
+               break;
+       }
+@@ -588,7 +613,7 @@ static int dcache_dir_open_wrapper(struc
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                srcu_read_lock_held(&eventfs_srcu)) {
+-              d = create_dir_dentry(ei_child, parent, false);
++              d = create_dir_dentry(ei, ei_child, parent, false);
+               if (d) {
+                       ret = add_dentries(&dentries, d, cnt);
+                       if (ret < 0)
+@@ -705,12 +730,20 @@ struct eventfs_inode *eventfs_create_dir
+       ei->nr_entries = size;
+       ei->data = data;
+       INIT_LIST_HEAD(&ei->children);
++      INIT_LIST_HEAD(&ei->list);
+       mutex_lock(&eventfs_mutex);
+-      list_add_tail(&ei->list, &parent->children);
+-      ei->d_parent = parent->dentry;
++      if (!parent->is_freed) {
++              list_add_tail(&ei->list, &parent->children);
++              ei->d_parent = parent->dentry;
++      }
+       mutex_unlock(&eventfs_mutex);
++      /* Was the parent freed? */
++      if (list_empty(&ei->list)) {
++              free_ei(ei);
++              ei = NULL;
++      }
+       return ei;
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -24,6 +24,7 @@ struct tracefs_inode {
+  * @d_children: The array of dentries to represent the files when created
+  * @data:     The private data to pass to the callbacks
+  * @is_freed: Flag set if the eventfs is on its way to be freed
++ *                Note if is_freed is set, then dentry is corrupted.
+  * @nr_entries: The number of items in @entries
+  */
+ struct eventfs_inode {
+@@ -31,7 +32,7 @@ struct eventfs_inode {
+       const struct eventfs_entry      *entries;
+       const char                      *name;
+       struct list_head                children;
+-      struct dentry                   *dentry;
++      struct dentry                   *dentry; /* Check is_freed to access */
+       struct dentry                   *d_parent;
+       struct dentry                   **d_children;
+       void                            *data;
diff --git a/queue-6.6/eventfs-use-err_cast-in-eventfs_create_events_dir.patch b/queue-6.6/eventfs-use-err_cast-in-eventfs_create_events_dir.patch
new file mode 100644 (file)
index 0000000..94b57b6
--- /dev/null
@@ -0,0 +1,46 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:14 -0500
+Subject: eventfs: Use ERR_CAST() in eventfs_create_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Kees Cook <keescook@chromium.org>, Nathan Chancellor <nathan@kernel.org>
+Message-ID: <20240206120947.843106843@rostedt.homelinux.com>
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit b8a555dc31e5aa18d976de0bc228006e398a2e7d upstream.
+
+When building with clang and CONFIG_RANDSTRUCT_FULL=y, there is an error
+due to a cast in eventfs_create_events_dir():
+
+  fs/tracefs/event_inode.c:734:10: error: casting from randomized structure pointer type 'struct dentry *' to 'struct eventfs_inode *'
+    734 |                 return (struct eventfs_inode *)dentry;
+        |                        ^
+  1 error generated.
+
+Use the ERR_CAST() function to resolve the error, as it was designed for
+this exact situation (casting an error pointer to another type).
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231018-ftrace-fix-clang-randstruct-v1-1-338cb214abfb@kernel.org
+
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1947
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -731,7 +731,7 @@ struct eventfs_inode *eventfs_create_eve
+               return NULL;
+       if (IS_ERR(dentry))
+-              return (struct eventfs_inode *)dentry;
++              return ERR_CAST(dentry);
+       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+       if (!ei)
diff --git a/queue-6.6/eventfs-use-eventfs_remove_events_dir.patch b/queue-6.6/eventfs-use-eventfs_remove_events_dir.patch
new file mode 100644 (file)
index 0000000..4d4bdf3
--- /dev/null
@@ -0,0 +1,89 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:13 -0500
+Subject: eventfs: Use eventfs_remove_events_dir()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, kernel test robot <lkp@intel.com>
+Message-ID: <20240206120947.686070579@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 2819f23ac12ce93ff79ca7a54597df9a4a1f6331 upstream.
+
+The update that removed the eventfs_file changed the way the events top
+level directory was handled. Instead of returning a dentry, it now returns
+the eventfs_inode. With this change, removing the events top level
+directory is not much different than removing any of the other
+directories. Because of this, the removal just called eventfs_remove_dir()
+instead of eventfs_remove_events_dir().
+
+Although eventfs_remove_dir() does the clean up, it misses out on the
+dget() of the ei->dentry done in eventfs_create_events_dir(). It makes
+more sense to match eventfs_create_events_dir() with a specific function
+eventfs_remove_events_dir() and this specific function can then perform
+the dput() to the dentry that had the dget() when it was created.
+
+Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202310051743.y9EobbUr-lkp@intel.com/
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c    |   19 +++++++------------
+ include/linux/tracefs.h     |    1 +
+ kernel/trace/trace_events.c |    2 +-
+ 3 files changed, 9 insertions(+), 13 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -901,22 +901,17 @@ void eventfs_remove_dir(struct eventfs_i
+ }
+ /**
+- * eventfs_remove_events_dir - remove eventfs dir or file from list
+- * @dentry: events's dentry to be removed.
++ * eventfs_remove_events_dir - remove the top level eventfs directory
++ * @ei: the event_inode returned by eventfs_create_events_dir().
+  *
+- * This function remove events main directory
++ * This function removes the events main directory
+  */
+-void eventfs_remove_events_dir(struct dentry *dentry)
++void eventfs_remove_events_dir(struct eventfs_inode *ei)
+ {
+-      struct tracefs_inode *ti;
++      struct dentry *dentry = ei->dentry;
+-      if (!dentry || !dentry->d_inode)
+-              return;
++      eventfs_remove_dir(ei);
+-      ti = get_tracefs(dentry->d_inode);
+-      if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+-              return;
+-
+-      d_invalidate(dentry);
++      /* Matches the dget() from eventfs_create_events_dir() */
+       dput(dentry);
+ }
+--- a/include/linux/tracefs.h
++++ b/include/linux/tracefs.h
+@@ -41,6 +41,7 @@ struct eventfs_inode *eventfs_create_dir
+                                        const struct eventfs_entry *entries,
+                                        int size, void *data);
++void eventfs_remove_events_dir(struct eventfs_inode *ei);
+ void eventfs_remove_dir(struct eventfs_inode *ei);
+ struct dentry *tracefs_create_file(const char *name, umode_t mode,
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -3893,7 +3893,7 @@ int event_trace_del_tracer(struct trace_
+       down_write(&trace_event_sem);
+       __trace_remove_event_dirs(tr);
+-      eventfs_remove_dir(tr->event_dir);
++      eventfs_remove_events_dir(tr->event_dir);
+       up_write(&trace_event_sem);
+       tr->event_dir = NULL;
diff --git a/queue-6.6/eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch b/queue-6.6/eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch
new file mode 100644 (file)
index 0000000..2cfb1ee
--- /dev/null
@@ -0,0 +1,51 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:31 -0500
+Subject: eventfs: Use GFP_NOFS for allocation when eventfs_mutex is held
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Josef Bacik <josef@toxicpanda.com>
+Message-ID: <20240206120950.611237633@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 4763d635c907baed212664dc579dde1663bb2676 upstream.
+
+If memory reclaim happens, it can reclaim file system pages. The file
+system pages from eventfs may take the eventfs_mutex on reclaim. This
+means that allocation while holding the eventfs_mutex must not call into
+filesystem reclaim. A lockdep splat uncovered this.
+
+Link: https://lkml.kernel.org/r/20231121231112.373501894@goodmis.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode")
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_i
+       if (!(dentry->d_inode->i_mode & S_IFDIR)) {
+               if (!ei->entry_attrs) {
+                       ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
+-                                                GFP_KERNEL);
++                                                GFP_NOFS);
+                       if (!ei->entry_attrs) {
+                               ret = -ENOMEM;
+                               goto out;
+@@ -627,7 +627,7 @@ static int add_dentries(struct dentry **
+ {
+       struct dentry **tmp;
+-      tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
++      tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
+       if (!tmp)
+               return -1;
+       tmp[cnt] = d;
diff --git a/queue-6.6/eventfs-use-kcalloc-instead-of-kzalloc.patch b/queue-6.6/eventfs-use-kcalloc-instead-of-kzalloc.patch
new file mode 100644 (file)
index 0000000..c5a0116
--- /dev/null
@@ -0,0 +1,68 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:47 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:49 -0500
+Subject: eventfs: Use kcalloc() instead of kzalloc()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Erick Archer <erick.archer@gmx.com>, "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Message-ID: <20240206120953.546131126@rostedt.homelinux.com>
+
+From: Erick Archer <erick.archer@gmx.com>
+
+commit 1057066009c4325bb1d8430c9274894d0860e7c3 upstream.
+
+As noted in the "Deprecated Interfaces, Language Features, Attributes,
+and Conventions" documentation [1], size calculations (especially
+multiplication) should not be performed in memory allocator (or similar)
+function arguments due to the risk of them overflowing. This could lead
+to values wrapping around and a smaller allocation being made than the
+caller was expecting. Using those allocations could lead to linear
+overflows of heap memory and other misbehaviors.
+
+So, use the purpose specific kcalloc() function instead of the argument
+size * count in the kzalloc() function.
+
+[1] https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240115181658.4562-1-erick.archer@gmx.com
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Link: https://github.com/KSPP/linux/issues/162
+Signed-off-by: Erick Archer <erick.archer@gmx.com>
+Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -97,7 +97,7 @@ static int eventfs_set_attr(struct mnt_i
+       /* Preallocate the children mode array if necessary */
+       if (!(dentry->d_inode->i_mode & S_IFDIR)) {
+               if (!ei->entry_attrs) {
+-                      ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
++                      ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs),
+                                                 GFP_NOFS);
+                       if (!ei->entry_attrs) {
+                               ret = -ENOMEM;
+@@ -874,7 +874,7 @@ struct eventfs_inode *eventfs_create_dir
+       }
+       if (size) {
+-              ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++              ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+               if (!ei->d_children) {
+                       kfree_const(ei->name);
+                       kfree(ei);
+@@ -941,7 +941,7 @@ struct eventfs_inode *eventfs_create_eve
+               goto fail;
+       if (size) {
+-              ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
++              ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
+               if (!ei->d_children)
+                       goto fail;
+       }
diff --git a/queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch b/queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
new file mode 100644 (file)
index 0000000..67a4923
--- /dev/null
@@ -0,0 +1,189 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:28 -0500
+Subject: eventfs: Use simple_recursive_removal() to clean up dentries
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Andrew Morton <akpm@linux-foundation.org>, Al Viro <viro@zeniv.linux.org.uk>
+Message-ID: <20240206120950.121281039@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 407c6726ca71b33330d2d6345d9ea7ebc02575e9 upstream.
+
+Looking at how dentry is removed via the tracefs system, I found that
+eventfs does not do everything that it did under tracefs. The tracefs
+removal of a dentry calls simple_recursive_removal() that does a lot more
+than a simple d_invalidate().
+
+It should be a requirement that if an eventfs_inode has a dentry, then so
+does its parent. When removing an eventfs_inode, if it has a dentry, a call
+to simple_recursive_removal() on that dentry should clean up all the
+dentries underneath it.
+
+Add WARN_ON_ONCE() to check for the parent having a dentry if any children
+do.
+
+Link: https://lore.kernel.org/all/20231101022553.GE1957730@ZenIV/
+Link: https://lkml.kernel.org/r/20231101172650.552471568@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   77 ++++++++++++++++++++++++++---------------------
+ fs/tracefs/internal.h    |    2 -
+ 2 files changed, 44 insertions(+), 35 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -967,30 +967,29 @@ static void unhook_dentry(struct dentry
+ {
+       if (!dentry)
+               return;
+-
+-      /* Keep the dentry from being freed yet (see eventfs_workfn()) */
++      /*
++       * Need to add a reference to the dentry that is expected by
++       * simple_recursive_removal(), which will include a dput().
++       */
+       dget(dentry);
+-      dentry->d_fsdata = NULL;
+-      d_invalidate(dentry);
+-      mutex_lock(&eventfs_mutex);
+-      /* dentry should now have at least a single reference */
+-      WARN_ONCE((int)d_count(dentry) < 1,
+-                "dentry %px (%s) less than one reference (%d) after invalidate\n",
+-                dentry, dentry->d_name.name, d_count(dentry));
+-      mutex_unlock(&eventfs_mutex);
++      /*
++       * Also add a reference for the dput() in eventfs_workfn().
++       * That is required as that dput() will free the ei after
++       * the SRCU grace period is over.
++       */
++      dget(dentry);
+ }
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
+- * @head: the list head to place the deleted @ei and children
+  * @level: prevent recursion from going more than 3 levels deep.
+  *
+  * This function recursively removes eventfs_inodes which
+  * contains info of files and/or directories.
+  */
+-static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level)
++static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
+ {
+       struct eventfs_inode *ei_child;
+@@ -1009,13 +1008,26 @@ static void eventfs_remove_rec(struct ev
+       /* search for nested folders or files */
+       list_for_each_entry_srcu(ei_child, &ei->children, list,
+                                lockdep_is_held(&eventfs_mutex)) {
+-              eventfs_remove_rec(ei_child, head, level + 1);
++              /* Children only have dentry if parent does */
++              WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
++              eventfs_remove_rec(ei_child, level + 1);
+       }
++
+       ei->is_freed = 1;
++      for (int i = 0; i < ei->nr_entries; i++) {
++              if (ei->d_children[i]) {
++                      /* Children only have dentry if parent does */
++                      WARN_ON_ONCE(!ei->dentry);
++                      unhook_dentry(ei->d_children[i]);
++              }
++      }
++
++      unhook_dentry(ei->dentry);
++
+       list_del_rcu(&ei->list);
+-      list_add_tail(&ei->del_list, head);
++      call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+ }
+ /**
+@@ -1026,30 +1038,22 @@ static void eventfs_remove_rec(struct ev
+  */
+ void eventfs_remove_dir(struct eventfs_inode *ei)
+ {
+-      struct eventfs_inode *tmp;
+-      LIST_HEAD(ei_del_list);
++      struct dentry *dentry;
+       if (!ei)
+               return;
+-      /*
+-       * Move the deleted eventfs_inodes onto the ei_del_list
+-       * which will also set the is_freed value. Note, this has to be
+-       * done under the eventfs_mutex, but the deletions of
+-       * the dentries must be done outside the eventfs_mutex.
+-       * Hence moving them to this temporary list.
+-       */
+       mutex_lock(&eventfs_mutex);
+-      eventfs_remove_rec(ei, &ei_del_list, 0);
++      dentry = ei->dentry;
++      eventfs_remove_rec(ei, 0);
+       mutex_unlock(&eventfs_mutex);
+-      list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) {
+-              for (int i = 0; i < ei->nr_entries; i++)
+-                      unhook_dentry(ei->d_children[i]);
+-              unhook_dentry(ei->dentry);
+-              list_del(&ei->del_list);
+-              call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+-      }
++      /*
++       * If any of the ei children has a dentry, then the ei itself
++       * must have a dentry.
++       */
++      if (dentry)
++              simple_recursive_removal(dentry, NULL);
+ }
+ /**
+@@ -1060,10 +1064,17 @@ void eventfs_remove_dir(struct eventfs_i
+  */
+ void eventfs_remove_events_dir(struct eventfs_inode *ei)
+ {
+-      struct dentry *dentry = ei->dentry;
++      struct dentry *dentry;
++      dentry = ei->dentry;
+       eventfs_remove_dir(ei);
+-      /* Matches the dget() from eventfs_create_events_dir() */
++      /*
++       * Matches the dget() done by tracefs_start_creating()
++       * in eventfs_create_events_dir() when it the dentry was
++       * created. In other words, it's a normal dentry that
++       * sticks around while the other ei->dentry are created
++       * and destroyed dynamically.
++       */
+       dput(dentry);
+ }
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -55,12 +55,10 @@ struct eventfs_inode {
+       /*
+        * Union - used for deletion
+        * @llist:      for calling dput() if needed after RCU
+-       * @del_list:   list of eventfs_inode to delete
+        * @rcu:        eventfs_inode to delete in RCU
+        */
+       union {
+               struct llist_node       llist;
+-              struct list_head        del_list;
+               struct rcu_head         rcu;
+       };
+       unsigned int                    is_freed:1;
diff --git a/queue-6.6/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch b/queue-6.6/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch
new file mode 100644 (file)
index 0000000..afd7c5d
--- /dev/null
@@ -0,0 +1,87 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:01 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:59 -0500
+Subject: eventfs: Warn if an eventfs_inode is freed without is_freed being set
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120955.173872948@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 5a49f996046ba947466bc7461e4b19c4d1daf978 upstream.
+
+There should never be a case where an eventfs_inode is being freed without
+is_freed being set. Add a WARN_ON_ONCE() if it ever happens. That would
+mean there was one too many put_ei()s.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240201161616.843551963@goodmis.org
+
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -73,6 +73,9 @@ enum {
+ static void release_ei(struct kref *ref)
+ {
+       struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
++
++      WARN_ON_ONCE(!ei->is_freed);
++
+       kfree(ei->entry_attrs);
+       kfree_const(ei->name);
+       kfree_rcu(ei, rcu);
+@@ -84,6 +87,14 @@ static inline void put_ei(struct eventfs
+               kref_put(&ei->kref, release_ei);
+ }
++static inline void free_ei(struct eventfs_inode *ei)
++{
++      if (ei) {
++              ei->is_freed = 1;
++              put_ei(ei);
++      }
++}
++
+ static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
+ {
+       if (ei)
+@@ -679,7 +690,7 @@ struct eventfs_inode *eventfs_create_dir
+       /* Was the parent freed? */
+       if (list_empty(&ei->list)) {
+-              put_ei(ei);
++              free_ei(ei);
+               ei = NULL;
+       }
+       return ei;
+@@ -770,7 +781,7 @@ struct eventfs_inode *eventfs_create_eve
+       return ei;
+  fail:
+-      put_ei(ei);
++      free_ei(ei);
+       tracefs_failed_creating(dentry);
+       return ERR_PTR(-ENOMEM);
+ }
+@@ -801,9 +812,8 @@ static void eventfs_remove_rec(struct ev
+       list_for_each_entry(ei_child, &ei->children, list)
+               eventfs_remove_rec(ei_child, level + 1);
+-      ei->is_freed = 1;
+       list_del(&ei->list);
+-      put_ei(ei);
++      free_ei(ei);
+ }
+ /**
diff --git a/queue-6.6/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch b/queue-6.6/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
new file mode 100644 (file)
index 0000000..eeb1843
--- /dev/null
@@ -0,0 +1,92 @@
+From 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 upstream.
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock.  This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock.  However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock.  Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock.  One is to call
+nfsd_break_one_deleg() which calls nfsd4_run_cb().  These don't act on
+the nfs4_file at all, so don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem with multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation().  The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* made under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked.  i.e. while hash_delegation_locked() was running.
+As hash_delegation_locked() doesn't interact in any way with nfsd4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_deleg_cb() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test.  It is still tested
+*after* vfs_setlease() has succeeded.  This might be significant and as
+vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called
+under ->flc_lock this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4944,10 +4944,8 @@ nfsd_break_deleg_cb(struct file_lock *fl
+        */
+       fl->fl_break_time = 0;
+-      spin_lock(&fp->fi_lock);
+       fp->fi_had_conflict = true;
+       nfsd_break_one_deleg(dp);
+-      spin_unlock(&fp->fi_lock);
+       return false;
+ }
+@@ -5556,12 +5554,13 @@ nfs4_set_delegation(struct nfsd4_open *o
+       if (status)
+               goto out_unlock;
++      status = -EAGAIN;
++      if (fp->fi_had_conflict)
++              goto out_unlock;
++
+       spin_lock(&state_lock);
+       spin_lock(&fp->fi_lock);
+-      if (fp->fi_had_conflict)
+-              status = -EAGAIN;
+-      else
+-              status = hash_delegation_locked(dp, fp);
++      status = hash_delegation_locked(dp, fp);
+       spin_unlock(&fp->fi_lock);
+       spin_unlock(&state_lock);
diff --git a/queue-6.6/revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch b/queue-6.6/revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch
new file mode 100644 (file)
index 0000000..751c4d5
--- /dev/null
@@ -0,0 +1,42 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:07 -0500
+Subject: Revert "eventfs: Check for NULL ef in eventfs_set_attr()"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.700644630@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit d8f492a059728bbd397defbc9b8d2f4159d869b5.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -113,14 +113,14 @@ static int eventfs_set_attr(struct mnt_i
+       mutex_lock(&eventfs_mutex);
+       ef = dentry->d_fsdata;
+-      if (ef && ef->is_freed) {
++      if (ef->is_freed) {
+               /* Do not allow changes if the event is about to be removed. */
+               mutex_unlock(&eventfs_mutex);
+               return -ENODEV;
+       }
+       ret = simple_setattr(idmap, dentry, iattr);
+-      if (!ret && ef)
++      if (!ret)
+               update_attr(ef, iattr);
+       mutex_unlock(&eventfs_mutex);
+       return ret;
diff --git a/queue-6.6/revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch b/queue-6.6/revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
new file mode 100644 (file)
index 0000000..19840b4
--- /dev/null
@@ -0,0 +1,251 @@
+From stable+bounces-18936-greg=kroah.com@vger.kernel.org Tue Feb  6 13:14:29 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:09 -0500
+Subject: Revert "eventfs: Delete eventfs_inode when the last dentry is freed"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.030353224@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit ea4c30a0a73fb5cb2604539db550f1e620bb949c.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  150 +++++++++++++++++++++++------------------------
+ 1 file changed, 76 insertions(+), 74 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -53,12 +53,10 @@ struct eventfs_file {
+       const struct inode_operations   *iop;
+       /*
+        * Union - used for deletion
+-       * @llist:      for calling dput() if needed after RCU
+        * @del_list:   list of eventfs_file to delete
+        * @rcu:        eventfs_file to delete in RCU
+        */
+       union {
+-              struct llist_node       llist;
+               struct list_head        del_list;
+               struct rcu_head         rcu;
+       };
+@@ -115,7 +113,8 @@ static int eventfs_set_attr(struct mnt_i
+       mutex_lock(&eventfs_mutex);
+       ef = dentry->d_fsdata;
+-      if (ef->is_freed) {
++      /* The LSB is set when the eventfs_inode is being freed */
++      if (((unsigned long)ef & 1UL) || ef->is_freed) {
+               /* Do not allow changes if the event is about to be removed. */
+               mutex_unlock(&eventfs_mutex);
+               return -ENODEV;
+@@ -259,13 +258,6 @@ static struct dentry *create_dir(struct
+       return eventfs_end_creating(dentry);
+ }
+-static void free_ef(struct eventfs_file *ef)
+-{
+-      kfree(ef->name);
+-      kfree(ef->ei);
+-      kfree(ef);
+-}
+-
+ /**
+  * eventfs_set_ef_status_free - set the ef->status to free
+  * @ti: the tracefs_inode of the dentry
+@@ -278,20 +270,34 @@ void eventfs_set_ef_status_free(struct t
+ {
+       struct tracefs_inode *ti_parent;
+       struct eventfs_inode *ei;
+-      struct eventfs_file *ef;
++      struct eventfs_file *ef, *tmp;
+       /* The top level events directory may be freed by this */
+       if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
++              LIST_HEAD(ef_del_list);
++
+               mutex_lock(&eventfs_mutex);
++
+               ei = ti->private;
++              /* Record all the top level files */
++              list_for_each_entry_srcu(ef, &ei->e_top_files, list,
++                                       lockdep_is_held(&eventfs_mutex)) {
++                      list_add_tail(&ef->del_list, &ef_del_list);
++              }
++
+               /* Nothing should access this, but just in case! */
+               ti->private = NULL;
++
+               mutex_unlock(&eventfs_mutex);
+-              ef = dentry->d_fsdata;
+-              if (ef)
+-                      free_ef(ef);
++              /* Now safely free the top level files and their children */
++              list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++                      list_del(&ef->del_list);
++                      eventfs_remove(ef);
++              }
++
++              kfree(ei);
+               return;
+       }
+@@ -305,13 +311,16 @@ void eventfs_set_ef_status_free(struct t
+       if (!ef)
+               goto out;
+-      if (ef->is_freed) {
+-              free_ef(ef);
+-      } else {
+-              ef->dentry = NULL;
+-      }
++      /*
++       * If ef was freed, then the LSB bit is set for d_fsdata.
++       * But this should not happen, as it should still have a
++       * ref count that prevents it. Warn in case it does.
++       */
++      if (WARN_ON_ONCE((unsigned long)ef & 1))
++              goto out;
+       dentry->d_fsdata = NULL;
++      ef->dentry = NULL;
+ out:
+       mutex_unlock(&eventfs_mutex);
+ }
+@@ -838,53 +847,13 @@ int eventfs_add_file(const char *name, u
+       return 0;
+ }
+-static LLIST_HEAD(free_list);
+-
+-static void eventfs_workfn(struct work_struct *work)
+-{
+-        struct eventfs_file *ef, *tmp;
+-        struct llist_node *llnode;
+-
+-      llnode = llist_del_all(&free_list);
+-        llist_for_each_entry_safe(ef, tmp, llnode, llist) {
+-              /* This should only get here if it had a dentry */
+-              if (!WARN_ON_ONCE(!ef->dentry))
+-                      dput(ef->dentry);
+-        }
+-}
+-
+-static DECLARE_WORK(eventfs_work, eventfs_workfn);
+-
+-static void free_rcu_ef(struct rcu_head *head)
++static void free_ef(struct rcu_head *head)
+ {
+       struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
+-      if (ef->dentry) {
+-              /* Do not free the ef until all references of dentry are gone */
+-              if (llist_add(&ef->llist, &free_list))
+-                      queue_work(system_unbound_wq, &eventfs_work);
+-              return;
+-      }
+-
+-      free_ef(ef);
+-}
+-
+-static void unhook_dentry(struct dentry *dentry)
+-{
+-      if (!dentry)
+-              return;
+-
+-      /* Keep the dentry from being freed yet (see eventfs_workfn()) */
+-      dget(dentry);
+-
+-      dentry->d_fsdata = NULL;
+-      d_invalidate(dentry);
+-      mutex_lock(&eventfs_mutex);
+-      /* dentry should now have at least a single reference */
+-      WARN_ONCE((int)d_count(dentry) < 1,
+-                "dentry %px (%s) less than one reference (%d) after invalidate\n",
+-                dentry, dentry->d_name.name, d_count(dentry));
+-      mutex_unlock(&eventfs_mutex);
++      kfree(ef->name);
++      kfree(ef->ei);
++      kfree(ef);
+ }
+ /**
+@@ -936,25 +905,58 @@ void eventfs_remove(struct eventfs_file
+ {
+       struct eventfs_file *tmp;
+       LIST_HEAD(ef_del_list);
++      struct dentry *dentry_list = NULL;
++      struct dentry *dentry;
+       if (!ef)
+               return;
+-      /*
+-       * Move the deleted eventfs_inodes onto the ei_del_list
+-       * which will also set the is_freed value. Note, this has to be
+-       * done under the eventfs_mutex, but the deletions of
+-       * the dentries must be done outside the eventfs_mutex.
+-       * Hence moving them to this temporary list.
+-       */
+       mutex_lock(&eventfs_mutex);
+       eventfs_remove_rec(ef, &ef_del_list, 0);
++      list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++              if (ef->dentry) {
++                      unsigned long ptr = (unsigned long)dentry_list;
++
++                      /* Keep the dentry from being freed yet */
++                      dget(ef->dentry);
++
++                      /*
++                       * Paranoid: The dget() above should prevent the dentry
++                       * from being freed and calling eventfs_set_ef_status_free().
++                       * But just in case, set the link list LSB pointer to 1
++                       * and have eventfs_set_ef_status_free() check that to
++                       * make sure that if it does happen, it will not think
++                       * the d_fsdata is an event_file.
++                       *
++                       * For this to work, no event_file should be allocated
++                       * on a odd space, as the ef should always be allocated
++                       * to be at least word aligned. Check for that too.
++                       */
++                      WARN_ON_ONCE(ptr & 1);
++
++                      ef->dentry->d_fsdata = (void *)(ptr | 1);
++                      dentry_list = ef->dentry;
++                      ef->dentry = NULL;
++              }
++              call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
++      }
+       mutex_unlock(&eventfs_mutex);
+-      list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+-              unhook_dentry(ef->dentry);
+-              list_del(&ef->del_list);
+-              call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++      while (dentry_list) {
++              unsigned long ptr;
++
++              dentry = dentry_list;
++              ptr = (unsigned long)dentry->d_fsdata & ~1UL;
++              dentry_list = (struct dentry *)ptr;
++              dentry->d_fsdata = NULL;
++              d_invalidate(dentry);
++              mutex_lock(&eventfs_mutex);
++              /* dentry should now have at least a single reference */
++              WARN_ONCE((int)d_count(dentry) < 1,
++                        "dentry %p less than one reference (%d) after invalidate\n",
++                        dentry, d_count(dentry));
++              mutex_unlock(&eventfs_mutex);
++              dput(dentry);
+       }
+ }
diff --git a/queue-6.6/revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch b/queue-6.6/revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
new file mode 100644 (file)
index 0000000..1332094
--- /dev/null
@@ -0,0 +1,50 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:06 -0500
+Subject: Revert "eventfs: Do not allow NULL parent to eventfs_start_creating()"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.536298441@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 6abb8c223ce12078a0f2c129656a13338dfe960b.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -509,15 +509,20 @@ struct dentry *eventfs_start_creating(co
+       struct dentry *dentry;
+       int error;
+-      /* Must always have a parent. */
+-      if (WARN_ON_ONCE(!parent))
+-              return ERR_PTR(-EINVAL);
+-
+       error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+                             &tracefs_mount_count);
+       if (error)
+               return ERR_PTR(error);
++      /*
++       * If the parent is not specified, we create it in the root.
++       * We need the root dentry to do this, which is in the super
++       * block. A pointer to that is in the struct vfsmount that we
++       * have around.
++       */
++      if (!parent)
++              parent = tracefs_mount->mnt_root;
++
+       if (unlikely(IS_DEADDIR(parent->d_inode)))
+               dentry = ERR_PTR(-ENOENT);
+       else
diff --git a/queue-6.6/revert-eventfs-remove-is_freed-union-with-rcu-head.patch b/queue-6.6/revert-eventfs-remove-is_freed-union-with-rcu-head.patch
new file mode 100644 (file)
index 0000000..4600331
--- /dev/null
@@ -0,0 +1,60 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:11 -0500
+Subject: Revert "eventfs: Remove "is_freed" union with rcu head"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.351905829@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit fa18a8a0539b02cc621938091691f0b73f0b1288.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -38,7 +38,6 @@ struct eventfs_inode {
+  * @fop:      file_operations for file or directory
+  * @iop:      inode_operations for file or directory
+  * @data:     something that the caller will want to get to later on
+- * @is_freed: Flag set if the eventfs is on its way to be freed
+  * @mode:     the permission that the file or directory should have
+  */
+ struct eventfs_file {
+@@ -53,14 +52,15 @@ struct eventfs_file {
+        * Union - used for deletion
+        * @del_list:   list of eventfs_file to delete
+        * @rcu:        eventfs_file to delete in RCU
++       * @is_freed:   node is freed if one of the above is set
+        */
+       union {
+               struct list_head        del_list;
+               struct rcu_head         rcu;
++              unsigned long           is_freed;
+       };
+       void                            *data;
+-      unsigned int                    is_freed:1;
+-      unsigned int                    mode:31;
++      umode_t                         mode;
+ };
+ static DEFINE_MUTEX(eventfs_mutex);
+@@ -814,8 +814,6 @@ static void eventfs_remove_rec(struct ev
+               }
+       }
+-      ef->is_freed = 1;
+-
+       list_del_rcu(&ef->list);
+       list_add_tail(&ef->del_list, head);
+ }
diff --git a/queue-6.6/revert-eventfs-save-ownership-and-mode.patch b/queue-6.6/revert-eventfs-save-ownership-and-mode.patch
new file mode 100644 (file)
index 0000000..2e8aa8e
--- /dev/null
@@ -0,0 +1,247 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:10 -0500
+Subject: Revert "eventfs: Save ownership and mode"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120947.186364236@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 9aaee3eebc91dd9ccebf6b6bc8a5f59d04ef718b.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  107 +++++++----------------------------------------
+ 1 file changed, 16 insertions(+), 91 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -40,8 +40,6 @@ struct eventfs_inode {
+  * @data:     something that the caller will want to get to later on
+  * @is_freed: Flag set if the eventfs is on its way to be freed
+  * @mode:     the permission that the file or directory should have
+- * @uid:      saved uid if changed
+- * @gid:      saved gid if changed
+  */
+ struct eventfs_file {
+       const char                      *name;
+@@ -63,22 +61,11 @@ struct eventfs_file {
+       void                            *data;
+       unsigned int                    is_freed:1;
+       unsigned int                    mode:31;
+-      kuid_t                          uid;
+-      kgid_t                          gid;
+ };
+ static DEFINE_MUTEX(eventfs_mutex);
+ DEFINE_STATIC_SRCU(eventfs_srcu);
+-/* Mode is unsigned short, use the upper bits for flags */
+-enum {
+-      EVENTFS_SAVE_MODE       = BIT(16),
+-      EVENTFS_SAVE_UID        = BIT(17),
+-      EVENTFS_SAVE_GID        = BIT(18),
+-};
+-
+-#define EVENTFS_MODE_MASK     (EVENTFS_SAVE_MODE - 1)
+-
+ static struct dentry *eventfs_root_lookup(struct inode *dir,
+                                         struct dentry *dentry,
+                                         unsigned int flags);
+@@ -86,54 +73,8 @@ static int dcache_dir_open_wrapper(struc
+ static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
+ static int eventfs_release(struct inode *inode, struct file *file);
+-static void update_attr(struct eventfs_file *ef, struct iattr *iattr)
+-{
+-      unsigned int ia_valid = iattr->ia_valid;
+-
+-      if (ia_valid & ATTR_MODE) {
+-              ef->mode = (ef->mode & ~EVENTFS_MODE_MASK) |
+-                      (iattr->ia_mode & EVENTFS_MODE_MASK) |
+-                      EVENTFS_SAVE_MODE;
+-      }
+-      if (ia_valid & ATTR_UID) {
+-              ef->mode |= EVENTFS_SAVE_UID;
+-              ef->uid = iattr->ia_uid;
+-      }
+-      if (ia_valid & ATTR_GID) {
+-              ef->mode |= EVENTFS_SAVE_GID;
+-              ef->gid = iattr->ia_gid;
+-      }
+-}
+-
+-static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
+-                           struct iattr *iattr)
+-{
+-      struct eventfs_file *ef;
+-      int ret;
+-
+-      mutex_lock(&eventfs_mutex);
+-      ef = dentry->d_fsdata;
+-      /* The LSB is set when the eventfs_inode is being freed */
+-      if (((unsigned long)ef & 1UL) || ef->is_freed) {
+-              /* Do not allow changes if the event is about to be removed. */
+-              mutex_unlock(&eventfs_mutex);
+-              return -ENODEV;
+-      }
+-
+-      ret = simple_setattr(idmap, dentry, iattr);
+-      if (!ret)
+-              update_attr(ef, iattr);
+-      mutex_unlock(&eventfs_mutex);
+-      return ret;
+-}
+-
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+       .lookup         = eventfs_root_lookup,
+-      .setattr        = eventfs_set_attr,
+-};
+-
+-static const struct inode_operations eventfs_file_inode_operations = {
+-      .setattr        = eventfs_set_attr,
+ };
+ static const struct file_operations eventfs_file_operations = {
+@@ -144,20 +85,10 @@ static const struct file_operations even
+       .release        = eventfs_release,
+ };
+-static void update_inode_attr(struct inode *inode, struct eventfs_file *ef)
+-{
+-      inode->i_mode = ef->mode & EVENTFS_MODE_MASK;
+-
+-      if (ef->mode & EVENTFS_SAVE_UID)
+-              inode->i_uid = ef->uid;
+-
+-      if (ef->mode & EVENTFS_SAVE_GID)
+-              inode->i_gid = ef->gid;
+-}
+-
+ /**
+  * create_file - create a file in the tracefs filesystem
+- * @ef: the eventfs_file
++ * @name: the name of the file to create.
++ * @mode: the permission that the file should have.
+  * @parent: parent dentry for this file.
+  * @data: something that the caller will want to get to later on.
+  * @fop: struct file_operations that should be used for this file.
+@@ -173,7 +104,7 @@ static void update_inode_attr(struct ino
+  * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+  * returned.
+  */
+-static struct dentry *create_file(struct eventfs_file *ef,
++static struct dentry *create_file(const char *name, umode_t mode,
+                                 struct dentry *parent, void *data,
+                                 const struct file_operations *fop)
+ {
+@@ -181,13 +112,13 @@ static struct dentry *create_file(struct
+       struct dentry *dentry;
+       struct inode *inode;
+-      if (!(ef->mode & S_IFMT))
+-              ef->mode |= S_IFREG;
++      if (!(mode & S_IFMT))
++              mode |= S_IFREG;
+-      if (WARN_ON_ONCE(!S_ISREG(ef->mode)))
++      if (WARN_ON_ONCE(!S_ISREG(mode)))
+               return NULL;
+-      dentry = eventfs_start_creating(ef->name, parent);
++      dentry = eventfs_start_creating(name, parent);
+       if (IS_ERR(dentry))
+               return dentry;
+@@ -196,10 +127,7 @@ static struct dentry *create_file(struct
+       if (unlikely(!inode))
+               return eventfs_failed_creating(dentry);
+-      /* If the user updated the directory's attributes, use them */
+-      update_inode_attr(inode, ef);
+-
+-      inode->i_op = &eventfs_file_inode_operations;
++      inode->i_mode = mode;
+       inode->i_fop = fop;
+       inode->i_private = data;
+@@ -212,7 +140,7 @@ static struct dentry *create_file(struct
+ /**
+  * create_dir - create a dir in the tracefs filesystem
+- * @ei: the eventfs_inode that represents the directory to create
++ * @name: the name of the file to create.
+  * @parent: parent dentry for this file.
+  * @data: something that the caller will want to get to later on.
+  *
+@@ -227,14 +155,13 @@ static struct dentry *create_file(struct
+  * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+  * returned.
+  */
+-static struct dentry *create_dir(struct eventfs_file *ef,
+-                               struct dentry *parent, void *data)
++static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
+ {
+       struct tracefs_inode *ti;
+       struct dentry *dentry;
+       struct inode *inode;
+-      dentry = eventfs_start_creating(ef->name, parent);
++      dentry = eventfs_start_creating(name, parent);
+       if (IS_ERR(dentry))
+               return dentry;
+@@ -242,8 +169,7 @@ static struct dentry *create_dir(struct
+       if (unlikely(!inode))
+               return eventfs_failed_creating(dentry);
+-      update_inode_attr(inode, ef);
+-
++      inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+       inode->i_op = &eventfs_root_dir_inode_operations;
+       inode->i_fop = &eventfs_file_operations;
+       inode->i_private = data;
+@@ -380,9 +306,10 @@ create_dentry(struct eventfs_file *ef, s
+               inode_lock(parent->d_inode);
+       if (ef->ei)
+-              dentry = create_dir(ef, parent, ef->data);
++              dentry = create_dir(ef->name, parent, ef->data);
+       else
+-              dentry = create_file(ef, parent, ef->data, ef->fop);
++              dentry = create_file(ef->name, ef->mode, parent,
++                                   ef->data, ef->fop);
+       if (!lookup)
+               inode_unlock(parent->d_inode);
+@@ -548,7 +475,6 @@ static int dcache_dir_open_wrapper(struc
+               if (d) {
+                       struct dentry **tmp;
+-
+                       tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+                       if (!tmp)
+                               break;
+@@ -623,14 +549,13 @@ static struct eventfs_file *eventfs_prep
+                       return ERR_PTR(-ENOMEM);
+               }
+               INIT_LIST_HEAD(&ef->ei->e_top_files);
+-              ef->mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+       } else {
+               ef->ei = NULL;
+-              ef->mode = mode;
+       }
+       ef->iop = iop;
+       ef->fop = fop;
++      ef->mode = mode;
+       ef->data = data;
+       return ef;
+ }
diff --git a/queue-6.6/revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch b/queue-6.6/revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
new file mode 100644 (file)
index 0000000..c703f73
--- /dev/null
@@ -0,0 +1,179 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:19 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:08 -0500
+Subject: Revert "eventfs: Use simple_recursive_removal() to clean up dentries"
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120946.866568635@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+This reverts commit 055907ad2c14838c90d63297f7bab8d180a5d844.
+
+The eventfs was not designed properly and may have some hidden bugs in it.
+Linus rewrote it properly and I trust his version more than this one. Revert
+the backported patches for 6.6 and re-apply all the changes to make it
+equivalent to Linus's version.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   71 +++++++++++++++++++++++++----------------------
+ 1 file changed, 38 insertions(+), 33 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -54,10 +54,12 @@ struct eventfs_file {
+       /*
+        * Union - used for deletion
+        * @llist:      for calling dput() if needed after RCU
++       * @del_list:   list of eventfs_file to delete
+        * @rcu:        eventfs_file to delete in RCU
+        */
+       union {
+               struct llist_node       llist;
++              struct list_head        del_list;
+               struct rcu_head         rcu;
+       };
+       void                            *data;
+@@ -274,6 +276,7 @@ static void free_ef(struct eventfs_file
+  */
+ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+ {
++      struct tracefs_inode *ti_parent;
+       struct eventfs_inode *ei;
+       struct eventfs_file *ef;
+@@ -294,6 +297,10 @@ void eventfs_set_ef_status_free(struct t
+       mutex_lock(&eventfs_mutex);
++      ti_parent = get_tracefs(dentry->d_parent->d_inode);
++      if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
++              goto out;
++
+       ef = dentry->d_fsdata;
+       if (!ef)
+               goto out;
+@@ -866,29 +873,30 @@ static void unhook_dentry(struct dentry
+ {
+       if (!dentry)
+               return;
+-      /*
+-       * Need to add a reference to the dentry that is expected by
+-       * simple_recursive_removal(), which will include a dput().
+-       */
+-      dget(dentry);
+-      /*
+-       * Also add a reference for the dput() in eventfs_workfn().
+-       * That is required as that dput() will free the ei after
+-       * the SRCU grace period is over.
+-       */
++      /* Keep the dentry from being freed yet (see eventfs_workfn()) */
+       dget(dentry);
++
++      dentry->d_fsdata = NULL;
++      d_invalidate(dentry);
++      mutex_lock(&eventfs_mutex);
++      /* dentry should now have at least a single reference */
++      WARN_ONCE((int)d_count(dentry) < 1,
++                "dentry %px (%s) less than one reference (%d) after invalidate\n",
++                dentry, dentry->d_name.name, d_count(dentry));
++      mutex_unlock(&eventfs_mutex);
+ }
+ /**
+  * eventfs_remove_rec - remove eventfs dir or file from list
+  * @ef: eventfs_file to be removed.
++ * @head: to create list of eventfs_file to be deleted
+  * @level: to check recursion depth
+  *
+  * The helper function eventfs_remove_rec() is used to clean up and free the
+  * associated data from eventfs for both of the added functions.
+  */
+-static void eventfs_remove_rec(struct eventfs_file *ef, int level)
++static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
+ {
+       struct eventfs_file *ef_child;
+@@ -908,16 +916,14 @@ static void eventfs_remove_rec(struct ev
+               /* search for nested folders or files */
+               list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+                                        lockdep_is_held(&eventfs_mutex)) {
+-                      eventfs_remove_rec(ef_child, level + 1);
++                      eventfs_remove_rec(ef_child, head, level + 1);
+               }
+       }
+       ef->is_freed = 1;
+-      unhook_dentry(ef->dentry);
+-
+       list_del_rcu(&ef->list);
+-      call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++      list_add_tail(&ef->del_list, head);
+ }
+ /**
+@@ -928,22 +934,28 @@ static void eventfs_remove_rec(struct ev
+  */
+ void eventfs_remove(struct eventfs_file *ef)
+ {
+-      struct dentry *dentry;
++      struct eventfs_file *tmp;
++      LIST_HEAD(ef_del_list);
+       if (!ef)
+               return;
++      /*
++       * Move the deleted eventfs_inodes onto the ei_del_list
++       * which will also set the is_freed value. Note, this has to be
++       * done under the eventfs_mutex, but the deletions of
++       * the dentries must be done outside the eventfs_mutex.
++       * Hence moving them to this temporary list.
++       */
+       mutex_lock(&eventfs_mutex);
+-      dentry = ef->dentry;
+-      eventfs_remove_rec(ef, 0);
++      eventfs_remove_rec(ef, &ef_del_list, 0);
+       mutex_unlock(&eventfs_mutex);
+-      /*
+-       * If any of the ei children has a dentry, then the ei itself
+-       * must have a dentry.
+-       */
+-      if (dentry)
+-              simple_recursive_removal(dentry, NULL);
++      list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
++              unhook_dentry(ef->dentry);
++              list_del(&ef->del_list);
++              call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
++      }
+ }
+ /**
+@@ -954,8 +966,6 @@ void eventfs_remove(struct eventfs_file
+  */
+ void eventfs_remove_events_dir(struct dentry *dentry)
+ {
+-      struct eventfs_file *ef_child;
+-      struct eventfs_inode *ei;
+       struct tracefs_inode *ti;
+       if (!dentry || !dentry->d_inode)
+@@ -965,11 +975,6 @@ void eventfs_remove_events_dir(struct de
+       if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+               return;
+-      mutex_lock(&eventfs_mutex);
+-      ei = ti->private;
+-      list_for_each_entry_srcu(ef_child, &ei->e_top_files, list,
+-                               lockdep_is_held(&eventfs_mutex)) {
+-              eventfs_remove_rec(ef_child, 0);
+-      }
+-      mutex_unlock(&eventfs_mutex);
++      d_invalidate(dentry);
++      dput(dentry);
+ }
index 11d748cea956da49260fc187a5e42e07dabc0c26..0c10abcf03279d227535679e6adab95192e96d7f 100644 (file)
@@ -255,3 +255,61 @@ block-fix-partial-zone-append-completion-handling-in.patch
 netfilter-ipset-fix-performance-regression-in-swap-operation.patch
 netfilter-ipset-missing-gc-cancellations-fixed.patch
 parisc-fix-random-data-corruption-from-exception-handler.patch
+revert-eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
+revert-eventfs-check-for-null-ef-in-eventfs_set_attr.patch
+revert-eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
+revert-eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
+revert-eventfs-save-ownership-and-mode.patch
+revert-eventfs-remove-is_freed-union-with-rcu-head.patch
+eventfs-remove-eventfs_file-and-just-use-eventfs_inode.patch
+eventfs-use-eventfs_remove_events_dir.patch
+eventfs-use-err_cast-in-eventfs_create_events_dir.patch
+eventfs-fix-failure-path-in-eventfs_create_events_dir.patch
+tracefs-eventfs-modify-mismatched-function-name.patch
+eventfs-fix-warn_on-in-create_file_dentry.patch
+eventfs-fix-typo-in-eventfs_inode-union-comment.patch
+eventfs-remove-extra-dget-in-eventfs_create_events_dir.patch
+eventfs-fix-kerneldoc-of-eventfs_remove_rec.patch
+eventfs-remove-is_freed-union-with-rcu-head.patch
+eventfs-have-a-free_ei-that-just-frees-the-eventfs_inode.patch
+eventfs-test-for-ei-is_freed-when-accessing-ei-dentry.patch
+eventfs-save-ownership-and-mode.patch
+eventfs-hold-eventfs_mutex-when-calling-callback-functions.patch
+eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch
+eventfs-remove-special-processing-of-dput-of-events-directory.patch
+eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch
+eventfs-remove-expectation-that-ei-is_freed-means-ei-dentry-null.patch
+eventfs-do-not-invalidate-dentry-in-create_file-dir_dentry.patch
+eventfs-use-gfp_nofs-for-allocation-when-eventfs_mutex-is-held.patch
+eventfs-move-taking-of-inode_lock-into-dcache_dir_open_wrapper.patch
+eventfs-do-not-allow-null-parent-to-eventfs_start_creating.patch
+eventfs-make-sure-that-parent-d_inode-is-locked-in-creating-files-dirs.patch
+eventfs-fix-events-beyond-name_max-blocking-tasks.patch
+eventfs-have-event-files-and-directories-default-to-parent-uid-and-gid.patch
+eventfs-fix-file-and-directory-uid-and-gid-ownership.patch
+tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch
+eventfs-fix-bitwise-fields-for-is_events.patch
+eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch
+eventfs-stop-using-dcache_readdir-for-getdents.patch
+tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch
+eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch
+eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch
+eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch
+eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch
+eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch
+eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch
+eventfs-use-kcalloc-instead-of-kzalloc.patch
+eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch
+tracefs-remove-stale-update_gid-code.patch
+tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch
+eventfs-initialize-the-tracefs-inode-properly.patch
+tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch
+tracefs-dentry-lookup-crapectomy.patch
+eventfs-remove-unused-d_parent-pointer-field.patch
+eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch
+eventfs-get-rid-of-dentry-pointers-without-refcounts.patch
+eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch
+eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch
+eventfs-remove-fsnotify-functions-from-lookup.patch
+eventfs-keep-all-directory-links-at-1.patch
+nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
diff --git a/queue-6.6/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch b/queue-6.6/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch
new file mode 100644 (file)
index 0000000..786494d
--- /dev/null
@@ -0,0 +1,121 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:29 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:54 -0500
+Subject: tracefs: Avoid using the ei->dentry pointer unnecessarily
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.362950692@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 99c001cb617df409dac275a059d6c3f187a2da7a upstream.
+
+The eventfs_find_events() code tries to walk up the tree to find the
+event directory that a dentry belongs to, in order to then find the
+eventfs inode that is associated with that event directory.
+
+However, it uses an odd combination of walking the dentry parent,
+looking up the eventfs inode associated with that, and then looking up
+the dentry from there.  Repeat.
+
+But the code shouldn't have back-pointers to dentries in the first
+place, and it should just walk the dentry parenthood chain directly.
+
+Similarly, 'set_top_events_ownership()' looks up the dentry from the
+eventfs inode, but the only reason it wants a dentry is to look up the
+superblock in order to look up the root dentry.
+
+But it already has the real filesystem inode, which has that same
+superblock pointer.  So just pass in the superblock pointer using the
+information that's already there, instead of looking up extraneous data
+that is irrelevant.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.638645365@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   26 ++++++++++++--------------
+ 1 file changed, 12 insertions(+), 14 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -156,33 +156,30 @@ static int eventfs_set_attr(struct mnt_i
+       return ret;
+ }
+-static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
++static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
+ {
+-      struct inode *inode;
++      struct inode *root;
+       /* Only update if the "events" was on the top level */
+       if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+               return;
+       /* Get the tracefs root inode. */
+-      inode = d_inode(dentry->d_sb->s_root);
+-      ei->attr.uid = inode->i_uid;
+-      ei->attr.gid = inode->i_gid;
++      root = d_inode(sb->s_root);
++      ei->attr.uid = root->i_uid;
++      ei->attr.gid = root->i_gid;
+ }
+ static void set_top_events_ownership(struct inode *inode)
+ {
+       struct tracefs_inode *ti = get_tracefs(inode);
+       struct eventfs_inode *ei = ti->private;
+-      struct dentry *dentry;
+       /* The top events directory doesn't get automatically updated */
+       if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+               return;
+-      dentry = ei->dentry;
+-
+-      update_top_events_attr(ei, dentry);
++      update_top_events_attr(ei, inode->i_sb);
+       if (!(ei->attr.mode & EVENTFS_SAVE_UID))
+               inode->i_uid = ei->attr.uid;
+@@ -235,8 +232,10 @@ static struct eventfs_inode *eventfs_fin
+       mutex_lock(&eventfs_mutex);
+       do {
+-              /* The parent always has an ei, except for events itself */
+-              ei = dentry->d_parent->d_fsdata;
++              // The parent is stable because we do not do renames
++              dentry = dentry->d_parent;
++              // ... and directories always have d_fsdata
++              ei = dentry->d_fsdata;
+               /*
+                * If the ei is being freed, the ownership of the children
+@@ -246,12 +245,11 @@ static struct eventfs_inode *eventfs_fin
+                       ei = NULL;
+                       break;
+               }
+-
+-              dentry = ei->dentry;
++              // Walk upwards until you find the events inode
+       } while (!ei->is_events);
+       mutex_unlock(&eventfs_mutex);
+-      update_top_events_attr(ei, dentry);
++      update_top_events_attr(ei, dentry->d_sb);
+       return ei;
+ }
diff --git a/queue-6.6/tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch b/queue-6.6/tracefs-check-for-dentry-d_inode-exists-in-set_gid.patch
new file mode 100644 (file)
index 0000000..195e64e
--- /dev/null
@@ -0,0 +1,83 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:50 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:38 -0500
+Subject: tracefs: Check for dentry->d_inode exists in set_gid()
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, "Ubisectech Sirius" <bugreport@ubisectech.com>
+Message-ID: <20240206120951.738927603@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit ad579864637af46447208254719943179b69d41a upstream.
+
+If a getdents() is called on the tracefs directory but does not get all
+the files, it can leave a "cursor" dentry in the d_subdirs list of tracefs
+dentry. This cursor dentry does not have a d_inode. Before referencing
+the tracefs_inode from the dentry, the d_inode must first be checked
+for NULL. If it is NULL, then the dentry has no tracefs_inode and can
+be ignored.
+
+The following caused a crash:
+
+ #define getdents64(fd, dirp, count) syscall(SYS_getdents64, fd, dirp, count)
+ #define BUF_SIZE 256
+ #define TDIR "/tmp/file0"
+
+ int main(void)
+ {
+       char buf[BUF_SIZE];
+       int fd;
+       int n, ret;
+
+       mkdir(TDIR, 0777);
+       mount(NULL, TDIR, "tracefs", 0, NULL);
+       fd = openat(AT_FDCWD, TDIR, O_RDONLY);
+       n = getdents64(fd, buf, BUF_SIZE);
+       ret = mount(NULL, TDIR, NULL, MS_NOSUID|MS_REMOUNT|MS_RELATIME|MS_LAZYTIME,
+                   "gid=1000");
+       return 0;
+ }
+
+That's because the 256 BUF_SIZE was not big enough to read all the
+dentries of the tracefs file system and it left a "cursor" dentry in the
+subdirs of the tracefs root inode. Then on remounting with "gid=1000",
+it would cause an iteration of all dentries which hit:
+
+       ti = get_tracefs(dentry->d_inode);
+       if (ti && (ti->flags & TRACEFS_EVENT_INODE))
+               eventfs_update_gid(dentry, gid);
+
+Which crashed because of the dereference of the cursor dentry which had a NULL
+d_inode.
+
+In the subdir loop of the dentry lookup of set_gid(), if a child has a
+NULL d_inode, simply skip it.
+
+Link: https://lore.kernel.org/all/20240102135637.3a21fb10@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20240102151249.05da244d@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 7e8358edf503e ("eventfs: Fix file and directory uid and gid ownership")
+Reported-by: "Ubisectech Sirius" <bugreport@ubisectech.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -215,6 +215,10 @@ resume:
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+               next = tmp->next;
++              /* Note, getdents() can add a cursor dentry with no inode */
++              if (!dentry->d_inode)
++                      continue;
++
+               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+               change_gid(dentry, gid);
diff --git a/queue-6.6/tracefs-dentry-lookup-crapectomy.patch b/queue-6.6/tracefs-dentry-lookup-crapectomy.patch
new file mode 100644 (file)
index 0000000..be37bd6
--- /dev/null
@@ -0,0 +1,541 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:43 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:55 -0500
+Subject: tracefs: dentry lookup crapectomy
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Ajay Kaher <ajay.kaher@broadcom.com>
+Message-ID: <20240206120954.518365320@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 49304c2b93e4f7468b51ef717cbe637981397115 upstream.
+
+The dentry lookup for eventfs files was very broken, and had lots of
+signs of the old situation where the filesystem names were all created
+statically in the dentry tree, rather than being looked up dynamically
+based on the eventfs data structures.
+
+You could see it in the naming - how it claimed to "create" dentries
+rather than just look up the dentries that were given it.
+
+You could see it in various nonsensical and very incorrect operations,
+like using "simple_lookup()" on the dentries that were passed in, which
+only results in those dentries becoming negative dentries.  Which meant
+that any other lookup would possibly return ENOENT if it saw that
+negative dentry before the data was then later filled in.
+
+You could see it in the immense amount of nonsensical code that didn't
+actually just do lookups.
+
+Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240131233227.73db55e1@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |  275 ++++++++---------------------------------------
+ fs/tracefs/inode.c       |   69 -----------
+ fs/tracefs/internal.h    |    3 
+ 3 files changed, 50 insertions(+), 297 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -230,7 +230,6 @@ static struct eventfs_inode *eventfs_fin
+ {
+       struct eventfs_inode *ei;
+-      mutex_lock(&eventfs_mutex);
+       do {
+               // The parent is stable because we do not do renames
+               dentry = dentry->d_parent;
+@@ -247,7 +246,6 @@ static struct eventfs_inode *eventfs_fin
+               }
+               // Walk upwards until you find the events inode
+       } while (!ei->is_events);
+-      mutex_unlock(&eventfs_mutex);
+       update_top_events_attr(ei, dentry->d_sb);
+@@ -280,11 +278,10 @@ static void update_inode_attr(struct den
+ }
+ /**
+- * create_file - create a file in the tracefs filesystem
+- * @name: the name of the file to create.
++ * lookup_file - look up a file in the tracefs filesystem
++ * @dentry: the dentry to look up
+  * @mode: the permission that the file should have.
+  * @attr: saved attributes changed by user
+- * @parent: parent dentry for this file.
+  * @data: something that the caller will want to get to later on.
+  * @fop: struct file_operations that should be used for this file.
+  *
+@@ -292,13 +289,13 @@ static void update_inode_attr(struct den
+  * directory. The inode.i_private pointer will point to @data in the open()
+  * call.
+  */
+-static struct dentry *create_file(const char *name, umode_t mode,
++static struct dentry *lookup_file(struct dentry *dentry,
++                                umode_t mode,
+                                 struct eventfs_attr *attr,
+-                                struct dentry *parent, void *data,
++                                void *data,
+                                 const struct file_operations *fop)
+ {
+       struct tracefs_inode *ti;
+-      struct dentry *dentry;
+       struct inode *inode;
+       if (!(mode & S_IFMT))
+@@ -307,15 +304,9 @@ static struct dentry *create_file(const
+       if (WARN_ON_ONCE(!S_ISREG(mode)))
+               return NULL;
+-      WARN_ON_ONCE(!parent);
+-      dentry = eventfs_start_creating(name, parent);
+-
+-      if (IS_ERR(dentry))
+-              return dentry;
+-
+       inode = tracefs_get_inode(dentry->d_sb);
+       if (unlikely(!inode))
+-              return eventfs_failed_creating(dentry);
++              return ERR_PTR(-ENOMEM);
+       /* If the user updated the directory's attributes, use them */
+       update_inode_attr(dentry, inode, attr, mode);
+@@ -329,32 +320,29 @@ static struct dentry *create_file(const
+       ti = get_tracefs(inode);
+       ti->flags |= TRACEFS_EVENT_INODE;
+-      d_instantiate(dentry, inode);
++
++      d_add(dentry, inode);
+       fsnotify_create(dentry->d_parent->d_inode, dentry);
+-      return eventfs_end_creating(dentry);
++      return dentry;
+ };
+ /**
+- * create_dir - create a dir in the tracefs filesystem
++ * lookup_dir_entry - look up a dir in the tracefs filesystem
++ * @dentry: the directory to look up
+  * @ei: the eventfs_inode that represents the directory to create
+- * @parent: parent dentry for this file.
+  *
+- * This function will create a dentry for a directory represented by
++ * This function will look up a dentry for a directory represented by
+  * a eventfs_inode.
+  */
+-static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
++static struct dentry *lookup_dir_entry(struct dentry *dentry,
++      struct eventfs_inode *pei, struct eventfs_inode *ei)
+ {
+       struct tracefs_inode *ti;
+-      struct dentry *dentry;
+       struct inode *inode;
+-      dentry = eventfs_start_creating(ei->name, parent);
+-      if (IS_ERR(dentry))
+-              return dentry;
+-
+       inode = tracefs_get_inode(dentry->d_sb);
+       if (unlikely(!inode))
+-              return eventfs_failed_creating(dentry);
++              return ERR_PTR(-ENOMEM);
+       /* If the user updated the directory's attributes, use them */
+       update_inode_attr(dentry, inode, &ei->attr,
+@@ -371,11 +359,14 @@ static struct dentry *create_dir(struct
+       /* Only directories have ti->private set to an ei, not files */
+       ti->private = ei;
++      dentry->d_fsdata = ei;
++        ei->dentry = dentry;  // Remove me!
++
+       inc_nlink(inode);
+-      d_instantiate(dentry, inode);
++      d_add(dentry, inode);
+       inc_nlink(dentry->d_parent->d_inode);
+       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+-      return eventfs_end_creating(dentry);
++      return dentry;
+ }
+ static void free_ei(struct eventfs_inode *ei)
+@@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct t
+ }
+ /**
+- * create_file_dentry - create a dentry for a file of an eventfs_inode
++ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+  * @ei: the eventfs_inode that the file will be created under
+  * @idx: the index into the d_children[] of the @ei
+  * @parent: The parent dentry of the created file.
+@@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct t
+  * address located at @e_dentry.
+  */
+ static struct dentry *
+-create_file_dentry(struct eventfs_inode *ei, int idx,
+-                 struct dentry *parent, const char *name, umode_t mode, void *data,
++lookup_file_dentry(struct dentry *dentry,
++                 struct eventfs_inode *ei, int idx,
++                 umode_t mode, void *data,
+                  const struct file_operations *fops)
+ {
+       struct eventfs_attr *attr = NULL;
+       struct dentry **e_dentry = &ei->d_children[idx];
+-      struct dentry *dentry;
+-      WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+-
+-      mutex_lock(&eventfs_mutex);
+-      if (ei->is_freed) {
+-              mutex_unlock(&eventfs_mutex);
+-              return NULL;
+-      }
+-      /* If the e_dentry already has a dentry, use it */
+-      if (*e_dentry) {
+-              dget(*e_dentry);
+-              mutex_unlock(&eventfs_mutex);
+-              return *e_dentry;
+-      }
+-
+-      /* ei->entry_attrs are protected by SRCU */
+       if (ei->entry_attrs)
+               attr = &ei->entry_attrs[idx];
+-      mutex_unlock(&eventfs_mutex);
+-
+-      dentry = create_file(name, mode, attr, parent, data, fops);
+-
+-      mutex_lock(&eventfs_mutex);
+-
+-      if (IS_ERR_OR_NULL(dentry)) {
+-              /*
+-               * When the mutex was released, something else could have
+-               * created the dentry for this e_dentry. In which case
+-               * use that one.
+-               *
+-               * If ei->is_freed is set, the e_dentry is currently on its
+-               * way to being freed, don't return it. If e_dentry is NULL
+-               * it means it was already freed.
+-               */
+-              if (ei->is_freed) {
+-                      dentry = NULL;
+-              } else {
+-                      dentry = *e_dentry;
+-                      dget(dentry);
+-              }
+-              mutex_unlock(&eventfs_mutex);
+-              return dentry;
+-      }
++      dentry->d_fsdata = ei;          // NOTE: ei of _parent_
++      lookup_file(dentry, mode, attr, data, fops);
+-      if (!*e_dentry && !ei->is_freed) {
+-              *e_dentry = dentry;
+-              dentry->d_fsdata = ei;
+-      } else {
+-              /*
+-               * Should never happen unless we get here due to being freed.
+-               * Otherwise it means two dentries exist with the same name.
+-               */
+-              WARN_ON_ONCE(!ei->is_freed);
+-              dentry = NULL;
+-      }
+-      mutex_unlock(&eventfs_mutex);
+-
+-      return dentry;
+-}
+-
+-/**
+- * eventfs_post_create_dir - post create dir routine
+- * @ei: eventfs_inode of recently created dir
+- *
+- * Map the meta-data of files within an eventfs dir to their parent dentry
+- */
+-static void eventfs_post_create_dir(struct eventfs_inode *ei)
+-{
+-      struct eventfs_inode *ei_child;
+-
+-      lockdep_assert_held(&eventfs_mutex);
+-
+-      /* srcu lock already held */
+-      /* fill parent-child relation */
+-      list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                               srcu_read_lock_held(&eventfs_srcu)) {
+-              ei_child->d_parent = ei->dentry;
+-      }
+-}
+-
+-/**
+- * create_dir_dentry - Create a directory dentry for the eventfs_inode
+- * @pei: The eventfs_inode parent of ei.
+- * @ei: The eventfs_inode to create the directory for
+- * @parent: The dentry of the parent of this directory
+- *
+- * This creates and attaches a directory dentry to the eventfs_inode @ei.
+- */
+-static struct dentry *
+-create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
+-                struct dentry *parent)
+-{
+-      struct dentry *dentry = NULL;
+-
+-      WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+-
+-      mutex_lock(&eventfs_mutex);
+-      if (pei->is_freed || ei->is_freed) {
+-              mutex_unlock(&eventfs_mutex);
+-              return NULL;
+-      }
+-      if (ei->dentry) {
+-              /* If the eventfs_inode already has a dentry, use it */
+-              dentry = ei->dentry;
+-              dget(dentry);
+-              mutex_unlock(&eventfs_mutex);
+-              return dentry;
+-      }
+-      mutex_unlock(&eventfs_mutex);
+-
+-      dentry = create_dir(ei, parent);
+-
+-      mutex_lock(&eventfs_mutex);
+-
+-      if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
+-              /*
+-               * When the mutex was released, something else could have
+-               * created the dentry for this e_dentry. In which case
+-               * use that one.
+-               *
+-               * If ei->is_freed is set, the e_dentry is currently on its
+-               * way to being freed.
+-               */
+-              dentry = ei->dentry;
+-              if (dentry)
+-                      dget(dentry);
+-              mutex_unlock(&eventfs_mutex);
+-              return dentry;
+-      }
+-
+-      if (!ei->dentry && !ei->is_freed) {
+-              ei->dentry = dentry;
+-              eventfs_post_create_dir(ei);
+-              dentry->d_fsdata = ei;
+-      } else {
+-              /*
+-               * Should never happen unless we get here due to being freed.
+-               * Otherwise it means two dentries exist with the same name.
+-               */
+-              WARN_ON_ONCE(!ei->is_freed);
+-              dentry = NULL;
+-      }
+-      mutex_unlock(&eventfs_mutex);
++      *e_dentry = dentry;     // Remove me
+       return dentry;
+ }
+@@ -607,79 +462,49 @@ static struct dentry *eventfs_root_looku
+                                         struct dentry *dentry,
+                                         unsigned int flags)
+ {
+-      const struct file_operations *fops;
+-      const struct eventfs_entry *entry;
+       struct eventfs_inode *ei_child;
+       struct tracefs_inode *ti;
+       struct eventfs_inode *ei;
+-      struct dentry *ei_dentry = NULL;
+-      struct dentry *ret = NULL;
+-      struct dentry *d;
+       const char *name = dentry->d_name.name;
+-      umode_t mode;
+-      void *data;
+-      int idx;
+-      int i;
+-      int r;
+       ti = get_tracefs(dir);
+       if (!(ti->flags & TRACEFS_EVENT_INODE))
+-              return NULL;
++              return ERR_PTR(-EIO);
+-      /* Grab srcu to prevent the ei from going away */
+-      idx = srcu_read_lock(&eventfs_srcu);
+-
+-      /*
+-       * Grab the eventfs_mutex to consistent value from ti->private.
+-       * This s
+-       */
+       mutex_lock(&eventfs_mutex);
+-      ei = READ_ONCE(ti->private);
+-      if (ei && !ei->is_freed)
+-              ei_dentry = READ_ONCE(ei->dentry);
+-      mutex_unlock(&eventfs_mutex);
+-      if (!ei || !ei_dentry)
++      ei = ti->private;
++      if (!ei || ei->is_freed)
+               goto out;
+-      data = ei->data;
+-
+-      list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                               srcu_read_lock_held(&eventfs_srcu)) {
++      list_for_each_entry(ei_child, &ei->children, list) {
+               if (strcmp(ei_child->name, name) != 0)
+                       continue;
+-              ret = simple_lookup(dir, dentry, flags);
+-              if (IS_ERR(ret))
++              if (ei_child->is_freed)
+                       goto out;
+-              d = create_dir_dentry(ei, ei_child, ei_dentry);
+-              dput(d);
++              lookup_dir_entry(dentry, ei, ei_child);
+               goto out;
+       }
+-      for (i = 0; i < ei->nr_entries; i++) {
+-              entry = &ei->entries[i];
+-              if (strcmp(name, entry->name) == 0) {
+-                      void *cdata = data;
+-                      mutex_lock(&eventfs_mutex);
+-                      /* If ei->is_freed, then the event itself may be too */
+-                      if (!ei->is_freed)
+-                              r = entry->callback(name, &mode, &cdata, &fops);
+-                      else
+-                              r = -1;
+-                      mutex_unlock(&eventfs_mutex);
+-                      if (r <= 0)
+-                              continue;
+-                      ret = simple_lookup(dir, dentry, flags);
+-                      if (IS_ERR(ret))
+-                              goto out;
+-                      d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
+-                      dput(d);
+-                      break;
+-              }
++      for (int i = 0; i < ei->nr_entries; i++) {
++              void *data;
++              umode_t mode;
++              const struct file_operations *fops;
++              const struct eventfs_entry *entry = &ei->entries[i];
++
++              if (strcmp(name, entry->name) != 0)
++                      continue;
++
++              data = ei->data;
++              if (entry->callback(name, &mode, &data, &fops) <= 0)
++                      goto out;
++
++              lookup_file_dentry(dentry, ei, i, mode, data, fops);
++              goto out;
+       }
+  out:
+-      srcu_read_unlock(&eventfs_srcu, idx);
+-      return ret;
++      mutex_unlock(&eventfs_mutex);
++      return NULL;
+ }
+ /*
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -495,75 +495,6 @@ struct dentry *tracefs_end_creating(stru
+       return dentry;
+ }
+-/**
+- * eventfs_start_creating - start the process of creating a dentry
+- * @name: Name of the file created for the dentry
+- * @parent: The parent dentry where this dentry will be created
+- *
+- * This is a simple helper function for the dynamically created eventfs
+- * files. When the directory of the eventfs files are accessed, their
+- * dentries are created on the fly. This function is used to start that
+- * process.
+- */
+-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
+-{
+-      struct dentry *dentry;
+-      int error;
+-
+-      /* Must always have a parent. */
+-      if (WARN_ON_ONCE(!parent))
+-              return ERR_PTR(-EINVAL);
+-
+-      error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+-                            &tracefs_mount_count);
+-      if (error)
+-              return ERR_PTR(error);
+-
+-      if (unlikely(IS_DEADDIR(parent->d_inode)))
+-              dentry = ERR_PTR(-ENOENT);
+-      else
+-              dentry = lookup_one_len(name, parent, strlen(name));
+-
+-      if (!IS_ERR(dentry) && dentry->d_inode) {
+-              dput(dentry);
+-              dentry = ERR_PTR(-EEXIST);
+-      }
+-
+-      if (IS_ERR(dentry))
+-              simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+-
+-      return dentry;
+-}
+-
+-/**
+- * eventfs_failed_creating - clean up a failed eventfs dentry creation
+- * @dentry: The dentry to clean up
+- *
+- * If after calling eventfs_start_creating(), a failure is detected, the
+- * resources created by eventfs_start_creating() needs to be cleaned up. In
+- * that case, this function should be called to perform that clean up.
+- */
+-struct dentry *eventfs_failed_creating(struct dentry *dentry)
+-{
+-      dput(dentry);
+-      simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+-      return NULL;
+-}
+-
+-/**
+- * eventfs_end_creating - Finish the process of creating a eventfs dentry
+- * @dentry: The dentry that has successfully been created.
+- *
+- * This function is currently just a place holder to match
+- * eventfs_start_creating(). In case any synchronization needs to be added,
+- * this function will be used to implement that without having to modify
+- * the callers of eventfs_start_creating().
+- */
+-struct dentry *eventfs_end_creating(struct dentry *dentry)
+-{
+-      return dentry;
+-}
+-
+ /* Find the inode that this will use for default */
+ static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
+ {
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -80,9 +80,6 @@ struct dentry *tracefs_start_creating(co
+ struct dentry *tracefs_end_creating(struct dentry *dentry);
+ struct dentry *tracefs_failed_creating(struct dentry *dentry);
+ struct inode *tracefs_get_inode(struct super_block *sb);
+-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+-struct dentry *eventfs_failed_creating(struct dentry *dentry);
+-struct dentry *eventfs_end_creating(struct dentry *dentry);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+ #endif /* _TRACEFS_INTERNAL_H */
diff --git a/queue-6.6/tracefs-eventfs-modify-mismatched-function-name.patch b/queue-6.6/tracefs-eventfs-modify-mismatched-function-name.patch
new file mode 100644 (file)
index 0000000..735e872
--- /dev/null
@@ -0,0 +1,38 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:20 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:16 -0500
+Subject: tracefs/eventfs: Modify mismatched function name
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Abaci Robot <abaci@linux.alibaba.com>, Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Message-ID: <20240206120948.165080330@rostedt.homelinux.com>
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+commit 64bf2f685c795e75dd855761c75a193ee5998731 upstream.
+
+No functional modification involved.
+
+fs/tracefs/event_inode.c:864: warning: expecting prototype for eventfs_remove(). Prototype was for eventfs_remove_dir() instead.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231019031353.73846-1-jiapeng.chong@linux.alibaba.com
+
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6939
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -856,7 +856,7 @@ static void unhook_dentry(struct dentry
+       }
+ }
+ /**
+- * eventfs_remove - remove eventfs dir or file from list
++ * eventfs_remove_dir - remove eventfs dir or file from list
+  * @ei: eventfs_inode to be removed.
+  *
+  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
diff --git a/queue-6.6/tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch b/queue-6.6/tracefs-eventfs-use-root-and-instance-inodes-as-default-ownership.patch
new file mode 100644 (file)
index 0000000..8c4df65
--- /dev/null
@@ -0,0 +1,482 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:11:21 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:42 -0500
+Subject: tracefs/eventfs: Use root and instance inodes as default ownership
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Al Viro <viro@ZenIV.linux.org.uk>, Christian Brauner <brauner@kernel.org>
+Message-ID: <20240206120952.401268456@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit 8186fff7ab649085e2c60d032d9a20a85af1d87c upstream.
+
+Instead of walking the dentries on mount/remount to update the gid values of
+all the dentries if a gid option is specified on mount, just update the root
+inode. Add .getattr, .setattr, and .permission on the tracefs inode
+operations to update the permissions of the files and directories.
+
+For all files and directories in the top level instance:
+
+ /sys/kernel/tracing/*
+
+It will use the root inode as the default permissions. The inode that
+represents: /sys/kernel/tracing (or wherever it is mounted).
+
+When an instance is created:
+
+ mkdir /sys/kernel/tracing/instances/foo
+
+The directory "foo" and all its files and directories underneath will use
+the default of what foo is when it was created. A remount of tracefs will
+not affect it.
+
+If a user were to modify the ownership of any file or directory in
+tracefs, that file or directory will also no longer be modified by a
+change in ownership from a remount.
+
+The events directory, if it is in the top level instance, will use the
+tracefs root inode as the default ownership for itself and all the files and
+directories below it.
+
+For the events directory in an instance ("foo"), it will keep the ownership
+of what it was when it was created, and that will be used as the default
+ownership for the files and directories beneath it.
+
+Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wjVdGkjDXBbvLn2wbZnqP4UsH46E3gqJ9m7UG6DpX2+WA@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240103215016.1e0c9811@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   79 ++++++++++++++++++
+ fs/tracefs/inode.c       |  198 ++++++++++++++++++++++++++---------------------
+ fs/tracefs/internal.h    |    3 
+ 3 files changed, 190 insertions(+), 90 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -45,6 +45,7 @@ enum {
+       EVENTFS_SAVE_MODE       = BIT(16),
+       EVENTFS_SAVE_UID        = BIT(17),
+       EVENTFS_SAVE_GID        = BIT(18),
++      EVENTFS_TOPLEVEL        = BIT(19),
+ };
+ #define EVENTFS_MODE_MASK     (EVENTFS_SAVE_MODE - 1)
+@@ -115,10 +116,17 @@ static int eventfs_set_attr(struct mnt_i
+                * The events directory dentry is never freed, unless its
+                * part of an instance that is deleted. It's attr is the
+                * default for its child files and directories.
+-               * Do not update it. It's not used for its own mode or ownership
++               * Do not update it. It's not used for its own mode or ownership.
+                */
+-              if (!ei->is_events)
++              if (ei->is_events) {
++                      /* But it still needs to know if it was modified */
++                      if (iattr->ia_valid & ATTR_UID)
++                              ei->attr.mode |= EVENTFS_SAVE_UID;
++                      if (iattr->ia_valid & ATTR_GID)
++                              ei->attr.mode |= EVENTFS_SAVE_GID;
++              } else {
+                       update_attr(&ei->attr, iattr);
++              }
+       } else {
+               name = dentry->d_name.name;
+@@ -136,9 +144,66 @@ static int eventfs_set_attr(struct mnt_i
+       return ret;
+ }
++static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
++{
++      struct inode *inode;
++
++      /* Only update if the "events" was on the top level */
++      if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
++              return;
++
++      /* Get the tracefs root inode. */
++      inode = d_inode(dentry->d_sb->s_root);
++      ei->attr.uid = inode->i_uid;
++      ei->attr.gid = inode->i_gid;
++}
++
++static void set_top_events_ownership(struct inode *inode)
++{
++      struct tracefs_inode *ti = get_tracefs(inode);
++      struct eventfs_inode *ei = ti->private;
++      struct dentry *dentry;
++
++      /* The top events directory doesn't get automatically updated */
++      if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
++              return;
++
++      dentry = ei->dentry;
++
++      update_top_events_attr(ei, dentry);
++
++      if (!(ei->attr.mode & EVENTFS_SAVE_UID))
++              inode->i_uid = ei->attr.uid;
++
++      if (!(ei->attr.mode & EVENTFS_SAVE_GID))
++              inode->i_gid = ei->attr.gid;
++}
++
++static int eventfs_get_attr(struct mnt_idmap *idmap,
++                          const struct path *path, struct kstat *stat,
++                          u32 request_mask, unsigned int flags)
++{
++      struct dentry *dentry = path->dentry;
++      struct inode *inode = d_backing_inode(dentry);
++
++      set_top_events_ownership(inode);
++
++      generic_fillattr(idmap, request_mask, inode, stat);
++      return 0;
++}
++
++static int eventfs_permission(struct mnt_idmap *idmap,
++                            struct inode *inode, int mask)
++{
++      set_top_events_ownership(inode);
++      return generic_permission(idmap, inode, mask);
++}
++
+ static const struct inode_operations eventfs_root_dir_inode_operations = {
+       .lookup         = eventfs_root_lookup,
+       .setattr        = eventfs_set_attr,
++      .getattr        = eventfs_get_attr,
++      .permission     = eventfs_permission,
+ };
+ static const struct inode_operations eventfs_file_inode_operations = {
+@@ -174,6 +239,8 @@ static struct eventfs_inode *eventfs_fin
+       } while (!ei->is_events);
+       mutex_unlock(&eventfs_mutex);
++      update_top_events_attr(ei, dentry);
++
+       return ei;
+ }
+@@ -887,6 +954,14 @@ struct eventfs_inode *eventfs_create_eve
+       uid = d_inode(dentry->d_parent)->i_uid;
+       gid = d_inode(dentry->d_parent)->i_gid;
++      /*
++       * If the events directory is of the top instance, then parent
++       * is NULL. Set the attr.mode to reflect this and its permissions will
++       * default to the tracefs root dentry.
++       */
++      if (!parent)
++              ei->attr.mode = EVENTFS_TOPLEVEL;
++
+       /* This is used as the default ownership of the files and directories */
+       ei->attr.uid = uid;
+       ei->attr.gid = gid;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -91,6 +91,7 @@ static int tracefs_syscall_mkdir(struct
+                                struct inode *inode, struct dentry *dentry,
+                                umode_t mode)
+ {
++      struct tracefs_inode *ti;
+       char *name;
+       int ret;
+@@ -99,6 +100,15 @@ static int tracefs_syscall_mkdir(struct
+               return -ENOMEM;
+       /*
++       * This is a new directory that does not take the default of
++       * the rootfs. It becomes the default permissions for all the
++       * files and directories underneath it.
++       */
++      ti = get_tracefs(inode);
++      ti->flags |= TRACEFS_INSTANCE_INODE;
++      ti->private = inode;
++
++      /*
+        * The mkdir call can call the generic functions that create
+        * the files within the tracefs system. It is up to the individual
+        * mkdir routine to handle races.
+@@ -141,10 +151,76 @@ static int tracefs_syscall_rmdir(struct
+       return ret;
+ }
+-static const struct inode_operations tracefs_dir_inode_operations = {
++static void set_tracefs_inode_owner(struct inode *inode)
++{
++      struct tracefs_inode *ti = get_tracefs(inode);
++      struct inode *root_inode = ti->private;
++
++      /*
++       * If this inode has never been referenced, then update
++       * the permissions to the superblock.
++       */
++      if (!(ti->flags & TRACEFS_UID_PERM_SET))
++              inode->i_uid = root_inode->i_uid;
++
++      if (!(ti->flags & TRACEFS_GID_PERM_SET))
++              inode->i_gid = root_inode->i_gid;
++}
++
++static int tracefs_permission(struct mnt_idmap *idmap,
++                            struct inode *inode, int mask)
++{
++      set_tracefs_inode_owner(inode);
++      return generic_permission(idmap, inode, mask);
++}
++
++static int tracefs_getattr(struct mnt_idmap *idmap,
++                         const struct path *path, struct kstat *stat,
++                         u32 request_mask, unsigned int flags)
++{
++      struct inode *inode = d_backing_inode(path->dentry);
++
++      set_tracefs_inode_owner(inode);
++      generic_fillattr(idmap, request_mask, inode, stat);
++      return 0;
++}
++
++static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
++                         struct iattr *attr)
++{
++      unsigned int ia_valid = attr->ia_valid;
++      struct inode *inode = d_inode(dentry);
++      struct tracefs_inode *ti = get_tracefs(inode);
++
++      if (ia_valid & ATTR_UID)
++              ti->flags |= TRACEFS_UID_PERM_SET;
++
++      if (ia_valid & ATTR_GID)
++              ti->flags |= TRACEFS_GID_PERM_SET;
++
++      return simple_setattr(idmap, dentry, attr);
++}
++
++static const struct inode_operations tracefs_instance_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .mkdir          = tracefs_syscall_mkdir,
+       .rmdir          = tracefs_syscall_rmdir,
++      .permission     = tracefs_permission,
++      .getattr        = tracefs_getattr,
++      .setattr        = tracefs_setattr,
++};
++
++static const struct inode_operations tracefs_dir_inode_operations = {
++      .lookup         = simple_lookup,
++      .permission     = tracefs_permission,
++      .getattr        = tracefs_getattr,
++      .setattr        = tracefs_setattr,
++};
++
++static const struct inode_operations tracefs_file_inode_operations = {
++      .permission     = tracefs_permission,
++      .getattr        = tracefs_getattr,
++      .setattr        = tracefs_setattr,
+ };
+ struct inode *tracefs_get_inode(struct super_block *sb)
+@@ -183,87 +259,6 @@ struct tracefs_fs_info {
+       struct tracefs_mount_opts mount_opts;
+ };
+-static void change_gid(struct dentry *dentry, kgid_t gid)
+-{
+-      if (!dentry->d_inode)
+-              return;
+-      dentry->d_inode->i_gid = gid;
+-}
+-
+-/*
+- * Taken from d_walk, but without he need for handling renames.
+- * Nothing can be renamed while walking the list, as tracefs
+- * does not support renames. This is only called when mounting
+- * or remounting the file system, to set all the files to
+- * the given gid.
+- */
+-static void set_gid(struct dentry *parent, kgid_t gid)
+-{
+-      struct dentry *this_parent;
+-      struct list_head *next;
+-
+-      this_parent = parent;
+-      spin_lock(&this_parent->d_lock);
+-
+-      change_gid(this_parent, gid);
+-repeat:
+-      next = this_parent->d_subdirs.next;
+-resume:
+-      while (next != &this_parent->d_subdirs) {
+-              struct tracefs_inode *ti;
+-              struct list_head *tmp = next;
+-              struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+-              next = tmp->next;
+-
+-              /* Note, getdents() can add a cursor dentry with no inode */
+-              if (!dentry->d_inode)
+-                      continue;
+-
+-              spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+-
+-              change_gid(dentry, gid);
+-
+-              /* If this is the events directory, update that too */
+-              ti = get_tracefs(dentry->d_inode);
+-              if (ti && (ti->flags & TRACEFS_EVENT_INODE))
+-                      eventfs_update_gid(dentry, gid);
+-
+-              if (!list_empty(&dentry->d_subdirs)) {
+-                      spin_unlock(&this_parent->d_lock);
+-                      spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+-                      this_parent = dentry;
+-                      spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+-                      goto repeat;
+-              }
+-              spin_unlock(&dentry->d_lock);
+-      }
+-      /*
+-       * All done at this level ... ascend and resume the search.
+-       */
+-      rcu_read_lock();
+-ascend:
+-      if (this_parent != parent) {
+-              struct dentry *child = this_parent;
+-              this_parent = child->d_parent;
+-
+-              spin_unlock(&child->d_lock);
+-              spin_lock(&this_parent->d_lock);
+-
+-              /* go into the first sibling still alive */
+-              do {
+-                      next = child->d_child.next;
+-                      if (next == &this_parent->d_subdirs)
+-                              goto ascend;
+-                      child = list_entry(next, struct dentry, d_child);
+-              } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+-              rcu_read_unlock();
+-              goto resume;
+-      }
+-      rcu_read_unlock();
+-      spin_unlock(&this_parent->d_lock);
+-      return;
+-}
+-
+ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+ {
+       substring_t args[MAX_OPT_ARGS];
+@@ -336,10 +331,8 @@ static int tracefs_apply_options(struct
+       if (!remount || opts->opts & BIT(Opt_uid))
+               inode->i_uid = opts->uid;
+-      if (!remount || opts->opts & BIT(Opt_gid)) {
+-              /* Set all the group ids to the mount option */
+-              set_gid(sb->s_root, opts->gid);
+-      }
++      if (!remount || opts->opts & BIT(Opt_gid))
++              inode->i_gid = opts->gid;
+       return 0;
+ }
+@@ -573,6 +566,26 @@ struct dentry *eventfs_end_creating(stru
+       return dentry;
+ }
++/* Find the inode that this will use for default */
++static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
++{
++      struct tracefs_inode *ti;
++
++      /* If parent is NULL then use root inode */
++      if (!parent)
++              return d_inode(inode->i_sb->s_root);
++
++      /* Find the inode that is flagged as an instance or the root inode */
++      while (!IS_ROOT(parent)) {
++              ti = get_tracefs(d_inode(parent));
++              if (ti->flags & TRACEFS_INSTANCE_INODE)
++                      break;
++              parent = parent->d_parent;
++      }
++
++      return d_inode(parent);
++}
++
+ /**
+  * tracefs_create_file - create a file in the tracefs filesystem
+  * @name: a pointer to a string containing the name of the file to create.
+@@ -603,6 +616,7 @@ struct dentry *tracefs_create_file(const
+                                  struct dentry *parent, void *data,
+                                  const struct file_operations *fops)
+ {
++      struct tracefs_inode *ti;
+       struct dentry *dentry;
+       struct inode *inode;
+@@ -621,7 +635,11 @@ struct dentry *tracefs_create_file(const
+       if (unlikely(!inode))
+               return tracefs_failed_creating(dentry);
++      ti = get_tracefs(inode);
++      ti->private = instance_inode(parent, inode);
++
+       inode->i_mode = mode;
++      inode->i_op = &tracefs_file_inode_operations;
+       inode->i_fop = fops ? fops : &tracefs_file_operations;
+       inode->i_private = data;
+       inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+@@ -634,6 +652,7 @@ struct dentry *tracefs_create_file(const
+ static struct dentry *__create_dir(const char *name, struct dentry *parent,
+                                  const struct inode_operations *ops)
+ {
++      struct tracefs_inode *ti;
+       struct dentry *dentry = tracefs_start_creating(name, parent);
+       struct inode *inode;
+@@ -651,6 +670,9 @@ static struct dentry *__create_dir(const
+       inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+       inode->i_gid = d_inode(dentry->d_parent)->i_gid;
++      ti = get_tracefs(inode);
++      ti->private = instance_inode(parent, inode);
++
+       /* directory inodes start off with i_nlink == 2 (for "." entry) */
+       inc_nlink(inode);
+       d_instantiate(dentry, inode);
+@@ -681,7 +703,7 @@ struct dentry *tracefs_create_dir(const
+       if (security_locked_down(LOCKDOWN_TRACEFS))
+               return NULL;
+-      return __create_dir(name, parent, &simple_dir_inode_operations);
++      return __create_dir(name, parent, &tracefs_dir_inode_operations);
+ }
+ /**
+@@ -712,7 +734,7 @@ __init struct dentry *tracefs_create_ins
+       if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+               return NULL;
+-      dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
++      dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations);
+       if (!dentry)
+               return NULL;
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -5,6 +5,9 @@
+ enum {
+       TRACEFS_EVENT_INODE             = BIT(1),
+       TRACEFS_EVENT_TOP_INODE         = BIT(2),
++      TRACEFS_GID_PERM_SET            = BIT(3),
++      TRACEFS_UID_PERM_SET            = BIT(4),
++      TRACEFS_INSTANCE_INODE          = BIT(5),
+ };
+ struct tracefs_inode {
diff --git a/queue-6.6/tracefs-remove-stale-update_gid-code.patch b/queue-6.6/tracefs-remove-stale-update_gid-code.patch
new file mode 100644 (file)
index 0000000..a11f742
--- /dev/null
@@ -0,0 +1,83 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:09:26 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:51 -0500
+Subject: tracefs: remove stale update_gid code
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Message-ID: <20240206120953.870617585@rostedt.homelinux.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 29142dc92c37d3259a33aef15b03e6ee25b0d188 upstream.
+
+The 'eventfs_update_gid()' function is no longer called, so remove it
+(and the helper function it uses).
+
+Link: https://lore.kernel.org/all/CAHk-=wj+DsZZ=2iTUkJ-Nojs9fjYMvPs1NuoM3yK7aTDtJfPYQ@mail.gmail.com/
+
+Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |   38 --------------------------------------
+ fs/tracefs/internal.h    |    1 -
+ 2 files changed, 39 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -281,44 +281,6 @@ static void update_inode_attr(struct den
+               inode->i_gid = attr->gid;
+ }
+-static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
+-{
+-      struct eventfs_inode *ei_child;
+-
+-      /* at most we have events/system/event */
+-      if (WARN_ON_ONCE(level > 3))
+-              return;
+-
+-      ei->attr.gid = gid;
+-
+-      if (ei->entry_attrs) {
+-              for (int i = 0; i < ei->nr_entries; i++) {
+-                      ei->entry_attrs[i].gid = gid;
+-              }
+-      }
+-
+-      /*
+-       * Only eventfs_inode with dentries are updated, make sure
+-       * all eventfs_inodes are updated. If one of the children
+-       * do not have a dentry, this function must traverse it.
+-       */
+-      list_for_each_entry_srcu(ei_child, &ei->children, list,
+-                               srcu_read_lock_held(&eventfs_srcu)) {
+-              if (!ei_child->dentry)
+-                      update_gid(ei_child, gid, level + 1);
+-      }
+-}
+-
+-void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
+-{
+-      struct eventfs_inode *ei = dentry->d_fsdata;
+-      int idx;
+-
+-      idx = srcu_read_lock(&eventfs_srcu);
+-      update_gid(ei, gid, 0);
+-      srcu_read_unlock(&eventfs_srcu, idx);
+-}
+-
+ /**
+  * create_file - create a file in the tracefs filesystem
+  * @name: the name of the file to create.
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct s
+ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+ struct dentry *eventfs_failed_creating(struct dentry *dentry);
+ struct dentry *eventfs_end_creating(struct dentry *dentry);
+-void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
+ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+ #endif /* _TRACEFS_INTERNAL_H */
diff --git a/queue-6.6/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch b/queue-6.6/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch
new file mode 100644 (file)
index 0000000..d37ed43
--- /dev/null
@@ -0,0 +1,80 @@
+From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb  6 13:10:43 2024
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 06 Feb 2024 07:09:52 -0500
+Subject: tracefs: Zero out the tracefs_inode when allocating it
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Masami Hiramatsu <mhiramat@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Christian Brauner <brauner@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>, Ajay Kaher <ajay.kaher@broadcom.com>, kernel test robot <oliver.sang@intel.com>
+Message-ID: <20240206120954.038732037@rostedt.homelinux.com>
+
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+
+commit d81786f53aec14fd4d56263145a0635afbc64617 upstream.
+
+eventfs uses the tracefs_inode and assumes that it's already initialized
+to zero. That is, it doesn't set fields to zero (like ti->private) after
+getting its tracefs_inode. This causes bugs due to stale values.
+
+Just initialize the entire structure to zero on allocation so there aren't
+any more surprises.
+
+This is a partial fix for the access to ti->private. The assignment still
+needs to be made before the dentry is instantiated.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.315825944@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Al Viro <viro@ZenIV.linux.org.uk>
+Cc: Ajay Kaher <ajay.kaher@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c    |    6 ++++--
+ fs/tracefs/internal.h |    3 ++-
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode
+       if (!ti)
+               return NULL;
+-      ti->flags = 0;
+-
+       return &ti->vfs_inode;
+ }
+@@ -779,7 +777,11 @@ static void init_once(void *foo)
+ {
+       struct tracefs_inode *ti = (struct tracefs_inode *) foo;
++      /* inode_init_once() calls memset() on the vfs_inode portion */
+       inode_init_once(&ti->vfs_inode);
++
++      /* Zero out the rest */
++      memset_after(ti, 0, vfs_inode);
+ }
+ static int __init tracefs_init(void)
+--- a/fs/tracefs/internal.h
++++ b/fs/tracefs/internal.h
+@@ -11,9 +11,10 @@ enum {
+ };
+ struct tracefs_inode {
++      struct inode            vfs_inode;
++      /* The below gets initialized with memset_after(ti, 0, vfs_inode) */
+       unsigned long           flags;
+       void                    *private;
+-      struct inode            vfs_inode;
+ };
+ /*