From b09bf82643788016d1bdf5b750839478660728d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 19 Feb 2024 19:04:05 +0100 Subject: [PATCH] 6.7-stable patches added patches: eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch eventfs-get-rid-of-dentry-pointers-without-refcounts.patch eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch eventfs-initialize-the-tracefs-inode-properly.patch eventfs-keep-all-directory-links-at-1.patch eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch eventfs-remove-fsnotify-functions-from-lookup.patch eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch eventfs-remove-unused-d_parent-pointer-field.patch eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch eventfs-stop-using-dcache_readdir-for-getdents.patch eventfs-use-kcalloc-instead-of-kzalloc.patch eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch tracefs-dentry-lookup-crapectomy.patch tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch --- ...ntry-ops-and-add-revalidate-function.patch | 125 ++++ ...or-all-iterations-in-eventfs_iterate.patch | 205 +++++++ ...entries-nor-inodes-in-iterate_shared.patch | 144 +++++ ...of-dentry-pointers-without-refcounts.patch | 532 +++++++++++++++++ ...op-immediately-if-ei-is_freed-is-set.patch | 53 ++ ...iles-and-directories-all-be-the-same.patch | 69 +++ ...nitialize-the-tracefs-inode-properly.patch | 66 +++ ...ventfs-keep-all-directory-links-at-1.patch | 78 +++ ...efore-ei-children-in-eventfs_iterate.patch | 120 ++++ ...emove-fsnotify-functions-from-lookup.patch | 53 ++ ...arameter-from-create_dir-file_dentry.patch | 204 +++++++ ...remove-unused-d_parent-pointer-field.patch | 66 +++ ...inode-structure-to-be-more-condensed.patch | 86 +++ ...nodes-in-the-eventfs_inode-structure.patch | 119 ++++ ...ate-by-skipping-entries-already-read.patch | 93 +++ ...op-using-dcache_readdir-for-getdents.patch | 296 ++++++++++ ...entfs-use-kcalloc-instead-of-kzalloc.patch | 68 +++ ...-is-freed-without-is_freed-being-set.patch | 87 +++ queue-6.7/series | 21 + ...-the-ei-dentry-pointer-unnecessarily.patch | 122 ++++ .../tracefs-dentry-lookup-crapectomy.patch | 542 ++++++++++++++++++ ...the-tracefs_inode-when-allocating-it.patch | 81 +++ 22 files changed, 3230 insertions(+) create mode 100644 queue-6.7/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch create mode 100644 queue-6.7/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch create mode 100644 queue-6.7/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch create mode 100644 queue-6.7/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch create mode 100644 queue-6.7/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch create mode 100644 queue-6.7/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch create mode 100644 queue-6.7/eventfs-initialize-the-tracefs-inode-properly.patch create mode 100644 queue-6.7/eventfs-keep-all-directory-links-at-1.patch create mode 100644 queue-6.7/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch create mode 100644 queue-6.7/eventfs-remove-fsnotify-functions-from-lookup.patch create mode 100644 queue-6.7/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch create mode 100644 queue-6.7/eventfs-remove-unused-d_parent-pointer-field.patch create mode 100644 queue-6.7/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch create mode 100644 queue-6.7/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch create mode 100644 queue-6.7/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch create mode 100644 queue-6.7/eventfs-stop-using-dcache_readdir-for-getdents.patch create mode 100644 queue-6.7/eventfs-use-kcalloc-instead-of-kzalloc.patch create mode 100644 queue-6.7/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch create mode 100644 queue-6.7/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch create mode 100644 queue-6.7/tracefs-dentry-lookup-crapectomy.patch create mode 100644 queue-6.7/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch diff --git a/queue-6.7/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch b/queue-6.7/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch new file mode 100644 index 00000000000..2c57ebff667 --- /dev/null +++ b/queue-6.7/eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch @@ -0,0 +1,125 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:33 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:16 -0500 +Subject: eventfs: Clean up dentry ops and add revalidate function +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113401.010494265@rostedt.homelinux.com> + +From: Linus Torvalds + +commit 8dce06e98c70a7fcbb4bca7d90faf40522e65c58 upstream. + +In order for the dentries to stay up-to-date with the eventfs changes, +just add a 'd_revalidate' function that checks the 'is_freed' bit. + +Also, clean up the dentry release to actually use d_release() rather +than the slightly odd d_iput() function. We don't care about the inode, +all we want to do is to get rid of the refcount to the eventfs data +added by dentry->d_fsdata. + +It would probably be cleaner to make eventfs its own filesystem, or at +least set its own dentry ops when looking up eventfs files. But as it +is, only eventfs dentries use d_fsdata, so we don't really need to split +these things up by use. + +Another thing that might be worth doing is to make all eventfs lookups +mark their dentries as not worth caching. We could do that with +d_delete(), but the DCACHE_DONTCACHE flag would likely be even better. + +As it is, the dentries are all freeable, but they only tend to get freed +at memory pressure rather than more proactively. But that's a separate +issue. + +Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.124644253@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 5 ++--- + fs/tracefs/inode.c | 27 ++++++++++++++++++--------- + fs/tracefs/internal.h | 3 ++- + 3 files changed, 22 insertions(+), 13 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -378,13 +378,12 @@ static void free_ei(struct eventfs_inode + } + + /** +- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode +- * @ti: the tracefs_inode of the dentry ++ * eventfs_d_release - dentry is going away + * @dentry: dentry which has the reference to remove. + * + * Remove the association between a dentry from an eventfs_inode. + */ +-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) ++void eventfs_d_release(struct dentry *dentry) + { + struct eventfs_inode *ei; + int i; +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -377,21 +377,30 @@ static const struct super_operations tra + .show_options = tracefs_show_options, + }; + +-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) ++/* ++ * It would be cleaner if eventfs had its own dentry ops. ++ * ++ * Note that d_revalidate is called potentially under RCU, ++ * so it can't take the eventfs mutex etc. It's fine - if ++ * we open a file just as it's marked dead, things will ++ * still work just fine, and just see the old stale case. ++ */ ++static void tracefs_d_release(struct dentry *dentry) + { +- struct tracefs_inode *ti; ++ if (dentry->d_fsdata) ++ eventfs_d_release(dentry); ++} + +- if (!dentry || !inode) +- return; ++static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) ++{ ++ struct eventfs_inode *ei = dentry->d_fsdata; + +- ti = get_tracefs(inode); +- if (ti && ti->flags & TRACEFS_EVENT_INODE) +- eventfs_set_ei_status_free(ti, dentry); +- iput(inode); ++ return !(ei && ei->is_freed); + } + + static const struct dentry_operations tracefs_dentry_operations = { +- .d_iput = tracefs_dentry_iput, ++ .d_revalidate = tracefs_d_revalidate, ++ .d_release = tracefs_d_release, + }; + + static int trace_fill_super(struct super_block *sb, void *data, int silent) +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -78,6 +78,7 @@ struct dentry *tracefs_start_creating(co + struct dentry *tracefs_end_creating(struct dentry *dentry); + struct dentry *tracefs_failed_creating(struct dentry *dentry); + struct inode *tracefs_get_inode(struct super_block *sb); +-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); ++ ++void eventfs_d_release(struct dentry *dentry); + + #endif /* _TRACEFS_INTERNAL_H */ diff --git a/queue-6.7/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch b/queue-6.7/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch new file mode 100644 index 00000000000..1a0e8edd4c6 --- /dev/null +++ b/queue-6.7/eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch @@ -0,0 +1,205 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:03 -0500 +Subject: eventfs: Do ctx->pos update for all iterations in eventfs_iterate() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Andrew Morton , Al Viro , Christian Brauner +Message-ID: <20240206113358.897028018@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 1e4624eb5a0ecaae0d2c4e3019bece119725bb98 upstream. + +The ctx->pos was only updated when it added an entry, but the "skip to +current pos" check (c--) happened for every loop regardless of if the +entry was added or not. This inconsistency caused readdir to be incorrect. + +It was due to: + + for (i = 0; i < ei->nr_entries; i++) { + + if (c > 0) { + c--; + continue; + } + + mutex_lock(&eventfs_mutex); + /* If ei->is_freed then just bail here, nothing more to do */ + if (ei->is_freed) { + mutex_unlock(&eventfs_mutex); + goto out; + } + r = entry->callback(name, &mode, &cdata, &fops); + mutex_unlock(&eventfs_mutex); + + [..] + ctx->pos++; + } + +But this can cause the iterator to return a file that was already read. +That's because of the way the callback() works. Some events may not have +all files, and the callback can return 0 to tell eventfs to skip the file +for this directory. + +for instance, we have: + + # ls /sys/kernel/tracing/events/ftrace/function +format hist hist_debug id inject + +and + + # ls /sys/kernel/tracing/events/sched/sched_switch/ +enable filter format hist hist_debug id inject trigger + +Where the function directory is missing "enable", "filter" and +"trigger". That's because the callback() for events has: + +static int event_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + struct trace_event_file *file = *data; + struct trace_event_call *call = file->event_call; + +[..] + + /* + * Only event directories that can be enabled should have + * triggers or filters, with the exception of the "print" + * event that can have a "trigger" file. + */ + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { + if (call->class->reg && strcmp(name, "enable") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_enable_fops; + return 1; + } + + if (strcmp(name, "filter") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_event_filter_fops; + return 1; + } + } + + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + strcmp(trace_event_name(call), "print") == 0) { + if (strcmp(name, "trigger") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &event_trigger_fops; + return 1; + } + } +[..] + return 0; +} + +Where the function event has the TRACE_EVENT_FL_IGNORE_ENABLE set. + +This means that the entries array elements for "enable", "filter" and +"trigger" when called on the function event will have the callback return +0 and not 1, to tell eventfs to skip these files for it. + +Because the "skip to current ctx->pos" check happened for all entries, but +the ctx->pos++ only happened to entries that exist, it would confuse the +reading of a directory. Which would cause: + + # ls /sys/kernel/tracing/events/ftrace/function/ +format hist hist hist_debug hist_debug id inject inject + +The missing "enable", "filter" and "trigger" caused ls to show "hist", +"hist_debug" and "inject" twice. + +Update the ctx->pos for every iteration to keep its update and the "skip" +update consistent. This also means that on error, the ctx->pos needs to be +decremented if it was incremented without adding something. + +Link: https://lore.kernel.org/all/20240104150500.38b15a62@gandalf.local.home/ +Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.172295263@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Linus Torvalds +Cc: Al Viro +Cc: Christian Brauner +Cc: Greg Kroah-Hartman +Fixes: 493ec81a8fb8e ("eventfs: Stop using dcache_readdir() for getdents()") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -722,6 +722,8 @@ static int eventfs_iterate(struct file * + continue; + } + ++ ctx->pos++; ++ + if (ei_child->is_freed) + continue; + +@@ -729,13 +731,12 @@ static int eventfs_iterate(struct file * + + dentry = create_dir_dentry(ei, ei_child, ei_dentry); + if (!dentry) +- goto out; ++ goto out_dec; + ino = dentry->d_inode->i_ino; + dput(dentry); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) +- goto out; +- ctx->pos++; ++ goto out_dec; + } + + for (i = 0; i < ei->nr_entries; i++) { +@@ -746,6 +747,8 @@ static int eventfs_iterate(struct file * + continue; + } + ++ ctx->pos++; ++ + entry = &ei->entries[i]; + name = entry->name; + +@@ -753,7 +756,7 @@ static int eventfs_iterate(struct file * + /* If ei->is_freed then just bail here, nothing more to do */ + if (ei->is_freed) { + mutex_unlock(&eventfs_mutex); +- goto out; ++ goto out_dec; + } + r = entry->callback(name, &mode, &cdata, &fops); + mutex_unlock(&eventfs_mutex); +@@ -762,19 +765,23 @@ static int eventfs_iterate(struct file * + + dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); + if (!dentry) +- goto out; ++ goto out_dec; + ino = dentry->d_inode->i_ino; + dput(dentry); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) +- goto out; +- ctx->pos++; ++ goto out_dec; + } + ret = 1; + out: + srcu_read_unlock(&eventfs_srcu, idx); + + return ret; ++ ++ out_dec: ++ /* Incremented ctx->pos without adding something, reset it */ ++ ctx->pos--; ++ goto out; + } + + /** diff --git a/queue-6.7/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch b/queue-6.7/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch new file mode 100644 index 00000000000..18c01ef0772 --- /dev/null +++ b/queue-6.7/eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch @@ -0,0 +1,144 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:07 -0500 +Subject: eventfs: Do not create dentries nor inodes in iterate_shared +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher , kernel test robot +Message-ID: <20240206113359.552951427@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 852e46e239ee6db3cd220614cf8bce96e79227c2 upstream. + +The original eventfs code added a wrapper around the dcache_readdir open +callback and created all the dentries and inodes at open, and increment +their ref count. A wrapper was added around the dcache_readdir release +function to decrement all the ref counts of those created inodes and +dentries. But this proved to be buggy[1] for when a kprobe was created +during a dir read, it would create a dentry between the open and the +release, and because the release would decrement all ref counts of all +files and directories, that would include the kprobe directory that was +not there to have its ref count incremented in open. This would cause the +ref count to go to negative and later crash the kernel. + +To solve this, the dentries and inodes that were created and had their ref +count upped in open needed to be saved. That list needed to be passed from +the open to the release, so that the release would only decrement the ref +counts of the entries that were incremented in the open. + +Unfortunately, the dcache_readdir logic was already using the +file->private_data, which is the only field that can be used to pass +information from the open to the release. What was done was the eventfs +created another descriptor that had a void pointer to save the +dcache_readdir pointer, and it wrapped all the callbacks, so that it could +save the list of entries that had their ref counts incremented in the +open, and pass it to the release. The wrapped callbacks would just put +back the dcache_readdir pointer and call the functions it used so it could +still use its data[2]. + +But Linus had an issue with the "hijacking" of the file->private_data +(unfortunately this discussion was on a security list, so no public link). +Which we finally agreed on doing everything within the iterate_shared +callback and leave the dcache_readdir out of it[3]. All the information +needed for the getents() could be created then. + +But this ended up being buggy too[4]. The iterate_shared callback was not +the right place to create the dentries and inodes. Even Christian Brauner +had issues with that[5]. + +An attempt was to go back to creating the inodes and dentries at +the open, create an array to store the information in the +file->private_data, and pass that information to the other callbacks.[6] + +The difference between that and the original method, is that it does not +use dcache_readdir. It also does not up the ref counts of the dentries and +pass them. Instead, it creates an array of a structure that saves the +dentry's name and inode number. That information is used in the +iterate_shared callback, and the array is freed in the dir release. The +dentries and inodes created in the open are not used for the iterate_share +or release callbacks. Just their names and inode numbers. + +Linus did not like that either[7] and just wanted to remove the dentries +being created in iterate_shared and use the hard coded inode numbers. + +[ All this while Linus enjoyed an unexpected vacation during the merge + window due to lack of power. ] + +[1] https://lore.kernel.org/linux-trace-kernel/20230919211804.230edf1e@gandalf.local.home/ +[2] https://lore.kernel.org/linux-trace-kernel/20230922163446.1431d4fa@gandalf.local.home/ +[3] https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org/ +[4] https://lore.kernel.org/all/202401152142.bfc28861-oliver.sang@intel.com/ +[5] https://lore.kernel.org/all/20240111-unzahl-gefegt-433acb8a841d@brauner/ +[6] https://lore.kernel.org/all/20240116114711.7e8637be@gandalf.local.home/ +[7] https://lore.kernel.org/all/20240116170154.5bf0a250@gandalf.local.home/ + +Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.573784051@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Linus Torvalds +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Fixes: 493ec81a8fb8 ("eventfs: Stop using dcache_readdir() for getdents()") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202401152142.bfc28861-oliver.sang@intel.com +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 20 +++++--------------- + 1 file changed, 5 insertions(+), 15 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -689,8 +689,6 @@ static int eventfs_iterate(struct file * + struct eventfs_inode *ei_child; + struct tracefs_inode *ti; + struct eventfs_inode *ei; +- struct dentry *ei_dentry = NULL; +- struct dentry *dentry; + const char *name; + umode_t mode; + int idx; +@@ -711,11 +709,11 @@ static int eventfs_iterate(struct file * + + mutex_lock(&eventfs_mutex); + ei = READ_ONCE(ti->private); +- if (ei && !ei->is_freed) +- ei_dentry = READ_ONCE(ei->dentry); ++ if (ei && ei->is_freed) ++ ei = NULL; + mutex_unlock(&eventfs_mutex); + +- if (!ei || !ei_dentry) ++ if (!ei) + goto out; + + /* +@@ -742,11 +740,7 @@ static int eventfs_iterate(struct file * + if (r <= 0) + continue; + +- dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); +- if (!dentry) +- goto out; +- ino = dentry->d_inode->i_ino; +- dput(dentry); ++ ino = EVENTFS_FILE_INODE_INO; + + if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) + goto out; +@@ -770,11 +764,7 @@ static int eventfs_iterate(struct file * + + name = ei_child->name; + +- dentry = create_dir_dentry(ei, ei_child, ei_dentry); +- if (!dentry) +- goto out_dec; +- ino = dentry->d_inode->i_ino; +- dput(dentry); ++ ino = EVENTFS_DIR_INODE_INO; + + if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) + goto out_dec; diff --git a/queue-6.7/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch b/queue-6.7/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch new file mode 100644 index 00000000000..7aa53b90b8a --- /dev/null +++ b/queue-6.7/eventfs-get-rid-of-dentry-pointers-without-refcounts.patch @@ -0,0 +1,532 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:10 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:17 -0500 +Subject: eventfs: Get rid of dentry pointers without refcounts +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113401.170105803@rostedt.homelinux.com> + +From: Linus Torvalds + +commit 43aa6f97c2d03a52c1ddb86768575fc84344bdbb upstream. + +The eventfs inode had pointers to dentries (and child dentries) without +actually holding a refcount on said pointer. That is fundamentally +broken, and while eventfs tried to then maintain coherence with dentries +going away by hooking into the '.d_iput' callback, that doesn't actually +work since it's not ordered wrt lookups. + +There were two reasonms why eventfs tried to keep a pointer to a dentry: + + - the creation of a 'events' directory would actually have a stable + dentry pointer that it created with tracefs_start_creating(). + + And it needed that dentry when tearing it all down again in + eventfs_remove_events_dir(). + + This use is actually ok, because the special top-level events + directory dentries are actually stable, not just a temporary cache of + the eventfs data structures. + + - the 'eventfs_inode' (aka ei) needs to stay around as long as there + are dentries that refer to it. + + It then used these dentry pointers as a replacement for doing + reference counting: it would try to make sure that there was only + ever one dentry associated with an event_inode, and keep a child + dentry array around to see which dentries might still refer to the + parent ei. + +This gets rid of the invalid dentry pointer use, and renames the one +valid case to a different name to make it clear that it's not just any +random dentry. + +The magic child dentry array that is kind of a "reverse reference list" +is simply replaced by having child dentries take a ref to the ei. As +does the directory dentries. That makes the broken use case go away. + +Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.280463000@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 248 ++++++++++++++--------------------------------- + fs/tracefs/internal.h | 7 - + 2 files changed, 78 insertions(+), 177 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -62,6 +62,35 @@ enum { + + #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) + ++/* ++ * eventfs_inode reference count management. ++ * ++ * NOTE! We count only references from dentries, in the ++ * form 'dentry->d_fsdata'. There are also references from ++ * directory inodes ('ti->private'), but the dentry reference ++ * count is always a superset of the inode reference count. ++ */ ++static void release_ei(struct kref *ref) ++{ ++ struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); ++ kfree(ei->entry_attrs); ++ kfree_const(ei->name); ++ kfree_rcu(ei, rcu); ++} ++ ++static inline void put_ei(struct eventfs_inode *ei) ++{ ++ if (ei) ++ kref_put(&ei->kref, release_ei); ++} ++ ++static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) ++{ ++ if (ei) ++ kref_get(&ei->kref); ++ return ei; ++} ++ + static struct dentry *eventfs_root_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags); +@@ -289,7 +318,8 @@ static void update_inode_attr(struct den + * directory. The inode.i_private pointer will point to @data in the open() + * call. + */ +-static struct dentry *lookup_file(struct dentry *dentry, ++static struct dentry *lookup_file(struct eventfs_inode *parent_ei, ++ struct dentry *dentry, + umode_t mode, + struct eventfs_attr *attr, + void *data, +@@ -302,7 +332,7 @@ static struct dentry *lookup_file(struct + mode |= S_IFREG; + + if (WARN_ON_ONCE(!S_ISREG(mode))) +- return NULL; ++ return ERR_PTR(-EIO); + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) +@@ -321,9 +351,12 @@ static struct dentry *lookup_file(struct + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; + ++ // Files have their parent's ei as their fsdata ++ dentry->d_fsdata = get_ei(parent_ei); ++ + d_add(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); +- return dentry; ++ return NULL; + }; + + /** +@@ -359,22 +392,29 @@ static struct dentry *lookup_dir_entry(s + /* Only directories have ti->private set to an ei, not files */ + ti->private = ei; + +- dentry->d_fsdata = ei; +- ei->dentry = dentry; // Remove me! ++ dentry->d_fsdata = get_ei(ei); + + inc_nlink(inode); + d_add(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); +- return dentry; ++ return NULL; + } + +-static void free_ei(struct eventfs_inode *ei) ++static inline struct eventfs_inode *alloc_ei(const char *name) + { +- kfree_const(ei->name); +- kfree(ei->d_children); +- kfree(ei->entry_attrs); +- kfree(ei); ++ struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); ++ ++ if (!ei) ++ return NULL; ++ ++ ei->name = kstrdup_const(name, GFP_KERNEL); ++ if (!ei->name) { ++ kfree(ei); ++ return NULL; ++ } ++ kref_init(&ei->kref); ++ return ei; + } + + /** +@@ -385,39 +425,13 @@ static void free_ei(struct eventfs_inode + */ + void eventfs_d_release(struct dentry *dentry) + { +- struct eventfs_inode *ei; +- int i; +- +- mutex_lock(&eventfs_mutex); +- +- ei = dentry->d_fsdata; +- if (!ei) +- goto out; +- +- /* This could belong to one of the files of the ei */ +- if (ei->dentry != dentry) { +- for (i = 0; i < ei->nr_entries; i++) { +- if (ei->d_children[i] == dentry) +- break; +- } +- if (WARN_ON_ONCE(i == ei->nr_entries)) +- goto out; +- ei->d_children[i] = NULL; +- } else if (ei->is_freed) { +- free_ei(ei); +- } else { +- ei->dentry = NULL; +- } +- +- dentry->d_fsdata = NULL; +- out: +- mutex_unlock(&eventfs_mutex); ++ put_ei(dentry->d_fsdata); + } + + /** + * lookup_file_dentry - create a dentry for a file of an eventfs_inode + * @ei: the eventfs_inode that the file will be created under +- * @idx: the index into the d_children[] of the @ei ++ * @idx: the index into the entry_attrs[] of the @ei + * @parent: The parent dentry of the created file. + * @name: The name of the file to create + * @mode: The mode of the file. +@@ -434,17 +448,11 @@ lookup_file_dentry(struct dentry *dentry + const struct file_operations *fops) + { + struct eventfs_attr *attr = NULL; +- struct dentry **e_dentry = &ei->d_children[idx]; + + if (ei->entry_attrs) + attr = &ei->entry_attrs[idx]; + +- dentry->d_fsdata = ei; // NOTE: ei of _parent_ +- lookup_file(dentry, mode, attr, data, fops); +- +- *e_dentry = dentry; // Remove me +- +- return dentry; ++ return lookup_file(ei, dentry, mode, attr, data, fops); + } + + /** +@@ -465,6 +473,7 @@ static struct dentry *eventfs_root_looku + struct tracefs_inode *ti; + struct eventfs_inode *ei; + const char *name = dentry->d_name.name; ++ struct dentry *result = NULL; + + ti = get_tracefs(dir); + if (!(ti->flags & TRACEFS_EVENT_INODE)) +@@ -481,7 +490,7 @@ static struct dentry *eventfs_root_looku + continue; + if (ei_child->is_freed) + goto out; +- lookup_dir_entry(dentry, ei, ei_child); ++ result = lookup_dir_entry(dentry, ei, ei_child); + goto out; + } + +@@ -498,12 +507,12 @@ static struct dentry *eventfs_root_looku + if (entry->callback(name, &mode, &data, &fops) <= 0) + goto out; + +- lookup_file_dentry(dentry, ei, i, mode, data, fops); ++ result = lookup_file_dentry(dentry, ei, i, mode, data, fops); + goto out; + } + out: + mutex_unlock(&eventfs_mutex); +- return NULL; ++ return result; + } + + /* +@@ -653,25 +662,10 @@ struct eventfs_inode *eventfs_create_dir + if (!parent) + return ERR_PTR(-EINVAL); + +- ei = kzalloc(sizeof(*ei), GFP_KERNEL); ++ ei = alloc_ei(name); + if (!ei) + return ERR_PTR(-ENOMEM); + +- ei->name = kstrdup_const(name, GFP_KERNEL); +- if (!ei->name) { +- kfree(ei); +- return ERR_PTR(-ENOMEM); +- } +- +- if (size) { +- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); +- if (!ei->d_children) { +- kfree_const(ei->name); +- kfree(ei); +- return ERR_PTR(-ENOMEM); +- } +- } +- + ei->entries = entries; + ei->nr_entries = size; + ei->data = data; +@@ -685,7 +679,7 @@ struct eventfs_inode *eventfs_create_dir + + /* Was the parent freed? */ + if (list_empty(&ei->list)) { +- free_ei(ei); ++ put_ei(ei); + ei = NULL; + } + return ei; +@@ -720,28 +714,20 @@ struct eventfs_inode *eventfs_create_eve + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + +- ei = kzalloc(sizeof(*ei), GFP_KERNEL); ++ ei = alloc_ei(name); + if (!ei) +- goto fail_ei; ++ goto fail; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + goto fail; + +- if (size) { +- ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); +- if (!ei->d_children) +- goto fail; +- } +- +- ei->dentry = dentry; ++ // Note: we have a ref to the dentry from tracefs_start_creating() ++ ei->events_dir = dentry; + ei->entries = entries; + ei->nr_entries = size; + ei->is_events = 1; + ei->data = data; +- ei->name = kstrdup_const(name, GFP_KERNEL); +- if (!ei->name) +- goto fail; + + /* Save the ownership of this directory */ + uid = d_inode(dentry->d_parent)->i_uid; +@@ -772,7 +758,7 @@ struct eventfs_inode *eventfs_create_eve + inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_fop = &eventfs_file_operations; + +- dentry->d_fsdata = ei; ++ dentry->d_fsdata = get_ei(ei); + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); +@@ -784,72 +770,11 @@ struct eventfs_inode *eventfs_create_eve + return ei; + + fail: +- kfree(ei->d_children); +- kfree(ei); +- fail_ei: ++ put_ei(ei); + tracefs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); + } + +-static LLIST_HEAD(free_list); +- +-static void eventfs_workfn(struct work_struct *work) +-{ +- struct eventfs_inode *ei, *tmp; +- struct llist_node *llnode; +- +- llnode = llist_del_all(&free_list); +- llist_for_each_entry_safe(ei, tmp, llnode, llist) { +- /* This dput() matches the dget() from unhook_dentry() */ +- for (int i = 0; i < ei->nr_entries; i++) { +- if (ei->d_children[i]) +- dput(ei->d_children[i]); +- } +- /* This should only get here if it had a dentry */ +- if (!WARN_ON_ONCE(!ei->dentry)) +- dput(ei->dentry); +- } +-} +- +-static DECLARE_WORK(eventfs_work, eventfs_workfn); +- +-static void free_rcu_ei(struct rcu_head *head) +-{ +- struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); +- +- if (ei->dentry) { +- /* Do not free the ei until all references of dentry are gone */ +- if (llist_add(&ei->llist, &free_list)) +- queue_work(system_unbound_wq, &eventfs_work); +- return; +- } +- +- /* If the ei doesn't have a dentry, neither should its children */ +- for (int i = 0; i < ei->nr_entries; i++) { +- WARN_ON_ONCE(ei->d_children[i]); +- } +- +- free_ei(ei); +-} +- +-static void unhook_dentry(struct dentry *dentry) +-{ +- if (!dentry) +- return; +- /* +- * Need to add a reference to the dentry that is expected by +- * simple_recursive_removal(), which will include a dput(). +- */ +- dget(dentry); +- +- /* +- * Also add a reference for the dput() in eventfs_workfn(). +- * That is required as that dput() will free the ei after +- * the SRCU grace period is over. +- */ +- dget(dentry); +-} +- + /** + * eventfs_remove_rec - remove eventfs dir or file from list + * @ei: eventfs_inode to be removed. +@@ -862,8 +787,6 @@ static void eventfs_remove_rec(struct ev + { + struct eventfs_inode *ei_child; + +- if (!ei) +- return; + /* + * Check recursion depth. It should never be greater than 3: + * 0 - events/ +@@ -875,28 +798,12 @@ static void eventfs_remove_rec(struct ev + return; + + /* search for nested folders or files */ +- list_for_each_entry_srcu(ei_child, &ei->children, list, +- lockdep_is_held(&eventfs_mutex)) { +- /* Children only have dentry if parent does */ +- WARN_ON_ONCE(ei_child->dentry && !ei->dentry); ++ list_for_each_entry(ei_child, &ei->children, list) + eventfs_remove_rec(ei_child, level + 1); +- } +- + + ei->is_freed = 1; +- +- for (int i = 0; i < ei->nr_entries; i++) { +- if (ei->d_children[i]) { +- /* Children only have dentry if parent does */ +- WARN_ON_ONCE(!ei->dentry); +- unhook_dentry(ei->d_children[i]); +- } +- } +- +- unhook_dentry(ei->dentry); +- +- list_del_rcu(&ei->list); +- call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); ++ list_del(&ei->list); ++ put_ei(ei); + } + + /** +@@ -907,22 +814,12 @@ static void eventfs_remove_rec(struct ev + */ + void eventfs_remove_dir(struct eventfs_inode *ei) + { +- struct dentry *dentry; +- + if (!ei) + return; + + mutex_lock(&eventfs_mutex); +- dentry = ei->dentry; + eventfs_remove_rec(ei, 0); + mutex_unlock(&eventfs_mutex); +- +- /* +- * If any of the ei children has a dentry, then the ei itself +- * must have a dentry. +- */ +- if (dentry) +- simple_recursive_removal(dentry, NULL); + } + + /** +@@ -935,7 +832,11 @@ void eventfs_remove_events_dir(struct ev + { + struct dentry *dentry; + +- dentry = ei->dentry; ++ dentry = ei->events_dir; ++ if (!dentry) ++ return; ++ ++ ei->events_dir = NULL; + eventfs_remove_dir(ei); + + /* +@@ -945,5 +846,6 @@ void eventfs_remove_events_dir(struct ev + * sticks around while the other ei->dentry are created + * and destroyed dynamically. + */ ++ d_invalidate(dentry); + dput(dentry); + } +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -35,8 +35,7 @@ struct eventfs_attr { + * @entries: the array of entries representing the files in the directory + * @name: the name of the directory to create + * @children: link list into the child eventfs_inode +- * @dentry: the dentry of the directory +- * @d_children: The array of dentries to represent the files when created ++ * @events_dir: the dentry of the events directory + * @entry_attrs: Saved mode and ownership of the @d_children + * @attr: Saved mode and ownership of eventfs_inode itself + * @data: The private data to pass to the callbacks +@@ -45,12 +44,12 @@ struct eventfs_attr { + * @nr_entries: The number of items in @entries + */ + struct eventfs_inode { ++ struct kref kref; + struct list_head list; + const struct eventfs_entry *entries; + const char *name; + struct list_head children; +- struct dentry *dentry; /* Check is_freed to access */ +- struct dentry **d_children; ++ struct dentry *events_dir; + struct eventfs_attr *entry_attrs; + struct eventfs_attr attr; + void *data; diff --git a/queue-6.7/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch b/queue-6.7/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch new file mode 100644 index 00000000000..4c70fd262b1 --- /dev/null +++ b/queue-6.7/eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch @@ -0,0 +1,53 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:31 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:02 -0500 +Subject: eventfs: Have eventfs_iterate() stop immediately if ei->is_freed is set +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Andrew Morton , Al Viro , Christian Brauner +Message-ID: <20240206113358.729003384@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit e109deadb73318cf4a3bd61287d969f705df278f upstream. + +If ei->is_freed is set in eventfs_iterate(), it means that the directory +that is being iterated on is in the process of being freed. Just exit the +loop immediately when that is ever detected, and separate out the return +of the entry->callback() from ei->is_freed. + +Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.016261289@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Linus Torvalds +Cc: Al Viro +Cc: Christian Brauner +Cc: Greg Kroah-Hartman +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -750,11 +750,12 @@ static int eventfs_iterate(struct file * + name = entry->name; + + mutex_lock(&eventfs_mutex); +- /* If ei->is_freed, then the event itself may be too */ +- if (!ei->is_freed) +- r = entry->callback(name, &mode, &cdata, &fops); +- else +- r = -1; ++ /* If ei->is_freed then just bail here, nothing more to do */ ++ if (ei->is_freed) { ++ mutex_unlock(&eventfs_mutex); ++ goto out; ++ } ++ r = entry->callback(name, &mode, &cdata, &fops); + mutex_unlock(&eventfs_mutex); + if (r <= 0) + continue; diff --git a/queue-6.7/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch b/queue-6.7/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch new file mode 100644 index 00000000000..97c74a46fd2 --- /dev/null +++ b/queue-6.7/eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch @@ -0,0 +1,69 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:24 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:06 -0500 +Subject: eventfs: Have the inodes all for files and directories all be the same +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113359.393228331@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 53c41052ba3121761e6f62a813961164532a214f upstream. + +The dentries and inodes are created in the readdir for the sole purpose of +getting a consistent inode number. Linus stated that is unnecessary, and +that all inodes can have the same inode number. For a virtual file system +they are pretty meaningless. + +Instead use a single unique inode number for all files and one for all +directories. + +Link: https://lore.kernel.org/all/20240116133753.2808d45e@gandalf.local.home/ +Link: https://lore.kernel.org/linux-trace-kernel/20240116211353.412180363@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Suggested-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -32,6 +32,10 @@ + */ + static DEFINE_MUTEX(eventfs_mutex); + ++/* Choose something "unique" ;-) */ ++#define EVENTFS_FILE_INODE_INO 0x12c4e37 ++#define EVENTFS_DIR_INODE_INO 0x134b2f5 ++ + /* + * The eventfs_inode (ei) itself is protected by SRCU. It is released from + * its parent's list and will have is_freed set (under eventfs_mutex). +@@ -314,6 +318,9 @@ static struct dentry *create_file(const + inode->i_fop = fop; + inode->i_private = data; + ++ /* All files will have the same inode number */ ++ inode->i_ino = EVENTFS_FILE_INODE_INO; ++ + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; + d_instantiate(dentry, inode); +@@ -350,6 +357,9 @@ static struct dentry *create_dir(struct + inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_fop = &eventfs_file_operations; + ++ /* All directories will have the same inode number */ ++ inode->i_ino = EVENTFS_DIR_INODE_INO; ++ + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; + diff --git a/queue-6.7/eventfs-initialize-the-tracefs-inode-properly.patch b/queue-6.7/eventfs-initialize-the-tracefs-inode-properly.patch new file mode 100644 index 00000000000..855b7a3406b --- /dev/null +++ b/queue-6.7/eventfs-initialize-the-tracefs-inode-properly.patch @@ -0,0 +1,66 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:12 -0500 +Subject: eventfs: Initialize the tracefs inode properly +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher , kernel test robot +Message-ID: <20240206113400.362338010@rostedt.homelinux.com> + +From: Linus Torvalds + +commit 4fa4b010b83fb2f837b5ef79e38072a79e96e4f1 upstream. + +The tracefs-specific fields in the inode were not initialized before the +inode was exposed to others through the dentry with 'd_instantiate()'. + +Move the field initializations up to before the d_instantiate. + +Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.478449628@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -370,6 +370,8 @@ static struct dentry *create_dir(struct + + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; ++ /* Only directories have ti->private set to an ei, not files */ ++ ti->private = ei; + + inc_nlink(inode); + d_instantiate(dentry, inode); +@@ -515,7 +517,6 @@ create_file_dentry(struct eventfs_inode + static void eventfs_post_create_dir(struct eventfs_inode *ei) + { + struct eventfs_inode *ei_child; +- struct tracefs_inode *ti; + + lockdep_assert_held(&eventfs_mutex); + +@@ -525,9 +526,6 @@ static void eventfs_post_create_dir(stru + srcu_read_lock_held(&eventfs_srcu)) { + ei_child->d_parent = ei->dentry; + } +- +- ti = get_tracefs(ei->dentry->d_inode); +- ti->private = ei; + } + + /** diff --git a/queue-6.7/eventfs-keep-all-directory-links-at-1.patch b/queue-6.7/eventfs-keep-all-directory-links-at-1.patch new file mode 100644 index 00000000000..6257cfa7f12 --- /dev/null +++ b/queue-6.7/eventfs-keep-all-directory-links-at-1.patch @@ -0,0 +1,78 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:18 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:21 -0500 +Subject: eventfs: Keep all directory links at 1 +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher , Al Viro +Message-ID: <20240206113401.818811467@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit ca185770db914869ff9fe773bac5e0e5e4165b83 upstream. + +The directory link count in eventfs was somewhat bogus. It was only being +updated when a directory child was being looked up and not on creation. + +One solution would be to update in get_attr() the link count by iterating +the ei->children list and then adding 2. But that could slow down simple +stat() calls, especially if it's done on all directories in eventfs. + +Another solution would be to add a parent pointer to the eventfs_inode +and keep track of the number of sub directories it has on creation. But +this adds overhead for something not really worthwhile. + +The solution decided upon is to keep all directory links in eventfs as 1. +This tells user space not to rely on the hard links of directories. Which +in this case it shouldn't. + +Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ +Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.339968298@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Suggested-by: Al Viro +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -404,9 +404,7 @@ static struct dentry *lookup_dir_entry(s + + dentry->d_fsdata = get_ei(ei); + +- inc_nlink(inode); + d_add(dentry, inode); +- inc_nlink(dentry->d_parent->d_inode); + return NULL; + } + +@@ -769,9 +767,17 @@ struct eventfs_inode *eventfs_create_eve + + dentry->d_fsdata = get_ei(ei); + +- /* directory inodes start off with i_nlink == 2 (for "." entry) */ +- inc_nlink(inode); ++ /* ++ * Keep all eventfs directories with i_nlink == 1. ++ * Due to the dynamic nature of the dentry creations and not ++ * wanting to add a pointer to the parent eventfs_inode in the ++ * eventfs_inode structure, keeping the i_nlink in sync with the ++ * number of directories would cause too much complexity for ++ * something not worth much. Keeping directory links at 1 ++ * tells userspace not to trust the link number. ++ */ + d_instantiate(dentry, inode); ++ /* The dentry of the "events" parent does keep track though */ + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + tracefs_end_creating(dentry); diff --git a/queue-6.7/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch b/queue-6.7/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch new file mode 100644 index 00000000000..fa83f1fea0c --- /dev/null +++ b/queue-6.7/eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch @@ -0,0 +1,120 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:31 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:04 -0500 +Subject: eventfs: Read ei->entries before ei->children in eventfs_iterate() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Andrew Morton , Al Viro , Christian Brauner +Message-ID: <20240206113359.062398719@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 704f960dbee2f1634f4b4e16f208cb16eaf41c1e upstream. + +In order to apply a shortcut to skip over the current ctx->pos +immediately, by using the ei->entries array, the reading of that array +should be first. Moving the array reading before the linked list reading +will make the shortcut change diff nicer to read. + +Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.333115095@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Linus Torvalds +Cc: Al Viro +Cc: Christian Brauner +Cc: Greg Kroah-Hartman +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 46 +++++++++++++++++++++++----------------------- + 1 file changed, 23 insertions(+), 23 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -714,8 +714,8 @@ static int eventfs_iterate(struct file * + * Need to create the dentries and inodes to have a consistent + * inode number. + */ +- list_for_each_entry_srcu(ei_child, &ei->children, list, +- srcu_read_lock_held(&eventfs_srcu)) { ++ for (i = 0; i < ei->nr_entries; i++) { ++ void *cdata = ei->data; + + if (c > 0) { + c--; +@@ -724,23 +724,32 @@ static int eventfs_iterate(struct file * + + ctx->pos++; + +- if (ei_child->is_freed) +- continue; ++ entry = &ei->entries[i]; ++ name = entry->name; + +- name = ei_child->name; ++ mutex_lock(&eventfs_mutex); ++ /* If ei->is_freed then just bail here, nothing more to do */ ++ if (ei->is_freed) { ++ mutex_unlock(&eventfs_mutex); ++ goto out_dec; ++ } ++ r = entry->callback(name, &mode, &cdata, &fops); ++ mutex_unlock(&eventfs_mutex); ++ if (r <= 0) ++ continue; + +- dentry = create_dir_dentry(ei, ei_child, ei_dentry); ++ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); + if (!dentry) + goto out_dec; + ino = dentry->d_inode->i_ino; + dput(dentry); + +- if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) ++ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) + goto out_dec; + } + +- for (i = 0; i < ei->nr_entries; i++) { +- void *cdata = ei->data; ++ list_for_each_entry_srcu(ei_child, &ei->children, list, ++ srcu_read_lock_held(&eventfs_srcu)) { + + if (c > 0) { + c--; +@@ -749,27 +758,18 @@ static int eventfs_iterate(struct file * + + ctx->pos++; + +- entry = &ei->entries[i]; +- name = entry->name; +- +- mutex_lock(&eventfs_mutex); +- /* If ei->is_freed then just bail here, nothing more to do */ +- if (ei->is_freed) { +- mutex_unlock(&eventfs_mutex); +- goto out_dec; +- } +- r = entry->callback(name, &mode, &cdata, &fops); +- mutex_unlock(&eventfs_mutex); +- if (r <= 0) ++ if (ei_child->is_freed) + continue; + +- dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); ++ name = ei_child->name; ++ ++ dentry = create_dir_dentry(ei, ei_child, ei_dentry); + if (!dentry) + goto out_dec; + ino = dentry->d_inode->i_ino; + dput(dentry); + +- if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) ++ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) + goto out_dec; + } + ret = 1; diff --git a/queue-6.7/eventfs-remove-fsnotify-functions-from-lookup.patch b/queue-6.7/eventfs-remove-fsnotify-functions-from-lookup.patch new file mode 100644 index 00000000000..b75a249eeed --- /dev/null +++ b/queue-6.7/eventfs-remove-fsnotify-functions-from-lookup.patch @@ -0,0 +1,53 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:08 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:20 -0500 +Subject: eventfs: Remove fsnotify*() functions from lookup() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher , Al Viro +Message-ID: <20240206113401.654906760@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 12d823b31fadf47c8f36ecada7abac5f903cac33 upstream. + +The dentries and inodes are created when referenced in the lookup code. +There's no reason to call fsnotify_*() functions when they are created by +a reference. It doesn't make any sense. + +Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ +Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.166973329@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Fixes: a376007917776 ("eventfs: Implement functions to create files and dirs when accessed"); +Suggested-by: Al Viro +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -366,7 +366,6 @@ static struct dentry *lookup_file(struct + dentry->d_fsdata = get_ei(parent_ei); + + d_add(dentry, inode); +- fsnotify_create(dentry->d_parent->d_inode, dentry); + return NULL; + }; + +@@ -408,7 +407,6 @@ static struct dentry *lookup_dir_entry(s + inc_nlink(inode); + d_add(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); +- fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + return NULL; + } + diff --git a/queue-6.7/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch b/queue-6.7/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch new file mode 100644 index 00000000000..8f90c05bfe1 --- /dev/null +++ b/queue-6.7/eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch @@ -0,0 +1,204 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:31:59 -0500 +Subject: eventfs: Remove "lookup" parameter from create_dir/file_dentry() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Ajay Kaher , Al Viro , Christian Brauner +Message-ID: <20240206113358.234112125@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit b0f7e2d739b4aac131ea1662d086a07775097b05 upstream. + +The "lookup" parameter is a way to differentiate the call to +create_file/dir_dentry() from when it's just a lookup (no need to up the +dentry refcount) and accessed via a readdir (need to up the refcount). + +But reality, it just makes the code more complex. Just up the refcount and +let the caller decide to dput() the result or not. + +Link: https://lore.kernel.org/linux-trace-kernel/20240103102553.17a19cea@gandalf.local.home +Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.517502710@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Linus Torvalds +Cc: Ajay Kaher +Cc: Al Viro +Cc: Christian Brauner +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 55 +++++++++++++++++------------------------------ + 1 file changed, 20 insertions(+), 35 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -419,16 +419,14 @@ void eventfs_set_ei_status_free(struct t + * @mode: The mode of the file. + * @data: The data to use to set the inode of the file with on open() + * @fops: The fops of the file to be created. +- * @lookup: If called by the lookup routine, in which case, dput() the created dentry. + * + * Create a dentry for a file of an eventfs_inode @ei and place it into the +- * address located at @e_dentry. If the @e_dentry already has a dentry, then +- * just do a dget() on it and return. Otherwise create the dentry and attach it. ++ * address located at @e_dentry. + */ + static struct dentry * + create_file_dentry(struct eventfs_inode *ei, int idx, + struct dentry *parent, const char *name, umode_t mode, void *data, +- const struct file_operations *fops, bool lookup) ++ const struct file_operations *fops) + { + struct eventfs_attr *attr = NULL; + struct dentry **e_dentry = &ei->d_children[idx]; +@@ -443,9 +441,7 @@ create_file_dentry(struct eventfs_inode + } + /* If the e_dentry already has a dentry, use it */ + if (*e_dentry) { +- /* lookup does not need to up the ref count */ +- if (!lookup) +- dget(*e_dentry); ++ dget(*e_dentry); + mutex_unlock(&eventfs_mutex); + return *e_dentry; + } +@@ -470,13 +466,12 @@ create_file_dentry(struct eventfs_inode + * way to being freed, don't return it. If e_dentry is NULL + * it means it was already freed. + */ +- if (ei->is_freed) ++ if (ei->is_freed) { + dentry = NULL; +- else ++ } else { + dentry = *e_dentry; +- /* The lookup does not need to up the dentry refcount */ +- if (dentry && !lookup) + dget(dentry); ++ } + mutex_unlock(&eventfs_mutex); + return dentry; + } +@@ -494,9 +489,6 @@ create_file_dentry(struct eventfs_inode + } + mutex_unlock(&eventfs_mutex); + +- if (lookup) +- dput(dentry); +- + return dentry; + } + +@@ -529,13 +521,12 @@ static void eventfs_post_create_dir(stru + * @pei: The eventfs_inode parent of ei. + * @ei: The eventfs_inode to create the directory for + * @parent: The dentry of the parent of this directory +- * @lookup: True if this is called by the lookup code + * + * This creates and attaches a directory dentry to the eventfs_inode @ei. + */ + static struct dentry * + create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, +- struct dentry *parent, bool lookup) ++ struct dentry *parent) + { + struct dentry *dentry = NULL; + +@@ -547,11 +538,9 @@ create_dir_dentry(struct eventfs_inode * + return NULL; + } + if (ei->dentry) { +- /* If the dentry already has a dentry, use it */ ++ /* If the eventfs_inode already has a dentry, use it */ + dentry = ei->dentry; +- /* lookup does not need to up the ref count */ +- if (!lookup) +- dget(dentry); ++ dget(dentry); + mutex_unlock(&eventfs_mutex); + return dentry; + } +@@ -571,7 +560,7 @@ create_dir_dentry(struct eventfs_inode * + * way to being freed. + */ + dentry = ei->dentry; +- if (dentry && !lookup) ++ if (dentry) + dget(dentry); + mutex_unlock(&eventfs_mutex); + return dentry; +@@ -591,9 +580,6 @@ create_dir_dentry(struct eventfs_inode * + } + mutex_unlock(&eventfs_mutex); + +- if (lookup) +- dput(dentry); +- + return dentry; + } + +@@ -618,8 +604,8 @@ static struct dentry *eventfs_root_looku + struct eventfs_inode *ei; + struct dentry *ei_dentry = NULL; + struct dentry *ret = NULL; ++ struct dentry *d; + const char *name = dentry->d_name.name; +- bool created = false; + umode_t mode; + void *data; + int idx; +@@ -655,13 +641,10 @@ static struct dentry *eventfs_root_looku + ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; +- create_dir_dentry(ei, ei_child, ei_dentry, true); +- created = true; +- break; +- } +- +- if (created) ++ d = create_dir_dentry(ei, ei_child, ei_dentry); ++ dput(d); + goto out; ++ } + + for (i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; +@@ -679,8 +662,8 @@ static struct dentry *eventfs_root_looku + ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; +- create_file_dentry(ei, i, ei_dentry, name, mode, cdata, +- fops, true); ++ d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); ++ dput(d); + break; + } + } +@@ -797,9 +780,10 @@ static int dcache_dir_open_wrapper(struc + inode_lock(parent->d_inode); + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { +- d = create_dir_dentry(ei, ei_child, parent, false); ++ d = create_dir_dentry(ei, ei_child, parent); + if (d) { + ret = add_dentries(&dentries, d, cnt); ++ dput(d); + if (ret < 0) + break; + cnt++; +@@ -819,9 +803,10 @@ static int dcache_dir_open_wrapper(struc + mutex_unlock(&eventfs_mutex); + if (r <= 0) + continue; +- d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); ++ d = create_file_dentry(ei, i, parent, name, mode, cdata, fops); + if (d) { + ret = add_dentries(&dentries, d, cnt); ++ dput(d); + if (ret < 0) + break; + cnt++; diff --git a/queue-6.7/eventfs-remove-unused-d_parent-pointer-field.patch b/queue-6.7/eventfs-remove-unused-d_parent-pointer-field.patch new file mode 100644 index 00000000000..85d68697327 --- /dev/null +++ b/queue-6.7/eventfs-remove-unused-d_parent-pointer-field.patch @@ -0,0 +1,66 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:33 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:15 -0500 +Subject: eventfs: Remove unused d_parent pointer field +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113400.846719926@rostedt.homelinux.com> + +From: Linus Torvalds + +commit 408600be78cdb8c650a97ecc7ff411cb216811b5 upstream. + +It's never used + +Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.961772428@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 4 +--- + fs/tracefs/internal.h | 2 -- + 2 files changed, 1 insertion(+), 5 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -680,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir + INIT_LIST_HEAD(&ei->list); + + mutex_lock(&eventfs_mutex); +- if (!parent->is_freed) { ++ if (!parent->is_freed) + list_add_tail(&ei->list, &parent->children); +- ei->d_parent = parent->dentry; +- } + mutex_unlock(&eventfs_mutex); + + /* Was the parent freed? */ +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -36,7 +36,6 @@ struct eventfs_attr { + * @name: the name of the directory to create + * @children: link list into the child eventfs_inode + * @dentry: the dentry of the directory +- * @d_parent: pointer to the parent's dentry + * @d_children: The array of dentries to represent the files when created + * @entry_attrs: Saved mode and ownership of the @d_children + * @attr: Saved mode and ownership of eventfs_inode itself +@@ -51,7 +50,6 @@ struct eventfs_inode { + const char *name; + struct list_head children; + struct dentry *dentry; /* Check is_freed to access */ +- struct dentry *d_parent; + struct dentry **d_children; + struct eventfs_attr *entry_attrs; + struct eventfs_attr attr; diff --git a/queue-6.7/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch b/queue-6.7/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch new file mode 100644 index 00000000000..02987e8aead --- /dev/null +++ b/queue-6.7/eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch @@ -0,0 +1,86 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:39 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:19 -0500 +Subject: eventfs: Restructure eventfs_inode structure to be more condensed +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113401.490005209@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 264424dfdd5cbd92bc5b5ddf93944929fc877fac upstream. + +Some of the eventfs_inode structure has holes in it. Rework the structure +to be a bit more condensed, and also remove the no longer used llist +field. + +Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.002321438@goodmis.org + +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/internal.h | 27 ++++++++++++--------------- + 1 file changed, 12 insertions(+), 15 deletions(-) + +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -32,40 +32,37 @@ struct eventfs_attr { + /* + * struct eventfs_inode - hold the properties of the eventfs directories. + * @list: link list into the parent directory ++ * @rcu: Union with @list for freeing ++ * @children: link list into the child eventfs_inode + * @entries: the array of entries representing the files in the directory + * @name: the name of the directory to create +- * @children: link list into the child eventfs_inode + * @events_dir: the dentry of the events directory + * @entry_attrs: Saved mode and ownership of the @d_children +- * @attr: Saved mode and ownership of eventfs_inode itself + * @data: The private data to pass to the callbacks ++ * @attr: Saved mode and ownership of eventfs_inode itself + * @is_freed: Flag set if the eventfs is on its way to be freed + * Note if is_freed is set, then dentry is corrupted. ++ * @is_events: Flag set for only the top level "events" directory + * @nr_entries: The number of items in @entries ++ * @ino: The saved inode number + */ + struct eventfs_inode { +- struct kref kref; +- struct list_head list; ++ union { ++ struct list_head list; ++ struct rcu_head rcu; ++ }; ++ struct list_head children; + const struct eventfs_entry *entries; + const char *name; +- struct list_head children; + struct dentry *events_dir; + struct eventfs_attr *entry_attrs; +- struct eventfs_attr attr; + void *data; ++ struct eventfs_attr attr; ++ struct kref kref; + unsigned int is_freed:1; + unsigned int is_events:1; + unsigned int nr_entries:30; + unsigned int ino; +- /* +- * Union - used for deletion +- * @llist: for calling dput() if needed after RCU +- * @rcu: eventfs_inode to delete in RCU +- */ +- union { +- struct llist_node llist; +- struct rcu_head rcu; +- }; + }; + + static inline struct tracefs_inode *get_tracefs(const struct inode *inode) diff --git a/queue-6.7/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch b/queue-6.7/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch new file mode 100644 index 00000000000..ea676d4d08b --- /dev/null +++ b/queue-6.7/eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch @@ -0,0 +1,119 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:09 -0500 +Subject: eventfs: Save directory inodes in the eventfs_inode structure +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Geert Uytterhoeven , Geert Uytterhoeven , Kees Cook +Message-ID: <20240206113359.872124971@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 834bf76add3e6168038150f162cbccf1fd492a67 upstream. + +The eventfs inodes and directories are allocated when referenced. But this +leaves the issue of keeping consistent inode numbers and the number is +only saved in the inode structure itself. When the inode is no longer +referenced, it can be freed. When the file that the inode was representing +is referenced again, the inode is once again created, but the inode number +needs to be the same as it was before. + +Just making the inode numbers the same for all files is fine, but that +does not work with directories. The find command will check for loops via +the inode number and having the same inode number for directories triggers: + + # find /sys/kernel/tracing +find: File system loop detected; +'/sys/kernel/debug/tracing/events/initcall/initcall_finish' is part of the same file system loop as +'/sys/kernel/debug/tracing/events/initcall'. +[..] + +Linus pointed out that the eventfs_inode structure ends with a single +32bit int, and on 64 bit machines, there's likely a 4 byte hole due to +alignment. We can use this hole to store the inode number for the +eventfs_inode. All directories in eventfs are represented by an +eventfs_inode and that data structure can hold its inode number. + +That last int was also purposely placed at the end of the structure to +prevent holes from within. Now that there's a 4 byte number to hold the +inode, both the inode number and the last integer can be moved up in the +structure for better cache locality, where the llist and rcu fields can be +moved to the end as they are only used when the eventfs_inode is being +deleted. + +Link: https://lore.kernel.org/all/CAMuHMdXKiorg-jiuKoZpfZyDJ3Ynrfb8=X+c7x0Eewxn-YRdCA@mail.gmail.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240122152748.46897388@gandalf.local.home + +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Linus Torvalds +Reported-by: Geert Uytterhoeven +Tested-by: Geert Uytterhoeven +Fixes: 53c41052ba31 ("eventfs: Have the inodes all for files and directories all be the same") +Signed-off-by: Steven Rostedt (Google) +Reviewed-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 14 +++++++++++--- + fs/tracefs/internal.h | 7 ++++--- + 2 files changed, 15 insertions(+), 6 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex); + + /* Choose something "unique" ;-) */ + #define EVENTFS_FILE_INODE_INO 0x12c4e37 +-#define EVENTFS_DIR_INODE_INO 0x134b2f5 ++ ++/* Just try to make something consistent and unique */ ++static int eventfs_dir_ino(struct eventfs_inode *ei) ++{ ++ if (!ei->ino) ++ ei->ino = get_next_ino(); ++ ++ return ei->ino; ++} + + /* + * The eventfs_inode (ei) itself is protected by SRCU. It is released from +@@ -358,7 +366,7 @@ static struct dentry *create_dir(struct + inode->i_fop = &eventfs_file_operations; + + /* All directories will have the same inode number */ +- inode->i_ino = EVENTFS_DIR_INODE_INO; ++ inode->i_ino = eventfs_dir_ino(ei); + + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; +@@ -764,7 +772,7 @@ static int eventfs_iterate(struct file * + + name = ei_child->name; + +- ino = EVENTFS_DIR_INODE_INO; ++ ino = eventfs_dir_ino(ei_child); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) + goto out_dec; +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -55,6 +55,10 @@ struct eventfs_inode { + struct eventfs_attr *entry_attrs; + struct eventfs_attr attr; + void *data; ++ unsigned int is_freed:1; ++ unsigned int is_events:1; ++ unsigned int nr_entries:30; ++ unsigned int ino; + /* + * Union - used for deletion + * @llist: for calling dput() if needed after RCU +@@ -64,9 +68,6 @@ struct eventfs_inode { + struct llist_node llist; + struct rcu_head rcu; + }; +- unsigned int is_freed:1; +- unsigned int is_events:1; +- unsigned int nr_entries:30; + }; + + static inline struct tracefs_inode *get_tracefs(const struct inode *inode) diff --git a/queue-6.7/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch b/queue-6.7/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch new file mode 100644 index 00000000000..17a11a9b470 --- /dev/null +++ b/queue-6.7/eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch @@ -0,0 +1,93 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:05 -0500 +Subject: eventfs: Shortcut eventfs_iterate() by skipping entries already read +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Andrew Morton , Al Viro , Christian Brauner +Message-ID: <20240206113359.229236339@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 1de94b52d5e8d8b32f0252f14fad1f1edc2e71f1 upstream. + +As the ei->entries array is fixed for the duration of the eventfs_inode, +it can be used to skip over already read entries in eventfs_iterate(). + +That is, if ctx->pos is greater than zero, there's no reason in doing the +loop across the ei->entries array for the entries less than ctx->pos. +Instead, start the lookup of the entries at the current ctx->pos. + +Link: https://lore.kernel.org/all/CAHk-=wiKwDUDv3+jCsv-uacDcHDVTYsXtBR9=6sGM5mqX+DhOg@mail.gmail.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.494956957@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Al Viro +Cc: Christian Brauner +Cc: Greg Kroah-Hartman +Suggested-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 23 ++++++++++------------- + 1 file changed, 10 insertions(+), 13 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -708,21 +708,15 @@ static int eventfs_iterate(struct file * + if (!ei || !ei_dentry) + goto out; + +- ret = 0; +- + /* + * Need to create the dentries and inodes to have a consistent + * inode number. + */ +- for (i = 0; i < ei->nr_entries; i++) { +- void *cdata = ei->data; +- +- if (c > 0) { +- c--; +- continue; +- } ++ ret = 0; + +- ctx->pos++; ++ /* Start at 'c' to jump over already read entries */ ++ for (i = c; i < ei->nr_entries; i++, ctx->pos++) { ++ void *cdata = ei->data; + + entry = &ei->entries[i]; + name = entry->name; +@@ -731,7 +725,7 @@ static int eventfs_iterate(struct file * + /* If ei->is_freed then just bail here, nothing more to do */ + if (ei->is_freed) { + mutex_unlock(&eventfs_mutex); +- goto out_dec; ++ goto out; + } + r = entry->callback(name, &mode, &cdata, &fops); + mutex_unlock(&eventfs_mutex); +@@ -740,14 +734,17 @@ static int eventfs_iterate(struct file * + + dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); + if (!dentry) +- goto out_dec; ++ goto out; + ino = dentry->d_inode->i_ino; + dput(dentry); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) +- goto out_dec; ++ goto out; + } + ++ /* Subtract the skipped entries above */ ++ c -= min((unsigned int)c, (unsigned int)ei->nr_entries); ++ + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { + diff --git a/queue-6.7/eventfs-stop-using-dcache_readdir-for-getdents.patch b/queue-6.7/eventfs-stop-using-dcache_readdir-for-getdents.patch new file mode 100644 index 00000000000..aad64c715c7 --- /dev/null +++ b/queue-6.7/eventfs-stop-using-dcache_readdir-for-getdents.patch @@ -0,0 +1,296 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:00 -0500 +Subject: eventfs: Stop using dcache_readdir() for getdents() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Ajay Kaher , Al Viro , Christian Brauner +Message-ID: <20240206113358.399057730@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 493ec81a8fb8e4ada6f223b8b73791a1280d4774 upstream. + +The eventfs creates dynamically allocated dentries and inodes. Using the +dcache_readdir() logic for its own directory lookups requires hiding the +cursor of the dcache logic and playing games to allow the dcache_readdir() +to still have access to the cursor while the eventfs saved what it created +and what it needs to release. + +Instead, just have eventfs have its own iterate_shared callback function +that will fill in the dent entries. This simplifies the code quite a bit. + +Link: https://lore.kernel.org/linux-trace-kernel/20240104015435.682218477@goodmis.org + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Linus Torvalds +Cc: Ajay Kaher +Cc: Al Viro +Cc: Christian Brauner +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 194 +++++++++++++++-------------------------------- + 1 file changed, 64 insertions(+), 130 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -53,9 +53,7 @@ enum { + static struct dentry *eventfs_root_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags); +-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); +-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); +-static int eventfs_release(struct inode *inode, struct file *file); ++static int eventfs_iterate(struct file *file, struct dir_context *ctx); + + static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) + { +@@ -213,11 +211,9 @@ static const struct inode_operations eve + }; + + static const struct file_operations eventfs_file_operations = { +- .open = dcache_dir_open_wrapper, + .read = generic_read_dir, +- .iterate_shared = dcache_readdir_wrapper, ++ .iterate_shared = eventfs_iterate, + .llseek = generic_file_llseek, +- .release = eventfs_release, + }; + + /* Return the evenfs_inode of the "events" directory */ +@@ -672,128 +668,87 @@ static struct dentry *eventfs_root_looku + return ret; + } + +-struct dentry_list { +- void *cursor; +- struct dentry **dentries; +-}; +- +-/** +- * eventfs_release - called to release eventfs file/dir +- * @inode: inode to be released +- * @file: file to be released (not used) +- */ +-static int eventfs_release(struct inode *inode, struct file *file) +-{ +- struct tracefs_inode *ti; +- struct dentry_list *dlist = file->private_data; +- void *cursor; +- int i; +- +- ti = get_tracefs(inode); +- if (!(ti->flags & TRACEFS_EVENT_INODE)) +- return -EINVAL; +- +- if (WARN_ON_ONCE(!dlist)) +- return -EINVAL; +- +- for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { +- dput(dlist->dentries[i]); +- } +- +- cursor = dlist->cursor; +- kfree(dlist->dentries); +- kfree(dlist); +- file->private_data = cursor; +- return dcache_dir_close(inode, file); +-} +- +-static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) +-{ +- struct dentry **tmp; +- +- tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); +- if (!tmp) +- return -1; +- tmp[cnt] = d; +- tmp[cnt + 1] = NULL; +- *dentries = tmp; +- return 0; +-} +- +-/** +- * dcache_dir_open_wrapper - eventfs open wrapper +- * @inode: not used +- * @file: dir to be opened (to create it's children) +- * +- * Used to dynamic create file/dir with-in @file, all the +- * file/dir will be created. If already created then references +- * will be increased ++/* ++ * Walk the children of a eventfs_inode to fill in getdents(). + */ +-static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) ++static int eventfs_iterate(struct file *file, struct dir_context *ctx) + { + const struct file_operations *fops; ++ struct inode *f_inode = file_inode(file); + const struct eventfs_entry *entry; + struct eventfs_inode *ei_child; + struct tracefs_inode *ti; + struct eventfs_inode *ei; +- struct dentry_list *dlist; +- struct dentry **dentries = NULL; +- struct dentry *parent = file_dentry(file); +- struct dentry *d; +- struct inode *f_inode = file_inode(file); +- const char *name = parent->d_name.name; ++ struct dentry *ei_dentry = NULL; ++ struct dentry *dentry; ++ const char *name; + umode_t mode; +- void *data; +- int cnt = 0; + int idx; +- int ret; +- int i; +- int r; ++ int ret = -EINVAL; ++ int ino; ++ int i, r, c; ++ ++ if (!dir_emit_dots(file, ctx)) ++ return 0; + + ti = get_tracefs(f_inode); + if (!(ti->flags & TRACEFS_EVENT_INODE)) + return -EINVAL; + +- if (WARN_ON_ONCE(file->private_data)) +- return -EINVAL; ++ c = ctx->pos - 2; + + idx = srcu_read_lock(&eventfs_srcu); + + mutex_lock(&eventfs_mutex); + ei = READ_ONCE(ti->private); ++ if (ei && !ei->is_freed) ++ ei_dentry = READ_ONCE(ei->dentry); + mutex_unlock(&eventfs_mutex); + +- if (!ei) { +- srcu_read_unlock(&eventfs_srcu, idx); +- return -EINVAL; +- } +- +- +- data = ei->data; ++ if (!ei || !ei_dentry) ++ goto out; + +- dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); +- if (!dlist) { +- srcu_read_unlock(&eventfs_srcu, idx); +- return -ENOMEM; +- } ++ ret = 0; + +- inode_lock(parent->d_inode); ++ /* ++ * Need to create the dentries and inodes to have a consistent ++ * inode number. ++ */ + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { +- d = create_dir_dentry(ei, ei_child, parent); +- if (d) { +- ret = add_dentries(&dentries, d, cnt); +- dput(d); +- if (ret < 0) +- break; +- cnt++; ++ ++ if (c > 0) { ++ c--; ++ continue; + } ++ ++ if (ei_child->is_freed) ++ continue; ++ ++ name = ei_child->name; ++ ++ dentry = create_dir_dentry(ei, ei_child, ei_dentry); ++ if (!dentry) ++ goto out; ++ ino = dentry->d_inode->i_ino; ++ dput(dentry); ++ ++ if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) ++ goto out; ++ ctx->pos++; + } + + for (i = 0; i < ei->nr_entries; i++) { +- void *cdata = data; ++ void *cdata = ei->data; ++ ++ if (c > 0) { ++ c--; ++ continue; ++ } ++ + entry = &ei->entries[i]; + name = entry->name; ++ + mutex_lock(&eventfs_mutex); + /* If ei->is_freed, then the event itself may be too */ + if (!ei->is_freed) +@@ -803,42 +758,21 @@ static int dcache_dir_open_wrapper(struc + mutex_unlock(&eventfs_mutex); + if (r <= 0) + continue; +- d = create_file_dentry(ei, i, parent, name, mode, cdata, fops); +- if (d) { +- ret = add_dentries(&dentries, d, cnt); +- dput(d); +- if (ret < 0) +- break; +- cnt++; +- } +- } +- inode_unlock(parent->d_inode); +- srcu_read_unlock(&eventfs_srcu, idx); +- ret = dcache_dir_open(inode, file); + +- /* +- * dcache_dir_open() sets file->private_data to a dentry cursor. +- * Need to save that but also save all the dentries that were +- * opened by this function. +- */ +- dlist->cursor = file->private_data; +- dlist->dentries = dentries; +- file->private_data = dlist; +- return ret; +-} ++ dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); ++ if (!dentry) ++ goto out; ++ ino = dentry->d_inode->i_ino; ++ dput(dentry); + +-/* +- * This just sets the file->private_data back to the cursor and back. +- */ +-static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) +-{ +- struct dentry_list *dlist = file->private_data; +- int ret; ++ if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) ++ goto out; ++ ctx->pos++; ++ } ++ ret = 1; ++ out: ++ srcu_read_unlock(&eventfs_srcu, idx); + +- file->private_data = dlist->cursor; +- ret = dcache_readdir(file, ctx); +- dlist->cursor = file->private_data; +- file->private_data = dlist; + return ret; + } + diff --git a/queue-6.7/eventfs-use-kcalloc-instead-of-kzalloc.patch b/queue-6.7/eventfs-use-kcalloc-instead-of-kzalloc.patch new file mode 100644 index 00000000000..37bd3962f2f --- /dev/null +++ b/queue-6.7/eventfs-use-kcalloc-instead-of-kzalloc.patch @@ -0,0 +1,68 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:33:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:08 -0500 +Subject: eventfs: Use kcalloc() instead of kzalloc() +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Erick Archer , "Gustavo A. R. Silva" +Message-ID: <20240206113359.716264689@rostedt.homelinux.com> + +From: Erick Archer + +commit 1057066009c4325bb1d8430c9274894d0860e7c3 upstream. + +As noted in the "Deprecated Interfaces, Language Features, Attributes, +and Conventions" documentation [1], size calculations (especially +multiplication) should not be performed in memory allocator (or similar) +function arguments due to the risk of them overflowing. This could lead +to values wrapping around and a smaller allocation being made than the +caller was expecting. Using those allocations could lead to linear +overflows of heap memory and other misbehaviors. + +So, use the purpose specific kcalloc() function instead of the argument +size * count in the kzalloc() function. + +[1] https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments + +Link: https://lore.kernel.org/linux-trace-kernel/20240115181658.4562-1-erick.archer@gmx.com + +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Mark Rutland +Link: https://github.com/KSPP/linux/issues/162 +Signed-off-by: Erick Archer +Reviewed-by: Gustavo A. R. Silva +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -97,7 +97,7 @@ static int eventfs_set_attr(struct mnt_i + /* Preallocate the children mode array if necessary */ + if (!(dentry->d_inode->i_mode & S_IFDIR)) { + if (!ei->entry_attrs) { +- ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, ++ ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), + GFP_NOFS); + if (!ei->entry_attrs) { + ret = -ENOMEM; +@@ -836,7 +836,7 @@ struct eventfs_inode *eventfs_create_dir + } + + if (size) { +- ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); ++ ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); + if (!ei->d_children) { + kfree_const(ei->name); + kfree(ei); +@@ -903,7 +903,7 @@ struct eventfs_inode *eventfs_create_eve + goto fail; + + if (size) { +- ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); ++ ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); + if (!ei->d_children) + goto fail; + } diff --git a/queue-6.7/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch b/queue-6.7/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch new file mode 100644 index 00000000000..2d6a7ed19dd --- /dev/null +++ b/queue-6.7/eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch @@ -0,0 +1,87 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:34:58 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:18 -0500 +Subject: eventfs: Warn if an eventfs_inode is freed without is_freed being set +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113401.324608021@rostedt.homelinux.com> + +From: "Steven Rostedt (Google)" + +commit 5a49f996046ba947466bc7461e4b19c4d1daf978 upstream. + +There should never be a case where an evenfs_inode is being freed without +is_freed being set. Add a WARN_ON_ONCE() if it ever happens. That would +mean there was one too many put_ei()s. + +Link: https://lore.kernel.org/linux-trace-kernel/20240201161616.843551963@goodmis.org + +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -73,6 +73,9 @@ enum { + static void release_ei(struct kref *ref) + { + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); ++ ++ WARN_ON_ONCE(!ei->is_freed); ++ + kfree(ei->entry_attrs); + kfree_const(ei->name); + kfree_rcu(ei, rcu); +@@ -84,6 +87,14 @@ static inline void put_ei(struct eventfs + kref_put(&ei->kref, release_ei); + } + ++static inline void free_ei(struct eventfs_inode *ei) ++{ ++ if (ei) { ++ ei->is_freed = 1; ++ put_ei(ei); ++ } ++} ++ + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) + { + if (ei) +@@ -679,7 +690,7 @@ struct eventfs_inode *eventfs_create_dir + + /* Was the parent freed? */ + if (list_empty(&ei->list)) { +- put_ei(ei); ++ free_ei(ei); + ei = NULL; + } + return ei; +@@ -770,7 +781,7 @@ struct eventfs_inode *eventfs_create_eve + return ei; + + fail: +- put_ei(ei); ++ free_ei(ei); + tracefs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); + } +@@ -801,9 +812,8 @@ static void eventfs_remove_rec(struct ev + list_for_each_entry(ei_child, &ei->children, list) + eventfs_remove_rec(ei_child, level + 1); + +- ei->is_freed = 1; + list_del(&ei->list); +- put_ei(ei); ++ free_ei(ei); + } + + /** diff --git a/queue-6.7/series b/queue-6.7/series index 6f87d59de37..49fa90ecf12 100644 --- a/queue-6.7/series +++ b/queue-6.7/series @@ -178,3 +178,24 @@ iio-imu-bno055-serdev-requires-regmap.patch iio-pressure-bmp280-add-missing-bmp085-to-spi-id-table.patch pmdomain-mediatek-fix-race-conditions-with-genpd.patch media-rc-bpf-attach-detach-requires-write-permission.patch +eventfs-remove-lookup-parameter-from-create_dir-file_dentry.patch +eventfs-stop-using-dcache_readdir-for-getdents.patch +eventfs-have-eventfs_iterate-stop-immediately-if-ei-is_freed-is-set.patch +eventfs-do-ctx-pos-update-for-all-iterations-in-eventfs_iterate.patch +eventfs-read-ei-entries-before-ei-children-in-eventfs_iterate.patch +eventfs-shortcut-eventfs_iterate-by-skipping-entries-already-read.patch +eventfs-have-the-inodes-all-for-files-and-directories-all-be-the-same.patch +eventfs-do-not-create-dentries-nor-inodes-in-iterate_shared.patch +eventfs-use-kcalloc-instead-of-kzalloc.patch +eventfs-save-directory-inodes-in-the-eventfs_inode-structure.patch +tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch +eventfs-initialize-the-tracefs-inode-properly.patch +tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch +tracefs-dentry-lookup-crapectomy.patch +eventfs-remove-unused-d_parent-pointer-field.patch +eventfs-clean-up-dentry-ops-and-add-revalidate-function.patch +eventfs-get-rid-of-dentry-pointers-without-refcounts.patch +eventfs-warn-if-an-eventfs_inode-is-freed-without-is_freed-being-set.patch +eventfs-restructure-eventfs_inode-structure-to-be-more-condensed.patch +eventfs-remove-fsnotify-functions-from-lookup.patch +eventfs-keep-all-directory-links-at-1.patch diff --git a/queue-6.7/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch b/queue-6.7/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch new file mode 100644 index 00000000000..cfbdc453ac8 --- /dev/null +++ b/queue-6.7/tracefs-avoid-using-the-ei-dentry-pointer-unnecessarily.patch @@ -0,0 +1,122 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:32 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:13 -0500 +Subject:[PATCH v2 15/23] tracefs: Avoid using the ei->dentry pointer unnecessarily +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher +Message-ID: <20240206113400.525587454@rostedt.homelinux.com> + +From: Steven Rostedt + +From: Linus Torvalds + +The eventfs_find_events() code tries to walk up the tree to find the +event directory that a dentry belongs to, in order to then find the +eventfs inode that is associated with that event directory. + +However, it uses an odd combination of walking the dentry parent, +looking up the eventfs inode associated with that, and then looking up +the dentry from there. Repeat. + +But the code shouldn't have back-pointers to dentries in the first +place, and it should just walk the dentry parenthood chain directly. + +Similarly, 'set_top_events_ownership()' looks up the dentry from the +eventfs inode, but the only reason it wants a dentry is to look up the +superblock in order to look up the root dentry. + +But it already has the real filesystem inode, which has that same +superblock pointer. So just pass in the superblock pointer using the +information that's already there, instead of looking up extraneous data +that is irrelevant. + +Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.638645365@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +(cherry picked from commit 99c001cb617df409dac275a059d6c3f187a2da7a) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 26 ++++++++++++-------------- + 1 file changed, 12 insertions(+), 14 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -156,33 +156,30 @@ static int eventfs_set_attr(struct mnt_i + return ret; + } + +-static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) ++static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) + { +- struct inode *inode; ++ struct inode *root; + + /* Only update if the "events" was on the top level */ + if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + return; + + /* Get the tracefs root inode. */ +- inode = d_inode(dentry->d_sb->s_root); +- ei->attr.uid = inode->i_uid; +- ei->attr.gid = inode->i_gid; ++ root = d_inode(sb->s_root); ++ ei->attr.uid = root->i_uid; ++ ei->attr.gid = root->i_gid; + } + + static void set_top_events_ownership(struct inode *inode) + { + struct tracefs_inode *ti = get_tracefs(inode); + struct eventfs_inode *ei = ti->private; +- struct dentry *dentry; + + /* The top events directory doesn't get automatically updated */ + if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + return; + +- dentry = ei->dentry; +- +- update_top_events_attr(ei, dentry); ++ update_top_events_attr(ei, inode->i_sb); + + if (!(ei->attr.mode & EVENTFS_SAVE_UID)) + inode->i_uid = ei->attr.uid; +@@ -235,8 +232,10 @@ static struct eventfs_inode *eventfs_fin + + mutex_lock(&eventfs_mutex); + do { +- /* The parent always has an ei, except for events itself */ +- ei = dentry->d_parent->d_fsdata; ++ // The parent is stable because we do not do renames ++ dentry = dentry->d_parent; ++ // ... and directories always have d_fsdata ++ ei = dentry->d_fsdata; + + /* + * If the ei is being freed, the ownership of the children +@@ -246,12 +245,11 @@ static struct eventfs_inode *eventfs_fin + ei = NULL; + break; + } +- +- dentry = ei->dentry; ++ // Walk upwards until you find the events inode + } while (!ei->is_events); + mutex_unlock(&eventfs_mutex); + +- update_top_events_attr(ei, dentry); ++ update_top_events_attr(ei, dentry->d_sb); + + return ei; + } diff --git a/queue-6.7/tracefs-dentry-lookup-crapectomy.patch b/queue-6.7/tracefs-dentry-lookup-crapectomy.patch new file mode 100644 index 00000000000..389c85fc4aa --- /dev/null +++ b/queue-6.7/tracefs-dentry-lookup-crapectomy.patch @@ -0,0 +1,542 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:34:57 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:14 -0500 +Subject:[PATCH v2 16/23] tracefs: dentry lookup crapectomy +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Al Viro , Christian Brauner , Ajay Kaher +Message-ID: <20240206113400.685349714@rostedt.homelinux.com> + +From: Steven Rostedt + +From: Linus Torvalds + +The dentry lookup for eventfs files was very broken, and had lots of +signs of the old situation where the filesystem names were all created +statically in the dentry tree, rather than being looked up dynamically +based on the eventfs data structures. + +You could see it in the naming - how it claimed to "create" dentries +rather than just look up the dentries that were given it. + +You could see it in various nonsensical and very incorrect operations, +like using "simple_lookup()" on the dentries that were passed in, which +only results in those dentries becoming negative dentries. Which meant +that any other lookup would possibly return ENOENT if it saw that +negative dentry before the data was then later filled in. + +You could see it in the immense amount of nonsensical code that didn't +actually just do lookups. + +Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20240131233227.73db55e1@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Al Viro +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Greg Kroah-Hartman +Cc: Ajay Kaher +Cc: Mark Rutland +Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") +Signed-off-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +(cherry picked from commit 49304c2b93e4f7468b51ef717cbe637981397115) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 275 ++++++++--------------------------------------- + fs/tracefs/inode.c | 69 ----------- + fs/tracefs/internal.h | 3 + 3 files changed, 50 insertions(+), 297 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -230,7 +230,6 @@ static struct eventfs_inode *eventfs_fin + { + struct eventfs_inode *ei; + +- mutex_lock(&eventfs_mutex); + do { + // The parent is stable because we do not do renames + dentry = dentry->d_parent; +@@ -247,7 +246,6 @@ static struct eventfs_inode *eventfs_fin + } + // Walk upwards until you find the events inode + } while (!ei->is_events); +- mutex_unlock(&eventfs_mutex); + + update_top_events_attr(ei, dentry->d_sb); + +@@ -280,11 +278,10 @@ static void update_inode_attr(struct den + } + + /** +- * create_file - create a file in the tracefs filesystem +- * @name: the name of the file to create. ++ * lookup_file - look up a file in the tracefs filesystem ++ * @dentry: the dentry to look up + * @mode: the permission that the file should have. + * @attr: saved attributes changed by user +- * @parent: parent dentry for this file. + * @data: something that the caller will want to get to later on. + * @fop: struct file_operations that should be used for this file. + * +@@ -292,13 +289,13 @@ static void update_inode_attr(struct den + * directory. The inode.i_private pointer will point to @data in the open() + * call. + */ +-static struct dentry *create_file(const char *name, umode_t mode, ++static struct dentry *lookup_file(struct dentry *dentry, ++ umode_t mode, + struct eventfs_attr *attr, +- struct dentry *parent, void *data, ++ void *data, + const struct file_operations *fop) + { + struct tracefs_inode *ti; +- struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) +@@ -307,15 +304,9 @@ static struct dentry *create_file(const + if (WARN_ON_ONCE(!S_ISREG(mode))) + return NULL; + +- WARN_ON_ONCE(!parent); +- dentry = eventfs_start_creating(name, parent); +- +- if (IS_ERR(dentry)) +- return dentry; +- + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) +- return eventfs_failed_creating(dentry); ++ return ERR_PTR(-ENOMEM); + + /* If the user updated the directory's attributes, use them */ + update_inode_attr(dentry, inode, attr, mode); +@@ -329,32 +320,29 @@ static struct dentry *create_file(const + + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; +- d_instantiate(dentry, inode); ++ ++ d_add(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); +- return eventfs_end_creating(dentry); ++ return dentry; + }; + + /** +- * create_dir - create a dir in the tracefs filesystem ++ * lookup_dir_entry - look up a dir in the tracefs filesystem ++ * @dentry: the directory to look up + * @ei: the eventfs_inode that represents the directory to create +- * @parent: parent dentry for this file. + * +- * This function will create a dentry for a directory represented by ++ * This function will look up a dentry for a directory represented by + * a eventfs_inode. + */ +-static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) ++static struct dentry *lookup_dir_entry(struct dentry *dentry, ++ struct eventfs_inode *pei, struct eventfs_inode *ei) + { + struct tracefs_inode *ti; +- struct dentry *dentry; + struct inode *inode; + +- dentry = eventfs_start_creating(ei->name, parent); +- if (IS_ERR(dentry)) +- return dentry; +- + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) +- return eventfs_failed_creating(dentry); ++ return ERR_PTR(-ENOMEM); + + /* If the user updated the directory's attributes, use them */ + update_inode_attr(dentry, inode, &ei->attr, +@@ -371,11 +359,14 @@ static struct dentry *create_dir(struct + /* Only directories have ti->private set to an ei, not files */ + ti->private = ei; + ++ dentry->d_fsdata = ei; ++ ei->dentry = dentry; // Remove me! ++ + inc_nlink(inode); +- d_instantiate(dentry, inode); ++ d_add(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); +- return eventfs_end_creating(dentry); ++ return dentry; + } + + static void free_ei(struct eventfs_inode *ei) +@@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct t + } + + /** +- * create_file_dentry - create a dentry for a file of an eventfs_inode ++ * lookup_file_dentry - create a dentry for a file of an eventfs_inode + * @ei: the eventfs_inode that the file will be created under + * @idx: the index into the d_children[] of the @ei + * @parent: The parent dentry of the created file. +@@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct t + * address located at @e_dentry. + */ + static struct dentry * +-create_file_dentry(struct eventfs_inode *ei, int idx, +- struct dentry *parent, const char *name, umode_t mode, void *data, ++lookup_file_dentry(struct dentry *dentry, ++ struct eventfs_inode *ei, int idx, ++ umode_t mode, void *data, + const struct file_operations *fops) + { + struct eventfs_attr *attr = NULL; + struct dentry **e_dentry = &ei->d_children[idx]; +- struct dentry *dentry; + +- WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); +- +- mutex_lock(&eventfs_mutex); +- if (ei->is_freed) { +- mutex_unlock(&eventfs_mutex); +- return NULL; +- } +- /* If the e_dentry already has a dentry, use it */ +- if (*e_dentry) { +- dget(*e_dentry); +- mutex_unlock(&eventfs_mutex); +- return *e_dentry; +- } +- +- /* ei->entry_attrs are protected by SRCU */ + if (ei->entry_attrs) + attr = &ei->entry_attrs[idx]; + +- mutex_unlock(&eventfs_mutex); +- +- dentry = create_file(name, mode, attr, parent, data, fops); +- +- mutex_lock(&eventfs_mutex); +- +- if (IS_ERR_OR_NULL(dentry)) { +- /* +- * When the mutex was released, something else could have +- * created the dentry for this e_dentry. In which case +- * use that one. +- * +- * If ei->is_freed is set, the e_dentry is currently on its +- * way to being freed, don't return it. If e_dentry is NULL +- * it means it was already freed. +- */ +- if (ei->is_freed) { +- dentry = NULL; +- } else { +- dentry = *e_dentry; +- dget(dentry); +- } +- mutex_unlock(&eventfs_mutex); +- return dentry; +- } ++ dentry->d_fsdata = ei; // NOTE: ei of _parent_ ++ lookup_file(dentry, mode, attr, data, fops); + +- if (!*e_dentry && !ei->is_freed) { +- *e_dentry = dentry; +- dentry->d_fsdata = ei; +- } else { +- /* +- * Should never happen unless we get here due to being freed. +- * Otherwise it means two dentries exist with the same name. +- */ +- WARN_ON_ONCE(!ei->is_freed); +- dentry = NULL; +- } +- mutex_unlock(&eventfs_mutex); +- +- return dentry; +-} +- +-/** +- * eventfs_post_create_dir - post create dir routine +- * @ei: eventfs_inode of recently created dir +- * +- * Map the meta-data of files within an eventfs dir to their parent dentry +- */ +-static void eventfs_post_create_dir(struct eventfs_inode *ei) +-{ +- struct eventfs_inode *ei_child; +- +- lockdep_assert_held(&eventfs_mutex); +- +- /* srcu lock already held */ +- /* fill parent-child relation */ +- list_for_each_entry_srcu(ei_child, &ei->children, list, +- srcu_read_lock_held(&eventfs_srcu)) { +- ei_child->d_parent = ei->dentry; +- } +-} +- +-/** +- * create_dir_dentry - Create a directory dentry for the eventfs_inode +- * @pei: The eventfs_inode parent of ei. +- * @ei: The eventfs_inode to create the directory for +- * @parent: The dentry of the parent of this directory +- * +- * This creates and attaches a directory dentry to the eventfs_inode @ei. +- */ +-static struct dentry * +-create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, +- struct dentry *parent) +-{ +- struct dentry *dentry = NULL; +- +- WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); +- +- mutex_lock(&eventfs_mutex); +- if (pei->is_freed || ei->is_freed) { +- mutex_unlock(&eventfs_mutex); +- return NULL; +- } +- if (ei->dentry) { +- /* If the eventfs_inode already has a dentry, use it */ +- dentry = ei->dentry; +- dget(dentry); +- mutex_unlock(&eventfs_mutex); +- return dentry; +- } +- mutex_unlock(&eventfs_mutex); +- +- dentry = create_dir(ei, parent); +- +- mutex_lock(&eventfs_mutex); +- +- if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { +- /* +- * When the mutex was released, something else could have +- * created the dentry for this e_dentry. In which case +- * use that one. +- * +- * If ei->is_freed is set, the e_dentry is currently on its +- * way to being freed. +- */ +- dentry = ei->dentry; +- if (dentry) +- dget(dentry); +- mutex_unlock(&eventfs_mutex); +- return dentry; +- } +- +- if (!ei->dentry && !ei->is_freed) { +- ei->dentry = dentry; +- eventfs_post_create_dir(ei); +- dentry->d_fsdata = ei; +- } else { +- /* +- * Should never happen unless we get here due to being freed. +- * Otherwise it means two dentries exist with the same name. +- */ +- WARN_ON_ONCE(!ei->is_freed); +- dentry = NULL; +- } +- mutex_unlock(&eventfs_mutex); ++ *e_dentry = dentry; // Remove me + + return dentry; + } +@@ -607,79 +462,49 @@ static struct dentry *eventfs_root_looku + struct dentry *dentry, + unsigned int flags) + { +- const struct file_operations *fops; +- const struct eventfs_entry *entry; + struct eventfs_inode *ei_child; + struct tracefs_inode *ti; + struct eventfs_inode *ei; +- struct dentry *ei_dentry = NULL; +- struct dentry *ret = NULL; +- struct dentry *d; + const char *name = dentry->d_name.name; +- umode_t mode; +- void *data; +- int idx; +- int i; +- int r; + + ti = get_tracefs(dir); + if (!(ti->flags & TRACEFS_EVENT_INODE)) +- return NULL; ++ return ERR_PTR(-EIO); + +- /* Grab srcu to prevent the ei from going away */ +- idx = srcu_read_lock(&eventfs_srcu); +- +- /* +- * Grab the eventfs_mutex to consistent value from ti->private. +- * This s +- */ + mutex_lock(&eventfs_mutex); +- ei = READ_ONCE(ti->private); +- if (ei && !ei->is_freed) +- ei_dentry = READ_ONCE(ei->dentry); +- mutex_unlock(&eventfs_mutex); + +- if (!ei || !ei_dentry) ++ ei = ti->private; ++ if (!ei || ei->is_freed) + goto out; + +- data = ei->data; +- +- list_for_each_entry_srcu(ei_child, &ei->children, list, +- srcu_read_lock_held(&eventfs_srcu)) { ++ list_for_each_entry(ei_child, &ei->children, list) { + if (strcmp(ei_child->name, name) != 0) + continue; +- ret = simple_lookup(dir, dentry, flags); +- if (IS_ERR(ret)) ++ if (ei_child->is_freed) + goto out; +- d = create_dir_dentry(ei, ei_child, ei_dentry); +- dput(d); ++ lookup_dir_entry(dentry, ei, ei_child); + goto out; + } + +- for (i = 0; i < ei->nr_entries; i++) { +- entry = &ei->entries[i]; +- if (strcmp(name, entry->name) == 0) { +- void *cdata = data; +- mutex_lock(&eventfs_mutex); +- /* If ei->is_freed, then the event itself may be too */ +- if (!ei->is_freed) +- r = entry->callback(name, &mode, &cdata, &fops); +- else +- r = -1; +- mutex_unlock(&eventfs_mutex); +- if (r <= 0) +- continue; +- ret = simple_lookup(dir, dentry, flags); +- if (IS_ERR(ret)) +- goto out; +- d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); +- dput(d); +- break; +- } ++ for (int i = 0; i < ei->nr_entries; i++) { ++ void *data; ++ umode_t mode; ++ const struct file_operations *fops; ++ const struct eventfs_entry *entry = &ei->entries[i]; ++ ++ if (strcmp(name, entry->name) != 0) ++ continue; ++ ++ data = ei->data; ++ if (entry->callback(name, &mode, &data, &fops) <= 0) ++ goto out; ++ ++ lookup_file_dentry(dentry, ei, i, mode, data, fops); ++ goto out; + } + out: +- srcu_read_unlock(&eventfs_srcu, idx); +- return ret; ++ mutex_unlock(&eventfs_mutex); ++ return NULL; + } + + /* +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -495,75 +495,6 @@ struct dentry *tracefs_end_creating(stru + return dentry; + } + +-/** +- * eventfs_start_creating - start the process of creating a dentry +- * @name: Name of the file created for the dentry +- * @parent: The parent dentry where this dentry will be created +- * +- * This is a simple helper function for the dynamically created eventfs +- * files. When the directory of the eventfs files are accessed, their +- * dentries are created on the fly. This function is used to start that +- * process. +- */ +-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) +-{ +- struct dentry *dentry; +- int error; +- +- /* Must always have a parent. */ +- if (WARN_ON_ONCE(!parent)) +- return ERR_PTR(-EINVAL); +- +- error = simple_pin_fs(&trace_fs_type, &tracefs_mount, +- &tracefs_mount_count); +- if (error) +- return ERR_PTR(error); +- +- if (unlikely(IS_DEADDIR(parent->d_inode))) +- dentry = ERR_PTR(-ENOENT); +- else +- dentry = lookup_one_len(name, parent, strlen(name)); +- +- if (!IS_ERR(dentry) && dentry->d_inode) { +- dput(dentry); +- dentry = ERR_PTR(-EEXIST); +- } +- +- if (IS_ERR(dentry)) +- simple_release_fs(&tracefs_mount, &tracefs_mount_count); +- +- return dentry; +-} +- +-/** +- * eventfs_failed_creating - clean up a failed eventfs dentry creation +- * @dentry: The dentry to clean up +- * +- * If after calling eventfs_start_creating(), a failure is detected, the +- * resources created by eventfs_start_creating() needs to be cleaned up. In +- * that case, this function should be called to perform that clean up. +- */ +-struct dentry *eventfs_failed_creating(struct dentry *dentry) +-{ +- dput(dentry); +- simple_release_fs(&tracefs_mount, &tracefs_mount_count); +- return NULL; +-} +- +-/** +- * eventfs_end_creating - Finish the process of creating a eventfs dentry +- * @dentry: The dentry that has successfully been created. +- * +- * This function is currently just a place holder to match +- * eventfs_start_creating(). In case any synchronization needs to be added, +- * this function will be used to implement that without having to modify +- * the callers of eventfs_start_creating(). +- */ +-struct dentry *eventfs_end_creating(struct dentry *dentry) +-{ +- return dentry; +-} +- + /* Find the inode that this will use for default */ + static struct inode *instance_inode(struct dentry *parent, struct inode *inode) + { +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -80,9 +80,6 @@ struct dentry *tracefs_start_creating(co + struct dentry *tracefs_end_creating(struct dentry *dentry); + struct dentry *tracefs_failed_creating(struct dentry *dentry); + struct inode *tracefs_get_inode(struct super_block *sb); +-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); +-struct dentry *eventfs_failed_creating(struct dentry *dentry); +-struct dentry *eventfs_end_creating(struct dentry *dentry); + void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); + + #endif /* _TRACEFS_INTERNAL_H */ diff --git a/queue-6.7/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch b/queue-6.7/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch new file mode 100644 index 00000000000..8f66d536a96 --- /dev/null +++ b/queue-6.7/tracefs-zero-out-the-tracefs_inode-when-allocating-it.patch @@ -0,0 +1,81 @@ +From SRS0=eEWY=JP=rostedt.homelinux.com=rostedt@kernel.org Tue Feb 6 12:35:17 2024 +From: Steven Rostedt +Date: Tue, 06 Feb 2024 06:32:11 -0500 +Subject:[PATCH v2 13/23] tracefs: Zero out the tracefs_inode when allocating it +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: Linus Torvalds , Greg Kroah-Hartman , Sasha Levin , Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Christian Brauner , Al Viro , Ajay Kaher , kernel test robot +Message-ID: <20240206113400.202745776@rostedt.homelinux.com> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +eventfs uses the tracefs_inode and assumes that it's already initialized +to zero. That is, it doesn't set fields to zero (like ti->private) after +getting its tracefs_inode. This causes bugs due to stale values. + +Just initialize the entire structure to zero on allocation so there isn't +any more surprises. + +This is a partial fix to access to ti->private. The assignment still needs +to be made before the dentry is instantiated. + +Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.315825944@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Christian Brauner +Cc: Al Viro +Cc: Ajay Kaher +Cc: Greg Kroah-Hartman +Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com +Suggested-by: Linus Torvalds +Signed-off-by: Steven Rostedt (Google) +(cherry picked from commit d81786f53aec14fd4d56263145a0635afbc64617) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 6 ++++-- + fs/tracefs/internal.h | 3 ++- + 2 files changed, 6 insertions(+), 3 deletions(-) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode + if (!ti) + return NULL; + +- ti->flags = 0; +- + return &ti->vfs_inode; + } + +@@ -779,7 +777,11 @@ static void init_once(void *foo) + { + struct tracefs_inode *ti = (struct tracefs_inode *) foo; + ++ /* inode_init_once() calls memset() on the vfs_inode portion */ + inode_init_once(&ti->vfs_inode); ++ ++ /* Zero out the rest */ ++ memset_after(ti, 0, vfs_inode); + } + + static int __init tracefs_init(void) +--- a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -11,9 +11,10 @@ enum { + }; + + struct tracefs_inode { ++ struct inode vfs_inode; ++ /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + unsigned long flags; + void *private; +- struct inode vfs_inode; + }; + + /* -- 2.47.3