From: Greg Kroah-Hartman Date: Mon, 6 Nov 2023 11:41:20 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.14.329~19 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a61d8a1867744ad90e9aeaa0f309fd2a8669e9c9;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch eventfs-remove-is_freed-union-with-rcu-head.patch eventfs-save-ownership-and-mode.patch eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch tracing-have-trace_event_file-have-ref-counters.patch --- diff --git a/queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch b/queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch new file mode 100644 index 00000000000..5a46131a5c7 --- /dev/null +++ b/queue-6.6/eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch @@ -0,0 +1,271 @@ +From stable-owner@vger.kernel.org Sun Nov 5 17:01:46 2023 +From: Steven Rostedt +Date: Sun, 05 Nov 2023 10:56:34 -0500 +Subject:[PATCH 4/5] eventfs: Delete eventfs_inode when the last dentry is freed +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, +Cc: Masami Hiramatsu , Mark Rutland , Andrew Morton , Ajay Kaher +Message-ID: <20231105160139.821908367@goodmis.org> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +commit 020010fbfa202aa528a52743eba4ab0da3400a4e upstream + +There exists a race between holding a reference of an eventfs_inode dentry +and the freeing of the eventfs_inode. If user space has a dentry held long +enough, it may still be able to access the dentry's eventfs_inode after it +has been freed. + +To prevent this, have he eventfs_inode freed via the last dput() (or via +RCU if the eventfs_inode does not have a dentry). + +This means reintroducing the eventfs_inode del_list field at a temporary +place to put the eventfs_inode. It needs to mark it as freed (via the +list) but also must invalidate the dentry immediately as the return from +eventfs_remove_dir() expects that they are. But the dentry invalidation +must not be called under the eventfs_mutex, so it must be done after the +eventfs_inode is marked as free (put on a deletion list). + +Link: https://lkml.kernel.org/r/20231101172650.123479767@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Ajay Kaher +Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 150 +++++++++++++++++++++++------------------------ + 1 file changed, 74 insertions(+), 76 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -53,10 +53,12 @@ struct eventfs_file { + const struct inode_operations *iop; + /* + * Union - used for deletion ++ * @llist: for calling dput() if needed after RCU + * @del_list: list of eventfs_file to delete + * @rcu: eventfs_file to delete in RCU + */ + union { ++ struct llist_node llist; + struct list_head del_list; + struct rcu_head rcu; + }; +@@ -113,8 +115,7 @@ static int eventfs_set_attr(struct mnt_i + + mutex_lock(&eventfs_mutex); + ef = dentry->d_fsdata; +- /* The LSB is set when the eventfs_inode is being freed */ +- if (((unsigned long)ef & 1UL) || ef->is_freed) { ++ if (ef->is_freed) { + /* Do not allow changes if the event is about to be removed. */ + mutex_unlock(&eventfs_mutex); + return -ENODEV; +@@ -258,6 +259,13 @@ static struct dentry *create_dir(struct + return eventfs_end_creating(dentry); + } + ++static void free_ef(struct eventfs_file *ef) ++{ ++ kfree(ef->name); ++ kfree(ef->ei); ++ kfree(ef); ++} ++ + /** + * eventfs_set_ef_status_free - set the ef->status to free + * @ti: the tracefs_inode of the dentry +@@ -270,34 +278,20 @@ void eventfs_set_ef_status_free(struct t + { + struct tracefs_inode *ti_parent; + struct eventfs_inode *ei; +- struct eventfs_file *ef, *tmp; ++ struct eventfs_file *ef; + + /* The top level events directory may be freed by this */ + if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) { +- LIST_HEAD(ef_del_list); +- + mutex_lock(&eventfs_mutex); +- + ei = ti->private; + +- /* Record all the top level files */ +- list_for_each_entry_srcu(ef, &ei->e_top_files, list, +- lockdep_is_held(&eventfs_mutex)) { +- list_add_tail(&ef->del_list, &ef_del_list); +- } +- + /* Nothing should access this, but just in case! */ + ti->private = NULL; +- + mutex_unlock(&eventfs_mutex); + +- /* Now safely free the top level files and their children */ +- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { +- list_del(&ef->del_list); +- eventfs_remove(ef); +- } +- +- kfree(ei); ++ ef = dentry->d_fsdata; ++ if (ef) ++ free_ef(ef); + return; + } + +@@ -311,16 +305,13 @@ void eventfs_set_ef_status_free(struct t + if (!ef) + goto out; + +- /* +- * If ef was freed, then the LSB bit is set for d_fsdata. +- * But this should not happen, as it should still have a +- * ref count that prevents it. Warn in case it does. +- */ +- if (WARN_ON_ONCE((unsigned long)ef & 1)) +- goto out; ++ if (ef->is_freed) { ++ free_ef(ef); ++ } else { ++ ef->dentry = NULL; ++ } + + dentry->d_fsdata = NULL; +- ef->dentry = NULL; + out: + mutex_unlock(&eventfs_mutex); + } +@@ -847,13 +838,53 @@ int eventfs_add_file(const char *name, u + return 0; + } + +-static void free_ef(struct rcu_head *head) ++static LLIST_HEAD(free_list); ++ ++static void eventfs_workfn(struct work_struct *work) ++{ ++ struct eventfs_file *ef, *tmp; ++ struct llist_node *llnode; ++ ++ llnode = llist_del_all(&free_list); ++ llist_for_each_entry_safe(ef, tmp, llnode, llist) { ++ /* This should only get here if it had a dentry */ ++ if (!WARN_ON_ONCE(!ef->dentry)) ++ dput(ef->dentry); ++ } ++} ++ ++static DECLARE_WORK(eventfs_work, eventfs_workfn); ++ ++static void free_rcu_ef(struct rcu_head *head) + { + struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu); + +- kfree(ef->name); +- kfree(ef->ei); +- kfree(ef); ++ if (ef->dentry) { ++ /* Do not free the ef until all references of dentry are gone */ ++ if (llist_add(&ef->llist, &free_list)) ++ queue_work(system_unbound_wq, &eventfs_work); ++ return; ++ } ++ ++ free_ef(ef); ++} ++ ++static void unhook_dentry(struct dentry *dentry) ++{ ++ if (!dentry) ++ return; ++ ++ /* Keep the dentry from being freed yet (see eventfs_workfn()) */ ++ dget(dentry); ++ ++ dentry->d_fsdata = NULL; ++ d_invalidate(dentry); ++ mutex_lock(&eventfs_mutex); ++ /* dentry should now have at least a single reference */ ++ WARN_ONCE((int)d_count(dentry) < 1, ++ "dentry %px (%s) less than one reference (%d) after invalidate\n", ++ dentry, dentry->d_name.name, d_count(dentry)); ++ mutex_unlock(&eventfs_mutex); + } + + /** +@@ -905,58 +936,25 @@ void eventfs_remove(struct eventfs_file + { + struct eventfs_file *tmp; + LIST_HEAD(ef_del_list); +- struct dentry *dentry_list = NULL; +- struct dentry *dentry; + + if (!ef) + return; + ++ /* ++ * Move the deleted eventfs_inodes onto the ei_del_list ++ * which will also set the is_freed value. Note, this has to be ++ * done under the eventfs_mutex, but the deletions of ++ * the dentries must be done outside the eventfs_mutex. ++ * Hence moving them to this temporary list. ++ */ + mutex_lock(&eventfs_mutex); + eventfs_remove_rec(ef, &ef_del_list, 0); +- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { +- if (ef->dentry) { +- unsigned long ptr = (unsigned long)dentry_list; +- +- /* Keep the dentry from being freed yet */ +- dget(ef->dentry); +- +- /* +- * Paranoid: The dget() above should prevent the dentry +- * from being freed and calling eventfs_set_ef_status_free(). +- * But just in case, set the link list LSB pointer to 1 +- * and have eventfs_set_ef_status_free() check that to +- * make sure that if it does happen, it will not think +- * the d_fsdata is an event_file. +- * +- * For this to work, no event_file should be allocated +- * on a odd space, as the ef should always be allocated +- * to be at least word aligned. Check for that too. +- */ +- WARN_ON_ONCE(ptr & 1); +- +- ef->dentry->d_fsdata = (void *)(ptr | 1); +- dentry_list = ef->dentry; +- ef->dentry = NULL; +- } +- call_srcu(&eventfs_srcu, &ef->rcu, free_ef); +- } + mutex_unlock(&eventfs_mutex); + +- while (dentry_list) { +- unsigned long ptr; +- +- dentry = dentry_list; +- ptr = (unsigned long)dentry->d_fsdata & ~1UL; +- dentry_list = (struct dentry *)ptr; +- dentry->d_fsdata = NULL; +- d_invalidate(dentry); +- mutex_lock(&eventfs_mutex); +- /* dentry should now have at least a single reference */ +- WARN_ONCE((int)d_count(dentry) < 1, +- "dentry %p less than one reference (%d) after invalidate\n", +- dentry, d_count(dentry)); +- mutex_unlock(&eventfs_mutex); +- dput(dentry); ++ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { ++ unhook_dentry(ef->dentry); ++ list_del(&ef->del_list); ++ call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef); + } + } + diff --git a/queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch b/queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch new file mode 100644 index 00000000000..1b3366ceb46 --- /dev/null +++ b/queue-6.6/eventfs-remove-is_freed-union-with-rcu-head.patch @@ -0,0 +1,75 @@ +From stable-owner@vger.kernel.org Sun Nov 5 17:01:44 2023 +From: Steven Rostedt +Date: Sun, 05 Nov 2023 10:56:32 -0500 +Subject:[PATCH 2/5] eventfs: Remove "is_freed" union with rcu head +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, +Cc: Masami Hiramatsu , Mark Rutland , Andrew Morton , Ajay Kaher +Message-ID: <20231105160139.498444992@goodmis.org> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +commit f2f496370afcbc5227d7002da28c74b91fed12ff upstream + +The eventfs_inode->is_freed was a union with the rcu_head with the +assumption that when it was on the srcu list the head would contain a +pointer which would make "is_freed" true. But that was a wrong assumption +as the rcu head is a single link list where the last element is NULL. + +Instead, split the nr_entries integer so that "is_freed" is one bit and +the nr_entries is the next 31 bits. As there shouldn't be more than 10 +(currently there's at most 5 to 7 depending on the config), this should +not be a problem. + +Link: https://lkml.kernel.org/r/20231101172649.049758712@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Ajay Kaher +Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions") +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -38,6 +38,7 @@ struct eventfs_inode { + * @fop: file_operations for file or directory + * @iop: inode_operations for file or directory + * @data: something that the caller will want to get to later on ++ * @is_freed: Flag set if the eventfs is on its way to be freed + * @mode: the permission that the file or directory should have + */ + struct eventfs_file { +@@ -52,15 +53,14 @@ struct eventfs_file { + * Union - used for deletion + * @del_list: list of eventfs_file to delete + * @rcu: eventfs_file to delete in RCU +- * @is_freed: node is freed if one of the above is set + */ + union { + struct list_head del_list; + struct rcu_head rcu; +- unsigned long is_freed; + }; + void *data; +- umode_t mode; ++ unsigned int is_freed:1; ++ unsigned int mode:31; + }; + + static DEFINE_MUTEX(eventfs_mutex); +@@ -814,6 +814,8 @@ static void eventfs_remove_rec(struct ev + } + } + ++ ef->is_freed = 1; ++ + list_del_rcu(&ef->list); + list_add_tail(&ef->del_list, head); + } diff --git a/queue-6.6/eventfs-save-ownership-and-mode.patch b/queue-6.6/eventfs-save-ownership-and-mode.patch new file mode 100644 index 00000000000..b3e90c04995 --- /dev/null +++ b/queue-6.6/eventfs-save-ownership-and-mode.patch @@ -0,0 +1,261 @@ +From stable-owner@vger.kernel.org Sun Nov 5 17:01:46 2023 +From: Steven Rostedt +Date: Sun, 05 Nov 2023 10:56:33 -0500 +Subject:[PATCH 3/5] eventfs: Save ownership and mode +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, +Cc: Masami Hiramatsu , Mark Rutland , Andrew Morton , Ajay Kaher +Message-ID: <20231105160139.660634360@goodmis.org> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +commit 28e12c09f5aa081b2d13d1340e3610070b6c624d upstream + +Now that inodes and dentries are created on the fly, they are also +reclaimed on memory pressure. Since the ownership and file mode are saved +in the inode, if they are freed, any changes to the ownership and mode +will be lost. + +To counter this, if the user changes the permissions or ownership, save +them, and when creating the inodes again, restore those changes. + +Link: https://lkml.kernel.org/r/20231101172649.691841445@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Ajay Kaher +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Fixes: 63940449555e7 ("eventfs: Implement eventfs lookup, read, open functions") +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 107 +++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 91 insertions(+), 16 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -40,6 +40,8 @@ struct eventfs_inode { + * @data: something that the caller will want to get to later on + * @is_freed: Flag set if the eventfs is on its way to be freed + * @mode: the permission that the file or directory should have ++ * @uid: saved uid if changed ++ * @gid: saved gid if changed + */ + struct eventfs_file { + const char *name; +@@ -61,11 +63,22 @@ struct eventfs_file { + void *data; + unsigned int is_freed:1; + unsigned int mode:31; ++ kuid_t uid; ++ kgid_t gid; + }; + + static DEFINE_MUTEX(eventfs_mutex); + DEFINE_STATIC_SRCU(eventfs_srcu); + ++/* Mode is unsigned short, use the upper bits for flags */ ++enum { ++ EVENTFS_SAVE_MODE = BIT(16), ++ EVENTFS_SAVE_UID = BIT(17), ++ EVENTFS_SAVE_GID = BIT(18), ++}; ++ ++#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) ++ + static struct dentry *eventfs_root_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags); +@@ -73,8 +86,54 @@ static int dcache_dir_open_wrapper(struc + static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); + static int eventfs_release(struct inode *inode, struct file *file); + ++static void update_attr(struct eventfs_file *ef, struct iattr *iattr) ++{ ++ unsigned int ia_valid = iattr->ia_valid; ++ ++ if (ia_valid & ATTR_MODE) { ++ ef->mode = (ef->mode & ~EVENTFS_MODE_MASK) | ++ (iattr->ia_mode & EVENTFS_MODE_MASK) | ++ EVENTFS_SAVE_MODE; ++ } ++ if (ia_valid & ATTR_UID) { ++ ef->mode |= EVENTFS_SAVE_UID; ++ ef->uid = iattr->ia_uid; ++ } ++ if (ia_valid & ATTR_GID) { ++ ef->mode |= EVENTFS_SAVE_GID; ++ ef->gid = iattr->ia_gid; ++ } ++} ++ ++static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, ++ struct iattr *iattr) ++{ ++ struct eventfs_file *ef; ++ int ret; ++ ++ mutex_lock(&eventfs_mutex); ++ ef = dentry->d_fsdata; ++ /* The LSB is set when the eventfs_inode is being freed */ ++ if (((unsigned long)ef & 1UL) || ef->is_freed) { ++ /* Do not allow changes if the event is about to be removed. */ ++ mutex_unlock(&eventfs_mutex); ++ return -ENODEV; ++ } ++ ++ ret = simple_setattr(idmap, dentry, iattr); ++ if (!ret) ++ update_attr(ef, iattr); ++ mutex_unlock(&eventfs_mutex); ++ return ret; ++} ++ + static const struct inode_operations eventfs_root_dir_inode_operations = { + .lookup = eventfs_root_lookup, ++ .setattr = eventfs_set_attr, ++}; ++ ++static const struct inode_operations eventfs_file_inode_operations = { ++ .setattr = eventfs_set_attr, + }; + + static const struct file_operations eventfs_file_operations = { +@@ -85,10 +144,20 @@ static const struct file_operations even + .release = eventfs_release, + }; + ++static void update_inode_attr(struct inode *inode, struct eventfs_file *ef) ++{ ++ inode->i_mode = ef->mode & EVENTFS_MODE_MASK; ++ ++ if (ef->mode & EVENTFS_SAVE_UID) ++ inode->i_uid = ef->uid; ++ ++ if (ef->mode & EVENTFS_SAVE_GID) ++ inode->i_gid = ef->gid; ++} ++ + /** + * create_file - create a file in the tracefs filesystem +- * @name: the name of the file to create. +- * @mode: the permission that the file should have. ++ * @ef: the eventfs_file + * @parent: parent dentry for this file. + * @data: something that the caller will want to get to later on. + * @fop: struct file_operations that should be used for this file. +@@ -104,7 +173,7 @@ static const struct file_operations even + * If tracefs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +-static struct dentry *create_file(const char *name, umode_t mode, ++static struct dentry *create_file(struct eventfs_file *ef, + struct dentry *parent, void *data, + const struct file_operations *fop) + { +@@ -112,13 +181,13 @@ static struct dentry *create_file(const + struct dentry *dentry; + struct inode *inode; + +- if (!(mode & S_IFMT)) +- mode |= S_IFREG; ++ if (!(ef->mode & S_IFMT)) ++ ef->mode |= S_IFREG; + +- if (WARN_ON_ONCE(!S_ISREG(mode))) ++ if (WARN_ON_ONCE(!S_ISREG(ef->mode))) + return NULL; + +- dentry = eventfs_start_creating(name, parent); ++ dentry = eventfs_start_creating(ef->name, parent); + + if (IS_ERR(dentry)) + return dentry; +@@ -127,7 +196,10 @@ static struct dentry *create_file(const + if (unlikely(!inode)) + return eventfs_failed_creating(dentry); + +- inode->i_mode = mode; ++ /* If the user updated the directory's attributes, use them */ ++ update_inode_attr(inode, ef); ++ ++ inode->i_op = &eventfs_file_inode_operations; + inode->i_fop = fop; + inode->i_private = data; + +@@ -140,7 +212,7 @@ static struct dentry *create_file(const + + /** + * create_dir - create a dir in the tracefs filesystem +- * @name: the name of the file to create. ++ * @ei: the eventfs_inode that represents the directory to create + * @parent: parent dentry for this file. + * @data: something that the caller will want to get to later on. + * +@@ -155,13 +227,14 @@ static struct dentry *create_file(const + * If tracefs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +-static struct dentry *create_dir(const char *name, struct dentry *parent, void *data) ++static struct dentry *create_dir(struct eventfs_file *ef, ++ struct dentry *parent, void *data) + { + struct tracefs_inode *ti; + struct dentry *dentry; + struct inode *inode; + +- dentry = eventfs_start_creating(name, parent); ++ dentry = eventfs_start_creating(ef->name, parent); + if (IS_ERR(dentry)) + return dentry; + +@@ -169,7 +242,8 @@ static struct dentry *create_dir(const c + if (unlikely(!inode)) + return eventfs_failed_creating(dentry); + +- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; ++ update_inode_attr(inode, ef); ++ + inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_fop = &eventfs_file_operations; + inode->i_private = data; +@@ -306,10 +380,9 @@ create_dentry(struct eventfs_file *ef, s + inode_lock(parent->d_inode); + + if (ef->ei) +- dentry = create_dir(ef->name, parent, ef->data); ++ dentry = create_dir(ef, parent, ef->data); + else +- dentry = create_file(ef->name, ef->mode, parent, +- ef->data, ef->fop); ++ dentry = create_file(ef, parent, ef->data, ef->fop); + + if (!lookup) + inode_unlock(parent->d_inode); +@@ -475,6 +548,7 @@ static int dcache_dir_open_wrapper(struc + if (d) { + struct dentry **tmp; + ++ + tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + if (!tmp) + break; +@@ -549,13 +623,14 @@ static struct eventfs_file *eventfs_prep + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&ef->ei->e_top_files); ++ ef->mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + } else { + ef->ei = NULL; ++ ef->mode = mode; + } + + ef->iop = iop; + ef->fop = fop; +- ef->mode = mode; + ef->data = data; + return ef; + } diff --git a/queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch b/queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch new file mode 100644 index 00000000000..29407efa805 --- /dev/null +++ b/queue-6.6/eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch @@ -0,0 +1,198 @@ +From stable-owner@vger.kernel.org Sun Nov 5 17:01:45 2023 +From: Steven Rostedt +Date: Sun, 05 Nov 2023 10:56:35 -0500 +Subject:[PATCH 5/5] eventfs: Use simple_recursive_removal() to clean up dentries +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, +Cc: Masami Hiramatsu , Mark Rutland , Andrew Morton , Al Viro +Message-ID: <20231105160139.983291500@goodmis.org> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +commit 407c6726ca71b33330d2d6345d9ea7ebc02575e9 upstream + +Looking at how dentry is removed via the tracefs system, I found that +eventfs does not do everything that it did under tracefs. The tracefs +removal of a dentry calls simple_recursive_removal() that does a lot more +than a simple d_invalidate(). + +As it should be a requirement that any eventfs_inode that has a dentry, so +does its parent. When removing a eventfs_inode, if it has a dentry, a call +to simple_recursive_removal() on that dentry should clean up all the +dentries underneath it. + +Add WARN_ON_ONCE() to check for the parent having a dentry if any children +do. + +Link: https://lore.kernel.org/all/20231101022553.GE1957730@ZenIV/ +Link: https://lkml.kernel.org/r/20231101172650.552471568@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Al Viro +Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 71 +++++++++++++++++++++-------------------------- + 1 file changed, 33 insertions(+), 38 deletions(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -54,12 +54,10 @@ struct eventfs_file { + /* + * Union - used for deletion + * @llist: for calling dput() if needed after RCU +- * @del_list: list of eventfs_file to delete + * @rcu: eventfs_file to delete in RCU + */ + union { + struct llist_node llist; +- struct list_head del_list; + struct rcu_head rcu; + }; + void *data; +@@ -276,7 +274,6 @@ static void free_ef(struct eventfs_file + */ + void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry) + { +- struct tracefs_inode *ti_parent; + struct eventfs_inode *ei; + struct eventfs_file *ef; + +@@ -297,10 +294,6 @@ void eventfs_set_ef_status_free(struct t + + mutex_lock(&eventfs_mutex); + +- ti_parent = get_tracefs(dentry->d_parent->d_inode); +- if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE)) +- goto out; +- + ef = dentry->d_fsdata; + if (!ef) + goto out; +@@ -873,30 +866,29 @@ static void unhook_dentry(struct dentry + { + if (!dentry) + return; +- +- /* Keep the dentry from being freed yet (see eventfs_workfn()) */ ++ /* ++ * Need to add a reference to the dentry that is expected by ++ * simple_recursive_removal(), which will include a dput(). ++ */ + dget(dentry); + +- dentry->d_fsdata = NULL; +- d_invalidate(dentry); +- mutex_lock(&eventfs_mutex); +- /* dentry should now have at least a single reference */ +- WARN_ONCE((int)d_count(dentry) < 1, +- "dentry %px (%s) less than one reference (%d) after invalidate\n", +- dentry, dentry->d_name.name, d_count(dentry)); +- mutex_unlock(&eventfs_mutex); ++ /* ++ * Also add a reference for the dput() in eventfs_workfn(). ++ * That is required as that dput() will free the ei after ++ * the SRCU grace period is over. ++ */ ++ dget(dentry); + } + + /** + * eventfs_remove_rec - remove eventfs dir or file from list + * @ef: eventfs_file to be removed. +- * @head: to create list of eventfs_file to be deleted + * @level: to check recursion depth + * + * The helper function eventfs_remove_rec() is used to clean up and free the + * associated data from eventfs for both of the added functions. + */ +-static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level) ++static void eventfs_remove_rec(struct eventfs_file *ef, int level) + { + struct eventfs_file *ef_child; + +@@ -916,14 +908,16 @@ static void eventfs_remove_rec(struct ev + /* search for nested folders or files */ + list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, + lockdep_is_held(&eventfs_mutex)) { +- eventfs_remove_rec(ef_child, head, level + 1); ++ eventfs_remove_rec(ef_child, level + 1); + } + } + + ef->is_freed = 1; + ++ unhook_dentry(ef->dentry); ++ + list_del_rcu(&ef->list); +- list_add_tail(&ef->del_list, head); ++ call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef); + } + + /** +@@ -934,28 +928,22 @@ static void eventfs_remove_rec(struct ev + */ + void eventfs_remove(struct eventfs_file *ef) + { +- struct eventfs_file *tmp; +- LIST_HEAD(ef_del_list); ++ struct dentry *dentry; + + if (!ef) + return; + +- /* +- * Move the deleted eventfs_inodes onto the ei_del_list +- * which will also set the is_freed value. Note, this has to be +- * done under the eventfs_mutex, but the deletions of +- * the dentries must be done outside the eventfs_mutex. +- * Hence moving them to this temporary list. +- */ + mutex_lock(&eventfs_mutex); +- eventfs_remove_rec(ef, &ef_del_list, 0); ++ dentry = ef->dentry; ++ eventfs_remove_rec(ef, 0); + mutex_unlock(&eventfs_mutex); + +- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { +- unhook_dentry(ef->dentry); +- list_del(&ef->del_list); +- call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef); +- } ++ /* ++ * If any of the ei children has a dentry, then the ei itself ++ * must have a dentry. ++ */ ++ if (dentry) ++ simple_recursive_removal(dentry, NULL); + } + + /** +@@ -966,6 +954,8 @@ void eventfs_remove(struct eventfs_file + */ + void eventfs_remove_events_dir(struct dentry *dentry) + { ++ struct eventfs_file *ef_child; ++ struct eventfs_inode *ei; + struct tracefs_inode *ti; + + if (!dentry || !dentry->d_inode) +@@ -975,6 +965,11 @@ void eventfs_remove_events_dir(struct de + if (!ti || !(ti->flags & TRACEFS_EVENT_INODE)) + return; + +- d_invalidate(dentry); +- dput(dentry); ++ mutex_lock(&eventfs_mutex); ++ ei = ti->private; ++ list_for_each_entry_srcu(ef_child, &ei->e_top_files, list, ++ lockdep_is_held(&eventfs_mutex)) { ++ eventfs_remove_rec(ef_child, 0); ++ } ++ mutex_unlock(&eventfs_mutex); + } diff --git a/queue-6.6/series b/queue-6.6/series index 8e9e59a13e8..35359b39e62 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -1,3 +1,8 @@ drm-amd-display-don-t-use-fsleep-for-psr-exit-waits.patch power-supply-core-use-blocking_notifier_call_chain-t.patch perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch +tracing-have-trace_event_file-have-ref-counters.patch +eventfs-remove-is_freed-union-with-rcu-head.patch +eventfs-save-ownership-and-mode.patch +eventfs-delete-eventfs_inode-when-the-last-dentry-is-freed.patch +eventfs-use-simple_recursive_removal-to-clean-up-dentries.patch diff --git a/queue-6.6/tracing-have-trace_event_file-have-ref-counters.patch b/queue-6.6/tracing-have-trace_event_file-have-ref-counters.patch new file mode 100644 index 00000000000..b6e87aac5d6 --- /dev/null +++ b/queue-6.6/tracing-have-trace_event_file-have-ref-counters.patch @@ -0,0 +1,243 @@ +From stable-owner@vger.kernel.org Sun Nov 5 17:01:45 2023 +From: Steven Rostedt +Date: Sun, 05 Nov 2023 10:56:31 -0500 +Subject:[PATCH 1/5] tracing: Have trace_event_file have ref counters +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, +Cc: Masami Hiramatsu , Mark Rutland , Andrew Morton , Beau Belgrave +Message-ID: <20231105160139.346174799@goodmis.org> + +From: Steven Rostedt + +From: "Steven Rostedt (Google)" + +commit bb32500fb9b78215e4ef6ee8b4345c5f5d7eafb4 upstream + +The following can crash the kernel: + + # cd /sys/kernel/tracing + # echo 'p:sched schedule' > kprobe_events + # exec 5>>events/kprobes/sched/enable + # > kprobe_events + # exec 5>&- + +The above commands: + + 1. Change directory to the tracefs directory + 2. Create a kprobe event (doesn't matter what one) + 3. Open bash file descriptor 5 on the enable file of the kprobe event + 4. Delete the kprobe event (removes the files too) + 5. Close the bash file descriptor 5 + +The above causes a crash! + + BUG: kernel NULL pointer dereference, address: 0000000000000028 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP PTI + CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty #186 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 + RIP: 0010:tracing_release_file_tr+0xc/0x50 + +What happens here is that the kprobe event creates a trace_event_file +"file" descriptor that represents the file in tracefs to the event. It +maintains state of the event (is it enabled for the given instance?). +Opening the "enable" file gets a reference to the event "file" descriptor +via the open file descriptor. When the kprobe event is deleted, the file is +also deleted from the tracefs system which also frees the event "file" +descriptor. + +But as the tracefs file is still opened by user space, it will not be +totally removed until the final dput() is called on it. But this is not +true with the event "file" descriptor that is already freed. If the user +does a write to or simply closes the file descriptor it will reference the +event "file" descriptor that was just freed, causing a use-after-free bug. + +To solve this, add a ref count to the event "file" descriptor as well as a +new flag called "FREED". The "file" will not be freed until the last +reference is released. But the FREE flag will be set when the event is +removed to prevent any more modifications to that event from happening, +even if there's still a reference to the event "file" descriptor. + +Link: https://lore.kernel.org/linux-trace-kernel/20231031000031.1e705592@gandalf.local.home/ +Link: https://lore.kernel.org/linux-trace-kernel/20231031122453.7a48b923@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Mark Rutland +Fixes: f5ca233e2e66d ("tracing: Increase trace array ref count on enable and filter files") +Reported-by: Beau Belgrave +Tested-by: Beau Belgrave +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/trace_events.h | 4 ++++ + kernel/trace/trace.c | 15 +++++++++++++++ + kernel/trace/trace.h | 3 +++ + kernel/trace/trace_events.c | 31 +++++++++++++++++++++++++++---- + kernel/trace/trace_events_filter.c | 3 +++ + 5 files changed, 52 insertions(+), 4 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -492,6 +492,7 @@ enum { + EVENT_FILE_FL_TRIGGER_COND_BIT, + EVENT_FILE_FL_PID_FILTER_BIT, + EVENT_FILE_FL_WAS_ENABLED_BIT, ++ EVENT_FILE_FL_FREED_BIT, + }; + + extern struct trace_event_file *trace_get_event_file(const char *instance, +@@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(str + * TRIGGER_COND - When set, one or more triggers has an associated filter + * PID_FILTER - When set, the event is filtered based on pid + * WAS_ENABLED - Set when enabled to know to clear trace on module removal ++ * FREED - File descriptor is freed, all fields should be considered invalid + */ + enum { + EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), +@@ -643,6 +645,7 @@ enum { + EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), + EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), + EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), ++ EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), + }; + + struct trace_event_file { +@@ -671,6 +674,7 @@ struct trace_event_file { + * caching and such. Which is mostly OK ;-) + */ + unsigned long flags; ++ atomic_t ref; /* ref count for opened files */ + atomic_t sm_ref; /* soft-mode reference counter */ + atomic_t tm_ref; /* trigger-mode reference counter */ + }; +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4986,6 +4986,20 @@ int tracing_open_file_tr(struct inode *i + if (ret) + return ret; + ++ mutex_lock(&event_mutex); ++ ++ /* Fail if the file is marked for removal */ ++ if (file->flags & EVENT_FILE_FL_FREED) { ++ trace_array_put(file->tr); ++ ret = -ENODEV; ++ } else { ++ event_file_get(file); ++ } ++ ++ mutex_unlock(&event_mutex); ++ if (ret) ++ return ret; ++ + filp->private_data = inode->i_private; + + return 0; +@@ -4996,6 +5010,7 @@ int tracing_release_file_tr(struct inode + struct trace_event_file *file = inode->i_private; + + trace_array_put(file->tr); ++ event_file_put(file); + + return 0; + } +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -1664,6 +1664,9 @@ extern void event_trigger_unregister(str + char *glob, + struct event_trigger_data *trigger_data); + ++extern void event_file_get(struct trace_event_file *file); ++extern void event_file_put(struct trace_event_file *file); ++ + /** + * struct event_trigger_ops - callbacks for trace event triggers + * +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -990,13 +990,35 @@ static void remove_subsystem(struct trac + } + } + ++void event_file_get(struct trace_event_file *file) ++{ ++ atomic_inc(&file->ref); ++} ++ ++void event_file_put(struct trace_event_file *file) ++{ ++ if (WARN_ON_ONCE(!atomic_read(&file->ref))) { ++ if (file->flags & EVENT_FILE_FL_FREED) ++ kmem_cache_free(file_cachep, file); ++ return; ++ } ++ ++ if (atomic_dec_and_test(&file->ref)) { ++ /* Count should only go to zero when it is freed */ ++ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) ++ return; ++ kmem_cache_free(file_cachep, file); ++ } ++} ++ + static void remove_event_file_dir(struct trace_event_file *file) + { + eventfs_remove(file->ef); + list_del(&file->list); + remove_subsystem(file->system); + free_event_filter(file->filter); +- kmem_cache_free(file_cachep, file); ++ file->flags |= EVENT_FILE_FL_FREED; ++ event_file_put(file); + } + + /* +@@ -1369,7 +1391,7 @@ event_enable_read(struct file *filp, cha + flags = file->flags; + mutex_unlock(&event_mutex); + +- if (!file) ++ if (!file || flags & EVENT_FILE_FL_FREED) + return -ENODEV; + + if (flags & EVENT_FILE_FL_ENABLED && +@@ -1407,7 +1429,7 @@ event_enable_write(struct file *filp, co + ret = -ENODEV; + mutex_lock(&event_mutex); + file = event_file_data(filp); +- if (likely(file)) ++ if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) + ret = ftrace_event_enable_disable(file, val); + mutex_unlock(&event_mutex); + break; +@@ -1681,7 +1703,7 @@ event_filter_read(struct file *filp, cha + + mutex_lock(&event_mutex); + file = event_file_data(filp); +- if (file) ++ if (file && !(file->flags & EVENT_FILE_FL_FREED)) + print_event_filter(file, s); + mutex_unlock(&event_mutex); + +@@ -2803,6 +2825,7 @@ trace_create_new_event(struct trace_even + atomic_set(&file->tm_ref, 0); + INIT_LIST_HEAD(&file->triggers); + list_add(&file->list, &tr->events); ++ event_file_get(file); + + return file; + } +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -2349,6 +2349,9 @@ int apply_event_filter(struct trace_even + struct event_filter *filter = NULL; + int err; + ++ if (file->flags & EVENT_FILE_FL_FREED) ++ return -ENODEV; ++ + if (!strcmp(strstrip(filter_string), "0")) { + filter_disable(file); + filter = event_filter(file);