From: Greg Kroah-Hartman Date: Thu, 13 Jun 2024 11:03:07 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v4.19.316~31 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4c18fd1ff69134f0f0cf038e3154ac5c45cda99a;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: alsa-seq-fix-incorrect-ump-type-for-system-messages.patch btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch edac-amd64-convert-pcibios_-return-codes-to-errnos.patch edac-igen6-convert-pcibios_-return-codes-to-errnos.patch eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch nfs-fix-undefined-behavior-in-nfs_block_bits.patch powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch --- diff --git a/queue-6.6/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch b/queue-6.6/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch new file mode 100644 index 00000000000..1ed656e762f --- /dev/null +++ b/queue-6.6/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch @@ -0,0 +1,40 @@ +From edb32776196afa393c074d6a2733e3a69e66b299 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 29 May 2024 10:37:59 +0200 +Subject: ALSA: seq: Fix incorrect UMP type for system messages + +From: Takashi Iwai + +commit edb32776196afa393c074d6a2733e3a69e66b299 upstream. + +When converting a legacy system message to a UMP packet, it forgot to +modify the UMP type field but keeping the default type (either type 2 +or 4). Correct to the right type for system messages. 
+ +Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") +Cc: +Link: https://lore.kernel.org/r/20240529083800.5742-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/core/seq/seq_ump_convert.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/sound/core/seq/seq_ump_convert.c ++++ b/sound/core/seq/seq_ump_convert.c +@@ -740,6 +740,7 @@ static int system_1p_ev_to_ump_midi1(con + union snd_ump_midi1_msg *data, + unsigned char status) + { ++ data->system.type = UMP_MSG_TYPE_SYSTEM; // override + data->system.status = status; + data->system.parm1 = event->data.control.value & 0x7f; + return 1; +@@ -751,6 +752,7 @@ static int system_2p_ev_to_ump_midi1(con + union snd_ump_midi1_msg *data, + unsigned char status) + { ++ data->system.type = UMP_MSG_TYPE_SYSTEM; // override + data->system.status = status; + data->system.parm1 = event->data.control.value & 0x7f; + data->system.parm2 = (event->data.control.value >> 7) & 0x7f; diff --git a/queue-6.6/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch b/queue-6.6/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch new file mode 100644 index 00000000000..2fb7994d903 --- /dev/null +++ b/queue-6.6/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch @@ -0,0 +1,218 @@ +From 9d274c19a71b3a276949933859610721a453946b Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Fri, 24 May 2024 13:58:11 -0700 +Subject: btrfs: fix crash on racing fsync and size-extending write into prealloc + +From: Omar Sandoval + +commit 9d274c19a71b3a276949933859610721a453946b upstream. + +We have been seeing crashes on duplicate keys in +btrfs_set_item_key_safe(): + + BTRFS critical (device vdb): slot 4 key (450 108 8192) new key (450 108 8192) + ------------[ cut here ]------------ + kernel BUG at fs/btrfs/ctree.c:2620! 
+ invalid opcode: 0000 [#1] PREEMPT SMP PTI + CPU: 0 PID: 3139 Comm: xfs_io Kdump: loaded Not tainted 6.9.0 #6 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 + RIP: 0010:btrfs_set_item_key_safe+0x11f/0x290 [btrfs] + +With the following stack trace: + + #0 btrfs_set_item_key_safe (fs/btrfs/ctree.c:2620:4) + #1 btrfs_drop_extents (fs/btrfs/file.c:411:4) + #2 log_one_extent (fs/btrfs/tree-log.c:4732:9) + #3 btrfs_log_changed_extents (fs/btrfs/tree-log.c:4955:9) + #4 btrfs_log_inode (fs/btrfs/tree-log.c:6626:9) + #5 btrfs_log_inode_parent (fs/btrfs/tree-log.c:7070:8) + #6 btrfs_log_dentry_safe (fs/btrfs/tree-log.c:7171:8) + #7 btrfs_sync_file (fs/btrfs/file.c:1933:8) + #8 vfs_fsync_range (fs/sync.c:188:9) + #9 vfs_fsync (fs/sync.c:202:9) + #10 do_fsync (fs/sync.c:212:9) + #11 __do_sys_fdatasync (fs/sync.c:225:9) + #12 __se_sys_fdatasync (fs/sync.c:223:1) + #13 __x64_sys_fdatasync (fs/sync.c:223:1) + #14 do_syscall_x64 (arch/x86/entry/common.c:52:14) + #15 do_syscall_64 (arch/x86/entry/common.c:83:7) + #16 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121) + +So we're logging a changed extent from fsync, which is splitting an +extent in the log tree. But this split part already exists in the tree, +triggering the BUG(). 
+ +This is the state of the log tree at the time of the crash, dumped with +drgn (https://github.com/osandov/drgn/blob/main/contrib/btrfs_tree.py) +to get more details than btrfs_print_leaf() gives us: + + >>> print_extent_buffer(prog.crashed_thread().stack_trace()[0]["eb"]) + leaf 33439744 level 0 items 72 generation 9 owner 18446744073709551610 + leaf 33439744 flags 0x100000000000000 + fs uuid e5bd3946-400c-4223-8923-190ef1f18677 + chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da + item 0 key (450 INODE_ITEM 0) itemoff 16123 itemsize 160 + generation 7 transid 9 size 8192 nbytes 8473563889606862198 + block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 + sequence 204 flags 0x10(PREALLOC) + atime 1716417703.220000000 (2024-05-22 15:41:43) + ctime 1716417704.983333333 (2024-05-22 15:41:44) + mtime 1716417704.983333333 (2024-05-22 15:41:44) + otime 17592186044416.000000000 (559444-03-08 01:40:16) + item 1 key (450 INODE_REF 256) itemoff 16110 itemsize 13 + index 195 namelen 3 name: 193 + item 2 key (450 XATTR_ITEM 1640047104) itemoff 16073 itemsize 37 + location key (0 UNKNOWN.0 0) type XATTR + transid 7 data_len 1 name_len 6 + name: user.a + data a + item 3 key (450 EXTENT_DATA 0) itemoff 16020 itemsize 53 + generation 9 type 1 (regular) + extent data disk byte 303144960 nr 12288 + extent data offset 0 nr 4096 ram 12288 + extent compression 0 (none) + item 4 key (450 EXTENT_DATA 4096) itemoff 15967 itemsize 53 + generation 9 type 2 (prealloc) + prealloc data disk byte 303144960 nr 12288 + prealloc data offset 4096 nr 8192 + item 5 key (450 EXTENT_DATA 8192) itemoff 15914 itemsize 53 + generation 9 type 2 (prealloc) + prealloc data disk byte 303144960 nr 12288 + prealloc data offset 8192 nr 4096 + ... + +So the real problem happened earlier: notice that items 4 (4k-12k) and 5 +(8k-12k) overlap. Both are prealloc extents. Item 4 straddles i_size and +item 5 starts at i_size. 
+ +Here is the state of the filesystem tree at the time of the crash: + + >>> root = prog.crashed_thread().stack_trace()[2]["inode"].root + >>> ret, nodes, slots = btrfs_search_slot(root, BtrfsKey(450, 0, 0)) + >>> print_extent_buffer(nodes[0]) + leaf 30425088 level 0 items 184 generation 9 owner 5 + leaf 30425088 flags 0x100000000000000 + fs uuid e5bd3946-400c-4223-8923-190ef1f18677 + chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da + ... + item 179 key (450 INODE_ITEM 0) itemoff 4907 itemsize 160 + generation 7 transid 7 size 4096 nbytes 12288 + block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 + sequence 6 flags 0x10(PREALLOC) + atime 1716417703.220000000 (2024-05-22 15:41:43) + ctime 1716417703.220000000 (2024-05-22 15:41:43) + mtime 1716417703.220000000 (2024-05-22 15:41:43) + otime 1716417703.220000000 (2024-05-22 15:41:43) + item 180 key (450 INODE_REF 256) itemoff 4894 itemsize 13 + index 195 namelen 3 name: 193 + item 181 key (450 XATTR_ITEM 1640047104) itemoff 4857 itemsize 37 + location key (0 UNKNOWN.0 0) type XATTR + transid 7 data_len 1 name_len 6 + name: user.a + data a + item 182 key (450 EXTENT_DATA 0) itemoff 4804 itemsize 53 + generation 9 type 1 (regular) + extent data disk byte 303144960 nr 12288 + extent data offset 0 nr 8192 ram 12288 + extent compression 0 (none) + item 183 key (450 EXTENT_DATA 8192) itemoff 4751 itemsize 53 + generation 9 type 2 (prealloc) + prealloc data disk byte 303144960 nr 12288 + prealloc data offset 8192 nr 4096 + +Item 5 in the log tree corresponds to item 183 in the filesystem tree, +but nothing matches item 4. Furthermore, item 183 is the last item in +the leaf. + +btrfs_log_prealloc_extents() is responsible for logging prealloc extents +beyond i_size. It first truncates any previously logged prealloc extents +that start beyond i_size. Then, it walks the filesystem tree and copies +the prealloc extent items to the log tree. 
+ +If it hits the end of a leaf, then it calls btrfs_next_leaf(), which +unlocks the tree and does another search. However, while the filesystem +tree is unlocked, an ordered extent completion may modify the tree. In +particular, it may insert an extent item that overlaps with an extent +item that was already copied to the log tree. + +This may manifest in several ways depending on the exact scenario, +including an EEXIST error that is silently translated to a full sync, +overlapping items in the log tree, or this crash. This particular crash +is triggered by the following sequence of events: + +- Initially, the file has i_size=4k, a regular extent from 0-4k, and a + prealloc extent beyond i_size from 4k-12k. The prealloc extent item is + the last item in its B-tree leaf. +- The file is fsync'd, which copies its inode item and both extent items + to the log tree. +- An xattr is set on the file, which sets the + BTRFS_INODE_COPY_EVERYTHING flag. +- The range 4k-8k in the file is written using direct I/O. i_size is + extended to 8k, but the ordered extent is still in flight. +- The file is fsync'd. Since BTRFS_INODE_COPY_EVERYTHING is set, this + calls copy_inode_items_to_log(), which calls + btrfs_log_prealloc_extents(). +- btrfs_log_prealloc_extents() finds the 4k-12k prealloc extent in the + filesystem tree. Since it starts before i_size, it skips it. Since it + is the last item in its B-tree leaf, it calls btrfs_next_leaf(). +- btrfs_next_leaf() unlocks the path. +- The ordered extent completion runs, which converts the 4k-8k part of + the prealloc extent to written and inserts the remaining prealloc part + from 8k-12k. +- btrfs_next_leaf() does a search and finds the new prealloc extent + 8k-12k. +- btrfs_log_prealloc_extents() copies the 8k-12k prealloc extent into + the log tree. Note that it overlaps with the 4k-12k prealloc extent + that was copied to the log tree by the first fsync. 
+- fsync calls btrfs_log_changed_extents(), which tries to log the 4k-8k + extent that was written. +- This tries to drop the range 4k-8k in the log tree, which requires + adjusting the start of the 4k-12k prealloc extent in the log tree to + 8k. +- btrfs_set_item_key_safe() sees that there is already an extent + starting at 8k in the log tree and calls BUG(). + +Fix this by detecting when we're about to insert an overlapping file +extent item in the log tree and truncating the part that would overlap. + +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Filipe Manana +Signed-off-by: Omar Sandoval +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-log.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4800,18 +4800,23 @@ static int btrfs_log_prealloc_extents(st + path->slots[0]++; + continue; + } +- if (!dropped_extents) { +- /* +- * Avoid logging extent items logged in past fsync calls +- * and leading to duplicate keys in the log tree. +- */ ++ /* ++ * Avoid overlapping items in the log tree. The first time we ++ * get here, get rid of everything from a past fsync. After ++ * that, if the current extent starts before the end of the last ++ * extent we copied, truncate the last one. This can happen if ++ * an ordered extent completion modifies the subvolume tree ++ * while btrfs_next_leaf() has the tree unlocked. 
++ */ ++ if (!dropped_extents || key.offset < truncate_offset) { + ret = truncate_inode_items(trans, root->log_root, inode, +- truncate_offset, ++ min(key.offset, truncate_offset), + BTRFS_EXTENT_DATA_KEY); + if (ret) + goto out; + dropped_extents = true; + } ++ truncate_offset = btrfs_file_extent_end(path); + if (ins_nr == 0) + start_slot = slot; + ins_nr++; diff --git a/queue-6.6/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch b/queue-6.6/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch new file mode 100644 index 00000000000..539a5b27c20 --- /dev/null +++ b/queue-6.6/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch @@ -0,0 +1,60 @@ +From fb33eb2ef0d88e75564983ef057b44c5b7e4fded Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 3 Jun 2024 12:49:08 +0100 +Subject: btrfs: fix leak of qgroup extent records after transaction abort + +From: Filipe Manana + +commit fb33eb2ef0d88e75564983ef057b44c5b7e4fded upstream. + +Qgroup extent records are created when delayed ref heads are created and +then released after accounting extents at btrfs_qgroup_account_extents(), +called during the transaction commit path. + +If a transaction is aborted we free the qgroup records by calling +btrfs_qgroup_destroy_extent_records() at btrfs_destroy_delayed_refs(), +unless we don't have delayed references. We are incorrectly assuming +that no delayed references means we don't have qgroup extents records. + +We can currently have no delayed references because we ran them all +during a transaction commit and the transaction was aborted after that +due to some error in the commit path. + +So fix this by ensuring we btrfs_qgroup_destroy_extent_records() at +btrfs_destroy_delayed_refs() even if we don't have any delayed references. 
+ +Reported-by: syzbot+0fecc032fa134afd49df@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/0000000000004e7f980619f91835@google.com/ +Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Josef Bacik +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4594,18 +4594,10 @@ static void btrfs_destroy_delayed_refs(s + struct btrfs_fs_info *fs_info) + { + struct rb_node *node; +- struct btrfs_delayed_ref_root *delayed_refs; ++ struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs; + struct btrfs_delayed_ref_node *ref; + +- delayed_refs = &trans->delayed_refs; +- + spin_lock(&delayed_refs->lock); +- if (atomic_read(&delayed_refs->num_entries) == 0) { +- spin_unlock(&delayed_refs->lock); +- btrfs_debug(fs_info, "delayed_refs has NO entry"); +- return; +- } +- + while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) { + struct btrfs_delayed_ref_head *head; + struct rb_node *n; diff --git a/queue-6.6/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch b/queue-6.6/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch new file mode 100644 index 00000000000..1ce2c12c8be --- /dev/null +++ b/queue-6.6/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch @@ -0,0 +1,65 @@ +From 3ec8ebd8a5b782d56347ae884de880af26f93996 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= +Date: Mon, 27 May 2024 16:22:34 +0300 +Subject: EDAC/amd64: Convert PCIBIOS_* return codes to errnos +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +commit 3ec8ebd8a5b782d56347ae884de880af26f93996 upstream. + +gpu_get_node_map() uses pci_read_config_dword() that returns PCIBIOS_* +codes. 
The return code is then returned all the way into the module +init function amd64_edac_init() that returns it as is. The module init +functions, however, should return normal errnos. + +Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal +errno before returning it from gpu_get_node_map(). + +For consistency, convert also the other similar cases which return +PCIBIOS_* codes even if they do not have any bugs at the moment. + +Fixes: 4251566ebc1c ("EDAC/amd64: Cache and use GPU node map") +Signed-off-by: Ilpo Järvinen +Signed-off-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240527132236.13875-1-ilpo.jarvinen@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/edac/amd64_edac.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/edac/amd64_edac.c ++++ b/drivers/edac/amd64_edac.c +@@ -80,7 +80,7 @@ int __amd64_read_pci_cfg_dword(struct pc + amd64_warn("%s: error reading F%dx%03x.\n", + func, PCI_FUNC(pdev->devfn), offset); + +- return err; ++ return pcibios_err_to_errno(err); + } + + int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, +@@ -93,7 +93,7 @@ int __amd64_write_pci_cfg_dword(struct p + amd64_warn("%s: error writing to F%dx%03x.\n", + func, PCI_FUNC(pdev->devfn), offset); + +- return err; ++ return pcibios_err_to_errno(err); + } + + /* +@@ -1016,8 +1016,10 @@ static int gpu_get_node_map(void) + } + + ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); +- if (ret) ++ if (ret) { ++ ret = pcibios_err_to_errno(ret); + goto out; ++ } + + gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp); + gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp); diff --git a/queue-6.6/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch b/queue-6.6/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch new file mode 100644 index 00000000000..475d77868ca --- /dev/null +++ 
b/queue-6.6/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch @@ -0,0 +1,51 @@ +From f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= +Date: Mon, 27 May 2024 16:22:35 +0300 +Subject: EDAC/igen6: Convert PCIBIOS_* return codes to errnos +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +commit f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 upstream. + +errcmd_enable_error_reporting() uses pci_{read,write}_config_word() +that return PCIBIOS_* codes. The return code is then returned all the +way into the probe function igen6_probe() that returns it as is. The +probe functions, however, should return normal errnos. + +Convert PCIBIOS_* return codes using pcibios_err_to_errno() into normal +errnos before returning them from errcmd_enable_error_reporting(). + +Fixes: 10590a9d4f23 ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC") +Signed-off-by: Ilpo Järvinen +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Qiuxu Zhuo +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240527132236.13875-2-ilpo.jarvinen@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/edac/igen6_edac.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/edac/igen6_edac.c ++++ b/drivers/edac/igen6_edac.c +@@ -627,7 +627,7 @@ static int errcmd_enable_error_reporting + + rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); + if (rc) +- return rc; ++ return pcibios_err_to_errno(rc); + + if (enable) + errcmd |= ERRCMD_CE | ERRSTS_UE; +@@ -636,7 +636,7 @@ static int errcmd_enable_error_reporting + + rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); + if (rc) +- return rc; ++ return pcibios_err_to_errno(rc); + + return 0; + } diff --git a/queue-6.6/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch 
b/queue-6.6/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch new file mode 100644 index 00000000000..ac05929e66f --- /dev/null +++ b/queue-6.6/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch @@ -0,0 +1,47 @@ +From d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 Mon Sep 17 00:00:00 2001 +From: Hao Ge +Date: Mon, 13 May 2024 13:33:38 +0800 +Subject: eventfs: Fix a possible null pointer dereference in eventfs_find_events() + +From: Hao Ge + +commit d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 upstream. + +In function eventfs_find_events, there is a potential null pointer +dereference that may be caused by calling update_events_attr which will perform +some operations on the members of the ei struct when ei is NULL. + +Hence, when ei->is_freed is set, return NULL directly. + +Link: https://lore.kernel.org/linux-trace-kernel/20240513053338.63017-1-hao.ge@linux.dev + +Cc: stable@vger.kernel.org +Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership") +Signed-off-by: Hao Ge +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c +index a878cea70f4c..0256afdd4acf 100644 +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -345,10 +345,9 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) + * If the ei is being freed, the ownership of the children + * doesn't matter. 
+ */ +- if (ei->is_freed) { +- ei = NULL; +- break; +- } ++ if (ei->is_freed) ++ return NULL; ++ + // Walk upwards until you find the events inode + } while (!ei->is_events); + +-- +2.45.2 + diff --git a/queue-6.6/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch b/queue-6.6/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch new file mode 100644 index 00000000000..f297499f8b9 --- /dev/null +++ b/queue-6.6/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch @@ -0,0 +1,44 @@ +From 8898e7f288c47d450a3cf1511c791a03550c0789 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Thu, 23 May 2024 01:14:26 -0400 +Subject: eventfs: Keep the directories from having the same inode number as files + +From: Steven Rostedt (Google) + +commit 8898e7f288c47d450a3cf1511c791a03550c0789 upstream. + +The directories require unique inode numbers but all the eventfs files +have the same inode number. Prevent the directories from having the same +inode numbers as the files as that can confuse some tooling. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.428826685@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Masahiro Yamada +Fixes: 834bf76add3e6 ("eventfs: Save directory inodes in the eventfs_inode structure") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -50,8 +50,12 @@ static struct eventfs_root_inode *get_ro + /* Just try to make something consistent and unique */ + static int eventfs_dir_ino(struct eventfs_inode *ei) + { +- if (!ei->ino) ++ if (!ei->ino) { + ei->ino = get_next_ino(); ++ /* Must not have the file inode number */ ++ if (ei->ino == EVENTFS_FILE_INODE_INO) ++ ei->ino = get_next_ino(); ++ } + + return ei->ino; + } diff --git a/queue-6.6/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch b/queue-6.6/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch new file mode 100644 index 00000000000..83b509ba8b3 --- /dev/null +++ b/queue-6.6/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch @@ -0,0 +1,41 @@ +From f06d1b10cb016d5aaecdb1804fefca025387bd10 Mon Sep 17 00:00:00 2001 +From: Anna Schumaker +Date: Thu, 25 Apr 2024 16:24:29 -0400 +Subject: NFS: Fix READ_PLUS when server doesn't support OP_READ_PLUS + +From: Anna Schumaker + +commit f06d1b10cb016d5aaecdb1804fefca025387bd10 upstream. + +Olga showed me a case where the client was sending multiple READ_PLUS +calls to the server in parallel, and the server replied +NFS4ERR_OPNOTSUPP to each. The client would fall back to READ for the +first reply, but fail to retry the other calls. + +I fix this by removing the test for NFS_CAP_READ_PLUS in +nfs4_read_plus_not_supported(). 
This allows us to reschedule any +READ_PLUS call that has a NFS4ERR_OPNOTSUPP return value, even after the +capability has been cleared. + +Reported-by: Olga Kornievskaia +Fixes: c567552612ec ("NFS: Add READ_PLUS data segment support") +Cc: stable@vger.kernel.org # v5.10+ +Signed-off-by: Anna Schumaker +Reviewed-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/nfs4proc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -5435,7 +5435,7 @@ static bool nfs4_read_plus_not_supported + struct rpc_message *msg = &task->tk_msg; + + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && +- server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { ++ task->tk_status == -ENOTSUPP) { + server->caps &= ~NFS_CAP_READ_PLUS; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + rpc_restart_call_prepare(task); diff --git a/queue-6.6/nfs-fix-undefined-behavior-in-nfs_block_bits.patch b/queue-6.6/nfs-fix-undefined-behavior-in-nfs_block_bits.patch new file mode 100644 index 00000000000..4afc984e286 --- /dev/null +++ b/queue-6.6/nfs-fix-undefined-behavior-in-nfs_block_bits.patch @@ -0,0 +1,38 @@ +From 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 Mon Sep 17 00:00:00 2001 +From: Sergey Shtylyov +Date: Fri, 10 May 2024 23:24:04 +0300 +Subject: nfs: fix undefined behavior in nfs_block_bits() + +From: Sergey Shtylyov + +commit 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 upstream. + +Shifting *signed int* typed constant 1 left by 31 bits causes undefined +behavior. Specify the correct *unsigned long* type by using 1UL instead. + +Found by Linux Verification Center (linuxtesting.org) with the Svace static +analysis tool. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Sergey Shtylyov +Reviewed-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/internal.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -710,9 +710,9 @@ unsigned long nfs_block_bits(unsigned lo + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned char nrbits; + +- for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) ++ for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) + ; +- bsize = 1 << nrbits; ++ bsize = 1UL << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + } diff --git a/queue-6.6/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch b/queue-6.6/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch new file mode 100644 index 00000000000..0ab77fd12c7 --- /dev/null +++ b/queue-6.6/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch @@ -0,0 +1,109 @@ +From 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d Mon Sep 17 00:00:00 2001 +From: Hari Bathini +Date: Thu, 2 May 2024 23:02:04 +0530 +Subject: powerpc/64/bpf: fix tail calls for PCREL addressing + +From: Hari Bathini + +commit 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d upstream. + +With PCREL addressing, there is no kernel TOC. So, it is not setup in +prologue when PCREL addressing is used. But the number of instructions +to skip on a tail call was not adjusted accordingly. That resulted in +not so obvious failures while using tailcalls. 'tailcalls' selftest +crashed the system with the below call trace: + + bpf_test_run+0xe8/0x3cc (unreliable) + bpf_prog_test_run_skb+0x348/0x778 + __sys_bpf+0xb04/0x2b00 + sys_bpf+0x28/0x38 + system_call_exception+0x168/0x340 + system_call_vectored_common+0x15c/0x2ec + +Also, as bpf programs are always module addresses and a bpf helper in +general is a core kernel text address, using PC relative addressing +often fails with "out of range of pcrel address" error. 
Switch to +using kernel base for relative addressing to handle this better. + +Fixes: 7e3a68be42e1 ("powerpc/64: vmlinux support building with PCREL addresing") +Cc: stable@vger.kernel.org # v6.4+ +Signed-off-by: Hari Bathini +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20240502173205.142794-1-hbathini@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit_comp64.c | 30 ++++++++++++++++-------------- + 1 file changed, 16 insertions(+), 14 deletions(-) + +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -202,7 +202,8 @@ void bpf_jit_build_epilogue(u32 *image, + EMIT(PPC_RAW_BLR()); + } + +-static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) ++static int ++bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) + { + unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + long reladdr; +@@ -211,19 +212,20 @@ static int bpf_jit_emit_func_call_hlp(u3 + return -EINVAL; + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { +- reladdr = func_addr - CTX_NIA(ctx); ++ reladdr = func_addr - local_paca->kernelbase; + + if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { +- pr_err("eBPF: address of %ps out of range of pcrel address.\n", +- (void *)func); ++ pr_err("eBPF: address of %ps out of range of 34-bit relative address.\n", ++ (void *)func); + return -ERANGE; + } +- /* pla r12,addr */ +- EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr)); +- EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr)); +- EMIT(PPC_RAW_MTCTR(_R12)); +- EMIT(PPC_RAW_BCTR()); +- ++ EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase))); ++ /* Align for subsequent prefix instruction */ ++ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8)) ++ EMIT(PPC_RAW_NOP()); ++ /* paddi r12,r12,addr */ ++ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr)); ++ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) 
| IMM_L(reladdr)); + } else { + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { +@@ -233,9 +235,9 @@ static int bpf_jit_emit_func_call_hlp(u3 + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); +- EMIT(PPC_RAW_MTCTR(_R12)); +- EMIT(PPC_RAW_BCTRL()); + } ++ EMIT(PPC_RAW_MTCTR(_R12)); ++ EMIT(PPC_RAW_BCTRL()); + + return 0; + } +@@ -285,7 +287,7 @@ static int bpf_jit_emit_tail_call(u32 *i + int b2p_index = bpf_to_ppc(BPF_REG_3); + int bpf_tailcall_prologue_size = 8; + +- if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) ++ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ + + /* +@@ -993,7 +995,7 @@ emit_clear: + return ret; + + if (func_addr_fixed) +- ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); ++ ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr); + else + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + diff --git a/queue-6.6/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch b/queue-6.6/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch new file mode 100644 index 00000000000..dd57eca5b49 --- /dev/null +++ b/queue-6.6/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch @@ -0,0 +1,138 @@ +From b1e7cee96127468c2483cf10c2899c9b5cf79bf8 Mon Sep 17 00:00:00 2001 +From: Puranjay Mohan +Date: Mon, 13 May 2024 10:02:48 +0000 +Subject: powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH + +From: Puranjay Mohan + +commit b1e7cee96127468c2483cf10c2899c9b5cf79bf8 upstream. + +The Linux Kernel Memory Model [1][2] requires RMW operations that have a +return value to be fully ordered. + +BPF atomic operations with BPF_FETCH (including BPF_XCHG and +BPF_CMPXCHG) return a value back so they need to be JITed to fully +ordered operations. 
POWERPC currently emits relaxed operations for +these. + +We can show this by running the following litmus-test: + + PPC SB+atomic_add+fetch + + { + 0:r0=x; (* dst reg assuming offset is 0 *) + 0:r1=2; (* src reg *) + 0:r2=1; + 0:r4=y; (* P0 writes to this, P1 reads this *) + 0:r5=z; (* P1 writes to this, P0 reads this *) + 0:r6=0; + + 1:r2=1; + 1:r4=y; + 1:r5=z; + } + + P0 | P1 ; + stw r2, 0(r4) | stw r2,0(r5) ; + | ; + loop:lwarx r3, r6, r0 | ; + mr r8, r3 | ; + add r3, r3, r1 | sync ; + stwcx. r3, r6, r0 | ; + bne loop | ; + mr r1, r8 | ; + | ; + lwa r7, 0(r5) | lwa r7,0(r4) ; + + ~exists(0:r7=0 /\ 1:r7=0) + + Witnesses + Positive: 9 Negative: 3 + Condition ~exists (0:r7=0 /\ 1:r7=0) + Observation SB+atomic_add+fetch Sometimes 3 9 + +This test shows that the older store in P0 is reordered with a newer +load to a different address, although there is an RMW operation with +fetch between them. Adding a sync before and after RMW fixes the issue: + + Witnesses + Positive: 9 Negative: 0 + Condition ~exists (0:r7=0 /\ 1:r7=0) + Observation SB+atomic_add+fetch Never 0 9 + +[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt +[2] https://www.kernel.org/doc/Documentation/atomic_t.txt + +Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations") +Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg") +Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations") +Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg") +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Puranjay Mohan +Reviewed-by: Christophe Leroy +Reviewed-by: Naveen N Rao +Acked-by: Paul E.
McKenney +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20240513100248.110535-1-puranjay@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++ + arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++ + 2 files changed, 24 insertions(+) + +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -851,6 +851,15 @@ int bpf_jit_build_body(struct bpf_prog * + + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); ++ /* ++ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync' ++ * before and after the operation. ++ * ++ * This is a requirement in the Linux Kernel Memory Model. ++ * See __cmpxchg_u32() in asm/cmpxchg.h as an example. ++ */ ++ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP)) ++ EMIT(PPC_RAW_SYNC()); + tmp_idx = ctx->idx * 4; + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); +@@ -904,6 +913,9 @@ int bpf_jit_build_body(struct bpf_prog * + + /* For the BPF_FETCH variant, get old data into src_reg */ + if (imm & BPF_FETCH) { ++ /* Emit 'sync' to enforce full ordering */ ++ if (IS_ENABLED(CONFIG_SMP)) ++ EMIT(PPC_RAW_SYNC()); + EMIT(PPC_RAW_MR(ret_reg, ax_reg)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */ +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -805,6 +805,15 @@ emit_clear: + + /* Get offset into TMP_REG_1 */ + EMIT(PPC_RAW_LI(tmp1_reg, off)); ++ /* ++ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync' ++ * before and after the operation. ++ * ++ * This is a requirement in the Linux Kernel Memory Model. ++ * See __cmpxchg_u64() in asm/cmpxchg.h as an example. 
++ */ ++ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP)) ++ EMIT(PPC_RAW_SYNC()); + tmp_idx = ctx->idx * 4; + /* load value from memory into TMP_REG_2 */ + if (size == BPF_DW) +@@ -867,6 +876,9 @@ emit_clear: + PPC_BCC_SHORT(COND_NE, tmp_idx); + + if (imm & BPF_FETCH) { ++ /* Emit 'sync' to enforce full ordering */ ++ if (IS_ENABLED(CONFIG_SMP)) ++ EMIT(PPC_RAW_SYNC()); + EMIT(PPC_RAW_MR(ret_reg, _R0)); + /* + * Skip unnecessary zero-extension for 32-bit cmpxchg. diff --git a/queue-6.6/series b/queue-6.6/series index 3c5bd4e99f9..11d584a217b 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -121,3 +121,16 @@ genirq-irqdesc-prevent-use-after-free-in-irq_find_at_or_after.patch asoc-sof-ipc4-topology-fix-input-format-query-of-process-modules-without-base-extension.patch alsa-ump-don-t-clear-bank-selection-after-sending-a-program-change.patch alsa-ump-don-t-accept-an-invalid-ump-protocol-number.patch +edac-amd64-convert-pcibios_-return-codes-to-errnos.patch +edac-igen6-convert-pcibios_-return-codes-to-errnos.patch +nfs-fix-undefined-behavior-in-nfs_block_bits.patch +nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch +eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch +eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch +tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch +btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch +btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch +alsa-seq-fix-incorrect-ump-type-for-system-messages.patch +powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch +powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch +smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch diff --git a/queue-6.6/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch b/queue-6.6/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch new file mode 100644 index 00000000000..dd714f0837f --- /dev/null +++ 
b/queue-6.6/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch @@ -0,0 +1,34 @@ +From 02c418774f76a0a36a6195c9dbf8971eb4130a15 Mon Sep 17 00:00:00 2001 +From: Enzo Matsumiya +Date: Thu, 6 Jun 2024 13:13:13 -0300 +Subject: smb: client: fix deadlock in smb2_find_smb_tcon() + +From: Enzo Matsumiya + +commit 02c418774f76a0a36a6195c9dbf8971eb4130a15 upstream. + +Unlock cifs_tcp_ses_lock before calling cifs_put_smb_ses() to avoid such +deadlock. + +Cc: stable@vger.kernel.org +Signed-off-by: Enzo Matsumiya +Reviewed-by: Shyam Prasad N +Reviewed-by: Paulo Alcantara (Red Hat) +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/smb2transport.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/smb/client/smb2transport.c ++++ b/fs/smb/client/smb2transport.c +@@ -216,8 +216,8 @@ smb2_find_smb_tcon(struct TCP_Server_Inf + } + tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); + if (!tcon) { +- cifs_put_smb_ses(ses); + spin_unlock(&cifs_tcp_ses_lock); ++ cifs_put_smb_ses(ses); + return NULL; + } + spin_unlock(&cifs_tcp_ses_lock); diff --git a/queue-6.6/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch b/queue-6.6/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch new file mode 100644 index 00000000000..dbfa66a2509 --- /dev/null +++ b/queue-6.6/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch @@ -0,0 +1,95 @@ +From 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Thu, 23 May 2024 01:14:29 -0400 +Subject: tracefs: Clear EVENT_INODE flag in tracefs_drop_inode() + +From: Steven Rostedt (Google) + +commit 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 upstream. + +When the inode is being dropped from the dentry, the TRACEFS_EVENT_INODE +flag needs to be cleared to prevent a remount from calling +eventfs_remount() on the tracefs_inode private data. There's a race +between when the inode is dropped (and the dentry freed) and when the inode is +actually freed.
If a remount happens between the two, the eventfs_inode +could be accessed after it is freed (only the dentry keeps a ref count on +it). + +Currently the TRACEFS_EVENT_INODE flag is cleared from the dentry iput() +function. But this is incorrect, as it is possible that the inode has +another reference to it. The flag should only be cleared when the inode is +really being dropped and has no more references. That happens in the +drop_inode callback of the inode, as that gets called when the last +reference of the inode is released. + +Remove the tracefs_d_iput() function and move its logic to the more +appropriate tracefs_drop_inode() callback function. + +Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.908205106@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Masahiro Yamada +Fixes: baa23a8d4360d ("tracefs: Reset permissions on remount if permissions are options") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 33 +++++++++++++++++---------------- + 1 file changed, 17 insertions(+), 16 deletions(-) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -439,10 +439,26 @@ static int tracefs_show_options(struct s + return 0; + } + ++static int tracefs_drop_inode(struct inode *inode) ++{ ++ struct tracefs_inode *ti = get_tracefs(inode); ++ ++ /* ++ * This inode is being freed and cannot be used for ++ * eventfs. Clear the flag so that it doesn't call into ++ * eventfs during the remount flag updates. The eventfs_inode ++ * gets freed after an RCU cycle, so the content will still ++ * be safe if the iteration is going on now. 
++ */ ++ ti->flags &= ~TRACEFS_EVENT_INODE; ++ ++ return 1; ++} ++ + static const struct super_operations tracefs_super_operations = { + .alloc_inode = tracefs_alloc_inode, + .free_inode = tracefs_free_inode, +- .drop_inode = generic_delete_inode, ++ .drop_inode = tracefs_drop_inode, + .statfs = simple_statfs, + .remount_fs = tracefs_remount, + .show_options = tracefs_show_options, +@@ -469,22 +485,7 @@ static int tracefs_d_revalidate(struct d + return !(ei && ei->is_freed); + } + +-static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) +-{ +- struct tracefs_inode *ti = get_tracefs(inode); +- +- /* +- * This inode is being freed and cannot be used for +- * eventfs. Clear the flag so that it doesn't call into +- * eventfs during the remount flag updates. The eventfs_inode +- * gets freed after an RCU cycle, so the content will still +- * be safe if the iteration is going on now. +- */ +- ti->flags &= ~TRACEFS_EVENT_INODE; +-} +- + static const struct dentry_operations tracefs_dentry_operations = { +- .d_iput = tracefs_d_iput, + .d_revalidate = tracefs_d_revalidate, + .d_release = tracefs_d_release, + };