]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 11:03:22 +0000 (13:03 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Jun 2024 11:03:22 +0000 (13:03 +0200)
added patches:
alsa-seq-fix-incorrect-ump-type-for-system-messages.patch
bpf-fix-multi-uprobe-pid-filtering-logic.patch
btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch
btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch
btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch
btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch
btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch
btrfs-re-introduce-norecovery-mount-option.patch
cifs-fix-creating-sockets-when-using-sfu-mount-options.patch
edac-amd64-convert-pcibios_-return-codes-to-errnos.patch
edac-igen6-convert-pcibios_-return-codes-to-errnos.patch
eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch
eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch
nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch
nfs-fix-undefined-behavior-in-nfs_block_bits.patch
nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch
nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch
powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch
smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch
tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch

22 files changed:
queue-6.9/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch [new file with mode: 0644]
queue-6.9/bpf-fix-multi-uprobe-pid-filtering-logic.patch [new file with mode: 0644]
queue-6.9/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch [new file with mode: 0644]
queue-6.9/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch [new file with mode: 0644]
queue-6.9/btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch [new file with mode: 0644]
queue-6.9/btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch [new file with mode: 0644]
queue-6.9/btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch [new file with mode: 0644]
queue-6.9/btrfs-re-introduce-norecovery-mount-option.patch [new file with mode: 0644]
queue-6.9/cifs-fix-creating-sockets-when-using-sfu-mount-options.patch [new file with mode: 0644]
queue-6.9/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch [new file with mode: 0644]
queue-6.9/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch [new file with mode: 0644]
queue-6.9/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch [new file with mode: 0644]
queue-6.9/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch [new file with mode: 0644]
queue-6.9/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch [new file with mode: 0644]
queue-6.9/nfs-fix-undefined-behavior-in-nfs_block_bits.patch [new file with mode: 0644]
queue-6.9/nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch [new file with mode: 0644]
queue-6.9/nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch [new file with mode: 0644]
queue-6.9/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch [new file with mode: 0644]
queue-6.9/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch [new file with mode: 0644]
queue-6.9/series
queue-6.9/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch [new file with mode: 0644]
queue-6.9/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch [new file with mode: 0644]

diff --git a/queue-6.9/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch b/queue-6.9/alsa-seq-fix-incorrect-ump-type-for-system-messages.patch
new file mode 100644 (file)
index 0000000..1ed656e
--- /dev/null
@@ -0,0 +1,40 @@
+From edb32776196afa393c074d6a2733e3a69e66b299 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Wed, 29 May 2024 10:37:59 +0200
+Subject: ALSA: seq: Fix incorrect UMP type for system messages
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit edb32776196afa393c074d6a2733e3a69e66b299 upstream.
+
+When converting a legacy system message to a UMP packet, the code forgot
+to modify the UMP type field and kept the default type (either type 2
+or 4).  Correct it to the right type for system messages.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20240529083800.5742-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/core/seq/seq_ump_convert.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -740,6 +740,7 @@ static int system_1p_ev_to_ump_midi1(con
+                                    union snd_ump_midi1_msg *data,
+                                    unsigned char status)
+ {
++      data->system.type = UMP_MSG_TYPE_SYSTEM; // override
+       data->system.status = status;
+       data->system.parm1 = event->data.control.value & 0x7f;
+       return 1;
+@@ -751,6 +752,7 @@ static int system_2p_ev_to_ump_midi1(con
+                                    union snd_ump_midi1_msg *data,
+                                    unsigned char status)
+ {
++      data->system.type = UMP_MSG_TYPE_SYSTEM; // override
+       data->system.status = status;
+       data->system.parm1 = event->data.control.value & 0x7f;
+       data->system.parm2 = (event->data.control.value >> 7) & 0x7f;
diff --git a/queue-6.9/bpf-fix-multi-uprobe-pid-filtering-logic.patch b/queue-6.9/bpf-fix-multi-uprobe-pid-filtering-logic.patch
new file mode 100644 (file)
index 0000000..9d4db30
--- /dev/null
@@ -0,0 +1,94 @@
+From 46ba0e49b64232adac35a2bc892f1710c5b0fb7f Mon Sep 17 00:00:00 2001
+From: Andrii Nakryiko <andrii@kernel.org>
+Date: Tue, 21 May 2024 09:33:57 -0700
+Subject: bpf: fix multi-uprobe PID filtering logic
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+commit 46ba0e49b64232adac35a2bc892f1710c5b0fb7f upstream.
+
+Current implementation of PID filtering logic for multi-uprobes in
+uprobe_prog_run() is filtering down to exact *thread*, while the intent
+for PID filtering is to filter by *process* instead. The check in
+uprobe_prog_run() also differs from the analogous one in
+uprobe_multi_link_filter() for some reason. The latter is correct,
+checking task->mm, not the task itself.
+
+Fix the check in uprobe_prog_run() to perform the same task->mm check.
+
+While doing this, we also update get_pid_task() use to use PIDTYPE_TGID
+type of lookup, given the intent is to get a representative task of an
+entire process. This doesn't change behavior, but seems more logical. It
+would hold task group leader task now, not any random thread task.
+
+Last but not least, given multi-uprobe support is half-broken due to
+this PID filtering logic (depending on whether PID filtering is
+important or not), we need to make it easy for user space consumers
+(including libbpf) to detect whether PID filtering logic was
+already fixed.
+
+We do it here by adding an early check on passed pid parameter. If it's
+negative (and so has no chance of being a valid PID), we return -EINVAL.
+Previous behavior would eventually return -ESRCH ("No process found"),
+given there can't be any process with negative PID. This subtle change
+won't make any practical change in behavior, but will allow applications
+to detect PID filtering fixes easily. Libbpf fixes take advantage of
+this in the next patch.
+
+Cc: stable@vger.kernel.org
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Fixes: b733eeade420 ("bpf: Add pid filter support for uprobe_multi link")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20240521163401.3005045-2-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c                                   |    8 ++++----
+ tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c |    2 +-
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -3260,7 +3260,7 @@ static int uprobe_prog_run(struct bpf_up
+       struct bpf_run_ctx *old_run_ctx;
+       int err = 0;
+-      if (link->task && current != link->task)
++      if (link->task && current->mm != link->task->mm)
+               return 0;
+       if (sleepable)
+@@ -3361,8 +3361,9 @@ int bpf_uprobe_multi_link_attach(const u
+       upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
+       uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
+       cnt = attr->link_create.uprobe_multi.cnt;
++      pid = attr->link_create.uprobe_multi.pid;
+-      if (!upath || !uoffsets || !cnt)
++      if (!upath || !uoffsets || !cnt || pid < 0)
+               return -EINVAL;
+       if (cnt > MAX_UPROBE_MULTI_CNT)
+               return -E2BIG;
+@@ -3386,10 +3387,9 @@ int bpf_uprobe_multi_link_attach(const u
+               goto error_path_put;
+       }
+-      pid = attr->link_create.uprobe_multi.pid;
+       if (pid) {
+               rcu_read_lock();
+-              task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
++              task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+               rcu_read_unlock();
+               if (!task) {
+                       err = -ESRCH;
+--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
++++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+@@ -397,7 +397,7 @@ static void test_attach_api_fails(void)
+       link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+       if (!ASSERT_ERR(link_fd, "link_fd"))
+               goto cleanup;
+-      ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
++      ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
+ cleanup:
+       if (link_fd >= 0)
diff --git a/queue-6.9/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch b/queue-6.9/btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch
new file mode 100644 (file)
index 0000000..a173202
--- /dev/null
@@ -0,0 +1,218 @@
+From 9d274c19a71b3a276949933859610721a453946b Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Fri, 24 May 2024 13:58:11 -0700
+Subject: btrfs: fix crash on racing fsync and size-extending write into prealloc
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 9d274c19a71b3a276949933859610721a453946b upstream.
+
+We have been seeing crashes on duplicate keys in
+btrfs_set_item_key_safe():
+
+  BTRFS critical (device vdb): slot 4 key (450 108 8192) new key (450 108 8192)
+  ------------[ cut here ]------------
+  kernel BUG at fs/btrfs/ctree.c:2620!
+  invalid opcode: 0000 [#1] PREEMPT SMP PTI
+  CPU: 0 PID: 3139 Comm: xfs_io Kdump: loaded Not tainted 6.9.0 #6
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014
+  RIP: 0010:btrfs_set_item_key_safe+0x11f/0x290 [btrfs]
+
+With the following stack trace:
+
+  #0  btrfs_set_item_key_safe (fs/btrfs/ctree.c:2620:4)
+  #1  btrfs_drop_extents (fs/btrfs/file.c:411:4)
+  #2  log_one_extent (fs/btrfs/tree-log.c:4732:9)
+  #3  btrfs_log_changed_extents (fs/btrfs/tree-log.c:4955:9)
+  #4  btrfs_log_inode (fs/btrfs/tree-log.c:6626:9)
+  #5  btrfs_log_inode_parent (fs/btrfs/tree-log.c:7070:8)
+  #6  btrfs_log_dentry_safe (fs/btrfs/tree-log.c:7171:8)
+  #7  btrfs_sync_file (fs/btrfs/file.c:1933:8)
+  #8  vfs_fsync_range (fs/sync.c:188:9)
+  #9  vfs_fsync (fs/sync.c:202:9)
+  #10 do_fsync (fs/sync.c:212:9)
+  #11 __do_sys_fdatasync (fs/sync.c:225:9)
+  #12 __se_sys_fdatasync (fs/sync.c:223:1)
+  #13 __x64_sys_fdatasync (fs/sync.c:223:1)
+  #14 do_syscall_x64 (arch/x86/entry/common.c:52:14)
+  #15 do_syscall_64 (arch/x86/entry/common.c:83:7)
+  #16 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121)
+
+So we're logging a changed extent from fsync, which is splitting an
+extent in the log tree. But this split part already exists in the tree,
+triggering the BUG().
+
+This is the state of the log tree at the time of the crash, dumped with
+drgn (https://github.com/osandov/drgn/blob/main/contrib/btrfs_tree.py)
+to get more details than btrfs_print_leaf() gives us:
+
+  >>> print_extent_buffer(prog.crashed_thread().stack_trace()[0]["eb"])
+  leaf 33439744 level 0 items 72 generation 9 owner 18446744073709551610
+  leaf 33439744 flags 0x100000000000000
+  fs uuid e5bd3946-400c-4223-8923-190ef1f18677
+  chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
+          item 0 key (450 INODE_ITEM 0) itemoff 16123 itemsize 160
+                  generation 7 transid 9 size 8192 nbytes 8473563889606862198
+                  block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
+                  sequence 204 flags 0x10(PREALLOC)
+                  atime 1716417703.220000000 (2024-05-22 15:41:43)
+                  ctime 1716417704.983333333 (2024-05-22 15:41:44)
+                  mtime 1716417704.983333333 (2024-05-22 15:41:44)
+                  otime 17592186044416.000000000 (559444-03-08 01:40:16)
+          item 1 key (450 INODE_REF 256) itemoff 16110 itemsize 13
+                  index 195 namelen 3 name: 193
+          item 2 key (450 XATTR_ITEM 1640047104) itemoff 16073 itemsize 37
+                  location key (0 UNKNOWN.0 0) type XATTR
+                  transid 7 data_len 1 name_len 6
+                  name: user.a
+                  data a
+          item 3 key (450 EXTENT_DATA 0) itemoff 16020 itemsize 53
+                  generation 9 type 1 (regular)
+                  extent data disk byte 303144960 nr 12288
+                  extent data offset 0 nr 4096 ram 12288
+                  extent compression 0 (none)
+          item 4 key (450 EXTENT_DATA 4096) itemoff 15967 itemsize 53
+                  generation 9 type 2 (prealloc)
+                  prealloc data disk byte 303144960 nr 12288
+                  prealloc data offset 4096 nr 8192
+          item 5 key (450 EXTENT_DATA 8192) itemoff 15914 itemsize 53
+                  generation 9 type 2 (prealloc)
+                  prealloc data disk byte 303144960 nr 12288
+                  prealloc data offset 8192 nr 4096
+  ...
+
+So the real problem happened earlier: notice that items 4 (4k-12k) and 5
+(8k-12k) overlap. Both are prealloc extents. Item 4 straddles i_size and
+item 5 starts at i_size.
+
+Here is the state of the filesystem tree at the time of the crash:
+
+  >>> root = prog.crashed_thread().stack_trace()[2]["inode"].root
+  >>> ret, nodes, slots = btrfs_search_slot(root, BtrfsKey(450, 0, 0))
+  >>> print_extent_buffer(nodes[0])
+  leaf 30425088 level 0 items 184 generation 9 owner 5
+  leaf 30425088 flags 0x100000000000000
+  fs uuid e5bd3946-400c-4223-8923-190ef1f18677
+  chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
+       ...
+          item 179 key (450 INODE_ITEM 0) itemoff 4907 itemsize 160
+                  generation 7 transid 7 size 4096 nbytes 12288
+                  block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
+                  sequence 6 flags 0x10(PREALLOC)
+                  atime 1716417703.220000000 (2024-05-22 15:41:43)
+                  ctime 1716417703.220000000 (2024-05-22 15:41:43)
+                  mtime 1716417703.220000000 (2024-05-22 15:41:43)
+                  otime 1716417703.220000000 (2024-05-22 15:41:43)
+          item 180 key (450 INODE_REF 256) itemoff 4894 itemsize 13
+                  index 195 namelen 3 name: 193
+          item 181 key (450 XATTR_ITEM 1640047104) itemoff 4857 itemsize 37
+                  location key (0 UNKNOWN.0 0) type XATTR
+                  transid 7 data_len 1 name_len 6
+                  name: user.a
+                  data a
+          item 182 key (450 EXTENT_DATA 0) itemoff 4804 itemsize 53
+                  generation 9 type 1 (regular)
+                  extent data disk byte 303144960 nr 12288
+                  extent data offset 0 nr 8192 ram 12288
+                  extent compression 0 (none)
+          item 183 key (450 EXTENT_DATA 8192) itemoff 4751 itemsize 53
+                  generation 9 type 2 (prealloc)
+                  prealloc data disk byte 303144960 nr 12288
+                  prealloc data offset 8192 nr 4096
+
+Item 5 in the log tree corresponds to item 183 in the filesystem tree,
+but nothing matches item 4. Furthermore, item 183 is the last item in
+the leaf.
+
+btrfs_log_prealloc_extents() is responsible for logging prealloc extents
+beyond i_size. It first truncates any previously logged prealloc extents
+that start beyond i_size. Then, it walks the filesystem tree and copies
+the prealloc extent items to the log tree.
+
+If it hits the end of a leaf, then it calls btrfs_next_leaf(), which
+unlocks the tree and does another search. However, while the filesystem
+tree is unlocked, an ordered extent completion may modify the tree. In
+particular, it may insert an extent item that overlaps with an extent
+item that was already copied to the log tree.
+
+This may manifest in several ways depending on the exact scenario,
+including an EEXIST error that is silently translated to a full sync,
+overlapping items in the log tree, or this crash. This particular crash
+is triggered by the following sequence of events:
+
+- Initially, the file has i_size=4k, a regular extent from 0-4k, and a
+  prealloc extent beyond i_size from 4k-12k. The prealloc extent item is
+  the last item in its B-tree leaf.
+- The file is fsync'd, which copies its inode item and both extent items
+  to the log tree.
+- An xattr is set on the file, which sets the
+  BTRFS_INODE_COPY_EVERYTHING flag.
+- The range 4k-8k in the file is written using direct I/O. i_size is
+  extended to 8k, but the ordered extent is still in flight.
+- The file is fsync'd. Since BTRFS_INODE_COPY_EVERYTHING is set, this
+  calls copy_inode_items_to_log(), which calls
+  btrfs_log_prealloc_extents().
+- btrfs_log_prealloc_extents() finds the 4k-12k prealloc extent in the
+  filesystem tree. Since it starts before i_size, it skips it. Since it
+  is the last item in its B-tree leaf, it calls btrfs_next_leaf().
+- btrfs_next_leaf() unlocks the path.
+- The ordered extent completion runs, which converts the 4k-8k part of
+  the prealloc extent to written and inserts the remaining prealloc part
+  from 8k-12k.
+- btrfs_next_leaf() does a search and finds the new prealloc extent
+  8k-12k.
+- btrfs_log_prealloc_extents() copies the 8k-12k prealloc extent into
+  the log tree. Note that it overlaps with the 4k-12k prealloc extent
+  that was copied to the log tree by the first fsync.
+- fsync calls btrfs_log_changed_extents(), which tries to log the 4k-8k
+  extent that was written.
+- This tries to drop the range 4k-8k in the log tree, which requires
+  adjusting the start of the 4k-12k prealloc extent in the log tree to
+  8k.
+- btrfs_set_item_key_safe() sees that there is already an extent
+  starting at 8k in the log tree and calls BUG().
+
+Fix this by detecting when we're about to insert an overlapping file
+extent item in the log tree and truncating the part that would overlap.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c |   17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4856,18 +4856,23 @@ static int btrfs_log_prealloc_extents(st
+                       path->slots[0]++;
+                       continue;
+               }
+-              if (!dropped_extents) {
+-                      /*
+-                       * Avoid logging extent items logged in past fsync calls
+-                       * and leading to duplicate keys in the log tree.
+-                       */
++              /*
++               * Avoid overlapping items in the log tree. The first time we
++               * get here, get rid of everything from a past fsync. After
++               * that, if the current extent starts before the end of the last
++               * extent we copied, truncate the last one. This can happen if
++               * an ordered extent completion modifies the subvolume tree
++               * while btrfs_next_leaf() has the tree unlocked.
++               */
++              if (!dropped_extents || key.offset < truncate_offset) {
+                       ret = truncate_inode_items(trans, root->log_root, inode,
+-                                                 truncate_offset,
++                                                 min(key.offset, truncate_offset),
+                                                  BTRFS_EXTENT_DATA_KEY);
+                       if (ret)
+                               goto out;
+                       dropped_extents = true;
+               }
++              truncate_offset = btrfs_file_extent_end(path);
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
diff --git a/queue-6.9/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch b/queue-6.9/btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch
new file mode 100644 (file)
index 0000000..7c17f23
--- /dev/null
@@ -0,0 +1,60 @@
+From fb33eb2ef0d88e75564983ef057b44c5b7e4fded Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 3 Jun 2024 12:49:08 +0100
+Subject: btrfs: fix leak of qgroup extent records after transaction abort
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit fb33eb2ef0d88e75564983ef057b44c5b7e4fded upstream.
+
+Qgroup extent records are created when delayed ref heads are created and
+then released after accounting extents at btrfs_qgroup_account_extents(),
+called during the transaction commit path.
+
+If a transaction is aborted we free the qgroup records by calling
+btrfs_qgroup_destroy_extent_records() at btrfs_destroy_delayed_refs(),
+unless we don't have delayed references. We are incorrectly assuming
+that no delayed references means we don't have qgroup extents records.
+
+We can currently have no delayed references because we ran them all
+during a transaction commit and the transaction was aborted after that
+due to some error in the commit path.
+
+So fix this by ensuring we call btrfs_qgroup_destroy_extent_records() at
+btrfs_destroy_delayed_refs() even if we don't have any delayed references.
+
+Reported-by: syzbot+0fecc032fa134afd49df@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/0000000000004e7f980619f91835@google.com/
+Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c |   10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -4544,18 +4544,10 @@ static void btrfs_destroy_delayed_refs(s
+                                      struct btrfs_fs_info *fs_info)
+ {
+       struct rb_node *node;
+-      struct btrfs_delayed_ref_root *delayed_refs;
++      struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
+       struct btrfs_delayed_ref_node *ref;
+-      delayed_refs = &trans->delayed_refs;
+-
+       spin_lock(&delayed_refs->lock);
+-      if (atomic_read(&delayed_refs->num_entries) == 0) {
+-              spin_unlock(&delayed_refs->lock);
+-              btrfs_debug(fs_info, "delayed_refs has NO entry");
+-              return;
+-      }
+-
+       while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
+               struct btrfs_delayed_ref_head *head;
+               struct rb_node *n;
diff --git a/queue-6.9/btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch b/queue-6.9/btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch
new file mode 100644 (file)
index 0000000..eefb650
--- /dev/null
@@ -0,0 +1,279 @@
+From f3a5367c679d31473d3fbb391675055b4792c309 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 6 Jun 2024 11:01:51 +0930
+Subject: btrfs: protect folio::private when attaching extent buffer folios
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f3a5367c679d31473d3fbb391675055b4792c309 upstream.
+
+[BUG]
+Since v6.8 there are rare kernel crashes reported by various people,
+the common factor is bad page status error messages like this:
+
+  BUG: Bad page state in process kswapd0  pfn:d6e840
+  page: refcount:0 mapcount:0 mapping:000000007512f4f2 index:0x2796c2c7c
+  pfn:0xd6e840
+  aops:btree_aops ino:1
+  flags: 0x17ffffe0000008(uptodate|node=0|zone=2|lastcpupid=0x3fffff)
+  page_type: 0xffffffff()
+  raw: 0017ffffe0000008 dead000000000100 dead000000000122 ffff88826d0be4c0
+  raw: 00000002796c2c7c 0000000000000000 00000000ffffffff 0000000000000000
+  page dumped because: non-NULL mapping
+
+[CAUSE]
+Commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
+allocate-then-attach method") changes the sequence when allocating a new
+extent buffer.
+
+Previously we always called grab_extent_buffer() under
+mapping->i_private_lock, to ensure the safety on modification on
+folio::private (which is a pointer to extent buffer for regular
+sectorsize).
+
+Without that lock, the following race can happen:
+
+Thread A is trying to allocate an extent buffer at bytenr X, with 4
+4K pages, meanwhile thread B is trying to release the page at X + 4K
+(the second page of the extent buffer at X).
+
+           Thread A                |                 Thread B
+-----------------------------------+-------------------------------------
+                                   | btree_release_folio()
+                                  | | This is for the page at X + 4K,
+                                  | | Not page X.
+                                  | |
+alloc_extent_buffer()              | |- release_extent_buffer()
+|- filemap_add_folio() for the     | |  |- atomic_dec_and_test(eb->refs)
+|  page at bytenr X (the first     | |  |
+|  page).                          | |  |
+|  Which returned -EEXIST.         | |  |
+|                                  | |  |
+|- filemap_lock_folio()            | |  |
+|  Returned the first page locked. | |  |
+|                                  | |  |
+|- grab_extent_buffer()            | |  |
+|  |- atomic_inc_not_zero()        | |  |
+|  |  Returned false               | |  |
+|  |- folio_detach_private()       | |  |- folio_detach_private() for X
+|     |- folio_test_private()      | |     |- folio_test_private()
+      |  Returned true             | |     |  Returned true
+      |- folio_put()               |       |- folio_put()
+
+Now there are two puts on the same folio at folio X, leading to refcount
+underflow of the folio X, and eventually causing the BUG_ON() on the
+page->mapping.
+
+The condition is not that easy to hit:
+
+- The release must be triggered for the middle page of an eb
+  If the release is on the same first page of an eb, page lock would kick
+  in and prevent the race.
+
+- folio_detach_private() has a very small race window
+  It's only between folio_test_private() and folio_clear_private().
+
+That's exactly when mapping->i_private_lock is used to prevent such race,
+and commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
+allocate-then-attach method") screwed that up.
+
+At that time, I thought the page lock would kick in as
+filemap_release_folio() also requires the page to be locked, but forgot
+the filemap_release_folio() only locks one page, not all pages of an
+extent buffer.
+
+[FIX]
+Move all the code requiring i_private_lock into
+attach_eb_folio_to_filemap(), so that everything is done with proper
+lock protection.
+
+Furthermore, to prevent future problems, add an extra
+lockdep_assert_held() to ensure we're holding the proper lock.
+
+The reproducer that is able to hit the race (takes a few minutes with
+instrumented code inserting delays to alloc_extent_buffer()):
+
+  #!/bin/sh
+  drop_caches () {
+         while(true); do
+                 echo 3 > /proc/sys/vm/drop_caches
+                 echo 1 > /proc/sys/vm/compact_memory
+         done
+  }
+
+  run_tar () {
+         while(true); do
+                 for x in `seq 1 80` ; do
+                         tar cf /dev/zero /mnt > /dev/null &
+                 done
+                 wait
+         done
+  }
+
+  mkfs.btrfs -f -d single -m single /dev/vda
+  mount -o noatime /dev/vda /mnt
+  # create 200,000 files, 1K each
+  ./simoop -n 200000 -E -f 1k /mnt
+  drop_caches &
+  (run_tar)
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/linux-btrfs/CAHk-=wgt362nGfScVOOii8cgKn2LVVHeOvOA7OBwg1OwbuJQcw@mail.gmail.com/
+Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
+Link: https://lore.kernel.org/lkml/CABXGCsPktcHQOvKTbPaTwegMExije=Gpgci5NW=hqORo-s7diA@mail.gmail.com/
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Link: https://lore.kernel.org/linux-btrfs/e8b3311c-9a75-4903-907f-fc0f7a3fe423@gmx.de/
+Reported-by: syzbot+f80b066392366b4af85e@syzkaller.appspotmail.com
+Fixes: 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to allocate-then-attach method")
+CC: stable@vger.kernel.org # 6.8+
+CC: Chris Mason <clm@fb.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c |   60 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 31 insertions(+), 29 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3662,6 +3662,8 @@ static struct extent_buffer *grab_extent
+       struct folio *folio = page_folio(page);
+       struct extent_buffer *exists;
++      lockdep_assert_held(&page->mapping->i_private_lock);
++
+       /*
+        * For subpage case, we completely rely on radix tree to ensure we
+        * don't try to insert two ebs for the same bytenr.  So here we always
+@@ -3729,13 +3731,14 @@ static int check_eb_alignment(struct btr
+  * The caller needs to free the existing folios and retry using the same order.
+  */
+ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
++                                    struct btrfs_subpage *prealloc,
+                                     struct extent_buffer **found_eb_ret)
+ {
+       struct btrfs_fs_info *fs_info = eb->fs_info;
+       struct address_space *mapping = fs_info->btree_inode->i_mapping;
+       const unsigned long index = eb->start >> PAGE_SHIFT;
+-      struct folio *existing_folio;
++      struct folio *existing_folio = NULL;
+       int ret;
+       ASSERT(found_eb_ret);
+@@ -3747,12 +3750,14 @@ retry:
+       ret = filemap_add_folio(mapping, eb->folios[i], index + i,
+                               GFP_NOFS | __GFP_NOFAIL);
+       if (!ret)
+-              return 0;
++              goto finish;
+       existing_folio = filemap_lock_folio(mapping, index + i);
+       /* The page cache only exists for a very short time, just retry. */
+-      if (IS_ERR(existing_folio))
++      if (IS_ERR(existing_folio)) {
++              existing_folio = NULL;
+               goto retry;
++      }
+       /* For now, we should only have single-page folios for btree inode. */
+       ASSERT(folio_nr_pages(existing_folio) == 1);
+@@ -3763,14 +3768,13 @@ retry:
+               return -EAGAIN;
+       }
+-      if (fs_info->nodesize < PAGE_SIZE) {
+-              /*
+-               * We're going to reuse the existing page, can drop our page
+-               * and subpage structure now.
+-               */
++finish:
++      spin_lock(&mapping->i_private_lock);
++      if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
++              /* We're going to reuse the existing page, can drop our folio now. */
+               __free_page(folio_page(eb->folios[i], 0));
+               eb->folios[i] = existing_folio;
+-      } else {
++      } else if (existing_folio) {
+               struct extent_buffer *existing_eb;
+               existing_eb = grab_extent_buffer(fs_info,
+@@ -3778,6 +3782,7 @@ retry:
+               if (existing_eb) {
+                       /* The extent buffer still exists, we can use it directly. */
+                       *found_eb_ret = existing_eb;
++                      spin_unlock(&mapping->i_private_lock);
+                       folio_unlock(existing_folio);
+                       folio_put(existing_folio);
+                       return 1;
+@@ -3786,6 +3791,22 @@ retry:
+               __free_page(folio_page(eb->folios[i], 0));
+               eb->folios[i] = existing_folio;
+       }
++      eb->folio_size = folio_size(eb->folios[i]);
++      eb->folio_shift = folio_shift(eb->folios[i]);
++      /* Should not fail, as we have preallocated the memory. */
++      ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
++      ASSERT(!ret);
++      /*
++       * To inform we have an extra eb under allocation, so that
++       * detach_extent_buffer_page() won't release the folio private when the
++       * eb hasn't been inserted into radix tree yet.
++       *
++       * The ref will be decreased when the eb releases the page, in
++       * detach_extent_buffer_page().  Thus needs no special handling in the
++       * error path.
++       */
++      btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
++      spin_unlock(&mapping->i_private_lock);
+       return 0;
+ }
+@@ -3797,7 +3818,6 @@ struct extent_buffer *alloc_extent_buffe
+       int attached = 0;
+       struct extent_buffer *eb;
+       struct extent_buffer *existing_eb = NULL;
+-      struct address_space *mapping = fs_info->btree_inode->i_mapping;
+       struct btrfs_subpage *prealloc = NULL;
+       u64 lockdep_owner = owner_root;
+       bool page_contig = true;
+@@ -3863,7 +3883,7 @@ reallocate:
+       for (int i = 0; i < num_folios; i++) {
+               struct folio *folio;
+-              ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
++              ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
+               if (ret > 0) {
+                       ASSERT(existing_eb);
+                       goto out;
+@@ -3900,24 +3920,6 @@ reallocate:
+                * and free the allocated page.
+                */
+               folio = eb->folios[i];
+-              eb->folio_size = folio_size(folio);
+-              eb->folio_shift = folio_shift(folio);
+-              spin_lock(&mapping->i_private_lock);
+-              /* Should not fail, as we have preallocated the memory */
+-              ret = attach_extent_buffer_folio(eb, folio, prealloc);
+-              ASSERT(!ret);
+-              /*
+-               * To inform we have extra eb under allocation, so that
+-               * detach_extent_buffer_page() won't release the folio private
+-               * when the eb hasn't yet been inserted into radix tree.
+-               *
+-               * The ref will be decreased when the eb released the page, in
+-               * detach_extent_buffer_page().
+-               * Thus needs no special handling in error path.
+-               */
+-              btrfs_folio_inc_eb_refs(fs_info, folio);
+-              spin_unlock(&mapping->i_private_lock);
+-
+               WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
+               /*
diff --git a/queue-6.9/btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch b/queue-6.9/btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch
new file mode 100644 (file)
index 0000000..479d316
--- /dev/null
@@ -0,0 +1,66 @@
+From 2b8aa78cf1279ec5e418baa26bfed5df682568d8 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Thu, 9 May 2024 15:34:40 -0700
+Subject: btrfs: qgroup: fix qgroup id collision across mounts
+
+From: Boris Burkov <boris@bur.io>
+
+commit 2b8aa78cf1279ec5e418baa26bfed5df682568d8 upstream.
+
+If we delete subvolumes whose ID is the largest in the filesystem, then
+unmount and mount again, then btrfs_init_root_free_objectid on the
+tree_root will select a subvolid smaller than that one and thus allow
+reusing it.
+
+If we are also using qgroups (and particularly squotas) it is possible
+to delete the subvol without deleting the qgroup. In that case, we will
+be able to create a new subvol whose id already has a level 0 qgroup.
+This will result in re-using that qgroup which would then lead to
+incorrect accounting.
+
+Fixes: 6ed05643ddb1 ("btrfs: create qgroup earlier in snapshot creation")
+CC: stable@vger.kernel.org # 6.7+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -468,6 +468,7 @@ int btrfs_read_qgroup_config(struct btrf
+               }
+               if (!qgroup) {
+                       struct btrfs_qgroup *prealloc;
++                      struct btrfs_root *tree_root = fs_info->tree_root;
+                       prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
+                       if (!prealloc) {
+@@ -475,6 +476,25 @@ int btrfs_read_qgroup_config(struct btrf
+                               goto out;
+                       }
+                       qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset);
++                      /*
++                       * If a qgroup exists for a subvolume ID, it is possible
++                       * that subvolume has been deleted, in which case
++                       * re-using that ID would lead to incorrect accounting.
++                       *
++                       * Ensure that we skip any such subvol ids.
++                       *
++                       * We don't need to lock because this is only called
++                       * during mount before we start doing things like creating
++                       * subvolumes.
++                       */
++                      if (is_fstree(qgroup->qgroupid) &&
++                          qgroup->qgroupid > tree_root->free_objectid)
++                              /*
++                               * Don't need to check against BTRFS_LAST_FREE_OBJECTID,
++                               * as it will get checked on the next call to
++                               * btrfs_get_free_objectid.
++                               */
++                              tree_root->free_objectid = qgroup->qgroupid + 1;
+               }
+               ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+               if (ret < 0)
diff --git a/queue-6.9/btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch b/queue-6.9/btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch
new file mode 100644 (file)
index 0000000..c8f28e4
--- /dev/null
@@ -0,0 +1,65 @@
+From 1fa7603d569b9e738e9581937ba8725cd7d39b48 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Thu, 2 May 2024 22:45:58 +0200
+Subject: btrfs: qgroup: update rescan message levels and error codes
+
+From: David Sterba <dsterba@suse.com>
+
+commit 1fa7603d569b9e738e9581937ba8725cd7d39b48 upstream.
+
+On filesystems without enabled quotas there's still a warning message in
+the logs when rescan is called. In that case it's not a problem that
+should be reported, rescan can be called unconditionally.  Change the
+error code to ENOTCONN which is used for 'quotas not enabled' elsewhere.
+
+Remove message (also a warning) when rescan is called during an ongoing
+rescan, this brings no useful information and the error code is
+sufficient.
+
+Change message levels to debug for now, they can be removed eventually.
+
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3826,14 +3826,14 @@ qgroup_rescan_init(struct btrfs_fs_info
+               /* we're resuming qgroup rescan at mount time */
+               if (!(fs_info->qgroup_flags &
+                     BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
+-                      btrfs_warn(fs_info,
++                      btrfs_debug(fs_info,
+                       "qgroup rescan init failed, qgroup rescan is not queued");
+                       ret = -EINVAL;
+               } else if (!(fs_info->qgroup_flags &
+                            BTRFS_QGROUP_STATUS_FLAG_ON)) {
+-                      btrfs_warn(fs_info,
++                      btrfs_debug(fs_info,
+                       "qgroup rescan init failed, qgroup is not enabled");
+-                      ret = -EINVAL;
++                      ret = -ENOTCONN;
+               }
+               if (ret)
+@@ -3844,14 +3844,12 @@ qgroup_rescan_init(struct btrfs_fs_info
+       if (init_flags) {
+               if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+-                      btrfs_warn(fs_info,
+-                                 "qgroup rescan is already in progress");
+                       ret = -EINPROGRESS;
+               } else if (!(fs_info->qgroup_flags &
+                            BTRFS_QGROUP_STATUS_FLAG_ON)) {
+-                      btrfs_warn(fs_info,
++                      btrfs_debug(fs_info,
+                       "qgroup rescan init failed, qgroup is not enabled");
+-                      ret = -EINVAL;
++                      ret = -ENOTCONN;
+               } else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
+                       /* Quota disable is in progress */
+                       ret = -EBUSY;
diff --git a/queue-6.9/btrfs-re-introduce-norecovery-mount-option.patch b/queue-6.9/btrfs-re-introduce-norecovery-mount-option.patch
new file mode 100644 (file)
index 0000000..be11a39
--- /dev/null
@@ -0,0 +1,68 @@
+From 440861b1a03c72cc7be4a307e178dcaa6894479b Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 May 2024 19:27:31 +0930
+Subject: btrfs: re-introduce 'norecovery' mount option
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 440861b1a03c72cc7be4a307e178dcaa6894479b upstream.
+
+Although 'norecovery' mount option was marked as deprecated for a long
+time and a warning message was printed during the deprecation window,
+it's still actively utilized by several projects that need a safer way
+to mount a btrfs without any writes.
+
+Furthermore this 'norecovery' mount option is supported by other major
+filesystems, which makes it less clear what's our motivation to remove
+it.
+
+Re-introduce the 'norecovery' mount option, and output a message to recommend
+'rescue=nologreplay' option.
+
+Link: https://lore.kernel.org/linux-btrfs/ZkxZT0J-z0GYvfy8@gardel-login/#t
+Link: https://github.com/systemd/systemd/pull/32892
+Link: https://bugzilla.suse.com/show_bug.cgi?id=1222429
+Reported-by: Lennart Poettering <lennart@poettering.net>
+Reported-by: Jiri Slaby <jslaby@suse.com>
+Fixes: a1912f712188 ("btrfs: remove code for inode_cache and recovery mount options")
+CC: stable@vger.kernel.org # 6.8+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -119,6 +119,7 @@ enum {
+       Opt_thread_pool,
+       Opt_treelog,
+       Opt_user_subvol_rm_allowed,
++      Opt_norecovery,
+       /* Rescue options */
+       Opt_rescue,
+@@ -245,6 +246,8 @@ static const struct fs_parameter_spec bt
+       __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL),
+       /* Deprecated, with alias rescue=usebackuproot */
+       __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL),
++      /* For compatibility only, alias for "rescue=nologreplay". */
++      fsparam_flag("norecovery", Opt_norecovery),
+       /* Debugging options. */
+       fsparam_flag_no("enospc_debug", Opt_enospc_debug),
+@@ -438,6 +441,11 @@ static int btrfs_parse_param(struct fs_c
+               "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
+               btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
+               break;
++      case Opt_norecovery:
++              btrfs_info(NULL,
++"'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'");
++              btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
++              break;
+       case Opt_flushoncommit:
+               if (result.negated)
+                       btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT);
diff --git a/queue-6.9/cifs-fix-creating-sockets-when-using-sfu-mount-options.patch b/queue-6.9/cifs-fix-creating-sockets-when-using-sfu-mount-options.patch
new file mode 100644 (file)
index 0000000..5a4e503
--- /dev/null
@@ -0,0 +1,65 @@
+From 518549c120e671c4906f77d1802b97e9b23f673a Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Wed, 29 May 2024 18:16:56 -0500
+Subject: cifs: fix creating sockets when using sfu mount options
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 518549c120e671c4906f77d1802b97e9b23f673a upstream.
+
+When running fstest generic/423 with sfu mount option, it
+was being skipped due to inability to create sockets:
+
+  generic/423  [not run] cifs does not support mknod/mkfifo
+
+which can also be easily reproduced with their af_unix tool:
+
+  ./src/af_unix /mnt1/socket-two bind: Operation not permitted
+
+Fix sfu mount option to allow creating and reporting sockets.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/cifspdu.h |    2 +-
+ fs/smb/client/inode.c   |    4 ++++
+ fs/smb/client/smb2ops.c |    3 +++
+ 3 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/client/cifspdu.h
++++ b/fs/smb/client/cifspdu.h
+@@ -2574,7 +2574,7 @@ typedef struct {
+ struct win_dev {
+-      unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO*/
++      unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO or LnxSOCK */
+       __le64 major;
+       __le64 minor;
+ } __attribute__((packed));
+--- a/fs/smb/client/inode.c
++++ b/fs/smb/client/inode.c
+@@ -591,6 +591,10 @@ cifs_sfu_type(struct cifs_fattr *fattr,
+                               mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
+                               fattr->cf_rdev = MKDEV(mjr, mnr);
+                       }
++              } else if (memcmp("LnxSOCK", pbuf, 8) == 0) {
++                      cifs_dbg(FYI, "Socket\n");
++                      fattr->cf_mode |= S_IFSOCK;
++                      fattr->cf_dtype = DT_SOCK;
+               } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
+                       cifs_dbg(FYI, "Symlink\n");
+                       fattr->cf_mode |= S_IFLNK;
+--- a/fs/smb/client/smb2ops.c
++++ b/fs/smb/client/smb2ops.c
+@@ -4996,6 +4996,9 @@ static int __cifs_sfu_make_node(unsigned
+               pdev.major = cpu_to_le64(MAJOR(dev));
+               pdev.minor = cpu_to_le64(MINOR(dev));
+               break;
++      case S_IFSOCK:
++              strscpy(pdev.type, "LnxSOCK");
++              break;
+       case S_IFIFO:
+               strscpy(pdev.type, "LnxFIFO");
+               break;
diff --git a/queue-6.9/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch b/queue-6.9/edac-amd64-convert-pcibios_-return-codes-to-errnos.patch
new file mode 100644 (file)
index 0000000..6052e09
--- /dev/null
@@ -0,0 +1,65 @@
+From 3ec8ebd8a5b782d56347ae884de880af26f93996 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
+Date: Mon, 27 May 2024 16:22:34 +0300
+Subject: EDAC/amd64: Convert PCIBIOS_* return codes to errnos
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+commit 3ec8ebd8a5b782d56347ae884de880af26f93996 upstream.
+
+gpu_get_node_map() uses pci_read_config_dword() that returns PCIBIOS_*
+codes. The return code is then returned all the way into the module
+init function amd64_edac_init() that returns it as is. The module init
+functions, however, should return normal errnos.
+
+Convert the PCIBIOS_* return code using pcibios_err_to_errno() into a
+normal errno before returning it from gpu_get_node_map().
+
+For consistency, convert also the other similar cases which return
+PCIBIOS_* codes even if they do not have any bugs at the moment.
+
+Fixes: 4251566ebc1c ("EDAC/amd64: Cache and use GPU node map")
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240527132236.13875-1-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/edac/amd64_edac.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/edac/amd64_edac.c
++++ b/drivers/edac/amd64_edac.c
+@@ -81,7 +81,7 @@ int __amd64_read_pci_cfg_dword(struct pc
+               amd64_warn("%s: error reading F%dx%03x.\n",
+                          func, PCI_FUNC(pdev->devfn), offset);
+-      return err;
++      return pcibios_err_to_errno(err);
+ }
+ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
+@@ -94,7 +94,7 @@ int __amd64_write_pci_cfg_dword(struct p
+               amd64_warn("%s: error writing to F%dx%03x.\n",
+                          func, PCI_FUNC(pdev->devfn), offset);
+-      return err;
++      return pcibios_err_to_errno(err);
+ }
+ /*
+@@ -1025,8 +1025,10 @@ static int gpu_get_node_map(struct amd64
+       }
+       ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+-      if (ret)
++      if (ret) {
++              ret = pcibios_err_to_errno(ret);
+               goto out;
++      }
+       gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
+       gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
diff --git a/queue-6.9/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch b/queue-6.9/edac-igen6-convert-pcibios_-return-codes-to-errnos.patch
new file mode 100644 (file)
index 0000000..fc919d8
--- /dev/null
@@ -0,0 +1,51 @@
+From f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
+Date: Mon, 27 May 2024 16:22:35 +0300
+Subject: EDAC/igen6: Convert PCIBIOS_* return codes to errnos
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+commit f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 upstream.
+
+errcmd_enable_error_reporting() uses pci_{read,write}_config_word()
+that return PCIBIOS_* codes. The return code is then returned all the
+way into the probe function igen6_probe() that returns it as is. The
+probe functions, however, should return normal errnos.
+
+Convert the PCIBIOS_* return codes using pcibios_err_to_errno() into
+normal errnos before returning them from errcmd_enable_error_reporting().
+
+Fixes: 10590a9d4f23 ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC")
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240527132236.13875-2-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/edac/igen6_edac.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/edac/igen6_edac.c
++++ b/drivers/edac/igen6_edac.c
+@@ -800,7 +800,7 @@ static int errcmd_enable_error_reporting
+       rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
+       if (rc)
+-              return rc;
++              return pcibios_err_to_errno(rc);
+       if (enable)
+               errcmd |= ERRCMD_CE | ERRSTS_UE;
+@@ -809,7 +809,7 @@ static int errcmd_enable_error_reporting
+       rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
+       if (rc)
+-              return rc;
++              return pcibios_err_to_errno(rc);
+       return 0;
+ }
diff --git a/queue-6.9/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch b/queue-6.9/eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch
new file mode 100644 (file)
index 0000000..39b1c58
--- /dev/null
@@ -0,0 +1,42 @@
+From d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 Mon Sep 17 00:00:00 2001
+From: Hao Ge <gehao@kylinos.cn>
+Date: Mon, 13 May 2024 13:33:38 +0800
+Subject: eventfs: Fix a possible null pointer dereference in eventfs_find_events()
+
+From: Hao Ge <gehao@kylinos.cn>
+
+commit d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 upstream.
+
+In function eventfs_find_events(), there is a potential NULL pointer
+dereference that may be caused by calling update_events_attr(), which
+performs some operations on the members of the ei struct when ei is NULL.
+
+Hence, when ei->is_freed is set, return NULL directly.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240513053338.63017-1-hao.ge@linux.dev
+
+Cc: stable@vger.kernel.org
+Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership")
+Signed-off-by: Hao Ge <gehao@kylinos.cn>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -345,10 +345,9 @@ static struct eventfs_inode *eventfs_fin
+                * If the ei is being freed, the ownership of the children
+                * doesn't matter.
+                */
+-              if (ei->is_freed) {
+-                      ei = NULL;
+-                      break;
+-              }
++              if (ei->is_freed)
++                      return NULL;
++
+               // Walk upwards until you find the events inode
+       } while (!ei->is_events);
diff --git a/queue-6.9/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch b/queue-6.9/eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch
new file mode 100644 (file)
index 0000000..f297499
--- /dev/null
@@ -0,0 +1,44 @@
+From 8898e7f288c47d450a3cf1511c791a03550c0789 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Thu, 23 May 2024 01:14:26 -0400
+Subject: eventfs: Keep the directories from having the same inode number as files
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 8898e7f288c47d450a3cf1511c791a03550c0789 upstream.
+
+The directories require unique inode numbers but all the eventfs files
+have the same inode number. Prevent the directories from having the same
+inode numbers as the files as that can confuse some tooling.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.428826685@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Fixes: 834bf76add3e6 ("eventfs: Save directory inodes in the eventfs_inode structure")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -50,8 +50,12 @@ static struct eventfs_root_inode *get_ro
+ /* Just try to make something consistent and unique */
+ static int eventfs_dir_ino(struct eventfs_inode *ei)
+ {
+-      if (!ei->ino)
++      if (!ei->ino) {
+               ei->ino = get_next_ino();
++              /* Must not have the file inode number */
++              if (ei->ino == EVENTFS_FILE_INODE_INO)
++                      ei->ino = get_next_ino();
++      }
+       return ei->ino;
+ }
diff --git a/queue-6.9/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch b/queue-6.9/nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch
new file mode 100644 (file)
index 0000000..f56bf80
--- /dev/null
@@ -0,0 +1,41 @@
+From f06d1b10cb016d5aaecdb1804fefca025387bd10 Mon Sep 17 00:00:00 2001
+From: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Date: Thu, 25 Apr 2024 16:24:29 -0400
+Subject: NFS: Fix READ_PLUS when server doesn't support OP_READ_PLUS
+
+From: Anna Schumaker <Anna.Schumaker@Netapp.com>
+
+commit f06d1b10cb016d5aaecdb1804fefca025387bd10 upstream.
+
+Olga showed me a case where the client was sending multiple READ_PLUS
+calls to the server in parallel, and the server replied
+NFS4ERR_OPNOTSUPP to each. The client would fall back to READ for the
+first reply, but fail to retry the other calls.
+
+I fix this by removing the test for NFS_CAP_READ_PLUS in
+nfs4_read_plus_not_supported(). This allows us to reschedule any
+READ_PLUS call that has a NFS4ERR_OPNOTSUPP return value, even after the
+capability has been cleared.
+
+Reported-by: Olga Kornievskaia <kolga@netapp.com>
+Fixes: c567552612ec ("NFS: Add READ_PLUS data segment support")
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/nfs4proc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -5456,7 +5456,7 @@ static bool nfs4_read_plus_not_supported
+       struct rpc_message *msg = &task->tk_msg;
+       if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
+-          server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
++          task->tk_status == -ENOTSUPP) {
+               server->caps &= ~NFS_CAP_READ_PLUS;
+               msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+               rpc_restart_call_prepare(task);
diff --git a/queue-6.9/nfs-fix-undefined-behavior-in-nfs_block_bits.patch b/queue-6.9/nfs-fix-undefined-behavior-in-nfs_block_bits.patch
new file mode 100644 (file)
index 0000000..4afc984
--- /dev/null
@@ -0,0 +1,38 @@
+From 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 Mon Sep 17 00:00:00 2001
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+Date: Fri, 10 May 2024 23:24:04 +0300
+Subject: nfs: fix undefined behavior in nfs_block_bits()
+
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+
+commit 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 upstream.
+
+Shifting *signed int* typed constant 1 left by 31 bits causes undefined
+behavior. Specify the correct *unsigned long* type by using 1UL instead.
+
+Found by Linux Verification Center (linuxtesting.org) with the Svace static
+analysis tool.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/internal.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/internal.h
++++ b/fs/nfs/internal.h
+@@ -710,9 +710,9 @@ unsigned long nfs_block_bits(unsigned lo
+       if ((bsize & (bsize - 1)) || nrbitsp) {
+               unsigned char   nrbits;
+-              for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
++              for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
+                       ;
+-              bsize = 1 << nrbits;
++              bsize = 1UL << nrbits;
+               if (nrbitsp)
+                       *nrbitsp = nrbits;
+       }
diff --git a/queue-6.9/nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch b/queue-6.9/nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch
new file mode 100644 (file)
index 0000000..68c4f62
--- /dev/null
@@ -0,0 +1,46 @@
+From 7373a51e7998b508af7136530f3a997b286ce81c Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 4 Jun 2024 22:42:55 +0900
+Subject: nilfs2: fix nilfs_empty_dir() misjudgment and long loop on I/O errors
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 7373a51e7998b508af7136530f3a997b286ce81c upstream.
+
+The error handling in nilfs_empty_dir() when a directory folio/page read
+fails is incorrect, as in the old ext2 implementation, and if the
+folio/page cannot be read or nilfs_check_folio() fails, it will falsely
+determine the directory as empty and corrupt the file system.
+
+In addition, since nilfs_empty_dir() does not immediately return on a
+failed folio/page read, but continues to loop, this can cause a long loop
+with I/O if i_size of the directory's inode is also corrupted, causing the
+log writer thread to wait and hang, as reported by syzbot.
+
+Fix these issues by making nilfs_empty_dir() immediately return a false
+value (0) if it fails to get a directory folio/page.
+
+Link: https://lkml.kernel.org/r/20240604134255.7165-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+c8166c541d3971bf6c87@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=c8166c541d3971bf6c87
+Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dir.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nilfs2/dir.c
++++ b/fs/nilfs2/dir.c
+@@ -608,7 +608,7 @@ int nilfs_empty_dir(struct inode *inode)
+               kaddr = nilfs_get_folio(inode, i, &folio);
+               if (IS_ERR(kaddr))
+-                      continue;
++                      return 0;
+               de = (struct nilfs_dir_entry *)kaddr;
+               kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
diff --git a/queue-6.9/nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch b/queue-6.9/nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
new file mode 100644 (file)
index 0000000..aa5a7ff
--- /dev/null
@@ -0,0 +1,76 @@
+From a4ca369ca221bb7e06c725792ac107f0e48e82e7 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Thu, 30 May 2024 23:15:56 +0900
+Subject: nilfs2: fix potential kernel bug due to lack of writeback flag waiting
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit a4ca369ca221bb7e06c725792ac107f0e48e82e7 upstream.
+
+Destructive writes to a block device on which nilfs2 is mounted can cause
+a kernel bug in the folio/page writeback start routine or writeback end
+routine (__folio_start_writeback in the log below):
+
+ kernel BUG at mm/page-writeback.c:3070!
+ Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
+ ...
+ RIP: 0010:__folio_start_writeback+0xbaa/0x10e0
+ Code: 25 ff 0f 00 00 0f 84 18 01 00 00 e8 40 ca c6 ff e9 17 f6 ff ff
+  e8 36 ca c6 ff 4c 89 f7 48 c7 c6 80 c0 12 84 e8 e7 b3 0f 00 90 <0f>
+  0b e8 1f ca c6 ff 4c 89 f7 48 c7 c6 a0 c6 12 84 e8 d0 b3 0f 00
+ ...
+ Call Trace:
+  <TASK>
+  nilfs_segctor_do_construct+0x4654/0x69d0 [nilfs2]
+  nilfs_segctor_construct+0x181/0x6b0 [nilfs2]
+  nilfs_segctor_thread+0x548/0x11c0 [nilfs2]
+  kthread+0x2f0/0x390
+  ret_from_fork+0x4b/0x80
+  ret_from_fork_asm+0x1a/0x30
+  </TASK>
+
+This is because when the log writer starts a writeback for segment summary
+blocks or a super root block that use the backing device's page cache, it
+does not wait for the ongoing folio/page writeback, resulting in an
+inconsistent writeback state.
+
+Fix this issue by waiting for ongoing writebacks when putting
+folios/pages on the backing device into writeback state.
+
+Link: https://lkml.kernel.org/r/20240530141556.4411-1-konishi.ryusuke@gmail.com
+Fixes: 9ff05123e3bf ("nilfs2: segment constructor")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -1652,6 +1652,7 @@ static void nilfs_segctor_prepare_write(
+                       if (bh->b_folio != bd_folio) {
+                               if (bd_folio) {
+                                       folio_lock(bd_folio);
++                                      folio_wait_writeback(bd_folio);
+                                       folio_clear_dirty_for_io(bd_folio);
+                                       folio_start_writeback(bd_folio);
+                                       folio_unlock(bd_folio);
+@@ -1665,6 +1666,7 @@ static void nilfs_segctor_prepare_write(
+                       if (bh == segbuf->sb_super_root) {
+                               if (bh->b_folio != bd_folio) {
+                                       folio_lock(bd_folio);
++                                      folio_wait_writeback(bd_folio);
+                                       folio_clear_dirty_for_io(bd_folio);
+                                       folio_start_writeback(bd_folio);
+                                       folio_unlock(bd_folio);
+@@ -1681,6 +1683,7 @@ static void nilfs_segctor_prepare_write(
+       }
+       if (bd_folio) {
+               folio_lock(bd_folio);
++              folio_wait_writeback(bd_folio);
+               folio_clear_dirty_for_io(bd_folio);
+               folio_start_writeback(bd_folio);
+               folio_unlock(bd_folio);
diff --git a/queue-6.9/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch b/queue-6.9/powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch
new file mode 100644 (file)
index 0000000..a710ef7
--- /dev/null
@@ -0,0 +1,109 @@
+From 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d Mon Sep 17 00:00:00 2001
+From: Hari Bathini <hbathini@linux.ibm.com>
+Date: Thu, 2 May 2024 23:02:04 +0530
+Subject: powerpc/64/bpf: fix tail calls for PCREL addressing
+
+From: Hari Bathini <hbathini@linux.ibm.com>
+
+commit 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d upstream.
+
+With PCREL addressing, there is no kernel TOC, so it is not set up in
+the prologue when PCREL addressing is used. But the number of instructions
+to skip on a tail call was not adjusted accordingly. That resulted in
+non-obvious failures when using tail calls. The 'tailcalls' selftest
+crashed the system with the call trace below:
+
+  bpf_test_run+0xe8/0x3cc (unreliable)
+  bpf_prog_test_run_skb+0x348/0x778
+  __sys_bpf+0xb04/0x2b00
+  sys_bpf+0x28/0x38
+  system_call_exception+0x168/0x340
+  system_call_vectored_common+0x15c/0x2ec
+
+Also, as bpf programs are always module addresses and a bpf helper in
+general is a core kernel text address, using PC relative addressing
+often fails with "out of range of pcrel address" error. Switch to
+using kernel base for relative addressing to handle this better.
+
+Fixes: 7e3a68be42e1 ("powerpc/64: vmlinux support building with PCREL addresing")
+Cc: stable@vger.kernel.org # v6.4+
+Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240502173205.142794-1-hbathini@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp64.c |   30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -202,7 +202,8 @@ void bpf_jit_build_epilogue(u32 *image,
+       EMIT(PPC_RAW_BLR());
+ }
+-static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func)
++static int
++bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
+ {
+       unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
+       long reladdr;
+@@ -211,19 +212,20 @@ static int bpf_jit_emit_func_call_hlp(u3
+               return -EINVAL;
+       if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+-              reladdr = func_addr - CTX_NIA(ctx);
++              reladdr = func_addr - local_paca->kernelbase;
+               if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+-                      pr_err("eBPF: address of %ps out of range of pcrel address.\n",
+-                              (void *)func);
++                      pr_err("eBPF: address of %ps out of range of 34-bit relative address.\n",
++                             (void *)func);
+                       return -ERANGE;
+               }
+-              /* pla r12,addr */
+-              EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+-              EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+-              EMIT(PPC_RAW_MTCTR(_R12));
+-              EMIT(PPC_RAW_BCTR());
+-
++              EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
++              /* Align for subsequent prefix instruction */
++              if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
++                      EMIT(PPC_RAW_NOP());
++              /* paddi r12,r12,addr */
++              EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr));
++              EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr));
+       } else {
+               reladdr = func_addr - kernel_toc_addr();
+               if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+@@ -233,9 +235,9 @@ static int bpf_jit_emit_func_call_hlp(u3
+               EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
+               EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
+-              EMIT(PPC_RAW_MTCTR(_R12));
+-              EMIT(PPC_RAW_BCTRL());
+       }
++      EMIT(PPC_RAW_MTCTR(_R12));
++      EMIT(PPC_RAW_BCTRL());
+       return 0;
+ }
+@@ -285,7 +287,7 @@ static int bpf_jit_emit_tail_call(u32 *i
+       int b2p_index = bpf_to_ppc(BPF_REG_3);
+       int bpf_tailcall_prologue_size = 8;
+-      if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
++      if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+               bpf_tailcall_prologue_size += 4; /* skip past the toc load */
+       /*
+@@ -993,7 +995,7 @@ emit_clear:
+                               return ret;
+                       if (func_addr_fixed)
+-                              ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
++                              ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr);
+                       else
+                               ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
diff --git a/queue-6.9/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch b/queue-6.9/powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch
new file mode 100644 (file)
index 0000000..fa49925
--- /dev/null
@@ -0,0 +1,138 @@
+From b1e7cee96127468c2483cf10c2899c9b5cf79bf8 Mon Sep 17 00:00:00 2001
+From: Puranjay Mohan <puranjay@kernel.org>
+Date: Mon, 13 May 2024 10:02:48 +0000
+Subject: powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH
+
+From: Puranjay Mohan <puranjay@kernel.org>
+
+commit b1e7cee96127468c2483cf10c2899c9b5cf79bf8 upstream.
+
+The Linux Kernel Memory Model [1][2] requires RMW operations that have a
+return value to be fully ordered.
+
+BPF atomic operations with BPF_FETCH (including BPF_XCHG and
+BPF_CMPXCHG) return a value back so they need to be JITed to fully
+ordered operations. POWERPC currently emits relaxed operations for
+these.
+
+We can show this by running the following litmus-test:
+
+  PPC SB+atomic_add+fetch
+
+  {
+      0:r0=x;  (* dst reg assuming offset is 0 *)
+      0:r1=2;  (* src reg *)
+      0:r2=1;
+      0:r4=y;  (* P0 writes to this, P1 reads this *)
+      0:r5=z;  (* P1 writes to this, P0 reads this *)
+      0:r6=0;
+
+      1:r2=1;
+      1:r4=y;
+      1:r5=z;
+  }
+
+  P0                      | P1            ;
+  stw         r2, 0(r4)   | stw  r2,0(r5) ;
+                          |               ;
+  loop:lwarx  r3, r6, r0  |               ;
+  mr          r8, r3      |               ;
+  add         r3, r3, r1  | sync          ;
+  stwcx.      r3, r6, r0  |               ;
+  bne         loop        |               ;
+  mr          r1, r8      |               ;
+                          |               ;
+  lwa         r7, 0(r5)   | lwa  r7,0(r4) ;
+
+  ~exists(0:r7=0 /\ 1:r7=0)
+
+  Witnesses
+  Positive: 9 Negative: 3
+  Condition ~exists (0:r7=0 /\ 1:r7=0)
+  Observation SB+atomic_add+fetch Sometimes 3 9
+
+This test shows that the older store in P0 is reordered with a newer
+load to a different address, even though there is an RMW operation with
+fetch between them. Adding a sync before and after the RMW fixes the issue:
+
+  Witnesses
+  Positive: 9 Negative: 0
+  Condition ~exists (0:r7=0 /\ 1:r7=0)
+  Observation SB+atomic_add+fetch Never 0 9
+
+[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
+[2] https://www.kernel.org/doc/Documentation/atomic_t.txt
+
+Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations")
+Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg")
+Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations")
+Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg")
+Cc: stable@vger.kernel.org # v6.0+
+Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
+Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Naveen N Rao <naveen@kernel.org>
+Acked-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240513100248.110535-1-puranjay@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp32.c |   12 ++++++++++++
+ arch/powerpc/net/bpf_jit_comp64.c |   12 ++++++++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/powerpc/net/bpf_jit_comp32.c
++++ b/arch/powerpc/net/bpf_jit_comp32.c
+@@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *
+                       /* Get offset into TMP_REG */
+                       EMIT(PPC_RAW_LI(tmp_reg, off));
++                      /*
++                       * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
++                       * before and after the operation.
++                       *
++                       * This is a requirement in the Linux Kernel Memory Model.
++                       * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
++                       */
++                      if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
++                              EMIT(PPC_RAW_SYNC());
+                       tmp_idx = ctx->idx * 4;
+                       /* load value from memory into r0 */
+                       EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
+@@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *
+                       /* For the BPF_FETCH variant, get old data into src_reg */
+                       if (imm & BPF_FETCH) {
++                              /* Emit 'sync' to enforce full ordering */
++                              if (IS_ENABLED(CONFIG_SMP))
++                                      EMIT(PPC_RAW_SYNC());
+                               EMIT(PPC_RAW_MR(ret_reg, ax_reg));
+                               if (!fp->aux->verifier_zext)
+                                       EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -805,6 +805,15 @@ emit_clear:
+                       /* Get offset into TMP_REG_1 */
+                       EMIT(PPC_RAW_LI(tmp1_reg, off));
++                      /*
++                       * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
++                       * before and after the operation.
++                       *
++                       * This is a requirement in the Linux Kernel Memory Model.
++                       * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
++                       */
++                      if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
++                              EMIT(PPC_RAW_SYNC());
+                       tmp_idx = ctx->idx * 4;
+                       /* load value from memory into TMP_REG_2 */
+                       if (size == BPF_DW)
+@@ -867,6 +876,9 @@ emit_clear:
+                       PPC_BCC_SHORT(COND_NE, tmp_idx);
+                       if (imm & BPF_FETCH) {
++                              /* Emit 'sync' to enforce full ordering */
++                              if (IS_ENABLED(CONFIG_SMP))
++                                      EMIT(PPC_RAW_SYNC());
+                               EMIT(PPC_RAW_MR(ret_reg, _R0));
+                               /*
+                                * Skip unnecessary zero-extension for 32-bit cmpxchg.
index ac6b0e0a17d4b552300c577d3b7b97cf929334d9..a38cf68965b412e95f5ed1fa0332d42a9f6609bc 100644 (file)
@@ -134,3 +134,24 @@ riscv-enable-have_arch_huge_vmap-for-xip-kernel.patch
 asoc-sof-ipc4-topology-fix-input-format-query-of-process-modules-without-base-extension.patch
 alsa-ump-don-t-clear-bank-selection-after-sending-a-program-change.patch
 alsa-ump-don-t-accept-an-invalid-ump-protocol-number.patch
+edac-amd64-convert-pcibios_-return-codes-to-errnos.patch
+edac-igen6-convert-pcibios_-return-codes-to-errnos.patch
+cifs-fix-creating-sockets-when-using-sfu-mount-options.patch
+nfs-fix-undefined-behavior-in-nfs_block_bits.patch
+nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch
+eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch
+eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch
+tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch
+btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch
+btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch
+btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch
+btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch
+btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch
+btrfs-re-introduce-norecovery-mount-option.patch
+alsa-seq-fix-incorrect-ump-type-for-system-messages.patch
+bpf-fix-multi-uprobe-pid-filtering-logic.patch
+powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch
+powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch
+nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
+nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch
+smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch
diff --git a/queue-6.9/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch b/queue-6.9/smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch
new file mode 100644 (file)
index 0000000..dd714f0
--- /dev/null
@@ -0,0 +1,34 @@
+From 02c418774f76a0a36a6195c9dbf8971eb4130a15 Mon Sep 17 00:00:00 2001
+From: Enzo Matsumiya <ematsumiya@suse.de>
+Date: Thu, 6 Jun 2024 13:13:13 -0300
+Subject: smb: client: fix deadlock in smb2_find_smb_tcon()
+
+From: Enzo Matsumiya <ematsumiya@suse.de>
+
+commit 02c418774f76a0a36a6195c9dbf8971eb4130a15 upstream.
+
+Unlock cifs_tcp_ses_lock before calling cifs_put_smb_ses() to avoid a
+deadlock.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
+Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
+Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/smb2transport.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/smb/client/smb2transport.c
++++ b/fs/smb/client/smb2transport.c
+@@ -216,8 +216,8 @@ smb2_find_smb_tcon(struct TCP_Server_Inf
+       }
+       tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+       if (!tcon) {
+-              cifs_put_smb_ses(ses);
+               spin_unlock(&cifs_tcp_ses_lock);
++              cifs_put_smb_ses(ses);
+               return NULL;
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
diff --git a/queue-6.9/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch b/queue-6.9/tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch
new file mode 100644 (file)
index 0000000..dbfa66a
--- /dev/null
@@ -0,0 +1,95 @@
+From 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Thu, 23 May 2024 01:14:29 -0400
+Subject: tracefs: Clear EVENT_INODE flag in tracefs_drop_inode()
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 upstream.
+
+When the inode is being dropped from the dentry, the TRACEFS_EVENT_INODE
+flag needs to be cleared to prevent a remount from calling
+eventfs_remount() on the tracefs_inode private data. There's a race
+window between when the inode is dropped (and the dentry freed) and when
+the inode is actually freed. If a remount happens between the two, the
+eventfs_inode could be accessed after it is freed (only the dentry keeps a
+ref count on it).
+
+Currently the TRACEFS_EVENT_INODE flag is cleared from the dentry iput()
+function. But this is incorrect, as it is possible that the inode has
+another reference to it. The flag should only be cleared when the inode is
+really being dropped and has no more references. That happens in the
+drop_inode callback of the inode, as that gets called when the last
+reference of the inode is released.
+
+Remove the tracefs_d_iput() function and move its logic to the more
+appropriate tracefs_drop_inode() callback function.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.908205106@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Fixes: baa23a8d4360d ("tracefs: Reset permissions on remount if permissions are options")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c |   33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -439,10 +439,26 @@ static int tracefs_show_options(struct s
+       return 0;
+ }
++static int tracefs_drop_inode(struct inode *inode)
++{
++      struct tracefs_inode *ti = get_tracefs(inode);
++
++      /*
++       * This inode is being freed and cannot be used for
++       * eventfs. Clear the flag so that it doesn't call into
++       * eventfs during the remount flag updates. The eventfs_inode
++       * gets freed after an RCU cycle, so the content will still
++       * be safe if the iteration is going on now.
++       */
++      ti->flags &= ~TRACEFS_EVENT_INODE;
++
++      return 1;
++}
++
+ static const struct super_operations tracefs_super_operations = {
+       .alloc_inode    = tracefs_alloc_inode,
+       .free_inode     = tracefs_free_inode,
+-      .drop_inode     = generic_delete_inode,
++      .drop_inode     = tracefs_drop_inode,
+       .statfs         = simple_statfs,
+       .remount_fs     = tracefs_remount,
+       .show_options   = tracefs_show_options,
+@@ -469,22 +485,7 @@ static int tracefs_d_revalidate(struct d
+       return !(ei && ei->is_freed);
+ }
+-static void tracefs_d_iput(struct dentry *dentry, struct inode *inode)
+-{
+-      struct tracefs_inode *ti = get_tracefs(inode);
+-
+-      /*
+-       * This inode is being freed and cannot be used for
+-       * eventfs. Clear the flag so that it doesn't call into
+-       * eventfs during the remount flag updates. The eventfs_inode
+-       * gets freed after an RCU cycle, so the content will still
+-       * be safe if the iteration is going on now.
+-       */
+-      ti->flags &= ~TRACEFS_EVENT_INODE;
+-}
+-
+ static const struct dentry_operations tracefs_dentry_operations = {
+-      .d_iput = tracefs_d_iput,
+       .d_revalidate = tracefs_d_revalidate,
+       .d_release = tracefs_d_release,
+ };