--- /dev/null
+From edb32776196afa393c074d6a2733e3a69e66b299 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Wed, 29 May 2024 10:37:59 +0200
+Subject: ALSA: seq: Fix incorrect UMP type for system messages
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit edb32776196afa393c074d6a2733e3a69e66b299 upstream.
+
+When converting a legacy system message to a UMP packet, it forgot to
+modify the UMP type field and kept the default type (either type 2
+or 4). Correct it to the right type for system messages.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20240529083800.5742-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/core/seq/seq_ump_convert.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -740,6 +740,7 @@ static int system_1p_ev_to_ump_midi1(con
+ union snd_ump_midi1_msg *data,
+ unsigned char status)
+ {
++ data->system.type = UMP_MSG_TYPE_SYSTEM;
+ data->system.status = status;
+ data->system.parm1 = event->data.control.value & 0x7f;
+ return 1;
+@@ -751,6 +752,7 @@ static int system_2p_ev_to_ump_midi1(con
+ union snd_ump_midi1_msg *data,
+ unsigned char status)
+ {
++ data->system.type = UMP_MSG_TYPE_SYSTEM;
+ data->system.status = status;
+ data->system.parm1 = event->data.control.value & 0x7f;
+ data->system.parm2 = (event->data.control.value >> 7) & 0x7f;
--- /dev/null
+From 46ba0e49b64232adac35a2bc892f1710c5b0fb7f Mon Sep 17 00:00:00 2001
+From: Andrii Nakryiko <andrii@kernel.org>
+Date: Tue, 21 May 2024 09:33:57 -0700
+Subject: bpf: fix multi-uprobe PID filtering logic
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+commit 46ba0e49b64232adac35a2bc892f1710c5b0fb7f upstream.
+
+Current implementation of PID filtering logic for multi-uprobes in
+uprobe_prog_run() is filtering down to exact *thread*, while the intent
+for PID filtering is to filter by *process* instead. The check in
+uprobe_prog_run() also differs from the analogous one in
+uprobe_multi_link_filter() for some reason. The latter is correct,
+checking task->mm, not the task itself.
+
+Fix the check in uprobe_prog_run() to perform the same task->mm check.
+
+While doing this, we also update get_pid_task() use to use PIDTYPE_TGID
+type of lookup, given the intent is to get a representative task of an
+entire process. This doesn't change behavior, but seems more logical. It
+would hold task group leader task now, not any random thread task.
+
+Last but not least, given multi-uprobe support is half-broken due to
+this PID filtering logic (depending on whether PID filtering is
+important or not), we need to make it easy for user space consumers
+(including libbpf) to easily detect whether PID filtering logic was
+already fixed.
+
+We do it here by adding an early check on passed pid parameter. If it's
+negative (and so has no chance of being a valid PID), we return -EINVAL.
+Previous behavior would eventually return -ESRCH ("No process found"),
+given there can't be any process with negative PID. This subtle change
+won't make any practical change in behavior, but will allow applications
+to detect PID filtering fixes easily. Libbpf fixes take advantage of
+this in the next patch.
+
+Cc: stable@vger.kernel.org
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Fixes: b733eeade420 ("bpf: Add pid filter support for uprobe_multi link")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20240521163401.3005045-2-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c | 8 ++++----
+ tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 2 +-
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -3260,7 +3260,7 @@ static int uprobe_prog_run(struct bpf_up
+ struct bpf_run_ctx *old_run_ctx;
+ int err = 0;
+
+- if (link->task && current != link->task)
++ if (link->task && current->mm != link->task->mm)
+ return 0;
+
+ if (sleepable)
+@@ -3361,8 +3361,9 @@ int bpf_uprobe_multi_link_attach(const u
+ upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
+ uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
+ cnt = attr->link_create.uprobe_multi.cnt;
++ pid = attr->link_create.uprobe_multi.pid;
+
+- if (!upath || !uoffsets || !cnt)
++ if (!upath || !uoffsets || !cnt || pid < 0)
+ return -EINVAL;
+ if (cnt > MAX_UPROBE_MULTI_CNT)
+ return -E2BIG;
+@@ -3386,10 +3387,9 @@ int bpf_uprobe_multi_link_attach(const u
+ goto error_path_put;
+ }
+
+- pid = attr->link_create.uprobe_multi.pid;
+ if (pid) {
+ rcu_read_lock();
+- task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
++ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+ rcu_read_unlock();
+ if (!task) {
+ err = -ESRCH;
+--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
++++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+@@ -397,7 +397,7 @@ static void test_attach_api_fails(void)
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+- ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
++ ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
+
+ cleanup:
+ if (link_fd >= 0)
--- /dev/null
+From 9d274c19a71b3a276949933859610721a453946b Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Fri, 24 May 2024 13:58:11 -0700
+Subject: btrfs: fix crash on racing fsync and size-extending write into prealloc
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 9d274c19a71b3a276949933859610721a453946b upstream.
+
+We have been seeing crashes on duplicate keys in
+btrfs_set_item_key_safe():
+
+ BTRFS critical (device vdb): slot 4 key (450 108 8192) new key (450 108 8192)
+ ------------[ cut here ]------------
+ kernel BUG at fs/btrfs/ctree.c:2620!
+ invalid opcode: 0000 [#1] PREEMPT SMP PTI
+ CPU: 0 PID: 3139 Comm: xfs_io Kdump: loaded Not tainted 6.9.0 #6
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014
+ RIP: 0010:btrfs_set_item_key_safe+0x11f/0x290 [btrfs]
+
+With the following stack trace:
+
+ #0 btrfs_set_item_key_safe (fs/btrfs/ctree.c:2620:4)
+ #1 btrfs_drop_extents (fs/btrfs/file.c:411:4)
+ #2 log_one_extent (fs/btrfs/tree-log.c:4732:9)
+ #3 btrfs_log_changed_extents (fs/btrfs/tree-log.c:4955:9)
+ #4 btrfs_log_inode (fs/btrfs/tree-log.c:6626:9)
+ #5 btrfs_log_inode_parent (fs/btrfs/tree-log.c:7070:8)
+ #6 btrfs_log_dentry_safe (fs/btrfs/tree-log.c:7171:8)
+ #7 btrfs_sync_file (fs/btrfs/file.c:1933:8)
+ #8 vfs_fsync_range (fs/sync.c:188:9)
+ #9 vfs_fsync (fs/sync.c:202:9)
+ #10 do_fsync (fs/sync.c:212:9)
+ #11 __do_sys_fdatasync (fs/sync.c:225:9)
+ #12 __se_sys_fdatasync (fs/sync.c:223:1)
+ #13 __x64_sys_fdatasync (fs/sync.c:223:1)
+ #14 do_syscall_x64 (arch/x86/entry/common.c:52:14)
+ #15 do_syscall_64 (arch/x86/entry/common.c:83:7)
+ #16 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121)
+
+So we're logging a changed extent from fsync, which is splitting an
+extent in the log tree. But this split part already exists in the tree,
+triggering the BUG().
+
+This is the state of the log tree at the time of the crash, dumped with
+drgn (https://github.com/osandov/drgn/blob/main/contrib/btrfs_tree.py)
+to get more details than btrfs_print_leaf() gives us:
+
+ >>> print_extent_buffer(prog.crashed_thread().stack_trace()[0]["eb"])
+ leaf 33439744 level 0 items 72 generation 9 owner 18446744073709551610
+ leaf 33439744 flags 0x100000000000000
+ fs uuid e5bd3946-400c-4223-8923-190ef1f18677
+ chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
+ item 0 key (450 INODE_ITEM 0) itemoff 16123 itemsize 160
+ generation 7 transid 9 size 8192 nbytes 8473563889606862198
+ block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
+ sequence 204 flags 0x10(PREALLOC)
+ atime 1716417703.220000000 (2024-05-22 15:41:43)
+ ctime 1716417704.983333333 (2024-05-22 15:41:44)
+ mtime 1716417704.983333333 (2024-05-22 15:41:44)
+ otime 17592186044416.000000000 (559444-03-08 01:40:16)
+ item 1 key (450 INODE_REF 256) itemoff 16110 itemsize 13
+ index 195 namelen 3 name: 193
+ item 2 key (450 XATTR_ITEM 1640047104) itemoff 16073 itemsize 37
+ location key (0 UNKNOWN.0 0) type XATTR
+ transid 7 data_len 1 name_len 6
+ name: user.a
+ data a
+ item 3 key (450 EXTENT_DATA 0) itemoff 16020 itemsize 53
+ generation 9 type 1 (regular)
+ extent data disk byte 303144960 nr 12288
+ extent data offset 0 nr 4096 ram 12288
+ extent compression 0 (none)
+ item 4 key (450 EXTENT_DATA 4096) itemoff 15967 itemsize 53
+ generation 9 type 2 (prealloc)
+ prealloc data disk byte 303144960 nr 12288
+ prealloc data offset 4096 nr 8192
+ item 5 key (450 EXTENT_DATA 8192) itemoff 15914 itemsize 53
+ generation 9 type 2 (prealloc)
+ prealloc data disk byte 303144960 nr 12288
+ prealloc data offset 8192 nr 4096
+ ...
+
+So the real problem happened earlier: notice that items 4 (4k-12k) and 5
+(8k-12k) overlap. Both are prealloc extents. Item 4 straddles i_size and
+item 5 starts at i_size.
+
+Here is the state of the filesystem tree at the time of the crash:
+
+ >>> root = prog.crashed_thread().stack_trace()[2]["inode"].root
+ >>> ret, nodes, slots = btrfs_search_slot(root, BtrfsKey(450, 0, 0))
+ >>> print_extent_buffer(nodes[0])
+ leaf 30425088 level 0 items 184 generation 9 owner 5
+ leaf 30425088 flags 0x100000000000000
+ fs uuid e5bd3946-400c-4223-8923-190ef1f18677
+ chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
+ ...
+ item 179 key (450 INODE_ITEM 0) itemoff 4907 itemsize 160
+ generation 7 transid 7 size 4096 nbytes 12288
+ block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
+ sequence 6 flags 0x10(PREALLOC)
+ atime 1716417703.220000000 (2024-05-22 15:41:43)
+ ctime 1716417703.220000000 (2024-05-22 15:41:43)
+ mtime 1716417703.220000000 (2024-05-22 15:41:43)
+ otime 1716417703.220000000 (2024-05-22 15:41:43)
+ item 180 key (450 INODE_REF 256) itemoff 4894 itemsize 13
+ index 195 namelen 3 name: 193
+ item 181 key (450 XATTR_ITEM 1640047104) itemoff 4857 itemsize 37
+ location key (0 UNKNOWN.0 0) type XATTR
+ transid 7 data_len 1 name_len 6
+ name: user.a
+ data a
+ item 182 key (450 EXTENT_DATA 0) itemoff 4804 itemsize 53
+ generation 9 type 1 (regular)
+ extent data disk byte 303144960 nr 12288
+ extent data offset 0 nr 8192 ram 12288
+ extent compression 0 (none)
+ item 183 key (450 EXTENT_DATA 8192) itemoff 4751 itemsize 53
+ generation 9 type 2 (prealloc)
+ prealloc data disk byte 303144960 nr 12288
+ prealloc data offset 8192 nr 4096
+
+Item 5 in the log tree corresponds to item 183 in the filesystem tree,
+but nothing matches item 4. Furthermore, item 183 is the last item in
+the leaf.
+
+btrfs_log_prealloc_extents() is responsible for logging prealloc extents
+beyond i_size. It first truncates any previously logged prealloc extents
+that start beyond i_size. Then, it walks the filesystem tree and copies
+the prealloc extent items to the log tree.
+
+If it hits the end of a leaf, then it calls btrfs_next_leaf(), which
+unlocks the tree and does another search. However, while the filesystem
+tree is unlocked, an ordered extent completion may modify the tree. In
+particular, it may insert an extent item that overlaps with an extent
+item that was already copied to the log tree.
+
+This may manifest in several ways depending on the exact scenario,
+including an EEXIST error that is silently translated to a full sync,
+overlapping items in the log tree, or this crash. This particular crash
+is triggered by the following sequence of events:
+
+- Initially, the file has i_size=4k, a regular extent from 0-4k, and a
+ prealloc extent beyond i_size from 4k-12k. The prealloc extent item is
+ the last item in its B-tree leaf.
+- The file is fsync'd, which copies its inode item and both extent items
+ to the log tree.
+- An xattr is set on the file, which sets the
+ BTRFS_INODE_COPY_EVERYTHING flag.
+- The range 4k-8k in the file is written using direct I/O. i_size is
+ extended to 8k, but the ordered extent is still in flight.
+- The file is fsync'd. Since BTRFS_INODE_COPY_EVERYTHING is set, this
+ calls copy_inode_items_to_log(), which calls
+ btrfs_log_prealloc_extents().
+- btrfs_log_prealloc_extents() finds the 4k-12k prealloc extent in the
+ filesystem tree. Since it starts before i_size, it skips it. Since it
+ is the last item in its B-tree leaf, it calls btrfs_next_leaf().
+- btrfs_next_leaf() unlocks the path.
+- The ordered extent completion runs, which converts the 4k-8k part of
+ the prealloc extent to written and inserts the remaining prealloc part
+ from 8k-12k.
+- btrfs_next_leaf() does a search and finds the new prealloc extent
+ 8k-12k.
+- btrfs_log_prealloc_extents() copies the 8k-12k prealloc extent into
+ the log tree. Note that it overlaps with the 4k-12k prealloc extent
+ that was copied to the log tree by the first fsync.
+- fsync calls btrfs_log_changed_extents(), which tries to log the 4k-8k
+ extent that was written.
+- This tries to drop the range 4k-8k in the log tree, which requires
+ adjusting the start of the 4k-12k prealloc extent in the log tree to
+ 8k.
+- btrfs_set_item_key_safe() sees that there is already an extent
+ starting at 8k in the log tree and calls BUG().
+
+Fix this by detecting when we're about to insert an overlapping file
+extent item in the log tree and truncating the part that would overlap.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4856,18 +4856,23 @@ static int btrfs_log_prealloc_extents(st
+ path->slots[0]++;
+ continue;
+ }
+- if (!dropped_extents) {
+- /*
+- * Avoid logging extent items logged in past fsync calls
+- * and leading to duplicate keys in the log tree.
+- */
++ /*
++ * Avoid overlapping items in the log tree. The first time we
++ * get here, get rid of everything from a past fsync. After
++ * that, if the current extent starts before the end of the last
++ * extent we copied, truncate the last one. This can happen if
++ * an ordered extent completion modifies the subvolume tree
++ * while btrfs_next_leaf() has the tree unlocked.
++ */
++ if (!dropped_extents || key.offset < truncate_offset) {
+ ret = truncate_inode_items(trans, root->log_root, inode,
+- truncate_offset,
++ min(key.offset, truncate_offset),
+ BTRFS_EXTENT_DATA_KEY);
+ if (ret)
+ goto out;
+ dropped_extents = true;
+ }
++ truncate_offset = btrfs_file_extent_end(path);
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
--- /dev/null
+From fb33eb2ef0d88e75564983ef057b44c5b7e4fded Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 3 Jun 2024 12:49:08 +0100
+Subject: btrfs: fix leak of qgroup extent records after transaction abort
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit fb33eb2ef0d88e75564983ef057b44c5b7e4fded upstream.
+
+Qgroup extent records are created when delayed ref heads are created and
+then released after accounting extents at btrfs_qgroup_account_extents(),
+called during the transaction commit path.
+
+If a transaction is aborted we free the qgroup records by calling
+btrfs_qgroup_destroy_extent_records() at btrfs_destroy_delayed_refs(),
+unless we don't have delayed references. We are incorrectly assuming
+that no delayed references means we don't have qgroup extent records.
+
+We can currently have no delayed references because we ran them all
+during a transaction commit and the transaction was aborted after that
+due to some error in the commit path.
+
+So fix this by ensuring we btrfs_qgroup_destroy_extent_records() at
+btrfs_destroy_delayed_refs() even if we don't have any delayed references.
+
+Reported-by: syzbot+0fecc032fa134afd49df@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/0000000000004e7f980619f91835@google.com/
+Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -4544,18 +4544,10 @@ static void btrfs_destroy_delayed_refs(s
+ struct btrfs_fs_info *fs_info)
+ {
+ struct rb_node *node;
+- struct btrfs_delayed_ref_root *delayed_refs;
++ struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+
+- delayed_refs = &trans->delayed_refs;
+-
+ spin_lock(&delayed_refs->lock);
+- if (atomic_read(&delayed_refs->num_entries) == 0) {
+- spin_unlock(&delayed_refs->lock);
+- btrfs_debug(fs_info, "delayed_refs has NO entry");
+- return;
+- }
+-
+ while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
+ struct btrfs_delayed_ref_head *head;
+ struct rb_node *n;
--- /dev/null
+From f3a5367c679d31473d3fbb391675055b4792c309 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 6 Jun 2024 11:01:51 +0930
+Subject: btrfs: protect folio::private when attaching extent buffer folios
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f3a5367c679d31473d3fbb391675055b4792c309 upstream.
+
+[BUG]
+Since v6.8 there are rare kernel crashes reported by various people,
+the common factor is bad page status error messages like this:
+
+ BUG: Bad page state in process kswapd0 pfn:d6e840
+ page: refcount:0 mapcount:0 mapping:000000007512f4f2 index:0x2796c2c7c
+ pfn:0xd6e840
+ aops:btree_aops ino:1
+ flags: 0x17ffffe0000008(uptodate|node=0|zone=2|lastcpupid=0x3fffff)
+ page_type: 0xffffffff()
+ raw: 0017ffffe0000008 dead000000000100 dead000000000122 ffff88826d0be4c0
+ raw: 00000002796c2c7c 0000000000000000 00000000ffffffff 0000000000000000
+ page dumped because: non-NULL mapping
+
+[CAUSE]
+Commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
+allocate-then-attach method") changes the sequence when allocating a new
+extent buffer.
+
+Previously we always called grab_extent_buffer() under
+mapping->i_private_lock, to ensure the safety on modification on
+folio::private (which is a pointer to extent buffer for regular
+sectorsize).
+
+This can lead to the following race:
+
+Thread A is trying to allocate an extent buffer at bytenr X, with 4
+4K pages, meanwhile thread B is trying to release the page at X + 4K
+(the second page of the extent buffer at X).
+
+ Thread A | Thread B
+-----------------------------------+-------------------------------------
+ | btree_release_folio()
+ | | This is for the page at X + 4K,
+ | | Not page X.
+ | |
+alloc_extent_buffer() | |- release_extent_buffer()
+|- filemap_add_folio() for the | | |- atomic_dec_and_test(eb->refs)
+| page at bytenr X (the first | | |
+| page). | | |
+| Which returned -EEXIST. | | |
+| | | |
+|- filemap_lock_folio() | | |
+| Returned the first page locked. | | |
+| | | |
+|- grab_extent_buffer() | | |
+| |- atomic_inc_not_zero() | | |
+| | Returned false | | |
+| |- folio_detach_private() | | |- folio_detach_private() for X
+| |- folio_test_private() | | |- folio_test_private()
+ | Returned true | | | Returned true
+ |- folio_put() | |- folio_put()
+
+Now there are two puts on the same folio at folio X, leading to refcount
+underflow of the folio X, and eventually causing the BUG_ON() on the
+page->mapping.
+
+The condition is not that easy to hit:
+
+- The release must be triggered for the middle page of an eb
+ If the release is on the same first page of an eb, page lock would kick
+ in and prevent the race.
+
+- folio_detach_private() has a very small race window
+ It's only between folio_test_private() and folio_clear_private().
+
+That's exactly when mapping->i_private_lock is used to prevent such race,
+and commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
+allocate-then-attach method") screwed that up.
+
+At that time, I thought the page lock would kick in as
+filemap_release_folio() also requires the page to be locked, but forgot
+the filemap_release_folio() only locks one page, not all pages of an
+extent buffer.
+
+[FIX]
+Move all the code requiring i_private_lock into
+attach_eb_folio_to_filemap(), so that everything is done with proper
+lock protection.
+
+Furthermore to prevent future problems, add an extra
+lockdep_assert_locked() to ensure we're holding the proper lock.
+
+A reproducer that is able to hit the race (takes a few minutes with
+instrumented code inserting delays to alloc_extent_buffer()):
+
+ #!/bin/sh
+ drop_caches () {
+ while(true); do
+ echo 3 > /proc/sys/vm/drop_caches
+ echo 1 > /proc/sys/vm/compact_memory
+ done
+ }
+
+ run_tar () {
+ while(true); do
+ for x in `seq 1 80` ; do
+ tar cf /dev/zero /mnt > /dev/null &
+ done
+ wait
+ done
+ }
+
+ mkfs.btrfs -f -d single -m single /dev/vda
+ mount -o noatime /dev/vda /mnt
+ # create 200,000 files, 1K each
+ ./simoop -n 200000 -E -f 1k /mnt
+ drop_caches &
+ (run_tar)
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/linux-btrfs/CAHk-=wgt362nGfScVOOii8cgKn2LVVHeOvOA7OBwg1OwbuJQcw@mail.gmail.com/
+Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
+Link: https://lore.kernel.org/lkml/CABXGCsPktcHQOvKTbPaTwegMExije=Gpgci5NW=hqORo-s7diA@mail.gmail.com/
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Link: https://lore.kernel.org/linux-btrfs/e8b3311c-9a75-4903-907f-fc0f7a3fe423@gmx.de/
+Reported-by: syzbot+f80b066392366b4af85e@syzkaller.appspotmail.com
+Fixes: 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to allocate-then-attach method")
+CC: stable@vger.kernel.org # 6.8+
+CC: Chris Mason <clm@fb.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 60 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 31 insertions(+), 29 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3662,6 +3662,8 @@ static struct extent_buffer *grab_extent
+ struct folio *folio = page_folio(page);
+ struct extent_buffer *exists;
+
++ lockdep_assert_held(&page->mapping->i_private_lock);
++
+ /*
+ * For subpage case, we completely rely on radix tree to ensure we
+ * don't try to insert two ebs for the same bytenr. So here we always
+@@ -3729,13 +3731,14 @@ static int check_eb_alignment(struct btr
+ * The caller needs to free the existing folios and retry using the same order.
+ */
+ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
++ struct btrfs_subpage *prealloc,
+ struct extent_buffer **found_eb_ret)
+ {
+
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct address_space *mapping = fs_info->btree_inode->i_mapping;
+ const unsigned long index = eb->start >> PAGE_SHIFT;
+- struct folio *existing_folio;
++ struct folio *existing_folio = NULL;
+ int ret;
+
+ ASSERT(found_eb_ret);
+@@ -3747,12 +3750,14 @@ retry:
+ ret = filemap_add_folio(mapping, eb->folios[i], index + i,
+ GFP_NOFS | __GFP_NOFAIL);
+ if (!ret)
+- return 0;
++ goto finish;
+
+ existing_folio = filemap_lock_folio(mapping, index + i);
+ /* The page cache only exists for a very short time, just retry. */
+- if (IS_ERR(existing_folio))
++ if (IS_ERR(existing_folio)) {
++ existing_folio = NULL;
+ goto retry;
++ }
+
+ /* For now, we should only have single-page folios for btree inode. */
+ ASSERT(folio_nr_pages(existing_folio) == 1);
+@@ -3763,14 +3768,13 @@ retry:
+ return -EAGAIN;
+ }
+
+- if (fs_info->nodesize < PAGE_SIZE) {
+- /*
+- * We're going to reuse the existing page, can drop our page
+- * and subpage structure now.
+- */
++finish:
++ spin_lock(&mapping->i_private_lock);
++ if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
++ /* We're going to reuse the existing page, can drop our folio now. */
+ __free_page(folio_page(eb->folios[i], 0));
+ eb->folios[i] = existing_folio;
+- } else {
++ } else if (existing_folio) {
+ struct extent_buffer *existing_eb;
+
+ existing_eb = grab_extent_buffer(fs_info,
+@@ -3778,6 +3782,7 @@ retry:
+ if (existing_eb) {
+ /* The extent buffer still exists, we can use it directly. */
+ *found_eb_ret = existing_eb;
++ spin_unlock(&mapping->i_private_lock);
+ folio_unlock(existing_folio);
+ folio_put(existing_folio);
+ return 1;
+@@ -3786,6 +3791,22 @@ retry:
+ __free_page(folio_page(eb->folios[i], 0));
+ eb->folios[i] = existing_folio;
+ }
++ eb->folio_size = folio_size(eb->folios[i]);
++ eb->folio_shift = folio_shift(eb->folios[i]);
++ /* Should not fail, as we have preallocated the memory. */
++ ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
++ ASSERT(!ret);
++ /*
++ * To inform we have an extra eb under allocation, so that
++ * detach_extent_buffer_page() won't release the folio private when the
++ * eb hasn't been inserted into radix tree yet.
++ *
++ * The ref will be decreased when the eb releases the page, in
++ * detach_extent_buffer_page(). Thus needs no special handling in the
++ * error path.
++ */
++ btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
++ spin_unlock(&mapping->i_private_lock);
+ return 0;
+ }
+
+@@ -3797,7 +3818,6 @@ struct extent_buffer *alloc_extent_buffe
+ int attached = 0;
+ struct extent_buffer *eb;
+ struct extent_buffer *existing_eb = NULL;
+- struct address_space *mapping = fs_info->btree_inode->i_mapping;
+ struct btrfs_subpage *prealloc = NULL;
+ u64 lockdep_owner = owner_root;
+ bool page_contig = true;
+@@ -3863,7 +3883,7 @@ reallocate:
+ for (int i = 0; i < num_folios; i++) {
+ struct folio *folio;
+
+- ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
++ ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
+ if (ret > 0) {
+ ASSERT(existing_eb);
+ goto out;
+@@ -3900,24 +3920,6 @@ reallocate:
+ * and free the allocated page.
+ */
+ folio = eb->folios[i];
+- eb->folio_size = folio_size(folio);
+- eb->folio_shift = folio_shift(folio);
+- spin_lock(&mapping->i_private_lock);
+- /* Should not fail, as we have preallocated the memory */
+- ret = attach_extent_buffer_folio(eb, folio, prealloc);
+- ASSERT(!ret);
+- /*
+- * To inform we have extra eb under allocation, so that
+- * detach_extent_buffer_page() won't release the folio private
+- * when the eb hasn't yet been inserted into radix tree.
+- *
+- * The ref will be decreased when the eb released the page, in
+- * detach_extent_buffer_page().
+- * Thus needs no special handling in error path.
+- */
+- btrfs_folio_inc_eb_refs(fs_info, folio);
+- spin_unlock(&mapping->i_private_lock);
+-
+ WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
+
+ /*
--- /dev/null
+From 2b8aa78cf1279ec5e418baa26bfed5df682568d8 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Thu, 9 May 2024 15:34:40 -0700
+Subject: btrfs: qgroup: fix qgroup id collision across mounts
+
+From: Boris Burkov <boris@bur.io>
+
+commit 2b8aa78cf1279ec5e418baa26bfed5df682568d8 upstream.
+
+If we delete subvolumes whose ID is the largest in the filesystem, then
+unmount and mount again, then btrfs_init_root_free_objectid on the
+tree_root will select a subvolid smaller than that one and thus allow
+reusing it.
+
+If we are also using qgroups (and particularly squotas) it is possible
+to delete the subvol without deleting the qgroup. In that case, we will
+be able to create a new subvol whose id already has a level 0 qgroup.
+This will result in re-using that qgroup which would then lead to
+incorrect accounting.
+
+Fixes: 6ed05643ddb1 ("btrfs: create qgroup earlier in snapshot creation")
+CC: stable@vger.kernel.org # 6.7+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -468,6 +468,7 @@ int btrfs_read_qgroup_config(struct btrf
+ }
+ if (!qgroup) {
+ struct btrfs_qgroup *prealloc;
++ struct btrfs_root *tree_root = fs_info->tree_root;
+
+ prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
+ if (!prealloc) {
+@@ -475,6 +476,25 @@ int btrfs_read_qgroup_config(struct btrf
+ goto out;
+ }
+ qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset);
++ /*
++ * If a qgroup exists for a subvolume ID, it is possible
++ * that subvolume has been deleted, in which case
++ * re-using that ID would lead to incorrect accounting.
++ *
++ * Ensure that we skip any such subvol ids.
++ *
++ * We don't need to lock because this is only called
++ * during mount before we start doing things like creating
++ * subvolumes.
++ */
++ if (is_fstree(qgroup->qgroupid) &&
++ qgroup->qgroupid >= tree_root->free_objectid)
++ /*
++ * Don't need to check against BTRFS_LAST_FREE_OBJECTID,
++ * as it will get checked on the next call to
++ * btrfs_get_free_objectid.
++ */
++ tree_root->free_objectid = qgroup->qgroupid + 1;
+ }
+ ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+ if (ret < 0)
--- /dev/null
+From 1fa7603d569b9e738e9581937ba8725cd7d39b48 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Thu, 2 May 2024 22:45:58 +0200
+Subject: btrfs: qgroup: update rescan message levels and error codes
+
+From: David Sterba <dsterba@suse.com>
+
+commit 1fa7603d569b9e738e9581937ba8725cd7d39b48 upstream.
+
+On filesystems without enabled quotas there's still a warning message in
+the logs when rescan is called. In that case it's not a problem that
+should be reported, rescan can be called unconditionally. Change the
+error code to ENOTCONN which is used for 'quotas not enabled' elsewhere.
+
+Remove message (also a warning) when rescan is called during an ongoing
+rescan, this brings no useful information and the error code is
+sufficient.
+
+Change message levels to debug for now, they can be removed eventually.
+
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3826,14 +3826,14 @@ qgroup_rescan_init(struct btrfs_fs_info
+ /* we're resuming qgroup rescan at mount time */
+ if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
+- btrfs_warn(fs_info,
++ btrfs_debug(fs_info,
+ "qgroup rescan init failed, qgroup rescan is not queued");
+ ret = -EINVAL;
+ } else if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_ON)) {
+- btrfs_warn(fs_info,
++ btrfs_debug(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
+- ret = -EINVAL;
++ ret = -ENOTCONN;
+ }
+
+ if (ret)
+@@ -3844,14 +3844,12 @@ qgroup_rescan_init(struct btrfs_fs_info
+
+ if (init_flags) {
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+- btrfs_warn(fs_info,
+- "qgroup rescan is already in progress");
+ ret = -EINPROGRESS;
+ } else if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_ON)) {
+- btrfs_warn(fs_info,
++ btrfs_debug(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
+- ret = -EINVAL;
++ ret = -ENOTCONN;
+ } else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
+ /* Quota disable is in progress */
+ ret = -EBUSY;
--- /dev/null
+From 440861b1a03c72cc7be4a307e178dcaa6894479b Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 21 May 2024 19:27:31 +0930
+Subject: btrfs: re-introduce 'norecovery' mount option
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 440861b1a03c72cc7be4a307e178dcaa6894479b upstream.
+
+Although 'norecovery' mount option was marked as deprecated for a long
+time and a warning message was printed during the deprecation window,
+it's still actively utilized by several projects that need a safer way
+to mount a btrfs without any writes.
+
+Furthermore this 'norecovery' mount option is supported by other major
+filesystems, which makes it less clear what's our motivation to remove
+it.
+
+Re-introduce the 'norecovery' mount option, and output a message to recommend
+'rescue=nologreplay' option.
+
+Link: https://lore.kernel.org/linux-btrfs/ZkxZT0J-z0GYvfy8@gardel-login/#t
+Link: https://github.com/systemd/systemd/pull/32892
+Link: https://bugzilla.suse.com/show_bug.cgi?id=1222429
+Reported-by: Lennart Poettering <lennart@poettering.net>
+Reported-by: Jiri Slaby <jslaby@suse.com>
+Fixes: a1912f712188 ("btrfs: remove code for inode_cache and recovery mount options")
+CC: stable@vger.kernel.org # 6.8+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -119,6 +119,7 @@ enum {
+ Opt_thread_pool,
+ Opt_treelog,
+ Opt_user_subvol_rm_allowed,
++ Opt_norecovery,
+
+ /* Rescue options */
+ Opt_rescue,
+@@ -245,6 +246,8 @@ static const struct fs_parameter_spec bt
+ __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL),
+ /* Deprecated, with alias rescue=usebackuproot */
+ __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL),
++ /* For compatibility only, alias for "rescue=nologreplay". */
++ fsparam_flag("norecovery", Opt_norecovery),
+
+ /* Debugging options. */
+ fsparam_flag_no("enospc_debug", Opt_enospc_debug),
+@@ -438,6 +441,11 @@ static int btrfs_parse_param(struct fs_c
+ "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
+ btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
+ break;
++ case Opt_norecovery:
++ btrfs_info(NULL,
++"'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'");
++ btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
++ break;
+ case Opt_flushoncommit:
+ if (result.negated)
+ btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT);
--- /dev/null
+From 518549c120e671c4906f77d1802b97e9b23f673a Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Wed, 29 May 2024 18:16:56 -0500
+Subject: cifs: fix creating sockets when using sfu mount options
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 518549c120e671c4906f77d1802b97e9b23f673a upstream.
+
+When running fstest generic/423 with sfu mount option, it
+was being skipped due to inability to create sockets:
+
+ generic/423 [not run] cifs does not support mknod/mkfifo
+
+which can also be easily reproduced with their af_unix tool:
+
+ ./src/af_unix /mnt1/socket-two bind: Operation not permitted
+
+Fix sfu mount option to allow creating and reporting sockets.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/cifspdu.h | 2 +-
+ fs/smb/client/inode.c | 4 ++++
+ fs/smb/client/smb2ops.c | 3 +++
+ 3 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/client/cifspdu.h
++++ b/fs/smb/client/cifspdu.h
+@@ -2574,7 +2574,7 @@ typedef struct {
+
+
+ struct win_dev {
+- unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO*/
++ unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO or LnxSOCK */
+ __le64 major;
+ __le64 minor;
+ } __attribute__((packed));
+--- a/fs/smb/client/inode.c
++++ b/fs/smb/client/inode.c
+@@ -591,6 +591,10 @@ cifs_sfu_type(struct cifs_fattr *fattr,
+ mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
+ fattr->cf_rdev = MKDEV(mjr, mnr);
+ }
++ } else if (memcmp("LnxSOCK", pbuf, 8) == 0) {
++ cifs_dbg(FYI, "Socket\n");
++ fattr->cf_mode |= S_IFSOCK;
++ fattr->cf_dtype = DT_SOCK;
+ } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
+ cifs_dbg(FYI, "Symlink\n");
+ fattr->cf_mode |= S_IFLNK;
+--- a/fs/smb/client/smb2ops.c
++++ b/fs/smb/client/smb2ops.c
+@@ -4996,6 +4996,9 @@ static int __cifs_sfu_make_node(unsigned
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
++ case S_IFSOCK:
++ strscpy(pdev.type, "LnxSOCK");
++ break;
+ case S_IFIFO:
+ strscpy(pdev.type, "LnxFIFO");
+ break;
--- /dev/null
+From 3ec8ebd8a5b782d56347ae884de880af26f93996 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
+Date: Mon, 27 May 2024 16:22:34 +0300
+Subject: EDAC/amd64: Convert PCIBIOS_* return codes to errnos
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+commit 3ec8ebd8a5b782d56347ae884de880af26f93996 upstream.
+
+gpu_get_node_map() uses pci_read_config_dword() that returns PCIBIOS_*
+codes. The return code is then returned all the way into the module
+init function amd64_edac_init() that returns it as is. The module init
+functions, however, should return normal errnos.
+
+Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal
+errno before returning it from gpu_get_node_map().
+
+For consistency, convert also the other similar cases which return
+PCIBIOS_* codes even if they do not have any bugs at the moment.
+
+Fixes: 4251566ebc1c ("EDAC/amd64: Cache and use GPU node map")
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240527132236.13875-1-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/edac/amd64_edac.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/edac/amd64_edac.c
++++ b/drivers/edac/amd64_edac.c
+@@ -81,7 +81,7 @@ int __amd64_read_pci_cfg_dword(struct pc
+ amd64_warn("%s: error reading F%dx%03x.\n",
+ func, PCI_FUNC(pdev->devfn), offset);
+
+- return err;
++ return pcibios_err_to_errno(err);
+ }
+
+ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
+@@ -94,7 +94,7 @@ int __amd64_write_pci_cfg_dword(struct p
+ amd64_warn("%s: error writing to F%dx%03x.\n",
+ func, PCI_FUNC(pdev->devfn), offset);
+
+- return err;
++ return pcibios_err_to_errno(err);
+ }
+
+ /*
+@@ -1025,8 +1025,10 @@ static int gpu_get_node_map(struct amd64
+ }
+
+ ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+- if (ret)
++ if (ret) {
++ ret = pcibios_err_to_errno(ret);
+ goto out;
++ }
+
+ gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
+ gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
--- /dev/null
+From f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
+Date: Mon, 27 May 2024 16:22:35 +0300
+Subject: EDAC/igen6: Convert PCIBIOS_* return codes to errnos
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+
+commit f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 upstream.
+
+errcmd_enable_error_reporting() uses pci_{read,write}_config_word()
+that return PCIBIOS_* codes. The return code is then returned all the
+way into the probe function igen6_probe() that returns it as is. The
+probe functions, however, should return normal errnos.
+
+Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal
+errno before returning it from errcmd_enable_error_reporting().
+
+Fixes: 10590a9d4f23 ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC")
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240527132236.13875-2-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/edac/igen6_edac.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/edac/igen6_edac.c
++++ b/drivers/edac/igen6_edac.c
+@@ -800,7 +800,7 @@ static int errcmd_enable_error_reporting
+
+ rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
+ if (rc)
+- return rc;
++ return pcibios_err_to_errno(rc);
+
+ if (enable)
+ errcmd |= ERRCMD_CE | ERRSTS_UE;
+@@ -809,7 +809,7 @@ static int errcmd_enable_error_reporting
+
+ rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
+ if (rc)
+- return rc;
++ return pcibios_err_to_errno(rc);
+
+ return 0;
+ }
--- /dev/null
+From d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 Mon Sep 17 00:00:00 2001
+From: Hao Ge <gehao@kylinos.cn>
+Date: Mon, 13 May 2024 13:33:38 +0800
+Subject: eventfs: Fix a possible null pointer dereference in eventfs_find_events()
+
+From: Hao Ge <gehao@kylinos.cn>
+
+commit d4e9a968738bf66d3bb852dd5588d4c7afd6d7f4 upstream.
+
+In function eventfs_find_events, there is a potential null pointer
+that may be caused by calling update_events_attr which will perform
+some operations on the members of the ei struct when ei is NULL.
+
+Hence, when ei->is_freed is set, return NULL directly.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240513053338.63017-1-hao.ge@linux.dev
+
+Cc: stable@vger.kernel.org
+Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership")
+Signed-off-by: Hao Ge <gehao@kylinos.cn>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -345,10 +345,9 @@ static struct eventfs_inode *eventfs_fin
+ * If the ei is being freed, the ownership of the children
+ * doesn't matter.
+ */
+- if (ei->is_freed) {
+- ei = NULL;
+- break;
+- }
++ if (ei->is_freed)
++ return NULL;
++
+ // Walk upwards until you find the events inode
+ } while (!ei->is_events);
+
--- /dev/null
+From 8898e7f288c47d450a3cf1511c791a03550c0789 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Thu, 23 May 2024 01:14:26 -0400
+Subject: eventfs: Keep the directories from having the same inode number as files
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 8898e7f288c47d450a3cf1511c791a03550c0789 upstream.
+
+The directories require unique inode numbers but all the eventfs files
+have the same inode number. Prevent the directories from having the same
+inode numbers as the files as that can confuse some tooling.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.428826685@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Fixes: 834bf76add3e6 ("eventfs: Save directory inodes in the eventfs_inode structure")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -50,8 +50,12 @@ static struct eventfs_root_inode *get_ro
+ /* Just try to make something consistent and unique */
+ static int eventfs_dir_ino(struct eventfs_inode *ei)
+ {
+- if (!ei->ino)
++ if (!ei->ino) {
+ ei->ino = get_next_ino();
++ /* Must not have the file inode number */
++ if (ei->ino == EVENTFS_FILE_INODE_INO)
++ ei->ino = get_next_ino();
++ }
+
+ return ei->ino;
+ }
--- /dev/null
+From f06d1b10cb016d5aaecdb1804fefca025387bd10 Mon Sep 17 00:00:00 2001
+From: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Date: Thu, 25 Apr 2024 16:24:29 -0400
+Subject: NFS: Fix READ_PLUS when server doesn't support OP_READ_PLUS
+
+From: Anna Schumaker <Anna.Schumaker@Netapp.com>
+
+commit f06d1b10cb016d5aaecdb1804fefca025387bd10 upstream.
+
+Olga showed me a case where the client was sending multiple READ_PLUS
+calls to the server in parallel, and the server replied
+NFS4ERR_OPNOTSUPP to each. The client would fall back to READ for the
+first reply, but fail to retry the other calls.
+
+I fix this by removing the test for NFS_CAP_READ_PLUS in
+nfs4_read_plus_not_supported(). This allows us to reschedule any
+READ_PLUS call that has a NFS4ERR_OPNOTSUPP return value, even after the
+capability has been cleared.
+
+Reported-by: Olga Kornievskaia <kolga@netapp.com>
+Fixes: c567552612ec ("NFS: Add READ_PLUS data segment support")
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/nfs4proc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -5456,7 +5456,7 @@ static bool nfs4_read_plus_not_supported
+ struct rpc_message *msg = &task->tk_msg;
+
+ if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
+- server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
++ task->tk_status == -ENOTSUPP) {
+ server->caps &= ~NFS_CAP_READ_PLUS;
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ rpc_restart_call_prepare(task);
--- /dev/null
+From 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 Mon Sep 17 00:00:00 2001
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+Date: Fri, 10 May 2024 23:24:04 +0300
+Subject: nfs: fix undefined behavior in nfs_block_bits()
+
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+
+commit 3c0a2e0b0ae661457c8505fecc7be5501aa7a715 upstream.
+
+Shifting *signed int* typed constant 1 left by 31 bits causes undefined
+behavior. Specify the correct *unsigned long* type by using 1UL instead.
+
+Found by Linux Verification Center (linuxtesting.org) with the Svace static
+analysis tool.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/internal.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/internal.h
++++ b/fs/nfs/internal.h
+@@ -710,9 +710,9 @@ unsigned long nfs_block_bits(unsigned lo
+ if ((bsize & (bsize - 1)) || nrbitsp) {
+ unsigned char nrbits;
+
+- for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
++ for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
+ ;
+- bsize = 1 << nrbits;
++ bsize = 1UL << nrbits;
+ if (nrbitsp)
+ *nrbitsp = nrbits;
+ }
--- /dev/null
+From 7373a51e7998b508af7136530f3a997b286ce81c Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 4 Jun 2024 22:42:55 +0900
+Subject: nilfs2: fix nilfs_empty_dir() misjudgment and long loop on I/O errors
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 7373a51e7998b508af7136530f3a997b286ce81c upstream.
+
+The error handling in nilfs_empty_dir() when a directory folio/page read
+fails is incorrect, as in the old ext2 implementation, and if the
+folio/page cannot be read or nilfs_check_folio() fails, it will falsely
+determine the directory as empty and corrupt the file system.
+
+In addition, since nilfs_empty_dir() does not immediately return on a
+failed folio/page read, but continues to loop, this can cause a long loop
+with I/O if i_size of the directory's inode is also corrupted, causing the
+log writer thread to wait and hang, as reported by syzbot.
+
+Fix these issues by making nilfs_empty_dir() immediately return a false
+value (0) if it fails to get a directory folio/page.
+
+Link: https://lkml.kernel.org/r/20240604134255.7165-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+c8166c541d3971bf6c87@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=c8166c541d3971bf6c87
+Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nilfs2/dir.c
++++ b/fs/nilfs2/dir.c
+@@ -608,7 +608,7 @@ int nilfs_empty_dir(struct inode *inode)
+
+ kaddr = nilfs_get_folio(inode, i, &folio);
+ if (IS_ERR(kaddr))
+- continue;
++ return 0;
+
+ de = (struct nilfs_dir_entry *)kaddr;
+ kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
--- /dev/null
+From a4ca369ca221bb7e06c725792ac107f0e48e82e7 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Thu, 30 May 2024 23:15:56 +0900
+Subject: nilfs2: fix potential kernel bug due to lack of writeback flag waiting
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit a4ca369ca221bb7e06c725792ac107f0e48e82e7 upstream.
+
+Destructive writes to a block device on which nilfs2 is mounted can cause
+a kernel bug in the folio/page writeback start routine or writeback end
+routine (__folio_start_writeback in the log below):
+
+ kernel BUG at mm/page-writeback.c:3070!
+ Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
+ ...
+ RIP: 0010:__folio_start_writeback+0xbaa/0x10e0
+ Code: 25 ff 0f 00 00 0f 84 18 01 00 00 e8 40 ca c6 ff e9 17 f6 ff ff
+ e8 36 ca c6 ff 4c 89 f7 48 c7 c6 80 c0 12 84 e8 e7 b3 0f 00 90 <0f>
+ 0b e8 1f ca c6 ff 4c 89 f7 48 c7 c6 a0 c6 12 84 e8 d0 b3 0f 00
+ ...
+ Call Trace:
+ <TASK>
+ nilfs_segctor_do_construct+0x4654/0x69d0 [nilfs2]
+ nilfs_segctor_construct+0x181/0x6b0 [nilfs2]
+ nilfs_segctor_thread+0x548/0x11c0 [nilfs2]
+ kthread+0x2f0/0x390
+ ret_from_fork+0x4b/0x80
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+This is because when the log writer starts a writeback for segment summary
+blocks or a super root block that use the backing device's page cache, it
+does not wait for the ongoing folio/page writeback, resulting in an
+inconsistent writeback state.
+
+Fix this issue by waiting for ongoing writebacks when putting
+folios/pages on the backing device into writeback state.
+
+Link: https://lkml.kernel.org/r/20240530141556.4411-1-konishi.ryusuke@gmail.com
+Fixes: 9ff05123e3bf ("nilfs2: segment constructor")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -1652,6 +1652,7 @@ static void nilfs_segctor_prepare_write(
+ if (bh->b_folio != bd_folio) {
+ if (bd_folio) {
+ folio_lock(bd_folio);
++ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+@@ -1665,6 +1666,7 @@ static void nilfs_segctor_prepare_write(
+ if (bh == segbuf->sb_super_root) {
+ if (bh->b_folio != bd_folio) {
+ folio_lock(bd_folio);
++ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+@@ -1681,6 +1683,7 @@ static void nilfs_segctor_prepare_write(
+ }
+ if (bd_folio) {
+ folio_lock(bd_folio);
++ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
--- /dev/null
+From 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d Mon Sep 17 00:00:00 2001
+From: Hari Bathini <hbathini@linux.ibm.com>
+Date: Thu, 2 May 2024 23:02:04 +0530
+Subject: powerpc/64/bpf: fix tail calls for PCREL addressing
+
+From: Hari Bathini <hbathini@linux.ibm.com>
+
+commit 2ecfe59cd7de1f202e9af2516a61fbbf93d0bd4d upstream.
+
+With PCREL addressing, there is no kernel TOC. So, it is not setup in
+prologue when PCREL addressing is used. But the number of instructions
+to skip on a tail call was not adjusted accordingly. That resulted in
+not so obvious failures while using tailcalls. 'tailcalls' selftest
+crashed the system with the below call trace:
+
+ bpf_test_run+0xe8/0x3cc (unreliable)
+ bpf_prog_test_run_skb+0x348/0x778
+ __sys_bpf+0xb04/0x2b00
+ sys_bpf+0x28/0x38
+ system_call_exception+0x168/0x340
+ system_call_vectored_common+0x15c/0x2ec
+
+Also, as bpf programs are always module addresses and a bpf helper in
+general is a core kernel text address, using PC relative addressing
+often fails with "out of range of pcrel address" error. Switch to
+using kernel base for relative addressing to handle this better.
+
+Fixes: 7e3a68be42e1 ("powerpc/64: vmlinux support building with PCREL addresing")
+Cc: stable@vger.kernel.org # v6.4+
+Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240502173205.142794-1-hbathini@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp64.c | 30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -202,7 +202,8 @@ void bpf_jit_build_epilogue(u32 *image,
+ EMIT(PPC_RAW_BLR());
+ }
+
+-static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func)
++static int
++bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
+ {
+ unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
+ long reladdr;
+@@ -211,19 +212,20 @@ static int bpf_jit_emit_func_call_hlp(u3
+ return -EINVAL;
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+- reladdr = func_addr - CTX_NIA(ctx);
++ reladdr = func_addr - local_paca->kernelbase;
+
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+- pr_err("eBPF: address of %ps out of range of pcrel address.\n",
+- (void *)func);
++ pr_err("eBPF: address of %ps out of range of 34-bit relative address.\n",
++ (void *)func);
+ return -ERANGE;
+ }
+- /* pla r12,addr */
+- EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+- EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+- EMIT(PPC_RAW_MTCTR(_R12));
+- EMIT(PPC_RAW_BCTR());
+-
++ EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
++ /* Align for subsequent prefix instruction */
++ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
++ EMIT(PPC_RAW_NOP());
++ /* paddi r12,r12,addr */
++ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr));
++ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr));
+ } else {
+ reladdr = func_addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+@@ -233,9 +235,9 @@ static int bpf_jit_emit_func_call_hlp(u3
+
+ EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
+ EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
+- EMIT(PPC_RAW_MTCTR(_R12));
+- EMIT(PPC_RAW_BCTRL());
+ }
++ EMIT(PPC_RAW_MTCTR(_R12));
++ EMIT(PPC_RAW_BCTRL());
+
+ return 0;
+ }
+@@ -285,7 +287,7 @@ static int bpf_jit_emit_tail_call(u32 *i
+ int b2p_index = bpf_to_ppc(BPF_REG_3);
+ int bpf_tailcall_prologue_size = 8;
+
+- if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
++ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ bpf_tailcall_prologue_size += 4; /* skip past the toc load */
+
+ /*
+@@ -993,7 +995,7 @@ emit_clear:
+ return ret;
+
+ if (func_addr_fixed)
+- ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
++ ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr);
+ else
+ ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
+
--- /dev/null
+From b1e7cee96127468c2483cf10c2899c9b5cf79bf8 Mon Sep 17 00:00:00 2001
+From: Puranjay Mohan <puranjay@kernel.org>
+Date: Mon, 13 May 2024 10:02:48 +0000
+Subject: powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH
+
+From: Puranjay Mohan <puranjay@kernel.org>
+
+commit b1e7cee96127468c2483cf10c2899c9b5cf79bf8 upstream.
+
+The Linux Kernel Memory Model [1][2] requires RMW operations that have a
+return value to be fully ordered.
+
+BPF atomic operations with BPF_FETCH (including BPF_XCHG and
+BPF_CMPXCHG) return a value back so they need to be JITed to fully
+ordered operations. POWERPC currently emits relaxed operations for
+these.
+
+We can show this by running the following litmus-test:
+
+ PPC SB+atomic_add+fetch
+
+ {
+ 0:r0=x; (* dst reg assuming offset is 0 *)
+ 0:r1=2; (* src reg *)
+ 0:r2=1;
+ 0:r4=y; (* P0 writes to this, P1 reads this *)
+ 0:r5=z; (* P1 writes to this, P0 reads this *)
+ 0:r6=0;
+
+ 1:r2=1;
+ 1:r4=y;
+ 1:r5=z;
+ }
+
+ P0 | P1 ;
+ stw r2, 0(r4) | stw r2,0(r5) ;
+ | ;
+ loop:lwarx r3, r6, r0 | ;
+ mr r8, r3 | ;
+ add r3, r3, r1 | sync ;
+ stwcx. r3, r6, r0 | ;
+ bne loop | ;
+ mr r1, r8 | ;
+ | ;
+ lwa r7, 0(r5) | lwa r7,0(r4) ;
+
+ ~exists(0:r7=0 /\ 1:r7=0)
+
+ Witnesses
+ Positive: 9 Negative: 3
+ Condition ~exists (0:r7=0 /\ 1:r7=0)
+ Observation SB+atomic_add+fetch Sometimes 3 9
+
+This test shows that the older store in P0 is reordered with a newer
+load to a different address. Although there is a RMW operation with
+fetch between them. Adding a sync before and after RMW fixes the issue:
+
+ Witnesses
+ Positive: 9 Negative: 0
+ Condition ~exists (0:r7=0 /\ 1:r7=0)
+ Observation SB+atomic_add+fetch Never 0 9
+
+[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
+[2] https://www.kernel.org/doc/Documentation/atomic_t.txt
+
+Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations")
+Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg")
+Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations")
+Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg")
+Cc: stable@vger.kernel.org # v6.0+
+Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
+Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Naveen N Rao <naveen@kernel.org>
+Acked-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240513100248.110535-1-puranjay@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
+ arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/powerpc/net/bpf_jit_comp32.c
++++ b/arch/powerpc/net/bpf_jit_comp32.c
+@@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *
+
+ /* Get offset into TMP_REG */
+ EMIT(PPC_RAW_LI(tmp_reg, off));
++ /*
++ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
++ * before and after the operation.
++ *
++ * This is a requirement in the Linux Kernel Memory Model.
++ * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
++ */
++ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
++ EMIT(PPC_RAW_SYNC());
+ tmp_idx = ctx->idx * 4;
+ /* load value from memory into r0 */
+ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
+@@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *
+
+ /* For the BPF_FETCH variant, get old data into src_reg */
+ if (imm & BPF_FETCH) {
++ /* Emit 'sync' to enforce full ordering */
++ if (IS_ENABLED(CONFIG_SMP))
++ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_MR(ret_reg, ax_reg));
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -805,6 +805,15 @@ emit_clear:
+
+ /* Get offset into TMP_REG_1 */
+ EMIT(PPC_RAW_LI(tmp1_reg, off));
++ /*
++ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
++ * before and after the operation.
++ *
++ * This is a requirement in the Linux Kernel Memory Model.
++ * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
++ */
++ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
++ EMIT(PPC_RAW_SYNC());
+ tmp_idx = ctx->idx * 4;
+ /* load value from memory into TMP_REG_2 */
+ if (size == BPF_DW)
+@@ -867,6 +876,9 @@ emit_clear:
+ PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+ if (imm & BPF_FETCH) {
++ /* Emit 'sync' to enforce full ordering */
++ if (IS_ENABLED(CONFIG_SMP))
++ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_MR(ret_reg, _R0));
+ /*
+ * Skip unnecessary zero-extension for 32-bit cmpxchg.
asoc-sof-ipc4-topology-fix-input-format-query-of-process-modules-without-base-extension.patch
alsa-ump-don-t-clear-bank-selection-after-sending-a-program-change.patch
alsa-ump-don-t-accept-an-invalid-ump-protocol-number.patch
+edac-amd64-convert-pcibios_-return-codes-to-errnos.patch
+edac-igen6-convert-pcibios_-return-codes-to-errnos.patch
+cifs-fix-creating-sockets-when-using-sfu-mount-options.patch
+nfs-fix-undefined-behavior-in-nfs_block_bits.patch
+nfs-fix-read_plus-when-server-doesn-t-support-op_read_plus.patch
+eventfs-fix-a-possible-null-pointer-dereference-in-eventfs_find_events.patch
+eventfs-keep-the-directories-from-having-the-same-inode-number-as-files.patch
+tracefs-clear-event_inode-flag-in-tracefs_drop_inode.patch
+btrfs-qgroup-update-rescan-message-levels-and-error-codes.patch
+btrfs-qgroup-fix-qgroup-id-collision-across-mounts.patch
+btrfs-protect-folio-private-when-attaching-extent-buffer-folios.patch
+btrfs-fix-crash-on-racing-fsync-and-size-extending-write-into-prealloc.patch
+btrfs-fix-leak-of-qgroup-extent-records-after-transaction-abort.patch
+btrfs-re-introduce-norecovery-mount-option.patch
+alsa-seq-fix-incorrect-ump-type-for-system-messages.patch
+bpf-fix-multi-uprobe-pid-filtering-logic.patch
+powerpc-64-bpf-fix-tail-calls-for-pcrel-addressing.patch
+powerpc-bpf-enforce-full-ordering-for-atomic-operations-with-bpf_fetch.patch
+nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
+nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch
+smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch
--- /dev/null
+From 02c418774f76a0a36a6195c9dbf8971eb4130a15 Mon Sep 17 00:00:00 2001
+From: Enzo Matsumiya <ematsumiya@suse.de>
+Date: Thu, 6 Jun 2024 13:13:13 -0300
+Subject: smb: client: fix deadlock in smb2_find_smb_tcon()
+
+From: Enzo Matsumiya <ematsumiya@suse.de>
+
+commit 02c418774f76a0a36a6195c9dbf8971eb4130a15 upstream.
+
+Unlock cifs_tcp_ses_lock before calling cifs_put_smb_ses() to avoid such
+deadlock.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
+Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
+Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/client/smb2transport.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/smb/client/smb2transport.c
++++ b/fs/smb/client/smb2transport.c
+@@ -216,8 +216,8 @@ smb2_find_smb_tcon(struct TCP_Server_Inf
+ }
+ tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+ if (!tcon) {
+- cifs_put_smb_ses(ses);
+ spin_unlock(&cifs_tcp_ses_lock);
++ cifs_put_smb_ses(ses);
+ return NULL;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
--- /dev/null
+From 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Thu, 23 May 2024 01:14:29 -0400
+Subject: tracefs: Clear EVENT_INODE flag in tracefs_drop_inode()
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 0bcfd9aa4dafa03b88d68bf66b694df2a3e76cf3 upstream.
+
+When the inode is being dropped from the dentry, the TRACEFS_EVENT_INODE
+flag needs to be cleared to prevent a remount from calling
+eventfs_remount() on the tracefs_inode private data. There's a race
+between the inode is dropped (and the dentry freed) to where the inode is
+actually freed. If a remount happens between the two, the eventfs_inode
+could be accessed after it is freed (only the dentry keeps a ref count on
+it).
+
+Currently the TRACEFS_EVENT_INODE flag is cleared from the dentry iput()
+function. But this is incorrect, as it is possible that the inode has
+another reference to it. The flag should only be cleared when the inode is
+really being dropped and has no more references. That happens in the
+drop_inode callback of the inode, as that gets called when the last
+reference of the inode is released.
+
+Remove the tracefs_d_iput() function and move its logic to the more
+appropriate tracefs_drop_inode() callback function.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.908205106@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Fixes: baa23a8d4360d ("tracefs: Reset permissions on remount if permissions are options")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c | 33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -439,10 +439,26 @@ static int tracefs_show_options(struct s
+ return 0;
+ }
+
++static int tracefs_drop_inode(struct inode *inode)
++{
++ struct tracefs_inode *ti = get_tracefs(inode);
++
++ /*
++ * This inode is being freed and cannot be used for
++ * eventfs. Clear the flag so that it doesn't call into
++ * eventfs during the remount flag updates. The eventfs_inode
++ * gets freed after an RCU cycle, so the content will still
++ * be safe if the iteration is going on now.
++ */
++ ti->flags &= ~TRACEFS_EVENT_INODE;
++
++ return 1;
++}
++
+ static const struct super_operations tracefs_super_operations = {
+ .alloc_inode = tracefs_alloc_inode,
+ .free_inode = tracefs_free_inode,
+- .drop_inode = generic_delete_inode,
++ .drop_inode = tracefs_drop_inode,
+ .statfs = simple_statfs,
+ .remount_fs = tracefs_remount,
+ .show_options = tracefs_show_options,
+@@ -469,22 +485,7 @@ static int tracefs_d_revalidate(struct d
+ return !(ei && ei->is_freed);
+ }
+
+-static void tracefs_d_iput(struct dentry *dentry, struct inode *inode)
+-{
+- struct tracefs_inode *ti = get_tracefs(inode);
+-
+- /*
+- * This inode is being freed and cannot be used for
+- * eventfs. Clear the flag so that it doesn't call into
+- * eventfs during the remount flag updates. The eventfs_inode
+- * gets freed after an RCU cycle, so the content will still
+- * be safe if the iteration is going on now.
+- */
+- ti->flags &= ~TRACEFS_EVENT_INODE;
+-}
+-
+ static const struct dentry_operations tracefs_dentry_operations = {
+- .d_iput = tracefs_d_iput,
+ .d_revalidate = tracefs_d_revalidate,
+ .d_release = tracefs_d_release,
+ };