git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Mon, 19 Aug 2024 14:19:31 +0000 (10:19 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 19 Aug 2024 14:19:31 +0000 (10:19 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
82 files changed:
queue-6.1/9p-fs-fix-wild-memory-access-write-in-v9fs_get_acl.patch [new file with mode: 0644]
queue-6.1/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch [new file with mode: 0644]
queue-6.1/alsa-usb-fix-ubsan-warning-in-parse_audio_unit.patch [new file with mode: 0644]
queue-6.1/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch [new file with mode: 0644]
queue-6.1/bluetooth-fix-hci_link_tx_to-rcu-lock-usage.patch [new file with mode: 0644]
queue-6.1/bluetooth-rfcomm-fix-not-validating-setsockopt-user-.patch [new file with mode: 0644]
queue-6.1/bpf-avoid-kfree_rcu-under-lock-in-bpf_lpm_trie.patch [new file with mode: 0644]
queue-6.1/bpf-drop-unnecessary-user-triggerable-warn_once-in-v.patch [new file with mode: 0644]
queue-6.1/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch [new file with mode: 0644]
queue-6.1/bpf-net-use-dev_stat_inc.patch [new file with mode: 0644]
queue-6.1/bpf-replace-bpf_lpm_trie_key-0-length-array-with-fle.patch [new file with mode: 0644]
queue-6.1/bpf-split-off-basic-bpf-verifier-log-into-separate-f.patch [new file with mode: 0644]
queue-6.1/docs-bpf-document-bpf_map_type_lpm_trie-map.patch [new file with mode: 0644]
queue-6.1/erofs-avoid-debugging-output-for-de-compressed-data.patch [new file with mode: 0644]
queue-6.1/ext4-check-the-return-value-of-ext4_xattr_inode_dec_.patch [new file with mode: 0644]
queue-6.1/ext4-do-not-create-ea-inode-under-buffer-lock.patch [new file with mode: 0644]
queue-6.1/ext4-fold-quota-accounting-into-ext4_xattr_inode_loo.patch [new file with mode: 0644]
queue-6.1/ext4-jbd2-add-an-optimized-bmap-for-the-journal-inod.patch [new file with mode: 0644]
queue-6.1/fou-remove-warn-in-gue_gro_receive-on-unsupported-pr.patch [new file with mode: 0644]
queue-6.1/fs-ntfs3-do-copy_to_user-out-of-run_lock.patch [new file with mode: 0644]
queue-6.1/gfs2-rename-gfs2_freeze_lock-_shared.patch [new file with mode: 0644]
queue-6.1/gfs2-rename-remaining-transaction-glock-references.patch [new file with mode: 0644]
queue-6.1/gfs2-rename-sdf_-fs_frozen-freeze_initiator.patch [new file with mode: 0644]
queue-6.1/gfs2-rename-the-freeze-thaw-_super-callbacks.patch [new file with mode: 0644]
queue-6.1/gfs2-rework-freeze-thaw-logic.patch [new file with mode: 0644]
queue-6.1/gfs2-stop-using-gfs2_make_fs_ro-for-withdraw.patch [new file with mode: 0644]
queue-6.1/igc-correct-the-launchtime-offset.patch [new file with mode: 0644]
queue-6.1/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch [new file with mode: 0644]
queue-6.1/input-bcm5974-check-endpoint-type-before-starting-tr.patch [new file with mode: 0644]
queue-6.1/jfs-fix-null-ptr-deref-in-dtinsertentry.patch [new file with mode: 0644]
queue-6.1/jfs-fix-shift-out-of-bounds-in-dbdiscardag.patch [new file with mode: 0644]
queue-6.1/jfs-fix-shift-out-of-bounds-in-dbjoin.patch [new file with mode: 0644]
queue-6.1/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch [new file with mode: 0644]
queue-6.1/mlxbf_gige-remove-two-unused-function-declarations.patch [new file with mode: 0644]
queue-6.1/mm-khugepaged-fix-kernel-bug-in-hpage_collapse_scan_.patch [new file with mode: 0644]
queue-6.1/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch [new file with mode: 0644]
queue-6.1/net-axienet-fix-register-defines-comment-description.patch [new file with mode: 0644]
queue-6.1/net-don-t-dump-stack-on-queue-timeout.patch [new file with mode: 0644]
queue-6.1/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch [new file with mode: 0644]
queue-6.1/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch [new file with mode: 0644]
queue-6.1/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch [new file with mode: 0644]
queue-6.1/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch [new file with mode: 0644]
queue-6.1/net-hns3-fix-wrong-use-of-semaphore-up.patch [new file with mode: 0644]
queue-6.1/net-hns3-use-the-user-s-cfg-after-reset.patch [new file with mode: 0644]
queue-6.1/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch [new file with mode: 0644]
queue-6.1/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch [new file with mode: 0644]
queue-6.1/net-rds-fix-possible-deadlock-in-rds_message_put.patch [new file with mode: 0644]
queue-6.1/net-sched-print-msecs-when-transmit-queue-time-out.patch [new file with mode: 0644]
queue-6.1/net-sctp-fix-skb-leak-in-sctp_inq_free.patch [new file with mode: 0644]
queue-6.1/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch [new file with mode: 0644]
queue-6.1/netfilter-flowtable-initialise-extack-before-use.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch [new file with mode: 0644]
queue-6.1/nilfs2-initialize-struct-nilfs_binfo_dat-bi_pad-fiel.patch [new file with mode: 0644]
queue-6.1/nilfs2-prevent-warning-in-nilfs_dat_commit_end.patch [new file with mode: 0644]
queue-6.1/pid-replace-struct-pid-1-element-array-with-flex-arr.patch [new file with mode: 0644]
queue-6.1/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch [new file with mode: 0644]
queue-6.1/pppoe-fix-memory-leak-in-pppoe_sendmsg.patch [new file with mode: 0644]
queue-6.1/quota-detect-loops-in-quota-tree.patch [new file with mode: 0644]
queue-6.1/reiserfs-fix-uninit-value-in-comp_keys.patch [new file with mode: 0644]
queue-6.1/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/squashfs-fix-variable-overflow-triggered-by-sysbot.patch [new file with mode: 0644]
queue-6.1/squashfs-squashfs_read_data-need-to-check-if-the-len.patch [new file with mode: 0644]
queue-6.1/udf-fix-bogus-checksum-computation-in-udf_rename.patch [new file with mode: 0644]
queue-6.1/wifi-cfg80211-check-a-msdu-format-more-carefully.patch [new file with mode: 0644]
queue-6.1/wifi-cfg80211-factor-out-bridge-tunnel-rfc1042-heade.patch [new file with mode: 0644]
queue-6.1/wifi-cfg80211-move-a-msdu-check-in-ieee80211_data_to.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-add-a-workaround-for-receiving-non-sta.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-fix-and-simplify-unencrypted-drop-chec.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-fix-change_address-deadlock-during-unr.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-fix-receiving-a-msdu-frames-on-mesh-in.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-remove-mesh-forwarding-congestion-chec.patch [new file with mode: 0644]
queue-6.1/wifi-mac80211-take-wiphy-lock-for-mac-addr-change.patch [new file with mode: 0644]

diff --git a/queue-6.1/9p-fs-fix-wild-memory-access-write-in-v9fs_get_acl.patch b/queue-6.1/9p-fs-fix-wild-memory-access-write-in-v9fs_get_acl.patch
new file mode 100644 (file)
index 0000000..0a14a9c
--- /dev/null
@@ -0,0 +1,104 @@
+From a819f9ec1f2ed9a1022753929816295635cb6648 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 11 Mar 2023 16:50:25 +0400
+Subject: 9P FS: Fix wild-memory-access write in v9fs_get_acl
+
+From: Ivan Orlov <ivan.orlov0322@gmail.com>
+
+[ Upstream commit 707823e7f22f3864ddc7d85e8e9b614afe4f1b16 ]
+
+KASAN reported the following issue:
+[   36.825817][ T5923] BUG: KASAN: wild-memory-access in v9fs_get_acl+0x1a4/0x390
+[   36.827479][ T5923] Write of size 4 at addr 9fffeb37f97f1c00 by task syz-executor798/5923
+[   36.829303][ T5923]
+[   36.829846][ T5923] CPU: 0 PID: 5923 Comm: syz-executor798 Not tainted 6.2.0-syzkaller-18302-g596b6b709632 #0
+[   36.832110][ T5923] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
+[   36.834464][ T5923] Call trace:
+[   36.835196][ T5923]  dump_backtrace+0x1c8/0x1f4
+[   36.836229][ T5923]  show_stack+0x2c/0x3c
+[   36.837100][ T5923]  dump_stack_lvl+0xd0/0x124
+[   36.838103][ T5923]  print_report+0xe4/0x4c0
+[   36.839068][ T5923]  kasan_report+0xd4/0x130
+[   36.840052][ T5923]  kasan_check_range+0x264/0x2a4
+[   36.841199][ T5923]  __kasan_check_write+0x2c/0x3c
+[   36.842216][ T5923]  v9fs_get_acl+0x1a4/0x390
+[   36.843232][ T5923]  v9fs_mount+0x77c/0xa5c
+[   36.844163][ T5923]  legacy_get_tree+0xd4/0x16c
+[   36.845173][ T5923]  vfs_get_tree+0x90/0x274
+[   36.846137][ T5923]  do_new_mount+0x25c/0x8c8
+[   36.847066][ T5923]  path_mount+0x590/0xe58
+[   36.848147][ T5923]  __arm64_sys_mount+0x45c/0x594
+[   36.849273][ T5923]  invoke_syscall+0x98/0x2c0
+[   36.850421][ T5923]  el0_svc_common+0x138/0x258
+[   36.851397][ T5923]  do_el0_svc+0x64/0x198
+[   36.852398][ T5923]  el0_svc+0x58/0x168
+[   36.853224][ T5923]  el0t_64_sync_handler+0x84/0xf0
+[   36.854293][ T5923]  el0t_64_sync+0x190/0x194
+
+Calling '__v9fs_get_acl' method in 'v9fs_get_acl' creates the
+following chain of function calls:
+
+__v9fs_get_acl
+       v9fs_fid_get_acl
+               v9fs_fid_xattr_get
+                       p9_client_xattrwalk
+
+The function p9_client_xattrwalk accepts a pointer to a u64-typed
+variable attr_size and stores a u64 value into it. After
+p9_client_xattrwalk has run, in some circumstances we assign the
+value of the u64-typed variable 'attr_size' to the variable
+'retval', which we then return. However, the type of 'retval' is
+ssize_t, and if the value of attr_size is larger than SSIZE_MAX,
+we get a signed type overflow. If the overflow occurs, the result
+of v9fs_fid_xattr_get may be negative, but is not classified as an
+error. When we then try to allocate an acl with this 'broken' size
+we receive an error, but don't process it. When we try to free this
+acl, we hit the 'wild-memory-access' error (because it was never
+allocated).
+
+This patch adds a new condition to the 'v9fs_fid_xattr_get'
+function, so that it returns an EOVERFLOW error if 'attr_size'
+is larger than SSIZE_MAX.
+
+In this version of the patch I simplified the condition.
+
+In the previous (v2) version of the patch I removed the explicit type
+conversion and added a separate condition to check for the possible
+overflow and return an error (in v1 I just modified the existing condition).
+
+Tested via syzkaller.
+
+Suggested-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Reported-by: syzbot+cb1d16facb3cc90de5fb@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=fbbef66d9e4d096242f3617de5d14d12705b4659
+Signed-off-by: Ivan Orlov <ivan.orlov0322@gmail.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Eric Van Hensbergen <ericvh@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/9p/xattr.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
+index 3b9aa61de8c2d..2aac0e8c4835e 100644
+--- a/fs/9p/xattr.c
++++ b/fs/9p/xattr.c
+@@ -34,10 +34,12 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
+               return retval;
+       }
+       if (attr_size > buffer_size) {
+-              if (!buffer_size) /* request to get the attr_size */
+-                      retval = attr_size;
+-              else
++              if (buffer_size)
+                       retval = -ERANGE;
++              else if (attr_size > SSIZE_MAX)
++                      retval = -EOVERFLOW;
++              else /* request to get the attr_size */
++                      retval = attr_size;
+       } else {
+               iov_iter_truncate(&to, attr_size);
+               retval = p9_client_read(attr_fid, 0, &to, &err);
+-- 
+2.43.0
+
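
Illustrative aside, not part of the patch queue: a minimal userspace sketch of
the overflow class the 9p patch above guards against. On common ABIs, assigning
a u64 attribute size above SSIZE_MAX to an ssize_t turns it negative without it
being flagged as an error; the names and values below are made up for the
demonstration, they are not kernel code.

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
    /* Stand-in for the u64 size a 9p server could report for an xattr. */
    uint64_t reported_attr_size = (uint64_t)SSIZE_MAX + 1;
    ssize_t retval;

    /* Without a bound check, the conversion yields a negative value that
     * callers do not treat as an error code. */
    retval = (ssize_t)reported_attr_size;
    printf("unchecked: retval = %zd\n", retval);

    /* The fix's idea: reject sizes that cannot be represented as ssize_t. */
    if (reported_attr_size > (uint64_t)SSIZE_MAX)
        retval = -EOVERFLOW;
    printf("checked:   retval = %zd (-EOVERFLOW)\n", retval);
    return 0;
}
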
diff --git a/queue-6.1/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch b/queue-6.1/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch
new file mode 100644 (file)
index 0000000..a5c8e86
--- /dev/null
@@ -0,0 +1,39 @@
+From 2bcecff69c9856e84eb0aa637ba048e4a411d4cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Aug 2024 18:39:06 +0330
+Subject: ALSA: hda/realtek: Fix noise from speakers on Lenovo IdeaPad 3 15IAU7
+
+From: Parsa Poorshikhian <parsa.poorsh@gmail.com>
+
+[ Upstream commit ef9718b3d54e822de294351251f3a574f8a082ce ]
+
+Fix noise from speakers connected to AUX port when no sound is playing.
+The problem occurs because the `alc_shutup_pins` function includes
+a 0x10ec0257 vendor ID, which causes noise on Lenovo IdeaPad 3 15IAU7 with
+Realtek ALC257 codec when no sound is playing.
+Removing this vendor ID from the function fixes the bug.
+
+Fixes: 70794b9563fe ("ALSA: hda/realtek: Add more codec ID to no shutup pins list")
+Signed-off-by: Parsa Poorshikhian <parsa.poorsh@gmail.com>
+Link: https://patch.msgid.link/20240810150939.330693-1-parsa.poorsh@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 93d65a1acc475..b942ed868070d 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -583,7 +583,6 @@ static void alc_shutup_pins(struct hda_codec *codec)
+       switch (codec->core.vendor_id) {
+       case 0x10ec0236:
+       case 0x10ec0256:
+-      case 0x10ec0257:
+       case 0x19e58326:
+       case 0x10ec0283:
+       case 0x10ec0285:
+-- 
+2.43.0
+
diff --git a/queue-6.1/alsa-usb-fix-ubsan-warning-in-parse_audio_unit.patch b/queue-6.1/alsa-usb-fix-ubsan-warning-in-parse_audio_unit.patch
new file mode 100644 (file)
index 0000000..dfab040
--- /dev/null
@@ -0,0 +1,47 @@
+From c64b7784ad9294a6c683f442cbabe1b3216b8004 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Jul 2024 14:35:54 +0200
+Subject: ALSA: usb: Fix UBSAN warning in parse_audio_unit()
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 2f38cf730caedaeacdefb7ff35b0a3c1168117f9 ]
+
+A malformed USB descriptor may pass a lengthy mixer description with
+a lot of channels, and this may overflow the 32-bit integer shift
+size, as caught by the syzbot UBSAN test.  Although this won't cause
+any real trouble, it's better to address it.
+
+This patch introduces a sanity check of the number of channels to bail
+out of parsing when too many channels are found.
+
+Reported-by: syzbot+78d5b129a762182225aa@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/0000000000000adac5061d3c7355@google.com
+Link: https://patch.msgid.link/20240715123619.26612-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/mixer.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
+index 5699a62d17679..34ded71cb8077 100644
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2023,6 +2023,13 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
+               bmaControls = ftr->bmaControls;
+       }
++      if (channels > 32) {
++              usb_audio_info(state->chip,
++                             "usbmixer: too many channels (%d) in unit %d\n",
++                             channels, unitid);
++              return -EINVAL;
++      }
++
+       /* parse the source unit */
+       err = parse_audio_unit(state, hdr->bSourceID);
+       if (err < 0)
+-- 
+2.43.0
+
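
Illustrative aside, not part of the patch queue: the commit above caps the
channel count because a huge value can overflow a 32-bit shift, and in C a
shift by 32 or more on a 32-bit operand is undefined behaviour. The
mask-building helper below is a generic illustration written for this note,
not the driver's actual logic; compile with -fsanitize=undefined to see the
same class of UBSAN report.

#include <stdint.h>
#include <stdio.h>

/* Naive "all channels present" bitmask: undefined once channels >= 32. */
static uint32_t channel_mask(unsigned int channels)
{
    return (1u << channels) - 1;
}

/* Guarded variant, mirroring the patch's bail-out on absurd counts. */
static int channel_mask_checked(unsigned int channels, uint32_t *mask)
{
    if (channels > 32)
        return -1;  /* analogous to the driver returning -EINVAL */
    *mask = (channels == 32) ? 0xffffffffu : (1u << channels) - 1;
    return 0;
}

int main(void)
{
    uint32_t mask;

    printf("unchecked: %#x\n", channel_mask(40)); /* UBSAN flags this shift */
    if (channel_mask_checked(40, &mask) < 0)
        printf("checked:   40 channels rejected\n");
    return 0;
}
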
diff --git a/queue-6.1/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch b/queue-6.1/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch
new file mode 100644 (file)
index 0000000..c33aca4
--- /dev/null
@@ -0,0 +1,56 @@
+From 8bfdf9c76a3cd57080eca8c46526f2f2fe7f3819 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:28:19 +0300
+Subject: atm: idt77252: prevent use after free in dequeue_rx()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit a9a18e8f770c9b0703dab93580d0b02e199a4c79 ]
+
+We can't dereference "skb" after calling vcc->push() because the skb
+is released.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/atm/idt77252.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
+index 2daf50d4cd47a..7810f974b2ca9 100644
+--- a/drivers/atm/idt77252.c
++++ b/drivers/atm/idt77252.c
+@@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
+       rpp->len += skb->len;
+       if (stat & SAR_RSQE_EPDU) {
++              unsigned int len, truesize;
+               unsigned char *l1l2;
+-              unsigned int len;
+               l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6);
+@@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
+               ATM_SKB(skb)->vcc = vcc;
+               __net_timestamp(skb);
++              truesize = skb->truesize;
+               vcc->push(vcc, skb);
+               atomic_inc(&vcc->stats->rx);
+-              if (skb->truesize > SAR_FB_SIZE_3)
++              if (truesize > SAR_FB_SIZE_3)
+                       add_rx_skb(card, 3, SAR_FB_SIZE_3, 1);
+-              else if (skb->truesize > SAR_FB_SIZE_2)
++              else if (truesize > SAR_FB_SIZE_2)
+                       add_rx_skb(card, 2, SAR_FB_SIZE_2, 1);
+-              else if (skb->truesize > SAR_FB_SIZE_1)
++              else if (truesize > SAR_FB_SIZE_1)
+                       add_rx_skb(card, 1, SAR_FB_SIZE_1, 1);
+               else
+                       add_rx_skb(card, 0, SAR_FB_SIZE_0, 1);
+-- 
+2.43.0
+
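
Illustrative aside, not part of the patch queue: the idt77252 fix above caches
skb->truesize before vcc->push() because the callback may free the buffer. A
small userspace sketch of the same "save what you still need before handing
ownership away" pattern, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
    size_t truesize;
    char data[64];
};

/* Consumer takes ownership and frees the buffer, like vcc->push(). */
static void push(struct buf *b)
{
    free(b);
}

int main(void)
{
    struct buf *b = malloc(sizeof(*b));
    size_t truesize;

    if (!b)
        return 1;
    b->truesize = sizeof(*b);
    strcpy(b->data, "payload");

    /* Copy the field we still need *before* giving the buffer away. */
    truesize = b->truesize;
    push(b);

    /* Touching b->truesize here would be a use-after-free; use the copy. */
    printf("accounted %zu bytes after the hand-off\n", truesize);
    return 0;
}
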
diff --git a/queue-6.1/bluetooth-fix-hci_link_tx_to-rcu-lock-usage.patch b/queue-6.1/bluetooth-fix-hci_link_tx_to-rcu-lock-usage.patch
new file mode 100644 (file)
index 0000000..420e30e
--- /dev/null
@@ -0,0 +1,64 @@
+From a5cf52690d06f832e857dbcf2baa2c2dd6827dcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 14:11:51 +0000
+Subject: Bluetooth: Fix hci_link_tx_to RCU lock usage
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit c7eaf80bfb0c8cef852cce9501b95dd5a6bddcb9 ]
+
+Syzbot found a bug "BUG: sleeping function called from invalid context
+at kernel/locking/mutex.c:580". It is because hci_link_tx_to holds an
+RCU read lock and calls hci_disconnect which would hold a mutex lock
+since the commit a13f316e90fd ("Bluetooth: hci_conn: Consolidate code
+for aborting connections"). Here's an example call trace:
+
+   __dump_stack lib/dump_stack.c:88 [inline]
+   dump_stack_lvl+0xfc/0x174 lib/dump_stack.c:106
+   ___might_sleep+0x4a9/0x4d3 kernel/sched/core.c:9663
+   __mutex_lock_common kernel/locking/mutex.c:576 [inline]
+   __mutex_lock+0xc7/0x6e7 kernel/locking/mutex.c:732
+   hci_cmd_sync_queue+0x3a/0x287 net/bluetooth/hci_sync.c:388
+   hci_abort_conn+0x2cd/0x2e4 net/bluetooth/hci_conn.c:1812
+   hci_disconnect+0x207/0x237 net/bluetooth/hci_conn.c:244
+   hci_link_tx_to net/bluetooth/hci_core.c:3254 [inline]
+   __check_timeout net/bluetooth/hci_core.c:3419 [inline]
+   __check_timeout+0x310/0x361 net/bluetooth/hci_core.c:3399
+   hci_sched_le net/bluetooth/hci_core.c:3602 [inline]
+   hci_tx_work+0xe8f/0x12d0 net/bluetooth/hci_core.c:3652
+   process_one_work+0x75c/0xba1 kernel/workqueue.c:2310
+   worker_thread+0x5b2/0x73a kernel/workqueue.c:2457
+   kthread+0x2f7/0x30b kernel/kthread.c:319
+   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298
+
+This patch releases RCU read lock before calling hci_disconnect and
+reacquires it afterward to fix the bug.
+
+Fixes: a13f316e90fd ("Bluetooth: hci_conn: Consolidate code for aborting connections")
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_core.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index 398a324657697..cf164ec9899c3 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -3419,7 +3419,12 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
+               if (c->type == type && c->sent) {
+                       bt_dev_err(hdev, "killing stalled connection %pMR",
+                                  &c->dst);
++                      /* hci_disconnect might sleep, so, we have to release
++                       * the RCU read lock before calling it.
++                       */
++                      rcu_read_unlock();
+                       hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
++                      rcu_read_lock();
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/bluetooth-rfcomm-fix-not-validating-setsockopt-user-.patch b/queue-6.1/bluetooth-rfcomm-fix-not-validating-setsockopt-user-.patch
new file mode 100644 (file)
index 0000000..72196fb
--- /dev/null
@@ -0,0 +1,83 @@
+From 78f7e14a899dc29f97f33558e1be81371b108cda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 15:43:45 -0400
+Subject: Bluetooth: RFCOMM: Fix not validating setsockopt user input
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit a97de7bff13b1cc825c1b1344eaed8d6c2d3e695 ]
+
+syzbot reported rfcomm_sock_setsockopt_old() is copying data without
+checking user input length.
+
+BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset
+include/linux/sockptr.h:49 [inline]
+BUG: KASAN: slab-out-of-bounds in copy_from_sockptr
+include/linux/sockptr.h:55 [inline]
+BUG: KASAN: slab-out-of-bounds in rfcomm_sock_setsockopt_old
+net/bluetooth/rfcomm/sock.c:632 [inline]
+BUG: KASAN: slab-out-of-bounds in rfcomm_sock_setsockopt+0x893/0xa70
+net/bluetooth/rfcomm/sock.c:673
+Read of size 4 at addr ffff8880209a8bc3 by task syz-executor632/5064
+
+Fixes: 9f2c8a03fbb3 ("Bluetooth: Replace RFCOMM link mode with security level")
+Fixes: bb23c0ab8246 ("Bluetooth: Add support for deferring RFCOMM connection setup")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/rfcomm/sock.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index b54e8a530f55a..29aa07e9db9d7 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
+       switch (optname) {
+       case RFCOMM_LM:
+-              if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
++              if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
+                       err = -EFAULT;
+                       break;
+               }
+@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+       struct sock *sk = sock->sk;
+       struct bt_security sec;
+       int err = 0;
+-      size_t len;
+       u32 opt;
+       BT_DBG("sk %p", sk);
+@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+               sec.level = BT_SECURITY_LOW;
+-              len = min_t(unsigned int, sizeof(sec), optlen);
+-              if (copy_from_sockptr(&sec, optval, len)) {
+-                      err = -EFAULT;
++              err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
++              if (err)
+                       break;
+-              }
+               if (sec.level > BT_SECURITY_HIGH) {
+                       err = -EINVAL;
+@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+                       break;
+               }
+-              if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-                      err = -EFAULT;
++              err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
++              if (err)
+                       break;
+-              }
+               if (opt)
+                       set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+-- 
+2.43.0
+
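
Illustrative aside, not part of the patch queue: the RFCOMM patch above
switches to bt_copy_from_sockptr(), which validates the user-supplied option
length before copying. The helper below is a rough userspace approximation
written for this note (memcpy stands in for the user-copy); it is not the
kernel implementation.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Refuse to copy more bytes than the caller actually provided. */
static int copy_opt_checked(void *dst, size_t dst_size,
                            const void *optval, size_t optlen)
{
    if (optlen < dst_size)
        return -EINVAL;             /* undersized user input */
    memcpy(dst, optval, dst_size);  /* stand-in for copy_from_sockptr() */
    return 0;
}

int main(void)
{
    uint32_t opt = 0;
    uint8_t short_input[2] = { 0x01, 0x02 };  /* only 2 bytes supplied */

    /* The old code always copied sizeof(u32), reading past short input;
     * the checked helper rejects it instead. */
    if (copy_opt_checked(&opt, sizeof(opt), short_input, sizeof(short_input)))
        printf("rejected undersized optlen\n");
    return 0;
}
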
diff --git a/queue-6.1/bpf-avoid-kfree_rcu-under-lock-in-bpf_lpm_trie.patch b/queue-6.1/bpf-avoid-kfree_rcu-under-lock-in-bpf_lpm_trie.patch
new file mode 100644 (file)
index 0000000..56a9b9c
--- /dev/null
@@ -0,0 +1,102 @@
+From add6ca2987c556241a9e35ae068ca9d41cb9137d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Mar 2024 10:14:39 -0700
+Subject: bpf: Avoid kfree_rcu() under lock in bpf_lpm_trie.
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ Upstream commit 59f2f841179aa6a0899cb9cf53659149a35749b7 ]
+
+syzbot reported the following lock sequence:
+cpu 2:
+  grabs timer_base lock
+    spins on bpf_lpm lock
+
+cpu 1:
+  grab rcu krcp lock
+    spins on timer_base lock
+
+cpu 0:
+  grab bpf_lpm lock
+    spins on rcu krcp lock
+
+The bpf_lpm lock can be the same.
+The timer_base lock can also be the same due to timer migration.
+But the rcu krcp lock is always per-cpu, so it cannot be the same lock.
+Hence it's a false positive.
+To avoid lockdep complaining, move kfree_rcu() after the spin_unlock.
+
+Reported-by: syzbot+1fa663a2100308ab6eab@syzkaller.appspotmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20240329171439.37813-1-alexei.starovoitov@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/lpm_trie.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
+index b80bffc59e5fb..37b510d91b810 100644
+--- a/kernel/bpf/lpm_trie.c
++++ b/kernel/bpf/lpm_trie.c
+@@ -308,6 +308,7 @@ static int trie_update_elem(struct bpf_map *map,
+ {
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+       struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
++      struct lpm_trie_node *free_node = NULL;
+       struct lpm_trie_node __rcu **slot;
+       struct bpf_lpm_trie_key_u8 *key = _key;
+       unsigned long irq_flags;
+@@ -382,7 +383,7 @@ static int trie_update_elem(struct bpf_map *map,
+                       trie->n_entries--;
+               rcu_assign_pointer(*slot, new_node);
+-              kfree_rcu(node, rcu);
++              free_node = node;
+               goto out;
+       }
+@@ -429,6 +430,7 @@ static int trie_update_elem(struct bpf_map *map,
+       }
+       spin_unlock_irqrestore(&trie->lock, irq_flags);
++      kfree_rcu(free_node, rcu);
+       return ret;
+ }
+@@ -437,6 +439,7 @@ static int trie_update_elem(struct bpf_map *map,
+ static int trie_delete_elem(struct bpf_map *map, void *_key)
+ {
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
++      struct lpm_trie_node *free_node = NULL, *free_parent = NULL;
+       struct bpf_lpm_trie_key_u8 *key = _key;
+       struct lpm_trie_node __rcu **trim, **trim2;
+       struct lpm_trie_node *node, *parent;
+@@ -506,8 +509,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
+               else
+                       rcu_assign_pointer(
+                               *trim2, rcu_access_pointer(parent->child[0]));
+-              kfree_rcu(parent, rcu);
+-              kfree_rcu(node, rcu);
++              free_parent = parent;
++              free_node = node;
+               goto out;
+       }
+@@ -521,10 +524,12 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
+               rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
+       else
+               RCU_INIT_POINTER(*trim, NULL);
+-      kfree_rcu(node, rcu);
++      free_node = node;
+ out:
+       spin_unlock_irqrestore(&trie->lock, irq_flags);
++      kfree_rcu(free_parent, rcu);
++      kfree_rcu(free_node, rcu);
+       return ret;
+ }
+-- 
+2.43.0
+
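
Illustrative aside, not part of the patch queue: the LPM trie patch above
records which nodes to free while the trie spinlock is held and only calls
kfree_rcu() after the unlock, so no RCU bookkeeping lock is taken under the
trie lock. A minimal userspace sketch of that deferral pattern, using a
pthread mutex and plain free() as stand-ins:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int *slot;   /* protected by 'lock' */

static void update_slot(int value)
{
    int *free_node = NULL;  /* collected under the lock, freed after */
    int *new = malloc(sizeof(*new));

    if (!new)
        return;
    *new = value;

    pthread_mutex_lock(&lock);
    free_node = slot;       /* do not free while the lock is held */
    slot = new;
    pthread_mutex_unlock(&lock);

    free(free_node);        /* kernel analogue: kfree_rcu(free_node, rcu) */
}

int main(void)
{
    update_slot(1);
    update_slot(2);

    pthread_mutex_lock(&lock);
    printf("slot = %d\n", *slot);
    pthread_mutex_unlock(&lock);

    free(slot);             /* teardown, no other threads left */
    return 0;
}
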
diff --git a/queue-6.1/bpf-drop-unnecessary-user-triggerable-warn_once-in-v.patch b/queue-6.1/bpf-drop-unnecessary-user-triggerable-warn_once-in-v.patch
new file mode 100644 (file)
index 0000000..992c9d5
--- /dev/null
@@ -0,0 +1,47 @@
+From 45d34172f0c414afc52a4055325bde67c8217fb5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 11:04:09 -0700
+Subject: bpf: drop unnecessary user-triggerable WARN_ONCE in verifierl log
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit cff36398bd4c7d322d424433db437f3c3391c491 ]
+
+It's trivial for a user to trigger the "verifier log line truncated"
+warning, as the verifier has a fixed-size buffer of 1024 bytes (as of now),
+and there are at least two pieces of user-provided information that can be
+output through this buffer, both of which can be arbitrarily sized by the user:
+  - BTF names;
+  - BTF.ext source code line strings.
+
+The verifier log buffer should be properly sized for typical verifier state
+output. But it's sort of expected that this buffer won't be long enough
+in some circumstances. So let's drop the check. In any case the code will
+work correctly, at worst truncating part of a single line of output.
+
+Reported-by: syzbot+8b2a08dfbd25fd933d75@syzkaller.appspotmail.com
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20230516180409.3549088-1-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/log.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
+index 920061e38d2e1..cd1b7113fbfd0 100644
+--- a/kernel/bpf/log.c
++++ b/kernel/bpf/log.c
+@@ -22,9 +22,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+       n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
+-      WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
+-                "verifier log line truncated - local buffer too short\n");
+-
+       if (log->level == BPF_LOG_KERNEL) {
+               bool newline = n > 0 && log->kbuf[n - 1] == '\n';
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch b/queue-6.1/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch
new file mode 100644 (file)
index 0000000..bd1a668
--- /dev/null
@@ -0,0 +1,61 @@
+From d11cfd1c5805a788532a57652c86258e72351d54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 28 Jul 2024 19:46:11 +0800
+Subject: bpf: Fix updating attached freplace prog in prog_array map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Leon Hwang <leon.hwang@linux.dev>
+
+[ Upstream commit fdad456cbcca739bae1849549c7a999857c56f88 ]
+
+The commit f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT")
+fixed a NULL pointer dereference panic, but didn't fix the failure to
+update an attached freplace prog in a prog_array map.
+
+Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"),
+freplace prog and its target prog are able to tail call each other.
+
+And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach")
+sets prog->aux->dst_prog as NULL after attaching freplace prog to its
+target prog.
+
+After loading the freplace prog, the prog_array's owner type is
+BPF_PROG_TYPE_SCHED_CLS. After attaching the freplace prog, its
+prog->aux->dst_prog is NULL. Then, while updating the freplace prog in
+the prog_array, bpf_prog_map_compatible() incorrectly returns false
+because resolve_prog_type() returns BPF_PROG_TYPE_EXT instead of
+BPF_PROG_TYPE_SCHED_CLS. After this patch, resolve_prog_type() returns
+BPF_PROG_TYPE_SCHED_CLS and the update to the prog_array can succeed.
+
+Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT")
+Cc: Toke Høiland-Jørgensen <toke@redhat.com>
+Cc: Martin KaFai Lau <martin.lau@kernel.org>
+Acked-by: Yonghong Song <yonghong.song@linux.dev>
+Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
+Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 6a524c5462a6f..131adc98080b8 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -645,8 +645,8 @@ static inline u32 type_flag(u32 type)
+ /* only use after check_attach_btf_id() */
+ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
+ {
+-      return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ?
+-              prog->aux->dst_prog->type : prog->type;
++      return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ?
++              prog->aux->saved_dst_prog_type : prog->type;
+ }
+ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
+-- 
+2.43.0
+
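
Illustrative aside, not part of the patch queue: a hypothetical miniature of
the resolution logic the patch above changes. The field names are simplified
stand-ins for prog->aux->dst_prog and prog->aux->saved_dst_prog_type; the
point is that resolving through a pointer cleared at attach time yields the
wrong type, while a copy saved at load time stays valid.

#include <stdio.h>

enum prog_type { PROG_TYPE_UNSPEC, PROG_TYPE_SCHED_CLS, PROG_TYPE_EXT };

struct prog {
    enum prog_type type;
    struct prog *dst_prog;          /* cleared once the freplace attaches */
    enum prog_type saved_dst_type;  /* recorded at load time, kept around */
};

static enum prog_type resolve_old(const struct prog *p)
{
    return (p->type == PROG_TYPE_EXT && p->dst_prog) ?
            p->dst_prog->type : p->type;
}

static enum prog_type resolve_new(const struct prog *p)
{
    return (p->type == PROG_TYPE_EXT && p->saved_dst_type) ?
            p->saved_dst_type : p->type;
}

int main(void)
{
    struct prog target = { .type = PROG_TYPE_SCHED_CLS };
    struct prog freplace = {
        .type = PROG_TYPE_EXT,
        .dst_prog = NULL,               /* already attached to 'target' */
        .saved_dst_type = target.type,
    };

    printf("old: %d (EXT -> map update rejected)\n", resolve_old(&freplace));
    printf("new: %d (SCHED_CLS -> map update allowed)\n", resolve_new(&freplace));
    return 0;
}
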
diff --git a/queue-6.1/bpf-net-use-dev_stat_inc.patch b/queue-6.1/bpf-net-use-dev_stat_inc.patch
new file mode 100644 (file)
index 0000000..a29e01c
--- /dev/null
@@ -0,0 +1,59 @@
+From 46c4c07236c79cb840e26da0788897fbf7da3c7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 11:35:20 +0800
+Subject: bpf, net: Use DEV_STAT_INC()
+
+From: yunshui <jiangyunshui@kylinos.cn>
+
+[ Upstream commit d9cbd8343b010016fcaabc361c37720dcafddcbe ]
+
+syzbot/KCSAN reported that races happen when multiple CPUs update
+dev->stats.tx_errors concurrently. Adopt the SMP-safe DEV_STATS_INC()
+to update the dev->stats fields.
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: yunshui <jiangyunshui@kylinos.cn>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20240523033520.4029314-1-jiangyunshui@kylinos.cn
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 210b881cb50b8..1cd5f146cafe4 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2264,12 +2264,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+       err = bpf_out_neigh_v6(net, skb, dev, nh);
+       if (unlikely(net_xmit_eval(err)))
+-              dev->stats.tx_errors++;
++              DEV_STATS_INC(dev, tx_errors);
+       else
+               ret = NET_XMIT_SUCCESS;
+       goto out_xmit;
+ out_drop:
+-      dev->stats.tx_errors++;
++      DEV_STATS_INC(dev, tx_errors);
+       kfree_skb(skb);
+ out_xmit:
+       return ret;
+@@ -2371,12 +2371,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+       err = bpf_out_neigh_v4(net, skb, dev, nh);
+       if (unlikely(net_xmit_eval(err)))
+-              dev->stats.tx_errors++;
++              DEV_STATS_INC(dev, tx_errors);
+       else
+               ret = NET_XMIT_SUCCESS;
+       goto out_xmit;
+ out_drop:
+-      dev->stats.tx_errors++;
++      DEV_STATS_INC(dev, tx_errors);
+       kfree_skb(skb);
+ out_xmit:
+       return ret;
+-- 
+2.43.0
+
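
Illustrative aside, not part of the patch queue: the switch to DEV_STATS_INC()
above exists because a plain "dev->stats.tx_errors++" from several CPUs is a
data race that can lose updates. A generic C11 sketch of the difference
between a plain and an atomic counter under two threads; it does not reproduce
the kernel's DEV_STATS_INC() internals.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define ITERS 1000000

static long plain_errors;           /* racy: plain ++ from two threads */
static atomic_long atomic_errors;   /* safe: atomic increment */

static void *worker(void *arg)
{
    (void)arg;
    for (int i = 0; i < ITERS; i++) {
        plain_errors++;                      /* may lose counts */
        atomic_fetch_add(&atomic_errors, 1); /* analogue of DEV_STATS_INC() */
    }
    return NULL;
}

int main(void)
{
    pthread_t a, b;

    pthread_create(&a, NULL, worker, NULL);
    pthread_create(&b, NULL, worker, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);

    printf("plain:  %ld (frequently below %d)\n", plain_errors, 2 * ITERS);
    printf("atomic: %ld\n", atomic_load(&atomic_errors));
    return 0;
}
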
diff --git a/queue-6.1/bpf-replace-bpf_lpm_trie_key-0-length-array-with-fle.patch b/queue-6.1/bpf-replace-bpf_lpm_trie_key-0-length-array-with-fle.patch
new file mode 100644 (file)
index 0000000..398428a
--- /dev/null
@@ -0,0 +1,380 @@
+From 8adac65c23041658823e034ef4407493d2a51dc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Feb 2024 07:56:15 -0800
+Subject: bpf: Replace bpf_lpm_trie_key 0-length array with flexible array
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit 896880ff30866f386ebed14ab81ce1ad3710cfc4 ]
+
+Replace deprecated 0-length array in struct bpf_lpm_trie_key with
+flexible array. Found with GCC 13:
+
+../kernel/bpf/lpm_trie.c:207:51: warning: array subscript i is outside array bounds of 'const __u8[0]' {aka 'const unsigned char[]'} [-Warray-bounds=]
+  207 |                                        *(__be16 *)&key->data[i]);
+      |                                                   ^~~~~~~~~~~~~
+../include/uapi/linux/swab.h:102:54: note: in definition of macro '__swab16'
+  102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
+      |                                                      ^
+../include/linux/byteorder/generic.h:97:21: note: in expansion of macro '__be16_to_cpu'
+   97 | #define be16_to_cpu __be16_to_cpu
+      |                     ^~~~~~~~~~~~~
+../kernel/bpf/lpm_trie.c:206:28: note: in expansion of macro 'be16_to_cpu'
+  206 |                 u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^
+      |                            ^~~~~~~~~~~
+In file included from ../include/linux/bpf.h:7:
+../include/uapi/linux/bpf.h:82:17: note: while referencing 'data'
+   82 |         __u8    data[0];        /* Arbitrary size */
+      |                 ^~~~
+
+And found at run-time under CONFIG_FORTIFY_SOURCE:
+
+  UBSAN: array-index-out-of-bounds in kernel/bpf/lpm_trie.c:218:49
+  index 0 is out of range for type '__u8 [*]'
+
+Changing struct bpf_lpm_trie_key is difficult since it has been used
+by userspace. For example, in Cilium:
+
+       struct egress_gw_policy_key {
+               struct bpf_lpm_trie_key lpm_key;
+               __u32 saddr;
+               __u32 daddr;
+       };
+
+While direct references to the "data" member haven't been found, there
+are static initializers that include the final member. For example,
+the "{}" here:
+
+        struct egress_gw_policy_key in_key = {
+                .lpm_key = { 32 + 24, {} },
+                .saddr   = CLIENT_IP,
+                .daddr   = EXTERNAL_SVC_IP & 0Xffffff,
+        };
+
+To avoid the build time and run time warnings seen with a 0-sized
+trailing array for struct bpf_lpm_trie_key, introduce a new struct
+that correctly uses a flexible array for the trailing bytes,
+struct bpf_lpm_trie_key_u8. As part of this, split out the "header"
+portion (which is just the "prefixlen" member) as a separate struct,
+struct bpf_lpm_trie_key_hdr, so it can be used by anything building
+a bpf_lpm_trie_key that has trailing members that aren't a u8
+flexible array (like the self-test[1]).
+
+Unfortunately, C++ refuses to parse the __struct_group() helper, so
+it is not possible to define struct bpf_lpm_trie_key_hdr directly in
+struct bpf_lpm_trie_key_u8, so we must open-code the union directly.
+
+Adjust the kernel code to use struct bpf_lpm_trie_key_u8 throughout,
+and for the selftest to use struct bpf_lpm_trie_key_hdr. Add a comment
+to the UAPI header directing folks to the two new options.
+
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Closes: https://paste.debian.net/hidden/ca500597/
+Link: https://lore.kernel.org/all/202206281009.4332AA33@keescook/ [1]
+Link: https://lore.kernel.org/bpf/20240222155612.it.533-kees@kernel.org
+Stable-dep-of: 59f2f841179a ("bpf: Avoid kfree_rcu() under lock in bpf_lpm_trie.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/bpf/map_lpm_trie.rst            |  2 +-
+ include/uapi/linux/bpf.h                      | 19 +++++++++++++++++-
+ kernel/bpf/lpm_trie.c                         | 20 +++++++++----------
+ samples/bpf/map_perf_test_user.c              |  2 +-
+ samples/bpf/xdp_router_ipv4_user.c            |  2 +-
+ tools/include/uapi/linux/bpf.h                | 19 +++++++++++++++++-
+ .../selftests/bpf/progs/map_ptr_kern.c        |  2 +-
+ tools/testing/selftests/bpf/test_lpm_map.c    | 18 ++++++++---------
+ 8 files changed, 59 insertions(+), 25 deletions(-)
+
+diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
+index 31be1aa7ba2cb..b4fce3f7c98ff 100644
+--- a/Documentation/bpf/map_lpm_trie.rst
++++ b/Documentation/bpf/map_lpm_trie.rst
+@@ -17,7 +17,7 @@ significant byte.
+ LPM tries may be created with a maximum prefix length that is a multiple
+ of 8, in the range from 8 to 2048. The key used for lookup and update
+-operations is a ``struct bpf_lpm_trie_key``, extended by
++operations is a ``struct bpf_lpm_trie_key_u8``, extended by
+ ``max_prefixlen/8`` bytes.
+ - For IPv4 addresses the data length is 4 bytes
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index a17688011440e..58c7fc75da752 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -76,12 +76,29 @@ struct bpf_insn {
+       __s32   imm;            /* signed immediate constant */
+ };
+-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
++ * the trailing flexible array member) instead.
++ */
+ struct bpf_lpm_trie_key {
+       __u32   prefixlen;      /* up to 32 for AF_INET, 128 for AF_INET6 */
+       __u8    data[0];        /* Arbitrary size */
+ };
++/* Header for bpf_lpm_trie_key structs */
++struct bpf_lpm_trie_key_hdr {
++      __u32   prefixlen;
++};
++
++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
++struct bpf_lpm_trie_key_u8 {
++      union {
++              struct bpf_lpm_trie_key_hdr     hdr;
++              __u32                           prefixlen;
++      };
++      __u8    data[];         /* Arbitrary size */
++};
++
+ struct bpf_cgroup_storage_key {
+       __u64   cgroup_inode_id;        /* cgroup inode id */
+       __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
+diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
+index ce3a091d52e89..b80bffc59e5fb 100644
+--- a/kernel/bpf/lpm_trie.c
++++ b/kernel/bpf/lpm_trie.c
+@@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index)
+  */
+ static size_t longest_prefix_match(const struct lpm_trie *trie,
+                                  const struct lpm_trie_node *node,
+-                                 const struct bpf_lpm_trie_key *key)
++                                 const struct bpf_lpm_trie_key_u8 *key)
+ {
+       u32 limit = min(node->prefixlen, key->prefixlen);
+       u32 prefixlen = 0, i = 0;
+       BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
+-      BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
++      BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32));
+ #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
+@@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
+ {
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+       struct lpm_trie_node *node, *found = NULL;
+-      struct bpf_lpm_trie_key *key = _key;
++      struct bpf_lpm_trie_key_u8 *key = _key;
+       if (key->prefixlen > trie->max_prefixlen)
+               return NULL;
+@@ -309,7 +309,7 @@ static int trie_update_elem(struct bpf_map *map,
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+       struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+       struct lpm_trie_node __rcu **slot;
+-      struct bpf_lpm_trie_key *key = _key;
++      struct bpf_lpm_trie_key_u8 *key = _key;
+       unsigned long irq_flags;
+       unsigned int next_bit;
+       size_t matchlen = 0;
+@@ -437,7 +437,7 @@ static int trie_update_elem(struct bpf_map *map,
+ static int trie_delete_elem(struct bpf_map *map, void *_key)
+ {
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+-      struct bpf_lpm_trie_key *key = _key;
++      struct bpf_lpm_trie_key_u8 *key = _key;
+       struct lpm_trie_node __rcu **trim, **trim2;
+       struct lpm_trie_node *node, *parent;
+       unsigned long irq_flags;
+@@ -536,7 +536,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
+                                sizeof(struct lpm_trie_node))
+ #define LPM_VAL_SIZE_MIN      1
+-#define LPM_KEY_SIZE(X)               (sizeof(struct bpf_lpm_trie_key) + (X))
++#define LPM_KEY_SIZE(X)               (sizeof(struct bpf_lpm_trie_key_u8) + (X))
+ #define LPM_KEY_SIZE_MAX      LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
+ #define LPM_KEY_SIZE_MIN      LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
+@@ -568,7 +568,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
+       /* copy mandatory map attributes */
+       bpf_map_init_from_attr(&trie->map, attr);
+       trie->data_size = attr->key_size -
+-                        offsetof(struct bpf_lpm_trie_key, data);
++                        offsetof(struct bpf_lpm_trie_key_u8, data);
+       trie->max_prefixlen = trie->data_size * 8;
+       spin_lock_init(&trie->lock);
+@@ -619,7 +619,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
+ {
+       struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+-      struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
++      struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key;
+       struct lpm_trie_node **node_stack = NULL;
+       int err = 0, stack_ptr = -1;
+       unsigned int next_bit;
+@@ -706,7 +706,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
+       }
+ do_copy:
+       next_key->prefixlen = next_node->prefixlen;
+-      memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data),
++      memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data),
+              next_node->data, trie->data_size);
+ free_stack:
+       kfree(node_stack);
+@@ -718,7 +718,7 @@ static int trie_check_btf(const struct bpf_map *map,
+                         const struct btf_type *key_type,
+                         const struct btf_type *value_type)
+ {
+-      /* Keys must have struct bpf_lpm_trie_key embedded. */
++      /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */
+       return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
+              -EINVAL : 0;
+ }
+diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
+index 1bb53f4b29e11..cb5c776103b99 100644
+--- a/samples/bpf/map_perf_test_user.c
++++ b/samples/bpf/map_perf_test_user.c
+@@ -370,7 +370,7 @@ static void run_perf_test(int tasks)
+ static void fill_lpm_trie(void)
+ {
+-      struct bpf_lpm_trie_key *key;
++      struct bpf_lpm_trie_key_u8 *key;
+       unsigned long value = 0;
+       unsigned int i;
+       int r;
+diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
+index 683913bbf2797..28bae295d0ed1 100644
+--- a/samples/bpf/xdp_router_ipv4_user.c
++++ b/samples/bpf/xdp_router_ipv4_user.c
+@@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock)
+ static void read_route(struct nlmsghdr *nh, int nll)
+ {
+       char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
+-      struct bpf_lpm_trie_key *prefix_key;
++      struct bpf_lpm_trie_key_u8 *prefix_key;
+       struct rtattr *rt_attr;
+       struct rtmsg *rt_msg;
+       int rtm_family;
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index a17688011440e..58c7fc75da752 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -76,12 +76,29 @@ struct bpf_insn {
+       __s32   imm;            /* signed immediate constant */
+ };
+-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
++ * the trailing flexible array member) instead.
++ */
+ struct bpf_lpm_trie_key {
+       __u32   prefixlen;      /* up to 32 for AF_INET, 128 for AF_INET6 */
+       __u8    data[0];        /* Arbitrary size */
+ };
++/* Header for bpf_lpm_trie_key structs */
++struct bpf_lpm_trie_key_hdr {
++      __u32   prefixlen;
++};
++
++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
++struct bpf_lpm_trie_key_u8 {
++      union {
++              struct bpf_lpm_trie_key_hdr     hdr;
++              __u32                           prefixlen;
++      };
++      __u8    data[];         /* Arbitrary size */
++};
++
+ struct bpf_cgroup_storage_key {
+       __u64   cgroup_inode_id;        /* cgroup inode id */
+       __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
+diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+index db388f593d0a2..96eed198af361 100644
+--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
++++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+@@ -311,7 +311,7 @@ struct lpm_trie {
+ } __attribute__((preserve_access_index));
+ struct lpm_key {
+-      struct bpf_lpm_trie_key trie_key;
++      struct bpf_lpm_trie_key_hdr trie_key;
+       __u32 data;
+ };
+diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
+index c028d621c744d..d98c72dc563ea 100644
+--- a/tools/testing/selftests/bpf/test_lpm_map.c
++++ b/tools/testing/selftests/bpf/test_lpm_map.c
+@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
+       volatile size_t n_matches, n_matches_after_delete;
+       size_t i, j, n_nodes, n_lookups;
+       struct tlpm_node *t, *list = NULL;
+-      struct bpf_lpm_trie_key *key;
++      struct bpf_lpm_trie_key_u8 *key;
+       uint8_t *data, *value;
+       int r, map;
+@@ -331,8 +331,8 @@ static void test_lpm_map(int keysize)
+ static void test_lpm_ipaddr(void)
+ {
+       LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-      struct bpf_lpm_trie_key *key_ipv4;
+-      struct bpf_lpm_trie_key *key_ipv6;
++      struct bpf_lpm_trie_key_u8 *key_ipv4;
++      struct bpf_lpm_trie_key_u8 *key_ipv6;
+       size_t key_size_ipv4;
+       size_t key_size_ipv6;
+       int map_fd_ipv4;
+@@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void)
+ static void test_lpm_delete(void)
+ {
+       LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-      struct bpf_lpm_trie_key *key;
++      struct bpf_lpm_trie_key_u8 *key;
+       size_t key_size;
+       int map_fd;
+       __u64 value;
+@@ -532,7 +532,7 @@ static void test_lpm_delete(void)
+ static void test_lpm_get_next_key(void)
+ {
+       LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-      struct bpf_lpm_trie_key *key_p, *next_key_p;
++      struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
+       size_t key_size;
+       __u32 value = 0;
+       int map_fd;
+@@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg)
+ {
+       int i, j, ret, iter, key_size;
+       struct lpm_mt_test_info *info = arg;
+-      struct bpf_lpm_trie_key *key_p;
++      struct bpf_lpm_trie_key_u8 *key_p;
+-      key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
++      key_size = sizeof(*key_p) + sizeof(__u32);
+       key_p = alloca(key_size);
+       for (iter = 0; iter < info->iter; iter++)
+               for (i = 0; i < MAX_TEST_KEYS; i++) {
+@@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg)
+                               ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+                               assert(ret == 0 || errno == ENOENT);
+                       } else {
+-                              struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
++                              struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
+                               ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+                               assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+                       }
+@@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void)
+       /* create a trie */
+       value_size = sizeof(__u32);
+-      key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
++      key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
+       map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
+       /* create 4 threads to test update, delete, lookup and get_next_key */
+-- 
+2.43.0
+
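
Illustrative aside, not part of the patch queue: a userspace sketch of how a
caller might build an IPv4 key for a BPF_MAP_TYPE_LPM_TRIE with the two UAPI
variants the patch above adds. The struct definitions mirror the patch; the
surrounding program is illustrative only and never touches a real map.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t  __u8;
typedef uint32_t __u32;

/* As added by the patch: header-only and trailing-byte-array variants. */
struct bpf_lpm_trie_key_hdr {
    __u32 prefixlen;
};

struct bpf_lpm_trie_key_u8 {
    union {
        struct bpf_lpm_trie_key_hdr hdr;
        __u32                       prefixlen;
    };
    __u8 data[];    /* flexible array instead of the deprecated data[0] */
};

/* Typed key with a fixed trailer, using the _hdr form like the selftest. */
struct ipv4_lpm_key {
    struct bpf_lpm_trie_key_hdr trie_key;
    __u32                       addr;   /* network byte order in real use */
};

int main(void)
{
    struct ipv4_lpm_key key = {
        .trie_key = { .prefixlen = 24 },
        .addr     = 0x0a000001,         /* 10.0.0.1, illustrative only */
    };

    /* Byte-wise access through the _u8 view shares the same layout. */
    const struct bpf_lpm_trie_key_u8 *raw = (const void *)&key;

    printf("prefixlen=%u first data byte=%#x\n",
           raw->prefixlen, raw->data[0]);
    return 0;
}
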
diff --git a/queue-6.1/bpf-split-off-basic-bpf-verifier-log-into-separate-f.patch b/queue-6.1/bpf-split-off-basic-bpf-verifier-log-into-separate-f.patch
new file mode 100644 (file)
index 0000000..4d16bee
--- /dev/null
@@ -0,0 +1,291 @@
+From 9852eff65c9648ff0ec9e3518777efb6f7ee74ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Apr 2023 16:41:47 -0700
+Subject: bpf: Split off basic BPF verifier log into separate file
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 4294a0a7ab6282c3d92f03de84e762dda993c93d ]
+
+The kernel/bpf/verifier.c file is large and growing larger all the
+time. So it's good to start splitting off more or less self-contained
+parts into separate files to keep source code size (somewhat) under
+control.
+
+This patch is one step in this direction, moving some of the BPF
+verifier log routines into a separate kernel/bpf/log.c. Right now these
+are the most low-level and isolated routines, which append data to the
+log, reset the log to a previous position, etc. Eventually we could
+probably move the verifier state printing logic here as well, but this
+patch doesn't attempt to do that yet.
+
+Subsequent patches will add more logic to verifier log management, so
+having basics in a separate file will make sure verifier.c doesn't grow
+more with new changes.
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Lorenz Bauer <lmb@isovalent.com>
+Link: https://lore.kernel.org/bpf/20230406234205.323208-2-andrii@kernel.org
+Stable-dep-of: cff36398bd4c ("bpf: drop unnecessary user-triggerable WARN_ONCE in verifierl log")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 19 +++-----
+ kernel/bpf/Makefile          |  3 +-
+ kernel/bpf/log.c             | 85 ++++++++++++++++++++++++++++++++++++
+ kernel/bpf/verifier.c        | 69 -----------------------------
+ 4 files changed, 94 insertions(+), 82 deletions(-)
+ create mode 100644 kernel/bpf/log.c
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 131adc98080b8..33b073deb8c17 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -445,11 +445,6 @@ struct bpf_verifier_log {
+       u32 len_total;
+ };
+-static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
+-{
+-      return log->len_used >= log->len_total - 1;
+-}
+-
+ #define BPF_LOG_LEVEL1        1
+ #define BPF_LOG_LEVEL2        2
+ #define BPF_LOG_STATS 4
+@@ -459,6 +454,11 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
+ #define BPF_LOG_MIN_ALIGNMENT 8U
+ #define BPF_LOG_ALIGNMENT 40U
++static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
++{
++      return log->len_used >= log->len_total - 1;
++}
++
+ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
+ {
+       return log &&
+@@ -466,13 +466,6 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
+                log->level == BPF_LOG_KERNEL);
+ }
+-static inline bool
+-bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
+-{
+-      return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
+-             log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
+-}
+-
+ #define BPF_MAX_SUBPROGS 256
+ struct bpf_subprog_info {
+@@ -556,12 +549,14 @@ struct bpf_verifier_env {
+       char type_str_buf[TYPE_STR_BUF_LEN];
+ };
++bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log);
+ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+                                     const char *fmt, va_list args);
+ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
+                                          const char *fmt, ...);
+ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+                           const char *fmt, ...);
++void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos);
+ static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
+ {
+diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
+index 341c94f208f4c..5b86ea9f09c46 100644
+--- a/kernel/bpf/Makefile
++++ b/kernel/bpf/Makefile
+@@ -6,7 +6,8 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
+ endif
+ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
+-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
++obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o
++obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
+ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
+ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+ obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
+diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
+new file mode 100644
+index 0000000000000..920061e38d2e1
+--- /dev/null
++++ b/kernel/bpf/log.c
+@@ -0,0 +1,85 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
++ * Copyright (c) 2016 Facebook
++ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
++ */
++#include <uapi/linux/btf.h>
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <linux/bpf.h>
++#include <linux/bpf_verifier.h>
++
++bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
++{
++      return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
++             log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
++}
++
++void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
++                     va_list args)
++{
++      unsigned int n;
++
++      n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
++
++      WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
++                "verifier log line truncated - local buffer too short\n");
++
++      if (log->level == BPF_LOG_KERNEL) {
++              bool newline = n > 0 && log->kbuf[n - 1] == '\n';
++
++              pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
++              return;
++      }
++
++      n = min(log->len_total - log->len_used - 1, n);
++      log->kbuf[n] = '\0';
++      if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
++              log->len_used += n;
++      else
++              log->ubuf = NULL;
++}
++
++void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
++{
++      char zero = 0;
++
++      if (!bpf_verifier_log_needed(log))
++              return;
++
++      log->len_used = new_pos;
++      if (put_user(zero, log->ubuf + new_pos))
++              log->ubuf = NULL;
++}
++
++/* log_level controls verbosity level of eBPF verifier.
++ * bpf_verifier_log_write() is used to dump the verification trace to the log,
++ * so the user can figure out what's wrong with the program
++ */
++__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
++                                         const char *fmt, ...)
++{
++      va_list args;
++
++      if (!bpf_verifier_log_needed(&env->log))
++              return;
++
++      va_start(args, fmt);
++      bpf_verifier_vlog(&env->log, fmt, args);
++      va_end(args);
++}
++EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
++
++__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
++                          const char *fmt, ...)
++{
++      va_list args;
++
++      if (!bpf_verifier_log_needed(log))
++              return;
++
++      va_start(args, fmt);
++      bpf_verifier_vlog(log, fmt, args);
++      va_end(args);
++}
++EXPORT_SYMBOL_GPL(bpf_log);
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 8973d3c9597ce..4efa50eb07d72 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -291,61 +291,6 @@ find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
+       return &linfo[i - 1];
+ }
+-void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+-                     va_list args)
+-{
+-      unsigned int n;
+-
+-      n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
+-
+-      WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
+-                "verifier log line truncated - local buffer too short\n");
+-
+-      if (log->level == BPF_LOG_KERNEL) {
+-              bool newline = n > 0 && log->kbuf[n - 1] == '\n';
+-
+-              pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
+-              return;
+-      }
+-
+-      n = min(log->len_total - log->len_used - 1, n);
+-      log->kbuf[n] = '\0';
+-      if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
+-              log->len_used += n;
+-      else
+-              log->ubuf = NULL;
+-}
+-
+-static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
+-{
+-      char zero = 0;
+-
+-      if (!bpf_verifier_log_needed(log))
+-              return;
+-
+-      log->len_used = new_pos;
+-      if (put_user(zero, log->ubuf + new_pos))
+-              log->ubuf = NULL;
+-}
+-
+-/* log_level controls verbosity level of eBPF verifier.
+- * bpf_verifier_log_write() is used to dump the verification trace to the log,
+- * so the user can figure out what's wrong with the program
+- */
+-__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
+-                                         const char *fmt, ...)
+-{
+-      va_list args;
+-
+-      if (!bpf_verifier_log_needed(&env->log))
+-              return;
+-
+-      va_start(args, fmt);
+-      bpf_verifier_vlog(&env->log, fmt, args);
+-      va_end(args);
+-}
+-EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
+-
+ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
+ {
+       struct bpf_verifier_env *env = private_data;
+@@ -359,20 +304,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
+       va_end(args);
+ }
+-__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+-                          const char *fmt, ...)
+-{
+-      va_list args;
+-
+-      if (!bpf_verifier_log_needed(log))
+-              return;
+-
+-      va_start(args, fmt);
+-      bpf_verifier_vlog(log, fmt, args);
+-      va_end(args);
+-}
+-EXPORT_SYMBOL_GPL(bpf_log);
+-
+ static const char *ltrim(const char *s)
+ {
+       while (isspace(*s))
+-- 
+2.43.0
+
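The helpers moved (and now exported) above are small; the role of bpf_vlog_reset() in
particular is to roll the log back to an earlier position. One way a caller can use it --
a purely illustrative sketch, not in-tree verifier code, with explore_branch() as a
hypothetical helper -- is to remember the current position, emit tentative messages, and
discard them if the explored path is abandoned:

    static int try_one_branch(struct bpf_verifier_env *env)
    {
            u32 saved_pos = env->log.len_used;      /* log position before speculation */
            int err;

            bpf_log(&env->log, "exploring candidate branch\n");
            err = explore_branch(env);              /* hypothetical helper */
            if (err)
                    /* discard the log output produced for the abandoned path */
                    bpf_vlog_reset(&env->log, saved_pos);
            return err;
    }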
diff --git a/queue-6.1/docs-bpf-document-bpf_map_type_lpm_trie-map.patch b/queue-6.1/docs-bpf-document-bpf_map_type_lpm_trie-map.patch
new file mode 100644 (file)
index 0000000..b26c8c9
--- /dev/null
@@ -0,0 +1,212 @@
+From 43ad39965230dadde2ca18e2cb0a78d247af5e02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 11:45:42 +0000
+Subject: docs/bpf: Document BPF_MAP_TYPE_LPM_TRIE map
+
+From: Donald Hunter <donald.hunter@gmail.com>
+
+[ Upstream commit 83177c0dca3811faa051124731a692609caee7c7 ]
+
+Add documentation for BPF_MAP_TYPE_LPM_TRIE including kernel
+BPF helper usage, userspace usage and examples.
+
+Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20221101114542.24481-2-donald.hunter@gmail.com
+Stable-dep-of: 59f2f841179a ("bpf: Avoid kfree_rcu() under lock in bpf_lpm_trie.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/bpf/map_lpm_trie.rst | 181 +++++++++++++++++++++++++++++
+ 1 file changed, 181 insertions(+)
+ create mode 100644 Documentation/bpf/map_lpm_trie.rst
+
+diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
+new file mode 100644
+index 0000000000000..31be1aa7ba2cb
+--- /dev/null
++++ b/Documentation/bpf/map_lpm_trie.rst
+@@ -0,0 +1,181 @@
++.. SPDX-License-Identifier: GPL-2.0-only
++.. Copyright (C) 2022 Red Hat, Inc.
++
++=====================
++BPF_MAP_TYPE_LPM_TRIE
++=====================
++
++.. note::
++   - ``BPF_MAP_TYPE_LPM_TRIE`` was introduced in kernel version 4.11
++
++``BPF_MAP_TYPE_LPM_TRIE`` provides a longest prefix match algorithm that
++can be used to match IP addresses to a stored set of prefixes.
++Internally, data is stored in an unbalanced trie of nodes that uses
++``prefixlen,data`` pairs as its keys. The ``data`` is interpreted in
++network byte order, i.e. big endian, so ``data[0]`` stores the most
++significant byte.
++
++LPM tries may be created with a maximum prefix length that is a multiple
++of 8, in the range from 8 to 2048. The key used for lookup and update
++operations is a ``struct bpf_lpm_trie_key``, extended by
++``max_prefixlen/8`` bytes.
++
++- For IPv4 addresses the data length is 4 bytes
++- For IPv6 addresses the data length is 16 bytes
++
++The value type stored in the LPM trie can be any user defined type.
++
++.. note::
++   When creating a map of type ``BPF_MAP_TYPE_LPM_TRIE`` you must set the
++   ``BPF_F_NO_PREALLOC`` flag.
++
++Usage
++=====
++
++Kernel BPF
++----------
++
++.. c:function::
++   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
++
++The longest prefix entry for a given data value can be found using the
++``bpf_map_lookup_elem()`` helper. This helper returns a pointer to the
++value associated with the longest matching ``key``, or ``NULL`` if no
++entry was found.
++
++The ``key`` should have ``prefixlen`` set to ``max_prefixlen`` when
++performing longest prefix lookups. For example, when searching for the
++longest prefix match for an IPv4 address, ``prefixlen`` should be set to
++``32``.
++
++.. c:function::
++   long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
++
++Prefix entries can be added or updated using the ``bpf_map_update_elem()``
++helper. This helper replaces existing elements atomically.
++
++``bpf_map_update_elem()`` returns ``0`` on success, or negative error in
++case of failure.
++
++ .. note::
++    The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST,
++    but the value is ignored, giving BPF_ANY semantics.
++
++.. c:function::
++   long bpf_map_delete_elem(struct bpf_map *map, const void *key)
++
++Prefix entries can be deleted using the ``bpf_map_delete_elem()``
++helper. This helper will return 0 on success, or negative error in case
++of failure.
++
++Userspace
++---------
++
++Access from userspace uses libbpf APIs with the same names as above, with
++the map identified by ``fd``.
++
++.. c:function::
++   int bpf_map_get_next_key (int fd, const void *cur_key, void *next_key)
++
++A userspace program can iterate through the entries in an LPM trie using
++libbpf's ``bpf_map_get_next_key()`` function. The first key can be
++fetched by calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
++``NULL``. Subsequent calls will fetch the next key that follows the
++current key. ``bpf_map_get_next_key()`` returns ``0`` on success,
++``-ENOENT`` if ``cur_key`` is the last key in the trie, or negative
++error in case of failure.
++
++``bpf_map_get_next_key()`` will iterate through the LPM trie elements
++from leftmost leaf first. This means that iteration will return more
++specific keys before less specific ones.
++
++Examples
++========
++
++Please see ``tools/testing/selftests/bpf/test_lpm_map.c`` for examples
++of LPM trie usage from userspace. The code snippets below demonstrate
++API usage.
++
++Kernel BPF
++----------
++
++The following BPF code snippet shows how to declare a new LPM trie for IPv4
++address prefixes:
++
++.. code-block:: c
++
++    #include <linux/bpf.h>
++    #include <bpf/bpf_helpers.h>
++
++    struct ipv4_lpm_key {
++            __u32 prefixlen;
++            __u32 data;
++    };
++
++    struct {
++            __uint(type, BPF_MAP_TYPE_LPM_TRIE);
++            __type(key, struct ipv4_lpm_key);
++            __type(value, __u32);
++            __uint(map_flags, BPF_F_NO_PREALLOC);
++            __uint(max_entries, 255);
++    } ipv4_lpm_map SEC(".maps");
++
++The following BPF code snippet shows how to lookup by IPv4 address:
++
++.. code-block:: c
++
++    void *lookup(__u32 ipaddr)
++    {
++            struct ipv4_lpm_key key = {
++                    .prefixlen = 32,
++                    .data = ipaddr
++            };
++
++            return bpf_map_lookup_elem(&ipv4_lpm_map, &key);
++    }
++
++Userspace
++---------
++
++The following snippet shows how to insert an IPv4 prefix entry into an
++LPM trie:
++
++.. code-block:: c
++
++    int add_prefix_entry(int lpm_fd, __u32 addr, __u32 prefixlen, struct value *value)
++    {
++            struct ipv4_lpm_key ipv4_key = {
++                    .prefixlen = prefixlen,
++                    .data = addr
++            };
++            return bpf_map_update_elem(lpm_fd, &ipv4_key, value, BPF_ANY);
++    }
++
++The following snippet shows a userspace program walking through the entries
++of an LPM trie:
++
++
++.. code-block:: c
++
++    #include <bpf/libbpf.h>
++    #include <bpf/bpf.h>
++
++    void iterate_lpm_trie(int map_fd)
++    {
++            struct ipv4_lpm_key *cur_key = NULL;
++            struct ipv4_lpm_key next_key;
++            struct value value;
++            int err;
++
++            for (;;) {
++                    err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
++                    if (err)
++                            break;
++
++                    bpf_map_lookup_elem(map_fd, &next_key, &value);
++
++                    /* Use key and value here */
++
++                    cur_key = &next_key;
++            }
++    }
+-- 
+2.43.0
+
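The new document's userspace examples cover insertion and iteration; a longest-prefix
lookup from userspace works the same way with libbpf's bpf_map_lookup_elem(), which takes
the map fd as the document notes. A minimal sketch, reusing the ipv4_lpm_key layout from
the examples above and a placeholder struct value:

    #include <arpa/inet.h>
    #include <linux/types.h>
    #include <bpf/bpf.h>

    struct ipv4_lpm_key {
            __u32 prefixlen;
            __u32 data;
    };

    /* Placeholder value type, standing in for whatever the map stores. */
    struct value {
            __u32 ifindex;
    };

    /* Look up the longest matching prefix for a dotted-quad address.
     * prefixlen must be set to the maximum (32 for IPv4) for lookups. */
    int lookup_addr(int lpm_fd, const char *addr_str, struct value *value)
    {
            struct ipv4_lpm_key key = { .prefixlen = 32 };

            /* inet_pton() stores the address in network byte order,
             * which is how the LPM trie interprets the data bytes. */
            if (inet_pton(AF_INET, addr_str, &key.data) != 1)
                    return -1;

            return bpf_map_lookup_elem(lpm_fd, &key, value);
    }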
diff --git a/queue-6.1/erofs-avoid-debugging-output-for-de-compressed-data.patch b/queue-6.1/erofs-avoid-debugging-output-for-de-compressed-data.patch
new file mode 100644 (file)
index 0000000..acb27db
--- /dev/null
@@ -0,0 +1,60 @@
+From a0c25016ada38565c19ebde3ed8cc441fd4b4cd0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Dec 2023 23:19:03 +0800
+Subject: erofs: avoid debugging output for (de)compressed data
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 496530c7c1dfc159d59a75ae00b572f570710c53 ]
+
+Syzbot reported a KMSAN warning,
+erofs: (device loop0): z_erofs_lz4_decompress_mem: failed to decompress -12 in[46, 4050] out[917]
+=====================================================
+BUG: KMSAN: uninit-value in hex_dump_to_buffer+0xae9/0x10f0 lib/hexdump.c:194
+  ..
+  print_hex_dump+0x13d/0x3e0 lib/hexdump.c:276
+  z_erofs_lz4_decompress_mem fs/erofs/decompressor.c:252 [inline]
+  z_erofs_lz4_decompress+0x257e/0x2a70 fs/erofs/decompressor.c:311
+  z_erofs_decompress_pcluster fs/erofs/zdata.c:1290 [inline]
+  z_erofs_decompress_queue+0x338c/0x6460 fs/erofs/zdata.c:1372
+  z_erofs_runqueue+0x36cd/0x3830
+  z_erofs_read_folio+0x435/0x810 fs/erofs/zdata.c:1843
+
+The root cause is that the printed decompressed buffer may be filled
+incompletely due to decompression failure.  Since these dumps were only
+ever used for debugging, get rid of them now.
+
+Reported-and-tested-by: syzbot+6c746eea496f34b3161d@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/r/000000000000321c24060d7cfa1c@google.com
+Reviewed-by: Yue Hu <huyue2@coolpad.com>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20231227151903.2900413-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/decompressor.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
+index 1eefa4411e066..708bf142b1888 100644
+--- a/fs/erofs/decompressor.c
++++ b/fs/erofs/decompressor.c
+@@ -248,15 +248,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
+       if (ret != rq->outputsize) {
+               erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+                         ret, rq->inputsize, inputmargin, rq->outputsize);
+-
+-              print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+-                             16, 1, src + inputmargin, rq->inputsize, true);
+-              print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+-                             16, 1, out, rq->outputsize, true);
+-
+               if (ret >= 0)
+                       memset(out + ret, 0, rq->outputsize - ret);
+-              ret = -EIO;
++              ret = -EFSCORRUPTED;
+       } else {
+               ret = 0;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/ext4-check-the-return-value-of-ext4_xattr_inode_dec_.patch b/queue-6.1/ext4-check-the-return-value-of-ext4_xattr_inode_dec_.patch
new file mode 100644 (file)
index 0000000..412cebf
--- /dev/null
@@ -0,0 +1,38 @@
+From 21a645b83ae2cc3615d4ef0fa53bb0de1f407d4f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Sep 2022 17:28:16 -0700
+Subject: ext4: check the return value of ext4_xattr_inode_dec_ref()
+
+From: Li Zhong <floridsleeves@gmail.com>
+
+[ Upstream commit 56d0d0b9289dae041becc7ee6bd966a00dd610e0 ]
+
+Check the return value of ext4_xattr_inode_dec_ref(), which could
+return an error code that should be reported with a warning.
+
+Signed-off-by: Li Zhong <floridsleeves@gmail.com>
+Link: https://lore.kernel.org/r/20220917002816.3804400-1-floridsleeves@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 0a46ef234756 ("ext4: do not create EA inode under buffer lock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index f0a45d3ec4ebb..0df0a3ecba37a 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1550,7 +1550,8 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+       err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+       if (err) {
+-              ext4_xattr_inode_dec_ref(handle, ea_inode);
++              if (ext4_xattr_inode_dec_ref(handle, ea_inode))
++                      ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
+               iput(ea_inode);
+               return err;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/ext4-do-not-create-ea-inode-under-buffer-lock.patch b/queue-6.1/ext4-do-not-create-ea-inode-under-buffer-lock.patch
new file mode 100644 (file)
index 0000000..e875de3
--- /dev/null
@@ -0,0 +1,242 @@
+From 286f02a8b8fb7deb4942f789f55196d11be9e508 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Mar 2024 17:26:50 +0100
+Subject: ext4: do not create EA inode under buffer lock
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 0a46ef234756dca04623b7591e8ebb3440622f0b ]
+
+ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
+on the external xattr block. This is problematic as it nests all the
+allocation locking (which acquires locks on other buffers) under the
+buffer lock. This can even deadlock when the filesystem is corrupted and
+e.g. quota file is setup to contain xattr block as data block. Move the
+allocation of EA inode out of ext4_xattr_set_entry() into the callers.
+
+Reported-by: syzbot+a43d4f48b8397d0e41a9@syzkaller.appspotmail.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240321162657.27420-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 113 +++++++++++++++++++++++-------------------------
+ 1 file changed, 53 insertions(+), 60 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index b18035b8887be..d94b1a6c60e27 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1576,6 +1576,7 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
+ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+                               struct ext4_xattr_search *s,
+                               handle_t *handle, struct inode *inode,
++                              struct inode *new_ea_inode,
+                               bool is_block)
+ {
+       struct ext4_xattr_entry *last, *next;
+@@ -1583,7 +1584,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+       size_t min_offs = s->end - s->base, name_len = strlen(i->name);
+       int in_inode = i->in_inode;
+       struct inode *old_ea_inode = NULL;
+-      struct inode *new_ea_inode = NULL;
+       size_t old_size, new_size;
+       int ret;
+@@ -1668,38 +1668,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+                       old_ea_inode = NULL;
+                       goto out;
+               }
+-      }
+-      if (i->value && in_inode) {
+-              WARN_ON_ONCE(!i->value_len);
+-
+-              new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+-                                      i->value, i->value_len);
+-              if (IS_ERR(new_ea_inode)) {
+-                      ret = PTR_ERR(new_ea_inode);
+-                      new_ea_inode = NULL;
+-                      goto out;
+-              }
+-      }
+-      if (old_ea_inode) {
+               /* We are ready to release ref count on the old_ea_inode. */
+               ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
+-              if (ret) {
+-                      /* Release newly required ref count on new_ea_inode. */
+-                      if (new_ea_inode) {
+-                              int err;
+-
+-                              err = ext4_xattr_inode_dec_ref(handle,
+-                                                             new_ea_inode);
+-                              if (err)
+-                                      ext4_warning_inode(new_ea_inode,
+-                                                "dec ref new_ea_inode err=%d",
+-                                                err);
+-                              ext4_xattr_inode_free_quota(inode, new_ea_inode,
+-                                                          i->value_len);
+-                      }
++              if (ret)
+                       goto out;
+-              }
+               ext4_xattr_inode_free_quota(inode, old_ea_inode,
+                                           le32_to_cpu(here->e_value_size));
+@@ -1823,7 +1796,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+       ret = 0;
+ out:
+       iput(old_ea_inode);
+-      iput(new_ea_inode);
+       return ret;
+ }
+@@ -1886,9 +1858,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+       size_t old_ea_inode_quota = 0;
+       unsigned int ea_ino;
+-
+ #define header(x) ((struct ext4_xattr_header *)(x))
++      /* If we need EA inode, prepare it before locking the buffer */
++      if (i->value && i->in_inode) {
++              WARN_ON_ONCE(!i->value_len);
++
++              ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++                                      i->value, i->value_len);
++              if (IS_ERR(ea_inode)) {
++                      error = PTR_ERR(ea_inode);
++                      ea_inode = NULL;
++                      goto cleanup;
++              }
++      }
++
+       if (s->base) {
+               int offset = (char *)s->here - bs->bh->b_data;
+@@ -1897,6 +1881,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                                                     EXT4_JTR_NONE);
+               if (error)
+                       goto cleanup;
++
+               lock_buffer(bs->bh);
+               if (header(s->base)->h_refcount == cpu_to_le32(1)) {
+@@ -1923,7 +1908,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       }
+                       ea_bdebug(bs->bh, "modifying in-place");
+                       error = ext4_xattr_set_entry(i, s, handle, inode,
+-                                                   true /* is_block */);
++                                           ea_inode, true /* is_block */);
+                       ext4_xattr_block_csum_set(inode, bs->bh);
+                       unlock_buffer(bs->bh);
+                       if (error == -EFSCORRUPTED)
+@@ -1991,29 +1976,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+               s->end = s->base + sb->s_blocksize;
+       }
+-      error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
++      error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++                                   true /* is_block */);
+       if (error == -EFSCORRUPTED)
+               goto bad_block;
+       if (error)
+               goto cleanup;
+-      if (i->value && s->here->e_value_inum) {
+-              /*
+-               * A ref count on ea_inode has been taken as part of the call to
+-               * ext4_xattr_set_entry() above. We would like to drop this
+-               * extra ref but we have to wait until the xattr block is
+-               * initialized and has its own ref count on the ea_inode.
+-               */
+-              ea_ino = le32_to_cpu(s->here->e_value_inum);
+-              error = ext4_xattr_inode_iget(inode, ea_ino,
+-                                            le32_to_cpu(s->here->e_hash),
+-                                            &ea_inode);
+-              if (error) {
+-                      ea_inode = NULL;
+-                      goto cleanup;
+-              }
+-      }
+-
+ inserted:
+       if (!IS_LAST_ENTRY(s->first)) {
+               new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+@@ -2166,17 +2135,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ cleanup:
+       if (ea_inode) {
+-              int error2;
+-
+-              error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+-              if (error2)
+-                      ext4_warning_inode(ea_inode, "dec ref error=%d",
+-                                         error2);
++              if (error) {
++                      int error2;
+-              /* If there was an error, revert the quota charge. */
+-              if (error)
++                      error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++                      if (error2)
++                              ext4_warning_inode(ea_inode, "dec ref error=%d",
++                                                 error2);
+                       ext4_xattr_inode_free_quota(inode, ea_inode,
+                                                   i_size_read(ea_inode));
++              }
+               iput(ea_inode);
+       }
+       if (ce)
+@@ -2234,14 +2202,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ {
+       struct ext4_xattr_ibody_header *header;
+       struct ext4_xattr_search *s = &is->s;
++      struct inode *ea_inode = NULL;
+       int error;
+       if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return -ENOSPC;
+-      error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+-      if (error)
++      /* If we need EA inode, prepare it before locking the buffer */
++      if (i->value && i->in_inode) {
++              WARN_ON_ONCE(!i->value_len);
++
++              ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++                                      i->value, i->value_len);
++              if (IS_ERR(ea_inode))
++                      return PTR_ERR(ea_inode);
++      }
++      error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++                                   false /* is_block */);
++      if (error) {
++              if (ea_inode) {
++                      int error2;
++
++                      error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++                      if (error2)
++                              ext4_warning_inode(ea_inode, "dec ref error=%d",
++                                                 error2);
++
++                      ext4_xattr_inode_free_quota(inode, ea_inode,
++                                                  i_size_read(ea_inode));
++                      iput(ea_inode);
++              }
+               return error;
++      }
+       header = IHDR(inode, ext4_raw_inode(&is->iloc));
+       if (!IS_LAST_ENTRY(s->first)) {
+               header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+@@ -2250,6 +2242,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+               header->h_magic = cpu_to_le32(0);
+               ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
+       }
++      iput(ea_inode);
+       return 0;
+ }
+-- 
+2.43.0
+
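The fix above follows a general ordering rule: prepare anything that may allocate (and
therefore take other locks) before acquiring the lock that must stay innermost, and keep
the critical section down to the bare linking step. A small userspace analogy of that
pattern -- purely illustrative, not ext4 code:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;
    static void *attached;          /* object linked in under the lock */

    int set_entry(size_t size)
    {
            /* Allocate first: allocation may block or take other locks,
             * so it must not happen while buffer_lock is held. */
            void *obj = malloc(size);

            if (!obj)
                    return -1;

            pthread_mutex_lock(&buffer_lock);
            attached = obj;         /* short critical section: just link it in */
            pthread_mutex_unlock(&buffer_lock);
            return 0;
    }

    int main(void)
    {
            if (set_entry(64) == 0)
                    puts("entry set with allocation done outside the lock");
            free(attached);
            return 0;
    }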
diff --git a/queue-6.1/ext4-fold-quota-accounting-into-ext4_xattr_inode_loo.patch b/queue-6.1/ext4-fold-quota-accounting-into-ext4_xattr_inode_loo.patch
new file mode 100644 (file)
index 0000000..1700141
--- /dev/null
@@ -0,0 +1,120 @@
+From 83ed2f1027910e92efa90474780a2e455e3435d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Feb 2024 12:20:59 +0100
+Subject: ext4: fold quota accounting into ext4_xattr_inode_lookup_create()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 8208c41c43ad5e9b63dce6c45a73e326109ca658 ]
+
+When allocating EA inode, quota accounting is done just before
+ext4_xattr_inode_lookup_create(). Logically these two operations belong
+together so just fold quota accounting into
+ext4_xattr_inode_lookup_create(). We also make
+ext4_xattr_inode_lookup_create() return the looked up / created inode to
+convert the function to a more standard calling convention.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240209112107.10585-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 0a46ef234756 ("ext4: do not create EA inode under buffer lock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 50 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 24 insertions(+), 26 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 0df0a3ecba37a..b18035b8887be 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1522,46 +1522,49 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+ /*
+  * Add value of the EA in an inode.
+  */
+-static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+-                                        const void *value, size_t value_len,
+-                                        struct inode **ret_inode)
++static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
++              struct inode *inode, const void *value, size_t value_len)
+ {
+       struct inode *ea_inode;
+       u32 hash;
+       int err;
++      /* Account inode & space to quota even if sharing... */
++      err = ext4_xattr_inode_alloc_quota(inode, value_len);
++      if (err)
++              return ERR_PTR(err);
++
+       hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
+       ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
+       if (ea_inode) {
+               err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+-              if (err) {
+-                      iput(ea_inode);
+-                      return err;
+-              }
+-
+-              *ret_inode = ea_inode;
+-              return 0;
++              if (err)
++                      goto out_err;
++              return ea_inode;
+       }
+       /* Create an inode for the EA value */
+       ea_inode = ext4_xattr_inode_create(handle, inode, hash);
+-      if (IS_ERR(ea_inode))
+-              return PTR_ERR(ea_inode);
++      if (IS_ERR(ea_inode)) {
++              ext4_xattr_inode_free_quota(inode, NULL, value_len);
++              return ea_inode;
++      }
+       err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+       if (err) {
+               if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+                       ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
+-              iput(ea_inode);
+-              return err;
++              goto out_err;
+       }
+       if (EA_INODE_CACHE(inode))
+               mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+                                     ea_inode->i_ino, true /* reusable */);
+-
+-      *ret_inode = ea_inode;
+-      return 0;
++      return ea_inode;
++out_err:
++      iput(ea_inode);
++      ext4_xattr_inode_free_quota(inode, NULL, value_len);
++      return ERR_PTR(err);
+ }
+ /*
+@@ -1669,16 +1672,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+       if (i->value && in_inode) {
+               WARN_ON_ONCE(!i->value_len);
+-              ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
+-              if (ret)
+-                      goto out;
+-
+-              ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
+-                                                   i->value_len,
+-                                                   &new_ea_inode);
+-              if (ret) {
++              new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++                                      i->value, i->value_len);
++              if (IS_ERR(new_ea_inode)) {
++                      ret = PTR_ERR(new_ea_inode);
+                       new_ea_inode = NULL;
+-                      ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
+                       goto out;
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/ext4-jbd2-add-an-optimized-bmap-for-the-journal-inod.patch b/queue-6.1/ext4-jbd2-add-an-optimized-bmap-for-the-journal-inod.patch
new file mode 100644 (file)
index 0000000..aba6c50
--- /dev/null
@@ -0,0 +1,108 @@
+From bf3cf28d19a1167d523c430b507d55ea844b72f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Mar 2023 23:15:49 -0500
+Subject: ext4, jbd2: add an optimized bmap for the journal inode
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+[ Upstream commit 62913ae96de747091c4dacd06d158e7729c1a76d ]
+
+The generic bmap() function exported by the VFS takes locks and does
+checks that are not necessary for the journal inode.  So allow the
+file system to set a journal-optimized bmap function in
+journal->j_bmap.
+
+Reported-by: syzbot+9543479984ae9e576000@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=e4aaa78795e490421c79f76ec3679006c8ff4cf0
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c      | 23 +++++++++++++++++++++++
+ fs/jbd2/journal.c    |  9 ++++++---
+ include/linux/jbd2.h |  8 ++++++++
+ 3 files changed, 37 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 274542d869d0c..3db39758486e9 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5752,6 +5752,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
+       return journal_inode;
+ }
++static int ext4_journal_bmap(journal_t *journal, sector_t *block)
++{
++      struct ext4_map_blocks map;
++      int ret;
++
++      if (journal->j_inode == NULL)
++              return 0;
++
++      map.m_lblk = *block;
++      map.m_len = 1;
++      ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
++      if (ret <= 0) {
++              ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
++                       "journal bmap failed: block %llu ret %d\n",
++                       *block, ret);
++              jbd2_journal_abort(journal, ret ? ret : -EIO);
++              return ret;
++      }
++      *block = map.m_pblk;
++      return 0;
++}
++
+ static journal_t *ext4_get_journal(struct super_block *sb,
+                                  unsigned int journal_inum)
+ {
+@@ -5772,6 +5794,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
+               return NULL;
+       }
+       journal->j_private = sb;
++      journal->j_bmap = ext4_journal_bmap;
+       ext4_init_journal_params(sb, journal);
+       return journal;
+ }
+diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
+index c8d59f7c47453..d3d3ea439d29b 100644
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -971,10 +971,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
+ {
+       int err = 0;
+       unsigned long long ret;
+-      sector_t block = 0;
++      sector_t block = blocknr;
+-      if (journal->j_inode) {
+-              block = blocknr;
++      if (journal->j_bmap) {
++              err = journal->j_bmap(journal, &block);
++              if (err == 0)
++                      *retp = block;
++      } else if (journal->j_inode) {
+               ret = bmap(journal->j_inode, &block);
+               if (ret || !block) {
+diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
+index e301d323108d1..5bf7ada754d79 100644
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1302,6 +1302,14 @@ struct journal_s
+                                   struct buffer_head *bh,
+                                   enum passtype pass, int off,
+                                   tid_t expected_commit_id);
++
++      /**
++       * @j_bmap:
++       *
++       * Bmap function that should be used instead of the generic
++       * VFS bmap function.
++       */
++      int (*j_bmap)(struct journal_s *journal, sector_t *block);
+ };
+ #define jbd2_might_wait_for_commit(j) \
+-- 
+2.43.0
+
diff --git a/queue-6.1/fou-remove-warn-in-gue_gro_receive-on-unsupported-pr.patch b/queue-6.1/fou-remove-warn-in-gue_gro_receive-on-unsupported-pr.patch
new file mode 100644 (file)
index 0000000..4688b5a
--- /dev/null
@@ -0,0 +1,46 @@
+From bbb1268c222d029e682159a3f09aa4a44c0422e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jun 2024 08:25:18 -0400
+Subject: fou: remove warn in gue_gro_receive on unsupported protocol
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit dd89a81d850fa9a65f67b4527c0e420d15bf836c ]
+
+Drop the WARN_ON_ONCE in gue_gro_receive if the encapsulated type is
+not known or does not have a GRO handler.
+
+Such a packet is easily constructed. Syzbot generates them and sets
+off this warning.
+
+Remove the warning as it is expected and not actionable.
+
+The warning was previously reduced from WARN_ON to WARN_ON_ONCE in
+commit 270136613bf7 ("fou: Do WARN_ON_ONCE in gue_gro_receive for bad
+proto callbacks").
+
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240614122552.1649044-1-willemdebruijn.kernel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fou.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
+index 0c3c6d0cee290..358bff068eef8 100644
+--- a/net/ipv4/fou.c
++++ b/net/ipv4/fou.c
+@@ -431,7 +431,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
+       offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+       ops = rcu_dereference(offloads[proto]);
+-      if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
++      if (!ops || !ops->callbacks.gro_receive)
+               goto out;
+       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+-- 
+2.43.0
+
diff --git a/queue-6.1/fs-ntfs3-do-copy_to_user-out-of-run_lock.patch b/queue-6.1/fs-ntfs3-do-copy_to_user-out-of-run_lock.patch
new file mode 100644 (file)
index 0000000..22585b5
--- /dev/null
@@ -0,0 +1,145 @@
+From fed2f4e5e0a2fca06170bcb2292ca2d7550d0f57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jun 2024 15:14:07 +0300
+Subject: fs/ntfs3: Do copy_to_user out of run_lock
+
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+
+[ Upstream commit d57431c6f511bf020e474026d9f3123d7bfbea8c ]
+
+In order not to call copy_to_user (from fiemap_fill_next_extent) while
+holding run_lock, we allocate memory in the kernel, fill it, and copy it
+to user memory after up_read(run_lock).
+
+Reported-by: syzbot+36bb70085ef6edc2ebb9@syzkaller.appspotmail.com
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ntfs3/frecord.c | 75 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 72 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
+index 02465ab3f398c..6cce71cc750ea 100644
+--- a/fs/ntfs3/frecord.c
++++ b/fs/ntfs3/frecord.c
+@@ -1897,6 +1897,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+       return REPARSE_LINK;
+ }
++/*
++ * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent
++ * but it accepts kernel address for fi_extents_start
++ */
++static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo,
++                                   u64 logical, u64 phys, u64 len, u32 flags)
++{
++      struct fiemap_extent extent;
++      struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
++
++      /* only count the extents */
++      if (fieinfo->fi_extents_max == 0) {
++              fieinfo->fi_extents_mapped++;
++              return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
++      }
++
++      if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
++              return 1;
++
++      if (flags & FIEMAP_EXTENT_DELALLOC)
++              flags |= FIEMAP_EXTENT_UNKNOWN;
++      if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
++              flags |= FIEMAP_EXTENT_ENCODED;
++      if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
++              flags |= FIEMAP_EXTENT_NOT_ALIGNED;
++
++      memset(&extent, 0, sizeof(extent));
++      extent.fe_logical = logical;
++      extent.fe_physical = phys;
++      extent.fe_length = len;
++      extent.fe_flags = flags;
++
++      dest += fieinfo->fi_extents_mapped;
++      memcpy(dest, &extent, sizeof(extent));
++
++      fieinfo->fi_extents_mapped++;
++      if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
++              return 1;
++      return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
++}
++
+ /*
+  * ni_fiemap - Helper for file_fiemap().
+  *
+@@ -1907,6 +1948,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+             __u64 vbo, __u64 len)
+ {
+       int err = 0;
++      struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start;
++      struct fiemap_extent *fe_k = NULL;
+       struct ntfs_sb_info *sbi = ni->mi.sbi;
+       u8 cluster_bits = sbi->cluster_bits;
+       struct runs_tree *run;
+@@ -1954,6 +1997,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+               goto out;
+       }
++      /*
++       * To avoid lock problems replace pointer to user memory by pointer to kernel memory.
++       */
++      fe_k = kmalloc_array(fieinfo->fi_extents_max,
++                           sizeof(struct fiemap_extent),
++                           GFP_NOFS | __GFP_ZERO);
++      if (!fe_k) {
++              err = -ENOMEM;
++              goto out;
++      }
++      fieinfo->fi_extents_start = fe_k;
++
+       end = vbo + len;
+       alloc_size = le64_to_cpu(attr->nres.alloc_size);
+       if (end > alloc_size)
+@@ -2042,8 +2097,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+                       if (vbo + dlen >= end)
+                               flags |= FIEMAP_EXTENT_LAST;
+-                      err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
+-                                                    flags);
++                      err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen,
++                                                      flags);
++
+                       if (err < 0)
+                               break;
+                       if (err == 1) {
+@@ -2063,7 +2119,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+               if (vbo + bytes >= end)
+                       flags |= FIEMAP_EXTENT_LAST;
+-              err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
++              err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes,
++                                              flags);
+               if (err < 0)
+                       break;
+               if (err == 1) {
+@@ -2076,7 +2133,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+       up_read(run_lock);
++      /*
++       * Copy to user memory out of lock
++       */
++      if (copy_to_user(fe_u, fe_k,
++                       fieinfo->fi_extents_max *
++                               sizeof(struct fiemap_extent))) {
++              err = -EFAULT;
++      }
++
+ out:
++      /* Restore original pointer. */
++      fieinfo->fi_extents_start = fe_u;
++      kfree(fe_k);
+       return err;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/gfs2-rename-gfs2_freeze_lock-_shared.patch b/queue-6.1/gfs2-rename-gfs2_freeze_lock-_shared.patch
new file mode 100644 (file)
index 0000000..c60494d
--- /dev/null
@@ -0,0 +1,123 @@
+From 4714f18c5f7b9616da8c5d1f8d7a4f6cd2b95496 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 18:26:00 +0100
+Subject: gfs2: Rename gfs2_freeze_lock{ => _shared }
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit e392edd5d52a6742595ecaf8270c1af3e96b9a38 ]
+
+Rename gfs2_freeze_lock to gfs2_freeze_lock_shared to make it a bit more
+obvious that this function establishes the "thawed" state of the freeze
+glock.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/ops_fstype.c |  4 ++--
+ fs/gfs2/recovery.c   |  2 +-
+ fs/gfs2/super.c      |  2 +-
+ fs/gfs2/util.c       | 10 +++++-----
+ fs/gfs2/util.h       |  5 +++--
+ 5 files changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
+index c7f6208ad98c0..e427fb7fbe998 100644
+--- a/fs/gfs2/ops_fstype.c
++++ b/fs/gfs2/ops_fstype.c
+@@ -1266,7 +1266,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
+               }
+       }
+-      error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
++      error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0);
+       if (error)
+               goto fail_per_node;
+@@ -1587,7 +1587,7 @@ static int gfs2_reconfigure(struct fs_context *fc)
+       if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+               struct gfs2_holder freeze_gh;
+-              error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
++              error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0);
+               if (error)
+                       return -EINVAL;
+diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
+index d8e522f389aa7..61ef07da40b22 100644
+--- a/fs/gfs2/recovery.c
++++ b/fs/gfs2/recovery.c
+@@ -470,7 +470,7 @@ void gfs2_recover_func(struct work_struct *work)
+               /* Acquire a shared hold on the freeze glock */
+-              error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
++              error = gfs2_freeze_lock_shared(sdp, &thaw_gh, LM_FLAG_PRIORITY);
+               if (error)
+                       goto fail_gunlock_ji;
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index d7b3a982552cf..cb05332e473bd 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -662,7 +662,7 @@ void gfs2_freeze_func(struct work_struct *work)
+       struct super_block *sb = sdp->sd_vfs;
+       atomic_inc(&sb->s_active);
+-      error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
++      error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0);
+       if (error) {
+               gfs2_assert_withdraw(sdp, 0);
+       } else {
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index 11cc59ac64fdc..1195ea08f9ca4 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -93,13 +93,13 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ }
+ /**
+- * gfs2_freeze_lock - hold the freeze glock
++ * gfs2_freeze_lock_shared - hold the freeze glock
+  * @sdp: the superblock
+  * @freeze_gh: pointer to the requested holder
+  * @caller_flags: any additional flags needed by the caller
+  */
+-int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
+-                   int caller_flags)
++int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
++                          int caller_flags)
+ {
+       int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
+       int error;
+@@ -157,8 +157,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+               gfs2_holder_mark_uninitialized(&freeze_gh);
+               if (sdp->sd_freeze_gl &&
+                   !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+-                      ret = gfs2_freeze_lock(sdp, &freeze_gh,
+-                                     log_write_allowed ? 0 : LM_FLAG_TRY);
++                      ret = gfs2_freeze_lock_shared(sdp, &freeze_gh,
++                                      log_write_allowed ? 0 : LM_FLAG_TRY);
+                       if (ret == GLR_TRYFAILED)
+                               ret = 0;
+               }
+diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
+index 78ec190f4155b..3291e33e81e97 100644
+--- a/fs/gfs2/util.h
++++ b/fs/gfs2/util.h
+@@ -149,8 +149,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
+ extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+                              bool verbose);
+-extern int gfs2_freeze_lock(struct gfs2_sbd *sdp,
+-                          struct gfs2_holder *freeze_gh, int caller_flags);
++extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp,
++                                 struct gfs2_holder *freeze_gh,
++                                 int caller_flags);
+ extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh);
+ #define gfs2_io_error(sdp) \
+-- 
+2.43.0
+
diff --git a/queue-6.1/gfs2-rename-remaining-transaction-glock-references.patch b/queue-6.1/gfs2-rename-remaining-transaction-glock-references.patch
new file mode 100644 (file)
index 0000000..c0a146c
--- /dev/null
@@ -0,0 +1,113 @@
+From 657615088f34258627464dd8e409e53b64bc337a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 14:19:06 +0100
+Subject: gfs2: Rename remaining "transaction" glock references
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit af1abe11466f1a6cb6ba22ee0d815c21c3559947 ]
+
+The transaction glock was repurposed to serve as the new freeze glock
+years ago.  Don't refer to it as the transaction glock anymore.
+
+Also, to be more precise, call it the "freeze glock" instead of the
+"freeze lock".  Ditto for the journal glock.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/glock.c      | 4 ++--
+ fs/gfs2/ops_fstype.c | 2 +-
+ fs/gfs2/recovery.c   | 8 ++++----
+ fs/gfs2/super.c      | 2 +-
+ fs/gfs2/util.c       | 2 +-
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
+index 95353982e643a..be05c43b89a59 100644
+--- a/fs/gfs2/glock.c
++++ b/fs/gfs2/glock.c
+@@ -146,8 +146,8 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
+  *
+  * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
+  * when we're withdrawn. For example, to maintain metadata integrity, we should
+- * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
+- * iopen or the transaction glocks may be safely used because none of their
++ * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like
++ * the iopen or freeze glock may be safely used because none of their
+  * metadata goes through the journal. So in general, we should disallow all
+  * glocks that are journaled, and allow all the others. One exception is:
+  * we need to allow our active journal to be promoted and demoted so others
+diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
+index c0cf1d2d0ef5b..c7f6208ad98c0 100644
+--- a/fs/gfs2/ops_fstype.c
++++ b/fs/gfs2/ops_fstype.c
+@@ -434,7 +434,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
+       error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops,
+                              CREATE, &sdp->sd_freeze_gl);
+       if (error) {
+-              fs_err(sdp, "can't create transaction glock: %d\n", error);
++              fs_err(sdp, "can't create freeze glock: %d\n", error);
+               goto fail_rename;
+       }
+diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
+index 2bb085a72e8ee..d8e522f389aa7 100644
+--- a/fs/gfs2/recovery.c
++++ b/fs/gfs2/recovery.c
+@@ -420,10 +420,10 @@ void gfs2_recover_func(struct work_struct *work)
+       if (sdp->sd_args.ar_spectator)
+               goto fail;
+       if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
+-              fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
++              fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n",
+                       jd->jd_jid);
+               jlocked = 1;
+-              /* Acquire the journal lock so we can do recovery */
++              /* Acquire the journal glock so we can do recovery */
+               error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
+                                         LM_ST_EXCLUSIVE,
+@@ -465,10 +465,10 @@ void gfs2_recover_func(struct work_struct *work)
+               ktime_ms_delta(t_jhd, t_jlck));
+       if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+-              fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
++              fs_info(sdp, "jid=%u: Acquiring the freeze glock...\n",
+                       jd->jd_jid);
+-              /* Acquire a shared hold on the freeze lock */
++              /* Acquire a shared hold on the freeze glock */
+               error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
+               if (error)
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index 6107cd680176c..c87fafbe710a6 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -463,7 +463,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
+  * @flags: The type of dirty
+  *
+  * Unfortunately it can be called under any combination of inode
+- * glock and transaction lock, so we have to check carefully.
++ * glock and freeze glock, so we have to check carefully.
+  *
+  * At the moment this deals only with atime - it should be possible
+  * to expand that role in future, once a review of the locking has
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index 48c69aa60cd17..86d1415932a43 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -107,7 +107,7 @@ int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
+       error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
+                                  freeze_gh);
+       if (error && error != GLR_TRYFAILED)
+-              fs_err(sdp, "can't lock the freeze lock: %d\n", error);
++              fs_err(sdp, "can't lock the freeze glock: %d\n", error);
+       return error;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/gfs2-rename-sdf_-fs_frozen-freeze_initiator.patch b/queue-6.1/gfs2-rename-sdf_-fs_frozen-freeze_initiator.patch
new file mode 100644 (file)
index 0000000..313ecc0
--- /dev/null
@@ -0,0 +1,98 @@
+From 4104c2798fdb3f62d544eb48aa091c9f9b5e6706 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 23:09:38 +0100
+Subject: gfs2: Rename SDF_{FS_FROZEN => FREEZE_INITIATOR}
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit cad1e15804a83afd9a5c1d95a428d60d1f9c0340 ]
+
+Rename the SDF_FS_FROZEN flag to SDF_FREEZE_INITIATOR to indicate more
+clearly that the node that has this flag set is the initiator of the
+freeze.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/incore.h | 2 +-
+ fs/gfs2/super.c  | 8 ++++----
+ fs/gfs2/sys.c    | 2 +-
+ fs/gfs2/util.c   | 2 +-
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
+index d09d9892cd055..113aeb5877027 100644
+--- a/fs/gfs2/incore.h
++++ b/fs/gfs2/incore.h
+@@ -600,7 +600,7 @@ enum {
+       SDF_RORECOVERY          = 7, /* read only recovery */
+       SDF_SKIP_DLM_UNLOCK     = 8,
+       SDF_FORCE_AIL_FLUSH     = 9,
+-      SDF_FS_FROZEN           = 10,
++      SDF_FREEZE_INITIATOR    = 10,
+       SDF_WITHDRAWING         = 11, /* Will withdraw eventually */
+       SDF_WITHDRAW_IN_PROG    = 12, /* Withdraw is in progress */
+       SDF_REMOTE_WITHDRAW     = 13, /* Performing remote recovery */
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index cb05332e473bd..cdfbfda046945 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -676,8 +676,8 @@ void gfs2_freeze_func(struct work_struct *work)
+               gfs2_freeze_unlock(&freeze_gh);
+       }
+       deactivate_super(sb);
+-      clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
+-      wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
++      clear_bit_unlock(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
++      wake_up_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR);
+       return;
+ }
+@@ -720,7 +720,7 @@ static int gfs2_freeze_super(struct super_block *sb)
+               fs_err(sdp, "retrying...\n");
+               msleep(1000);
+       }
+-      set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
++      set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+ out:
+       mutex_unlock(&sdp->sd_freeze_mutex);
+       return error;
+@@ -745,7 +745,7 @@ static int gfs2_thaw_super(struct super_block *sb)
+       gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+       mutex_unlock(&sdp->sd_freeze_mutex);
+-      return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
++      return wait_on_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR, TASK_INTERRUPTIBLE);
+ }
+ /**
+diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
+index d87ea98cf5350..e1fa76d4a7c22 100644
+--- a/fs/gfs2/sys.c
++++ b/fs/gfs2/sys.c
+@@ -110,7 +110,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
+                    test_bit(SDF_RORECOVERY, &f),
+                    test_bit(SDF_SKIP_DLM_UNLOCK, &f),
+                    test_bit(SDF_FORCE_AIL_FLUSH, &f),
+-                   test_bit(SDF_FS_FROZEN, &f),
++                   test_bit(SDF_FREEZE_INITIATOR, &f),
+                    test_bit(SDF_WITHDRAWING, &f),
+                    test_bit(SDF_WITHDRAW_IN_PROG, &f),
+                    test_bit(SDF_REMOTE_WITHDRAW, &f),
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index 1195ea08f9ca4..ebf87fb7b3bf5 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -187,7 +187,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+       }
+       sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
+       gfs2_glock_dq(&sdp->sd_jinode_gh);
+-      if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
++      if (test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) {
+               /* Make sure gfs2_thaw_super works if partially-frozen */
+               flush_work(&sdp->sd_freeze_work);
+               atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+-- 
+2.43.0
+
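For context, the bit renamed by this patch is part of a wait/wake handshake between the node that initiated the freeze and gfs2_thaw_super(). A minimal sketch of that pattern, condensed from the hunks above; the function names are illustrative only, and the gfs2 types and flag come from fs/gfs2/incore.h:

#include <linux/bitops.h>
#include <linux/wait_bit.h>
#include <linux/sched.h>

/* gfs2_freeze_func() side: thawed again, release the bit and wake waiters. */
static void freeze_work_done_sketch(struct gfs2_sbd *sdp)
{
	clear_bit_unlock(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
	wake_up_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR);
}

/* gfs2_thaw_super() side: sleep until the freeze work has finished. */
static int thaw_wait_sketch(struct gfs2_sbd *sdp)
{
	return wait_on_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR,
			   TASK_INTERRUPTIBLE);
}
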
diff --git a/queue-6.1/gfs2-rename-the-freeze-thaw-_super-callbacks.patch b/queue-6.1/gfs2-rename-the-freeze-thaw-_super-callbacks.patch
new file mode 100644 (file)
index 0000000..c26f8a7
--- /dev/null
@@ -0,0 +1,82 @@
+From 4cc607531871ebd2383d0f6b83e7eac16c53bbb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 16:40:15 +0100
+Subject: gfs2: Rename the {freeze,thaw}_super callbacks
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit 097cca525adf10f35c9dac037155564f1b1a688b ]
+
+Rename gfs2_freeze to gfs2_freeze_super and gfs2_unfreeze to
+gfs2_thaw_super to match the names of the corresponding super
+operations.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/super.c | 12 ++++++------
+ fs/gfs2/util.c  |  2 +-
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index c87fafbe710a6..d7b3a982552cf 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -682,12 +682,12 @@ void gfs2_freeze_func(struct work_struct *work)
+ }
+ /**
+- * gfs2_freeze - prevent further writes to the filesystem
++ * gfs2_freeze_super - prevent further writes to the filesystem
+  * @sb: the VFS structure for the filesystem
+  *
+  */
+-static int gfs2_freeze(struct super_block *sb)
++static int gfs2_freeze_super(struct super_block *sb)
+ {
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+       int error;
+@@ -727,12 +727,12 @@ static int gfs2_freeze(struct super_block *sb)
+ }
+ /**
+- * gfs2_unfreeze - reallow writes to the filesystem
++ * gfs2_thaw_super - reallow writes to the filesystem
+  * @sb: the VFS structure for the filesystem
+  *
+  */
+-static int gfs2_unfreeze(struct super_block *sb)
++static int gfs2_thaw_super(struct super_block *sb)
+ {
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+@@ -1499,8 +1499,8 @@ const struct super_operations gfs2_super_ops = {
+       .evict_inode            = gfs2_evict_inode,
+       .put_super              = gfs2_put_super,
+       .sync_fs                = gfs2_sync_fs,
+-      .freeze_super           = gfs2_freeze,
+-      .thaw_super             = gfs2_unfreeze,
++      .freeze_super           = gfs2_freeze_super,
++      .thaw_super             = gfs2_thaw_super,
+       .statfs                 = gfs2_statfs,
+       .drop_inode             = gfs2_drop_inode,
+       .show_options           = gfs2_show_options,
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index 86d1415932a43..11cc59ac64fdc 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -188,7 +188,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+       sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
+       gfs2_glock_dq(&sdp->sd_jinode_gh);
+       if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
+-              /* Make sure gfs2_unfreeze works if partially-frozen */
++              /* Make sure gfs2_thaw_super works if partially-frozen */
+               flush_work(&sdp->sd_freeze_work);
+               atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+               thaw_super(sdp->sd_vfs);
+-- 
+2.43.0
+
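The rename simply lines the helpers up with the super_operations members they are assigned to, as the last hunk shows. A minimal sketch of that wiring on a 6.1-era kernel, with prototypes taken from the hunks above; the stub bodies and sketch_* names are placeholders, not code from the patch:

#include <linux/fs.h>

static int sketch_freeze_super(struct super_block *sb)
{
	return 0;	/* real body: gfs2_freeze_super(), formerly gfs2_freeze() */
}

static int sketch_thaw_super(struct super_block *sb)
{
	return 0;	/* real body: gfs2_thaw_super(), formerly gfs2_unfreeze() */
}

static const struct super_operations sketch_super_ops = {
	/* invoked by the VFS from freeze_super() / thaw_super() */
	.freeze_super	= sketch_freeze_super,
	.thaw_super	= sketch_thaw_super,
};
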
diff --git a/queue-6.1/gfs2-rework-freeze-thaw-logic.patch b/queue-6.1/gfs2-rework-freeze-thaw-logic.patch
new file mode 100644 (file)
index 0000000..f3ee94a
--- /dev/null
@@ -0,0 +1,594 @@
+From 96806a5bbe1494fcf93c8f7a5ec4754659b80c40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 23:34:50 +0100
+Subject: gfs2: Rework freeze / thaw logic
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit b77b4a4815a9651d1d6e07b8e6548eee9531a5eb ]
+
+So far, at mount time, gfs2 would take the freeze glock in shared mode
+and then immediately drop it again, turning it into a cached glock that
+can be reclaimed at any time.  To freeze the filesystem cluster-wide,
+the node initiating the freeze would take the freeze glock in exclusive
+mode, which would cause the freeze glock's freeze_go_sync() callback to
+run on each node.  There, gfs2 would freeze the filesystem and schedule
+gfs2_freeze_func() to run.  gfs2_freeze_func() would re-acquire the
+freeze glock in shared mode, thaw the filesystem, and drop the freeze
+glock again.  The initiating node would keep the freeze glock held in
+exclusive mode.  To thaw the filesystem, the initiating node would drop
+the freeze glock again, which would allow gfs2_freeze_func() to resume
+on all nodes, leaving the filesystem in the thawed state.
+
+It turns out that in freeze_go_sync(), we cannot reliably and safely
+freeze the filesystem.  This is primarily because the final unmount of a
+filesystem takes a write lock on the s_umount rw semaphore before
+calling into gfs2_put_super(), and freeze_go_sync() needs to call
+freeze_super() which also takes a write lock on the same semaphore,
+causing a deadlock.  We could work around this by trying to take an
+active reference on the super block first, which would prevent unmount
+from running at the same time.  But that can fail, and freeze_go_sync()
+isn't actually allowed to fail.
+
+To get around this, this patch changes the freeze glock locking scheme
+as follows:
+
+At mount time, each node takes the freeze glock in shared mode.  To
+freeze a filesystem, the initiating node first freezes the filesystem
+locally and then drops and re-acquires the freeze glock in exclusive
+mode.  All other nodes notice that there is contention on the freeze
+glock in their go_callback callbacks, and they schedule
+gfs2_freeze_func() to run.  There, they freeze the filesystem locally
+and drop and re-acquire the freeze glock before re-thawing the
+filesystem.  This is happening outside of the glock state engine, so
+there, we are allowed to fail.
+
+From a cluster point of view, taking and immediately dropping a glock is
+indistinguishable from taking the glock and only dropping it upon
+contention, so this new scheme is compatible with the old one.
+
+Thanks to Li Dong <lidong@vivo.com> for reporting a locking bug in
+gfs2_freeze_func() in a previous version of this commit.
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/glops.c      |  52 +++++--------
+ fs/gfs2/log.c        |   2 -
+ fs/gfs2/ops_fstype.c |   5 +-
+ fs/gfs2/recovery.c   |  24 +++---
+ fs/gfs2/super.c      | 172 +++++++++++++++++++++++++++++++++----------
+ fs/gfs2/super.h      |   1 +
+ fs/gfs2/util.c       |  32 +++-----
+ 7 files changed, 178 insertions(+), 110 deletions(-)
+
+diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
+index 91a542b9d81e8..089b3d811e43d 100644
+--- a/fs/gfs2/glops.c
++++ b/fs/gfs2/glops.c
+@@ -555,47 +555,33 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ }
+ /**
+- * freeze_go_sync - promote/demote the freeze glock
++ * freeze_go_callback - A cluster node is requesting a freeze
+  * @gl: the glock
++ * @remote: true if this came from a different cluster node
+  */
+-static int freeze_go_sync(struct gfs2_glock *gl)
++static void freeze_go_callback(struct gfs2_glock *gl, bool remote)
+ {
+-      int error = 0;
+       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
++      struct super_block *sb = sdp->sd_vfs;
++
++      if (!remote ||
++          gl->gl_state != LM_ST_SHARED ||
++          gl->gl_demote_state != LM_ST_UNLOCKED)
++              return;
+       /*
+-       * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
+-       * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
+-       * all the nodes should have the freeze glock in SH mode and they all
+-       * call do_xmote: One for EX and the others for UN. They ALL must
+-       * freeze locally, and they ALL must queue freeze work. The freeze_work
+-       * calls freeze_func, which tries to reacquire the freeze glock in SH,
+-       * effectively waiting for the thaw on the node who holds it in EX.
+-       * Once thawed, the work func acquires the freeze glock in
+-       * SH and everybody goes back to thawed.
++       * Try to get an active super block reference to prevent racing with
++       * unmount (see trylock_super()).  But note that unmount isn't the only
++       * place where a write lock on s_umount is taken, and we can fail here
++       * because of things like remount as well.
+        */
+-      if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
+-          !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
+-              atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
+-              error = freeze_super(sdp->sd_vfs);
+-              if (error) {
+-                      fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+-                              error);
+-                      if (gfs2_withdrawn(sdp)) {
+-                              atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+-                              return 0;
+-                      }
+-                      gfs2_assert_withdraw(sdp, 0);
+-              }
+-              queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
+-              if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+-                      gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
+-                                     GFS2_LFC_FREEZE_GO_SYNC);
+-              else /* read-only mounts */
+-                      atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
++      if (down_read_trylock(&sb->s_umount)) {
++              atomic_inc(&sb->s_active);
++              up_read(&sb->s_umount);
++              if (!queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work))
++                      deactivate_super(sb);
+       }
+-      return 0;
+ }
+ /**
+@@ -760,9 +746,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
+ };
+ const struct gfs2_glock_operations gfs2_freeze_glops = {
+-      .go_sync = freeze_go_sync,
+       .go_xmote_bh = freeze_go_xmote_bh,
+       .go_demote_ok = freeze_go_demote_ok,
++      .go_callback = freeze_go_callback,
+       .go_type = LM_TYPE_NONDISK,
+       .go_flags = GLOF_NONDISK,
+ };
+diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
+index e021d5f50c231..8fd8bb8604869 100644
+--- a/fs/gfs2/log.c
++++ b/fs/gfs2/log.c
+@@ -1136,8 +1136,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
+               if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
+                            GFS2_LOG_HEAD_FLUSH_FREEZE))
+                       gfs2_log_shutdown(sdp);
+-              if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
+-                      atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+       }
+ out_end:
+diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
+index e427fb7fbe998..8299113858ce4 100644
+--- a/fs/gfs2/ops_fstype.c
++++ b/fs/gfs2/ops_fstype.c
+@@ -1143,7 +1143,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
+       int silent = fc->sb_flags & SB_SILENT;
+       struct gfs2_sbd *sdp;
+       struct gfs2_holder mount_gh;
+-      struct gfs2_holder freeze_gh;
+       int error;
+       sdp = init_sbd(sb);
+@@ -1266,15 +1265,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
+               }
+       }
+-      error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0);
++      error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0);
+       if (error)
+               goto fail_per_node;
+       if (!sb_rdonly(sb))
+               error = gfs2_make_fs_rw(sdp);
+-      gfs2_freeze_unlock(&freeze_gh);
+       if (error) {
++              gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+               if (sdp->sd_quotad_process)
+                       kthread_stop(sdp->sd_quotad_process);
+               sdp->sd_quotad_process = NULL;
+diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
+index 61ef07da40b22..afeda936e2beb 100644
+--- a/fs/gfs2/recovery.c
++++ b/fs/gfs2/recovery.c
+@@ -404,7 +404,7 @@ void gfs2_recover_func(struct work_struct *work)
+       struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
+       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+       struct gfs2_log_header_host head;
+-      struct gfs2_holder j_gh, ji_gh, thaw_gh;
++      struct gfs2_holder j_gh, ji_gh;
+       ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
+       int ro = 0;
+       unsigned int pass;
+@@ -465,14 +465,14 @@ void gfs2_recover_func(struct work_struct *work)
+               ktime_ms_delta(t_jhd, t_jlck));
+       if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+-              fs_info(sdp, "jid=%u: Acquiring the freeze glock...\n",
+-                      jd->jd_jid);
+-
+-              /* Acquire a shared hold on the freeze glock */
++              mutex_lock(&sdp->sd_freeze_mutex);
+-              error = gfs2_freeze_lock_shared(sdp, &thaw_gh, LM_FLAG_PRIORITY);
+-              if (error)
++              if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
++                      mutex_unlock(&sdp->sd_freeze_mutex);
++                      fs_warn(sdp, "jid=%u: Can't replay: filesystem "
++                              "is frozen\n", jd->jd_jid);
+                       goto fail_gunlock_ji;
++              }
+               if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
+                       ro = 1;
+@@ -496,7 +496,7 @@ void gfs2_recover_func(struct work_struct *work)
+                       fs_warn(sdp, "jid=%u: Can't replay: read-only block "
+                               "device\n", jd->jd_jid);
+                       error = -EROFS;
+-                      goto fail_gunlock_thaw;
++                      goto fail_gunlock_nofreeze;
+               }
+               t_tlck = ktime_get();
+@@ -514,7 +514,7 @@ void gfs2_recover_func(struct work_struct *work)
+                       lops_after_scan(jd, error, pass);
+                       if (error) {
+                               up_read(&sdp->sd_log_flush_lock);
+-                              goto fail_gunlock_thaw;
++                              goto fail_gunlock_nofreeze;
+                       }
+               }
+@@ -522,7 +522,7 @@ void gfs2_recover_func(struct work_struct *work)
+               clean_journal(jd, &head);
+               up_read(&sdp->sd_log_flush_lock);
+-              gfs2_freeze_unlock(&thaw_gh);
++              mutex_unlock(&sdp->sd_freeze_mutex);
+               t_rep = ktime_get();
+               fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
+                       "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
+@@ -543,8 +543,8 @@ void gfs2_recover_func(struct work_struct *work)
+       fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
+       goto done;
+-fail_gunlock_thaw:
+-      gfs2_freeze_unlock(&thaw_gh);
++fail_gunlock_nofreeze:
++      mutex_unlock(&sdp->sd_freeze_mutex);
+ fail_gunlock_ji:
+       if (jlocked) {
+               gfs2_glock_dq_uninit(&ji_gh);
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index cdfbfda046945..1a888b9c3d110 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -332,7 +332,12 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
+       struct lfcc *lfcc;
+       LIST_HEAD(list);
+       struct gfs2_log_header_host lh;
+-      int error;
++      int error, error2;
++
++      /*
++       * Grab all the journal glocks in SH mode.  We are *probably* doing
++       * that to prevent recovery.
++       */
+       list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+               lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
+@@ -349,11 +354,13 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
+               list_add(&lfcc->list, &list);
+       }
++      gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++
+       error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
+                                  LM_FLAG_NOEXP | GL_NOPID,
+                                  &sdp->sd_freeze_gh);
+       if (error)
+-              goto out;
++              goto relock_shared;
+       list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+               error = gfs2_jdesc_check(jd);
+@@ -368,8 +375,14 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
+               }
+       }
+-      if (error)
+-              gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++      if (!error)
++              goto out;  /* success */
++
++      gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++
++relock_shared:
++      error2 = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0);
++      gfs2_assert_withdraw(sdp, !error2);
+ out:
+       while (!list_empty(&list)) {
+@@ -600,6 +613,8 @@ static void gfs2_put_super(struct super_block *sb)
+       /*  Release stuff  */
++      gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++
+       iput(sdp->sd_jindex);
+       iput(sdp->sd_statfs_inode);
+       iput(sdp->sd_rindex);
+@@ -654,31 +669,82 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
+       return sdp->sd_log_error;
+ }
+-void gfs2_freeze_func(struct work_struct *work)
++static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
+ {
+-      int error;
+-      struct gfs2_holder freeze_gh;
+-      struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
+       struct super_block *sb = sdp->sd_vfs;
++      int error;
+-      atomic_inc(&sb->s_active);
+-      error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0);
+-      if (error) {
+-              gfs2_assert_withdraw(sdp, 0);
+-      } else {
+-              atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+-              error = thaw_super(sb);
+-              if (error) {
+-                      fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
+-                              error);
+-                      gfs2_assert_withdraw(sdp, 0);
++      atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
++
++      error = freeze_super(sb);
++      if (error)
++              goto fail;
++
++      if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
++              gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
++                             GFS2_LFC_FREEZE_GO_SYNC);
++              if (gfs2_withdrawn(sdp)) {
++                      thaw_super(sb);
++                      error = -EIO;
++                      goto fail;
+               }
+-              gfs2_freeze_unlock(&freeze_gh);
+       }
++      return 0;
++
++fail:
++      atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
++      return error;
++}
++
++static int gfs2_do_thaw(struct gfs2_sbd *sdp)
++{
++      struct super_block *sb = sdp->sd_vfs;
++      int error;
++
++      error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0);
++      if (error)
++              goto fail;
++      error = thaw_super(sb);
++      if (!error)
++              return 0;
++
++fail:
++      fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error);
++      gfs2_assert_withdraw(sdp, 0);
++      return error;
++}
++
++void gfs2_freeze_func(struct work_struct *work)
++{
++      struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
++      struct super_block *sb = sdp->sd_vfs;
++      int error;
++
++      mutex_lock(&sdp->sd_freeze_mutex);
++      error = -EBUSY;
++      if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
++              goto freeze_failed;
++
++      error = gfs2_freeze_locally(sdp);
++      if (error)
++              goto freeze_failed;
++
++      gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++      atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
++
++      error = gfs2_do_thaw(sdp);
++      if (error)
++              goto out;
++
++      atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
++      goto out;
++
++freeze_failed:
++      fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error);
++
++out:
++      mutex_unlock(&sdp->sd_freeze_mutex);
+       deactivate_super(sb);
+-      clear_bit_unlock(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+-      wake_up_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR);
+-      return;
+ }
+ /**
+@@ -692,21 +758,27 @@ static int gfs2_freeze_super(struct super_block *sb)
+       struct gfs2_sbd *sdp = sb->s_fs_info;
+       int error;
+-      mutex_lock(&sdp->sd_freeze_mutex);
+-      if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
+-              error = -EBUSY;
++      if (!mutex_trylock(&sdp->sd_freeze_mutex))
++              return -EBUSY;
++      error = -EBUSY;
++      if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+               goto out;
+-      }
+       for (;;) {
+-              if (gfs2_withdrawn(sdp)) {
+-                      error = -EINVAL;
++              error = gfs2_freeze_locally(sdp);
++              if (error) {
++                      fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
++                              error);
+                       goto out;
+               }
+               error = gfs2_lock_fs_check_clean(sdp);
+               if (!error)
+-                      break;
++                      break;  /* success */
++
++              error = gfs2_do_thaw(sdp);
++              if (error)
++                      goto out;
+               if (error == -EBUSY)
+                       fs_err(sdp, "waiting for recovery before freeze\n");
+@@ -720,8 +792,12 @@ static int gfs2_freeze_super(struct super_block *sb)
+               fs_err(sdp, "retrying...\n");
+               msleep(1000);
+       }
+-      set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
++
+ out:
++      if (!error) {
++              set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
++              atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
++      }
+       mutex_unlock(&sdp->sd_freeze_mutex);
+       return error;
+ }
+@@ -735,17 +811,39 @@ static int gfs2_freeze_super(struct super_block *sb)
+ static int gfs2_thaw_super(struct super_block *sb)
+ {
+       struct gfs2_sbd *sdp = sb->s_fs_info;
++      int error;
+-      mutex_lock(&sdp->sd_freeze_mutex);
+-      if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+-          !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
+-              mutex_unlock(&sdp->sd_freeze_mutex);
+-              return -EINVAL;
++      if (!mutex_trylock(&sdp->sd_freeze_mutex))
++              return -EBUSY;
++      error = -EINVAL;
++      if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags))
++              goto out;
++
++      gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++
++      error = gfs2_do_thaw(sdp);
++
++      if (!error) {
++              clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
++              atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+       }
++out:
++      mutex_unlock(&sdp->sd_freeze_mutex);
++      return error;
++}
++
++void gfs2_thaw_freeze_initiator(struct super_block *sb)
++{
++      struct gfs2_sbd *sdp = sb->s_fs_info;
++
++      mutex_lock(&sdp->sd_freeze_mutex);
++      if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags))
++              goto out;
+       gfs2_freeze_unlock(&sdp->sd_freeze_gh);
++
++out:
+       mutex_unlock(&sdp->sd_freeze_mutex);
+-      return wait_on_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR, TASK_INTERRUPTIBLE);
+ }
+ /**
+diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
+index 58d13fd77aed5..bba58629bc458 100644
+--- a/fs/gfs2/super.h
++++ b/fs/gfs2/super.h
+@@ -46,6 +46,7 @@ extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
+ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
+ extern int gfs2_statfs_sync(struct super_block *sb, int type);
+ extern void gfs2_freeze_func(struct work_struct *work);
++extern void gfs2_thaw_freeze_initiator(struct super_block *sb);
+ extern void free_local_statfs_inodes(struct gfs2_sbd *sdp);
+ extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index ebf87fb7b3bf5..d4cc8667a5b72 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -124,7 +124,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+       struct gfs2_inode *ip;
+       struct gfs2_glock *i_gl;
+       u64 no_formal_ino;
+-      int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+       int ret = 0;
+       int tries;
+@@ -152,24 +151,18 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+        */
+       clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+       if (!sb_rdonly(sdp->sd_vfs)) {
+-              struct gfs2_holder freeze_gh;
+-
+-              gfs2_holder_mark_uninitialized(&freeze_gh);
+-              if (sdp->sd_freeze_gl &&
+-                  !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+-                      ret = gfs2_freeze_lock_shared(sdp, &freeze_gh,
+-                                      log_write_allowed ? 0 : LM_FLAG_TRY);
+-                      if (ret == GLR_TRYFAILED)
+-                              ret = 0;
+-              }
+-              if (!ret)
+-                      gfs2_make_fs_ro(sdp);
++              bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
++
++              gfs2_make_fs_ro(sdp);
++
++              if (locked)
++                      mutex_unlock(&sdp->sd_freeze_mutex);
++
+               /*
+                * Dequeue any pending non-system glock holders that can no
+                * longer be granted because the file system is withdrawn.
+                */
+               gfs2_gl_dq_holders(sdp);
+-              gfs2_freeze_unlock(&freeze_gh);
+       }
+       if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
+@@ -187,15 +180,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+       }
+       sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
+       gfs2_glock_dq(&sdp->sd_jinode_gh);
+-      if (test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) {
+-              /* Make sure gfs2_thaw_super works if partially-frozen */
+-              flush_work(&sdp->sd_freeze_work);
+-              atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+-              thaw_super(sdp->sd_vfs);
+-      } else {
+-              wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
+-                          TASK_UNINTERRUPTIBLE);
+-      }
++      gfs2_thaw_freeze_initiator(sdp->sd_vfs);
++      wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
+       /*
+        * holder_uninit to force glock_put, to force dlm to let go
+-- 
+2.43.0
+
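The commit message above describes the new scheme in prose; condensed into code, the initiating node's path after this patch looks roughly like the sketch below. It is pieced together from the gfs2_freeze_super() hunks as they would read inside fs/gfs2/super.c, with the retry loop and error reporting trimmed, and is not the actual diff.

static int freeze_initiator_sketch(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	if (!mutex_trylock(&sdp->sd_freeze_mutex))
		return -EBUSY;

	/* 1. Freeze the filesystem locally first. */
	error = gfs2_freeze_locally(sdp);
	if (error)
		goto out;

	/*
	 * 2. Trade the shared freeze glock for an exclusive one.  The
	 *    contention this causes makes every other node run
	 *    freeze_go_callback() and schedule gfs2_freeze_func().
	 */
	error = gfs2_lock_fs_check_clean(sdp);
out:
	if (!error) {
		set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
		atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
	}
	mutex_unlock(&sdp->sd_freeze_mutex);
	return error;
}
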
diff --git a/queue-6.1/gfs2-stop-using-gfs2_make_fs_ro-for-withdraw.patch b/queue-6.1/gfs2-stop-using-gfs2_make_fs_ro-for-withdraw.patch
new file mode 100644 (file)
index 0000000..5c21dfa
--- /dev/null
@@ -0,0 +1,132 @@
+From d5c37df10b8a646f314ae4b7293b6f111e8e5f75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Aug 2023 16:39:20 +0200
+Subject: gfs2: Stop using gfs2_make_fs_ro for withdraw
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit f66af88e33212b57ea86da2c5d66c0d9d5c46344 ]
+
+[   81.372851][ T5532] CPU: 1 PID: 5532 Comm: syz-executor.0 Not tainted 6.2.0-rc1-syzkaller-dirty #0
+[   81.382080][ T5532] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023
+[   81.392343][ T5532] Call Trace:
+[   81.395654][ T5532]  <TASK>
+[   81.398603][ T5532]  dump_stack_lvl+0x1b1/0x290
+[   81.418421][ T5532]  gfs2_assert_warn_i+0x19a/0x2e0
+[   81.423480][ T5532]  gfs2_quota_cleanup+0x4c6/0x6b0
+[   81.428611][ T5532]  gfs2_make_fs_ro+0x517/0x610
+[   81.457802][ T5532]  gfs2_withdraw+0x609/0x1540
+[   81.481452][ T5532]  gfs2_inode_refresh+0xb2d/0xf60
+[   81.506658][ T5532]  gfs2_instantiate+0x15e/0x220
+[   81.511504][ T5532]  gfs2_glock_wait+0x1d9/0x2a0
+[   81.516352][ T5532]  do_sync+0x485/0xc80
+[   81.554943][ T5532]  gfs2_quota_sync+0x3da/0x8b0
+[   81.559738][ T5532]  gfs2_sync_fs+0x49/0xb0
+[   81.564063][ T5532]  sync_filesystem+0xe8/0x220
+[   81.568740][ T5532]  generic_shutdown_super+0x6b/0x310
+[   81.574112][ T5532]  kill_block_super+0x79/0xd0
+[   81.578779][ T5532]  deactivate_locked_super+0xa7/0xf0
+[   81.584064][ T5532]  cleanup_mnt+0x494/0x520
+[   81.593753][ T5532]  task_work_run+0x243/0x300
+[   81.608837][ T5532]  exit_to_user_mode_loop+0x124/0x150
+[   81.614232][ T5532]  exit_to_user_mode_prepare+0xb2/0x140
+[   81.619820][ T5532]  syscall_exit_to_user_mode+0x26/0x60
+[   81.625287][ T5532]  do_syscall_64+0x49/0xb0
+[   81.629710][ T5532]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+In this backtrace, gfs2_quota_sync() takes quota data references and
+then calls do_sync().  Function do_sync() encounters filesystem
+corruption and withdraws the filesystem, which (among other things) calls
+gfs2_quota_cleanup().  Function gfs2_quota_cleanup() wrongly assumes
+that nobody is holding any quota data references anymore, and destroys
+all quota data objects.  When gfs2_quota_sync() then resumes and
+dereferences the quota data objects it is holding, those objects are no
+longer there.
+
+Function gfs2_quota_cleanup() deals with resource deallocation and can
+easily be delayed until gfs2_put_super() in the case of a filesystem
+withdraw.  In fact, most of the other work gfs2_make_fs_ro() does is
+unnecessary during a withdraw as well, so change signal_our_withdraw()
+to skip gfs2_make_fs_ro() and perform the necessary steps directly
+instead.
+
+Thanks to Edward Adam Davis <eadavis@sina.com> for the initial patches.
+
+Link: https://lore.kernel.org/all/0000000000002b5e2405f14e860f@google.com
+Reported-by: syzbot+3f6a670108ce43356017@syzkaller.appspotmail.com
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/super.c |  9 ++-------
+ fs/gfs2/util.c  | 19 ++++++++++++++++++-
+ 2 files changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index 1a888b9c3d110..f9b47df485d17 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -563,15 +563,8 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+                                  gfs2_log_is_empty(sdp),
+                                  HZ * 5);
+               gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
+-      } else {
+-              wait_event_timeout(sdp->sd_log_waitq,
+-                                 gfs2_log_is_empty(sdp),
+-                                 HZ * 5);
+       }
+       gfs2_quota_cleanup(sdp);
+-
+-      if (!log_write_allowed)
+-              sdp->sd_vfs->s_flags |= SB_RDONLY;
+ }
+ /**
+@@ -607,6 +600,8 @@ static void gfs2_put_super(struct super_block *sb)
+       } else {
+               gfs2_quota_cleanup(sdp);
+       }
++      if (gfs2_withdrawn(sdp))
++              gfs2_quota_cleanup(sdp);
+       WARN_ON(gfs2_withdrawing(sdp));
+       /*  At this point, we're through modifying the disk  */
+diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
+index d4cc8667a5b72..30b8821c54ad4 100644
+--- a/fs/gfs2/util.c
++++ b/fs/gfs2/util.c
+@@ -9,6 +9,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/completion.h>
+ #include <linux/buffer_head.h>
++#include <linux/kthread.h>
+ #include <linux/crc32.h>
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/delay.h>
+@@ -153,7 +154,23 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
+       if (!sb_rdonly(sdp->sd_vfs)) {
+               bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
+-              gfs2_make_fs_ro(sdp);
++              if (sdp->sd_quotad_process &&
++                  current != sdp->sd_quotad_process) {
++                      kthread_stop(sdp->sd_quotad_process);
++                      sdp->sd_quotad_process = NULL;
++              }
++
++              if (sdp->sd_logd_process &&
++                  current != sdp->sd_logd_process) {
++                      kthread_stop(sdp->sd_logd_process);
++                      sdp->sd_logd_process = NULL;
++              }
++
++              wait_event_timeout(sdp->sd_log_waitq,
++                                 gfs2_log_is_empty(sdp),
++                                 HZ * 5);
++
++              sdp->sd_vfs->s_flags |= SB_RDONLY;
+               if (locked)
+                       mutex_unlock(&sdp->sd_freeze_mutex);
+-- 
+2.43.0
+
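Put next to the backtrace, the failure this patch removes is an ordering problem; the sequence below restates the commit message as a comment and contains no new code.

/*
 * Before this patch:
 *
 *   gfs2_quota_sync()
 *     takes quota data references
 *     do_sync()
 *       hits corruption -> gfs2_withdraw()
 *         gfs2_make_fs_ro()
 *           gfs2_quota_cleanup()    destroys all quota data objects
 *     keeps using its references    use-after-free
 *
 * After this patch, signal_our_withdraw() stops the quotad/logd threads and
 * waits for the log to empty itself, and gfs2_quota_cleanup() is deferred to
 * gfs2_put_super(), where no such references can remain.
 */
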
diff --git a/queue-6.1/igc-correct-the-launchtime-offset.patch b/queue-6.1/igc-correct-the-launchtime-offset.patch
new file mode 100644 (file)
index 0000000..0c2a2c8
--- /dev/null
@@ -0,0 +1,168 @@
+From 5cf4817309de7a399af32372e857714e8db16469 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Sep 2022 10:49:40 +0800
+Subject: igc: Correct the launchtime offset
+
+From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+
+[ Upstream commit 790835fcc0cb9992349ae3c9010dbc7321aaa24d ]
+
+The launchtime offset should be corrected according to sections 7.5.2.6
+Transmit Scheduling Latency of the Intel Ethernet I225/I226 Software
+User Manual.
+
+Software can compensate for the latency between transmission scheduling
+and the time the packet is transmitted to the network by setting this
+GTxOffset register. Without setting this register, there may be a
+significant delay between packet scheduling and the point at which the
+packet actually reaches the network.
+
+This patch helps to reduce the latency for each of the link speeds.
+
+Before:
+
+10Mbps   : 11000 - 13800 nanosecond
+100Mbps  : 1300 - 1700 nanosecond
+1000Mbps : 190 - 600 nanosecond
+2500Mbps : 1400 - 1700 nanosecond
+
+After:
+
+10Mbps   : less than 750 nanosecond
+100Mbps  : less than 192 nanosecond
+1000Mbps : less than 128 nanosecond
+2500Mbps : less than 128 nanosecond
+
+Test Setup:
+
+Talker : Use l2_tai.c to generate the launchtime into packet payload.
+Listener: Use timedump.c to compute the delta between packet arrival and
+LaunchTime packet payload.
+
+Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Acked-by: Sasha Neftin <sasha.neftin@intel.com>
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: e037a26ead18 ("igc: Fix packet still tx after gate close by reducing i226 MAC retry buffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_defines.h |  9 ++++++
+ drivers/net/ethernet/intel/igc/igc_main.c    |  7 +++++
+ drivers/net/ethernet/intel/igc/igc_regs.h    |  1 +
+ drivers/net/ethernet/intel/igc/igc_tsn.c     | 30 ++++++++++++++++++++
+ drivers/net/ethernet/intel/igc/igc_tsn.h     |  1 +
+ 5 files changed, 48 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
+index efdabcbd66ddd..63fa7608861b2 100644
+--- a/drivers/net/ethernet/intel/igc/igc_defines.h
++++ b/drivers/net/ethernet/intel/igc/igc_defines.h
+@@ -402,6 +402,15 @@
+ #define IGC_DTXMXPKTSZ_TSN    0x19 /* 1600 bytes of max TX DMA packet size */
+ #define IGC_DTXMXPKTSZ_DEFAULT        0x98 /* 9728-byte Jumbo frames */
++/* Transmit Scheduling Latency */
++/* Latency between transmission scheduling (LaunchTime) and the time
++ * the packet is transmitted to the network in nanosecond.
++ */
++#define IGC_TXOFFSET_SPEED_10 0x000034BC
++#define IGC_TXOFFSET_SPEED_100        0x00000578
++#define IGC_TXOFFSET_SPEED_1000       0x0000012C
++#define IGC_TXOFFSET_SPEED_2500       0x00000578
++
+ /* Time Sync Interrupt Causes */
+ #define IGC_TSICR_SYS_WRAP    BIT(0) /* SYSTIM Wrap around. */
+ #define IGC_TSICR_TXTS                BIT(1) /* Transmit Timestamp. */
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index e052f49cc08d7..39f8f28288aaa 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -5586,6 +5586,13 @@ static void igc_watchdog_task(struct work_struct *work)
+                               break;
+                       }
++                      /* Once the launch time has been set on the wire, there
++                       * is a delay before the link speed can be determined
++                       * based on link-up activity. Write into the register
++                       * as soon as we know the correct link speed.
++                       */
++                      igc_tsn_adjust_txtime_offset(adapter);
++
+                       if (adapter->link_speed != SPEED_1000)
+                               goto no_wait;
+diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
+index c0d8214148d1d..01c86d36856d2 100644
+--- a/drivers/net/ethernet/intel/igc/igc_regs.h
++++ b/drivers/net/ethernet/intel/igc/igc_regs.h
+@@ -224,6 +224,7 @@
+ /* Transmit Scheduling Registers */
+ #define IGC_TQAVCTRL          0x3570
+ #define IGC_TXQCTL(_n)                (0x3344 + 0x4 * (_n))
++#define IGC_GTXOFFSET         0x3310
+ #define IGC_BASET_L           0x3314
+ #define IGC_BASET_H           0x3318
+ #define IGC_QBVCYCLET         0x331C
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 31ea0781b65ec..83f02b00735d3 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -49,6 +49,35 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+       return new_flags;
+ }
++void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++      u16 txoffset;
++
++      if (!is_any_launchtime(adapter))
++              return;
++
++      switch (adapter->link_speed) {
++      case SPEED_10:
++              txoffset = IGC_TXOFFSET_SPEED_10;
++              break;
++      case SPEED_100:
++              txoffset = IGC_TXOFFSET_SPEED_100;
++              break;
++      case SPEED_1000:
++              txoffset = IGC_TXOFFSET_SPEED_1000;
++              break;
++      case SPEED_2500:
++              txoffset = IGC_TXOFFSET_SPEED_2500;
++              break;
++      default:
++              txoffset = 0;
++              break;
++      }
++
++      wr32(IGC_GTXOFFSET, txoffset);
++}
++
+ /* Returns the TSN specific registers to their default values after
+  * the adapter is reset.
+  */
+@@ -58,6 +87,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+       u32 tqavctrl;
+       int i;
++      wr32(IGC_GTXOFFSET, 0);
+       wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+       wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
+index 1512307f5a528..b53e6af560b73 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.h
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
+@@ -6,5 +6,6 @@
+ int igc_tsn_offload_apply(struct igc_adapter *adapter);
+ int igc_tsn_reset(struct igc_adapter *adapter);
++void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter);
+ #endif /* _IGC_BASE_H */
+-- 
+2.43.0
+
diff --git a/queue-6.1/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch b/queue-6.1/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch
new file mode 100644 (file)
index 0000000..a3256c7
--- /dev/null
@@ -0,0 +1,149 @@
+From 460cb01d0cdd741674bd7cd41b8fc856ebe10de2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 6 Jul 2024 11:38:07 -0400
+Subject: igc: Fix packet still tx after gate close by reducing i226 MAC retry
+ buffer
+
+From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+
+[ Upstream commit e037a26ead187901f83cad9c503ccece5ff6817a ]
+
+Testing uncovered that even when the taprio gate is closed, some packets
+still transmit.
+
+According to i225/6 hardware errata [1], traffic might overflow the
+planned QBV window. This happens because the MAC maintains an internal
+buffer, primarily to support half-duplex retries. Therefore, even when the
+gate closes, residual MAC data in the buffer may still be transmitted.
+
+To mitigate this for i226, reduce the MAC's internal buffer from 192 bytes
+to the recommended 88 bytes by modifying the RETX_CTL register value.
+
+This follows guidelines from:
+[1] Ethernet Controller I225/I226 Spec Update Rev 2.1 Errata Item 9:
+    TSN: Packet Transmission Might Cross Qbv Window
+[2] I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control
+
+Note that the RETX_CTL register can't be used in TSN mode because the
+half-duplex feature cannot coexist with TSN.
+
+Test Steps:
+1.  Send taprio cmd to board A:
+    tc qdisc replace dev enp1s0 parent root handle 100 taprio \
+    num_tc 4 \
+    map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
+    queues 1@0 1@1 1@2 1@3 \
+    base-time 0 \
+    sched-entry S 0x07 500000 \
+    sched-entry S 0x0f 500000 \
+    flags 0x2 \
+    txtime-delay 0
+
+    Note that for TC3, gate should open for 500us and close for another
+    500us.
+
+3.  Take tcpdump log on Board B.
+
+4.  Send udp packets via UDP tai app from Board A to Board B.
+
+5.  Analyze tcpdump log via wireshark log on Board B. Ensure that the
+    total time from the first to the last packet received during one cycle
+    for TC3 does not exceed 500us.
+
+Fixes: 43546211738e ("igc: Add new device ID's")
+Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_defines.h |  6 ++++
+ drivers/net/ethernet/intel/igc/igc_tsn.c     | 34 ++++++++++++++++++++
+ 2 files changed, 40 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
+index 63fa7608861b2..8187a658dcbd5 100644
+--- a/drivers/net/ethernet/intel/igc/igc_defines.h
++++ b/drivers/net/ethernet/intel/igc/igc_defines.h
+@@ -402,6 +402,12 @@
+ #define IGC_DTXMXPKTSZ_TSN    0x19 /* 1600 bytes of max TX DMA packet size */
+ #define IGC_DTXMXPKTSZ_DEFAULT        0x98 /* 9728-byte Jumbo frames */
++/* Retry Buffer Control */
++#define IGC_RETX_CTL                  0x041C
++#define IGC_RETX_CTL_WATERMARK_MASK   0xF
++#define IGC_RETX_CTL_QBVFULLTH_SHIFT  8 /* QBV Retry Buffer Full Threshold */
++#define IGC_RETX_CTL_QBVFULLEN        0x1000 /* Enable QBV Retry Buffer Full Threshold */
++
+ /* Transmit Scheduling Latency */
+ /* Latency between transmission scheduling (LaunchTime) and the time
+  * the packet is transmitted to the network in nanosecond.
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 83f02b00735d3..abdaaf7db4125 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -78,6 +78,15 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
+       wr32(IGC_GTXOFFSET, txoffset);
+ }
++static void igc_tsn_restore_retx_default(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++      u32 retxctl;
++
++      retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK;
++      wr32(IGC_RETX_CTL, retxctl);
++}
++
+ /* Returns the TSN specific registers to their default values after
+  * the adapter is reset.
+  */
+@@ -91,6 +100,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+       wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+       wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
++      if (igc_is_device_id_i226(hw))
++              igc_tsn_restore_retx_default(adapter);
++
+       tqavctrl = rd32(IGC_TQAVCTRL);
+       tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
+                     IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS);
+@@ -111,6 +123,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+       return 0;
+ }
++/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes
++ * to 88 Bytes by setting RETX_CTL register using the recommendation from:
++ * a) Ethernet Controller I225/I226 Specification Update Rev 2.1
++ *    Item 9: TSN: Packet Transmission Might Cross the Qbv Window
++ * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control
++ */
++static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter)
++{
++      struct igc_hw *hw = &adapter->hw;
++      u32 retxctl, watermark;
++
++      retxctl = rd32(IGC_RETX_CTL);
++      watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK;
++      /* Set QBVFULLTH value using watermark and set QBVFULLEN */
++      retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) |
++                 IGC_RETX_CTL_QBVFULLEN;
++      wr32(IGC_RETX_CTL, retxctl);
++}
++
+ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ {
+       struct igc_hw *hw = &adapter->hw;
+@@ -124,6 +155,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+       wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
+       wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
++      if (igc_is_device_id_i226(hw))
++              igc_tsn_set_retx_qbvfullthreshold(adapter);
++
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+               u32 txqctl = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.1/input-bcm5974-check-endpoint-type-before-starting-tr.patch b/queue-6.1/input-bcm5974-check-endpoint-type-before-starting-tr.patch
new file mode 100644 (file)
index 0000000..e87bead
--- /dev/null
@@ -0,0 +1,102 @@
+From 7a657bc8aefd38591aa4992b226a4e5dbdc65d03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 14 Oct 2023 12:20:15 +0200
+Subject: Input: bcm5974 - check endpoint type before starting traffic
+
+From: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+
+[ Upstream commit 2b9c3eb32a699acdd4784d6b93743271b4970899 ]
+
+syzbot has found a type mismatch between a USB pipe and the transfer
+endpoint, which is triggered by the bcm5974 driver[1].
+
+This driver expects the device to provide input interrupt endpoints and
+if that is not the case, the driver registration should terminate.
+
+Repros are available to reproduce this issue with a certain setup for
+the dummy_hcd, leading to an interrupt/bulk mismatch which is caught in
+the USB core after calling usb_submit_urb() with the following message:
+"BOGUS urb xfer, pipe 1 != type 3"
+
+Some other device drivers (like the appletouch driver bcm5974 is mainly
+based on) provide some checking mechanism to make sure that an IN
+interrupt endpoint is available. In this particular case the endpoint
+addresses are provided by a config table, so the checking can be
+targeted to the provided endpoints.
+
+Add some basic checking to guarantee that the endpoints available match
+the expected type for both the trackpad and button endpoints.
+
+This issue was only found for the trackpad endpoint, but the checking
+has been added to the button endpoint as well for the same reasons.
+
+Given that there was never a check for the endpoint type, this bug has
+been there since the first implementation of the driver (f89bd95c5c94).
+
+[1] https://syzkaller.appspot.com/bug?extid=348331f63b034f89b622
+
+Fixes: f89bd95c5c94 ("Input: bcm5974 - add driver for Macbook Air and Pro Penryn touchpads")
+Signed-off-by: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+Reported-and-tested-by: syzbot+348331f63b034f89b622@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/r/20231007-topic-bcm5974_bulk-v3-1-d0f38b9d2935@gmail.com
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
+index ca150618d32f1..953992b458e9f 100644
+--- a/drivers/input/mouse/bcm5974.c
++++ b/drivers/input/mouse/bcm5974.c
+@@ -19,6 +19,7 @@
+  * Copyright (C) 2006    Nicolas Boichat (nicolas@boichat.ch)
+  */
++#include "linux/usb.h"
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/slab.h>
+@@ -193,6 +194,8 @@ enum tp_type {
+ /* list of device capability bits */
+ #define HAS_INTEGRATED_BUTTON 1
++/* maximum number of supported endpoints (currently trackpad and button) */
++#define MAX_ENDPOINTS 2
+ /* trackpad finger data block size */
+ #define FSIZE_TYPE1           (14 * sizeof(__le16))
+@@ -891,6 +894,18 @@ static int bcm5974_resume(struct usb_interface *iface)
+       return error;
+ }
++static bool bcm5974_check_endpoints(struct usb_interface *iface,
++                                  const struct bcm5974_config *cfg)
++{
++      u8 ep_addr[MAX_ENDPOINTS + 1] = {0};
++
++      ep_addr[0] = cfg->tp_ep;
++      if (cfg->tp_type == TYPE1)
++              ep_addr[1] = cfg->bt_ep;
++
++      return usb_check_int_endpoints(iface, ep_addr);
++}
++
+ static int bcm5974_probe(struct usb_interface *iface,
+                        const struct usb_device_id *id)
+ {
+@@ -903,6 +918,11 @@ static int bcm5974_probe(struct usb_interface *iface,
+       /* find the product index */
+       cfg = bcm5974_get_config(udev);
++      if (!bcm5974_check_endpoints(iface, cfg)) {
++              dev_err(&iface->dev, "Unexpected non-int endpoint\n");
++              return -ENODEV;
++      }
++
+       /* allocate memory for our device state and initialize it */
+       dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL);
+       input_dev = input_allocate_device();
+-- 
+2.43.0
+
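The check added above builds on usb_check_int_endpoints(): the address array is zero-terminated (hence MAX_ENDPOINTS + 1 in the patch) and the helper succeeds only if every listed endpoint exists on the interface and is of interrupt type. A minimal usage sketch; the endpoint addresses below are made up and are not from the bcm5974 config table:

#include <linux/usb.h>

static int probe_sketch(struct usb_interface *iface)
{
	u8 ep_addr[] = { 0x81, 0x84, 0 };	/* two IN endpoints + terminator */

	if (!usb_check_int_endpoints(iface, ep_addr))
		return -ENODEV;			/* not all are interrupt endpoints */
	return 0;
}
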
diff --git a/queue-6.1/jfs-fix-null-ptr-deref-in-dtinsertentry.patch b/queue-6.1/jfs-fix-null-ptr-deref-in-dtinsertentry.patch
new file mode 100644 (file)
index 0000000..c4701ea
--- /dev/null
@@ -0,0 +1,51 @@
+From 363bfbb9232fd4a4fbdf6e203f26c39a1b6a2771 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Apr 2024 20:05:28 +0800
+Subject: jfs: fix null ptr deref in dtInsertEntry
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit ce6dede912f064a855acf6f04a04cbb2c25b8c8c ]
+
+[syzbot reported]
+general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
+CPU: 0 PID: 5061 Comm: syz-executor404 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
+RIP: 0010:dtInsertEntry+0xd0c/0x1780 fs/jfs/jfs_dtree.c:3713
+...
+[Analyze]
+In dtInsertEntry(), when the pointer h has the same value as p, writing the
+name in UniStrncpy_to_le() clears p->header.flag. The previously true test
+"p->header.flag & BT_LEAF" therefore becomes false once the name has been
+written, so the second evaluation of this condition takes an incorrect
+branch and accesses the uninitialized object ih.
+
+[Fix]
+After getting the page, check the freelist first; if freelist == 0, exit
+dtInsert() and return -EINVAL.
+
+Reported-by: syzbot+bba84aef3a26fb93deb9@syzkaller.appspotmail.com
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_dtree.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
+index 031d8f570f581..5d3127ca68a42 100644
+--- a/fs/jfs/jfs_dtree.c
++++ b/fs/jfs/jfs_dtree.c
+@@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip,
+        * the full page.
+        */
+       DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
++      if (p->header.freelist == 0)
++              return -EINVAL;
+       /*
+        *      insert entry for new key
+-- 
+2.43.0
+
diff --git a/queue-6.1/jfs-fix-shift-out-of-bounds-in-dbdiscardag.patch b/queue-6.1/jfs-fix-shift-out-of-bounds-in-dbdiscardag.patch
new file mode 100644 (file)
index 0000000..b8048a0
--- /dev/null
@@ -0,0 +1,40 @@
+From c1f790fa4b19e4aa7cd904092a769a4a1dd347ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 09:42:05 -0700
+Subject: jfs: Fix shift-out-of-bounds in dbDiscardAG
+
+From: Pei Li <peili.dev@gmail.com>
+
+[ Upstream commit 7063b80268e2593e58bee8a8d709c2f3ff93e2f2 ]
+
+When searching for the next smaller log2 block, BLKSTOL2() returned 0,
+causing the shift exponent to become -1, i.e. negative.
+
+This patch fixes the issue by exiting the loop directly when a negative
+shift exponent is found.
+
+Reported-by: syzbot+61be3359d2ee3467e7e4@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=61be3359d2ee3467e7e4
+Signed-off-by: Pei Li <peili.dev@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_dmap.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index 8d064c9e9605d..7a3f4f62c34bc 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -1626,6 +1626,8 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
+               } else if (rc == -ENOSPC) {
+                       /* search for next smaller log2 block */
+                       l2nb = BLKSTOL2(nblocks) - 1;
++                      if (unlikely(l2nb < 0))
++                              break;
+                       nblocks = 1LL << l2nb;
+               } else {
+                       /* Trim any already allocated blocks */
+-- 
+2.43.0
+
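The two added lines encode a plain C rule: a shift by a negative count is undefined behaviour, so l2nb has to be validated before it is used as a shift exponent. A standalone illustration of the guarded pattern; next_smaller_blocks() is a made-up helper, not a jfs function:

static long long next_smaller_blocks(int l2nb)
{
	if (l2nb < 0)		/* without this check, 1LL << -1 would be */
		return 0;	/* undefined behaviour; bail out instead   */
	return 1LL << l2nb;
}
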
diff --git a/queue-6.1/jfs-fix-shift-out-of-bounds-in-dbjoin.patch b/queue-6.1/jfs-fix-shift-out-of-bounds-in-dbjoin.patch
new file mode 100644 (file)
index 0000000..90539f3
--- /dev/null
@@ -0,0 +1,51 @@
+From 0add9d06ade5a3f49114ee9837a6f10ea7dbbbc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 20:09:37 +0530
+Subject: jfs: fix shift-out-of-bounds in dbJoin
+
+From: Manas Ghandat <ghandatmanas@gmail.com>
+
+[ Upstream commit cca974daeb6c43ea971f8ceff5a7080d7d49ee30 ]
+
+Currently, while joining a leaf in the buddy system, there is a
+shift-out-of-bounds error in the calculation of BUDSIZE. Add the required
+check on the BUDSIZE arguments and fix the documentation as well.
+
+Reported-by: syzbot+411debe54d318eaed386@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=411debe54d318eaed386
+Signed-off-by: Manas Ghandat <ghandatmanas@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_dmap.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index 4462274e325ac..8d064c9e9605d 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -2763,7 +2763,9 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
+  *    leafno  - the number of the leaf to be updated.
+  *    newval  - the new value for the leaf.
+  *
+- * RETURN VALUES: none
++ * RETURN VALUES:
++ *  0         - success
++ *    -EIO    - i/o error
+  */
+ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
+ {
+@@ -2790,6 +2792,10 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
+                * get the buddy size (number of words covered) of
+                * the new value.
+                */
++
++              if ((newval - tp->dmt_budmin) > BUDMIN)
++                      return -EIO;
++
+               budsz = BUDSIZE(newval, tp->dmt_budmin);
+               /* try to join.
+-- 
+2.43.0
+
diff --git a/queue-6.1/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch b/queue-6.1/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch
new file mode 100644 (file)
index 0000000..6875e8b
--- /dev/null
@@ -0,0 +1,188 @@
+From 81a9c8c952ed8b7ecfa0b733e8be502335797869 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 12:36:12 -0400
+Subject: mlxbf_gige: disable RX filters until RX path initialized
+
+From: David Thompson <davthompson@nvidia.com>
+
+[ Upstream commit df934abb185c71c9f2fa07a5013672d0cbd36560 ]
+
+A recent change to the driver exposed a bug where the MAC RX
+filters (unicast MAC, broadcast MAC, and multicast MAC) are
+configured and enabled before the RX path is fully initialized.
+The result of this bug is that, after the PHY is started, packets
+that match these MAC RX filters start to flow into the RX FIFO.
+And then, after rx_init() is completed, these packets will go
+into the driver RX ring as well. If enough packets are received
+to fill the RX ring (default size is 128 packets) before the call
+to request_irq() completes, the driver RX function becomes stuck.
+
+This bug is intermittent but is most likely to be seen where the
+oob_net0 interface is connected to a busy network with lots of
+broadcast and multicast traffic.
+
+All the MAC RX filters must be disabled until the RX path is ready,
+i.e. all initialization is done and all the IRQs are installed.
+
+Fixes: f7442a634ac0 ("mlxbf_gige: call request_irq() after NAPI initialized")
+Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Signed-off-by: David Thompson <davthompson@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20240809163612.12852-1-davthompson@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h |  8 +++
+ .../mellanox/mlxbf_gige/mlxbf_gige_main.c     | 10 ++++
+ .../mellanox/mlxbf_gige/mlxbf_gige_regs.h     |  2 +
+ .../mellanox/mlxbf_gige/mlxbf_gige_rx.c       | 50 ++++++++++++++++---
+ 4 files changed, 64 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+index 483fca0cc5a0c..bf1a2883f0820 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+@@ -39,6 +39,7 @@
+  */
+ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0
+ #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1
++#define MLXBF_GIGE_MAX_FILTER_IDX       3
+ /* Define for broadcast MAC literal */
+ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF
+@@ -148,6 +149,13 @@ enum mlxbf_gige_res {
+ int mlxbf_gige_mdio_probe(struct platform_device *pdev,
+                         struct mlxbf_gige *priv);
+ void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv);
++
++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv);
++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv);
++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv,
++                                   unsigned int index);
++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv,
++                                    unsigned int index);
+ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+                                 unsigned int index, u64 dmac);
+ void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+index d6b4d163bbbfd..6d90576fda597 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+@@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
+       if (err)
+               goto napi_deinit;
++      mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX);
++      mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX);
++      mlxbf_gige_enable_multicast_rx(priv);
++
+       /* Set bits in INT_EN that we care about */
+       int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
+                MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
+@@ -293,6 +297,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
+       void __iomem *plu_base;
+       void __iomem *base;
+       int addr, phy_irq;
++      unsigned int i;
+       int err;
+       base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC);
+@@ -335,6 +340,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
+       priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ;
+       priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ;
++      for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++)
++              mlxbf_gige_disable_mac_rx_filter(priv, i);
++      mlxbf_gige_disable_multicast_rx(priv);
++      mlxbf_gige_disable_promisc(priv);
++
+       /* Write initial MAC address to hardware */
+       mlxbf_gige_initial_mac(priv);
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+index 7be3a793984d5..d27535a1fb86f 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+@@ -59,6 +59,8 @@
+ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL           BIT(1)
+ #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START     0x0520
+ #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END       0x0528
++#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL              0x0530
++#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST         BIT(1)
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC           0x0540
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN        BIT(0)
+ #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS           0x0548
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+index 6999843584934..eb62620b63c7f 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+@@ -11,15 +11,31 @@
+ #include "mlxbf_gige.h"
+ #include "mlxbf_gige_regs.h"
+-void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+-                                unsigned int index, u64 dmac)
++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv)
+ {
+       void __iomem *base = priv->base;
+-      u64 control;
++      u64 data;
+-      /* Write destination MAC to specified MAC RX filter */
+-      writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
+-             (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
++      data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++      data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST;
++      writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++}
++
++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv)
++{
++      void __iomem *base = priv->base;
++      u64 data;
++
++      data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++      data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST;
++      writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL);
++}
++
++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv,
++                                   unsigned int index)
++{
++      void __iomem *base = priv->base;
++      u64 control;
+       /* Enable MAC receive filter mask for specified index */
+       control = readq(base + MLXBF_GIGE_CONTROL);
+@@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+       writeq(control, base + MLXBF_GIGE_CONTROL);
+ }
++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv,
++                                    unsigned int index)
++{
++      void __iomem *base = priv->base;
++      u64 control;
++
++      /* Disable MAC receive filter mask for specified index */
++      control = readq(base + MLXBF_GIGE_CONTROL);
++      control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index);
++      writeq(control, base + MLXBF_GIGE_CONTROL);
++}
++
++void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
++                                unsigned int index, u64 dmac)
++{
++      void __iomem *base = priv->base;
++
++      /* Write destination MAC to specified MAC RX filter */
++      writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
++             (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
++}
++
+ void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+                                 unsigned int index, u64 *dmac)
+ {
+-- 
+2.43.0
+
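
The ordering this patch enforces is a general NIC bring-up rule: keep every RX filter disabled from probe until the RX rings, NAPI and IRQs are all in place, and only then open the filters. A rough sketch of that shape (all toy_nic_* names are hypothetical placeholders, not the mlxbf_gige API):

  #include <linux/types.h>

  /* All names below are hypothetical; this only sketches the ordering. */
  struct toy_nic;

  int  toy_nic_rx_init(struct toy_nic *nic);          /* rings + NAPI */
  int  toy_nic_request_irqs(struct toy_nic *nic);
  void toy_nic_set_rx_filters(struct toy_nic *nic, bool enable);

  static int toy_nic_probe_rx_state(struct toy_nic *nic)
  {
          /* Probe: nothing may reach the RX FIFO before the RX path exists. */
          toy_nic_set_rx_filters(nic, false);
          return 0;
  }

  static int toy_nic_open(struct toy_nic *nic)
  {
          int err;

          err = toy_nic_rx_init(nic);
          if (err)
                  return err;

          err = toy_nic_request_irqs(nic);            /* RX path now complete */
          if (err)
                  return err;

          /* Only now let matching unicast/broadcast/multicast frames in. */
          toy_nic_set_rx_filters(nic, true);
          return 0;
  }
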
diff --git a/queue-6.1/mlxbf_gige-remove-two-unused-function-declarations.patch b/queue-6.1/mlxbf_gige-remove-two-unused-function-declarations.patch
new file mode 100644 (file)
index 0000000..ffad8c1
--- /dev/null
@@ -0,0 +1,40 @@
+From a1363e9074c9f17c8b9f2ae2915eaef2bfda7727 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Aug 2023 22:52:49 +0800
+Subject: mlxbf_gige: Remove two unused function declarations
+
+From: Yue Haibing <yuehaibing@huawei.com>
+
+[ Upstream commit 98261be155f8de38f11b6542d4a8935e5532687b ]
+
+Commit f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
+declared but never implemented these.
+
+Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Link: https://lore.kernel.org/r/20230808145249.41596-1-yuehaibing@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: df934abb185c ("mlxbf_gige: disable RX filters until RX path initialized")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+index 5a1027b072155..483fca0cc5a0c 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+@@ -148,9 +148,6 @@ enum mlxbf_gige_res {
+ int mlxbf_gige_mdio_probe(struct platform_device *pdev,
+                         struct mlxbf_gige *priv);
+ void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv);
+-irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id);
+-void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv);
+-
+ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+                                 unsigned int index, u64 dmac);
+ void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+-- 
+2.43.0
+
diff --git a/queue-6.1/mm-khugepaged-fix-kernel-bug-in-hpage_collapse_scan_.patch b/queue-6.1/mm-khugepaged-fix-kernel-bug-in-hpage_collapse_scan_.patch
new file mode 100644 (file)
index 0000000..dfc6433
--- /dev/null
@@ -0,0 +1,143 @@
+From 0f4e4d9e96e9773b2f9dd19979c9348680cf5902 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Mar 2023 18:53:30 +0400
+Subject: mm: khugepaged: fix kernel BUG in hpage_collapse_scan_file()
+
+From: Ivan Orlov <ivan.orlov0322@gmail.com>
+
+[ Upstream commit 2ce0bdfebc74f6cbd4e97a4e767d505a81c38cf2 ]
+
+Syzkaller reported the following issue:
+
+kernel BUG at mm/khugepaged.c:1823!
+invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+CPU: 1 PID: 5097 Comm: syz-executor220 Not tainted 6.2.0-syzkaller-13154-g857f1268a591 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/16/2023
+RIP: 0010:collapse_file mm/khugepaged.c:1823 [inline]
+RIP: 0010:hpage_collapse_scan_file+0x67c8/0x7580 mm/khugepaged.c:2233
+Code: 00 00 89 de e8 c9 66 a3 ff 31 ff 89 de e8 c0 66 a3 ff 45 84 f6 0f 85 28 0d 00 00 e8 22 64 a3 ff e9 dc f7 ff ff e8 18 64 a3 ff <0f> 0b f3 0f 1e fa e8 0d 64 a3 ff e9 93 f6 ff ff f3 0f 1e fa 4c 89
+RSP: 0018:ffffc90003dff4e0 EFLAGS: 00010093
+RAX: ffffffff81e95988 RBX: 00000000000001c1 RCX: ffff8880205b3a80
+RDX: 0000000000000000 RSI: 00000000000001c0 RDI: 00000000000001c1
+RBP: ffffc90003dff830 R08: ffffffff81e90e67 R09: fffffbfff1a433c3
+R10: 0000000000000000 R11: dffffc0000000001 R12: 0000000000000000
+R13: ffffc90003dff6c0 R14: 00000000000001c0 R15: 0000000000000000
+FS:  00007fdbae5ee700(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fdbae6901e0 CR3: 000000007b2dd000 CR4: 00000000003506e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ madvise_collapse+0x721/0xf50 mm/khugepaged.c:2693
+ madvise_vma_behavior mm/madvise.c:1086 [inline]
+ madvise_walk_vmas mm/madvise.c:1260 [inline]
+ do_madvise+0x9e5/0x4680 mm/madvise.c:1439
+ __do_sys_madvise mm/madvise.c:1452 [inline]
+ __se_sys_madvise mm/madvise.c:1450 [inline]
+ __x64_sys_madvise+0xa5/0xb0 mm/madvise.c:1450
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The xas_store() call during page cache scanning can potentially translate
+'xas' into the error state (with the reproducer provided by the syzkaller
+the error code is -ENOMEM).  However, there are no further checks after
+the 'xas_store', and the next call of 'xas_next' at the start of the
+scanning cycle doesn't increase the xa_index, and the issue occurs.
+
+This patch adds xarray state error checking after the xas_store() calls
+and a corresponding result error code.
+
+Tested via syzbot.
+
+[akpm@linux-foundation.org: update include/trace/events/huge_memory.h's SCAN_STATUS]
+Link: https://lkml.kernel.org/r/20230329145330.23191-1-ivan.orlov0322@gmail.com
+Link: https://syzkaller.appspot.com/bug?id=7d6bb3760e026ece7524500fe44fb024a0e959fc
+Signed-off-by: Ivan Orlov <ivan.orlov0322@gmail.com>
+Reported-by: syzbot+9578faa5475acb35fa50@syzkaller.appspotmail.com
+Tested-by: Zach O'Keefe <zokeefe@google.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Himadri Pandya <himadrispandya@gmail.com>
+Cc: Ivan Orlov <ivan.orlov0322@gmail.com>
+Cc: Shuah Khan <skhan@linuxfoundation.org>
+Cc: Song Liu <songliubraving@fb.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/huge_memory.h |  3 ++-
+ mm/khugepaged.c                    | 20 ++++++++++++++++++++
+ 2 files changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
+index 760455dfa8600..01591e7995235 100644
+--- a/include/trace/events/huge_memory.h
++++ b/include/trace/events/huge_memory.h
+@@ -36,7 +36,8 @@
+       EM( SCAN_ALLOC_HUGE_PAGE_FAIL,  "alloc_huge_page_failed")       \
+       EM( SCAN_CGROUP_CHARGE_FAIL,    "ccgroup_charge_failed")        \
+       EM( SCAN_TRUNCATED,             "truncated")                    \
+-      EMe(SCAN_PAGE_HAS_PRIVATE,      "page_has_private")             \
++      EM( SCAN_PAGE_HAS_PRIVATE,      "page_has_private")             \
++      EMe(SCAN_STORE_FAILED,          "store_failed")
+ #undef EM
+ #undef EMe
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 65bd0b105266a..085fca1fa27af 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -55,6 +55,7 @@ enum scan_result {
+       SCAN_CGROUP_CHARGE_FAIL,
+       SCAN_TRUNCATED,
+       SCAN_PAGE_HAS_PRIVATE,
++      SCAN_STORE_FAILED,
+ };
+ #define CREATE_TRACE_POINTS
+@@ -1840,6 +1841,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+                                       goto xa_locked;
+                               }
+                               xas_store(&xas, hpage);
++                              if (xas_error(&xas)) {
++                                      /* revert shmem_charge performed
++                                       * in the previous condition
++                                       */
++                                      mapping->nrpages--;
++                                      shmem_uncharge(mapping->host, 1);
++                                      result = SCAN_STORE_FAILED;
++                                      goto xa_locked;
++                              }
+                               nr_none++;
+                               continue;
+                       }
+@@ -1991,6 +2001,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+               /* Finally, replace with the new page. */
+               xas_store(&xas, hpage);
++              /* We can't get an ENOMEM here (because the allocation happened before)
++               * but let's check for errors (XArray implementation can be
++               * changed in the future)
++               */
++              WARN_ON_ONCE(xas_error(&xas));
+               continue;
+ out_unlock:
+               unlock_page(page);
+@@ -2028,6 +2043,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+       /* Join all the small entries into a single multi-index entry */
+       xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+       xas_store(&xas, hpage);
++      /* Here we can't get an ENOMEM (because entries were
++       * previously allocated) But let's check for errors
++       * (XArray implementation can be changed in the future)
++       */
++      WARN_ON_ONCE(xas_error(&xas));
+ xa_locked:
+       xas_unlock_irq(&xas);
+ xa_unlocked:
+-- 
+2.43.0
+
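
For reference, the advanced xarray API latches allocation failures in the xa_state rather than returning them from xas_store(), which is why the fix checks xas_error() right after the store. A minimal sketch of the documented store pattern (generic kernel code, not the khugepaged logic):

  #include <linux/xarray.h>

  /* Store @entry at @index in @xa and report any latched xa_state error. */
  static int store_entry(struct xarray *xa, unsigned long index, void *entry)
  {
          XA_STATE(xas, xa, index);

          do {
                  xas_lock(&xas);
                  xas_store(&xas, entry);
                  xas_unlock(&xas);
          } while (xas_nomem(&xas, GFP_KERNEL));

          return xas_error(&xas);         /* 0 on success, e.g. -ENOMEM */
  }
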
diff --git a/queue-6.1/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch b/queue-6.1/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch
new file mode 100644 (file)
index 0000000..6a58a27
--- /dev/null
@@ -0,0 +1,42 @@
+From 78d530de020e0a06f81efc16aadb55867e9f8c08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Aug 2024 08:51:23 +0200
+Subject: mptcp: correct MPTCP_SUBFLOW_ATTR_SSN_OFFSET reserved size
+
+From: Eugene Syromiatnikov <esyr@redhat.com>
+
+[ Upstream commit 655111b838cdabdb604f3625a9ff08c5eedb11da ]
+
+ssn_offset field is u32 and is placed into the netlink response with
+nla_put_u32(), but only 2 bytes are reserved for the attribute payload
+in subflow_get_info_size() (even though it makes no difference
+in the end, as it is aligned up to 4 bytes).  Supply the correct
+argument to the relevant nla_total_size() call to make it less
+confusing.
+
+Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace")
+Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20240812065024.GA19719@asgard.redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/diag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
+index 7017dd60659dc..b2199cc282384 100644
+--- a/net/mptcp/diag.c
++++ b/net/mptcp/diag.c
+@@ -95,7 +95,7 @@ static size_t subflow_get_info_size(const struct sock *sk)
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */
+               nla_total_size_64bit(8) +       /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */
+-              nla_total_size(2) +     /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
++              nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
+               nla_total_size(2) +     /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */
+               nla_total_size(4) +     /* MPTCP_SUBFLOW_ATTR_FLAGS */
+               nla_total_size(1) +     /* MPTCP_SUBFLOW_ATTR_ID_REM */
+-- 
+2.43.0
+
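
The general rule behind the one-liner: the size reserved in the get_info_size path must match the nla_put_*() call used when filling the message, and spelling it as sizeof() keeps the two in sync. A hedged sketch of that pairing (generic netlink helper usage; the attribute name is invented):

  #include <net/netlink.h>

  #define EXAMPLE_ATTR_SSN_OFFSET 1       /* invented attribute type */

  static size_t example_get_info_size(void)
  {
          return nla_total_size(sizeof(u32));     /* pairs with nla_put_u32() */
  }

  static int example_fill_info(struct sk_buff *skb, u32 ssn_offset)
  {
          return nla_put_u32(skb, EXAMPLE_ATTR_SSN_OFFSET, ssn_offset);
  }
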
diff --git a/queue-6.1/net-axienet-fix-register-defines-comment-description.patch b/queue-6.1/net-axienet-fix-register-defines-comment-description.patch
new file mode 100644 (file)
index 0000000..edb6844
--- /dev/null
@@ -0,0 +1,62 @@
+From 06b4dbe10f981bb20373595a5ebee5cd1287d96b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 11:56:09 +0530
+Subject: net: axienet: Fix register defines comment description
+
+From: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+
+[ Upstream commit 9ff2f816e2aa65ca9a1cdf0954842f8173c0f48d ]
+
+In the axiethernet header, fix the register defines' comment descriptions
+to be in line with the IP documentation. This updates the MAC configuration
+register, MDIO configuration register, and frame filter control descriptions.
+
+Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
+Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/xilinx_axienet.h | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
+index 6370c447ac5ca..969bea5541976 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
+@@ -159,16 +159,16 @@
+ #define XAE_RCW1_OFFSET               0x00000404 /* Rx Configuration Word 1 */
+ #define XAE_TC_OFFSET         0x00000408 /* Tx Configuration */
+ #define XAE_FCC_OFFSET                0x0000040C /* Flow Control Configuration */
+-#define XAE_EMMC_OFFSET               0x00000410 /* EMAC mode configuration */
+-#define XAE_PHYC_OFFSET               0x00000414 /* RGMII/SGMII configuration */
++#define XAE_EMMC_OFFSET               0x00000410 /* MAC speed configuration */
++#define XAE_PHYC_OFFSET               0x00000414 /* RX Max Frame Configuration */
+ #define XAE_ID_OFFSET         0x000004F8 /* Identification register */
+-#define XAE_MDIO_MC_OFFSET    0x00000500 /* MII Management Config */
+-#define XAE_MDIO_MCR_OFFSET   0x00000504 /* MII Management Control */
+-#define XAE_MDIO_MWD_OFFSET   0x00000508 /* MII Management Write Data */
+-#define XAE_MDIO_MRD_OFFSET   0x0000050C /* MII Management Read Data */
++#define XAE_MDIO_MC_OFFSET    0x00000500 /* MDIO Setup */
++#define XAE_MDIO_MCR_OFFSET   0x00000504 /* MDIO Control */
++#define XAE_MDIO_MWD_OFFSET   0x00000508 /* MDIO Write Data */
++#define XAE_MDIO_MRD_OFFSET   0x0000050C /* MDIO Read Data */
+ #define XAE_UAW0_OFFSET               0x00000700 /* Unicast address word 0 */
+ #define XAE_UAW1_OFFSET               0x00000704 /* Unicast address word 1 */
+-#define XAE_FMI_OFFSET                0x00000708 /* Filter Mask Index */
++#define XAE_FMI_OFFSET                0x00000708 /* Frame Filter Control */
+ #define XAE_AF0_OFFSET                0x00000710 /* Address Filter 0 */
+ #define XAE_AF1_OFFSET                0x00000714 /* Address Filter 1 */
+@@ -307,7 +307,7 @@
+  */
+ #define XAE_UAW1_UNICASTADDR_MASK     0x0000FFFF
+-/* Bit masks for Axi Ethernet FMI register */
++/* Bit masks for Axi Ethernet FMC register */
+ #define XAE_FMI_PM_MASK                       0x80000000 /* Promis. mode enable */
+ #define XAE_FMI_IND_MASK              0x00000003 /* Index Mask */
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-don-t-dump-stack-on-queue-timeout.patch b/queue-6.1/net-don-t-dump-stack-on-queue-timeout.patch
new file mode 100644 (file)
index 0000000..cecdd67
--- /dev/null
@@ -0,0 +1,68 @@
+From e5b627df1845f92814d8a9e037d6cfaab4f6a7a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 00:11:42 -0500
+Subject: net: don't dump stack on queue timeout
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit e316dd1cf1358ff9c44b37c7be273a7dc4349986 ]
+
+The top syzbot report for networking (#14 for the entire kernel)
+is the queue timeout splat. We kept it around for a long time,
+because in real life it provides pretty strong signal that
+something is wrong with the driver or the device.
+
+Removing it is also likely to break monitoring for those who
+track it as a kernel warning.
+
+Nevertheless, WARN()ings are best suited for catching kernel
+programming bugs. If a Tx queue gets starved due to a pause
+storm, priority configuration, or other weirdness - that's
+obviously a problem, but not a problem we can fix at
+the kernel level.
+
+Bite the bullet and convert the WARN() to a print.
+
+Before:
+
+  NETDEV WATCHDOG: eni1np1 (netdevsim): transmit queue 0 timed out 1975 ms
+  WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:525 dev_watchdog+0x39e/0x3b0
+  [... completely pointless stack trace of a timer follows ...]
+
+Now:
+
+  netdevsim netdevsim1 eni1np1: NETDEV WATCHDOG: CPU: 0: transmit queue 0 timed out 1769 ms
+
+Alternatively we could mark the drivers which syzbot has
+learned to abuse as "print-instead-of-WARN" selectively.
+
+Reported-by: syzbot+d55372214aff0faa1f1f@syzkaller.appspotmail.com
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_generic.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 4023c955036b1..6ab9359c1706f 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
+                       if (unlikely(timedout_ms)) {
+                               trace_net_dev_xmit_timeout(dev, i);
+-                              WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
+-                                        dev->name, netdev_drivername(dev), i, timedout_ms);
++                              netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
++                                          raw_smp_processor_id(),
++                                          i, timedout_ms);
+                               netif_freeze_queues(dev);
+                               dev->netdev_ops->ndo_tx_timeout(dev, i);
+                               netif_unfreeze_queues(dev);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch b/queue-6.1/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch
new file mode 100644 (file)
index 0000000..5d57d06
--- /dev/null
@@ -0,0 +1,114 @@
+From e56af52cd7ff5da5564528ab8763adf461e95951 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 21:38:04 +0200
+Subject: net: dsa: vsc73xx: check busy flag in MDIO operations
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit fa63c6434b6f6aaf9d8d599dc899bc0a074cc0ad ]
+
+The VSC73xx has a busy flag used during MDIO operations. It is raised
+when MDIO read/write operations are in progress. Without it, PHYs are
+misconfigured and bus operations do not work as expected.
+
+Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 37 +++++++++++++++++++++++++-
+ 1 file changed, 36 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index 92087f9d73550..c8e9ca5d5c284 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -39,6 +39,10 @@
+ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */
+ #define VSC73XX_BLOCK_SYSTEM  0x7 /* Only subblock 0 */
++/* MII Block subblock */
++#define VSC73XX_BLOCK_MII_INTERNAL    0x0 /* Internal MDIO subblock */
++#define VSC73XX_BLOCK_MII_EXTERNAL    0x1 /* External MDIO subblock */
++
+ #define CPU_PORT      6 /* CPU port */
+ /* MAC Block registers */
+@@ -197,6 +201,8 @@
+ #define VSC73XX_MII_CMD               0x1
+ #define VSC73XX_MII_DATA      0x2
++#define VSC73XX_MII_STAT_BUSY BIT(3)
++
+ /* Arbiter block 5 registers */
+ #define VSC73XX_ARBEMPTY              0x0c
+ #define VSC73XX_ARBDISC                       0x0e
+@@ -271,6 +277,7 @@
+ #define IS_739X(a) (IS_7395(a) || IS_7398(a))
+ #define VSC73XX_POLL_SLEEP_US         1000
++#define VSC73XX_MDIO_POLL_SLEEP_US    5
+ #define VSC73XX_POLL_TIMEOUT_US               10000
+ struct vsc73xx_counter {
+@@ -488,6 +495,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc)
+       return 0;
+ }
++static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc)
++{
++      int ret, err;
++      u32 val;
++
++      ret = read_poll_timeout(vsc73xx_read, err,
++                              err < 0 || !(val & VSC73XX_MII_STAT_BUSY),
++                              VSC73XX_MDIO_POLL_SLEEP_US,
++                              VSC73XX_POLL_TIMEOUT_US, false, vsc,
++                              VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL,
++                              VSC73XX_MII_STAT, &val);
++      if (ret)
++              return ret;
++      return err;
++}
++
+ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum)
+ {
+       struct vsc73xx *vsc = ds->priv;
+@@ -495,12 +518,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum)
+       u32 val;
+       int ret;
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       /* Setting bit 26 means "read" */
+       cmd = BIT(26) | (phy << 21) | (regnum << 16);
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+-      msleep(2);
++
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val);
+       if (ret)
+               return ret;
+@@ -524,6 +555,10 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
+       u32 cmd;
+       int ret;
++      ret = vsc73xx_mdio_busy_check(vsc);
++      if (ret)
++              return ret;
++
+       /* It was found through tedious experiments that this router
+        * chip really hates to have it's PHYs reset. They
+        * never recover if that happens: autonegotiation stops
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch b/queue-6.1/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch
new file mode 100644 (file)
index 0000000..a0dac2d
--- /dev/null
@@ -0,0 +1,40 @@
+From 1f9b5f4bbc5706ab6cc29cb50f6e697f979378cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 21:38:03 +0200
+Subject: net: dsa: vsc73xx: pass value in phy_write operation
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit 5b9eebc2c7a5f0cc7950d918c1e8a4ad4bed5010 ]
+
+In the 'vsc73xx_phy_write' function, the register value is missing,
+and the phy write operation always sends zeros.
+
+This commit passes the value variable into the proper register.
+
+Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index 3efd556690563..81d39dfe21f45 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -531,7 +531,7 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
+               return 0;
+       }
+-      cmd = (phy << 21) | (regnum << 16);
++      cmd = (phy << 21) | (regnum << 16) | val;
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch b/queue-6.1/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch
new file mode 100644 (file)
index 0000000..aedb69f
--- /dev/null
@@ -0,0 +1,100 @@
+From 3786d60c8460f28235dbbabe02108a63e663c42c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Apr 2024 22:50:44 +0200
+Subject: net: dsa: vsc73xx: use read_poll_timeout instead delay loop
+
+From: Pawel Dembicki <paweldembicki@gmail.com>
+
+[ Upstream commit eb7e33d01db3aec128590391b2397384bab406b6 ]
+
+Switch the delay loop during the Arbiter empty check from
+vsc73xx_adjust_link() to use read_poll_timeout(). Functionally,
+one msleep() call is eliminated at the end of the loop in the timeout
+case.
+
+As Russell King suggested:
+
+"This [change] avoids the issue that on the last iteration, the code reads
+the register, tests it, finds the condition that's being waiting for is
+false, _then_ waits and end up printing the error message - that last
+wait is rather useless, and as the arbiter state isn't checked after
+waiting, it could be that we had success during the last wait."
+
+Suggested-by: Russell King <linux@armlinux.org.uk>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
+Link: https://lore.kernel.org/r/20240417205048.3542839-2-paweldembicki@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: fa63c6434b6f ("net: dsa: vsc73xx: check busy flag in MDIO operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/vitesse-vsc73xx-core.c | 30 ++++++++++++++------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index 81d39dfe21f45..92087f9d73550 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -17,6 +17,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/device.h>
++#include <linux/iopoll.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+ #include <linux/of_mdio.h>
+@@ -269,6 +270,9 @@
+ #define IS_7398(a) ((a)->chipid == VSC73XX_CHIPID_ID_7398)
+ #define IS_739X(a) (IS_7395(a) || IS_7398(a))
++#define VSC73XX_POLL_SLEEP_US         1000
++#define VSC73XX_POLL_TIMEOUT_US               10000
++
+ struct vsc73xx_counter {
+       u8 counter;
+       const char *name;
+@@ -780,7 +784,7 @@ static void vsc73xx_adjust_link(struct dsa_switch *ds, int port,
+        * after a PHY or the CPU port comes up or down.
+        */
+       if (!phydev->link) {
+-              int maxloop = 10;
++              int ret, err;
+               dev_dbg(vsc->dev, "port %d: went down\n",
+                       port);
+@@ -795,19 +799,17 @@ static void vsc73xx_adjust_link(struct dsa_switch *ds, int port,
+                                   VSC73XX_ARBDISC, BIT(port), BIT(port));
+               /* Wait until queue is empty */
+-              vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
+-                           VSC73XX_ARBEMPTY, &val);
+-              while (!(val & BIT(port))) {
+-                      msleep(1);
+-                      vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
+-                                   VSC73XX_ARBEMPTY, &val);
+-                      if (--maxloop == 0) {
+-                              dev_err(vsc->dev,
+-                                      "timeout waiting for block arbiter\n");
+-                              /* Continue anyway */
+-                              break;
+-                      }
+-              }
++              ret = read_poll_timeout(vsc73xx_read, err,
++                                      err < 0 || (val & BIT(port)),
++                                      VSC73XX_POLL_SLEEP_US,
++                                      VSC73XX_POLL_TIMEOUT_US, false,
++                                      vsc, VSC73XX_BLOCK_ARBITER, 0,
++                                      VSC73XX_ARBEMPTY, &val);
++              if (ret)
++                      dev_err(vsc->dev,
++                              "timeout waiting for block arbiter\n");
++              else if (err < 0)
++                      dev_err(vsc->dev, "error reading arbiter\n");
+               /* Put this port into reset */
+               vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG,
+-- 
+2.43.0
+
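
For readers unfamiliar with the helper used above: read_poll_timeout() from <linux/iopoll.h> re-runs the accessor until the condition holds or the timeout expires, evaluates the condition once more after the final read, and returns 0 or -ETIMEDOUT. A minimal hedged sketch with an invented register accessor (my_read and the MY_STAT_* names are placeholders, not a real driver API):

  #include <linux/bits.h>
  #include <linux/iopoll.h>
  #include <linux/types.h>

  #define MY_STAT_REG   0x10              /* placeholder register */
  #define MY_STAT_BUSY  BIT(3)            /* placeholder busy bit */

  struct my_dev;
  int my_read(struct my_dev *dev, u32 reg, u32 *val);  /* 0 or -errno */

  static int my_wait_not_busy(struct my_dev *dev)
  {
          u32 val;
          int ret, err;

          ret = read_poll_timeout(my_read, err,
                                  err < 0 || !(val & MY_STAT_BUSY),
                                  50,      /* sleep between reads, in us */
                                  10000,   /* total timeout, in us */
                                  false,   /* no sleep before first read */
                                  dev, MY_STAT_REG, &val);
          if (ret)
                  return ret;              /* -ETIMEDOUT */
          return err;                      /* error from the last my_read() */
  }
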
diff --git a/queue-6.1/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch b/queue-6.1/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch
new file mode 100644 (file)
index 0000000..4099ec9
--- /dev/null
@@ -0,0 +1,76 @@
+From f2b9d2b01ec06384628749bcd22b15beb1b50e7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:22 +0800
+Subject: net: hns3: fix a deadlock problem when config TC during resetting
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit be5e816d00a506719e9dbb1a9c861c5ced30a109 ]
+
+Configuring TC during the reset process may cause a deadlock. The flow is
+as below:
+                             pf reset start
+                                 │
+                                 ▼
+                              ......
+setup tc                         │
+    │                            ▼
+    ▼                      DOWN: napi_disable()
+napi_disable()(skip)             │
+    │                            │
+    ▼                            ▼
+  ......                      ......
+    │                            │
+    ▼                            │
+napi_enable()                    │
+                                 ▼
+                           UINIT: netif_napi_del()
+                                 │
+                                 ▼
+                              ......
+                                 │
+                                 ▼
+                           INIT: netif_napi_add()
+                                 │
+                                 ▼
+                              ......                 global reset start
+                                 │                      │
+                                 ▼                      ▼
+                           UP: napi_enable()(skip)    ......
+                                 │                      │
+                                 ▼                      ▼
+                              ......                 napi_disable()
+
+During the reset process, the driver will DOWN the port and then UINIT. In
+this case, the setup tc process will UP the port before UINIT, which causes
+the problem. Add a DOWN step in UINIT to fix it.
+
+Fixes: bb6b94a896d4 ("net: hns3: Add reset interface implementation in client")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index 4ce43c3a00a37..0377a056aaecc 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -5728,6 +5728,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
+       struct net_device *netdev = handle->kinfo.netdev;
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
++      if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++              hns3_nic_net_stop(netdev);
++
+       if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+               netdev_warn(netdev, "already uninitialized\n");
+               return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-hns3-fix-wrong-use-of-semaphore-up.patch b/queue-6.1/net-hns3-fix-wrong-use-of-semaphore-up.patch
new file mode 100644 (file)
index 0000000..b6ce4ae
--- /dev/null
@@ -0,0 +1,61 @@
+From 87f6bd9b18d49325459b5ec4dedafd95d32fd6c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:20 +0800
+Subject: net: hns3: fix wrong use of semaphore up
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 8445d9d3c03101859663d34fda747f6a50947556 ]
+
+Currently, if the hns3 PF or VF FLR reset fails after five retries,
+the reset-done process will directly release the semaphore,
+which has already been released in hclge_reset_prepare_general.
+This will cause the down operation to fail.
+
+So this patch fixes it by adding a reset state check: the up operation is
+only called after a successful PF FLR reset.
+
+Fixes: 8627bdedc435 ("net: hns3: refactor the precedure of PF FLR")
+Fixes: f28368bb4542 ("net: hns3: refactor the procedure of VF FLR")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 4 ++--
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 01e24b69e9203..dfb428550ac03 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -11538,8 +11538,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
+               dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret);
+       hdev->reset_type = HNAE3_NONE_RESET;
+-      clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+-      up(&hdev->reset_sem);
++      if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
++              up(&hdev->reset_sem);
+ }
+ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index 1f5a27fb309aa..aebb104f4c290 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -1764,8 +1764,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev)
+                        ret);
+       hdev->reset_type = HNAE3_NONE_RESET;
+-      clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+-      up(&hdev->reset_sem);
++      if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
++              up(&hdev->reset_sem);
+ }
+ static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
+-- 
+2.43.0
+
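
The idiom this fix relies on: tie ownership of the semaphore to a state bit and only call up() from the path that atomically clears the bit, so two completion paths cannot both release it. A small sketch (generic kernel code; the struct and bit name are illustrative, not the hns3 definitions):

  #include <linux/bitops.h>
  #include <linux/semaphore.h>

  #define MY_STATE_RST_HANDLING 0         /* illustrative bit number */

  struct my_hdev {
          unsigned long state;
          struct semaphore reset_sem;
  };

  /* Reset-done path; may race with an error path that has already given
   * up ownership (cleared the bit and released the semaphore). */
  static void my_reset_done(struct my_hdev *hdev)
  {
          if (test_and_clear_bit(MY_STATE_RST_HANDLING, &hdev->state))
                  up(&hdev->reset_sem);   /* only the owner releases it */
  }
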
diff --git a/queue-6.1/net-hns3-use-the-user-s-cfg-after-reset.patch b/queue-6.1/net-hns3-use-the-user-s-cfg-after-reset.patch
new file mode 100644 (file)
index 0000000..77356bf
--- /dev/null
@@ -0,0 +1,122 @@
+From 22f31d594c2f51ae59b8ff8317c7db7d5686588a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 22:10:21 +0800
+Subject: net: hns3: use the user's cfg after reset
+
+From: Peiyang Wang <wangpeiyang1@huawei.com>
+
+[ Upstream commit 30545e17eac1f50c5ef49644daf6af205100a965 ]
+
+Consider the case where the user changes the speed and then resets the net
+interface. Before the hw has changed the speed successfully, the driver reads
+the old speed from the hw in the timer task. After the reset, that previous
+speed is configured to the hw. As a result, the new speed is configured
+successfully but is lost after the PF reset. The picture below shows this
+more directly.
+
++------+              +----+                 +----+
+| USER |              | PF |                 | HW |
++---+--+              +-+--+                 +-+--+
+    |  ethtool -s 100G  |                      |
+    +------------------>|   set speed 100G     |
+    |                   +--------------------->|
+    |                   |  set successfully    |
+    |                   |<---------------------+---+
+    |                   |query cfg (timer task)|   |
+    |                   +--------------------->|   | handle speed
+    |                   |     return 200G      |   | changing event
+    |  ethtool --reset  |<---------------------+   | (100G)
+    +------------------>|  cfg previous speed  |<--+
+    |                   |  after reset (200G)  |
+    |                   +--------------------->|
+    |                   |                      +---+
+    |                   |query cfg (timer task)|   |
+    |                   +--------------------->|   | handle speed
+    |                   |     return 100G      |   | changing event
+    |                   |<---------------------+   | (200G)
+    |                   |                      |<--+
+    |                   |query cfg (timer task)|
+    |                   +--------------------->|
+    |                   |     return 200G      |
+    |                   |<---------------------+
+    |                   |                      |
+    v                   v                      v
+
+This patch saves the new speed once the hw has changed speed successfully,
+so that it can be restored after a successful reset.
+
+Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary")
+Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../hisilicon/hns3/hns3pf/hclge_main.c        | 24 ++++++++++++++-----
+ .../hisilicon/hns3/hns3pf/hclge_mdio.c        |  3 +++
+ 2 files changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index dfb428550ac03..45bd5c79e4da8 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -2696,8 +2696,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
+ {
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
++      int ret;
++
++      ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num);
+-      return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num);
++      if (ret)
++              return ret;
++
++      hdev->hw.mac.req_speed = speed;
++      hdev->hw.mac.req_duplex = duplex;
++
++      return 0;
+ }
+ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
+@@ -2999,17 +3008,20 @@ static int hclge_mac_init(struct hclge_dev *hdev)
+       if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+               hdev->hw.mac.duplex = HCLGE_MAC_FULL;
+-      ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed,
+-                                       hdev->hw.mac.duplex, hdev->hw.mac.lane_num);
+-      if (ret)
+-              return ret;
+-
+       if (hdev->hw.mac.support_autoneg) {
+               ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg);
+               if (ret)
+                       return ret;
+       }
++      if (!hdev->hw.mac.autoneg) {
++              ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed,
++                                               hdev->hw.mac.req_duplex,
++                                               hdev->hw.mac.lane_num);
++              if (ret)
++                      return ret;
++      }
++
+       mac->link = 0;
+       if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) {
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+index 85fb11de43a12..80079657afebe 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+@@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev)
+       if (ret)
+               netdev_err(netdev, "failed to adjust link.\n");
++      hdev->hw.mac.req_speed = (u32)speed;
++      hdev->hw.mac.req_duplex = (u8)duplex;
++
+       ret = hclge_cfg_flowctrl(hdev);
+       if (ret)
+               netdev_err(netdev, "failed to configure flow control.\n");
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch b/queue-6.1/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch
new file mode 100644 (file)
index 0000000..99cf2a2
--- /dev/null
@@ -0,0 +1,46 @@
+From e4a859de92769178e2ae354646bb2f3df26e6363 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 17:41:05 +0300
+Subject: net/mlx5e: Correctly report errors for ethtool rx flows
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit cbc796be1779c4dbc9a482c7233995e2a8b6bfb3 ]
+
+Previously, an ethtool rx flow with no attrs would not be added to the
+NIC as it has no rules to configure the hw with, but it would be
+reported as successful to the caller (return code 0). This is confusing
+for the user as ethtool then reports "Added rule $num", but no rule was
+actually added.
+
+This change corrects that by instead reporting these wrong rules as
+-EINVAL.
+
+Fixes: b29c61dac3a2 ("net/mlx5e: Ethtool steering flow validation refactoring")
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20240808144107.2095424-5-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+index aac32e505c14f..a8870c6daec6c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -738,7 +738,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+       if (num_tuples <= 0) {
+               netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
+                           __func__, num_tuples);
+-              return num_tuples;
++              return num_tuples < 0 ? num_tuples : -EINVAL;
+       }
+       eth_ft = get_flow_table(priv, fs, num_tuples);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch b/queue-6.1/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch
new file mode 100644 (file)
index 0000000..ed2e11a
--- /dev/null
@@ -0,0 +1,45 @@
+From 0d0306f99a9933a9d1874f0aca395fd33b006331 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Aug 2024 17:41:04 +0300
+Subject: net/mlx5e: Take state lock during tx timeout reporter
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit e6b5afd30b99b43682a7764e1a74a42fe4d5f4b3 ]
+
+mlx5e_safe_reopen_channels() requires the state lock to be taken. The
+change referenced in the Fixes tag removed the lock to fix another
+issue. This patch adds it back but at a later point (when calling
+mlx5e_safe_reopen_channels()) to avoid the deadlock referenced in the
+Fixes tag.
+
+Fixes: eab0da38912e ("net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Link: https://lore.kernel.org/all/ZplpKq8FKi3vwfxv@gmail.com/T/
+Reviewed-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20240808144107.2095424-4-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+index 60bc5b577ab99..02d9fb0c5ec24 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+@@ -111,7 +111,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
+               return err;
+       }
++      mutex_lock(&priv->state_lock);
+       err = mlx5e_safe_reopen_channels(priv);
++      mutex_unlock(&priv->state_lock);
+       if (!err) {
+               to_ctx->status = 1; /* all channels recovered */
+               return err;
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-rds-fix-possible-deadlock-in-rds_message_put.patch b/queue-6.1/net-rds-fix-possible-deadlock-in-rds_message_put.patch
new file mode 100644 (file)
index 0000000..7542187
--- /dev/null
@@ -0,0 +1,84 @@
+From 6886f936fe465fa7580e235758b78e0c7f459bd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 19:28:54 -0700
+Subject: net:rds: Fix possible deadlock in rds_message_put
+
+From: Allison Henderson <allison.henderson@oracle.com>
+
+[ Upstream commit f1acf1ac84d2ae97b7889b87223c1064df850069 ]
+
+Functions rds_still_queued and rds_clear_recv_queue lock a given socket
+in order to safely iterate over the incoming rds messages. However,
+calling rds_inc_put while under this lock creates a potential deadlock.
+rds_inc_put may eventually call rds_message_purge, which will lock
+m_rs_lock. This is the incorrect locking order since m_rs_lock is
+meant to be locked before the socket. To fix this, we move the message
+item to a local list or variable that won't need rs_recv_lock protection.
+Then we can safely call rds_inc_put on any item stored locally after
+rs_recv_lock is released.
+
+Fixes: bdbe6fbc6a2f ("RDS: recv.c")
+Reported-by: syzbot+f9db6ff27b9bfdcfeca0@syzkaller.appspotmail.com
+Reported-by: syzbot+dcd73ff9291e6d34b3ab@syzkaller.appspotmail.com
+Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
+Link: https://lore.kernel.org/r/20240209022854.200292-1-allison.henderson@oracle.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rds/recv.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/net/rds/recv.c b/net/rds/recv.c
+index 5b426dc3634d1..a316180d3c32e 100644
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -424,6 +424,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+       struct sock *sk = rds_rs_to_sk(rs);
+       int ret = 0;
+       unsigned long flags;
++      struct rds_incoming *to_drop = NULL;
+       write_lock_irqsave(&rs->rs_recv_lock, flags);
+       if (!list_empty(&inc->i_item)) {
+@@ -434,11 +435,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+                                             -be32_to_cpu(inc->i_hdr.h_len),
+                                             inc->i_hdr.h_dport);
+                       list_del_init(&inc->i_item);
+-                      rds_inc_put(inc);
++                      to_drop = inc;
+               }
+       }
+       write_unlock_irqrestore(&rs->rs_recv_lock, flags);
++      if (to_drop)
++              rds_inc_put(to_drop);
++
+       rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
+       return ret;
+ }
+@@ -757,16 +761,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
+       struct sock *sk = rds_rs_to_sk(rs);
+       struct rds_incoming *inc, *tmp;
+       unsigned long flags;
++      LIST_HEAD(to_drop);
+       write_lock_irqsave(&rs->rs_recv_lock, flags);
+       list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
+               rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
+                                     -be32_to_cpu(inc->i_hdr.h_len),
+                                     inc->i_hdr.h_dport);
++              list_move(&inc->i_item, &to_drop);
++      }
++      write_unlock_irqrestore(&rs->rs_recv_lock, flags);
++
++      list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
+               list_del_init(&inc->i_item);
+               rds_inc_put(inc);
+       }
+-      write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ }
+ /*
+-- 
+2.43.0
+
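The fix above follows a common pattern: detach items from the list while the lock is held, then drop the references only after the lock has been released. A minimal userspace sketch of that pattern with pthreads; the names (msg_put, drain_queue) are hypothetical stand-ins for the RDS code, not the kernel implementation:

#include <pthread.h>
#include <stdlib.h>

struct msg {
        struct msg *next;
};

struct msg_queue {
        pthread_mutex_t lock;
        struct msg *head;
};

/* Stand-in for rds_inc_put(): it may take other locks internally,
 * so it must never be called while queue->lock is still held. */
static void msg_put(struct msg *m)
{
        free(m);
}

/* Detach everything under the lock, release the references afterwards. */
static void drain_queue(struct msg_queue *q)
{
        struct msg *to_drop;

        pthread_mutex_lock(&q->lock);
        to_drop = q->head;
        q->head = NULL;
        pthread_mutex_unlock(&q->lock);

        while (to_drop) {
                struct msg *next = to_drop->next;

                msg_put(to_drop);
                to_drop = next;
        }
}

int main(void)
{
        struct msg_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER, .head = NULL };
        struct msg *m = malloc(sizeof(*m));

        if (!m)
                return 1;
        m->next = NULL;
        q.head = m;
        drain_queue(&q);
        return 0;
}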
diff --git a/queue-6.1/net-sched-print-msecs-when-transmit-queue-time-out.patch b/queue-6.1/net-sched-print-msecs-when-transmit-queue-time-out.patch
new file mode 100644 (file)
index 0000000..7b64b20
--- /dev/null
@@ -0,0 +1,80 @@
+From bfef20f8c285ac59c09bd10035247ab9ad5e816c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Apr 2023 16:26:06 +0800
+Subject: net: sched: Print msecs when transmit queue time out
+
+From: Yajun Deng <yajun.deng@linux.dev>
+
+[ Upstream commit 2f0f9465ad9fa9c93f30009184c10da0f504f313 ]
+
+The kernel will print several warnings in a short period of time
+when it stalls. Like this:
+
+First warning:
+[ 7100.097547] ------------[ cut here ]------------
+[ 7100.097550] NETDEV WATCHDOG: eno2 (xxx): transmit queue 8 timed out
+[ 7100.097571] WARNING: CPU: 8 PID: 0 at net/sched/sch_generic.c:467
+                       dev_watchdog+0x260/0x270
+...
+
+Second warning:
+[ 7147.756952] rcu: INFO: rcu_preempt self-detected stall on CPU
+[ 7147.756958] rcu:   24-....: (59999 ticks this GP) idle=546/1/0x400000000000000
+                      softirq=3673137/3673146 fqs=13844
+[ 7147.756960]        (t=60001 jiffies g=4322709 q=133381)
+[ 7147.756962] NMI backtrace for cpu 24
+...
+
+We calculate that the transmit queue stall should have started before
+7095s according to watchdog_timeo, while the rcu stall started at 7087s.
+These two times are close together, so it is difficult to confirm which
+happened first.
+
+To let users know the exact time the stall started, print msecs when
+the transmit queue times out.
+
+Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e316dd1cf135 ("net: don't dump stack on queue timeout")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_generic.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 7053c0292c335..4023c955036b1 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -502,7 +502,7 @@ static void dev_watchdog(struct timer_list *t)
+               if (netif_device_present(dev) &&
+                   netif_running(dev) &&
+                   netif_carrier_ok(dev)) {
+-                      int some_queue_timedout = 0;
++                      unsigned int timedout_ms = 0;
+                       unsigned int i;
+                       unsigned long trans_start;
+@@ -514,16 +514,16 @@ static void dev_watchdog(struct timer_list *t)
+                               if (netif_xmit_stopped(txq) &&
+                                   time_after(jiffies, (trans_start +
+                                                        dev->watchdog_timeo))) {
+-                                      some_queue_timedout = 1;
++                                      timedout_ms = jiffies_to_msecs(jiffies - trans_start);
+                                       atomic_long_inc(&txq->trans_timeout);
+                                       break;
+                               }
+                       }
+-                      if (unlikely(some_queue_timedout)) {
++                      if (unlikely(timedout_ms)) {
+                               trace_net_dev_xmit_timeout(dev, i);
+-                              WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
+-                                     dev->name, netdev_drivername(dev), i);
++                              WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
++                                        dev->name, netdev_drivername(dev), i, timedout_ms);
+                               netif_freeze_queues(dev);
+                               dev->netdev_ops->ndo_tx_timeout(dev, i);
+                               netif_unfreeze_queues(dev);
+-- 
+2.43.0
+
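The value reported by the patch above is simply "now minus the last transmit timestamp", converted to milliseconds. A small userspace sketch of the same computation, with CLOCK_MONOTONIC standing in for jiffies and all names invented for illustration:

#include <stdio.h>
#include <time.h>

static unsigned long long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000ULL + ts.tv_nsec / 1000000ULL;
}

int main(void)
{
        unsigned long long trans_start = now_ms();      /* last successful transmit */
        unsigned long long watchdog_timeo_ms = 100;
        struct timespec delay = { .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000 };
        unsigned long long timedout_ms;

        nanosleep(&delay, NULL);                        /* pretend the queue stalled */

        timedout_ms = now_ms() - trans_start;
        if (timedout_ms > watchdog_timeo_ms)
                printf("transmit queue timed out %llu ms\n", timedout_ms);
        return 0;
}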
diff --git a/queue-6.1/net-sctp-fix-skb-leak-in-sctp_inq_free.patch b/queue-6.1/net-sctp-fix-skb-leak-in-sctp_inq_free.patch
new file mode 100644 (file)
index 0000000..796a3cf
--- /dev/null
@@ -0,0 +1,72 @@
+From 46fa42f00af85de45310a2b2d92096d1c21a1ad6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Feb 2024 11:22:24 +0300
+Subject: net: sctp: fix skb leak in sctp_inq_free()
+
+From: Dmitry Antipov <dmantipov@yandex.ru>
+
+[ Upstream commit 4e45170d9acc2d5ae8f545bf3f2f67504a361338 ]
+
+In case of GSO, the 'chunk->skb' pointer may point to an entry from the
+fraglist created in 'sctp_packet_gso_append()'. To avoid freeing a
+random fraglist entry (and so undefined behavior and/or a memory
+leak), introduce the 'sctp_inq_chunk_free()' helper to ensure that
+'chunk->skb' is set to 'chunk->head_skb' (i.e. fraglist head)
+before calling 'sctp_chunk_free()', and use the aforementioned
+helper in 'sctp_inq_pop()' as well.
+
+Reported-by: syzbot+8bb053b5d63595ab47db@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?id=0d8351bbe54fd04a492c2daab0164138db008042
+Fixes: 90017accff61 ("sctp: Add GSO support")
+Suggested-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
+Acked-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/20240214082224.10168-1-dmantipov@yandex.ru
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/inqueue.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
+index 7182c5a450fb5..5c16521818058 100644
+--- a/net/sctp/inqueue.c
++++ b/net/sctp/inqueue.c
+@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
+       INIT_WORK(&queue->immediate, NULL);
+ }
++/* Properly release the chunk which is being worked on. */
++static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
++{
++      if (chunk->head_skb)
++              chunk->skb = chunk->head_skb;
++      sctp_chunk_free(chunk);
++}
++
+ /* Release the memory associated with an SCTP inqueue.  */
+ void sctp_inq_free(struct sctp_inq *queue)
+ {
+@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
+        * free it as well.
+        */
+       if (queue->in_progress) {
+-              sctp_chunk_free(queue->in_progress);
++              sctp_inq_chunk_free(queue->in_progress);
+               queue->in_progress = NULL;
+       }
+ }
+@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
+                               goto new_skb;
+                       }
+-                      if (chunk->head_skb)
+-                              chunk->skb = chunk->head_skb;
+-                      sctp_chunk_free(chunk);
++                      sctp_inq_chunk_free(chunk);
+                       chunk = queue->in_progress = NULL;
+               } else {
+                       /* Nothing to do. Next chunk in the packet, please. */
+-- 
+2.43.0
+
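The helper introduced above encodes one ownership rule: when a chunk's skb pointer may have been advanced into a GSO fraglist, always free via the saved head. A simplified userspace sketch of that rule; the structures below are hypothetical and not the real sk_buff or sctp_chunk:

#include <stdlib.h>

struct skb {
        struct skb *frag_next;  /* simplified fraglist link */
};

struct chunk {
        struct skb *skb;        /* may point into the fraglist ...          */
        struct skb *head_skb;   /* ... while this always points at the head */
};

/* Free the whole aggregate via its head, never via a fraglist member. */
static void chunk_free(struct chunk *c)
{
        struct skb *s = c->head_skb ? c->head_skb : c->skb;

        while (s) {
                struct skb *next = s->frag_next;

                free(s);
                s = next;
        }
        free(c);
}

int main(void)
{
        struct skb *head = calloc(1, sizeof(*head));
        struct skb *frag = calloc(1, sizeof(*frag));
        struct chunk *c = calloc(1, sizeof(*c));

        if (!head || !frag || !c)
                return 1;
        head->frag_next = frag;
        c->head_skb = head;
        c->skb = frag;          /* parsing stopped inside the fraglist */
        chunk_free(c);
        return 0;
}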
diff --git a/queue-6.1/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch b/queue-6.1/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch
new file mode 100644 (file)
index 0000000..78b72e3
--- /dev/null
@@ -0,0 +1,46 @@
+From 8129483f0a67657fb513fbc2ca72106f7bc7188a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2024 12:40:52 +0100
+Subject: netfilter: allow ipv6 fragments to arrive on different devices
+
+From: Tom Hughes <tom@compton.nu>
+
+[ Upstream commit 3cd740b985963f874a1a094f1969e998b9d05554 ]
+
+Commit 264640fc2c5f4 ("ipv6: distinguish frag queues by device
+for multicast and link-local packets") modified the ipv6 fragment
+reassembly logic to distinguish frag queues by device for multicast
+and link-local packets, but in fact only the main reassembly code
+limits the use of the device to those address types; the netfilter
+reassembly code uses the device for all packets.
+
+This means that if fragments of a packet arrive on different interfaces
+then netfilter will fail to reassemble them and the fragments will be
+expired without going any further through the filters.
+
+Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
+Signed-off-by: Tom Hughes <tom@compton.nu>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 87a394179092c..e4b45db8a3992 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+       };
+       struct inet_frag_queue *q;
++      if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
++                                          IPV6_ADDR_LINKLOCAL)))
++              key.iif = 0;
++
+       q = inet_frag_find(nf_frag->fqdir, &key);
+       if (!q)
+               return NULL;
+-- 
+2.43.0
+
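The one-line condition added above can be read as "the incoming device is only part of the reassembly key for multicast and link-local traffic". A sketch of that keying decision in plain C; struct frag_key and its fields are invented for illustration and do not mirror the kernel layout:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct frag_key {
        unsigned int id;
        int iif;                /* incoming interface, 0 = "any device" */
};

/* Only multicast and link-local destinations need per-device queues;
 * for everything else fragments may legitimately arrive on different
 * interfaces, so the device must not be part of the lookup key. */
static void frag_key_init(struct frag_key *key, unsigned int id,
                          int iif, bool mcast_or_linklocal)
{
        memset(key, 0, sizeof(*key));
        key->id = id;
        key->iif = mcast_or_linklocal ? iif : 0;
}

int main(void)
{
        struct frag_key k;

        frag_key_init(&k, 0x1234, 7, false);
        printf("id=%#x iif=%d\n", k.id, k.iif); /* iif=0: device ignored */
        return 0;
}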
diff --git a/queue-6.1/netfilter-flowtable-initialise-extack-before-use.patch b/queue-6.1/netfilter-flowtable-initialise-extack-before-use.patch
new file mode 100644 (file)
index 0000000..e100846
--- /dev/null
@@ -0,0 +1,37 @@
+From 626526b46724dc15fc75cd4e55e99dfcc4c13d7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2024 17:16:37 +0100
+Subject: netfilter: flowtable: initialise extack before use
+
+From: Donald Hunter <donald.hunter@gmail.com>
+
+[ Upstream commit e9767137308daf906496613fd879808a07f006a2 ]
+
+Fix missing initialisation of extack in flow offload.
+
+Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support")
+Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_flow_table_offload.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index 1c26f03fc6617..1904a4f295d4a 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
+                                struct list_head *block_cb_list)
+ {
+       struct flow_cls_offload cls_flow = {};
++      struct netlink_ext_ack extack = {};
+       struct flow_block_cb *block_cb;
+-      struct netlink_ext_ack extack;
+       __be16 proto = ETH_P_ALL;
+       int err, i = 0;
+-- 
+2.43.0
+
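The bug fixed above is the classic uninitialized-on-stack pattern: a structure declared without an initializer and later read. A minimal userspace illustration, with struct ext_ack as a hypothetical stand-in for struct netlink_ext_ack; the empty initializer is a GNU extension (standardized in C23) that zeroes every member, matching the fix:

#include <stdio.h>

/* Stand-in for struct netlink_ext_ack. */
struct ext_ack {
        const char *msg;
};

int main(void)
{
        /* Without an initializer the struct has indeterminate contents;
         * the empty initializer zeroes every member, so msg is a
         * well-defined NULL here. */
        struct ext_ack extack = {};

        printf("%s\n", extack.msg ? extack.msg : "(no message)");
        return 0;
}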
diff --git a/queue-6.1/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch b/queue-6.1/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch
new file mode 100644 (file)
index 0000000..29c85cc
--- /dev/null
@@ -0,0 +1,109 @@
+From 1def73ef443c37967929d2ffc08327e71875757d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Aug 2024 21:28:41 +0200
+Subject: netfilter: nf_queue: drop packets with cloned unconfirmed conntracks
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 7d8dc1c7be8d3509e8f5164dd5df64c8e34d7eeb ]
+
+Conntrack assumes an unconfirmed entry (not yet committed to global hash
+table) has a refcount of 1 and is not visible to other cores.
+
+With multicast forwarding this assumption breaks down because such
+skbs get cloned after being picked up, i.e.  ct->use refcount is > 1.
+
+Likewise, bridge netfilter will clone broad/multicast frames and
+all frames in case they need to be flood-forwarded during the
+learning phase.
+
+For ip multicast forwarding or plain bridge flood-forward this will
+"work" because packets don't leave softirq and are implicitly
+serialized.
+
+With nfqueue this no longer holds true; the packets get queued
+and can be reinjected in arbitrary ways.
+
+Disable this feature; I see no other solution.
+
+After this patch, nfqueue cannot queue packets except the last
+multicast/broadcast packet.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_netfilter_hooks.c |  6 +++++-
+ net/netfilter/nfnetlink_queue.c | 35 +++++++++++++++++++++++++++++++--
+ 2 files changed, 38 insertions(+), 3 deletions(-)
+
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 9ac70c27da835..9229300881b5f 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -618,8 +618,12 @@ static unsigned int br_nf_local_in(void *priv,
+       if (likely(nf_ct_is_confirmed(ct)))
+               return NF_ACCEPT;
++      if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
++              nf_reset_ct(skb);
++              return NF_ACCEPT;
++      }
++
+       WARN_ON_ONCE(skb_shared(skb));
+-      WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+       /* We can't call nf_confirm here, it would create a dependency
+        * on nf_conntrack module.
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 5bc342cb13767..f13eed826cbb8 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -647,10 +647,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+ {
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+-      const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
++      struct nf_conn *ct = (void *)skb_nfct(entry->skb);
++      unsigned long status;
++      unsigned int use;
+-      if (ct && ((ct->status & flags) == IPS_DYING))
++      if (!ct)
++              return false;
++
++      status = READ_ONCE(ct->status);
++      if ((status & flags) == IPS_DYING)
+               return true;
++
++      if (status & IPS_CONFIRMED)
++              return false;
++
++      /* in some cases skb_clone() can occur after initial conntrack
++       * pickup, but conntrack assumes exclusive skb->_nfct ownership for
++       * unconfirmed entries.
++       *
++       * This happens for br_netfilter and with ip multicast routing.
++       * This can't be solved with serialization here because one clone could
++       * have been queued for local delivery.
++       */
++      use = refcount_read(&ct->ct_general.use);
++      if (likely(use == 1))
++              return false;
++
++      /* Can't decrement further? Exclusive ownership. */
++      if (!refcount_dec_not_one(&ct->ct_general.use))
++              return false;
++
++      skb_set_nfct(entry->skb, 0);
++      /* No nf_ct_put(): we already decremented .use and it cannot
++       * drop down to 0.
++       */
++      return true;
+ #endif
+       return false;
+ }
+-- 
+2.43.0
+
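The drop logic added above relies on a conditional decrement: give up our reference only if it is provably not the last one. A userspace sketch of that primitive using C11 atomics; ref_dec_not_one() is a hypothetical analogue of the kernel's refcount_dec_not_one() with saturation handling omitted:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Decrement *ref unless that would consume the last reference. */
static bool ref_dec_not_one(atomic_uint *ref)
{
        unsigned int old = atomic_load(ref);

        while (old != 1) {
                if (atomic_compare_exchange_weak(ref, &old, old - 1))
                        return true;
                /* CAS failure reloaded 'old'; retry. */
        }
        return false;   /* we would have been the exclusive owner */
}

int main(void)
{
        atomic_uint use = 2;    /* e.g. the skb was cloned: two references */

        printf("%d\n", ref_dec_not_one(&use));  /* 1: dropped our reference  */
        printf("%d\n", ref_dec_not_one(&use));  /* 0: refused, count is 1    */
        return 0;
}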
diff --git a/queue-6.1/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch b/queue-6.1/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch
new file mode 100644 (file)
index 0000000..ac7e344
--- /dev/null
@@ -0,0 +1,104 @@
+From fdf14710e5c4439d1779dd253147392f5136e804 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:30 +0200
+Subject: netfilter: nf_tables: A better name for nft_obj_filter
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit ecf49cad807061d880bea27a5da8e0114ddc7690 ]
+
+Name it for what it is supposed to become, a real nft_obj_dump_ctx. No
+functional change intended.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 32 ++++++++++++++++----------------
+ 1 file changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 07140899a8d1d..f4bdfd5dd319a 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7416,7 +7416,7 @@ static void audit_log_obj_reset(const struct nft_table *table,
+       kfree(buf);
+ }
+-struct nft_obj_filter {
++struct nft_obj_dump_ctx {
+       char            *table;
+       u32             type;
+ };
+@@ -7426,7 +7426,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       const struct nft_table *table;
+       unsigned int idx = 0, s_idx = cb->args[0];
+-      struct nft_obj_filter *filter = cb->data;
++      struct nft_obj_dump_ctx *ctx = cb->data;
+       struct net *net = sock_net(skb->sk);
+       int family = nfmsg->nfgen_family;
+       struct nftables_pernet *nft_net;
+@@ -7452,10 +7452,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+                               goto cont;
+                       if (idx < s_idx)
+                               goto cont;
+-                      if (filter->table && strcmp(filter->table, table->name))
++                      if (ctx->table && strcmp(ctx->table, table->name))
+                               goto cont;
+-                      if (filter->type != NFT_OBJECT_UNSPEC &&
+-                          obj->ops->type->type != filter->type)
++                      if (ctx->type != NFT_OBJECT_UNSPEC &&
++                          obj->ops->type->type != ctx->type)
+                               goto cont;
+                       rc = nf_tables_fill_obj_info(skb, net,
+@@ -7487,33 +7487,33 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+ {
+       const struct nlattr * const *nla = cb->data;
+-      struct nft_obj_filter *filter = NULL;
++      struct nft_obj_dump_ctx *ctx = NULL;
+-      filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+-      if (!filter)
++      ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
++      if (!ctx)
+               return -ENOMEM;
+       if (nla[NFTA_OBJ_TABLE]) {
+-              filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+-              if (!filter->table) {
+-                      kfree(filter);
++              ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
++              if (!ctx->table) {
++                      kfree(ctx);
+                       return -ENOMEM;
+               }
+       }
+       if (nla[NFTA_OBJ_TYPE])
+-              filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
++              ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+-      cb->data = filter;
++      cb->data = ctx;
+       return 0;
+ }
+ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ {
+-      struct nft_obj_filter *filter = cb->data;
++      struct nft_obj_dump_ctx *ctx = cb->data;
+-      kfree(filter->table);
+-      kfree(filter);
++      kfree(ctx->table);
++      kfree(ctx);
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch b/queue-6.1/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch
new file mode 100644 (file)
index 0000000..4333fc0
--- /dev/null
@@ -0,0 +1,155 @@
+From fd171f72b640cf8201c3dcb6554051a0c9e8cddf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:32 +0200
+Subject: netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit bd662c4218f9648e888bebde9468146965f3f8a0 ]
+
+Objects' dump callbacks are not concurrency-safe per se with the reset
+bit set. If two CPUs perform a reset at the same time, at least counter and
+quota objects suffer from value underrun.
+
+Prevent this by introducing dedicated locking callbacks for nfnetlink
+and the asynchronous dump handling to serialize access.
+
+Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
+ 1 file changed, 59 insertions(+), 13 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 88eacfe746810..63b7be0a95d04 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7482,6 +7482,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+       return skb->len;
+ }
++static int nf_tables_dumpreset_obj(struct sk_buff *skb,
++                                 struct netlink_callback *cb)
++{
++      struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
++      int ret;
++
++      mutex_lock(&nft_net->commit_mutex);
++      ret = nf_tables_dump_obj(skb, cb);
++      mutex_unlock(&nft_net->commit_mutex);
++
++      return ret;
++}
++
+ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+ {
+       struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+@@ -7498,12 +7511,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+       if (nla[NFTA_OBJ_TYPE])
+               ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+-      if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+-              ctx->reset = true;
+-
+       return 0;
+ }
++static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
++{
++      struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
++
++      ctx->reset = true;
++
++      return nf_tables_dump_obj_start(cb);
++}
++
+ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ {
+       struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+@@ -7562,18 +7581,43 @@ nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
+ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
++{
++      u32 portid = NETLINK_CB(skb).portid;
++      struct sk_buff *skb2;
++
++      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
++              struct netlink_dump_control c = {
++                      .start = nf_tables_dump_obj_start,
++                      .dump = nf_tables_dump_obj,
++                      .done = nf_tables_dump_obj_done,
++                      .module = THIS_MODULE,
++                      .data = (void *)nla,
++              };
++
++              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
++      }
++
++      skb2 = nf_tables_getobj_single(portid, info, nla, false);
++      if (IS_ERR(skb2))
++              return PTR_ERR(skb2);
++
++      return nfnetlink_unicast(skb2, info->net, portid);
++}
++
++static int nf_tables_getobj_reset(struct sk_buff *skb,
++                                const struct nfnl_info *info,
++                                const struct nlattr * const nla[])
+ {
+       struct nftables_pernet *nft_net = nft_pernet(info->net);
+       u32 portid = NETLINK_CB(skb).portid;
+       struct net *net = info->net;
+       struct sk_buff *skb2;
+-      bool reset = false;
+       char *buf;
+       if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+-                      .start = nf_tables_dump_obj_start,
+-                      .dump = nf_tables_dump_obj,
++                      .start = nf_tables_dumpreset_obj_start,
++                      .dump = nf_tables_dumpreset_obj,
+                       .done = nf_tables_dump_obj_done,
+                       .module = THIS_MODULE,
+                       .data = (void *)nla,
+@@ -7582,16 +7626,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+               return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+       }
+-      if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+-              reset = true;
++      if (!try_module_get(THIS_MODULE))
++              return -EINVAL;
++      rcu_read_unlock();
++      mutex_lock(&nft_net->commit_mutex);
++      skb2 = nf_tables_getobj_single(portid, info, nla, true);
++      mutex_unlock(&nft_net->commit_mutex);
++      rcu_read_lock();
++      module_put(THIS_MODULE);
+-      skb2 = nf_tables_getobj_single(portid, info, nla, reset);
+       if (IS_ERR(skb2))
+               return PTR_ERR(skb2);
+-      if (!reset)
+-              return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+-
+       buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+                       nla_len(nla[NFTA_OBJ_TABLE]),
+                       (char *)nla_data(nla[NFTA_OBJ_TABLE]),
+@@ -8807,7 +8853,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+               .policy         = nft_obj_policy,
+       },
+       [NFT_MSG_GETOBJ_RESET] = {
+-              .call           = nf_tables_getobj,
++              .call           = nf_tables_getobj_reset,
+               .type           = NFNL_CB_RCU,
+               .attr_count     = NFTA_OBJ_MAX,
+               .policy         = nft_obj_policy,
+-- 
+2.43.0
+
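The essence of the fix above is that "dump and reset" is a read-modify-write operation and therefore needs the same serialization as a writer. A userspace sketch of such a reset path guarded by a mutex; the names are hypothetical and only illustrate the locking shape, not the nf_tables code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long counter = 1000;

/* Without the mutex, two concurrent callers could both read the same
 * value while the counter is reset twice, i.e. the value underrun
 * described in the commit message. */
static unsigned long long counter_read_and_reset(void)
{
        unsigned long long val;

        pthread_mutex_lock(&commit_mutex);
        val = counter;
        counter = 0;
        pthread_mutex_unlock(&commit_mutex);
        return val;
}

int main(void)
{
        printf("%llu\n", counter_read_and_reset());     /* 1000 */
        printf("%llu\n", counter_read_and_reset());     /* 0    */
        return 0;
}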
diff --git a/queue-6.1/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch b/queue-6.1/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch
new file mode 100644 (file)
index 0000000..fe8eee3
--- /dev/null
@@ -0,0 +1,91 @@
+From 0ae32f73aabe1bef2bc3caaae8624b550cee6792 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:30 +0200
+Subject: netfilter: nf_tables: Audit log dump reset after the fact
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit e0b6648b0446e59522819c75ba1dcb09e68d3e94 ]
+
+In theory, dumpreset may fail and invalidate the preceding log message.
+Fix this and use the occasion to prepare for object reset locking, which
+benefits from a few unrelated changes:
+
+* Add an early call to nfnetlink_unicast if not resetting, which
+  effectively skips the audit logging but also unindents it.
+* Extract the table's name from the netlink attribute (which is verified
+  via earlier table lookup) to not rely upon validity of the looked up
+  table pointer.
+* Do not use local variable family, it will vanish.
+
+Fixes: 8e6cf365e1d5 ("audit: log nftables configuration change events")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 28 +++++++++++++---------------
+ 1 file changed, 13 insertions(+), 15 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 10180d280e792..747033129c0fe 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7531,6 +7531,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                           const struct nlattr * const nla[])
+ {
++      const struct nftables_pernet *nft_net = nft_pernet(info->net);
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_cur(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+@@ -7540,6 +7541,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       struct sk_buff *skb2;
+       bool reset = false;
+       u32 objtype;
++      char *buf;
+       int err;
+       if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+@@ -7578,27 +7580,23 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+               reset = true;
+-      if (reset) {
+-              const struct nftables_pernet *nft_net;
+-              char *buf;
+-
+-              nft_net = nft_pernet(net);
+-              buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+-
+-              audit_log_nfcfg(buf,
+-                              family,
+-                              1,
+-                              AUDIT_NFT_OP_OBJ_RESET,
+-                              GFP_ATOMIC);
+-              kfree(buf);
+-      }
+-
+       err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+                                     info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+                                     family, table, obj, reset);
+       if (err < 0)
+               goto err_fill_obj_info;
++      if (!reset)
++              return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
++
++      buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
++                      nla_len(nla[NFTA_OBJ_TABLE]),
++                      (char *)nla_data(nla[NFTA_OBJ_TABLE]),
++                      nft_net->base_seq);
++      audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
++                      AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
++      kfree(buf);
++
+       return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ err_fill_obj_info:
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch b/queue-6.1/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch
new file mode 100644 (file)
index 0000000..c99b962
--- /dev/null
@@ -0,0 +1,76 @@
+From ec29bf4a24ab506fc3854345fdcf1b4a82ce2748 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:33 +0200
+Subject: netfilter: nf_tables: Carry reset boolean in nft_obj_dump_ctx
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit a552339063d37b3b1133d9dfc31f851edafb27bb ]
+
+Relieve the dump callback from having to inspect nlmsg_type upon each
+call, just do it once at start of the dump.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 05c93af417120..38a5e5c5530c7 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7420,6 +7420,7 @@ struct nft_obj_dump_ctx {
+       unsigned int    s_idx;
+       char            *table;
+       u32             type;
++      bool            reset;
+ };
+ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+@@ -7433,12 +7434,8 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+       unsigned int entries = 0;
+       struct nft_object *obj;
+       unsigned int idx = 0;
+-      bool reset = false;
+       int rc = 0;
+-      if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+-              reset = true;
+-
+       rcu_read_lock();
+       nft_net = nft_pernet(net);
+       cb->seq = READ_ONCE(nft_net->base_seq);
+@@ -7465,7 +7462,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+                                                    NFT_MSG_NEWOBJ,
+                                                    NLM_F_MULTI | NLM_F_APPEND,
+                                                    table->family, table,
+-                                                   obj, reset);
++                                                   obj, ctx->reset);
+                       if (rc < 0)
+                               break;
+@@ -7474,7 +7471,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ cont:
+                       idx++;
+               }
+-              if (reset && entries)
++              if (ctx->reset && entries)
+                       audit_log_obj_reset(table, nft_net->base_seq, entries);
+               if (rc < 0)
+                       break;
+@@ -7501,6 +7498,9 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+       if (nla[NFTA_OBJ_TYPE])
+               ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
++      if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
++              ctx->reset = true;
++
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch b/queue-6.1/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch
new file mode 100644 (file)
index 0000000..8a1b34a
--- /dev/null
@@ -0,0 +1,70 @@
+From b6a44b21e1666124f804bc5ec1275318b8f54529 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:31 +0200
+Subject: netfilter: nf_tables: Carry s_idx in nft_obj_dump_ctx
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 2eda95cfa2fc43bcb21a801dc1d16a0b7cc73860 ]
+
+Prep work for moving the context into struct netlink_callback scratch
+area.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index f4bdfd5dd319a..48cd3e2dde69c 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7417,6 +7417,7 @@ static void audit_log_obj_reset(const struct nft_table *table,
+ }
+ struct nft_obj_dump_ctx {
++      unsigned int    s_idx;
+       char            *table;
+       u32             type;
+ };
+@@ -7424,14 +7425,14 @@ struct nft_obj_dump_ctx {
+ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+-      const struct nft_table *table;
+-      unsigned int idx = 0, s_idx = cb->args[0];
+       struct nft_obj_dump_ctx *ctx = cb->data;
+       struct net *net = sock_net(skb->sk);
+       int family = nfmsg->nfgen_family;
+       struct nftables_pernet *nft_net;
++      const struct nft_table *table;
+       unsigned int entries = 0;
+       struct nft_object *obj;
++      unsigned int idx = 0;
+       bool reset = false;
+       int rc = 0;
+@@ -7450,7 +7451,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+               list_for_each_entry_rcu(obj, &table->objects, list) {
+                       if (!nft_is_active(net, obj))
+                               goto cont;
+-                      if (idx < s_idx)
++                      if (idx < ctx->s_idx)
+                               goto cont;
+                       if (ctx->table && strcmp(ctx->table, table->name))
+                               goto cont;
+@@ -7480,7 +7481,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+       }
+       rcu_read_unlock();
+-      cb->args[0] = idx;
++      ctx->s_idx = idx;
+       return skb->len;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch b/queue-6.1/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch
new file mode 100644 (file)
index 0000000..f2c0392
--- /dev/null
@@ -0,0 +1,37 @@
+From 5e08d0c33a60f878931cd1f921128664f5e119a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:28 +0200
+Subject: netfilter: nf_tables: Drop pointless memset in nf_tables_dump_obj
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit ff16111cc10c82ee065ffbd9fa8d6210394ff8c6 ]
+
+The code does not make use of cb->args fields past the first one, no
+need to zero them.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 747033129c0fe..ddf84f226822b 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7452,9 +7452,6 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+                               goto cont;
+                       if (idx < s_idx)
+                               goto cont;
+-                      if (idx > s_idx)
+-                              memset(&cb->args[1], 0,
+-                                     sizeof(cb->args) - sizeof(cb->args[0]));
+                       if (filter && filter->table &&
+                           strcmp(filter->table, table->name))
+                               goto cont;
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch b/queue-6.1/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch
new file mode 100644 (file)
index 0000000..de24c07
--- /dev/null
@@ -0,0 +1,148 @@
+From b194a9d5573f11c865c8284a584363cfa63a299b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Aug 2024 15:07:31 +0200
+Subject: netfilter: nf_tables: Introduce nf_tables_getobj_single
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 69fc3e9e90f1afc11f4015e6b75d18ab9acee348 ]
+
+Outsource the reply skb preparation for non-dump getobj requests into a
+distinct function. Prep work for object reset locking.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 75 ++++++++++++++++++++---------------
+ 1 file changed, 44 insertions(+), 31 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 38a5e5c5530c7..88eacfe746810 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7514,10 +7514,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ }
+ /* called with rcu_read_lock held */
+-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+-                          const struct nlattr * const nla[])
++static struct sk_buff *
++nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
++                      const struct nlattr * const nla[], bool reset)
+ {
+-      const struct nftables_pernet *nft_net = nft_pernet(info->net);
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_cur(info->net);
+       u8 family = info->nfmsg->nfgen_family;
+@@ -7525,52 +7525,69 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+       struct net *net = info->net;
+       struct nft_object *obj;
+       struct sk_buff *skb2;
+-      bool reset = false;
+       u32 objtype;
+-      char *buf;
+       int err;
+-      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+-              struct netlink_dump_control c = {
+-                      .start = nf_tables_dump_obj_start,
+-                      .dump = nf_tables_dump_obj,
+-                      .done = nf_tables_dump_obj_done,
+-                      .module = THIS_MODULE,
+-                      .data = (void *)nla,
+-              };
+-
+-              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+-      }
+-
+       if (!nla[NFTA_OBJ_NAME] ||
+           !nla[NFTA_OBJ_TYPE])
+-              return -EINVAL;
++              return ERR_PTR(-EINVAL);
+       table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
+       if (IS_ERR(table)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+-              return PTR_ERR(table);
++              return ERR_CAST(table);
+       }
+       objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+       obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
+       if (IS_ERR(obj)) {
+               NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+-              return PTR_ERR(obj);
++              return ERR_CAST(obj);
+       }
+       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+       if (!skb2)
+-              return -ENOMEM;
++              return ERR_PTR(-ENOMEM);
++
++      err = nf_tables_fill_obj_info(skb2, net, portid,
++                                    info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
++                                    family, table, obj, reset);
++      if (err < 0) {
++              kfree_skb(skb2);
++              return ERR_PTR(err);
++      }
++
++      return skb2;
++}
++
++static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
++                          const struct nlattr * const nla[])
++{
++      struct nftables_pernet *nft_net = nft_pernet(info->net);
++      u32 portid = NETLINK_CB(skb).portid;
++      struct net *net = info->net;
++      struct sk_buff *skb2;
++      bool reset = false;
++      char *buf;
++
++      if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
++              struct netlink_dump_control c = {
++                      .start = nf_tables_dump_obj_start,
++                      .dump = nf_tables_dump_obj,
++                      .done = nf_tables_dump_obj_done,
++                      .module = THIS_MODULE,
++                      .data = (void *)nla,
++              };
++
++              return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
++      }
+       if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+               reset = true;
+-      err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+-                                    info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+-                                    family, table, obj, reset);
+-      if (err < 0)
+-              goto err_fill_obj_info;
++      skb2 = nf_tables_getobj_single(portid, info, nla, reset);
++      if (IS_ERR(skb2))
++              return PTR_ERR(skb2);
+       if (!reset)
+               return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+@@ -7583,11 +7600,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+                       AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+       kfree(buf);
+-      return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+-
+-err_fill_obj_info:
+-      kfree_skb(skb2);
+-      return err;
++      return nfnetlink_unicast(skb2, net, portid);
+ }
+ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch b/queue-6.1/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch
new file mode 100644 (file)
index 0000000..e81d3ba
--- /dev/null
@@ -0,0 +1,75 @@
+From 883be28063c1849b7ad0d0bdea6570577292279f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:32 +0200
+Subject: netfilter: nf_tables: nft_obj_filter fits into cb->ctx
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 5a893b9cdf6fa5758f43d323a1d7fa6d1bf489ff ]
+
+No need to allocate it if one may just use struct netlink_callback's
+scratch area for it.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 48cd3e2dde69c..05c93af417120 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7425,7 +7425,7 @@ struct nft_obj_dump_ctx {
+ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+-      struct nft_obj_dump_ctx *ctx = cb->data;
++      struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+       struct net *net = sock_net(skb->sk);
+       int family = nfmsg->nfgen_family;
+       struct nftables_pernet *nft_net;
+@@ -7487,34 +7487,28 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+ {
++      struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+       const struct nlattr * const *nla = cb->data;
+-      struct nft_obj_dump_ctx *ctx = NULL;
+-      ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+-      if (!ctx)
+-              return -ENOMEM;
++      BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+       if (nla[NFTA_OBJ_TABLE]) {
+               ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+-              if (!ctx->table) {
+-                      kfree(ctx);
++              if (!ctx->table)
+                       return -ENOMEM;
+-              }
+       }
+       if (nla[NFTA_OBJ_TYPE])
+               ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+-      cb->data = ctx;
+       return 0;
+ }
+ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ {
+-      struct nft_obj_dump_ctx *ctx = cb->data;
++      struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+       kfree(ctx->table);
+-      kfree(ctx);
+       return 0;
+ }
+-- 
+2.43.0
+
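Reusing the callback's fixed-size scratch area only works as long as the private context actually fits, which is what the BUILD_BUG_ON() in the patch above enforces at compile time. A userspace sketch of the same compile-time check; struct callback and its 48-byte ctx[] are invented for illustration and do not match struct netlink_callback:

#include <assert.h>     /* static_assert (C11) */

/* Generic callback with a small scratch area. */
struct callback {
        unsigned char ctx[48];
};

struct dump_ctx {
        unsigned int s_idx;
        char *table;
        unsigned int type;
        _Bool reset;
};

/* Refuse to compile if the private context ever outgrows the scratch area. */
static_assert(sizeof(struct dump_ctx) <= sizeof(((struct callback *)0)->ctx),
              "dump_ctx no longer fits into the callback scratch area");

int main(void)
{
        return 0;
}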
diff --git a/queue-6.1/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch b/queue-6.1/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch
new file mode 100644 (file)
index 0000000..816ba12
--- /dev/null
@@ -0,0 +1,89 @@
+From 11d90876b386f104cb161a4c1f268c84946bcfab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 19:34:29 +0200
+Subject: netfilter: nf_tables: Unconditionally allocate nft_obj_filter
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 4279cc60b354d2d2b970655a70a151cbfa1d958b ]
+
+Prep work for moving the filter into struct netlink_callback's scratch
+area.
+
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 36 +++++++++++++++--------------------
+ 1 file changed, 15 insertions(+), 21 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index ddf84f226822b..07140899a8d1d 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -7452,11 +7452,9 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+                               goto cont;
+                       if (idx < s_idx)
+                               goto cont;
+-                      if (filter && filter->table &&
+-                          strcmp(filter->table, table->name))
++                      if (filter->table && strcmp(filter->table, table->name))
+                               goto cont;
+-                      if (filter &&
+-                          filter->type != NFT_OBJECT_UNSPEC &&
++                      if (filter->type != NFT_OBJECT_UNSPEC &&
+                           obj->ops->type->type != filter->type)
+                               goto cont;
+@@ -7491,23 +7489,21 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+       const struct nlattr * const *nla = cb->data;
+       struct nft_obj_filter *filter = NULL;
+-      if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
+-              filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+-              if (!filter)
+-                      return -ENOMEM;
++      filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
++      if (!filter)
++              return -ENOMEM;
+-              if (nla[NFTA_OBJ_TABLE]) {
+-                      filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+-                      if (!filter->table) {
+-                              kfree(filter);
+-                              return -ENOMEM;
+-                      }
++      if (nla[NFTA_OBJ_TABLE]) {
++              filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
++              if (!filter->table) {
++                      kfree(filter);
++                      return -ENOMEM;
+               }
+-
+-              if (nla[NFTA_OBJ_TYPE])
+-                      filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+       }
++      if (nla[NFTA_OBJ_TYPE])
++              filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
++
+       cb->data = filter;
+       return 0;
+ }
+@@ -7516,10 +7512,8 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+ {
+       struct nft_obj_filter *filter = cb->data;
+-      if (filter) {
+-              kfree(filter->table);
+-              kfree(filter);
+-      }
++      kfree(filter->table);
++      kfree(filter);
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nilfs2-initialize-struct-nilfs_binfo_dat-bi_pad-fiel.patch b/queue-6.1/nilfs2-initialize-struct-nilfs_binfo_dat-bi_pad-fiel.patch
new file mode 100644 (file)
index 0000000..bb42350
--- /dev/null
@@ -0,0 +1,55 @@
+From 90f3f6a64c4b5c5f4df39d11561f02c4ff9a75f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Mar 2023 00:21:46 +0900
+Subject: nilfs2: initialize "struct nilfs_binfo_dat"->bi_pad field
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7397031622e05ca206e2d674ec199d6bb66fc9ba ]
+
+nilfs_btree_assign_p() and nilfs_direct_assign_p() are not initializing
+the "struct nilfs_binfo_dat"->bi_pad field, causing uninit-value reports
+when it is passed to the CRC function.
+
+Link: https://lkml.kernel.org/r/20230326152146.15872-1-konishi.ryusuke@gmail.com
+Reported-by: syzbot <syzbot+048585f3f4227bb2b49b@syzkaller.appspotmail.com>
+  Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b
+Reported-by: Dipanjan Das <mail.dipanjan.das@gmail.com>
+  Link: https://lkml.kernel.org/r/CANX2M5bVbzRi6zH3PTcNE_31TzerstOXUa9Bay4E6y6dX23_pg@mail.gmail.com
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: Alexander Potapenko <glider@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nilfs2/btree.c  | 1 +
+ fs/nilfs2/direct.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
+index bd24a33fc72e1..42617080a8384 100644
+--- a/fs/nilfs2/btree.c
++++ b/fs/nilfs2/btree.c
+@@ -2224,6 +2224,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
+       /* on-disk format */
+       binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
+       binfo->bi_dat.bi_level = level;
++      memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
+       return 0;
+ }
+diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
+index 8f802f7b0840b..893ab36824cc2 100644
+--- a/fs/nilfs2/direct.c
++++ b/fs/nilfs2/direct.c
+@@ -319,6 +319,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
+       binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
+       binfo->bi_dat.bi_level = 0;
++      memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
+       return 0;
+ }
+-- 
+2.43.0
+
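The underlying issue above is generic: any padding bytes covered by a checksum must be initialized, otherwise two logically identical records can produce different checksums. A small userspace illustration with a toy byte-wise checksum; struct binfo and checksum() are hypothetical and only loosely modelled on the nilfs2 structures:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* An on-disk record with an explicit padding field that is covered
 * by the checksum. */
struct binfo {
        uint64_t blkoff;
        uint8_t  level;
        uint8_t  pad[7];
};

/* Toy byte-wise checksum standing in for the CRC. */
static uint32_t checksum(const void *buf, size_t len)
{
        const uint8_t *p = buf;
        uint32_t sum = 0;

        while (len--)
                sum = sum * 31 + *p++;
        return sum;
}

int main(void)
{
        struct binfo a, b;

        /* Zero the records (including pad) before filling them in; if
         * the pad bytes were left indeterminate, the two checksums
         * below would not be guaranteed to match. */
        memset(&a, 0, sizeof(a));
        memset(&b, 0, sizeof(b));
        a.blkoff = b.blkoff = 42;
        a.level  = b.level  = 1;

        printf("%u %u\n", checksum(&a, sizeof(a)), checksum(&b, sizeof(b)));
        return 0;
}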
diff --git a/queue-6.1/nilfs2-prevent-warning-in-nilfs_dat_commit_end.patch b/queue-6.1/nilfs2-prevent-warning-in-nilfs_dat_commit_end.patch
new file mode 100644 (file)
index 0000000..db7fb26
--- /dev/null
@@ -0,0 +1,75 @@
+From afd7452797528fbf2351518041948586cf73cd23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Jan 2023 22:22:02 +0900
+Subject: nilfs2: prevent WARNING in nilfs_dat_commit_end()
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+[ Upstream commit 602ce7b8e1343b19c0cf93a3dd1926838ac5a1cc ]
+
+If nilfs2 reads a corrupted disk image and its DAT metadata file contains
+invalid lifetime data for a virtual block number, a kernel warning can be
+generated by the WARN_ON check in nilfs_dat_commit_end() and can panic if
+the kernel is booted with panic_on_warn.
+
+This patch avoids the issue with a sanity check that treats it as an
+error.
+
+Since error return is not allowed in the execution phase of
+nilfs_dat_commit_end(), this patch inserts the sanity check in
+nilfs_dat_prepare_end(), which prepares for nilfs_dat_commit_end().
+
+As the error code, -EINVAL is returned to notify bmap layer of the
+metadata corruption.  When the bmap layer sees this code, it handles the
+abnormal situation and replaces the return code with -EIO as it should.
+
+Link: https://lkml.kernel.org/r/000000000000154d2c05e9ec7df6@google.com
+Link: https://lkml.kernel.org/r/20230127132202.6083-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: <syzbot+cbff7a52b6f99059e67f@syzkaller.appspotmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nilfs2/dat.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
+index 242cc36bf1e97..351010828d883 100644
+--- a/fs/nilfs2/dat.c
++++ b/fs/nilfs2/dat.c
+@@ -158,6 +158,7 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
+ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
+ {
+       struct nilfs_dat_entry *entry;
++      __u64 start;
+       sector_t blocknr;
+       void *kaddr;
+       int ret;
+@@ -169,6 +170,7 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
+       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
+                                            req->pr_entry_bh, kaddr);
++      start = le64_to_cpu(entry->de_start);
+       blocknr = le64_to_cpu(entry->de_blocknr);
+       kunmap_atomic(kaddr);
+@@ -179,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
+                       return ret;
+               }
+       }
++      if (unlikely(start > nilfs_mdt_cno(dat))) {
++              nilfs_err(dat->i_sb,
++                        "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)",
++                        (unsigned long long)req->pr_entry_nr,
++                        (unsigned long long)start,
++                        (unsigned long long)nilfs_mdt_cno(dat));
++              nilfs_dat_abort_entry(dat, req);
++              return -EINVAL;
++      }
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/pid-replace-struct-pid-1-element-array-with-flex-arr.patch b/queue-6.1/pid-replace-struct-pid-1-element-array-with-flex-arr.patch
new file mode 100644 (file)
index 0000000..743e959
--- /dev/null
@@ -0,0 +1,93 @@
+From 16c4d3cca8f2167365f9bfb9272e2221e9bbef52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 09:46:17 +0200
+Subject: pid: Replace struct pid 1-element array with flex-array
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit b69f0aeb068980af983d399deafc7477cec8bc04 ]
+
+For pid namespaces, struct pid uses a dynamically sized array member,
+"numbers".  This was implemented using the ancient 1-element fake
+flexible array, which has been deprecated for decades.
+
+Replace it with a C99 flexible array, refactor the array size
+calculations to use struct_size(), and address elements via indexes.
+Note that the static initializer (which defines a single element) works
+as-is, and requires no special handling.
+
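+As a rough, self-contained sketch of the idiom (struct_size() below is a
+simplified stand-in for the kernel helper, which additionally checks for
+overflow; struct pid_example and its fields are illustrative only):
+
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	struct upid_example { int nr; };
+
+	struct pid_example {
+		int level;
+		struct upid_example numbers[];	/* C99 flexible array member */
+	};
+
+	/* simplified stand-in for the kernel's struct_size() */
+	#define struct_size(p, member, n) \
+		(sizeof(*(p)) + (n) * sizeof((p)->member[0]))
+
+	int main(void)
+	{
+		int level = 2;
+		struct pid_example *p;
+
+		/* room for the struct plus 'level + 1' trailing elements */
+		p = malloc(struct_size(p, numbers, level + 1));
+		if (!p)
+			return 1;
+		p->level = level;
+		p->numbers[level].nr = 42;	/* highest valid index */
+		printf("allocated %zu bytes\n",
+		       struct_size(p, numbers, level + 1));
+		free(p);
+		return 0;
+	}
+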
+Without this, CONFIG_UBSAN_BOUNDS (and potentially
+CONFIG_FORTIFY_SOURCE) will trigger bounds checks:
+
+  https://lore.kernel.org/lkml/20230517-bushaltestelle-super-e223978c1ba6@brauner
+
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jeff Xu <jeffxu@google.com>
+Cc: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: Daniel Verkamp <dverkamp@chromium.org>
+Cc: "Paul E. McKenney" <paulmck@kernel.org>
+Cc: Jeff Xu <jeffxu@google.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Boqun Feng <boqun.feng@gmail.com>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Frederic Weisbecker <frederic@kernel.org>
+Reported-by: syzbot+ac3b41786a2d0565b6d5@syzkaller.appspotmail.com
+[brauner: dropped unrelated changes and remove 0 with NULL cast]
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/pid.h    | 2 +-
+ kernel/pid.c           | 7 +++++--
+ kernel/pid_namespace.c | 2 +-
+ 3 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/pid.h b/include/linux/pid.h
+index 343abf22092e6..bf3af54de6165 100644
+--- a/include/linux/pid.h
++++ b/include/linux/pid.h
+@@ -67,7 +67,7 @@ struct pid
+       /* wait queue for pidfd notifications */
+       wait_queue_head_t wait_pidfd;
+       struct rcu_head rcu;
+-      struct upid numbers[1];
++      struct upid numbers[];
+ };
+ extern struct pid init_struct_pid;
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 3fbc5e46b7217..74834c04a0818 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -661,8 +661,11 @@ void __init pid_idr_init(void)
+       idr_init(&init_pid_ns.idr);
+-      init_pid_ns.pid_cachep = KMEM_CACHE(pid,
+-                      SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
++      init_pid_ns.pid_cachep = kmem_cache_create("pid",
++                      struct_size((struct pid *)NULL, numbers, 1),
++                      __alignof__(struct pid),
++                      SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
++                      NULL);
+ }
+ static struct file *__pidfd_fget(struct task_struct *task, int fd)
+diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
+index 1daadbefcee3a..a575fabf697eb 100644
+--- a/kernel/pid_namespace.c
++++ b/kernel/pid_namespace.c
+@@ -47,7 +47,7 @@ static struct kmem_cache *create_pid_cachep(unsigned int level)
+               return kc;
+       snprintf(name, sizeof(name), "pid_%u", level + 1);
+-      len = sizeof(struct pid) + level * sizeof(struct upid);
++      len = struct_size((struct pid *)NULL, numbers, level + 1);
+       mutex_lock(&pid_caches_mutex);
+       /* Name collision forces to do allocation under mutex. */
+       if (!*pkc)
+-- 
+2.43.0
+
diff --git a/queue-6.1/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch b/queue-6.1/posix-timers-ensure-timer-id-search-loop-limit-is-va.patch
new file mode 100644 (file)
index 0000000..01a9e47
--- /dev/null
@@ -0,0 +1,115 @@
+From ec7c5fe374c27a219a435db2e98afd22e3eb99fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Jun 2023 20:58:47 +0200
+Subject: posix-timers: Ensure timer ID search-loop limit is valid
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 8ce8849dd1e78dadcee0ec9acbd259d239b7069f ]
+
+posix_timer_add() tries to allocate a posix timer ID by starting from the
+cached ID which was stored by the last successful allocation.
+
+This is done in a loop searching the ID space for a free slot one by
+one. The loop has to terminate when the search wrapped around to the
+starting point.
+
+But that's racy vs. establishing the starting point. That is read out
+lockless, which leads to the following problem:
+
+CPU0                              CPU1
+posix_timer_add()
+  start = sig->posix_timer_id;
+  lock(hash_lock);
+  ...                             posix_timer_add()
+  if (++sig->posix_timer_id < 0)
+                                    start = sig->posix_timer_id;
+     sig->posix_timer_id = 0;
+
+So CPU1 can observe a negative start value, i.e. -1, and the loop break
+never happens because the condition can never be true:
+
+  if (sig->posix_timer_id == start)
+     break;
+
+While this is unlikely to ever turn into an endless loop as the ID space is
+huge (INT_MAX), the racy read of the start value caught the attention of
+KCSAN and Dmitry unearthed that incorrectness.
+
+Rewrite it so that all id operations are under the hash lock.
+
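+For reference, the ID now advances under the lock and is clamped to the
+non-negative range; a tiny standalone sketch of just that step (not the
+kernel code, locking and hashing omitted):
+
+	#include <limits.h>
+
+	/* (INT_MAX + 1) wraps back to 0, so the ID never goes negative */
+	static unsigned int next_posix_timer_id(unsigned int id)
+	{
+		return (id + 1) & INT_MAX;
+	}
+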
+Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched/signal.h |  2 +-
+ kernel/time/posix-timers.c   | 31 ++++++++++++++++++-------------
+ 2 files changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
+index 20099268fa257..669e8cff40c74 100644
+--- a/include/linux/sched/signal.h
++++ b/include/linux/sched/signal.h
+@@ -135,7 +135,7 @@ struct signal_struct {
+ #ifdef CONFIG_POSIX_TIMERS
+       /* POSIX.1b Interval Timers */
+-      int                     posix_timer_id;
++      unsigned int            next_posix_timer_id;
+       struct list_head        posix_timers;
+       /* ITIMER_REAL timer for the process */
+diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
+index ed3c4a9543982..2d6cf93ca370a 100644
+--- a/kernel/time/posix-timers.c
++++ b/kernel/time/posix-timers.c
+@@ -140,25 +140,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id)
+ static int posix_timer_add(struct k_itimer *timer)
+ {
+       struct signal_struct *sig = current->signal;
+-      int first_free_id = sig->posix_timer_id;
+       struct hlist_head *head;
+-      int ret = -ENOENT;
++      unsigned int cnt, id;
+-      do {
++      /*
++       * FIXME: Replace this by a per signal struct xarray once there is
++       * a plan to handle the resulting CRIU regression gracefully.
++       */
++      for (cnt = 0; cnt <= INT_MAX; cnt++) {
+               spin_lock(&hash_lock);
+-              head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+-              if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
++              id = sig->next_posix_timer_id;
++
++              /* Write the next ID back. Clamp it to the positive space */
++              sig->next_posix_timer_id = (id + 1) & INT_MAX;
++
++              head = &posix_timers_hashtable[hash(sig, id)];
++              if (!__posix_timers_find(head, sig, id)) {
+                       hlist_add_head_rcu(&timer->t_hash, head);
+-                      ret = sig->posix_timer_id;
++                      spin_unlock(&hash_lock);
++                      return id;
+               }
+-              if (++sig->posix_timer_id < 0)
+-                      sig->posix_timer_id = 0;
+-              if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+-                      /* Loop over all possible ids completed */
+-                      ret = -EAGAIN;
+               spin_unlock(&hash_lock);
+-      } while (ret == -ENOENT);
+-      return ret;
++      }
++      /* POSIX return code when no timer ID could be allocated */
++      return -EAGAIN;
+ }
+ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
+-- 
+2.43.0
+
diff --git a/queue-6.1/pppoe-fix-memory-leak-in-pppoe_sendmsg.patch b/queue-6.1/pppoe-fix-memory-leak-in-pppoe_sendmsg.patch
new file mode 100644 (file)
index 0000000..9dae951
--- /dev/null
@@ -0,0 +1,85 @@
+From 9ae4690a51316ede322f8a1ccfd804fd99e9a773 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Feb 2024 09:01:50 +0000
+Subject: pppoe: Fix memory leak in pppoe_sendmsg()
+
+From: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+
+[ Upstream commit dc34ebd5c018b0edf47f39d11083ad8312733034 ]
+
+syzbot reports a memory leak in pppoe_sendmsg [1].
+
+The problem is that pppoe_recvmsg() handles the errors from
+skb_recv_datagram() in the wrong order: the skb pointer must be checked
+for NULL first, and only then the 'error' variable, because
+skb_recv_datagram() can set 'error' to -EAGAIN inside its retry loop and
+still return a valid socket buffer after a number of attempts, leaving
+'error' set to -EAGAIN.
+
+skb_recv_datagram
+      __skb_recv_datagram          // Loop. if (err == -EAGAIN) then
+                                   // go to the next loop iteration
+          __skb_try_recv_datagram  // if (skb != NULL) then return 'skb'
+                                   // else if a signal is received then
+                                   // return -EAGAIN
+
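+A minimal standalone illustration of why the order matters (recv_one() is
+a made-up stand-in that mimics skb_recv_datagram()'s behaviour of leaving
+a stale -EAGAIN behind while still returning a valid buffer):
+
+	#include <stdio.h>
+	#include <errno.h>
+
+	static const char *recv_one(int *err)
+	{
+		*err = -EAGAIN;		/* set on an earlier attempt */
+		return "payload";	/* ...but a buffer is returned anyway */
+	}
+
+	int main(void)
+	{
+		int err = 0;
+		const char *buf = recv_one(&err);
+
+		if (!buf)		/* check the buffer first */
+			return -err;	/* only then is 'err' meaningful */
+		printf("got \"%s\" despite err=%d\n", buf, err);
+		return 0;
+	}
+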
+Found by InfoTeCS on behalf of Linux Verification Center
+(linuxtesting.org) with Syzkaller.
+
+Link: https://syzkaller.appspot.com/bug?extid=6bdfd184eac7709e5cc9 [1]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot+6bdfd184eac7709e5cc9@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=6bdfd184eac7709e5cc9
+Signed-off-by: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Link: https://lore.kernel.org/r/20240214085814.3894917-1-Ilia.Gavrilov@infotecs.ru
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/pppoe.c | 23 +++++++++--------------
+ 1 file changed, 9 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index ce2cbb5903d7b..c6f44af35889d 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+       struct sk_buff *skb;
+       int error = 0;
+-      if (sk->sk_state & PPPOX_BOUND) {
+-              error = -EIO;
+-              goto end;
+-      }
++      if (sk->sk_state & PPPOX_BOUND)
++              return -EIO;
+       skb = skb_recv_datagram(sk, flags, &error);
+-      if (error < 0)
+-              goto end;
++      if (!skb)
++              return error;
+-      if (skb) {
+-              total_len = min_t(size_t, total_len, skb->len);
+-              error = skb_copy_datagram_msg(skb, 0, m, total_len);
+-              if (error == 0) {
+-                      consume_skb(skb);
+-                      return total_len;
+-              }
++      total_len = min_t(size_t, total_len, skb->len);
++      error = skb_copy_datagram_msg(skb, 0, m, total_len);
++      if (error == 0) {
++              consume_skb(skb);
++              return total_len;
+       }
+       kfree_skb(skb);
+-end:
+       return error;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/quota-detect-loops-in-quota-tree.patch b/queue-6.1/quota-detect-loops-in-quota-tree.patch
new file mode 100644 (file)
index 0000000..0cc45ec
--- /dev/null
@@ -0,0 +1,326 @@
+From ab276cfae9acd924981dfe7abf0e64f0e4b3d4e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Feb 2024 19:12:15 +0100
+Subject: quota: Detect loops in quota tree
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit a898cb621ac589b0b9e959309689a027e765aa12 ]
+
+Syzbot has found that when it creates corrupted quota files where the
+quota tree contains a loop, we will deadlock when trying to insert a
+dquot. Add loop detection into the functions traversing the quota tree.
+
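+The detection keeps the chain of block numbers on the current path in a
+small array and refuses to descend into (or allocate) a block that is
+already part of that path.  A standalone sketch of the check (array and
+function names are illustrative, not the kernel code):
+
+	#include <stdbool.h>
+
+	/* blks[0..depth] holds the block numbers from the root down;
+	 * the caller bounds depth by MAX_QTREE_DEPTH (6 in the patch) */
+	static bool block_on_path(const unsigned int *blks, int depth,
+				  unsigned int newblk)
+	{
+		for (int i = 0; i <= depth; i++)
+			if (newblk == blks[i])
+				return true;	/* cycle detected */
+		return false;
+	}
+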
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/quota/quota_tree.c | 128 +++++++++++++++++++++++++++++++-----------
+ fs/quota/quota_v2.c   |  15 +++--
+ 2 files changed, 105 insertions(+), 38 deletions(-)
+
+diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
+index 0f1493e0f6d05..254f6359b287f 100644
+--- a/fs/quota/quota_tree.c
++++ b/fs/quota/quota_tree.c
+@@ -21,6 +21,12 @@ MODULE_AUTHOR("Jan Kara");
+ MODULE_DESCRIPTION("Quota trie support");
+ MODULE_LICENSE("GPL");
++/*
++ * Maximum quota tree depth we support. Only to limit recursion when working
++ * with the tree.
++ */
++#define MAX_QTREE_DEPTH 6
++
+ #define __QUOTA_QT_PARANOIA
+ static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+@@ -327,27 +333,36 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+ /* Insert reference to structure into the trie */
+ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+-                        uint *treeblk, int depth)
++                        uint *blks, int depth)
+ {
+       char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+       int ret = 0, newson = 0, newact = 0;
+       __le32 *ref;
+       uint newblk;
++      int i;
+       if (!buf)
+               return -ENOMEM;
+-      if (!*treeblk) {
++      if (!blks[depth]) {
+               ret = get_free_dqblk(info);
+               if (ret < 0)
+                       goto out_buf;
+-              *treeblk = ret;
++              for (i = 0; i < depth; i++)
++                      if (ret == blks[i]) {
++                              quota_error(dquot->dq_sb,
++                                      "Free block already used in tree: block %u",
++                                      ret);
++                              ret = -EIO;
++                              goto out_buf;
++                      }
++              blks[depth] = ret;
+               memset(buf, 0, info->dqi_usable_bs);
+               newact = 1;
+       } else {
+-              ret = read_blk(info, *treeblk, buf);
++              ret = read_blk(info, blks[depth], buf);
+               if (ret < 0) {
+                       quota_error(dquot->dq_sb, "Can't read tree quota "
+-                                  "block %u", *treeblk);
++                                  "block %u", blks[depth]);
+                       goto out_buf;
+               }
+       }
+@@ -357,8 +372,20 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+                            info->dqi_blocks - 1);
+       if (ret)
+               goto out_buf;
+-      if (!newblk)
++      if (!newblk) {
+               newson = 1;
++      } else {
++              for (i = 0; i <= depth; i++)
++                      if (newblk == blks[i]) {
++                              quota_error(dquot->dq_sb,
++                                      "Cycle in quota tree detected: block %u index %u",
++                                      blks[depth],
++                                      get_index(info, dquot->dq_id, depth));
++                              ret = -EIO;
++                              goto out_buf;
++                      }
++      }
++      blks[depth + 1] = newblk;
+       if (depth == info->dqi_qtree_depth - 1) {
+ #ifdef __QUOTA_QT_PARANOIA
+               if (newblk) {
+@@ -370,16 +397,16 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+                       goto out_buf;
+               }
+ #endif
+-              newblk = find_free_dqentry(info, dquot, &ret);
++              blks[depth + 1] = find_free_dqentry(info, dquot, &ret);
+       } else {
+-              ret = do_insert_tree(info, dquot, &newblk, depth+1);
++              ret = do_insert_tree(info, dquot, blks, depth + 1);
+       }
+       if (newson && ret >= 0) {
+               ref[get_index(info, dquot->dq_id, depth)] =
+-                                                      cpu_to_le32(newblk);
+-              ret = write_blk(info, *treeblk, buf);
++                                              cpu_to_le32(blks[depth + 1]);
++              ret = write_blk(info, blks[depth], buf);
+       } else if (newact && ret < 0) {
+-              put_free_dqblk(info, buf, *treeblk);
++              put_free_dqblk(info, buf, blks[depth]);
+       }
+ out_buf:
+       kfree(buf);
+@@ -390,7 +417,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+                                struct dquot *dquot)
+ {
+-      int tmp = QT_TREEOFF;
++      uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
+ #ifdef __QUOTA_QT_PARANOIA
+       if (info->dqi_blocks <= QT_TREEOFF) {
+@@ -398,7 +425,11 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+               return -EIO;
+       }
+ #endif
+-      return do_insert_tree(info, dquot, &tmp, 0);
++      if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++              quota_error(dquot->dq_sb, "Quota tree depth too big!");
++              return -EIO;
++      }
++      return do_insert_tree(info, dquot, blks, 0);
+ }
+ /*
+@@ -511,19 +542,20 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ /* Remove reference to dquot from tree */
+ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+-                     uint *blk, int depth)
++                     uint *blks, int depth)
+ {
+       char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+       int ret = 0;
+       uint newblk;
+       __le32 *ref = (__le32 *)buf;
++      int i;
+       if (!buf)
+               return -ENOMEM;
+-      ret = read_blk(info, *blk, buf);
++      ret = read_blk(info, blks[depth], buf);
+       if (ret < 0) {
+               quota_error(dquot->dq_sb, "Can't read quota data block %u",
+-                          *blk);
++                          blks[depth]);
+               goto out_buf;
+       }
+       newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+@@ -532,29 +564,38 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+       if (ret)
+               goto out_buf;
++      for (i = 0; i <= depth; i++)
++              if (newblk == blks[i]) {
++                      quota_error(dquot->dq_sb,
++                              "Cycle in quota tree detected: block %u index %u",
++                              blks[depth],
++                              get_index(info, dquot->dq_id, depth));
++                      ret = -EIO;
++                      goto out_buf;
++              }
+       if (depth == info->dqi_qtree_depth - 1) {
+               ret = free_dqentry(info, dquot, newblk);
+-              newblk = 0;
++              blks[depth + 1] = 0;
+       } else {
+-              ret = remove_tree(info, dquot, &newblk, depth+1);
++              blks[depth + 1] = newblk;
++              ret = remove_tree(info, dquot, blks, depth + 1);
+       }
+-      if (ret >= 0 && !newblk) {
+-              int i;
++      if (ret >= 0 && !blks[depth + 1]) {
+               ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+               /* Block got empty? */
+               for (i = 0; i < (info->dqi_usable_bs >> 2) && !ref[i]; i++)
+                       ;
+               /* Don't put the root block into the free block list */
+               if (i == (info->dqi_usable_bs >> 2)
+-                  && *blk != QT_TREEOFF) {
+-                      put_free_dqblk(info, buf, *blk);
+-                      *blk = 0;
++                  && blks[depth] != QT_TREEOFF) {
++                      put_free_dqblk(info, buf, blks[depth]);
++                      blks[depth] = 0;
+               } else {
+-                      ret = write_blk(info, *blk, buf);
++                      ret = write_blk(info, blks[depth], buf);
+                       if (ret < 0)
+                               quota_error(dquot->dq_sb,
+                                           "Can't write quota tree block %u",
+-                                          *blk);
++                                          blks[depth]);
+               }
+       }
+ out_buf:
+@@ -565,11 +606,15 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ /* Delete dquot from tree */
+ int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+ {
+-      uint tmp = QT_TREEOFF;
++      uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
+       if (!dquot->dq_off)     /* Even not allocated? */
+               return 0;
+-      return remove_tree(info, dquot, &tmp, 0);
++      if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++              quota_error(dquot->dq_sb, "Quota tree depth too big!");
++              return -EIO;
++      }
++      return remove_tree(info, dquot, blks, 0);
+ }
+ EXPORT_SYMBOL(qtree_delete_dquot);
+@@ -613,18 +658,20 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+ /* Find entry for given id in the tree */
+ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+-                              struct dquot *dquot, uint blk, int depth)
++                              struct dquot *dquot, uint *blks, int depth)
+ {
+       char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+       loff_t ret = 0;
+       __le32 *ref = (__le32 *)buf;
++      uint blk;
++      int i;
+       if (!buf)
+               return -ENOMEM;
+-      ret = read_blk(info, blk, buf);
++      ret = read_blk(info, blks[depth], buf);
+       if (ret < 0) {
+               quota_error(dquot->dq_sb, "Can't read quota tree block %u",
+-                          blk);
++                          blks[depth]);
+               goto out_buf;
+       }
+       ret = 0;
+@@ -636,8 +683,19 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+       if (ret)
+               goto out_buf;
++      /* Check for cycles in the tree */
++      for (i = 0; i <= depth; i++)
++              if (blk == blks[i]) {
++                      quota_error(dquot->dq_sb,
++                              "Cycle in quota tree detected: block %u index %u",
++                              blks[depth],
++                              get_index(info, dquot->dq_id, depth));
++                      ret = -EIO;
++                      goto out_buf;
++              }
++      blks[depth + 1] = blk;
+       if (depth < info->dqi_qtree_depth - 1)
+-              ret = find_tree_dqentry(info, dquot, blk, depth+1);
++              ret = find_tree_dqentry(info, dquot, blks, depth + 1);
+       else
+               ret = find_block_dqentry(info, dquot, blk);
+ out_buf:
+@@ -649,7 +707,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+                                 struct dquot *dquot)
+ {
+-      return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
++      uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
++
++      if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++              quota_error(dquot->dq_sb, "Quota tree depth too big!");
++              return -EIO;
++      }
++      return find_tree_dqentry(info, dquot, blks, 0);
+ }
+ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
+index b1467f3921c28..6921d40645a7e 100644
+--- a/fs/quota/quota_v2.c
++++ b/fs/quota/quota_v2.c
+@@ -166,14 +166,17 @@ static int v2_read_file_info(struct super_block *sb, int type)
+                   i_size_read(sb_dqopt(sb)->files[type]));
+               goto out_free;
+       }
+-      if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) {
+-              quota_error(sb, "Free block number too big (%u >= %u).",
+-                          qinfo->dqi_free_blk, qinfo->dqi_blocks);
++      if (qinfo->dqi_free_blk && (qinfo->dqi_free_blk <= QT_TREEOFF ||
++          qinfo->dqi_free_blk >= qinfo->dqi_blocks)) {
++              quota_error(sb, "Free block number %u out of range (%u, %u).",
++                          qinfo->dqi_free_blk, QT_TREEOFF, qinfo->dqi_blocks);
+               goto out_free;
+       }
+-      if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) {
+-              quota_error(sb, "Block with free entry too big (%u >= %u).",
+-                          qinfo->dqi_free_entry, qinfo->dqi_blocks);
++      if (qinfo->dqi_free_entry && (qinfo->dqi_free_entry <= QT_TREEOFF ||
++          qinfo->dqi_free_entry >= qinfo->dqi_blocks)) {
++              quota_error(sb, "Block with free entry %u out of range (%u, %u).",
++                          qinfo->dqi_free_entry, QT_TREEOFF,
++                          qinfo->dqi_blocks);
+               goto out_free;
+       }
+       ret = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.1/reiserfs-fix-uninit-value-in-comp_keys.patch b/queue-6.1/reiserfs-fix-uninit-value-in-comp_keys.patch
new file mode 100644 (file)
index 0000000..20baee8
--- /dev/null
@@ -0,0 +1,37 @@
+From c4a24c523ea0300aebec77aba5458b56dc531176 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Dec 2023 15:16:09 +0800
+Subject: reiserfs: fix uninit-value in comp_keys
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit dd8f87f21dc3da2eaf46e7401173f935b90b13a8 ]
+
+The cpu_key was not initialized in reiserfs_delete_solid_item(), which triggered
+this issue.
+
+Reported-and-tested-by:  <syzbot+b3b14fb9f8a14c5d0267@syzkaller.appspotmail.com>
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Link: https://lore.kernel.org/r/tencent_9EA7E746DE92DBC66049A62EDF6ED64CA706@qq.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/reiserfs/stree.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
+index 84c12a1947b22..6ecf772919688 100644
+--- a/fs/reiserfs/stree.c
++++ b/fs/reiserfs/stree.c
+@@ -1409,7 +1409,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
+       INITIALIZE_PATH(path);
+       int item_len = 0;
+       int tb_init = 0;
+-      struct cpu_key cpu_key;
++      struct cpu_key cpu_key = {};
+       int retval;
+       int quota_cut_bytes = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.1/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch b/queue-6.1/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch
new file mode 100644 (file)
index 0000000..b93da7c
--- /dev/null
@@ -0,0 +1,60 @@
+From 29da12fda2fa79cbf25d8540f04e9cd51336e459 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Aug 2024 13:25:48 +0200
+Subject: s390/uv: Panic for set and remove shared access UVC errors
+
+From: Claudio Imbrenda <imbrenda@linux.ibm.com>
+
+[ Upstream commit cff59d8631e1409ffdd22d9d717e15810181b32c ]
+
+The return value of uv_set_shared() and uv_remove_shared() (which are
+wrappers around the share() function) is not always checked. The system
+integrity of a protected guest depends on the Share and Unshare UVCs
+being successful. This means that any caller that fails to check the
+return value will compromise the security of the protected guest.
+
+No code path that would lead to such violation of the security
+guarantees is currently exercised, since all the areas that are shared
+never get unshared during the lifetime of the system. This might
+change and become an issue in the future.
+
+The Share and Unshare UVCs can only fail in case of hypervisor
+misbehaviour (either a bug or malicious behaviour). In such cases there
+is no reasonable way forward, and the system needs to panic.
+
+This patch replaces the return at the end of the share() function with
+a panic, to guarantee system integrity.
+
+Fixes: 5abb9351dfd9 ("s390/uv: introduce guest side ultravisor code")
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240801112548.85303-1-imbrenda@linux.ibm.com
+Message-ID: <20240801112548.85303-1-imbrenda@linux.ibm.com>
+[frankja@linux.ibm.com: Fixed up patch subject]
+Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/uv.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
+index be3ef9dd69726..6abcb46a8dfe2 100644
+--- a/arch/s390/include/asm/uv.h
++++ b/arch/s390/include/asm/uv.h
+@@ -387,7 +387,10 @@ static inline int share(unsigned long addr, u16 cmd)
+       if (!uv_call(0, (u64)&uvcb))
+               return 0;
+-      return -EINVAL;
++      pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
++             uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
++             uvcb.header.rc, uvcb.header.rrc);
++      panic("System security cannot be guaranteed unless the system panics now.\n");
+ }
+ /*
+-- 
+2.43.0
+
index 1db79e002e06a7cb161849b99a93b242edf2a2c1..c7f52f1c458f2d7157a62e172835febae6d4c30d 100644 (file)
@@ -28,3 +28,84 @@ btrfs-tree-checker-add-dev-extent-item-checks.patch
 drm-amdgpu-actually-check-flags-for-all-context-ops.patch
 memcg_write_event_control-fix-a-user-triggerable-oops.patch
 drm-amdgpu-jpeg2-properly-set-atomics-vmid-field.patch
+s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch
+bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch
+nilfs2-prevent-warning-in-nilfs_dat_commit_end.patch
+ext4-jbd2-add-an-optimized-bmap-for-the-journal-inod.patch
+9p-fs-fix-wild-memory-access-write-in-v9fs_get_acl.patch
+nilfs2-initialize-struct-nilfs_binfo_dat-bi_pad-fiel.patch
+mm-khugepaged-fix-kernel-bug-in-hpage_collapse_scan_.patch
+bpf-split-off-basic-bpf-verifier-log-into-separate-f.patch
+bpf-drop-unnecessary-user-triggerable-warn_once-in-v.patch
+posix-timers-ensure-timer-id-search-loop-limit-is-va.patch
+pid-replace-struct-pid-1-element-array-with-flex-arr.patch
+gfs2-rename-remaining-transaction-glock-references.patch
+gfs2-rename-the-freeze-thaw-_super-callbacks.patch
+gfs2-rename-gfs2_freeze_lock-_shared.patch
+gfs2-rename-sdf_-fs_frozen-freeze_initiator.patch
+gfs2-rework-freeze-thaw-logic.patch
+gfs2-stop-using-gfs2_make_fs_ro-for-withdraw.patch
+bluetooth-fix-hci_link_tx_to-rcu-lock-usage.patch
+wifi-mac80211-take-wiphy-lock-for-mac-addr-change.patch
+wifi-mac80211-fix-change_address-deadlock-during-unr.patch
+net-sched-print-msecs-when-transmit-queue-time-out.patch
+net-don-t-dump-stack-on-queue-timeout.patch
+jfs-fix-shift-out-of-bounds-in-dbjoin.patch
+squashfs-squashfs_read_data-need-to-check-if-the-len.patch
+squashfs-fix-variable-overflow-triggered-by-sysbot.patch
+reiserfs-fix-uninit-value-in-comp_keys.patch
+erofs-avoid-debugging-output-for-de-compressed-data.patch
+input-bcm5974-check-endpoint-type-before-starting-tr.patch
+quota-detect-loops-in-quota-tree.patch
+net-rds-fix-possible-deadlock-in-rds_message_put.patch
+net-sctp-fix-skb-leak-in-sctp_inq_free.patch
+pppoe-fix-memory-leak-in-pppoe_sendmsg.patch
+wifi-mac80211-fix-and-simplify-unencrypted-drop-chec.patch
+wifi-cfg80211-move-a-msdu-check-in-ieee80211_data_to.patch
+wifi-cfg80211-factor-out-bridge-tunnel-rfc1042-heade.patch
+wifi-mac80211-remove-mesh-forwarding-congestion-chec.patch
+wifi-mac80211-fix-receiving-a-msdu-frames-on-mesh-in.patch
+wifi-mac80211-add-a-workaround-for-receiving-non-sta.patch
+wifi-cfg80211-check-a-msdu-format-more-carefully.patch
+docs-bpf-document-bpf_map_type_lpm_trie-map.patch
+bpf-replace-bpf_lpm_trie_key-0-length-array-with-fle.patch
+bpf-avoid-kfree_rcu-under-lock-in-bpf_lpm_trie.patch
+bluetooth-rfcomm-fix-not-validating-setsockopt-user-.patch
+ext4-check-the-return-value-of-ext4_xattr_inode_dec_.patch
+ext4-fold-quota-accounting-into-ext4_xattr_inode_loo.patch
+ext4-do-not-create-ea-inode-under-buffer-lock.patch
+udf-fix-bogus-checksum-computation-in-udf_rename.patch
+bpf-net-use-dev_stat_inc.patch
+fou-remove-warn-in-gue_gro_receive-on-unsupported-pr.patch
+jfs-fix-null-ptr-deref-in-dtinsertentry.patch
+jfs-fix-shift-out-of-bounds-in-dbdiscardag.patch
+fs-ntfs3-do-copy_to_user-out-of-run_lock.patch
+alsa-usb-fix-ubsan-warning-in-parse_audio_unit.patch
+igc-correct-the-launchtime-offset.patch
+igc-fix-packet-still-tx-after-gate-close-by-reducing.patch
+net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch
+net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch
+atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch
+net-axienet-fix-register-defines-comment-description.patch
+net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch
+net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch
+net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch
+mlxbf_gige-remove-two-unused-function-declarations.patch
+mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch
+mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch
+netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch
+netfilter-flowtable-initialise-extack-before-use.patch
+netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch
+netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch
+netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch
+netfilter-nf_tables-unconditionally-allocate-nft_obj.patch
+netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch
+netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch
+netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch
+netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch
+netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch
+netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch
+net-hns3-fix-wrong-use-of-semaphore-up.patch
+net-hns3-use-the-user-s-cfg-after-reset.patch
+net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch
+alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch
diff --git a/queue-6.1/squashfs-fix-variable-overflow-triggered-by-sysbot.patch b/queue-6.1/squashfs-fix-variable-overflow-triggered-by-sysbot.patch
new file mode 100644 (file)
index 0000000..c53bc21
--- /dev/null
@@ -0,0 +1,62 @@
+From a2d879187a9216d64025b2e8c6b0637a6061cab6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 16:09:01 +0000
+Subject: Squashfs: fix variable overflow triggered by sysbot
+
+From: Phillip Lougher <phillip@squashfs.org.uk>
+
+[ Upstream commit 12427de9439d68b8e96ba6f50b601ef15f437612 ]
+
+Syzbot reports a slab-out-of-bounds write in squashfs_readahead().
+
+This is ultimately caused by a file reporting an (infeasibly) large file
+size (1407374883553280 bytes) with the minimum block size of 4K.
+
+This causes variable overflow.
+
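+A quick standalone check of the arithmetic (not kernel code) shows how the
+shifted size overflows a 32-bit int while a loff_t-sized variable holds it
+fine:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		long long i_size = 1407374883553280LL;	/* size from the image */
+		int block_log = 12;			/* 4K block size */
+
+		int truncated = (int)(i_size >> block_log);	/* overflows */
+		long long correct = i_size >> block_log;
+
+		printf("int: %d, long long: %lld\n", truncated, correct);
+		return 0;
+	}
+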
+Link: https://lkml.kernel.org/r/20231113160901.6444-1-phillip@squashfs.org.uk
+Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
+Reported-by: syzbot+604424eb051c2f696163@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/000000000000b1fda20609ede0d1@google.com/
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/squashfs/file.c        | 3 ++-
+ fs/squashfs/file_direct.c | 6 +++---
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
+index 8ba8c4c507707..e8df6430444b0 100644
+--- a/fs/squashfs/file.c
++++ b/fs/squashfs/file.c
+@@ -544,7 +544,8 @@ static void squashfs_readahead(struct readahead_control *ractl)
+       struct squashfs_page_actor *actor;
+       unsigned int nr_pages = 0;
+       struct page **pages;
+-      int i, file_end = i_size_read(inode) >> msblk->block_log;
++      int i;
++      loff_t file_end = i_size_read(inode) >> msblk->block_log;
+       unsigned int max_pages = 1UL << shift;
+       readahead_expand(ractl, start, (len | mask) + 1);
+diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
+index f1ccad519e28c..763a3f7a75f6d 100644
+--- a/fs/squashfs/file_direct.c
++++ b/fs/squashfs/file_direct.c
+@@ -26,10 +26,10 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
+       struct inode *inode = target_page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+-      int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
++      loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+       int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+-      int start_index = target_page->index & ~mask;
+-      int end_index = start_index | mask;
++      loff_t start_index = target_page->index & ~mask;
++      loff_t end_index = start_index | mask;
+       int i, n, pages, bytes, res = -ENOMEM;
+       struct page **page;
+       struct squashfs_page_actor *actor;
+-- 
+2.43.0
+
diff --git a/queue-6.1/squashfs-squashfs_read_data-need-to-check-if-the-len.patch b/queue-6.1/squashfs-squashfs_read_data-need-to-check-if-the-len.patch
new file mode 100644 (file)
index 0000000..18f8f91
--- /dev/null
@@ -0,0 +1,39 @@
+From 416573cc98a8097b7c93c8b8dfaa3096be9eea36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Nov 2023 11:13:52 +0800
+Subject: squashfs: squashfs_read_data need to check if the length is 0
+
+From: Lizhi Xu <lizhi.xu@windriver.com>
+
+[ Upstream commit eb66b8abae98f869c224f7c852b685ae02144564 ]
+
+When the length passed in is 0, squashfs_read_data() should bail out.
+This error causes at least a WARN_ON().
+
+Link: https://lkml.kernel.org/r/20231116031352.40853-1-lizhi.xu@windriver.com
+Reported-by: syzbot+32d3767580a1ea339a81@syzkaller.appspotmail.com
+Closes: https://lkml.kernel.org/r/0000000000000526f2060a30a085@google.com
+Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
+Reviewed-by: Phillip Lougher <phillip@squashfs.org.uk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/squashfs/block.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
+index 833aca92301f0..45ea5d62cef42 100644
+--- a/fs/squashfs/block.c
++++ b/fs/squashfs/block.c
+@@ -198,7 +198,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
+               TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2,
+                     compressed ? "" : "un", length);
+       }
+-      if (length < 0 || length > output->length ||
++      if (length <= 0 || length > output->length ||
+                       (index + length) > msblk->bytes_used) {
+               res = -EIO;
+               goto out;
+-- 
+2.43.0
+
diff --git a/queue-6.1/udf-fix-bogus-checksum-computation-in-udf_rename.patch b/queue-6.1/udf-fix-bogus-checksum-computation-in-udf_rename.patch
new file mode 100644 (file)
index 0000000..cad2d05
--- /dev/null
@@ -0,0 +1,44 @@
+From 7f25164e064fbc09897135caccebcfe545a87cc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jun 2024 17:41:51 +0200
+Subject: udf: Fix bogus checksum computation in udf_rename()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 27ab33854873e6fb958cb074681a0107cc2ecc4c ]
+
+Syzbot reports uninitialized memory access in udf_rename() when updating
+the checksum of the '..' directory entry of a moved directory. This is
+indeed true: we pass the on-stack diriter.fi to udf_update_tag(), and
+because it contains only struct fileIdentDesc and not the impUse or name
+fields, the checksumming function checksums random stack contents beyond
+the end of the structure. This is actually harmless because the following
+udf_fiiter_write_fi() recomputes the checksum from on-disk buffers where
+everything is properly included, so all that is needed is to remove the
+bogus calculation.
+
+Fixes: e9109a92d2a9 ("udf: Convert udf_rename() to new directory iteration code")
+Link: https://lore.kernel.org/all/000000000000cf405f060d8f75a9@google.com/T/
+Link: https://patch.msgid.link/20240617154201.29512-1-jack@suse.cz
+Reported-by: syzbot+d31185aa54170f7fc1f5@syzkaller.appspotmail.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/namei.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/udf/namei.c b/fs/udf/namei.c
+index 7c95c549dd64e..ded71044988ab 100644
+--- a/fs/udf/namei.c
++++ b/fs/udf/namei.c
+@@ -1183,7 +1183,6 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+       if (dir_fi) {
+               dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
+-              udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi));
+               if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+                       mark_inode_dirty(old_inode);
+               else
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-cfg80211-check-a-msdu-format-more-carefully.patch b/queue-6.1/wifi-cfg80211-check-a-msdu-format-more-carefully.patch
new file mode 100644 (file)
index 0000000..7e0296c
--- /dev/null
@@ -0,0 +1,91 @@
+From 0b69d81c09fdc9f2cd9b3f472dd299c5425d012d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Feb 2024 20:34:06 +0100
+Subject: wifi: cfg80211: check A-MSDU format more carefully
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 9ad7974856926129f190ffbe3beea78460b3b7cc ]
+
+If it looks like there's another subframe in the A-MSDU
+but the header isn't fully there, we can end up reading
+data out of bounds, only to discard it later. Make this a
+bit more careful and check if the subframe header can
+even be present.
+
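+The added rule is simply "do not read a subframe header unless enough
+bytes remain for one".  As a standalone sketch of that guard (names are
+illustrative, not the cfg80211 code):
+
+	#include <stdbool.h>
+	#include <stddef.h>
+
+	/* hdr_size: size of the subframe header about to be copied out */
+	static bool subframe_hdr_fits(size_t skb_len, size_t offset,
+				      size_t hdr_size)
+	{
+		return skb_len >= offset && skb_len - offset >= hdr_size;
+	}
+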
+Reported-by: syzbot+d050d437fe47d479d210@syzkaller.appspotmail.com
+Link: https://msgid.link/20240226203405.a731e2c95e38.I82ce7d8c0cc8970ce29d0a39fdc07f1ffc425be4@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/util.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 4cf17c3c18392..d1a4a9fd2bcba 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -778,15 +778,19 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr)
+ {
+-      int offset = 0, remaining, subframe_len, padding;
++      int offset = 0, subframe_len, padding;
+       for (offset = 0; offset < skb->len; offset += subframe_len + padding) {
++              int remaining = skb->len - offset;
+               struct {
+                   __be16 len;
+                   u8 mesh_flags;
+               } hdr;
+               u16 len;
++              if (sizeof(hdr) > remaining)
++                      return false;
++
+               if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0)
+                       return false;
+@@ -798,7 +802,6 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr)
+               subframe_len = sizeof(struct ethhdr) + len;
+               padding = (4 - subframe_len) & 0x3;
+-              remaining = skb->len - offset;
+               if (subframe_len > remaining)
+                       return false;
+@@ -816,7 +819,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+ {
+       unsigned int hlen = ALIGN(extra_headroom, 4);
+       struct sk_buff *frame = NULL;
+-      int offset = 0, remaining;
++      int offset = 0;
+       struct {
+               struct ethhdr eth;
+               uint8_t flags;
+@@ -830,10 +833,14 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+               copy_len = sizeof(hdr);
+       while (!last) {
++              int remaining = skb->len - offset;
+               unsigned int subframe_len;
+               int len, mesh_len = 0;
+               u8 padding;
++              if (copy_len > remaining)
++                      goto purge;
++
+               skb_copy_bits(skb, offset, &hdr, copy_len);
+               if (iftype == NL80211_IFTYPE_MESH_POINT)
+                       mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags);
+@@ -846,7 +853,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+               padding = (4 - subframe_len) & 0x3;
+               /* the last MSDU has no padding */
+-              remaining = skb->len - offset;
+               if (subframe_len > remaining)
+                       goto purge;
+               /* mitigate A-MSDU aggregation injection attacks */
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-cfg80211-factor-out-bridge-tunnel-rfc1042-heade.patch b/queue-6.1/wifi-cfg80211-factor-out-bridge-tunnel-rfc1042-heade.patch
new file mode 100644 (file)
index 0000000..6d4d091
--- /dev/null
@@ -0,0 +1,91 @@
+From 345213defb89b7c5d751e7dfc7ab7f6660dc56e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:08:52 +0100
+Subject: wifi: cfg80211: factor out bridge tunnel / RFC1042 header check
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 9f718554e7eacea62d3f972cae24d969755bf3b6 ]
+
+The same check is done in multiple places; unify it.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20230213100855.34315-2-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/util.c | 34 ++++++++++++++++++----------------
+ 1 file changed, 18 insertions(+), 16 deletions(-)
+
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 4680e65460c85..8597694a0cfdb 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -542,6 +542,21 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
+ }
+ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
++static bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto)
++{
++      const __be16 *hdr_proto = hdr + ETH_ALEN;
++
++      if (!(ether_addr_equal(hdr, rfc1042_header) &&
++            *hdr_proto != htons(ETH_P_AARP) &&
++            *hdr_proto != htons(ETH_P_IPX)) &&
++          !ether_addr_equal(hdr, bridge_tunnel_header))
++              return false;
++
++      *proto = *hdr_proto;
++
++      return true;
++}
++
+ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                                 const u8 *addr, enum nl80211_iftype iftype,
+                                 u8 data_offset, bool is_amsdu)
+@@ -633,14 +648,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+       if (likely(!is_amsdu &&
+                  skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
+-                 ((ether_addr_equal(payload.hdr, rfc1042_header) &&
+-                   payload.proto != htons(ETH_P_AARP) &&
+-                   payload.proto != htons(ETH_P_IPX)) ||
+-                  ether_addr_equal(payload.hdr, bridge_tunnel_header)))) {
+-              /* remove RFC1042 or Bridge-Tunnel encapsulation and
+-               * replace EtherType */
++                 ieee80211_get_8023_tunnel_proto(&payload, &tmp.h_proto))) {
++              /* remove RFC1042 or Bridge-Tunnel encapsulation */
+               hdrlen += ETH_ALEN + 2;
+-              tmp.h_proto = payload.proto;
+               skb_postpull_rcsum(skb, &payload, ETH_ALEN + 2);
+       } else {
+               tmp.h_proto = htons(skb->len - hdrlen);
+@@ -756,8 +766,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+ {
+       unsigned int hlen = ALIGN(extra_headroom, 4);
+       struct sk_buff *frame = NULL;
+-      u16 ethertype;
+-      u8 *payload;
+       int offset = 0, remaining;
+       struct ethhdr eth;
+       bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
+@@ -811,14 +819,8 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+               frame->dev = skb->dev;
+               frame->priority = skb->priority;
+-              payload = frame->data;
+-              ethertype = (payload[6] << 8) | payload[7];
+-              if (likely((ether_addr_equal(payload, rfc1042_header) &&
+-                          ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
+-                         ether_addr_equal(payload, bridge_tunnel_header))) {
+-                      eth.h_proto = htons(ethertype);
++              if (likely(ieee80211_get_8023_tunnel_proto(frame->data, &eth.h_proto)))
+                       skb_pull(frame, ETH_ALEN + 2);
+-              }
+               memcpy(skb_push(frame, sizeof(eth)), &eth, sizeof(eth));
+               __skb_queue_tail(list, frame);
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-cfg80211-move-a-msdu-check-in-ieee80211_data_to.patch b/queue-6.1/wifi-cfg80211-move-a-msdu-check-in-ieee80211_data_to.patch
new file mode 100644 (file)
index 0000000..e608616
--- /dev/null
@@ -0,0 +1,40 @@
+From d52a9e9a218f41dddb33481b09e52a45fd1fc21d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:08:51 +0100
+Subject: wifi: cfg80211: move A-MSDU check in ieee80211_data_to_8023_exthdr
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 0f690e6b4dcd7243e2805a76981b252c2d4bdce6 ]
+
+When parsing the outer A-MSDU header, don't check for inner bridge tunnel
+or RFC1042 headers. This is handled by ieee80211_amsdu_to_8023s already.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20230213100855.34315-1-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/util.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 1665320d22146..4680e65460c85 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -631,8 +631,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+               break;
+       }
+-      if (likely(skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
+-                 ((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
++      if (likely(!is_amsdu &&
++                 skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
++                 ((ether_addr_equal(payload.hdr, rfc1042_header) &&
+                    payload.proto != htons(ETH_P_AARP) &&
+                    payload.proto != htons(ETH_P_IPX)) ||
+                   ether_addr_equal(payload.hdr, bridge_tunnel_header)))) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-add-a-workaround-for-receiving-non-sta.patch b/queue-6.1/wifi-mac80211-add-a-workaround-for-receiving-non-sta.patch
new file mode 100644 (file)
index 0000000..0c26fb5
--- /dev/null
@@ -0,0 +1,173 @@
+From 198bd502580d0c3d7433b11d6b2ae7a2032689e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:08:55 +0100
+Subject: wifi: mac80211: add a workaround for receiving non-standard mesh
+ A-MSDU
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 6e4c0d0460bd32ca9244dff3ba2d2da27235de11 ]
+
+At least the hardware supported by ath10k and ath11k (maybe more) does not
+implement mesh A-MSDU aggregation in a standard-compliant way.
+802.11-2020 9.3.2.2.2 declares that the Mesh Control field is part of the
+A-MSDU header (and little-endian).
+As such, its length must not be included in the subframe length field.
+Hardware affected by this bug treats the mesh control field as part of the
+MSDU data and sets the length accordingly.
+In order to avoid packet loss, keep track of which stations are affected
+by this and take it into account when converting A-MSDU to 802.3 + mesh control
+packets.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20230213100855.34315-5-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/cfg80211.h  | 13 +++++++++++++
+ net/mac80211/rx.c       | 15 ++++++++++++---
+ net/mac80211/sta_info.c |  3 +++
+ net/mac80211/sta_info.h |  1 +
+ net/wireless/util.c     | 32 ++++++++++++++++++++++++++++++++
+ 5 files changed, 61 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
+index c2f7d01b3a16e..2a0fc4a64af1e 100644
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -6301,6 +6301,19 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
+       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
+ }
++/**
++ * ieee80211_is_valid_amsdu - check if subframe lengths of an A-MSDU are valid
++ *
++ * This is used to detect non-standard A-MSDU frames, e.g. the ones generated
++ * by ath10k and ath11k, where the subframe length includes the length of the
++ * mesh control field.
++ *
++ * @skb: The input A-MSDU frame without any headers.
++ * @mesh_hdr: use standard compliant mesh A-MSDU subframe header
++ * Returns: true if subframe header lengths are valid for the @mesh_hdr mode
++ */
++bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr);
++
+ /**
+  * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
+  *
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index 8d2379944f3de..7cf1444c242d0 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2904,7 +2904,6 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+       static ieee80211_rx_result res;
+       struct ethhdr ethhdr;
+       const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
+-      bool mesh = false;
+       if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+               check_da = NULL;
+@@ -2922,7 +2921,6 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+               case NL80211_IFTYPE_MESH_POINT:
+                       check_sa = NULL;
+                       check_da = NULL;
+-                      mesh = true;
+                       break;
+               default:
+                       break;
+@@ -2937,10 +2935,21 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+                                         data_offset, true))
+               return RX_DROP_UNUSABLE;
++      if (rx->sta && rx->sta->amsdu_mesh_control < 0) {
++              bool valid_std = ieee80211_is_valid_amsdu(skb, true);
++              bool valid_nonstd = ieee80211_is_valid_amsdu(skb, false);
++
++              if (valid_std && !valid_nonstd)
++                      rx->sta->amsdu_mesh_control = 1;
++              else if (valid_nonstd && !valid_std)
++                      rx->sta->amsdu_mesh_control = 0;
++      }
++
+       ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
+                                rx->sdata->vif.type,
+                                rx->local->hw.extra_tx_headroom,
+-                               check_da, check_sa, mesh);
++                               check_da, check_sa,
++                               rx->sta->amsdu_mesh_control);
+       while (!skb_queue_empty(&frame_list)) {
+               rx->skb = __skb_dequeue(&frame_list);
+diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
+index f388b39531748..91768abf2d75b 100644
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -594,6 +594,9 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata,
+       sta->sta_state = IEEE80211_STA_NONE;
++      if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
++              sta->amsdu_mesh_control = -1;
++
+       /* Mark TID as unreserved */
+       sta->reserved_tid = IEEE80211_TID_UNRESERVED;
+diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
+index 4809756a43dd1..dbf441a0ac6b6 100644
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -706,6 +706,7 @@ struct sta_info {
+       struct codel_params cparams;
+       u8 reserved_tid;
++      s8 amsdu_mesh_control;
+       struct cfg80211_chan_def tdls_chandef;
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 61a76f31fac89..4cf17c3c18392 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -776,6 +776,38 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+       return frame;
+ }
++bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr)
++{
++      int offset = 0, remaining, subframe_len, padding;
++
++      for (offset = 0; offset < skb->len; offset += subframe_len + padding) {
++              struct {
++                  __be16 len;
++                  u8 mesh_flags;
++              } hdr;
++              u16 len;
++
++              if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0)
++                      return false;
++
++              if (mesh_hdr)
++                      len = le16_to_cpu(*(__le16 *)&hdr.len) +
++                            __ieee80211_get_mesh_hdrlen(hdr.mesh_flags);
++              else
++                      len = ntohs(hdr.len);
++
++              subframe_len = sizeof(struct ethhdr) + len;
++              padding = (4 - subframe_len) & 0x3;
++              remaining = skb->len - offset;
++
++              if (subframe_len > remaining)
++                      return false;
++      }
++
++      return true;
++}
++EXPORT_SYMBOL(ieee80211_is_valid_amsdu);
++
+ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+                             const u8 *addr, enum nl80211_iftype iftype,
+                             const unsigned int extra_headroom,
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-fix-and-simplify-unencrypted-drop-chec.patch b/queue-6.1/wifi-mac80211-fix-and-simplify-unencrypted-drop-chec.patch
new file mode 100644 (file)
index 0000000..261870a
--- /dev/null
@@ -0,0 +1,100 @@
+From 073cfdd27c6ef0f154402b9efb85792f02b5a185 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Dec 2022 14:57:30 +0100
+Subject: wifi: mac80211: fix and simplify unencrypted drop check for mesh
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 94b9b9de05b62ac54d8766caa9865fb4d82cc47e ]
+
+ieee80211_drop_unencrypted is called from ieee80211_rx_h_mesh_fwding and
+ieee80211_frame_allowed.
+
+Since ieee80211_rx_h_mesh_fwding can forward packets for other mesh nodes
+and is called earlier, it needs to check the decryption status and whether the
+packet uses the control protocol on its own, instead of deferring to
+the later call from ieee80211_frame_allowed.
+
+Because of that, ieee80211_drop_unencrypted has a mesh-specific check
+that skips over the mesh header in order to check the payload protocol.
+This code is invalid when called from ieee80211_frame_allowed, since that
+happens after the 802.11->802.3 conversion.
+
+Fix this by moving the mesh-specific check directly into
+ieee80211_rx_h_mesh_fwding.
+
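+[Editorial aside, not part of the upstream patch: a minimal sketch of the
+offset the moved check relies on. It is only meaningful on the raw 802.11
+frame, i.e. before the 802.11 -> 802.3 conversion, which is why the check
+cannot be left reachable from ieee80211_frame_allowed. The function name and
+plain-C form below are illustrative assumptions, not kernel API.]
+
+    /* Offset of the payload ethertype in a mesh data frame, before the
+     * 802.11 -> 802.3 conversion: 802.11 header, then mesh header, then
+     * the 6-byte RFC 1042 (LLC/SNAP) header. */
+    unsigned int mesh_ethertype_offset(unsigned int hdrlen_80211,
+                                       unsigned int mesh_hdrlen)
+    {
+            const unsigned int rfc1042_hdrlen = 6; /* AA AA 03 00 00 00 */
+
+            return hdrlen_80211 + mesh_hdrlen + rfc1042_hdrlen;
+    }
+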
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20221201135730.19723-1-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/rx.c | 38 ++++++++++----------------------------
+ 1 file changed, 10 insertions(+), 28 deletions(-)
+
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index c4c80037df91d..b68a9200403e7 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2408,7 +2408,6 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
+ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
+ {
+-      struct ieee80211_hdr *hdr = (void *)rx->skb->data;
+       struct sk_buff *skb = rx->skb;
+       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+@@ -2419,31 +2418,6 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
+       if (status->flag & RX_FLAG_DECRYPTED)
+               return 0;
+-      /* check mesh EAPOL frames first */
+-      if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) &&
+-                   ieee80211_is_data(fc))) {
+-              struct ieee80211s_hdr *mesh_hdr;
+-              u16 hdr_len = ieee80211_hdrlen(fc);
+-              u16 ethertype_offset;
+-              __be16 ethertype;
+-
+-              if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr))
+-                      goto drop_check;
+-
+-              /* make sure fixed part of mesh header is there, also checks skb len */
+-              if (!pskb_may_pull(rx->skb, hdr_len + 6))
+-                      goto drop_check;
+-
+-              mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len);
+-              ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) +
+-                                 sizeof(rfc1042_header);
+-
+-              if (skb_copy_bits(rx->skb, ethertype_offset, &ethertype, 2) == 0 &&
+-                  ethertype == rx->sdata->control_port_protocol)
+-                      return 0;
+-      }
+-
+-drop_check:
+       /* Drop unencrypted frames if key is set. */
+       if (unlikely(!ieee80211_has_protected(fc) &&
+                    !ieee80211_is_any_nullfunc(fc) &&
+@@ -2897,8 +2871,16 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
+       hdr = (struct ieee80211_hdr *) skb->data;
+       mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
+-      if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
+-              return RX_DROP_MONITOR;
++      if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) {
++              int offset = hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr) +
++                           sizeof(rfc1042_header);
++              __be16 ethertype;
++
++              if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr) ||
++                  skb_copy_bits(rx->skb, offset, &ethertype, 2) != 0 ||
++                  ethertype != rx->sdata->control_port_protocol)
++                      return RX_DROP_MONITOR;
++      }
+       /* frame is in RMC, don't forward */
+       if (ieee80211_is_data(hdr->frame_control) &&
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-fix-change_address-deadlock-during-unr.patch b/queue-6.1/wifi-mac80211-fix-change_address-deadlock-during-unr.patch
new file mode 100644 (file)
index 0000000..d07ff40
--- /dev/null
@@ -0,0 +1,54 @@
+From 183ed81eba8438e70922971a17a12ea68254a19c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Oct 2023 12:34:47 +0200
+Subject: wifi: mac80211: fix change_address deadlock during unregister
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 74a7c93f45abba538914a65dd2ef2ea7cf7150e2 ]
+
+When using e.g. bonding, and doing a sequence such as
+
+ # iw wlan0 set type __ap
+ # ip link add name bond1 type bond
+ # ip link set wlan0 master bond1
+ # iw wlan0 interface del
+
+we deadlock, since the wlan0 interface removal will cause
+bonding to reset the MAC address of wlan0.
+
+The locking would be somewhat difficult to fix, but since
+this only happens during removal, we can simply ignore the
+MAC address change at this time.
+
+Reported-by: syzbot+25b3a0b24216651bc2af@syzkaller.appspotmail.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Link: https://lore.kernel.org/r/20231012123447.9f9d7fd1f237.Ic3a5ef4391b670941a69cec5592aefc79d9c2890@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/iface.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index 408ee5afc9ae7..6a9d81e9069c9 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -300,6 +300,14 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+       struct ieee80211_local *local = sdata->local;
+       int ret;
++      /*
++       * This happens during unregistration if there's a bond device
++       * active (maybe other cases?) and we must get removed from it.
++       * But we really don't care anymore if it's not registered now.
++       */
++      if (!dev->ieee80211_ptr->registered)
++              return 0;
++
+       wiphy_lock(local->hw.wiphy);
+       ret = _ieee80211_change_mac(sdata, addr);
+       wiphy_unlock(local->hw.wiphy);
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-fix-receiving-a-msdu-frames-on-mesh-in.patch b/queue-6.1/wifi-mac80211-fix-receiving-a-msdu-frames-on-mesh-in.patch
new file mode 100644 (file)
index 0000000..8ebd883
--- /dev/null
@@ -0,0 +1,778 @@
+From b7c5924a7261066e7c86a354ce509a8b8a6f9d85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:08:54 +0100
+Subject: wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 986e43b19ae9176093da35e0a844e65c8bf9ede7 ]
+
+The current mac80211 mesh A-MSDU receive path fails to parse A-MSDU packets
+on mesh interfaces, because it assumes that the Mesh Control field is always
+directly after the 802.11 header.
+802.11-2020 9.3.2.2.2 Figure 9-70 shows that the Mesh Control field is
+actually part of the A-MSDU subframe header.
+This makes more sense, since it allows packets for multiple different
+destinations to be included in the same A-MSDU, as long as RA and TID are
+still the same.
+Another issue is the fact that the A-MSDU subframe length field was apparently
+accidentally defined as little-endian in the standard.
+
+In order to fix this, the mesh forwarding path needs to happen at a different
+point in the receive path.
+
+ieee80211_data_to_8023_exthdr is changed to ignore the mesh control field
+and leave it in after the ethernet header. This also affects the source/dest
+MAC address fields, which in the mesh case now point to the mesh SA/DA.
+
+ieee80211_amsdu_to_8023s is changed to deal with the endian difference and
+to add the Mesh Control length to the subframe length, since it's not covered
+by the MSDU length field.
+
+With these changes, mac80211 will get the same packet structure for
+converted regular data packets and unpacked A-MSDU subframes.
+
+The mesh forwarding checks are now only performed after the A-MSDU decap.
+For locally received packets, the Mesh Control header is stripped away.
+For forwarded packets, a new 802.11 header gets added.
+
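+[Editorial aside, not part of the upstream patch: a self-contained sketch of
+the subframe length handling described above, i.e. the little-endian length
+field plus the Mesh Control length that it does not cover. The helper below is
+a simplified stand-in for __ieee80211_get_mesh_hdrlen(); all names are
+illustrative assumptions, not kernel API.]
+
+    #include <stddef.h>
+    #include <stdint.h>
+
+    #define ETH_ALEN 6
+
+    /* Simplified stand-in for __ieee80211_get_mesh_hdrlen(): 6 fixed bytes
+     * plus 0, 6 or 12 bytes of extended addresses, selected by the two
+     * MESH_FLAGS_AE bits in the flags octet. */
+    size_t mesh_hdrlen(uint8_t flags)
+    {
+            static const size_t ae_len[] = { 0, 6, 12, 0 };
+
+            return 6 + ae_len[flags & 0x3];
+    }
+
+    /* Length of one A-MSDU subframe (header + MSDU, excluding padding).
+     * For mesh, the 16-bit length field is little-endian and does not
+     * include the Mesh Control field, so both quirks are handled here. */
+    size_t amsdu_subframe_len(const uint8_t *subframe, int mesh)
+    {
+            const uint8_t *lf = subframe + 2 * ETH_ALEN;
+            size_t len;
+
+            if (mesh) {
+                    len = (size_t)lf[0] | ((size_t)lf[1] << 8);     /* LE */
+                    len += mesh_hdrlen(subframe[2 * ETH_ALEN + 2]);
+            } else {
+                    len = ((size_t)lf[0] << 8) | lf[1];             /* BE */
+            }
+
+            return 2 * ETH_ALEN + 2 + len;
+    }
+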
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20230213100855.34315-4-nbd@nbd.name
+[fix fortify build error]
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../wireless/marvell/mwifiex/11n_rxreorder.c  |   2 +-
+ include/net/cfg80211.h                        |  27 +-
+ net/mac80211/rx.c                             | 350 ++++++++++--------
+ net/wireless/util.c                           | 120 +++---
+ 4 files changed, 297 insertions(+), 202 deletions(-)
+
+diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+index 54ab8b54369ba..4ab3a14567b65 100644
+--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
++++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+@@ -33,7 +33,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv,
+               skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length));
+               ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr,
+-                                       priv->wdev.iftype, 0, NULL, NULL);
++                                       priv->wdev.iftype, 0, NULL, NULL, false);
+               while (!skb_queue_empty(&list)) {
+                       struct rx_packet_hdr *rx_hdr;
+diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
+index 5bf5c1ab542ce..c2f7d01b3a16e 100644
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -6316,11 +6316,36 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
+  * @extra_headroom: The hardware extra headroom for SKBs in the @list.
+  * @check_da: DA to check in the inner ethernet header, or NULL
+  * @check_sa: SA to check in the inner ethernet header, or NULL
++ * @mesh_control: A-MSDU subframe header includes the mesh control field
+  */
+ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+                             const u8 *addr, enum nl80211_iftype iftype,
+                             const unsigned int extra_headroom,
+-                            const u8 *check_da, const u8 *check_sa);
++                            const u8 *check_da, const u8 *check_sa,
++                            bool mesh_control);
++
++/**
++ * ieee80211_get_8023_tunnel_proto - get RFC1042 or bridge tunnel encap protocol
++ *
++ * Check for RFC1042 or bridge tunnel header and fetch the encapsulated
++ * protocol.
++ *
++ * @hdr: pointer to the MSDU payload
++ * @proto: destination pointer to store the protocol
++ * Return: true if encapsulation was found
++ */
++bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto);
++
++/**
++ * ieee80211_strip_8023_mesh_hdr - strip mesh header from converted 802.3 frames
++ *
++ * Strip the mesh header, which was left in by ieee80211_data_to_8023 as part
++ * of the MSDU data. Also move any source/destination addresses from the mesh
++ * header to the ethernet header (if present).
++ *
++ * @skb: The 802.3 frame with embedded mesh header
++ */
++int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb);
+ /**
+  * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index 1d50126aebbc8..8d2379944f3de 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2725,6 +2725,174 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
+       }
+ }
++static ieee80211_rx_result
++ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta,
++                     struct sk_buff *skb)
++{
++#ifdef CONFIG_MAC80211_MESH
++      struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
++      struct ieee80211_local *local = sdata->local;
++      uint16_t fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA;
++      struct ieee80211_hdr hdr = {
++              .frame_control = cpu_to_le16(fc)
++      };
++      struct ieee80211_hdr *fwd_hdr;
++      struct ieee80211s_hdr *mesh_hdr;
++      struct ieee80211_tx_info *info;
++      struct sk_buff *fwd_skb;
++      struct ethhdr *eth;
++      bool multicast;
++      int tailroom = 0;
++      int hdrlen, mesh_hdrlen;
++      u8 *qos;
++
++      if (!ieee80211_vif_is_mesh(&sdata->vif))
++              return RX_CONTINUE;
++
++      if (!pskb_may_pull(skb, sizeof(*eth) + 6))
++              return RX_DROP_MONITOR;
++
++      mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(*eth));
++      mesh_hdrlen = ieee80211_get_mesh_hdrlen(mesh_hdr);
++
++      if (!pskb_may_pull(skb, sizeof(*eth) + mesh_hdrlen))
++              return RX_DROP_MONITOR;
++
++      eth = (struct ethhdr *)skb->data;
++      multicast = is_multicast_ether_addr(eth->h_dest);
++
++      mesh_hdr = (struct ieee80211s_hdr *)(eth + 1);
++      if (!mesh_hdr->ttl)
++              return RX_DROP_MONITOR;
++
++      /* frame is in RMC, don't forward */
++      if (is_multicast_ether_addr(eth->h_dest) &&
++          mesh_rmc_check(sdata, eth->h_source, mesh_hdr))
++              return RX_DROP_MONITOR;
++
++      /* Frame has reached destination.  Don't forward */
++      if (ether_addr_equal(sdata->vif.addr, eth->h_dest))
++              goto rx_accept;
++
++      if (!ifmsh->mshcfg.dot11MeshForwarding) {
++              if (is_multicast_ether_addr(eth->h_dest))
++                      goto rx_accept;
++
++              return RX_DROP_MONITOR;
++      }
++
++      /* forward packet */
++      if (sdata->crypto_tx_tailroom_needed_cnt)
++              tailroom = IEEE80211_ENCRYPT_TAILROOM;
++
++      if (!--mesh_hdr->ttl) {
++              if (multicast)
++                      goto rx_accept;
++
++              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl);
++              return RX_DROP_MONITOR;
++      }
++
++      if (mesh_hdr->flags & MESH_FLAGS_AE) {
++              struct mesh_path *mppath;
++              char *proxied_addr;
++
++              if (multicast)
++                      proxied_addr = mesh_hdr->eaddr1;
++              else if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6)
++                      /* has_a4 already checked in ieee80211_rx_mesh_check */
++                      proxied_addr = mesh_hdr->eaddr2;
++              else
++                      return RX_DROP_MONITOR;
++
++              rcu_read_lock();
++              mppath = mpp_path_lookup(sdata, proxied_addr);
++              if (!mppath) {
++                      mpp_path_add(sdata, proxied_addr, eth->h_source);
++              } else {
++                      spin_lock_bh(&mppath->state_lock);
++                      if (!ether_addr_equal(mppath->mpp, eth->h_source))
++                              memcpy(mppath->mpp, eth->h_source, ETH_ALEN);
++                      mppath->exp_time = jiffies;
++                      spin_unlock_bh(&mppath->state_lock);
++              }
++              rcu_read_unlock();
++      }
++
++      skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]);
++
++      ieee80211_fill_mesh_addresses(&hdr, &hdr.frame_control,
++                                    eth->h_dest, eth->h_source);
++      hdrlen = ieee80211_hdrlen(hdr.frame_control);
++      if (multicast) {
++              int extra_head = sizeof(struct ieee80211_hdr) - sizeof(*eth);
++
++              fwd_skb = skb_copy_expand(skb, local->tx_headroom + extra_head +
++                                             IEEE80211_ENCRYPT_HEADROOM,
++                                        tailroom, GFP_ATOMIC);
++              if (!fwd_skb)
++                      goto rx_accept;
++      } else {
++              fwd_skb = skb;
++              skb = NULL;
++
++              if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr)))
++                      return RX_DROP_UNUSABLE;
++      }
++
++      fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr));
++      memcpy(fwd_hdr, &hdr, hdrlen - 2);
++      qos = ieee80211_get_qos_ctl(fwd_hdr);
++      qos[0] = qos[1] = 0;
++
++      skb_reset_mac_header(fwd_skb);
++      hdrlen += mesh_hdrlen;
++      if (ieee80211_get_8023_tunnel_proto(fwd_skb->data + hdrlen,
++                                          &fwd_skb->protocol))
++              hdrlen += ETH_ALEN;
++      else
++              fwd_skb->protocol = htons(fwd_skb->len - hdrlen);
++      skb_set_network_header(fwd_skb, hdrlen);
++
++      info = IEEE80211_SKB_CB(fwd_skb);
++      memset(info, 0, sizeof(*info));
++      info->control.flags |= IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
++      info->control.vif = &sdata->vif;
++      info->control.jiffies = jiffies;
++      if (multicast) {
++              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
++              memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
++              /* update power mode indication when forwarding */
++              ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
++      } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) {
++              /* mesh power mode flags updated in mesh_nexthop_lookup */
++              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
++      } else {
++              /* unable to resolve next hop */
++              if (sta)
++                      mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl,
++                                         hdr.addr3, 0,
++                                         WLAN_REASON_MESH_PATH_NOFORWARD,
++                                         sta->sta.addr);
++              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route);
++              kfree_skb(fwd_skb);
++              goto rx_accept;
++      }
++
++      IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
++      fwd_skb->dev = sdata->dev;
++      ieee80211_add_pending_skb(local, fwd_skb);
++
++rx_accept:
++      if (!skb)
++              return RX_QUEUED;
++
++      ieee80211_strip_8023_mesh_hdr(skb);
++#endif
++
++      return RX_CONTINUE;
++}
++
+ static ieee80211_rx_result debug_noinline
+ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+ {
+@@ -2733,8 +2901,10 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+       __le16 fc = hdr->frame_control;
+       struct sk_buff_head frame_list;
++      static ieee80211_rx_result res;
+       struct ethhdr ethhdr;
+       const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
++      bool mesh = false;
+       if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+               check_da = NULL;
+@@ -2751,6 +2921,8 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+                       break;
+               case NL80211_IFTYPE_MESH_POINT:
+                       check_sa = NULL;
++                      check_da = NULL;
++                      mesh = true;
+                       break;
+               default:
+                       break;
+@@ -2768,17 +2940,29 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
+       ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
+                                rx->sdata->vif.type,
+                                rx->local->hw.extra_tx_headroom,
+-                               check_da, check_sa);
++                               check_da, check_sa, mesh);
+       while (!skb_queue_empty(&frame_list)) {
+               rx->skb = __skb_dequeue(&frame_list);
+-              if (!ieee80211_frame_allowed(rx, fc)) {
+-                      dev_kfree_skb(rx->skb);
++              res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
++              switch (res) {
++              case RX_QUEUED:
+                       continue;
++              case RX_CONTINUE:
++                      break;
++              default:
++                      goto free;
+               }
++              if (!ieee80211_frame_allowed(rx, fc))
++                      goto free;
++
+               ieee80211_deliver_skb(rx);
++              continue;
++
++free:
++              dev_kfree_skb(rx->skb);
+       }
+       return RX_QUEUED;
+@@ -2811,6 +2995,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+                       if (!rx->sdata->u.mgd.use_4addr)
+                               return RX_DROP_UNUSABLE;
+                       break;
++              case NL80211_IFTYPE_MESH_POINT:
++                      break;
+               default:
+                       return RX_DROP_UNUSABLE;
+               }
+@@ -2839,155 +3025,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+       return __ieee80211_rx_h_amsdu(rx, 0);
+ }
+-#ifdef CONFIG_MAC80211_MESH
+-static ieee80211_rx_result
+-ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
+-{
+-      struct ieee80211_hdr *fwd_hdr, *hdr;
+-      struct ieee80211_tx_info *info;
+-      struct ieee80211s_hdr *mesh_hdr;
+-      struct sk_buff *skb = rx->skb, *fwd_skb;
+-      struct ieee80211_local *local = rx->local;
+-      struct ieee80211_sub_if_data *sdata = rx->sdata;
+-      struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+-      u16 ac, q, hdrlen;
+-      int tailroom = 0;
+-
+-      hdr = (struct ieee80211_hdr *) skb->data;
+-      hdrlen = ieee80211_hdrlen(hdr->frame_control);
+-
+-      /* make sure fixed part of mesh header is there, also checks skb len */
+-      if (!pskb_may_pull(rx->skb, hdrlen + 6))
+-              return RX_DROP_MONITOR;
+-
+-      mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
+-
+-      /* make sure full mesh header is there, also checks skb len */
+-      if (!pskb_may_pull(rx->skb,
+-                         hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr)))
+-              return RX_DROP_MONITOR;
+-
+-      /* reload pointers */
+-      hdr = (struct ieee80211_hdr *) skb->data;
+-      mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
+-
+-      if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) {
+-              int offset = hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr) +
+-                           sizeof(rfc1042_header);
+-              __be16 ethertype;
+-
+-              if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr) ||
+-                  skb_copy_bits(rx->skb, offset, &ethertype, 2) != 0 ||
+-                  ethertype != rx->sdata->control_port_protocol)
+-                      return RX_DROP_MONITOR;
+-      }
+-
+-      /* frame is in RMC, don't forward */
+-      if (ieee80211_is_data(hdr->frame_control) &&
+-          is_multicast_ether_addr(hdr->addr1) &&
+-          mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr))
+-              return RX_DROP_MONITOR;
+-
+-      if (!ieee80211_is_data(hdr->frame_control))
+-              return RX_CONTINUE;
+-
+-      if (!mesh_hdr->ttl)
+-              return RX_DROP_MONITOR;
+-
+-      if (mesh_hdr->flags & MESH_FLAGS_AE) {
+-              struct mesh_path *mppath;
+-              char *proxied_addr;
+-              char *mpp_addr;
+-
+-              if (is_multicast_ether_addr(hdr->addr1)) {
+-                      mpp_addr = hdr->addr3;
+-                      proxied_addr = mesh_hdr->eaddr1;
+-              } else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
+-                          MESH_FLAGS_AE_A5_A6) {
+-                      /* has_a4 already checked in ieee80211_rx_mesh_check */
+-                      mpp_addr = hdr->addr4;
+-                      proxied_addr = mesh_hdr->eaddr2;
+-              } else {
+-                      return RX_DROP_MONITOR;
+-              }
+-
+-              rcu_read_lock();
+-              mppath = mpp_path_lookup(sdata, proxied_addr);
+-              if (!mppath) {
+-                      mpp_path_add(sdata, proxied_addr, mpp_addr);
+-              } else {
+-                      spin_lock_bh(&mppath->state_lock);
+-                      if (!ether_addr_equal(mppath->mpp, mpp_addr))
+-                              memcpy(mppath->mpp, mpp_addr, ETH_ALEN);
+-                      mppath->exp_time = jiffies;
+-                      spin_unlock_bh(&mppath->state_lock);
+-              }
+-              rcu_read_unlock();
+-      }
+-
+-      /* Frame has reached destination.  Don't forward */
+-      if (!is_multicast_ether_addr(hdr->addr1) &&
+-          ether_addr_equal(sdata->vif.addr, hdr->addr3))
+-              return RX_CONTINUE;
+-
+-      ac = ieee802_1d_to_ac[skb->priority];
+-      skb_set_queue_mapping(skb, ac);
+-
+-      if (!--mesh_hdr->ttl) {
+-              if (!is_multicast_ether_addr(hdr->addr1))
+-                      IEEE80211_IFSTA_MESH_CTR_INC(ifmsh,
+-                                                   dropped_frames_ttl);
+-              goto out;
+-      }
+-
+-      if (!ifmsh->mshcfg.dot11MeshForwarding)
+-              goto out;
+-
+-      if (sdata->crypto_tx_tailroom_needed_cnt)
+-              tailroom = IEEE80211_ENCRYPT_TAILROOM;
+-
+-      fwd_skb = skb_copy_expand(skb, local->tx_headroom +
+-                                     IEEE80211_ENCRYPT_HEADROOM,
+-                                tailroom, GFP_ATOMIC);
+-      if (!fwd_skb)
+-              goto out;
+-
+-      fwd_skb->dev = sdata->dev;
+-      fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
+-      fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
+-      info = IEEE80211_SKB_CB(fwd_skb);
+-      memset(info, 0, sizeof(*info));
+-      info->control.flags |= IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
+-      info->control.vif = &rx->sdata->vif;
+-      info->control.jiffies = jiffies;
+-      if (is_multicast_ether_addr(fwd_hdr->addr1)) {
+-              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
+-              memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
+-              /* update power mode indication when forwarding */
+-              ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
+-      } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) {
+-              /* mesh power mode flags updated in mesh_nexthop_lookup */
+-              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
+-      } else {
+-              /* unable to resolve next hop */
+-              mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl,
+-                                 fwd_hdr->addr3, 0,
+-                                 WLAN_REASON_MESH_PATH_NOFORWARD,
+-                                 fwd_hdr->addr2);
+-              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route);
+-              kfree_skb(fwd_skb);
+-              return RX_DROP_MONITOR;
+-      }
+-
+-      IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
+-      ieee80211_add_pending_skb(local, fwd_skb);
+- out:
+-      if (is_multicast_ether_addr(hdr->addr1))
+-              return RX_CONTINUE;
+-      return RX_DROP_MONITOR;
+-}
+-#endif
+-
+ static ieee80211_rx_result debug_noinline
+ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
+ {
+@@ -2996,6 +3033,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
+       struct net_device *dev = sdata->dev;
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+       __le16 fc = hdr->frame_control;
++      static ieee80211_rx_result res;
+       bool port_control;
+       int err;
+@@ -3022,6 +3060,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
+       if (unlikely(err))
+               return RX_DROP_UNUSABLE;
++      res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
++      if (res != RX_CONTINUE)
++              return res;
++
+       if (!ieee80211_frame_allowed(rx, fc))
+               return RX_DROP_MONITOR;
+@@ -3996,10 +4038,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
+               CALL_RXH(ieee80211_rx_h_defragment);
+               CALL_RXH(ieee80211_rx_h_michael_mic_verify);
+               /* must be after MMIC verify so header is counted in MPDU mic */
+-#ifdef CONFIG_MAC80211_MESH
+-              if (ieee80211_vif_is_mesh(&rx->sdata->vif))
+-                      CALL_RXH(ieee80211_rx_h_mesh_fwding);
+-#endif
+               CALL_RXH(ieee80211_rx_h_amsdu);
+               CALL_RXH(ieee80211_rx_h_data);
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 8597694a0cfdb..61a76f31fac89 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -542,7 +542,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
+ }
+ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
+-static bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto)
++bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto)
+ {
+       const __be16 *hdr_proto = hdr + ETH_ALEN;
+@@ -556,6 +556,49 @@ static bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto)
+       return true;
+ }
++EXPORT_SYMBOL(ieee80211_get_8023_tunnel_proto);
++
++int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb)
++{
++      const void *mesh_addr;
++      struct {
++              struct ethhdr eth;
++              u8 flags;
++      } payload;
++      int hdrlen;
++      int ret;
++
++      ret = skb_copy_bits(skb, 0, &payload, sizeof(payload));
++      if (ret)
++              return ret;
++
++      hdrlen = sizeof(payload.eth) + __ieee80211_get_mesh_hdrlen(payload.flags);
++
++      if (likely(pskb_may_pull(skb, hdrlen + 8) &&
++                 ieee80211_get_8023_tunnel_proto(skb->data + hdrlen,
++                                                 &payload.eth.h_proto)))
++              hdrlen += ETH_ALEN + 2;
++      else if (!pskb_may_pull(skb, hdrlen))
++              return -EINVAL;
++
++      mesh_addr = skb->data + sizeof(payload.eth) + ETH_ALEN;
++      switch (payload.flags & MESH_FLAGS_AE) {
++      case MESH_FLAGS_AE_A4:
++              memcpy(&payload.eth.h_source, mesh_addr, ETH_ALEN);
++              break;
++      case MESH_FLAGS_AE_A5_A6:
++              memcpy(&payload.eth, mesh_addr, 2 * ETH_ALEN);
++              break;
++      default:
++              break;
++      }
++
++      pskb_pull(skb, hdrlen - sizeof(payload.eth));
++      memcpy(skb->data, &payload.eth, sizeof(payload.eth));
++
++      return 0;
++}
++EXPORT_SYMBOL(ieee80211_strip_8023_mesh_hdr);
+ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                                 const u8 *addr, enum nl80211_iftype iftype,
+@@ -568,7 +611,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+       } payload;
+       struct ethhdr tmp;
+       u16 hdrlen;
+-      u8 mesh_flags = 0;
+       if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
+               return -1;
+@@ -589,12 +631,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+       memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN);
+       memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN);
+-      if (iftype == NL80211_IFTYPE_MESH_POINT &&
+-          skb_copy_bits(skb, hdrlen, &mesh_flags, 1) < 0)
+-              return -1;
+-
+-      mesh_flags &= MESH_FLAGS_AE;
+-
+       switch (hdr->frame_control &
+               cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
+       case cpu_to_le16(IEEE80211_FCTL_TODS):
+@@ -608,17 +644,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                            iftype != NL80211_IFTYPE_AP_VLAN &&
+                            iftype != NL80211_IFTYPE_STATION))
+                       return -1;
+-              if (iftype == NL80211_IFTYPE_MESH_POINT) {
+-                      if (mesh_flags == MESH_FLAGS_AE_A4)
+-                              return -1;
+-                      if (mesh_flags == MESH_FLAGS_AE_A5_A6 &&
+-                          skb_copy_bits(skb, hdrlen +
+-                                        offsetof(struct ieee80211s_hdr, eaddr1),
+-                                        tmp.h_dest, 2 * ETH_ALEN) < 0)
+-                              return -1;
+-
+-                      hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
+-              }
+               break;
+       case cpu_to_le16(IEEE80211_FCTL_FROMDS):
+               if ((iftype != NL80211_IFTYPE_STATION &&
+@@ -627,16 +652,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                   (is_multicast_ether_addr(tmp.h_dest) &&
+                    ether_addr_equal(tmp.h_source, addr)))
+                       return -1;
+-              if (iftype == NL80211_IFTYPE_MESH_POINT) {
+-                      if (mesh_flags == MESH_FLAGS_AE_A5_A6)
+-                              return -1;
+-                      if (mesh_flags == MESH_FLAGS_AE_A4 &&
+-                          skb_copy_bits(skb, hdrlen +
+-                                        offsetof(struct ieee80211s_hdr, eaddr1),
+-                                        tmp.h_source, ETH_ALEN) < 0)
+-                              return -1;
+-                      hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
+-              }
+               break;
+       case cpu_to_le16(0):
+               if (iftype != NL80211_IFTYPE_ADHOC &&
+@@ -646,7 +661,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+               break;
+       }
+-      if (likely(!is_amsdu &&
++      if (likely(!is_amsdu && iftype != NL80211_IFTYPE_MESH_POINT &&
+                  skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
+                  ieee80211_get_8023_tunnel_proto(&payload, &tmp.h_proto))) {
+               /* remove RFC1042 or Bridge-Tunnel encapsulation */
+@@ -722,7 +737,8 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
+ static struct sk_buff *
+ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+-                     int offset, int len, bool reuse_frag)
++                     int offset, int len, bool reuse_frag,
++                     int min_len)
+ {
+       struct sk_buff *frame;
+       int cur_len = len;
+@@ -736,7 +752,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+        * in the stack later.
+        */
+       if (reuse_frag)
+-              cur_len = min_t(int, len, 32);
++              cur_len = min_t(int, len, min_len);
+       /*
+        * Allocate and reserve two bytes more for payload
+@@ -746,6 +762,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+       if (!frame)
+               return NULL;
++      frame->priority = skb->priority;
+       skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
+       skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
+@@ -762,23 +779,37 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+                             const u8 *addr, enum nl80211_iftype iftype,
+                             const unsigned int extra_headroom,
+-                            const u8 *check_da, const u8 *check_sa)
++                            const u8 *check_da, const u8 *check_sa,
++                            bool mesh_control)
+ {
+       unsigned int hlen = ALIGN(extra_headroom, 4);
+       struct sk_buff *frame = NULL;
+       int offset = 0, remaining;
+-      struct ethhdr eth;
++      struct {
++              struct ethhdr eth;
++              uint8_t flags;
++      } hdr;
+       bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
+       bool reuse_skb = false;
+       bool last = false;
++      int copy_len = sizeof(hdr.eth);
++
++      if (iftype == NL80211_IFTYPE_MESH_POINT)
++              copy_len = sizeof(hdr);
+       while (!last) {
+               unsigned int subframe_len;
+-              int len;
++              int len, mesh_len = 0;
+               u8 padding;
+-              skb_copy_bits(skb, offset, &eth, sizeof(eth));
+-              len = ntohs(eth.h_proto);
++              skb_copy_bits(skb, offset, &hdr, copy_len);
++              if (iftype == NL80211_IFTYPE_MESH_POINT)
++                      mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags);
++              if (mesh_control)
++                      len = le16_to_cpu(*(__le16 *)&hdr.eth.h_proto) + mesh_len;
++              else
++                      len = ntohs(hdr.eth.h_proto);
++
+               subframe_len = sizeof(struct ethhdr) + len;
+               padding = (4 - subframe_len) & 0x3;
+@@ -787,16 +818,16 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+               if (subframe_len > remaining)
+                       goto purge;
+               /* mitigate A-MSDU aggregation injection attacks */
+-              if (ether_addr_equal(eth.h_dest, rfc1042_header))
++              if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header))
+                       goto purge;
+               offset += sizeof(struct ethhdr);
+               last = remaining <= subframe_len + padding;
+               /* FIXME: should we really accept multicast DA? */
+-              if ((check_da && !is_multicast_ether_addr(eth.h_dest) &&
+-                   !ether_addr_equal(check_da, eth.h_dest)) ||
+-                  (check_sa && !ether_addr_equal(check_sa, eth.h_source))) {
++              if ((check_da && !is_multicast_ether_addr(hdr.eth.h_dest) &&
++                   !ether_addr_equal(check_da, hdr.eth.h_dest)) ||
++                  (check_sa && !ether_addr_equal(check_sa, hdr.eth.h_source))) {
+                       offset += len + padding;
+                       continue;
+               }
+@@ -808,7 +839,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+                       reuse_skb = true;
+               } else {
+                       frame = __ieee80211_amsdu_copy(skb, hlen, offset, len,
+-                                                     reuse_frag);
++                                                     reuse_frag, 32 + mesh_len);
+                       if (!frame)
+                               goto purge;
+@@ -819,10 +850,11 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
+               frame->dev = skb->dev;
+               frame->priority = skb->priority;
+-              if (likely(ieee80211_get_8023_tunnel_proto(frame->data, &eth.h_proto)))
++              if (likely(iftype != NL80211_IFTYPE_MESH_POINT &&
++                         ieee80211_get_8023_tunnel_proto(frame->data, &hdr.eth.h_proto)))
+                       skb_pull(frame, ETH_ALEN + 2);
+-              memcpy(skb_push(frame, sizeof(eth)), &eth, sizeof(eth));
++              memcpy(skb_push(frame, sizeof(hdr.eth)), &hdr.eth, sizeof(hdr.eth));
+               __skb_queue_tail(list, frame);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-remove-mesh-forwarding-congestion-chec.patch b/queue-6.1/wifi-mac80211-remove-mesh-forwarding-congestion-chec.patch
new file mode 100644 (file)
index 0000000..e0675b9
--- /dev/null
@@ -0,0 +1,76 @@
+From 760a552afcd3c24843fde1a5ca8ef94ec701e482 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:08:53 +0100
+Subject: wifi: mac80211: remove mesh forwarding congestion check
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit 5c1e269aa5ebafeec69b68ff560522faa5bcb6c1 ]
+
+Now that all drivers use iTXQ, it no longer makes sense to drop forwarded
+tx packets when the driver has stopped the queues.
+fq_codel will take care of dropping packets when the queues fill up.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20230213100855.34315-3-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 9ad797485692 ("wifi: cfg80211: check A-MSDU format more carefully")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/debugfs_netdev.c | 3 ---
+ net/mac80211/ieee80211_i.h    | 1 -
+ net/mac80211/rx.c             | 5 -----
+ 3 files changed, 9 deletions(-)
+
+diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
+index 08a1d7564b7f2..8ced615add712 100644
+--- a/net/mac80211/debugfs_netdev.c
++++ b/net/mac80211/debugfs_netdev.c
+@@ -603,8 +603,6 @@ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC);
+ IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC);
+ IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC);
+ IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC);
+-IEEE80211_IF_FILE(dropped_frames_congestion,
+-                u.mesh.mshstats.dropped_frames_congestion, DEC);
+ IEEE80211_IF_FILE(dropped_frames_no_route,
+                 u.mesh.mshstats.dropped_frames_no_route, DEC);
+@@ -741,7 +739,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
+       MESHSTATS_ADD(fwded_frames);
+       MESHSTATS_ADD(dropped_frames_ttl);
+       MESHSTATS_ADD(dropped_frames_no_route);
+-      MESHSTATS_ADD(dropped_frames_congestion);
+ #undef MESHSTATS_ADD
+ }
+diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
+index 709eb7bfcf194..8a3af4144d3f0 100644
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -327,7 +327,6 @@ struct mesh_stats {
+       __u32 fwded_frames;             /* Mesh total forwarded frames */
+       __u32 dropped_frames_ttl;       /* Not transmitted since mesh_ttl == 0*/
+       __u32 dropped_frames_no_route;  /* Not transmitted, no route found */
+-      __u32 dropped_frames_congestion;/* Not forwarded due to congestion */
+ };
+ #define PREQ_Q_F_START                0x1
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index b68a9200403e7..1d50126aebbc8 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2931,11 +2931,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
+               return RX_CONTINUE;
+       ac = ieee802_1d_to_ac[skb->priority];
+-      q = sdata->vif.hw_queue[ac];
+-      if (ieee80211_queue_stopped(&local->hw, q)) {
+-              IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
+-              return RX_DROP_MONITOR;
+-      }
+       skb_set_queue_mapping(skb, ac);
+       if (!--mesh_hdr->ttl) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-mac80211-take-wiphy-lock-for-mac-addr-change.patch b/queue-6.1/wifi-mac80211-take-wiphy-lock-for-mac-addr-change.patch
new file mode 100644 (file)
index 0000000..2c8c971
--- /dev/null
@@ -0,0 +1,67 @@
+From b4d83a3dcbf3972ca7967df401ff7370638787ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Aug 2023 14:00:00 +0200
+Subject: wifi: mac80211: take wiphy lock for MAC addr change
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit a26787aa13974fb0b3fb42bfeb4256c1b686e305 ]
+
+We want to ensure everything holds the wiphy lock,
+so also extend that to the MAC change callback.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Stable-dep-of: 74a7c93f45ab ("wifi: mac80211: fix change_address deadlock during unregister")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/iface.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index e00e1bf0f754a..408ee5afc9ae7 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -251,9 +251,9 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
+       return ret;
+ }
+-static int ieee80211_change_mac(struct net_device *dev, void *addr)
++static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata,
++                               void *addr)
+ {
+-      struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+       struct ieee80211_local *local = sdata->local;
+       struct sockaddr *sa = addr;
+       bool check_dup = true;
+@@ -278,7 +278,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+       if (live)
+               drv_remove_interface(local, sdata);
+-      ret = eth_mac_addr(dev, sa);
++      ret = eth_mac_addr(sdata->dev, sa);
+       if (ret == 0) {
+               memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
+@@ -294,6 +294,19 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+       return ret;
+ }
++static int ieee80211_change_mac(struct net_device *dev, void *addr)
++{
++      struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++      struct ieee80211_local *local = sdata->local;
++      int ret;
++
++      wiphy_lock(local->hw.wiphy);
++      ret = _ieee80211_change_mac(sdata, addr);
++      wiphy_unlock(local->hw.wiphy);
++
++      return ret;
++}
++
+ static inline int identical_mac_addr_allowed(int type1, int type2)
+ {
+       return type1 == NL80211_IFTYPE_MONITOR ||
+-- 
+2.43.0
+