git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sun, 7 Jan 2024 01:58:50 +0000 (20:58 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 7 Jan 2024 01:58:50 +0000 (20:58 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
121 files changed:
queue-6.1/alsa-hda-realtek-add-quirk-for-lenovo-yoga-pro-7.patch [new file with mode: 0644]
queue-6.1/arm-sun9i-smp-fix-array-index-out-of-bounds-read-in-.patch [new file with mode: 0644]
queue-6.1/arm64-dts-qcom-sdm845-align-rpmh-regulator-nodes-wit.patch [new file with mode: 0644]
queue-6.1/arm64-dts-qcom-sdm845-fix-psci-power-domain-names.patch [new file with mode: 0644]
queue-6.1/asix-add-check-for-usbnet_get_endpoints.patch [new file with mode: 0644]
queue-6.1/asoc-fsl_rpmsg-fix-error-handler-with-pm_runtime_ena.patch [new file with mode: 0644]
queue-6.1/asoc-mediatek-mt8186-fix-aud_pad_top-register-and-of.patch [new file with mode: 0644]
queue-6.1/asoc-meson-g12a-toacodec-fix-event-generation.patch [new file with mode: 0644]
queue-6.1/asoc-meson-g12a-toacodec-validate-written-enum-value.patch [new file with mode: 0644]
queue-6.1/asoc-meson-g12a-tohdmitx-fix-event-generation-for-s-.patch [new file with mode: 0644]
queue-6.1/asoc-meson-g12a-tohdmitx-validate-written-enum-value.patch [new file with mode: 0644]
queue-6.1/blk-mq-make-sure-active-queue-usage-is-held-for-bio_.patch [new file with mode: 0644]
queue-6.1/block-update-the-stable_writes-flag-in-bdev_add.patch [new file with mode: 0644]
queue-6.1/bnxt_en-remove-mis-applied-code-from-bnxt_cfg_ntp_fi.patch [new file with mode: 0644]
queue-6.1/bpf-clean-up-visit_insn-s-instruction-processing.patch [new file with mode: 0644]
queue-6.1/bpf-decouple-prune-and-jump-points.patch [new file with mode: 0644]
queue-6.1/bpf-fix-precision-backtracking-instruction-iteration.patch [new file with mode: 0644]
queue-6.1/bpf-handle-ldimm64-properly-in-check_cfg.patch [new file with mode: 0644]
queue-6.1/bpf-remove-unnecessary-prune-and-jump-points.patch [new file with mode: 0644]
queue-6.1/bpf-remove-unused-insn_cnt-argument-from-visit_-func.patch [new file with mode: 0644]
queue-6.1/bpf-sockmap-af_unix-stream-sockets-need-to-hold-ref-.patch [new file with mode: 0644]
queue-6.1/bpf-support-new-32bit-offset-jmp-instruction.patch [new file with mode: 0644]
queue-6.1/bpf-x64-fix-tailcall-infinite-loop.patch [new file with mode: 0644]
queue-6.1/bpf-x86-save-restore-regs-with-bpf_dw-size.patch [new file with mode: 0644]
queue-6.1/bpf-x86-simplify-the-parsing-logic-of-structure-para.patch [new file with mode: 0644]
queue-6.1/btrfs-fix-qgroup_free_reserved_data-int-overflow.patch [new file with mode: 0644]
queue-6.1/btrfs-mark-the-len-field-in-struct-btrfs_ordered_sum.patch [new file with mode: 0644]
queue-6.1/can-raw-add-support-for-so_mark.patch [new file with mode: 0644]
queue-6.1/cpu-smt-create-topology_smt_thread_allowed.patch [new file with mode: 0644]
queue-6.1/cpu-smt-make-smt-control-more-robust-against-enumera.patch [new file with mode: 0644]
queue-6.1/dpaa2-eth-recycle-the-rx-buffer-only-after-all-proce.patch [new file with mode: 0644]
queue-6.1/drm-bridge-ti-sn65dsi86-never-store-more-than-msg-si.patch [new file with mode: 0644]
queue-6.1/drm-i915-dp-fix-passing-the-correct-dpcd_rev-for-drm.patch [new file with mode: 0644]
queue-6.1/ethtool-don-t-propagate-eopnotsupp-from-dumps.patch [new file with mode: 0644]
queue-6.1/ext4-convert-move_extent_per_page-to-use-folios.patch [new file with mode: 0644]
queue-6.1/f2fs-assign-default-compression-level.patch [new file with mode: 0644]
queue-6.1/f2fs-clean-up-i_compress_flag-and-i_compress_level-u.patch [new file with mode: 0644]
queue-6.1/f2fs-convert-to-use-bitmap-api.patch [new file with mode: 0644]
queue-6.1/f2fs-set-the-default-compress_level-on-ioctl.patch [new file with mode: 0644]
queue-6.1/fbdev-imsttfb-fix-double-free-in-probe.patch [new file with mode: 0644]
queue-6.1/fbdev-imsttfb-release-framebuffer-and-dealloc-cmap-o.patch [new file with mode: 0644]
queue-6.1/filemap-add-a-per-mapping-stable-writes-flag.patch [new file with mode: 0644]
queue-6.1/firmware-arm_scmi-fix-frequency-truncation-by-promot.patch [new file with mode: 0644]
queue-6.1/genirq-affinity-don-t-pass-irq_affinity_desc-array-t.patch [new file with mode: 0644]
queue-6.1/genirq-affinity-move-group_cpus_evenly-into-lib.patch [new file with mode: 0644]
queue-6.1/genirq-affinity-pass-affinity-managed-mask-array-to-.patch [new file with mode: 0644]
queue-6.1/genirq-affinity-remove-the-firstvec-parameter-from-i.patch [new file with mode: 0644]
queue-6.1/genirq-affinity-rename-irq_build_affinity_masks-as-g.patch [new file with mode: 0644]
queue-6.1/i40e-fix-filter-input-checks-to-prevent-config-with-.patch [new file with mode: 0644]
queue-6.1/i40e-fix-use-after-free-in-i40e_aqc_add_filters.patch [new file with mode: 0644]
queue-6.1/i40e-restore-vf-msi-x-state-during-pci-reset.patch [new file with mode: 0644]
queue-6.1/ice-fix-link_down_on_close-message.patch [new file with mode: 0644]
queue-6.1/ice-shut-down-vsi-with-link-down-on-close-enabled.patch [new file with mode: 0644]
queue-6.1/igc-check-vlan-ethertype-mask.patch [new file with mode: 0644]
queue-6.1/igc-check-vlan-tci-mask.patch [new file with mode: 0644]
queue-6.1/igc-fix-hicredit-calculation.patch [new file with mode: 0644]
queue-6.1/igc-report-vlan-ethertype-matching-back-to-user.patch [new file with mode: 0644]
queue-6.1/ipv4-ipv6-use-splice_eof-to-flush.patch [new file with mode: 0644]
queue-6.1/khugepage-replace-try_to_release_page-with-filemap_r.patch [new file with mode: 0644]
queue-6.1/lib-group_cpus.c-avoid-acquiring-cpu-hotplug-lock-in.patch [new file with mode: 0644]
queue-6.1/media-camss-sm8250-virtual-channels-for-csid.patch [new file with mode: 0644]
queue-6.1/media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-v.patch [new file with mode: 0644]
queue-6.1/memory-failure-convert-truncate_error_page-to-use-fo.patch [new file with mode: 0644]
queue-6.1/mlxbf_gige-fix-receive-packet-race-condition.patch [new file with mode: 0644]
queue-6.1/mm-memory_hotplug-add-missing-mem_hotplug_lock.patch [new file with mode: 0644]
queue-6.1/mm-memory_hotplug-fix-error-handling-in-add_memory_r.patch [new file with mode: 0644]
queue-6.1/mm-merge-folio_has_private-filemap_release_folio-cal.patch [new file with mode: 0644]
queue-6.1/mm-netfs-fscache-stop-read-optimisation-when-folio-r.patch [new file with mode: 0644]
queue-6.1/net-annotate-data-races-around-sk-sk_bind_phc.patch [new file with mode: 0644]
queue-6.1/net-annotate-data-races-around-sk-sk_tsflags.patch [new file with mode: 0644]
queue-6.1/net-bcmgenet-fix-fcs-generation-for-fragmented-skbuf.patch [new file with mode: 0644]
queue-6.1/net-declare-msg_splice_pages-internal-sendmsg-flag.patch [new file with mode: 0644]
queue-6.1/net-dpaa2-eth-rearrange-variable-in-dpaa2_eth_get_et.patch [new file with mode: 0644]
queue-6.1/net-implement-missing-getsockopt-so_timestamping_new.patch [new file with mode: 0644]
queue-6.1/net-implement-missing-so_timestamping_new-cmsg-suppo.patch [new file with mode: 0644]
queue-6.1/net-mlx5-increase-size-of-irq-name-buffer.patch [new file with mode: 0644]
queue-6.1/net-qla3xxx-fix-potential-memleak-in-ql_alloc_buffer.patch [new file with mode: 0644]
queue-6.1/net-ravb-wait-for-operating-mode-to-be-applied.patch [new file with mode: 0644]
queue-6.1/net-save-and-restore-msg_namelen-in-sock_sendmsg.patch [new file with mode: 0644]
queue-6.1/net-sched-act_ct-fix-promotion-of-offloaded-unreplie.patch [new file with mode: 0644]
queue-6.1/net-sched-act_ct-offload-udp-new-connections.patch [new file with mode: 0644]
queue-6.1/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch [new file with mode: 0644]
queue-6.1/net-sched-call-tcf_ct_params_free-to-free-params-in-.patch [new file with mode: 0644]
queue-6.1/net-sched-em_text-fix-possible-memory-leak-in-em_tex.patch [new file with mode: 0644]
queue-6.1/net-smc-fix-invalid-link-access-in-dumping-smc-r-con.patch [new file with mode: 0644]
queue-6.1/net-timestamp-extend-sof_timestamping_opt_id-to-hw-t.patch [new file with mode: 0644]
queue-6.1/netfilter-flowtable-allow-unidirectional-rules.patch [new file with mode: 0644]
queue-6.1/netfilter-flowtable-cache-info-of-last-offload.patch [new file with mode: 0644]
queue-6.1/netfilter-flowtable-gc-pushes-back-packets-to-classi.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-set-transport-offset-from-mac-he.patch [new file with mode: 0644]
queue-6.1/netfilter-nft_immediate-drop-chain-reference-counter.patch [new file with mode: 0644]
queue-6.1/netfilter-use-skb_ip_totlen-and-iph_totlen.patch [new file with mode: 0644]
queue-6.1/nfc-llcp_core-hold-a-ref-to-llcp_local-dev-when-hold.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-always-configure-nix-tx-link-credits-ba.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-fix-marking-couple-of-structure-as-__pa.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-fix-pause-frame-configuration.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-re-enable-mac-tx-in-otx2_stop-processin.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-support-variable-number-of-lmacs.patch [new file with mode: 0644]
queue-6.1/r8169-fix-pci-error-on-system-resume.patch [new file with mode: 0644]
queue-6.1/ring-buffer-fix-32-bit-rb_time_read-race-with-rb_tim.patch [new file with mode: 0644]
queue-6.1/s390-cpumf-support-user-space-events-for-counting.patch [new file with mode: 0644]
queue-6.1/s390-mm-add-missing-arch_set_page_dat-call-to-vmem_c.patch [new file with mode: 0644]
queue-6.1/selftests-bonding-do-not-set-port-down-when-adding-t.patch [new file with mode: 0644]
queue-6.1/selftests-mptcp-fix-fastclose-with-csum-failure.patch [new file with mode: 0644]
queue-6.1/selftests-mptcp-set-failing_links-in-run_tests.patch [new file with mode: 0644]
queue-6.1/selftests-secretmem-floor-the-memory-size-to-the-mul.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/sfc-fix-a-double-free-bug-in-efx_probe_filters.patch [new file with mode: 0644]
queue-6.1/smb-client-fix-missing-mode-bits-for-smb-symlinks.patch [new file with mode: 0644]
queue-6.1/splice-net-add-a-splice_eof-op-to-file-ops-and-socke.patch [new file with mode: 0644]
queue-6.1/srcu-fix-callbacks-acceleration-mishandling.patch [new file with mode: 0644]
queue-6.1/udp-annotate-data-races-around-udp-encap_type.patch [new file with mode: 0644]
queue-6.1/udp-convert-udp_sendpage-to-use-msg_splice_pages.patch [new file with mode: 0644]
queue-6.1/udp-introduce-udp-udp_flags.patch [new file with mode: 0644]
queue-6.1/udp-lockless-udp_encap_l2tpinudp-udp_gro.patch [new file with mode: 0644]
queue-6.1/udp-move-udp-accept_udp_-l4-fraglist-to-udp-udp_flag.patch [new file with mode: 0644]
queue-6.1/udp-move-udp-gro_enabled-to-udp-udp_flags.patch [new file with mode: 0644]
queue-6.1/udp-move-udp-no_check6_rx-to-udp-udp_flags.patch [new file with mode: 0644]
queue-6.1/udp-move-udp-no_check6_tx-to-udp-udp_flags.patch [new file with mode: 0644]
queue-6.1/wifi-iwlwifi-pcie-don-t-synchronize-irqs-from-irq.patch [new file with mode: 0644]
queue-6.1/wifi-iwlwifi-yoyo-swap-cdb-and-jacket-bits-values.patch [new file with mode: 0644]

diff --git a/queue-6.1/alsa-hda-realtek-add-quirk-for-lenovo-yoga-pro-7.patch b/queue-6.1/alsa-hda-realtek-add-quirk-for-lenovo-yoga-pro-7.patch
new file mode 100644 (file)
index 0000000..54a423e
--- /dev/null
@@ -0,0 +1,36 @@
+From 4ee3561ae2a5201c99335c17f13f5ba05802f179 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 19:20:35 +0100
+Subject: ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 634e5e1e06f5cdd614a1bc429ecb243a51cc009d ]
+
+Lenovo Yoga Pro 7 14APH8 (PCI SSID 17aa:3882) seems requiring the
+similar workaround like Yoga 9 model for the bass speaker.
+
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/CAGGk=CRRQ1L9p771HsXTN_ebZP41Qj+3gw35Gezurn+nokRewg@mail.gmail.com
+Link: https://lore.kernel.org/r/20231207182035.30248-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index a88ed60dcd96a..48155aa52828c 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9904,6 +9904,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
++      SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
+       SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
+       SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
+       SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
+-- 
+2.43.0
+
diff --git a/queue-6.1/arm-sun9i-smp-fix-array-index-out-of-bounds-read-in-.patch b/queue-6.1/arm-sun9i-smp-fix-array-index-out-of-bounds-read-in-.patch
new file mode 100644 (file)
index 0000000..71b08b4
--- /dev/null
@@ -0,0 +1,64 @@
+From 3e16e0cda98b5db7e47533ca0dcd626b759cc327 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Dec 2023 20:39:02 +0100
+Subject: ARM: sun9i: smp: Fix array-index-out-of-bounds read in
+ sunxi_mc_smp_init
+
+From: Stefan Wahren <wahrenst@gmx.net>
+
+[ Upstream commit 72ad3b772b6d393701df58ba1359b0bb346a19ed ]
+
+Running a multi-arch kernel (multi_v7_defconfig) on a Raspberry Pi 3B+
+with enabled CONFIG_UBSAN triggers the following warning:
+
+ UBSAN: array-index-out-of-bounds in arch/arm/mach-sunxi/mc_smp.c:810:29
+ index 2 is out of range for type 'sunxi_mc_smp_data [2]'
+ CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.7.0-rc6-00248-g5254c0cbc92d
+ Hardware name: BCM2835
+  unwind_backtrace from show_stack+0x10/0x14
+  show_stack from dump_stack_lvl+0x40/0x4c
+  dump_stack_lvl from ubsan_epilogue+0x8/0x34
+  ubsan_epilogue from __ubsan_handle_out_of_bounds+0x78/0x80
+  __ubsan_handle_out_of_bounds from sunxi_mc_smp_init+0xe4/0x4cc
+  sunxi_mc_smp_init from do_one_initcall+0xa0/0x2fc
+  do_one_initcall from kernel_init_freeable+0xf4/0x2f4
+  kernel_init_freeable from kernel_init+0x18/0x158
+  kernel_init from ret_from_fork+0x14/0x28
+
+Since the enabled method couldn't match with any entry from
+sunxi_mc_smp_data, the value of the index shouldn't be used right after
+the loop. So move it after the check of ret in order to have a valid
+index.
+
+Fixes: 1631090e34f5 ("ARM: sun9i: smp: Add is_a83t field")
+Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
+Link: https://lore.kernel.org/r/20231228193903.9078-1-wahrenst@gmx.net
+Reviewed-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mach-sunxi/mc_smp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c
+index 26cbce1353387..b2f5f4f28705f 100644
+--- a/arch/arm/mach-sunxi/mc_smp.c
++++ b/arch/arm/mach-sunxi/mc_smp.c
+@@ -808,12 +808,12 @@ static int __init sunxi_mc_smp_init(void)
+                       break;
+       }
+-      is_a83t = sunxi_mc_smp_data[i].is_a83t;
+-
+       of_node_put(node);
+       if (ret)
+               return -ENODEV;
++      is_a83t = sunxi_mc_smp_data[i].is_a83t;
++
+       if (!sunxi_mc_smp_cpu_table_init())
+               return -EINVAL;
+-- 
+2.43.0
+
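A minimal standalone sketch of the pattern the sun9i fix above enforces (illustrative names only, not the kernel code): when a lookup loop can fall through without matching, its index is only meaningful after the "found" check, so any array read through it must come after that check.

```c
#include <string.h>

struct smp_data { const char *method; int is_a83t; };

static const struct smp_data table[2] = {
	{ "allwinner,sun8i-a83t-smp", 1 },
	{ "allwinner,sun9i-a80-smp",  0 },
};

/* Hypothetical helper: look up 'method' and report its is_a83t flag. */
static int lookup_is_a83t(const char *method, int *is_a83t)
{
	size_t i;
	int ret = -1;

	for (i = 0; i < 2; i++) {
		if (!strcmp(table[i].method, method)) {
			ret = 0;
			break;
		}
	}

	if (ret)
		return ret;	/* no match: i == 2, table[i] would be out of bounds */

	*is_a83t = table[i].is_a83t;	/* safe only after the check above */
	return 0;
}
```
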
diff --git a/queue-6.1/arm64-dts-qcom-sdm845-align-rpmh-regulator-nodes-wit.patch b/queue-6.1/arm64-dts-qcom-sdm845-align-rpmh-regulator-nodes-wit.patch
new file mode 100644 (file)
index 0000000..04538d8
--- /dev/null
@@ -0,0 +1,304 @@
+From e19d878f0cb36876b9df3d6fa13866e0e1f207f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Jan 2023 12:43:42 +0100
+Subject: arm64: dts: qcom: sdm845: align RPMh regulator nodes with bindings
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit 86dd19bbdea2b7d3feb69c0c39f141de30a18ec9 ]
+
+Device node names should be generic and bindings expect certain pattern
+for RPMh regulator nodes.
+
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+Link: https://lore.kernel.org/r/20230127114347.235963-6-krzysztof.kozlowski@linaro.org
+Stable-dep-of: a5f01673d394 ("arm64: dts: qcom: sdm845: Fix PSCI power domain names")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi            | 4 ++--
+ arch/arm64/boot/dts/qcom/sdm845-db845c.dts            | 4 ++--
+ arch/arm64/boot/dts/qcom/sdm845-lg-common.dtsi        | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm845-mtp.dts               | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi   | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts     | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama.dtsi | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts  | 2 +-
+ arch/arm64/boot/dts/qcom/sdm845-xiaomi-polaris.dts    | 6 +++---
+ arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts  | 2 +-
+ arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts      | 2 +-
+ 11 files changed, 25 insertions(+), 25 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+index a5c0c788969fb..985824032c522 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+@@ -351,7 +351,7 @@ flash@0 {
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -633,7 +633,7 @@ src_pp1800_lvs2: lvs2 {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+index c9efcb894a52f..8c9ccf5b4ea41 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+@@ -271,7 +271,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+               vdd-s1-supply = <&vph_pwr>;
+@@ -396,7 +396,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-lg-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-lg-common.dtsi
+index 20f275f8694dc..e2921640880a1 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-lg-common.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-lg-common.dtsi
+@@ -166,7 +166,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -419,7 +419,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -433,7 +433,7 @@ vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
+index 64958dee17d8b..b47e333aa3510 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
+@@ -117,7 +117,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -382,7 +382,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -396,7 +396,7 @@ vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
+index 392461c29e76e..0713b774a97be 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
+@@ -144,7 +144,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -280,7 +280,7 @@ vreg_l28a_3p0: ldo28 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -294,7 +294,7 @@ vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
+index 83261c9bb4f23..b65c35865dab9 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
+@@ -110,7 +110,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -375,7 +375,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -389,7 +389,7 @@ vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama.dtsi b/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama.dtsi
+index d6918e6d19799..249a715d5aae1 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama.dtsi
+@@ -78,7 +78,7 @@ ramoops@ffc00000 {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -308,7 +308,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -319,7 +319,7 @@ src_vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
+index 0f470cf1ed1c1..6d6b3dd699475 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
+@@ -125,7 +125,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-polaris.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-polaris.dts
+index 093b04359ec39..ffbe45a99b74a 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-polaris.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-polaris.dts
+@@ -143,7 +143,7 @@ vreg_s4a_1p8: vreg-s4a-1p8 {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+@@ -343,7 +343,7 @@ vreg_lvs2a_1p8: lvs2 {
+               };
+       };
+-      pmi8998-rpmh-regulators {
++      regulators-1 {
+               compatible = "qcom,pmi8998-rpmh-regulators";
+               qcom,pmic-id = "b";
+@@ -355,7 +355,7 @@ vreg_bob: bob {
+               };
+       };
+-      pm8005-rpmh-regulators {
++      regulators-2 {
+               compatible = "qcom,pm8005-rpmh-regulators";
+               qcom,pmic-id = "c";
+diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+index 74f43da51fa50..48a41ace8fc58 100644
+--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
++++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+@@ -99,7 +99,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+diff --git a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
+index d028a7eb364a6..c169d2870bdf4 100644
+--- a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
++++ b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts
+@@ -129,7 +129,7 @@ &adsp_pas {
+ };
+ &apps_rsc {
+-      pm8998-rpmh-regulators {
++      regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+-- 
+2.43.0
+
diff --git a/queue-6.1/arm64-dts-qcom-sdm845-fix-psci-power-domain-names.patch b/queue-6.1/arm64-dts-qcom-sdm845-fix-psci-power-domain-names.patch
new file mode 100644 (file)
index 0000000..769a9fa
--- /dev/null
@@ -0,0 +1,66 @@
+From e0335f9198238cec81a096f299f7f121093303f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 12:42:03 +0530
+Subject: arm64: dts: qcom: sdm845: Fix PSCI power domain names
+
+From: David Heidelberg <david@ixit.cz>
+
+[ Upstream commit a5f01673d3946e424091e6b8ff274716f9c21454 ]
+
+The original commit hasn't been updated according to
+refactoring done in sdm845.dtsi.
+
+Fixes: a1ade6cac5a2 ("arm64: dts: qcom: sdm845: Switch PSCI cpu idle states from PC to OSI")
+Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Signed-off-by: David Heidelberg <david@ixit.cz>
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Reviewed-by: Abel Vesa <abel.vesa@linaro.org>
+Link: https://lore.kernel.org/r/20230912071205.11502-1-david@ixit.cz
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi | 20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+index 985824032c522..43ee28db61aa8 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+@@ -150,15 +150,15 @@ &cpufreq_hw {
+ };
+ &psci {
+-      /delete-node/ cpu0;
+-      /delete-node/ cpu1;
+-      /delete-node/ cpu2;
+-      /delete-node/ cpu3;
+-      /delete-node/ cpu4;
+-      /delete-node/ cpu5;
+-      /delete-node/ cpu6;
+-      /delete-node/ cpu7;
+-      /delete-node/ cpu-cluster0;
++      /delete-node/ power-domain-cpu0;
++      /delete-node/ power-domain-cpu1;
++      /delete-node/ power-domain-cpu2;
++      /delete-node/ power-domain-cpu3;
++      /delete-node/ power-domain-cpu4;
++      /delete-node/ power-domain-cpu5;
++      /delete-node/ power-domain-cpu6;
++      /delete-node/ power-domain-cpu7;
++      /delete-node/ power-domain-cluster;
+ };
+ &cpus {
+@@ -351,6 +351,8 @@ flash@0 {
+ &apps_rsc {
++      /delete-property/ power-domains;
++
+       regulators-0 {
+               compatible = "qcom,pm8998-rpmh-regulators";
+               qcom,pmic-id = "a";
+-- 
+2.43.0
+
diff --git a/queue-6.1/asix-add-check-for-usbnet_get_endpoints.patch b/queue-6.1/asix-add-check-for-usbnet_get_endpoints.patch
new file mode 100644 (file)
index 0000000..3feea06
--- /dev/null
@@ -0,0 +1,38 @@
+From b44044d49c2870abfd79fa40e990603cafdfaf2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 03:35:34 +0000
+Subject: asix: Add check for usbnet_get_endpoints
+
+From: Chen Ni <nichen@iscas.ac.cn>
+
+[ Upstream commit eaac6a2d26b65511e164772bec6918fcbc61938e ]
+
+Add check for usbnet_get_endpoints() and return the error if it fails
+in order to transfer the error.
+
+Fixes: 16626b0cc3d5 ("asix: Add a new driver for the AX88172A")
+Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/ax88172a.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
+index 3777c7e2e6fc0..e47bb125048d4 100644
+--- a/drivers/net/usb/ax88172a.c
++++ b/drivers/net/usb/ax88172a.c
+@@ -161,7 +161,9 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
+       u8 buf[ETH_ALEN];
+       struct ax88172a_private *priv;
+-      usbnet_get_endpoints(dev, intf);
++      ret = usbnet_get_endpoints(dev, intf);
++      if (ret)
++              return ret;
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+-- 
+2.43.0
+
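A hedged, generic sketch of the rule the asix patch above applies (hypothetical names, not the driver code): a bind()/probe()-style function should check and propagate the return value of its setup helpers instead of silently ignoring a failure.

```c
#include <errno.h>

struct usb_dev { int in_ep, out_ep; };

/* Hypothetical helper standing in for usbnet_get_endpoints(). */
static int get_endpoints(struct usb_dev *dev)
{
	/* may fail, e.g. when the expected bulk endpoints are missing */
	return (dev->in_ep && dev->out_ep) ? 0 : -ENODEV;
}

static int example_bind(struct usb_dev *dev)
{
	int ret;

	ret = get_endpoints(dev);
	if (ret)
		return ret;	/* propagate the failure instead of dropping it */

	/* continue with the rest of the setup only on success */
	return 0;
}
```
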
diff --git a/queue-6.1/asoc-fsl_rpmsg-fix-error-handler-with-pm_runtime_ena.patch b/queue-6.1/asoc-fsl_rpmsg-fix-error-handler-with-pm_runtime_ena.patch
new file mode 100644 (file)
index 0000000..76b1a85
--- /dev/null
@@ -0,0 +1,65 @@
+From 49ca35addbb9c2d01e802de2b473789fe8bb5f35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Dec 2023 17:06:08 +0900
+Subject: ASoC: fsl_rpmsg: Fix error handler with pm_runtime_enable
+
+From: Chancel Liu <chancel.liu@nxp.com>
+
+[ Upstream commit f9d378fc68c43fd41b35133edec9cd902ec334ec ]
+
+There is error message when defer probe happens:
+
+fsl_rpmsg rpmsg_audio: Unbalanced pm_runtime_enable!
+
+Fix the error handler with pm_runtime_enable.
+
+Fixes: b73d9e6225e8 ("ASoC: fsl_rpmsg: Add CPU DAI driver for audio base on rpmsg")
+Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
+Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>
+Link: https://lore.kernel.org/r/20231225080608.967953-1-chancel.liu@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/fsl/fsl_rpmsg.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/fsl/fsl_rpmsg.c b/sound/soc/fsl/fsl_rpmsg.c
+index bf94838bdbefe..5c07a8ff0c9c0 100644
+--- a/sound/soc/fsl/fsl_rpmsg.c
++++ b/sound/soc/fsl/fsl_rpmsg.c
+@@ -231,7 +231,7 @@ static int fsl_rpmsg_probe(struct platform_device *pdev)
+       ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
+                                             &fsl_rpmsg_dai, 1);
+       if (ret)
+-              return ret;
++              goto err_pm_disable;
+       rpmsg->card_pdev = platform_device_register_data(&pdev->dev,
+                                                        "imx-audio-rpmsg",
+@@ -241,16 +241,22 @@ static int fsl_rpmsg_probe(struct platform_device *pdev)
+       if (IS_ERR(rpmsg->card_pdev)) {
+               dev_err(&pdev->dev, "failed to register rpmsg card\n");
+               ret = PTR_ERR(rpmsg->card_pdev);
+-              return ret;
++              goto err_pm_disable;
+       }
+       return 0;
++
++err_pm_disable:
++      pm_runtime_disable(&pdev->dev);
++      return ret;
+ }
+ static int fsl_rpmsg_remove(struct platform_device *pdev)
+ {
+       struct fsl_rpmsg *rpmsg = platform_get_drvdata(pdev);
++      pm_runtime_disable(&pdev->dev);
++
+       if (rpmsg->card_pdev)
+               platform_device_unregister(rpmsg->card_pdev);
+-- 
+2.43.0
+
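A simplified sketch of the balancing rule behind the fsl_rpmsg fix above (hypothetical function names, not the driver's actual probe): a pm_runtime_enable() taken in probe() must be undone on every failure path and again in remove(), otherwise a deferred re-probe trips the "Unbalanced pm_runtime_enable!" warning.

```c
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

/* Hypothetical registration step that may fail or request deferral. */
static int example_register(struct platform_device *pdev);

static int example_probe(struct platform_device *pdev)
{
	int ret;

	pm_runtime_enable(&pdev->dev);

	ret = example_register(pdev);
	if (ret)
		goto err_pm_disable;	/* undo the enable on any failure path */

	return 0;

err_pm_disable:
	pm_runtime_disable(&pdev->dev);
	return ret;
}

static int example_remove(struct platform_device *pdev)
{
	pm_runtime_disable(&pdev->dev);	/* balance the enable done in probe */
	return 0;
}
```
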
diff --git a/queue-6.1/asoc-mediatek-mt8186-fix-aud_pad_top-register-and-of.patch b/queue-6.1/asoc-mediatek-mt8186-fix-aud_pad_top-register-and-of.patch
new file mode 100644 (file)
index 0000000..b3d59b7
--- /dev/null
@@ -0,0 +1,39 @@
+From e73ec909528dde319b11d6058116ce61ef4cf670 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Dec 2023 13:43:42 +0200
+Subject: ASoC: mediatek: mt8186: fix AUD_PAD_TOP register and offset
+
+From: Eugen Hristev <eugen.hristev@collabora.com>
+
+[ Upstream commit 38744c3fa00109c51076121c2deb4f02e2f09194 ]
+
+AUD_PAD_TOP widget's correct register is AFE_AUD_PAD_TOP , and not zero.
+Having a zero as register, it would mean that the `snd_soc_dapm_new_widgets`
+would try to read the register at offset zero when trying to get the power
+status of this widget, which is incorrect.
+
+Fixes: b65c466220b3 ("ASoC: mediatek: mt8186: support adda in platform driver")
+Signed-off-by: Eugen Hristev <eugen.hristev@collabora.com>
+Link: https://lore.kernel.org/r/20231229114342.195867-1-eugen.hristev@collabora.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/mediatek/mt8186/mt8186-dai-adda.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/mediatek/mt8186/mt8186-dai-adda.c b/sound/soc/mediatek/mt8186/mt8186-dai-adda.c
+index 094402470dc23..858b95b199dcb 100644
+--- a/sound/soc/mediatek/mt8186/mt8186-dai-adda.c
++++ b/sound/soc/mediatek/mt8186/mt8186-dai-adda.c
+@@ -499,7 +499,7 @@ static const struct snd_soc_dapm_widget mtk_dai_adda_widgets[] = {
+                             SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+       SND_SOC_DAPM_SUPPLY_S("AUD_PAD_TOP", SUPPLY_SEQ_ADDA_AUD_PAD_TOP,
+-                            0, 0, 0,
++                            AFE_AUD_PAD_TOP, RG_RX_FIFO_ON_SFT, 0,
+                             mtk_adda_pad_top_event,
+                             SND_SOC_DAPM_PRE_PMU),
+       SND_SOC_DAPM_SUPPLY_S("ADDA_MTKAIF_CFG", SUPPLY_SEQ_ADDA_MTKAIF_CFG,
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-meson-g12a-toacodec-fix-event-generation.patch b/queue-6.1/asoc-meson-g12a-toacodec-fix-event-generation.patch
new file mode 100644 (file)
index 0000000..9ccd296
--- /dev/null
@@ -0,0 +1,39 @@
+From 1aae4192aa31ef02321a89bd34bbf0650c634bb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 18:34:03 +0000
+Subject: ASoC: meson: g12a-toacodec: Fix event generation
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit 172c88244b5f2d3375403ebb504d407be0fded59 ]
+
+When a control changes value the return value from _put() should be 1 so
+we get events generated to userspace notifying applications of the change.
+We are checking if there has been a change and exiting early if not but we
+are not providing the correct return value in the latter case, fix this.
+
+Fixes: af2618a2eee8 ("ASoC: meson: g12a: add internal DAC glue driver")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20240103-meson-enum-val-v1-3-424af7a8fb91@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/meson/g12a-toacodec.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/meson/g12a-toacodec.c b/sound/soc/meson/g12a-toacodec.c
+index 3b1ce9143c653..8d8d848ebd58b 100644
+--- a/sound/soc/meson/g12a-toacodec.c
++++ b/sound/soc/meson/g12a-toacodec.c
+@@ -104,7 +104,7 @@ static int g12a_toacodec_mux_put_enum(struct snd_kcontrol *kcontrol,
+       snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+-      return 0;
++      return 1;
+ }
+ static SOC_ENUM_SINGLE_DECL(g12a_toacodec_mux_enum, TOACODEC_CTRL0,
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-meson-g12a-toacodec-validate-written-enum-value.patch b/queue-6.1/asoc-meson-g12a-toacodec-validate-written-enum-value.patch
new file mode 100644 (file)
index 0000000..4196bab
--- /dev/null
@@ -0,0 +1,40 @@
+From 69dc7179c8414af40c52fc10df2814e6916b95a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 18:34:01 +0000
+Subject: ASoC: meson: g12a-toacodec: Validate written enum values
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit 3150b70e944ead909260285dfb5707d0bedcf87b ]
+
+When writing to an enum we need to verify that the value written is valid
+for the enumeration, the helper function snd_soc_item_enum_to_val() doesn't
+do it since it needs to return an unsigned (and in any case we'd need to
+check the return value).
+
+Fixes: af2618a2eee8 ("ASoC: meson: g12a: add internal DAC glue driver")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20240103-meson-enum-val-v1-1-424af7a8fb91@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/meson/g12a-toacodec.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/sound/soc/meson/g12a-toacodec.c b/sound/soc/meson/g12a-toacodec.c
+index ddc667956cf5e..3b1ce9143c653 100644
+--- a/sound/soc/meson/g12a-toacodec.c
++++ b/sound/soc/meson/g12a-toacodec.c
+@@ -71,6 +71,9 @@ static int g12a_toacodec_mux_put_enum(struct snd_kcontrol *kcontrol,
+       struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+       unsigned int mux, reg;
++      if (ucontrol->value.enumerated.item[0] >= e->items)
++              return -EINVAL;
++
+       mux = snd_soc_enum_item_to_val(e, ucontrol->value.enumerated.item[0]);
+       regmap_field_read(priv->field_dat_sel, &reg);
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-meson-g12a-tohdmitx-fix-event-generation-for-s-.patch b/queue-6.1/asoc-meson-g12a-tohdmitx-fix-event-generation-for-s-.patch
new file mode 100644 (file)
index 0000000..89d91a2
--- /dev/null
@@ -0,0 +1,39 @@
+From 4329af718ecce922ee648d6baabf15690d321821 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 18:34:04 +0000
+Subject: ASoC: meson: g12a-tohdmitx: Fix event generation for S/PDIF mux
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit b036d8ef3120b996751495ce25994eea58032a98 ]
+
+When a control changes value the return value from _put() should be 1 so
+we get events generated to userspace notifying applications of the change.
+While the I2S mux gets this right the S/PDIF mux does not, fix the return
+value.
+
+Fixes: c8609f3870f7 ("ASoC: meson: add g12a tohdmitx control")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20240103-meson-enum-val-v1-4-424af7a8fb91@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/meson/g12a-tohdmitx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c
+index 46d1f04e0e8a3..154c324fdd42a 100644
+--- a/sound/soc/meson/g12a-tohdmitx.c
++++ b/sound/soc/meson/g12a-tohdmitx.c
+@@ -118,7 +118,7 @@ static int g12a_tohdmitx_spdif_mux_put_enum(struct snd_kcontrol *kcontrol,
+       snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+-      return 0;
++      return 1;
+ }
+ static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_spdif_mux_enum, TOHDMITX_CTRL0,
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-meson-g12a-tohdmitx-validate-written-enum-value.patch b/queue-6.1/asoc-meson-g12a-tohdmitx-validate-written-enum-value.patch
new file mode 100644 (file)
index 0000000..4519919
--- /dev/null
@@ -0,0 +1,50 @@
+From c652c462e27c9733ea410b8af1d8eccf55790e67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 18:34:02 +0000
+Subject: ASoC: meson: g12a-tohdmitx: Validate written enum values
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit 1e001206804be3f3d21f4a1cf16e5d059d75643f ]
+
+When writing to an enum we need to verify that the value written is valid
+for the enumeration, the helper function snd_soc_item_enum_to_val() doesn't
+do it since it needs to return an unsigned (and in any case we'd need to
+check the return value).
+
+Fixes: c8609f3870f7 ("ASoC: meson: add g12a tohdmitx control")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20240103-meson-enum-val-v1-2-424af7a8fb91@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/meson/g12a-tohdmitx.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c
+index 579a04ad4d197..46d1f04e0e8a3 100644
+--- a/sound/soc/meson/g12a-tohdmitx.c
++++ b/sound/soc/meson/g12a-tohdmitx.c
+@@ -45,6 +45,9 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol,
+       struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+       unsigned int mux, changed;
++      if (ucontrol->value.enumerated.item[0] >= e->items)
++              return -EINVAL;
++
+       mux = snd_soc_enum_item_to_val(e, ucontrol->value.enumerated.item[0]);
+       changed = snd_soc_component_test_bits(component, e->reg,
+                                             CTRL0_I2S_DAT_SEL,
+@@ -93,6 +96,9 @@ static int g12a_tohdmitx_spdif_mux_put_enum(struct snd_kcontrol *kcontrol,
+       struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+       unsigned int mux, changed;
++      if (ucontrol->value.enumerated.item[0] >= e->items)
++              return -EINVAL;
++
+       mux = snd_soc_enum_item_to_val(e, ucontrol->value.enumerated.item[0]);
+       changed = snd_soc_component_test_bits(component, TOHDMITX_CTRL0,
+                                             CTRL0_SPDIF_SEL,
+-- 
+2.43.0
+
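The four ASoC patches above adjust custom enum put() callbacks toward the same two conventions. A condensed sketch combining both (helper names are illustrative assumptions, not the Amlogic drivers' real code): reject out-of-range enum values before converting them, return 0 when nothing changed, and return 1 on a real change so userspace receives a control notification event.

```c
#include <linux/errno.h>
#include <sound/soc.h>

/* Hypothetical helpers for reading/writing the mux selection. */
static unsigned int example_read_mux(struct snd_kcontrol *kcontrol);
static void example_write_mux(struct snd_kcontrol *kcontrol, unsigned int mux);

static int example_mux_put(struct snd_kcontrol *kcontrol,
			   struct snd_ctl_elem_value *ucontrol)
{
	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
	unsigned int item = ucontrol->value.enumerated.item[0];
	unsigned int mux, cur;

	if (item >= e->items)		/* validate before converting */
		return -EINVAL;

	mux = snd_soc_enum_item_to_val(e, item);
	cur = example_read_mux(kcontrol);

	if (mux == cur)
		return 0;		/* unchanged: no event */

	example_write_mux(kcontrol, mux);
	return 1;			/* changed: generate a control event */
}
```
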
diff --git a/queue-6.1/blk-mq-make-sure-active-queue-usage-is-held-for-bio_.patch b/queue-6.1/blk-mq-make-sure-active-queue-usage-is-held-for-bio_.patch
new file mode 100644 (file)
index 0000000..1f907b6
--- /dev/null
@@ -0,0 +1,168 @@
+From 3e937b5c8e6387b2914b22893d6bc030db02f58f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Nov 2023 11:52:31 +0800
+Subject: blk-mq: make sure active queue usage is held for bio_integrity_prep()
+
+From: Christoph Hellwig <hch@infradead.org>
+
+[ Upstream commit b0077e269f6c152e807fdac90b58caf012cdbaab ]
+
+blk_integrity_unregister() can come if queue usage counter isn't held
+for one bio with integrity prepared, so this request may be completed with
+calling profile->complete_fn, then kernel panic.
+
+Another constraint is that bio_integrity_prep() needs to be called
+before bio merge.
+
+Fix the issue by:
+
+- call bio_integrity_prep() with one queue usage counter grabbed reliably
+
+- call bio_integrity_prep() before bio merge
+
+Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
+Reported-by: Yi Zhang <yi.zhang@redhat.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Tested-by: Yi Zhang <yi.zhang@redhat.com>
+Link: https://lore.kernel.org/r/20231113035231.2708053-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-mq.c | 75 +++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 37 deletions(-)
+
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index 100fb0c3114f8..383d94615e502 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -2855,11 +2855,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
+       };
+       struct request *rq;
+-      if (unlikely(bio_queue_enter(bio)))
+-              return NULL;
+-
+       if (blk_mq_attempt_bio_merge(q, bio, nsegs))
+-              goto queue_exit;
++              return NULL;
+       rq_qos_throttle(q, bio);
+@@ -2875,35 +2872,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
+       rq_qos_cleanup(q, bio);
+       if (bio->bi_opf & REQ_NOWAIT)
+               bio_wouldblock_error(bio);
+-queue_exit:
+-      blk_queue_exit(q);
+       return NULL;
+ }
+-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
+-              struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
++/* return true if this @rq can be used for @bio */
++static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
++              struct bio *bio)
+ {
+-      struct request *rq;
+-      enum hctx_type type, hctx_type;
++      enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
++      enum hctx_type hctx_type = rq->mq_hctx->type;
+-      if (!plug)
+-              return NULL;
+-      rq = rq_list_peek(&plug->cached_rq);
+-      if (!rq || rq->q != q)
+-              return NULL;
+-
+-      if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
+-              *bio = NULL;
+-              return NULL;
+-      }
++      WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
+-      type = blk_mq_get_hctx_type((*bio)->bi_opf);
+-      hctx_type = rq->mq_hctx->type;
+       if (type != hctx_type &&
+           !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
+-              return NULL;
+-      if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
+-              return NULL;
++              return false;
++      if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
++              return false;
+       /*
+        * If any qos ->throttle() end up blocking, we will have flushed the
+@@ -2911,11 +2896,11 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
+        * before we throttle.
+        */
+       plug->cached_rq = rq_list_next(rq);
+-      rq_qos_throttle(q, *bio);
++      rq_qos_throttle(rq->q, bio);
+-      rq->cmd_flags = (*bio)->bi_opf;
++      rq->cmd_flags = bio->bi_opf;
+       INIT_LIST_HEAD(&rq->queuelist);
+-      return rq;
++      return true;
+ }
+ static void bio_set_ioprio(struct bio *bio)
+@@ -2944,7 +2929,7 @@ void blk_mq_submit_bio(struct bio *bio)
+       struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+       struct blk_plug *plug = blk_mq_plug(bio);
+       const int is_sync = op_is_sync(bio->bi_opf);
+-      struct request *rq;
++      struct request *rq = NULL;
+       unsigned int nr_segs = 1;
+       blk_status_t ret;
+@@ -2955,20 +2940,36 @@ void blk_mq_submit_bio(struct bio *bio)
+                       return;
+       }
+-      if (!bio_integrity_prep(bio))
+-              return;
+-
+       bio_set_ioprio(bio);
+-      rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
+-      if (!rq) {
+-              if (!bio)
++      if (plug) {
++              rq = rq_list_peek(&plug->cached_rq);
++              if (rq && rq->q != q)
++                      rq = NULL;
++      }
++      if (rq) {
++              if (!bio_integrity_prep(bio))
+                       return;
+-              rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+-              if (unlikely(!rq))
++              if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+                       return;
++              if (blk_mq_can_use_cached_rq(rq, plug, bio))
++                      goto done;
++              percpu_ref_get(&q->q_usage_counter);
++      } else {
++              if (unlikely(bio_queue_enter(bio)))
++                      return;
++              if (!bio_integrity_prep(bio))
++                      goto fail;
++      }
++
++      rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
++      if (unlikely(!rq)) {
++fail:
++              blk_queue_exit(q);
++              return;
+       }
++done:
+       trace_block_getrq(bio);
+       rq_qos_track(q, rq, bio);
+-- 
+2.43.0
+
diff --git a/queue-6.1/block-update-the-stable_writes-flag-in-bdev_add.patch b/queue-6.1/block-update-the-stable_writes-flag-in-bdev_add.patch
new file mode 100644 (file)
index 0000000..4030c68
--- /dev/null
@@ -0,0 +1,46 @@
+From 4620179873d798d8815800dcaa3f411857d6aee7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Oct 2023 16:10:18 +0200
+Subject: block: update the stable_writes flag in bdev_add
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 1898efcdbed32bb1c67269c985a50bab0dbc9493 ]
+
+Propagate the per-queue stable_write flags into each bdev inode in bdev_add.
+This makes sure devices that require stable writes have it set for I/O
+on the block device node as well.
+
+Note that this doesn't cover the case of a flag changing on a live device
+yet.  We should handle that as well, but I plan to cover it as part of a
+more general rework of how changing runtime paramters on block devices
+works.
+
+Fixes: 1cb039f3dc16 ("bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag")
+Reported-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20231025141020.192413-3-hch@lst.de
+Tested-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bdev.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/block/bdev.c b/block/bdev.c
+index d699ecdb32604..b61502ec8da06 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -507,6 +507,8 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
+ void bdev_add(struct block_device *bdev, dev_t dev)
+ {
++      if (bdev_stable_writes(bdev))
++              mapping_set_stable_writes(bdev->bd_inode->i_mapping);
+       bdev->bd_dev = dev;
+       bdev->bd_inode->i_rdev = dev;
+       bdev->bd_inode->i_ino = dev;
+-- 
+2.43.0
+
diff --git a/queue-6.1/bnxt_en-remove-mis-applied-code-from-bnxt_cfg_ntp_fi.patch b/queue-6.1/bnxt_en-remove-mis-applied-code-from-bnxt_cfg_ntp_fi.patch
new file mode 100644 (file)
index 0000000..ebaec4b
--- /dev/null
@@ -0,0 +1,47 @@
+From af39ac5b0695d95e5a080366d3ec9115d9fa2e72 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 16:59:24 -0800
+Subject: bnxt_en: Remove mis-applied code from bnxt_cfg_ntp_filters()
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit e009b2efb7a8850498796b360043ac25c8d3d28f ]
+
+The 2 lines to check for the BNXT_HWRM_PF_UNLOAD_SP_EVENT bit was
+mis-applied to bnxt_cfg_ntp_filters() and should have been applied to
+bnxt_sp_task().
+
+Fixes: 19241368443f ("bnxt_en: Send PF driver unload notification to all VFs.")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 623cdeb29ed90..df4d88d35701b 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -12081,6 +12081,8 @@ static void bnxt_sp_task(struct work_struct *work)
+               bnxt_cfg_ntp_filters(bp);
+       if (test_and_clear_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event))
+               bnxt_hwrm_exec_fwd_req(bp);
++      if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event))
++              netdev_info(bp->dev, "Receive PF driver unload event!\n");
+       if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) {
+               bnxt_hwrm_port_qstats(bp, 0);
+               bnxt_hwrm_port_qstats_ext(bp, 0);
+@@ -13059,8 +13061,6 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp)
+                       }
+               }
+       }
+-      if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event))
+-              netdev_info(bp->dev, "Receive PF driver unload event!\n");
+ }
+ #else
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-clean-up-visit_insn-s-instruction-processing.patch b/queue-6.1/bpf-clean-up-visit_insn-s-instruction-processing.patch
new file mode 100644 (file)
index 0000000..595498a
--- /dev/null
@@ -0,0 +1,96 @@
+From 02818dc2580eae9be766e1be3885bdeeeb7ef526 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 15:50:04 -0800
+Subject: bpf: clean up visit_insn()'s instruction processing
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 653ae3a874aca6764a4c1f5a8bf1b072ade0d6f4 ]
+
+Instead of referencing processed instruction repeatedly as insns[t]
+throughout entire visit_insn() function, take a local insn pointer and
+work with it in a cleaner way.
+
+It makes enhancing this function further a bit easier as well.
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20230302235015.2044271-7-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 3feb263bb516 ("bpf: handle ldimm64 properly in check_cfg()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index d1393e07ab2c9..73d500c51bd86 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -11115,44 +11115,43 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
+  */
+ static int visit_insn(int t, struct bpf_verifier_env *env)
+ {
+-      struct bpf_insn *insns = env->prog->insnsi;
++      struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
+       int ret;
+-      if (bpf_pseudo_func(insns + t))
++      if (bpf_pseudo_func(insn))
+               return visit_func_call_insn(t, insns, env, true);
+       /* All non-branch instructions have a single fall-through edge. */
+-      if (BPF_CLASS(insns[t].code) != BPF_JMP &&
+-          BPF_CLASS(insns[t].code) != BPF_JMP32)
++      if (BPF_CLASS(insn->code) != BPF_JMP &&
++          BPF_CLASS(insn->code) != BPF_JMP32)
+               return push_insn(t, t + 1, FALLTHROUGH, env, false);
+-      switch (BPF_OP(insns[t].code)) {
++      switch (BPF_OP(insn->code)) {
+       case BPF_EXIT:
+               return DONE_EXPLORING;
+       case BPF_CALL:
+-              if (insns[t].imm == BPF_FUNC_timer_set_callback)
++              if (insn->imm == BPF_FUNC_timer_set_callback)
+                       /* Mark this call insn as a prune point to trigger
+                        * is_state_visited() check before call itself is
+                        * processed by __check_func_call(). Otherwise new
+                        * async state will be pushed for further exploration.
+                        */
+                       mark_prune_point(env, t);
+-              return visit_func_call_insn(t, insns, env,
+-                                          insns[t].src_reg == BPF_PSEUDO_CALL);
++              return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
+       case BPF_JA:
+-              if (BPF_SRC(insns[t].code) != BPF_K)
++              if (BPF_SRC(insn->code) != BPF_K)
+                       return -EINVAL;
+               /* unconditional jump with single edge */
+-              ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
++              ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
+                               true);
+               if (ret)
+                       return ret;
+-              mark_prune_point(env, t + insns[t].off + 1);
+-              mark_jmp_point(env, t + insns[t].off + 1);
++              mark_prune_point(env, t + insn->off + 1);
++              mark_jmp_point(env, t + insn->off + 1);
+               return ret;
+@@ -11164,7 +11163,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
+               if (ret)
+                       return ret;
+-              return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
++              return push_insn(t, t + insn->off + 1, BRANCH, env, true);
+       }
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-decouple-prune-and-jump-points.patch b/queue-6.1/bpf-decouple-prune-and-jump-points.patch
new file mode 100644 (file)
index 0000000..5a9392d
--- /dev/null
@@ -0,0 +1,197 @@
+From 1d848bcf5df37f2bdcc07a0518140fe62ed6383b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 15:33:43 -0800
+Subject: bpf: decouple prune and jump points
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit bffdeaa8a5af7200b0e74c9d5a41167f86626a36 ]
+
+BPF verifier marks some instructions as prune points. Currently these
+prune points serve two purposes.
+
+It's a point where the verifier tries to find a previously verified state
+and checks the current state's equivalence against it to short-circuit
+verification for the current code path.
+
+But currently it's also a point where jump history, used for precision
+backtracking, is updated. This is done so that non-linear flow of
+execution could be properly backtracked.
+
+Such coupling is coincidental and unnecessary. Some prune points are not
+part of any non-linear jump path, so they don't need a jump history update.
+On the other hand, not all instructions which have to be recorded in
+jump history are necessarily good prune points.
+
+This patch splits prune and jump points into independent flags.
+Currently all prune points are marked as jump points to minimize the
+amount of changes in this patch, but the next patch will perform some
+optimization of prune vs jmp point placement.
+
+No functional changes are intended.
+
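+For illustration only, a condensed sketch (bodies elided, just mirroring the
+hunks below) of how the two flags end up with independent consumers:
+
+  /* jump history is appended only at marked jump points */
+  static int push_jmp_history(struct bpf_verifier_env *env,
+                              struct bpf_verifier_state *cur)
+  {
+          if (!is_jmp_point(env, env->insn_idx))
+                  return 0;
+          /* ... otherwise append {idx, prev_idx} to cur->jmp_history ... */
+  }
+
+  /* state equivalence is searched only at marked prune points */
+  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+  {
+          struct bpf_verifier_state *cur = env->cur_state;
+          ...
+          if (!is_prune_point(env, insn_idx))
+                  return push_jmp_history(env, cur);
+          /* ... otherwise look up explored_states[] for an equivalent state ... */
+  }
+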
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20221206233345.438540-2-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 3feb263bb516 ("bpf: handle ldimm64 properly in check_cfg()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h |  1 +
+ kernel/bpf/verifier.c        | 57 +++++++++++++++++++++++++++---------
+ 2 files changed, 44 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 1a32baa78ce26..f080ccf27d256 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -429,6 +429,7 @@ struct bpf_insn_aux_data {
+       /* below fields are initialized once */
+       unsigned int orig_idx; /* original instruction index */
+       bool prune_point;
++      bool jmp_point;
+ };
+ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index ee6e811b43158..ec688665aaa25 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2512,6 +2512,16 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
+       return 0;
+ }
++static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
++{
++      env->insn_aux_data[idx].jmp_point = true;
++}
++
++static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
++{
++      return env->insn_aux_data[insn_idx].jmp_point;
++}
++
+ /* for any branch, call, exit record the history of jmps in the given state */
+ static int push_jmp_history(struct bpf_verifier_env *env,
+                           struct bpf_verifier_state *cur)
+@@ -2520,6 +2530,9 @@ static int push_jmp_history(struct bpf_verifier_env *env,
+       struct bpf_idx_pair *p;
+       size_t alloc_size;
++      if (!is_jmp_point(env, env->insn_idx))
++              return 0;
++
+       cnt++;
+       alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+       p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
+@@ -11000,11 +11013,16 @@ static struct bpf_verifier_state_list **explored_state(
+       return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
+ }
+-static void init_explored_state(struct bpf_verifier_env *env, int idx)
++static void mark_prune_point(struct bpf_verifier_env *env, int idx)
+ {
+       env->insn_aux_data[idx].prune_point = true;
+ }
++static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
++{
++      return env->insn_aux_data[insn_idx].prune_point;
++}
++
+ enum {
+       DONE_EXPLORING = 0,
+       KEEP_EXPLORING = 1,
+@@ -11033,9 +11051,11 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
+               return -EINVAL;
+       }
+-      if (e == BRANCH)
++      if (e == BRANCH) {
+               /* mark branch target for state pruning */
+-              init_explored_state(env, w);
++              mark_prune_point(env, w);
++              mark_jmp_point(env, w);
++      }
+       if (insn_state[w] == 0) {
+               /* tree-edge */
+@@ -11073,10 +11093,13 @@ static int visit_func_call_insn(int t, int insn_cnt,
+       if (ret)
+               return ret;
+-      if (t + 1 < insn_cnt)
+-              init_explored_state(env, t + 1);
++      if (t + 1 < insn_cnt) {
++              mark_prune_point(env, t + 1);
++              mark_jmp_point(env, t + 1);
++      }
+       if (visit_callee) {
+-              init_explored_state(env, t);
++              mark_prune_point(env, t);
++              mark_jmp_point(env, t);
+               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
+                               /* It's ok to allow recursion from CFG point of
+                                * view. __check_func_call() will do the actual
+@@ -11110,13 +11133,15 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+               return DONE_EXPLORING;
+       case BPF_CALL:
+-              if (insns[t].imm == BPF_FUNC_timer_set_callback)
++              if (insns[t].imm == BPF_FUNC_timer_set_callback) {
+                       /* Mark this call insn to trigger is_state_visited() check
+                        * before call itself is processed by __check_func_call().
+                        * Otherwise new async state will be pushed for further
+                        * exploration.
+                        */
+-                      init_explored_state(env, t);
++                      mark_prune_point(env, t);
++                      mark_jmp_point(env, t);
++              }
+               return visit_func_call_insn(t, insn_cnt, insns, env,
+                                           insns[t].src_reg == BPF_PSEUDO_CALL);
+@@ -11134,18 +11159,22 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+                * but it's marked, since backtracking needs
+                * to record jmp history in is_state_visited().
+                */
+-              init_explored_state(env, t + insns[t].off + 1);
++              mark_prune_point(env, t + insns[t].off + 1);
++              mark_jmp_point(env, t + insns[t].off + 1);
+               /* tell verifier to check for equivalent states
+                * after every call and jump
+                */
+-              if (t + 1 < insn_cnt)
+-                      init_explored_state(env, t + 1);
++              if (t + 1 < insn_cnt) {
++                      mark_prune_point(env, t + 1);
++                      mark_jmp_point(env, t + 1);
++              }
+               return ret;
+       default:
+               /* conditional jump with two edges */
+-              init_explored_state(env, t);
++              mark_prune_point(env, t);
++              mark_jmp_point(env, t);
+               ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+               if (ret)
+                       return ret;
+@@ -12178,11 +12207,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+       bool add_new_state = env->test_state_freq ? true : false;
+       cur->last_insn_idx = env->prev_insn_idx;
+-      if (!env->insn_aux_data[insn_idx].prune_point)
++      if (!is_prune_point(env, insn_idx))
+               /* this 'insn_idx' instruction wasn't marked, so we will not
+                * be doing state search here
+                */
+-              return 0;
++              return push_jmp_history(env, cur);
+       /* bpf progs typically have pruning point every 4 instructions
+        * http://vger.kernel.org/bpfconf2019.html#session-1
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-fix-precision-backtracking-instruction-iteration.patch b/queue-6.1/bpf-fix-precision-backtracking-instruction-iteration.patch
new file mode 100644 (file)
index 0000000..83a120e
--- /dev/null
@@ -0,0 +1,89 @@
+From 5f576d9732e2017e5f5e1da533df5a11be2b311b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Nov 2023 16:26:37 -0800
+Subject: bpf: fix precision backtracking instruction iteration
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 4bb7ea946a370707315ab774432963ce47291946 ]
+
+Fix an edge case in __mark_chain_precision() which prematurely stops
+backtracking instructions in a state if it happens that the state's first
+and last instruction indexes are the same. This situation doesn't
+necessarily mean that there were no instructions simulated in a state,
+but rather that we started from that instruction, jumped around a bit,
+and then ended up at the same instruction before checkpointing or
+marking precision.
+
+To distinguish between these two possible situations, we need to consult
+jump history. If it's empty or contains a single record "bridging" parent
+state and first instruction of processed state, then we indeed
+backtracked all instructions in this state. But if history is not empty,
+we are definitely not done yet.
+
+Move this logic inside get_prev_insn_idx() to contain it more nicely.
+Use -ENOENT return code to denote "we are out of instructions"
+situation.
+
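+For illustration (indexes made up): consider a state whose first_insn_idx
+is 3 and whose simulation went 3 -> 4 -> 5 -> 3, with jump history
+[{idx=3, prev_idx=<parent's last insn>}, {idx=3, prev_idx=5}]. Backtracking
+starts at insn 3: the top history entry has idx == 3, so we pop it and jump
+back to insn 5, then go linearly through 4 down to 3. At that point cnt == 1
+and jmp_history[0].idx == first_insn_idx, i.e. only the "bridging" entry from
+the parent state is left, so get_prev_insn_idx() reports -ENOENT and
+backtracking stops only after insns 5, 4 and both visits of insn 3 have been
+processed.
+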
+This bug was exposed by verifier_loop1.c's bounded_recursion subtest, once
+the next fix in this patch set is applied.
+
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20231110002638.4168352-3-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 95521beec66c5..142e10d49fd81 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2551,12 +2551,29 @@ static int push_jmp_history(struct bpf_verifier_env *env,
+ /* Backtrack one insn at a time. If idx is not at the top of recorded
+  * history then previous instruction came from straight line execution.
++ * Return -ENOENT if we exhausted all instructions within given state.
++ *
++ * It's legal to have a bit of a looping with the same starting and ending
++ * insn index within the same state, e.g.: 3->4->5->3, so just because current
++ * instruction index is the same as state's first_idx doesn't mean we are
++ * done. If there is still some jump history left, we should keep going. We
++ * need to take into account that we might have a jump history between given
++ * state's parent and itself, due to checkpointing. In this case, we'll have
++ * history entry recording a jump from last instruction of parent state and
++ * first instruction of given state.
+  */
+ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+                            u32 *history)
+ {
+       u32 cnt = *history;
++      if (i == st->first_insn_idx) {
++              if (cnt == 0)
++                      return -ENOENT;
++              if (cnt == 1 && st->jmp_history[0].idx == i)
++                      return -ENOENT;
++      }
++
+       if (cnt && st->jmp_history[cnt - 1].idx == i) {
+               i = st->jmp_history[cnt - 1].prev_idx;
+               (*history)--;
+@@ -3052,9 +3069,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r
+                                * Nothing to be tracked further in the parent state.
+                                */
+                               return 0;
+-                      if (i == first_idx)
+-                              break;
+                       i = get_prev_insn_idx(st, i, &history);
++                      if (i == -ENOENT)
++                              break;
+                       if (i >= env->prog->len) {
+                               /* This can happen if backtracking reached insn 0
+                                * and there are still reg_mask or stack_mask
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-handle-ldimm64-properly-in-check_cfg.patch b/queue-6.1/bpf-handle-ldimm64-properly-in-check_cfg.patch
new file mode 100644 (file)
index 0000000..546520d
--- /dev/null
@@ -0,0 +1,153 @@
+From e72d96cb30d0d7cac5d70679da65b38e3fded5d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Nov 2023 16:26:36 -0800
+Subject: bpf: handle ldimm64 properly in check_cfg()
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 3feb263bb516ee7e1da0acd22b15afbb9a7daa19 ]
+
+ldimm64 instructions are 16 bytes long, and so have to be handled
+appropriately in check_cfg(), just like the rest of BPF verifier does.
+
+This has implications in three places:
+  - when determining next instruction for non-jump instructions;
+  - when determining next instruction for callback address ldimm64
+    instructions (in visit_func_call_insn());
+  - when checking for unreachable instructions, where second half of
+    ldimm64 is expected to be unreachable;
+
+We also take this as an opportunity to report a jump into the middle of
+ldimm64, and adjust a few test_verifier tests accordingly.
+
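+For illustration only (a stand-alone user space sketch, not kernel code),
+the 16-byte encoding and the resulting "step by 2" walk look like this:
+
+  #include <stdio.h>
+  #include <linux/bpf.h>
+
+  /* ldimm64 (BPF_LD | BPF_IMM | BPF_DW) is the only 16-byte BPF
+   * instruction: it takes two struct bpf_insn slots, the second one
+   * carrying the upper 32 bits of the 64-bit immediate in its imm field.
+   */
+  static int insn_size(const struct bpf_insn *insn)
+  {
+          return insn->code == (BPF_LD | BPF_IMM | BPF_DW) ? 2 : 1;
+  }
+
+  int main(void)
+  {
+          struct bpf_insn prog[] = {
+                  { .code = BPF_LD | BPF_IMM | BPF_DW,
+                    .dst_reg = BPF_REG_0, .imm = 0x11223344 },
+                  { .imm = 0x55667788 },          /* second half of ldimm64 */
+                  { .code = BPF_JMP | BPF_EXIT },
+          };
+
+          /* walking the program has to advance by insn_size(), just like
+           * check_cfg() below; index 1 is never a real instruction
+           */
+          for (int i = 0; i < 3; i += insn_size(&prog[i]))
+                  printf("insn %d takes %d slot(s)\n", i, insn_size(&prog[i]));
+          return 0;
+  }
+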
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Reported-by: Hao Sun <sunhao.th@gmail.com>
+Fixes: 475fb78fbf48 ("bpf: verifier (add branch/goto checks)")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/r/20231110002638.4168352-2-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf.h                           |  8 ++++--
+ kernel/bpf/verifier.c                         | 27 ++++++++++++++-----
+ .../testing/selftests/bpf/verifier/ld_imm64.c |  8 +++---
+ 3 files changed, 30 insertions(+), 13 deletions(-)
+
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 619fcba84be22..ba22cf4f5fc0e 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -702,10 +702,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+       aux->ctx_field_size = size;
+ }
++static bool bpf_is_ldimm64(const struct bpf_insn *insn)
++{
++      return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
++}
++
+ static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
+ {
+-      return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+-             insn->src_reg == BPF_PSEUDO_FUNC;
++      return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+ }
+ struct bpf_prog_ops {
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index dd025f66efabc..95521beec66c5 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -11090,15 +11090,16 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
+                               struct bpf_verifier_env *env,
+                               bool visit_callee)
+ {
+-      int ret;
++      int ret, insn_sz;
+-      ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
++      insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
++      ret = push_insn(t, t + insn_sz, FALLTHROUGH, env, false);
+       if (ret)
+               return ret;
+-      mark_prune_point(env, t + 1);
++      mark_prune_point(env, t + insn_sz);
+       /* when we exit from subprog, we need to record non-linear history */
+-      mark_jmp_point(env, t + 1);
++      mark_jmp_point(env, t + insn_sz);
+       if (visit_callee) {
+               mark_prune_point(env, t);
+@@ -11120,15 +11121,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
+ static int visit_insn(int t, struct bpf_verifier_env *env)
+ {
+       struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
+-      int ret, off;
++      int ret, off, insn_sz;
+       if (bpf_pseudo_func(insn))
+               return visit_func_call_insn(t, insns, env, true);
+       /* All non-branch instructions have a single fall-through edge. */
+       if (BPF_CLASS(insn->code) != BPF_JMP &&
+-          BPF_CLASS(insn->code) != BPF_JMP32)
+-              return push_insn(t, t + 1, FALLTHROUGH, env, false);
++          BPF_CLASS(insn->code) != BPF_JMP32) {
++              insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
++              return push_insn(t, t + insn_sz, FALLTHROUGH, env, false);
++      }
+       switch (BPF_OP(insn->code)) {
+       case BPF_EXIT:
+@@ -11227,11 +11230,21 @@ static int check_cfg(struct bpf_verifier_env *env)
+       }
+       for (i = 0; i < insn_cnt; i++) {
++              struct bpf_insn *insn = &env->prog->insnsi[i];
++
+               if (insn_state[i] != EXPLORED) {
+                       verbose(env, "unreachable insn %d\n", i);
+                       ret = -EINVAL;
+                       goto err_free;
+               }
++              if (bpf_is_ldimm64(insn)) {
++                      if (insn_state[i + 1] != 0) {
++                              verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
++                              ret = -EINVAL;
++                              goto err_free;
++                      }
++                      i++; /* skip second half of ldimm64 */
++              }
+       }
+       ret = 0; /* cfg looks good */
+diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
+index f9297900cea6d..78f19c255f20b 100644
+--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
++++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
+@@ -9,8 +9,8 @@
+       BPF_MOV64_IMM(BPF_REG_0, 2),
+       BPF_EXIT_INSN(),
+       },
+-      .errstr = "invalid BPF_LD_IMM insn",
+-      .errstr_unpriv = "R1 pointer comparison",
++      .errstr = "jump into the middle of ldimm64 insn 1",
++      .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
+       .result = REJECT,
+ },
+ {
+@@ -23,8 +23,8 @@
+       BPF_LD_IMM64(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+-      .errstr = "invalid BPF_LD_IMM insn",
+-      .errstr_unpriv = "R1 pointer comparison",
++      .errstr = "jump into the middle of ldimm64 insn 1",
++      .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
+       .result = REJECT,
+ },
+ {
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-remove-unnecessary-prune-and-jump-points.patch b/queue-6.1/bpf-remove-unnecessary-prune-and-jump-points.patch
new file mode 100644 (file)
index 0000000..1096eae
--- /dev/null
@@ -0,0 +1,112 @@
+From 90b6441df9cf455cd5ad99ec2231d29e605a5a47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 15:33:45 -0800
+Subject: bpf: remove unnecessary prune and jump points
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 618945fbed501b6e5865042068a51edfb2dda948 ]
+
+Don't mark some instructions as jump points when there are actually no
+jumps and instructions are just processed sequentially. Such a case is
+handled naturally by precision backtracking logic without the need to
+update jump history. See get_prev_insn_idx(). It goes back linearly by
+one instruction, unless current top of jmp_history is pointing to
+current instruction. In such case we use `st->jmp_history[cnt - 1].prev_idx`
+to find instruction from which we jumped to the current instruction
+non-linearly.
+
+Also remove both jump and prune point marking for instruction right
+after unconditional jumps, as program flow can get to the instruction
+right after unconditional jump instruction only if there is a jump to
+that instruction from somewhere else in the program. In such case we'll
+mark such instruction as prune/jump point because it's a destination of
+a jump.
+
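+As an illustration (made-up, simplified listing), the instruction right
+after an unconditional jump is only ever entered as a jump target, and then
+gets marked when that branch edge is pushed:
+
+  0: r0 = 0
+  1: goto +1              /* no fall-through edge from here into insn 2 */
+  2: r0 += 1              /* reachable only as the target of insn 3's branch */
+  3: if r0 < 10 goto -2
+  4: exit
+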
+This change makes no difference in the number of instructions or states
+processed across Cilium and selftests programs.
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20221206233345.438540-4-andrii@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 3feb263bb516 ("bpf: handle ldimm64 properly in check_cfg()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 34 ++++++++++------------------------
+ 1 file changed, 10 insertions(+), 24 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index ec688665aaa25..09631797d9e0c 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -11093,13 +11093,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
+       if (ret)
+               return ret;
+-      if (t + 1 < insn_cnt) {
+-              mark_prune_point(env, t + 1);
+-              mark_jmp_point(env, t + 1);
+-      }
++      mark_prune_point(env, t + 1);
++      /* when we exit from subprog, we need to record non-linear history */
++      mark_jmp_point(env, t + 1);
++
+       if (visit_callee) {
+               mark_prune_point(env, t);
+-              mark_jmp_point(env, t);
+               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
+                               /* It's ok to allow recursion from CFG point of
+                                * view. __check_func_call() will do the actual
+@@ -11133,15 +11132,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+               return DONE_EXPLORING;
+       case BPF_CALL:
+-              if (insns[t].imm == BPF_FUNC_timer_set_callback) {
+-                      /* Mark this call insn to trigger is_state_visited() check
+-                       * before call itself is processed by __check_func_call().
+-                       * Otherwise new async state will be pushed for further
+-                       * exploration.
++              if (insns[t].imm == BPF_FUNC_timer_set_callback)
++                      /* Mark this call insn as a prune point to trigger
++                       * is_state_visited() check before call itself is
++                       * processed by __check_func_call(). Otherwise new
++                       * async state will be pushed for further exploration.
+                        */
+                       mark_prune_point(env, t);
+-                      mark_jmp_point(env, t);
+-              }
+               return visit_func_call_insn(t, insn_cnt, insns, env,
+                                           insns[t].src_reg == BPF_PSEUDO_CALL);
+@@ -11155,26 +11152,15 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+               if (ret)
+                       return ret;
+-              /* unconditional jmp is not a good pruning point,
+-               * but it's marked, since backtracking needs
+-               * to record jmp history in is_state_visited().
+-               */
+               mark_prune_point(env, t + insns[t].off + 1);
+               mark_jmp_point(env, t + insns[t].off + 1);
+-              /* tell verifier to check for equivalent states
+-               * after every call and jump
+-               */
+-              if (t + 1 < insn_cnt) {
+-                      mark_prune_point(env, t + 1);
+-                      mark_jmp_point(env, t + 1);
+-              }
+               return ret;
+       default:
+               /* conditional jump with two edges */
+               mark_prune_point(env, t);
+-              mark_jmp_point(env, t);
++
+               ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+               if (ret)
+                       return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-remove-unused-insn_cnt-argument-from-visit_-func.patch b/queue-6.1/bpf-remove-unused-insn_cnt-argument-from-visit_-func.patch
new file mode 100644 (file)
index 0000000..6e5546d
--- /dev/null
@@ -0,0 +1,76 @@
+From b9857568c364a47cb907e60b86ee7c0a1f73a7b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Dec 2022 11:55:34 -0800
+Subject: bpf: Remove unused insn_cnt argument from visit_[func_call_]insn()
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit dcb2288b1fd9a8cdf2f3b8c0c7b3763346ef515f ]
+
+The total number of instructions in a BPF program (including subprogs) can
+be and is accessed from env->prog->len. visit_func_call_insn() doesn't do any
+checks against insn_cnt anymore, relying on push_insn() to do this check
+internally. So remove unnecessary insn_cnt input argument from
+visit_func_call_insn() and visit_insn() functions.
+
+Suggested-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20221207195534.2866030-1-andrii@kernel.org
+Stable-dep-of: 3feb263bb516 ("bpf: handle ldimm64 properly in check_cfg()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 09631797d9e0c..d1393e07ab2c9 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -11082,8 +11082,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
+       return DONE_EXPLORING;
+ }
+-static int visit_func_call_insn(int t, int insn_cnt,
+-                              struct bpf_insn *insns,
++static int visit_func_call_insn(int t, struct bpf_insn *insns,
+                               struct bpf_verifier_env *env,
+                               bool visit_callee)
+ {
+@@ -11114,13 +11113,13 @@ static int visit_func_call_insn(int t, int insn_cnt,
+  *  DONE_EXPLORING - the instruction was fully explored
+  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
+  */
+-static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
++static int visit_insn(int t, struct bpf_verifier_env *env)
+ {
+       struct bpf_insn *insns = env->prog->insnsi;
+       int ret;
+       if (bpf_pseudo_func(insns + t))
+-              return visit_func_call_insn(t, insn_cnt, insns, env, true);
++              return visit_func_call_insn(t, insns, env, true);
+       /* All non-branch instructions have a single fall-through edge. */
+       if (BPF_CLASS(insns[t].code) != BPF_JMP &&
+@@ -11139,7 +11138,7 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+                        * async state will be pushed for further exploration.
+                        */
+                       mark_prune_point(env, t);
+-              return visit_func_call_insn(t, insn_cnt, insns, env,
++              return visit_func_call_insn(t, insns, env,
+                                           insns[t].src_reg == BPF_PSEUDO_CALL);
+       case BPF_JA:
+@@ -11196,7 +11195,7 @@ static int check_cfg(struct bpf_verifier_env *env)
+       while (env->cfg.cur_stack > 0) {
+               int t = insn_stack[env->cfg.cur_stack - 1];
+-              ret = visit_insn(t, insn_cnt, env);
++              ret = visit_insn(t, env);
+               switch (ret) {
+               case DONE_EXPLORING:
+                       insn_state[t] = EXPLORED;
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-sockmap-af_unix-stream-sockets-need-to-hold-ref-.patch b/queue-6.1/bpf-sockmap-af_unix-stream-sockets-need-to-hold-ref-.patch
new file mode 100644 (file)
index 0000000..e8786d2
--- /dev/null
@@ -0,0 +1,139 @@
+From 2e5ec045cba65071ef0736ce3d6a2e56106c261d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Nov 2023 17:25:56 -0800
+Subject: bpf, sockmap: af_unix stream sockets need to hold ref for pair sock
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 8866730aed5100f06d3d965c22f1c61f74942541 ]
+
+AF_UNIX stream sockets are paired sockets. So sending on one of the pair
+will look up the paired socket as part of the send operation. It is possible
+however to put just one of the pairs in a BPF map. This currently increments
+the refcnt on the sock in the sockmap to ensure it is not free'd by the
+stack before sockmap cleans up its state and stops any skbs being sent/recv'd
+to that socket.
+
+But we missed a case. If the peer socket is closed it will be free'd by the
+stack. However, the paired socket can still be referenced from BPF sockmap
+side because we hold a reference there. Then if we are sending traffic through
+BPF sockmap to that socket it will try to dereference the free'd pair in its
+send logic, creating a use-after-free and the following splat:
+
+   [59.900375] BUG: KASAN: slab-use-after-free in sk_wake_async+0x31/0x1b0
+   [59.901211] Read of size 8 at addr ffff88811acbf060 by task kworker/1:2/954
+   [...]
+   [59.905468] Call Trace:
+   [59.905787]  <TASK>
+   [59.906066]  dump_stack_lvl+0x130/0x1d0
+   [59.908877]  print_report+0x16f/0x740
+   [59.910629]  kasan_report+0x118/0x160
+   [59.912576]  sk_wake_async+0x31/0x1b0
+   [59.913554]  sock_def_readable+0x156/0x2a0
+   [59.914060]  unix_stream_sendmsg+0x3f9/0x12a0
+   [59.916398]  sock_sendmsg+0x20e/0x250
+   [59.916854]  skb_send_sock+0x236/0xac0
+   [59.920527]  sk_psock_backlog+0x287/0xaa0
+
+To fix this, let BPF sockmap hold a refcnt on both the socket in the sockmap
+and its paired socket. It wasn't obvious how to contain the fix to bpf_unix
+logic. The primary problem with keeping this logic in bpf_unix was: in the
+sock close() we could handle the deref by having a close handler. But, when
+we are destroying the psock through a map delete operation we wouldn't have
+gotten any signal through the proto struct other than it being replaced. If
+we do the deref from the proto replace it's too early, because we need to
+deref the sk_pair after the backlog worker has been stopped.
+
+Given all this it seems best to just cache it at the end of the psock and eat 8B
+for the af_unix and vsock users. Notice dgram sockets are OK because they handle
+locking already.
+
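+For illustration, a condensed sketch of the lifetime this establishes (it
+only mirrors the hunks below, with bodies elided):
+
+  int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock,
+                                   bool restore)
+  {
+          ...
+          sk_pair = unix_peer(sk);
+          sock_hold(sk_pair);          /* pin the peer for the psock's lifetime */
+          psock->sk_pair = sk_pair;
+          ...
+  }
+
+  static void sk_psock_destroy(struct work_struct *work)
+  {
+          ...                          /* backlog worker already stopped here */
+          if (psock->sk_pair)
+                  sock_put(psock->sk_pair);
+          ...
+  }
+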
+Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20231129012557.95371-2-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skmsg.h | 1 +
+ include/net/af_unix.h | 1 +
+ net/core/skmsg.c      | 2 ++
+ net/unix/af_unix.c    | 2 --
+ net/unix/unix_bpf.c   | 5 +++++
+ 5 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index c1637515a8a41..c953b8c0d2f43 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -106,6 +106,7 @@ struct sk_psock {
+       struct mutex                    work_mutex;
+       struct sk_psock_work_state      work_state;
+       struct delayed_work             work;
++      struct sock                     *sk_pair;
+       struct rcu_work                 rwork;
+ };
+diff --git a/include/net/af_unix.h b/include/net/af_unix.h
+index 480fa579787e5..55ca217c626b7 100644
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -77,6 +77,7 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
+ {
+       return (struct unix_sock *)sk;
+ }
++#define unix_peer(sk) (unix_sk(sk)->peer)
+ #define peer_wait peer_wq.wait
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index a5c1f67dc96ec..3818035ea0021 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -825,6 +825,8 @@ static void sk_psock_destroy(struct work_struct *work)
+       if (psock->sk_redir)
+               sock_put(psock->sk_redir);
++      if (psock->sk_pair)
++              sock_put(psock->sk_pair);
+       sock_put(psock->sk);
+       kfree(psock);
+ }
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 6dbeb80073338..be2ed7b0fe21c 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -211,8 +211,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
+ }
+ #endif /* CONFIG_SECURITY_NETWORK */
+-#define unix_peer(sk) (unix_sk(sk)->peer)
+-
+ static inline int unix_our_peer(struct sock *sk, struct sock *osk)
+ {
+       return unix_peer(osk) == sk;
+diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
+index 2f9d8271c6ec7..7ea7c3a0d0d06 100644
+--- a/net/unix/unix_bpf.c
++++ b/net/unix/unix_bpf.c
+@@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
+ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+ {
++      struct sock *sk_pair;
++
+       if (restore) {
+               sk->sk_write_space = psock->saved_write_space;
+               sock_replace_proto(sk, psock->sk_proto);
+               return 0;
+       }
++      sk_pair = unix_peer(sk);
++      sock_hold(sk_pair);
++      psock->sk_pair = sk_pair;
+       unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
+       sock_replace_proto(sk, &unix_stream_bpf_prot);
+       return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-support-new-32bit-offset-jmp-instruction.patch b/queue-6.1/bpf-support-new-32bit-offset-jmp-instruction.patch
new file mode 100644 (file)
index 0000000..d4048e1
--- /dev/null
@@ -0,0 +1,212 @@
+From 7c7c0669562f577fee8dbb0e780a26bc7a1b146a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 18:12:31 -0700
+Subject: bpf: Support new 32bit offset jmp instruction
+
+From: Yonghong Song <yonghong.song@linux.dev>
+
+[ Upstream commit 4cd58e9af8b9d9fff6b7145e742abbfcda0af4af ]
+
+Add interpreter/jit/verifier support for the 32bit offset jmp instruction.
+If a conditional jmp instruction needs more than a 16bit offset,
+it can be simulated with a conditional jmp + a 32bit jmp insn.
+
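+For illustration only (not part of the patch), the difference in where the
+displacement lives can be summed up as:
+
+  #include <linux/bpf.h>
+
+  /* BPF_JMP | BPF_JA keeps a 16-bit displacement in insn->off, while the
+   * new BPF_JMP32 | BPF_JA ("gotol") keeps a 32-bit displacement in
+   * insn->imm, which is what the interpreter change below
+   * (JMP32_JA: insn += insn->imm) relies on.
+   */
+  static int ja_displacement(const struct bpf_insn *insn)
+  {
+          if (insn->code == (BPF_JMP32 | BPF_JA))
+                  return insn->imm;
+          return insn->off;
+  }
+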
+Acked-by: Eduard Zingerman <eddyz87@gmail.com>
+Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
+Link: https://lore.kernel.org/r/20230728011231.3716103-1-yonghong.song@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 3feb263bb516 ("bpf: handle ldimm64 properly in check_cfg()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 28 ++++++++++++++++++----------
+ kernel/bpf/core.c           | 19 ++++++++++++++++---
+ kernel/bpf/verifier.c       | 32 ++++++++++++++++++++++----------
+ 3 files changed, 56 insertions(+), 23 deletions(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 84c695ae1940f..b69aee6245e4a 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -1625,16 +1625,24 @@ st:                    if (is_imm8(insn->off))
+                       break;
+               case BPF_JMP | BPF_JA:
+-                      if (insn->off == -1)
+-                              /* -1 jmp instructions will always jump
+-                               * backwards two bytes. Explicitly handling
+-                               * this case avoids wasting too many passes
+-                               * when there are long sequences of replaced
+-                               * dead code.
+-                               */
+-                              jmp_offset = -2;
+-                      else
+-                              jmp_offset = addrs[i + insn->off] - addrs[i];
++              case BPF_JMP32 | BPF_JA:
++                      if (BPF_CLASS(insn->code) == BPF_JMP) {
++                              if (insn->off == -1)
++                                      /* -1 jmp instructions will always jump
++                                       * backwards two bytes. Explicitly handling
++                                       * this case avoids wasting too many passes
++                                       * when there are long sequences of replaced
++                                       * dead code.
++                                       */
++                                      jmp_offset = -2;
++                              else
++                                      jmp_offset = addrs[i + insn->off] - addrs[i];
++                      } else {
++                              if (insn->imm == -1)
++                                      jmp_offset = -2;
++                              else
++                                      jmp_offset = addrs[i + insn->imm] - addrs[i];
++                      }
+                       if (!jmp_offset) {
+                               /*
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 7225cb67c0d3a..0b55ebf4a9b1f 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -367,7 +367,12 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
+ {
+       const s32 off_min = S16_MIN, off_max = S16_MAX;
+       s32 delta = end_new - end_old;
+-      s32 off = insn->off;
++      s32 off;
++
++      if (insn->code == (BPF_JMP32 | BPF_JA))
++              off = insn->imm;
++      else
++              off = insn->off;
+       if (curr < pos && curr + off + 1 >= end_old)
+               off += delta;
+@@ -375,8 +380,12 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
+               off -= delta;
+       if (off < off_min || off > off_max)
+               return -ERANGE;
+-      if (!probe_pass)
+-              insn->off = off;
++      if (!probe_pass) {
++              if (insn->code == (BPF_JMP32 | BPF_JA))
++                      insn->imm = off;
++              else
++                      insn->off = off;
++      }
+       return 0;
+ }
+@@ -1586,6 +1595,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
+       INSN_3(JMP, JSLE, K),                   \
+       INSN_3(JMP, JSET, K),                   \
+       INSN_2(JMP, JA),                        \
++      INSN_2(JMP32, JA),                      \
+       /* Store instructions. */               \
+       /*   Register based. */                 \
+       INSN_3(STX, MEM,  B),                   \
+@@ -1862,6 +1872,9 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
+       JMP_JA:
+               insn += insn->off;
+               CONT;
++      JMP32_JA:
++              insn += insn->imm;
++              CONT;
+       JMP_EXIT:
+               return BPF_R0;
+       /* JMP */
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 73d500c51bd86..dd025f66efabc 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2254,7 +2254,10 @@ static int check_subprogs(struct bpf_verifier_env *env)
+                       goto next;
+               if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
+                       goto next;
+-              off = i + insn[i].off + 1;
++              if (code == (BPF_JMP32 | BPF_JA))
++                      off = i + insn[i].imm + 1;
++              else
++                      off = i + insn[i].off + 1;
+               if (off < subprog_start || off >= subprog_end) {
+                       verbose(env, "jump out of range from insn %d to %d\n", i, off);
+                       return -EINVAL;
+@@ -2266,6 +2269,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
+                        * or unconditional jump back
+                        */
+                       if (code != (BPF_JMP | BPF_EXIT) &&
++                          code != (BPF_JMP32 | BPF_JA) &&
+                           code != (BPF_JMP | BPF_JA)) {
+                               verbose(env, "last insn is not an exit or jmp\n");
+                               return -EINVAL;
+@@ -11116,7 +11120,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
+ static int visit_insn(int t, struct bpf_verifier_env *env)
+ {
+       struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
+-      int ret;
++      int ret, off;
+       if (bpf_pseudo_func(insn))
+               return visit_func_call_insn(t, insns, env, true);
+@@ -11144,14 +11148,19 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
+               if (BPF_SRC(insn->code) != BPF_K)
+                       return -EINVAL;
++              if (BPF_CLASS(insn->code) == BPF_JMP)
++                      off = insn->off;
++              else
++                      off = insn->imm;
++
+               /* unconditional jump with single edge */
+-              ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
++              ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
+                               true);
+               if (ret)
+                       return ret;
+-              mark_prune_point(env, t + insn->off + 1);
+-              mark_jmp_point(env, t + insn->off + 1);
++              mark_prune_point(env, t + off + 1);
++              mark_jmp_point(env, t + off + 1);
+               return ret;
+@@ -12687,15 +12696,18 @@ static int do_check(struct bpf_verifier_env *env)
+                                       return err;
+                       } else if (opcode == BPF_JA) {
+                               if (BPF_SRC(insn->code) != BPF_K ||
+-                                  insn->imm != 0 ||
+                                   insn->src_reg != BPF_REG_0 ||
+                                   insn->dst_reg != BPF_REG_0 ||
+-                                  class == BPF_JMP32) {
++                                  (class == BPF_JMP && insn->imm != 0) ||
++                                  (class == BPF_JMP32 && insn->off != 0)) {
+                                       verbose(env, "BPF_JA uses reserved fields\n");
+                                       return -EINVAL;
+                               }
+-                              env->insn_idx += insn->off + 1;
++                              if (class == BPF_JMP)
++                                      env->insn_idx += insn->off + 1;
++                              else
++                                      env->insn_idx += insn->imm + 1;
+                               continue;
+                       } else if (opcode == BPF_EXIT) {
+@@ -13521,13 +13533,13 @@ static bool insn_is_cond_jump(u8 code)
+ {
+       u8 op;
++      op = BPF_OP(code);
+       if (BPF_CLASS(code) == BPF_JMP32)
+-              return true;
++              return op != BPF_JA;
+       if (BPF_CLASS(code) != BPF_JMP)
+               return false;
+-      op = BPF_OP(code);
+       return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-x64-fix-tailcall-infinite-loop.patch b/queue-6.1/bpf-x64-fix-tailcall-infinite-loop.patch
new file mode 100644 (file)
index 0000000..cd789f0
--- /dev/null
@@ -0,0 +1,167 @@
+From 1a57e1d64338a8af8a056d73c2ebac861d202331 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 23:04:41 +0800
+Subject: bpf, x64: Fix tailcall infinite loop
+
+From: Leon Hwang <hffilwlqm@gmail.com>
+
+[ Upstream commit 2b5dcb31a19a2e0acd869b12c9db9b2d696ef544 ]
+
+From commit ebf7d1f508a73871 ("bpf, x64: rework pro/epilogue and tailcall
+handling in JIT"), the tailcall on x64 works better than before.
+
+From commit e411901c0b775a3a ("bpf: allow for tailcalls in BPF subprograms
+for x64 JIT"), tailcall is able to run in BPF subprograms on x64.
+
+From commit 5b92a28aae4dd0f8 ("bpf: Support attaching tracing BPF program
+to other BPF programs"), BPF program is able to trace other BPF programs.
+
+How about combining them all together?
+
+1. FENTRY/FEXIT on a BPF subprogram.
+2. A tailcall runs in the BPF subprogram.
+3. The tailcall calls the subprogram's caller.
+
+As a result, a tailcall infinite loop comes up. And the loop would halt
+the machine.
+
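+For illustration only (program, map and section names are made up, in the
+spirit of the tailcall selftests, assuming the usual <linux/bpf.h> and
+<bpf/bpf_helpers.h> includes), the three steps can be combined like this:
+
+  struct {
+          __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+          __uint(max_entries, 1);
+          __uint(key_size, sizeof(__u32));
+          __uint(value_size, sizeof(__u32));
+  } jmp_table SEC(".maps");            /* slot 0 set to entry() from user space */
+
+  static __noinline int subprog(struct __sk_buff *skb)
+  {
+          /* 2. the tailcall runs in the subprogram,
+           * 3. and its target is the subprogram's caller
+           */
+          bpf_tail_call(skb, &jmp_table, 0);
+          return 0;
+  }
+
+  SEC("tc")
+  int entry(struct __sk_buff *skb)
+  {
+          return subprog(skb);         /* 1. FENTRY/FEXIT attached to subprog() */
+  }
+
+With an fentry/fexit program attached to subprog(), its trampoline did not
+preserve tail_call_cnt, so the entry() -> subprog() -> tail call -> entry()
+cycle is never stopped by the tail call count limit.
+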
+As we know, in tail call context, the tail_call_cnt propagates via the stack
+and the rax register between BPF subprograms. The same has to happen in the
+trampolines.
+
+Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT")
+Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT")
+Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
+Link: https://lore.kernel.org/r/20230912150442.2009-3-hffilwlqm@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 28 ++++++++++++++++++++++------
+ include/linux/bpf.h         |  5 +++++
+ kernel/bpf/trampoline.c     |  4 ++--
+ kernel/bpf/verifier.c       |  3 +++
+ 4 files changed, 32 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 4686c1d9d0cfd..e6a031f8dd2e9 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -893,6 +893,10 @@ static void emit_nops(u8 **pprog, int len)
+ #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
++/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
++#define RESTORE_TAIL_CALL_CNT(stack)                          \
++      EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
++
+ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
+                 int oldproglen, struct jit_context *ctx, bool jmp_padding)
+ {
+@@ -1436,9 +1440,7 @@ st:                      if (is_imm8(insn->off))
+               case BPF_JMP | BPF_CALL:
+                       func = (u8 *) __bpf_call_base + imm32;
+                       if (tail_call_reachable) {
+-                              /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+-                              EMIT3_off32(0x48, 0x8B, 0x85,
+-                                          -round_up(bpf_prog->aux->stack_depth, 8) - 8);
++                              RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
+                               if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+                                       return -EINVAL;
+                       } else {
+@@ -2070,6 +2072,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+        * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
+        *
+        * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
++       * RSP                 [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
+        */
+       /* room for return value of orig_call or fentry prog */
+@@ -2106,6 +2109,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       EMIT1(0x55);             /* push rbp */
+       EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+       EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
++      if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
++              EMIT1(0x50);            /* push rax */
+       EMIT1(0x53);             /* push rbx */
+       /* Store number of argument registers of the traced function:
+@@ -2156,9 +2161,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               restore_regs(m, &prog, nr_args, regs_off);
++              if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
++                      /* Before calling the original function, restore the
++                       * tail_call_cnt from stack to rax.
++                       */
++                      RESTORE_TAIL_CALL_CNT(stack_size);
++
+               if (flags & BPF_TRAMP_F_ORIG_STACK) {
+-                      emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
+-                      EMIT2(0xff, 0xd0); /* call *rax */
++                      emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
++                      EMIT2(0xff, 0xd3); /* call *rbx */
+               } else {
+                       /* call original function */
+                       if (emit_call(&prog, orig_call, prog)) {
+@@ -2209,7 +2220,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+                       ret = -EINVAL;
+                       goto cleanup;
+               }
+-      }
++      } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
++              /* Before running the original function, restore the
++               * tail_call_cnt from stack to rax.
++               */
++              RESTORE_TAIL_CALL_CNT(stack_size);
++
+       /* restore return value of orig_call or fentry prog back into RAX */
+       if (save_ret)
+               emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 3ce9e39ecdb85..619fcba84be22 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -825,6 +825,11 @@ struct btf_func_model {
+  */
+ #define BPF_TRAMP_F_SHARE_IPMODIFY    BIT(6)
++/* Indicate that current trampoline is in a tail call context. Then, it has to
++ * cache and restore tail_call_cnt to avoid infinite tail call loop.
++ */
++#define BPF_TRAMP_F_TAIL_CALL_CTX     BIT(7)
++
+ /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+  * bytes on x86.
+  */
+diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
+index c4381dfcd6b09..748ac86169941 100644
+--- a/kernel/bpf/trampoline.c
++++ b/kernel/bpf/trampoline.c
+@@ -443,8 +443,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
+               goto out;
+       }
+-      /* clear all bits except SHARE_IPMODIFY */
+-      tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
++      /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
++      tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
+       if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
+           tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 12d360d80c149..ee6e811b43158 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -15442,6 +15442,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
+       if (!tr)
+               return -ENOMEM;
++      if (tgt_prog && tgt_prog->aux->tail_call_reachable)
++              tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
++
+       prog->aux->dst_trampoline = tr;
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-x86-save-restore-regs-with-bpf_dw-size.patch b/queue-6.1/bpf-x86-save-restore-regs-with-bpf_dw-size.patch
new file mode 100644 (file)
index 0000000..ccc233f
--- /dev/null
@@ -0,0 +1,94 @@
+From c56095a745ac4ce4fa4f5e267d8e5610efb53c12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 12:07:36 +0800
+Subject: bpf, x86: save/restore regs with BPF_DW size
+
+From: Menglong Dong <imagedong@tencent.com>
+
+[ Upstream commit 02a6dfa8ff43efb1c989f87a4d862aedf436088a ]
+
+As we already reserve 8 bytes on the stack for each reg, it is ok to
+store/restore the regs in BPF_DW size. This will make the code in
+save_regs()/restore_regs() simpler.
+
+Signed-off-by: Menglong Dong <imagedong@tencent.com>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20230713040738.1789742-2-imagedong@tencent.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 2b5dcb31a19a ("bpf, x64: Fix tailcall infinite loop")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 35 ++++++-----------------------------
+ 1 file changed, 6 insertions(+), 29 deletions(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 87cea23f2da16..84c695ae1940f 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -1755,57 +1755,34 @@ st:                    if (is_imm8(insn->off))
+ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+                     int stack_size)
+ {
+-      int i, j, arg_size;
+-      bool next_same_struct = false;
++      int i;
+       /* Store function arguments to stack.
+        * For a function that accepts two pointers the sequence will be:
+        * mov QWORD PTR [rbp-0x10],rdi
+        * mov QWORD PTR [rbp-0x8],rsi
+        */
+-      for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
+-              /* The arg_size is at most 16 bytes, enforced by the verifier. */
+-              arg_size = m->arg_size[j];
+-              if (arg_size > 8) {
+-                      arg_size = 8;
+-                      next_same_struct = !next_same_struct;
+-              }
+-
+-              emit_stx(prog, bytes_to_bpf_size(arg_size),
+-                       BPF_REG_FP,
++      for (i = 0; i < min(nr_regs, 6); i++)
++              emit_stx(prog, BPF_DW, BPF_REG_FP,
+                        i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+                        -(stack_size - i * 8));
+-
+-              j = next_same_struct ? j : j + 1;
+-      }
+ }
+ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+                        int stack_size)
+ {
+-      int i, j, arg_size;
+-      bool next_same_struct = false;
++      int i;
+       /* Restore function arguments from stack.
+        * For a function that accepts two pointers the sequence will be:
+        * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+        * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+        */
+-      for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
+-              /* The arg_size is at most 16 bytes, enforced by the verifier. */
+-              arg_size = m->arg_size[j];
+-              if (arg_size > 8) {
+-                      arg_size = 8;
+-                      next_same_struct = !next_same_struct;
+-              }
+-
+-              emit_ldx(prog, bytes_to_bpf_size(arg_size),
++      for (i = 0; i < min(nr_regs, 6); i++)
++              emit_ldx(prog, BPF_DW,
+                        i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+                        BPF_REG_FP,
+                        -(stack_size - i * 8));
+-
+-              j = next_same_struct ? j : j + 1;
+-      }
+ }
+ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-x86-simplify-the-parsing-logic-of-structure-para.patch b/queue-6.1/bpf-x86-simplify-the-parsing-logic-of-structure-para.patch
new file mode 100644 (file)
index 0000000..5d1b4cc
--- /dev/null
@@ -0,0 +1,225 @@
+From 0cc5afc6ba7a0afb7289de880f54e9d3715ee8be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 11:50:26 +0800
+Subject: bpf, x86: Simplify the parsing logic of structure parameters
+
+From: Pu Lehui <pulehui@huawei.com>
+
+[ Upstream commit 7f7880495770329d095d402c2865bfa7089192f8 ]
+
+The extra_nregs for structure parameters and nr_args can be
+added together directly at the beginning, using a flip flag
+to identify structure parameters. In the meantime, rename
+some variables to make them clearer.
+
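+As a made-up example: for a traced function taking a 16-byte struct by
+value plus an int, the BTF func model has nr_args = 2 with
+arg_size[] = { 16, 4 }, so nr_regs = 2 + (16 + 7) / 8 - 1 = 3 argument
+registers. save_regs()/restore_regs() then walk three registers while the
+flip flag keeps j on the struct's arg_size for both of its halves:
+
+  i = 0: arg_size[0] = 16 > 8 -> spill 8 bytes, flip, j stays 0
+  i = 1: arg_size[0] = 16 > 8 -> spill 8 bytes, flip back, j becomes 1
+  i = 2: arg_size[1] = 4      -> spill 4 bytes, j becomes 2
+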
+Signed-off-by: Pu Lehui <pulehui@huawei.com>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20230105035026.3091988-1-pulehui@huaweicloud.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Stable-dep-of: 2b5dcb31a19a ("bpf, x64: Fix tailcall infinite loop")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 101 +++++++++++++++++-------------------
+ 1 file changed, 48 insertions(+), 53 deletions(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index e6a031f8dd2e9..87cea23f2da16 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -1752,62 +1752,59 @@ st:                    if (is_imm8(insn->off))
+       return proglen;
+ }
+-static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
++static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+                     int stack_size)
+ {
+-      int i, j, arg_size, nr_regs;
++      int i, j, arg_size;
++      bool next_same_struct = false;
++
+       /* Store function arguments to stack.
+        * For a function that accepts two pointers the sequence will be:
+        * mov QWORD PTR [rbp-0x10],rdi
+        * mov QWORD PTR [rbp-0x8],rsi
+        */
+-      for (i = 0, j = 0; i < min(nr_args, 6); i++) {
+-              if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
+-                      nr_regs = (m->arg_size[i] + 7) / 8;
++      for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
++              /* The arg_size is at most 16 bytes, enforced by the verifier. */
++              arg_size = m->arg_size[j];
++              if (arg_size > 8) {
+                       arg_size = 8;
+-              } else {
+-                      nr_regs = 1;
+-                      arg_size = m->arg_size[i];
++                      next_same_struct = !next_same_struct;
+               }
+-              while (nr_regs) {
+-                      emit_stx(prog, bytes_to_bpf_size(arg_size),
+-                               BPF_REG_FP,
+-                               j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
+-                               -(stack_size - j * 8));
+-                      nr_regs--;
+-                      j++;
+-              }
++              emit_stx(prog, bytes_to_bpf_size(arg_size),
++                       BPF_REG_FP,
++                       i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
++                       -(stack_size - i * 8));
++
++              j = next_same_struct ? j : j + 1;
+       }
+ }
+-static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
++static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+                        int stack_size)
+ {
+-      int i, j, arg_size, nr_regs;
++      int i, j, arg_size;
++      bool next_same_struct = false;
+       /* Restore function arguments from stack.
+        * For a function that accepts two pointers the sequence will be:
+        * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+        * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+        */
+-      for (i = 0, j = 0; i < min(nr_args, 6); i++) {
+-              if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
+-                      nr_regs = (m->arg_size[i] + 7) / 8;
++      for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
++              /* The arg_size is at most 16 bytes, enforced by the verifier. */
++              arg_size = m->arg_size[j];
++              if (arg_size > 8) {
+                       arg_size = 8;
+-              } else {
+-                      nr_regs = 1;
+-                      arg_size = m->arg_size[i];
++                      next_same_struct = !next_same_struct;
+               }
+-              while (nr_regs) {
+-                      emit_ldx(prog, bytes_to_bpf_size(arg_size),
+-                               j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
+-                               BPF_REG_FP,
+-                               -(stack_size - j * 8));
+-                      nr_regs--;
+-                      j++;
+-              }
++              emit_ldx(prog, bytes_to_bpf_size(arg_size),
++                       i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
++                       BPF_REG_FP,
++                       -(stack_size - i * 8));
++
++              j = next_same_struct ? j : j + 1;
+       }
+ }
+@@ -2033,8 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+                               struct bpf_tramp_links *tlinks,
+                               void *func_addr)
+ {
+-      int ret, i, nr_args = m->nr_args, extra_nregs = 0;
+-      int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
++      int i, ret, nr_regs = m->nr_args, stack_size = 0;
++      int regs_off, nregs_off, ip_off, run_ctx_off;
+       struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+       struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+       struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+@@ -2043,17 +2040,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       u8 *prog;
+       bool save_ret;
+-      /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+-      if (nr_args > 6)
+-              return -ENOTSUPP;
+-
+-      for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
++      /* extra registers for struct arguments */
++      for (i = 0; i < m->nr_args; i++)
+               if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+-                      extra_nregs += (m->arg_size[i] + 7) / 8 - 1;
+-      }
+-      if (nr_args + extra_nregs > 6)
++                      nr_regs += (m->arg_size[i] + 7) / 8 - 1;
++
++      /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
++      if (nr_regs > 6)
+               return -ENOTSUPP;
+-      stack_size += extra_nregs * 8;
+       /* Generated trampoline stack layout:
+        *
+@@ -2067,7 +2061,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+        *                 [ ...             ]
+        * RBP - regs_off  [ reg_arg1        ]  program's ctx pointer
+        *
+-       * RBP - args_off  [ arg regs count  ]  always
++       * RBP - nregs_off [ regs count      ]  always
+        *
+        * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
+        *
+@@ -2080,11 +2074,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       if (save_ret)
+               stack_size += 8;
++      stack_size += nr_regs * 8;
+       regs_off = stack_size;
+-      /* args count  */
++      /* regs count  */
+       stack_size += 8;
+-      args_off = stack_size;
++      nregs_off = stack_size;
+       if (flags & BPF_TRAMP_F_IP_ARG)
+               stack_size += 8; /* room for IP address argument */
+@@ -2114,11 +2109,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       EMIT1(0x53);             /* push rbx */
+       /* Store number of argument registers of the traced function:
+-       *   mov rax, nr_args + extra_nregs
+-       *   mov QWORD PTR [rbp - args_off], rax
++       *   mov rax, nr_regs
++       *   mov QWORD PTR [rbp - nregs_off], rax
+        */
+-      emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args + extra_nregs);
+-      emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
++      emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
++      emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);
+       if (flags & BPF_TRAMP_F_IP_ARG) {
+               /* Store IP address of the traced function:
+@@ -2129,7 +2124,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+               emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
+       }
+-      save_regs(m, &prog, nr_args, regs_off);
++      save_regs(m, &prog, nr_regs, regs_off);
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               /* arg1: mov rdi, im */
+@@ -2159,7 +2154,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+       }
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+-              restore_regs(m, &prog, nr_args, regs_off);
++              restore_regs(m, &prog, nr_regs, regs_off);
+               if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+                       /* Before calling the original function, restore the
+@@ -2206,7 +2201,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+               }
+       if (flags & BPF_TRAMP_F_RESTORE_REGS)
+-              restore_regs(m, &prog, nr_args, regs_off);
++              restore_regs(m, &prog, nr_regs, regs_off);
+       /* This needs to be done regardless. If there were fmod_ret programs,
+        * the return value is only updated on the stack and still needs to be
+-- 
+2.43.0
+
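The flip-flag iteration above is easier to follow with a concrete argument list. The following stand-alone sketch (plain userspace C, not kernel code) mimics the reworked save_regs()/restore_regs() walk for a hypothetical traced function taking (u64 a, struct { u64 x; u64 y; } b, u64 c): nr_regs is 4 while there are only 3 BTF arguments, and j stays on the struct argument until both of its registers have been handled.

    #include <stdio.h>
    #include <stdbool.h>

    /* Illustration of the register/argument walk: arg_size[] mimics
     * btf_func_model::arg_size for (u64, 16-byte struct, u64); one
     * 16-byte struct argument occupies two registers. */
    int main(void)
    {
        int arg_size[] = { 8, 16, 8 };      /* bytes per BTF argument */
        int nr_regs = 4;                    /* 1 + 2 + 1 registers */
        bool next_same_struct = false;
        int i, j;

        for (i = 0, j = 0; i < nr_regs && i < 6; i++) {
            int size = arg_size[j];

            if (size > 8) {
                size = 8;                   /* each register holds 8 bytes */
                next_same_struct = !next_same_struct;
            }
            printf("reg %d <- arg %d (%d bytes)\n", i, j, size);
            /* stay on the same argument for the struct's second register */
            j = next_same_struct ? j : j + 1;
        }
        return 0;
    }

Running it prints reg 0 <- arg 0, reg 1 <- arg 1, reg 2 <- arg 1, reg 3 <- arg 2: the struct consumes two registers while j only advances past it once both halves are stored.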
diff --git a/queue-6.1/btrfs-fix-qgroup_free_reserved_data-int-overflow.patch b/queue-6.1/btrfs-fix-qgroup_free_reserved_data-int-overflow.patch
new file mode 100644 (file)
index 0000000..a375df2
--- /dev/null
@@ -0,0 +1,269 @@
+From 513d47d3ddb69a718de4359b8ccbf68b1431cdde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Dec 2023 13:00:10 -0800
+Subject: btrfs: fix qgroup_free_reserved_data int overflow
+
+From: Boris Burkov <boris@bur.io>
+
+[ Upstream commit 9e65bfca24cf1d77e4a5c7a170db5867377b3fe7 ]
+
+The reserved data counter and the input parameter are u64, but we
+inadvertently accumulate the value in an int. Overflowing that int results in
+freeing the wrong amount of data and breaking reserve accounting.
+
+Unfortunately, this overflow rot spreads from there, as the qgroup
+release/free functions rely on returning an int to take advantage of
+negative values for error codes.
+
+Therefore, the full fix is to return the "released" or "freed" amount
+via a u64 out-argument and to return 0 or a negative error code via the
+return value.
+
+Most of the call sites simply ignore the return value, though some
+of them handle the error and count the returned bytes. Change all of
+them accordingly.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/delalloc-space.c |  2 +-
+ fs/btrfs/file.c           |  2 +-
+ fs/btrfs/inode.c          | 16 ++++++++--------
+ fs/btrfs/ordered-data.c   |  7 ++++---
+ fs/btrfs/qgroup.c         | 25 +++++++++++++++----------
+ fs/btrfs/qgroup.h         |  4 ++--
+ 6 files changed, 31 insertions(+), 25 deletions(-)
+
+diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
+index 0b62ce77053f5..f2bc5563c0f92 100644
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -197,7 +197,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
+       start = round_down(start, fs_info->sectorsize);
+       btrfs_free_reserved_data_space_noquota(fs_info, len);
+-      btrfs_qgroup_free_data(inode, reserved, start, len);
++      btrfs_qgroup_free_data(inode, reserved, start, len, NULL);
+ }
+ /**
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 0a46fff3dd067..1783a0fbf1665 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -3191,7 +3191,7 @@ static long btrfs_fallocate(struct file *file, int mode,
+                       qgroup_reserved -= range->len;
+               } else if (qgroup_reserved > 0) {
+                       btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved,
+-                                             range->start, range->len);
++                                             range->start, range->len, NULL);
+                       qgroup_reserved -= range->len;
+               }
+               list_del(&range->list);
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 81eac121c6b23..9a7d77c410e22 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -466,7 +466,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
+        * And at reserve time, it's always aligned to page size, so
+        * just free one page here.
+        */
+-      btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
++      btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL);
+       btrfs_free_path(path);
+       btrfs_end_transaction(trans);
+       return ret;
+@@ -5372,7 +5372,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
+                */
+               if (state_flags & EXTENT_DELALLOC)
+                       btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
+-                                             end - start + 1);
++                                             end - start + 1, NULL);
+               clear_extent_bit(io_tree, start, end,
+                                EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING,
+@@ -8440,7 +8440,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
+                *    reserved data space.
+                *    Since the IO will never happen for this page.
+                */
+-              btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
++              btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL);
+               if (!inode_evicting) {
+                       clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
+                                EXTENT_DELALLOC | EXTENT_UPTODATE |
+@@ -9902,7 +9902,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
+       struct btrfs_path *path;
+       u64 start = ins->objectid;
+       u64 len = ins->offset;
+-      int qgroup_released;
++      u64 qgroup_released = 0;
+       int ret;
+       memset(&stack_fi, 0, sizeof(stack_fi));
+@@ -9915,9 +9915,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
+       btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
+       /* Encryption and other encoding is reserved and all 0 */
+-      qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
+-      if (qgroup_released < 0)
+-              return ERR_PTR(qgroup_released);
++      ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released);
++      if (ret < 0)
++              return ERR_PTR(ret);
+       if (trans) {
+               ret = insert_reserved_file_extent(trans, inode,
+@@ -10903,7 +10903,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+       btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
+ out_qgroup_free_data:
+       if (ret < 0)
+-              btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
++              btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL);
+ out_free_data_space:
+       /*
+        * If btrfs_reserve_extent() succeeded, then we already decremented
+diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
+index 0321753c16b9f..1b2af4785c0e2 100644
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -172,11 +172,12 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+       struct rb_node *node;
+       struct btrfs_ordered_extent *entry;
+       int ret;
++      u64 qgroup_rsv = 0;
+       if (flags &
+           ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
+               /* For nocow write, we can release the qgroup rsv right now */
+-              ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
++              ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
+               if (ret < 0)
+                       return ret;
+               ret = 0;
+@@ -185,7 +186,7 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+                * The ordered extent has reserved qgroup space, release now
+                * and pass the reserved number for qgroup_record to free.
+                */
+-              ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
++              ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);
+               if (ret < 0)
+                       return ret;
+       }
+@@ -203,7 +204,7 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+       entry->inode = igrab(&inode->vfs_inode);
+       entry->compress_type = compress_type;
+       entry->truncated_len = (u64)-1;
+-      entry->qgroup_rsv = ret;
++      entry->qgroup_rsv = qgroup_rsv;
+       entry->physical = (u64)-1;
+       ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 26cabffd59710..96ec9ccc2ef61 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3833,13 +3833,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
+ /* Free ranges specified by @reserved, normally in error path */
+ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
+-                      struct extent_changeset *reserved, u64 start, u64 len)
++                                   struct extent_changeset *reserved,
++                                   u64 start, u64 len, u64 *freed_ret)
+ {
+       struct btrfs_root *root = inode->root;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct extent_changeset changeset;
+-      int freed = 0;
++      u64 freed = 0;
+       int ret;
+       extent_changeset_init(&changeset);
+@@ -3880,7 +3881,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
+       }
+       btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
+                                 BTRFS_QGROUP_RSV_DATA);
+-      ret = freed;
++      if (freed_ret)
++              *freed_ret = freed;
++      ret = 0;
+ out:
+       extent_changeset_release(&changeset);
+       return ret;
+@@ -3888,7 +3891,7 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
+ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
+                       struct extent_changeset *reserved, u64 start, u64 len,
+-                      int free)
++                      u64 *released, int free)
+ {
+       struct extent_changeset changeset;
+       int trace_op = QGROUP_RELEASE;
+@@ -3900,7 +3903,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
+       /* In release case, we shouldn't have @reserved */
+       WARN_ON(!free && reserved);
+       if (free && reserved)
+-              return qgroup_free_reserved_data(inode, reserved, start, len);
++              return qgroup_free_reserved_data(inode, reserved, start, len, released);
+       extent_changeset_init(&changeset);
+       ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
+                                      EXTENT_QGROUP_RESERVED, &changeset);
+@@ -3915,7 +3918,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
+               btrfs_qgroup_free_refroot(inode->root->fs_info,
+                               inode->root->root_key.objectid,
+                               changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
+-      ret = changeset.bytes_changed;
++      if (released)
++              *released = changeset.bytes_changed;
+ out:
+       extent_changeset_release(&changeset);
+       return ret;
+@@ -3934,9 +3938,10 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
+  * NOTE: This function may sleep for memory allocation.
+  */
+ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
+-                      struct extent_changeset *reserved, u64 start, u64 len)
++                         struct extent_changeset *reserved,
++                         u64 start, u64 len, u64 *freed)
+ {
+-      return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
++      return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1);
+ }
+ /*
+@@ -3954,9 +3959,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
+  *
+  * NOTE: This function may sleep for memory allocation.
+  */
+-int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
++int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released)
+ {
+-      return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
++      return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0);
+ }
+ static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
+diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
+index 578c77e94200f..c382923f7628e 100644
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -360,10 +360,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+ /* New io_tree based accurate qgroup reserve API */
+ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
+                       struct extent_changeset **reserved, u64 start, u64 len);
+-int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
++int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released);
+ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
+                          struct extent_changeset *reserved, u64 start,
+-                         u64 len);
++                         u64 len, u64 *freed);
+ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             enum btrfs_qgroup_rsv_type type, bool enforce);
+ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+-- 
+2.43.0
+
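The truncation this patch guards against is easy to reproduce in isolation: once a reserved range exceeds INT_MAX bytes, an int accumulator wraps and the later free is handed a bogus (possibly negative) byte count. A minimal sketch, independent of btrfs internals:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t range_len = 3ULL << 30;    /* 3 GiB reserved in one range */
        int freed_int = 0;                  /* old accumulator type */
        uint64_t freed_u64 = 0;             /* type used after the fix */

        /* implementation-defined truncation: 3 GiB does not fit in an int */
        freed_int += range_len;
        freed_u64 += range_len;

        printf("int accumulator: %d\n", freed_int);
        printf("u64 accumulator: %llu\n", (unsigned long long)freed_u64);
        return 0;
    }

On common ABIs the int accumulator prints -1073741824, which is why the fix both widens the counter and moves the byte count out of the int return value.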
diff --git a/queue-6.1/btrfs-mark-the-len-field-in-struct-btrfs_ordered_sum.patch b/queue-6.1/btrfs-mark-the-len-field-in-struct-btrfs_ordered_sum.patch
new file mode 100644 (file)
index 0000000..bbbb423
--- /dev/null
@@ -0,0 +1,51 @@
+From c5154bfdcfc857cf2ee5f1b2d6b0778c026c11b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 May 2023 17:03:06 +0200
+Subject: btrfs: mark the len field in struct btrfs_ordered_sum as unsigned
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 6e4b2479ab38b3f949a85964da212295d32102f0 ]
+
+len can't ever be negative, so mark it as a u32 instead of an int.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 9e65bfca24cf ("btrfs: fix qgroup_free_reserved_data int overflow")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/file-item.c    | 2 +-
+ fs/btrfs/ordered-data.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
+index b14d2da9b26d3..14478da875313 100644
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -602,7 +602,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+                       }
+                       sums->bytenr = start;
+-                      sums->len = (int)size;
++                      sums->len = size;
+                       offset = (start - key.offset) >> fs_info->sectorsize_bits;
+                       offset *= csum_size;
+diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
+index f59f2dbdb25ed..cc3ca4bb9bd54 100644
+--- a/fs/btrfs/ordered-data.h
++++ b/fs/btrfs/ordered-data.h
+@@ -20,7 +20,7 @@ struct btrfs_ordered_sum {
+       /*
+        * this is the length in bytes covered by the sums array below.
+        */
+-      int len;
++      u32 len;
+       struct list_head list;
+       /* last field is a variable length array of csums */
+       u8 sums[];
+-- 
+2.43.0
+
diff --git a/queue-6.1/can-raw-add-support-for-so_mark.patch b/queue-6.1/can-raw-add-support-for-so_mark.patch
new file mode 100644 (file)
index 0000000..7da0798
--- /dev/null
@@ -0,0 +1,36 @@
+From 17fe236d4580c1fb90b59345b81b667d28253b36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 10:10:08 +0100
+Subject: can: raw: add support for SO_MARK
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+[ Upstream commit 0826e82b8a32e646b7b32ba8b68ba30812028e47 ]
+
+Add support for SO_MARK to the CAN_RAW protocol. This makes it
+possible to add traffic control filters based on the fwmark.
+
+Link: https://lore.kernel.org/all/20221210113653.170346-1-mkl@pengutronix.de
+Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Stable-dep-of: 7f6ca95d16b9 ("net: Implement missing getsockopt(SO_TIMESTAMPING_NEW)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/can/raw.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/can/raw.c b/net/can/raw.c
+index 8c104339d538d..488320738e319 100644
+--- a/net/can/raw.c
++++ b/net/can/raw.c
+@@ -881,6 +881,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+       skb->dev = dev;
+       skb->priority = sk->sk_priority;
++      skb->mark = sk->sk_mark;
+       skb->tstamp = sockc.transmit_time;
+       skb_setup_tx_timestamp(skb, sockc.tsflags);
+-- 
+2.43.0
+
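With sk_mark propagated into skb->mark, user space can tag CAN frames for fwmark-based tc or netfilter rules the same way it already can for IP sockets. A minimal, untested usage sketch; the interface name and mark value are placeholders, and SO_MARK requires CAP_NET_ADMIN:

    #include <linux/can.h>
    #include <linux/can/raw.h>
    #include <net/if.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int open_marked_can_socket(void)
    {
        struct sockaddr_can addr = { .can_family = AF_CAN };
        int mark = 42;          /* placeholder fwmark matched by tc/iptables */
        int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);

        if (s < 0)
            return -1;

        /* Every frame sent on this socket now carries skb->mark == 42. */
        if (setsockopt(s, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
            goto err;

        addr.can_ifindex = if_nametoindex("can0");  /* placeholder ifname */
        if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
            goto err;

        return s;
    err:
        close(s);
        return -1;
    }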
diff --git a/queue-6.1/cpu-smt-create-topology_smt_thread_allowed.patch b/queue-6.1/cpu-smt-create-topology_smt_thread_allowed.patch
new file mode 100644 (file)
index 0000000..161626b
--- /dev/null
@@ -0,0 +1,111 @@
+From 853dc4a7fe0d006cee6fde50262d67a545487936 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 16:51:39 +0200
+Subject: cpu/SMT: Create topology_smt_thread_allowed()
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit 38253464bc821d6de6bba81bb1412ebb36f6cbd1 ]
+
+Some architectures allow partial SMT states, i.e. when not all SMT threads
+are brought online.
+
+To support that, add an architecture helper which checks whether a given
+CPU is allowed to be brought online depending on how many SMT threads are
+currently enabled. Since this is only applicable to architectures supporting
+partial SMT, only those architectures should select the new configuration
+variable CONFIG_SMT_NUM_THREADS_DYNAMIC. For the other architectures, which
+do not support partial SMT states, there is no need to define
+topology_smt_thread_allowed(); the generic code assumes that either all
+threads or only the primary ones are allowed.
+
+Call the helper from cpu_smt_enable(), and from cpu_smt_allowed() when SMT
+is enabled, to check whether a particular thread should be onlined.
+Notably, also call it from cpu_smt_disable() if CPU_SMT_ENABLED, to allow
+offlining some threads when moving from a higher to a lower number of
+threads online.
+
+[ ldufour: Slightly reword the commit's description ]
+[ ldufour: Introduce CONFIG_SMT_NUM_THREADS_DYNAMIC ]
+
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Link: https://lore.kernel.org/r/20230705145143.40545-7-ldufour@linux.ibm.com
+Stable-dep-of: d91bdd96b55c ("cpu/SMT: Make SMT control more robust against enumeration failures")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/Kconfig |  3 +++
+ kernel/cpu.c | 24 +++++++++++++++++++++++-
+ 2 files changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/arch/Kconfig b/arch/Kconfig
+index b60d271bf76a9..14273a6203dfc 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS
+ config HOTPLUG_SMT
+       bool
++config SMT_NUM_THREADS_DYNAMIC
++      bool
++
+ config GENERIC_ENTRY
+        bool
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 551468d9c5a85..c37f1758a4865 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -446,9 +446,23 @@ static int __init smt_cmdline_disable(char *str)
+ }
+ early_param("nosmt", smt_cmdline_disable);
++/*
++ * For architectures supporting partial SMT states, check if the thread is allowed.
++ * Otherwise this has already been checked through cpu_smt_max_threads when
++ * setting the SMT level.
++ */
++static inline bool cpu_smt_thread_allowed(unsigned int cpu)
++{
++#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
++      return topology_smt_thread_allowed(cpu);
++#else
++      return true;
++#endif
++}
++
+ static inline bool cpu_smt_allowed(unsigned int cpu)
+ {
+-      if (cpu_smt_control == CPU_SMT_ENABLED)
++      if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+               return true;
+       if (topology_is_primary_thread(cpu))
+@@ -2294,6 +2308,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+       for_each_online_cpu(cpu) {
+               if (topology_is_primary_thread(cpu))
+                       continue;
++              /*
++               * Disable can be called with CPU_SMT_ENABLED when changing
++               * from a higher to lower number of SMT threads per core.
++               */
++              if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
++                      continue;
+               ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+               if (ret)
+                       break;
+@@ -2328,6 +2348,8 @@ int cpuhp_smt_enable(void)
+               /* Skip online CPUs and CPUs on offline nodes */
+               if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
+                       continue;
++              if (!cpu_smt_thread_allowed(cpu))
++                      continue;
+               ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
+               if (ret)
+                       break;
+-- 
+2.43.0
+
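For architectures that select CONFIG_SMT_NUM_THREADS_DYNAMIC, the hook essentially compares a CPU's thread position within its core against the number of threads currently allowed. A hedged, self-contained model of what such a helper could look like; thread_in_core() and smt_threads_allowed are illustrative placeholders, not existing kernel symbols:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned int smt_threads_allowed = 2;    /* e.g. "2 threads per core" */

    static unsigned int thread_in_core(unsigned int cpu)
    {
        return cpu % 8;     /* placeholder: 8-way SMT core, CPUs numbered per core */
    }

    /* Model of the per-architecture topology_smt_thread_allowed() hook. */
    static bool topology_smt_thread_allowed(unsigned int cpu)
    {
        return thread_in_core(cpu) < smt_threads_allowed;
    }

    int main(void)
    {
        for (unsigned int cpu = 0; cpu < 8; cpu++)
            printf("cpu %u: %s\n", cpu,
                   topology_smt_thread_allowed(cpu) ? "bootable" : "held offline");
        return 0;
    }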
diff --git a/queue-6.1/cpu-smt-make-smt-control-more-robust-against-enumera.patch b/queue-6.1/cpu-smt-make-smt-control-more-robust-against-enumera.patch
new file mode 100644 (file)
index 0000000..f768759
--- /dev/null
@@ -0,0 +1,113 @@
+From 09e97aec954cf0a31689861b27a859e63d278e0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 10:18:27 +0200
+Subject: cpu/SMT: Make SMT control more robust against enumeration failures
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit d91bdd96b55cc3ce98d883a60f133713821b80a6 ]
+
+The SMT control mechanism got added as speculation attack vector
+mitigation. The implemented logic relies on the primary thread mask to
+be set up properly.
+
+This turns out to be an issue with XEN/PV guests because their CPU hotplug
+mechanics do not enumerate APICs and therefore the mask is never correctly
+populated.
+
+This went unnoticed so far because by chance XEN/PV ends up with
+smp_num_siblings == 2. So smt_hotplug_control stays at its default value
+CPU_SMT_ENABLED and the primary thread mask is never evaluated in the
+context of CPU hotplug.
+
+This stopped "working" with the upcoming overhaul of the topology
+evaluation which legitimately provides a fake topology for XEN/PV. That
+sets smp_num_siblings to 1, which causes the CPU hotplug core to
+refuse to bring up the APs.
+
+This happens because smt_hotplug_control is set to CPU_SMT_NOT_SUPPORTED
+which causes cpu_smt_allowed() to evaluate the unpopulated primary thread
+mask with the conclusion that all non-boot CPUs are not valid to be
+plugged.
+
+Make cpu_smt_allowed() more robust and take CPU_SMT_NOT_SUPPORTED and
+CPU_SMT_NOT_IMPLEMENTED into account. Rename it to cpu_bootable() while at
+it as that makes it more clear what the function is about.
+
+The primary mask issue on x86 XEN/PV needs to be addressed separately as
+there are users outside of the CPU hotplug code too.
+
+Fixes: 05736e4ac13c ("cpu/hotplug: Provide knobs to control SMT")
+Reported-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Juergen Gross <jgross@suse.com>
+Tested-by: Sohil Mehta <sohil.mehta@intel.com>
+Tested-by: Michael Kelley <mikelley@microsoft.com>
+Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230814085112.149440843@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cpu.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index c37f1758a4865..e6f0101941ed8 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -460,11 +460,19 @@ static inline bool cpu_smt_thread_allowed(unsigned int cpu)
+ #endif
+ }
+-static inline bool cpu_smt_allowed(unsigned int cpu)
++static inline bool cpu_bootable(unsigned int cpu)
+ {
+       if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+               return true;
++      /* All CPUs are bootable if controls are not configured */
++      if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
++              return true;
++
++      /* All CPUs are bootable if CPU is not SMT capable */
++      if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
++              return true;
++
+       if (topology_is_primary_thread(cpu))
+               return true;
+@@ -485,7 +493,7 @@ bool cpu_smt_possible(void)
+ }
+ EXPORT_SYMBOL_GPL(cpu_smt_possible);
+ #else
+-static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
++static inline bool cpu_bootable(unsigned int cpu) { return true; }
+ #endif
+ static inline enum cpuhp_state
+@@ -588,10 +596,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
+        * SMT soft disabling on X86 requires to bring the CPU out of the
+        * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
+        * CPU marked itself as booted_once in notify_cpu_starting() so the
+-       * cpu_smt_allowed() check will now return false if this is not the
++       * cpu_bootable() check will now return false if this is not the
+        * primary sibling.
+        */
+-      if (!cpu_smt_allowed(cpu))
++      if (!cpu_bootable(cpu))
+               return -ECANCELED;
+       if (st->target <= CPUHP_AP_ONLINE_IDLE)
+@@ -1478,7 +1486,7 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target)
+               err = -EBUSY;
+               goto out;
+       }
+-      if (!cpu_smt_allowed(cpu)) {
++      if (!cpu_bootable(cpu)) {
+               err = -EPERM;
+               goto out;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/dpaa2-eth-recycle-the-rx-buffer-only-after-all-proce.patch b/queue-6.1/dpaa2-eth-recycle-the-rx-buffer-only-after-all-proce.patch
new file mode 100644 (file)
index 0000000..77c1326
--- /dev/null
@@ -0,0 +1,69 @@
+From 95fa91911ce94d90029ca22af93007ce4b006574 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Nov 2023 12:28:05 +0200
+Subject: dpaa2-eth: recycle the RX buffer only after all processing done
+
+From: Ioana Ciornei <ioana.ciornei@nxp.com>
+
+[ Upstream commit beb1930f966d1517921488bd5d64147f58f79abf ]
+
+The blamed commit added support for Rx copybreak. This meant that for
+certain frame sizes, a new skb was allocated and the initial data buffer
+was recycled. Instead of waiting to recycle the Rx buffer only after all
+processing was done on it (like accessing the parse results or timestamp
+information), the code path just went ahead and re-used the buffer right
+away.
+
+This sometimes led to corrupted HW and SW annotation areas.
+Fix this by delaying the moment when the buffer is recycled.
+
+Fixes: 50f826999a80 ("dpaa2-eth: add rx copybreak support")
+Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+index b58162ce81d87..de62eee58a00e 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+@@ -509,8 +509,6 @@ static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
+       memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
+-      dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
+-
+       return skb;
+ }
+@@ -528,6 +526,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
+       struct dpaa2_eth_drv_stats *percpu_extras;
+       struct device *dev = priv->net_dev->dev.parent;
+       struct dpaa2_fas *fas;
++      bool recycle_rx_buf = false;
+       void *buf_data;
+       u32 status = 0;
+       u32 xdp_act;
+@@ -560,6 +559,8 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
+                       dma_unmap_page(dev, addr, priv->rx_buf_size,
+                                      DMA_BIDIRECTIONAL);
+                       skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
++              } else {
++                      recycle_rx_buf = true;
+               }
+       } else if (fd_format == dpaa2_fd_sg) {
+               WARN_ON(priv->xdp_prog);
+@@ -607,6 +608,8 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
+       list_add_tail(&skb->list, ch->rx_list);
++      if (recycle_rx_buf)
++              dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
+       return;
+ err_build_skb:
+-- 
+2.43.0
+
diff --git a/queue-6.1/drm-bridge-ti-sn65dsi86-never-store-more-than-msg-si.patch b/queue-6.1/drm-bridge-ti-sn65dsi86-never-store-more-than-msg-si.patch
new file mode 100644 (file)
index 0000000..2b64c92
--- /dev/null
@@ -0,0 +1,55 @@
+From 57568971e8ca978db98bde4b8e417daebc3ba871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Dec 2023 12:37:52 -0800
+Subject: drm/bridge: ti-sn65dsi86: Never store more than msg->size bytes in
+ AUX xfer
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit aca58eac52b88138ab98c814afb389a381725cd7 ]
+
+For aux reads, the value `msg->size` indicates the size of the buffer
+provided by `msg->buffer`. We should never, under any circumstances, write
+more than that many bytes into the buffer, since doing so would overflow it.
+
+In the ti-sn65dsi86 driver there is one code path that reads the
+transfer length from hardware. Even though it's never been seen to be
+a problem, we should make extra sure that the hardware isn't
+increasing the length since doing so would cause us to overrun the
+buffer.
+
+Fixes: 982f589bde7a ("drm/bridge: ti-sn65dsi86: Update reply on aux failures")
+Reviewed-by: Stephen Boyd <swboyd@chromium.org>
+Reviewed-by: Guenter Roeck <groeck@chromium.org>
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20231214123752.v3.2.I7b83c0f31aeedc6b1dc98c7c741d3e1f94f040f8@changeid
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+index 1b5c27ed27370..ff4d0564122a3 100644
+--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
++++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+@@ -527,6 +527,7 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux,
+       u32 request_val = AUX_CMD_REQ(msg->request);
+       u8 *buf = msg->buffer;
+       unsigned int len = msg->size;
++      unsigned int short_len;
+       unsigned int val;
+       int ret;
+       u8 addr_len[SN_AUX_LENGTH_REG + 1 - SN_AUX_ADDR_19_16_REG];
+@@ -600,7 +601,8 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux,
+       }
+       if (val & AUX_IRQ_STATUS_AUX_SHORT) {
+-              ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &len);
++              ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &short_len);
++              len = min(len, short_len);
+               if (ret)
+                       goto exit;
+       } else if (val & AUX_IRQ_STATUS_NAT_I2C_FAIL) {
+-- 
+2.43.0
+
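The pattern the patch enforces generalizes beyond this bridge: when a transfer length is reported back by hardware, clamp it to the buffer size the caller actually provided before copying. A small stand-alone sketch of that clamp; the names are illustrative, not the driver's:

    #include <stdio.h>
    #include <string.h>

    /* hw_len is whatever the device reports; msg_size is the space the
     * caller provided. The copy length is the smaller of the two. */
    static size_t copy_aux_reply(char *dst, size_t msg_size,
                                 const char *hw_buf, size_t hw_len)
    {
        size_t len = hw_len < msg_size ? hw_len : msg_size;

        memcpy(dst, hw_buf, len);   /* never writes past dst[msg_size - 1] */
        return len;
    }

    int main(void)
    {
        char reply[4];
        const char hw[] = "0123456789abcdef";

        /* A misbehaving device claims 16 bytes although only 4 were requested. */
        printf("copied %zu bytes\n",
               copy_aux_reply(reply, sizeof(reply), hw, 16));
        return 0;
    }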
diff --git a/queue-6.1/drm-i915-dp-fix-passing-the-correct-dpcd_rev-for-drm.patch b/queue-6.1/drm-i915-dp-fix-passing-the-correct-dpcd_rev-for-drm.patch
new file mode 100644 (file)
index 0000000..ee1500e
--- /dev/null
@@ -0,0 +1,42 @@
+From 232617028da8530bf010d2b095c3985e085efc4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Dec 2023 13:15:42 -0800
+Subject: drm/i915/dp: Fix passing the correct DPCD_REV for
+ drm_dp_set_phy_test_pattern
+
+From: Khaled Almahallawy <khaled.almahallawy@intel.com>
+
+[ Upstream commit 2bd7a06a1208aaacb4e7a2a5436c23bce8d70801 ]
+
+Using link_status to get DPCD_REV fails when disabling/defaulting
+phy pattern. Use intel_dp->dpcd to access DPCD_REV correctly.
+
+Fixes: 8cdf72711928 ("drm/i915/dp: Program vswing, pre-emphasis, test-pattern")
+Cc: Jani Nikula <jani.nikula@intel.com>
+Cc: Imre Deak <imre.deak@intel.com>
+Cc: Lee Shawn C <shawn.c.lee@intel.com>
+Signed-off-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20231213211542.3585105-3-khaled.almahallawy@intel.com
+(cherry picked from commit 3ee302ec22d6e1d7d1e6d381b0d507ee80f2135c)
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_dp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
+index 5970f4149090f..4699c21102261 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp.c
++++ b/drivers/gpu/drm/i915/display/intel_dp.c
+@@ -3707,7 +3707,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
+                         intel_dp->train_set, crtc_state->lane_count);
+       drm_dp_set_phy_test_pattern(&intel_dp->aux, data,
+-                                  link_status[DP_DPCD_REV]);
++                                  intel_dp->dpcd[DP_DPCD_REV]);
+ }
+ static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp)
+-- 
+2.43.0
+
diff --git a/queue-6.1/ethtool-don-t-propagate-eopnotsupp-from-dumps.patch b/queue-6.1/ethtool-don-t-propagate-eopnotsupp-from-dumps.patch
new file mode 100644 (file)
index 0000000..9af0257
--- /dev/null
@@ -0,0 +1,43 @@
+From e5e3d5fd00ba6004228b46e43f6ee0e8588c8fa3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Nov 2023 14:58:06 -0800
+Subject: ethtool: don't propagate EOPNOTSUPP from dumps
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit cbeb989e41f4094f54bec2cecce993f26f547bea ]
+
+The default dump handler needs to clear ret before returning.
+Otherwise, if the last interface returns an inconsequential
+error, that error will propagate to user space.
+
+This may confuse user space (ethtool CLI seems to ignore it,
+but YNL doesn't). It will also terminate the dump early
+for multi-skb dumps, because netlink core treats EOPNOTSUPP
+as a real error.
+
+Fixes: 728480f12442 ("ethtool: default handlers for GET requests")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20231126225806.2143528-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
+index 1a4c11356c96c..fc4ccecf9495c 100644
+--- a/net/ethtool/netlink.c
++++ b/net/ethtool/netlink.c
+@@ -509,7 +509,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
+ cont:
+                       idx++;
+               }
+-
++              ret = 0;
+       }
+       rtnl_unlock();
+-- 
+2.43.0
+
diff --git a/queue-6.1/ext4-convert-move_extent_per_page-to-use-folios.patch b/queue-6.1/ext4-convert-move_extent_per_page-to-use-folios.patch
new file mode 100644 (file)
index 0000000..feb31bd
--- /dev/null
@@ -0,0 +1,159 @@
+From 89cf2bd933e2b50444696df7ae8d806046d290e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 23:30:52 -0800
+Subject: ext4: convert move_extent_per_page() to use folios
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit 6dd8fe86fa84729538d8bed3149faf9c5886bb5b ]
+
+Patch series "Removing the try_to_release_page() wrapper", v3.
+
+This patchset replaces the remaining calls of try_to_release_page() with
+the folio equivalent: filemap_release_folio().  This allows us to remove
+the wrapper.
+
+This patch (of 4):
+
+Convert move_extent_per_page() to use folios.  This change removes 5 calls
+to compound_head() and is in preparation for the removal of the
+try_to_release_page() wrapper.
+
+Link: https://lkml.kernel.org/r/20221118073055.55694-1-vishal.moola@gmail.com
+Link: https://lkml.kernel.org/r/20221118073055.55694-2-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/move_extent.c | 52 ++++++++++++++++++++++++++-----------------
+ 1 file changed, 31 insertions(+), 21 deletions(-)
+
+diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
+index 044e34cd835c1..8dbb87edf24c4 100644
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -253,6 +253,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ {
+       struct inode *orig_inode = file_inode(o_filp);
+       struct page *pagep[2] = {NULL, NULL};
++      struct folio *folio[2] = {NULL, NULL};
+       handle_t *handle;
+       ext4_lblk_t orig_blk_offset, donor_blk_offset;
+       unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+@@ -313,6 +314,13 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+        * hold page's lock, if it is still the case data copy is not
+        * necessary, just swap data blocks between orig and donor.
+        */
++      folio[0] = page_folio(pagep[0]);
++      folio[1] = page_folio(pagep[1]);
++
++      VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
++      VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
++      VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);
++
+       if (unwritten) {
+               ext4_double_down_write_data_sem(orig_inode, donor_inode);
+               /* If any of extents in range became initialized we have to
+@@ -331,10 +339,10 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                       ext4_double_up_write_data_sem(orig_inode, donor_inode);
+                       goto data_copy;
+               }
+-              if ((page_has_private(pagep[0]) &&
+-                   !try_to_release_page(pagep[0], 0)) ||
+-                  (page_has_private(pagep[1]) &&
+-                   !try_to_release_page(pagep[1], 0))) {
++              if ((folio_has_private(folio[0]) &&
++                   !filemap_release_folio(folio[0], 0)) ||
++                  (folio_has_private(folio[1]) &&
++                   !filemap_release_folio(folio[1], 0))) {
+                       *err = -EBUSY;
+                       goto drop_data_sem;
+               }
+@@ -344,19 +352,21 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                                                  block_len_in_page, 1, err);
+       drop_data_sem:
+               ext4_double_up_write_data_sem(orig_inode, donor_inode);
+-              goto unlock_pages;
++              goto unlock_folios;
+       }
+ data_copy:
+-      *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
++      *err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size);
+       if (*err)
+-              goto unlock_pages;
++              goto unlock_folios;
+       /* At this point all buffers in range are uptodate, old mapping layout
+        * is no longer required, try to drop it now. */
+-      if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
+-          (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
++      if ((folio_has_private(folio[0]) &&
++              !filemap_release_folio(folio[0], 0)) ||
++          (folio_has_private(folio[1]) &&
++              !filemap_release_folio(folio[1], 0))) {
+               *err = -EBUSY;
+-              goto unlock_pages;
++              goto unlock_folios;
+       }
+       ext4_double_down_write_data_sem(orig_inode, donor_inode);
+       replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
+@@ -369,13 +379,13 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+               } else
+-                      goto unlock_pages;
++                      goto unlock_folios;
+       }
+       /* Perform all necessary steps similar write_begin()/write_end()
+        * but keeping in mind that i_size will not change */
+-      if (!page_has_buffers(pagep[0]))
+-              create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
+-      bh = page_buffers(pagep[0]);
++      if (!folio_buffers(folio[0]))
++              create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
++      bh = folio_buffers(folio[0]);
+       for (i = 0; i < data_offset_in_page; i++)
+               bh = bh->b_this_page;
+       for (i = 0; i < block_len_in_page; i++) {
+@@ -385,7 +395,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+               bh = bh->b_this_page;
+       }
+       if (!*err)
+-              *err = block_commit_write(pagep[0], from, from + replaced_size);
++              *err = block_commit_write(&folio[0]->page, from, from + replaced_size);
+       if (unlikely(*err < 0))
+               goto repair_branches;
+@@ -395,11 +405,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+       *err = ext4_jbd2_inode_add_write(handle, orig_inode,
+                       (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
+-unlock_pages:
+-      unlock_page(pagep[0]);
+-      put_page(pagep[0]);
+-      unlock_page(pagep[1]);
+-      put_page(pagep[1]);
++unlock_folios:
++      folio_unlock(folio[0]);
++      folio_put(folio[0]);
++      folio_unlock(folio[1]);
++      folio_put(folio[1]);
+ stop_journal:
+       ext4_journal_stop(handle);
+       if (*err == -ENOSPC &&
+@@ -430,7 +440,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+               *err = -EIO;
+       }
+       replaced_count = 0;
+-      goto unlock_pages;
++      goto unlock_folios;
+ }
+ /**
+-- 
+2.43.0
+
diff --git a/queue-6.1/f2fs-assign-default-compression-level.patch b/queue-6.1/f2fs-assign-default-compression-level.patch
new file mode 100644 (file)
index 0000000..f0c80be
--- /dev/null
@@ -0,0 +1,106 @@
+From e4a655eebbd80e0178fe542d71ec653c4f3486cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Jun 2023 12:58:34 -0700
+Subject: f2fs: assign default compression level
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+[ Upstream commit 00e120b5e4b5638cf19eee96d4332f2d100746ba ]
+
+Let's avoid any confusion from assigning compress_level=0 for LZ4HC and ZSTD.
+
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Stable-dep-of: f5f3bd903a5d ("f2fs: set the default compress_level on ioctl")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/compress.c |  3 +--
+ fs/f2fs/f2fs.h     |  2 ++
+ fs/f2fs/super.c    | 12 +++++++-----
+ 3 files changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
+index c3ba202a7c29f..4cb58e8d699e2 100644
+--- a/fs/f2fs/compress.c
++++ b/fs/f2fs/compress.c
+@@ -331,8 +331,6 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = {
+ #endif
+ #ifdef CONFIG_F2FS_FS_ZSTD
+-#define F2FS_ZSTD_DEFAULT_CLEVEL      1
+-
+ static int zstd_init_compress_ctx(struct compress_ctx *cc)
+ {
+       zstd_parameters params;
+@@ -341,6 +339,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
+       unsigned int workspace_size;
+       unsigned char level = F2FS_I(cc->inode)->i_compress_level;
++      /* Need to remain this for backward compatibility */
+       if (!level)
+               level = F2FS_ZSTD_DEFAULT_CLEVEL;
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 6fa3ac2097b27..5c76ba764b71f 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -1501,6 +1501,8 @@ struct compress_data {
+ #define F2FS_COMPRESSED_PAGE_MAGIC    0xF5F2C000
++#define F2FS_ZSTD_DEFAULT_CLEVEL      1
++
+ #define       COMPRESS_LEVEL_OFFSET   8
+ /* compress context */
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index 4f87e0e374c25..584fe00fdeeb1 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -613,14 +613,12 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
+ {
+ #ifdef CONFIG_F2FS_FS_LZ4HC
+       unsigned int level;
+-#endif
+       if (strlen(str) == 3) {
+-              F2FS_OPTION(sbi).compress_level = 0;
++              F2FS_OPTION(sbi).compress_level = LZ4HC_DEFAULT_CLEVEL;
+               return 0;
+       }
+-#ifdef CONFIG_F2FS_FS_LZ4HC
+       str += 3;
+       if (str[0] != ':') {
+@@ -638,6 +636,10 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
+       F2FS_OPTION(sbi).compress_level = level;
+       return 0;
+ #else
++      if (strlen(str) == 3) {
++              F2FS_OPTION(sbi).compress_level = 0;
++              return 0;
++      }
+       f2fs_info(sbi, "kernel doesn't support lz4hc compression");
+       return -EINVAL;
+ #endif
+@@ -651,7 +653,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
+       int len = 4;
+       if (strlen(str) == len) {
+-              F2FS_OPTION(sbi).compress_level = 0;
++              F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+               return 0;
+       }
+@@ -664,7 +666,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
+       if (kstrtouint(str + 1, 10, &level))
+               return -EINVAL;
+-      if (!level || level > zstd_max_clevel()) {
++      if (level < zstd_min_clevel() || level > zstd_max_clevel()) {
+               f2fs_info(sbi, "invalid zstd compress level: %d", level);
+               return -EINVAL;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/f2fs-clean-up-i_compress_flag-and-i_compress_level-u.patch b/queue-6.1/f2fs-clean-up-i_compress_flag-and-i_compress_level-u.patch
new file mode 100644 (file)
index 0000000..990b2a8
--- /dev/null
@@ -0,0 +1,135 @@
+From f8166c0421b9a097ec4c3230e5a09dec56b64c23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 Jan 2023 18:30:11 +0800
+Subject: f2fs: clean up i_compress_flag and i_compress_level usage
+
+From: Chao Yu <chao@kernel.org>
+
+[ Upstream commit b90e5086df6bf5ba819216d5ecf0667370bd565f ]
+
+.i_compress_level was introduced by commit 3fde13f817e2 ("f2fs: compress:
+support compress level"), but never be used.
+
+This patch updates as below:
+- load the high 8 bits of the on-disk .i_compress_flag into the in-memory
+.i_compress_level
+- load the low 8 bits of the on-disk .i_compress_flag into the in-memory
+.i_compress_flag
+- change the type of the in-memory .i_compress_flag from unsigned short to
+unsigned char.
+
+With the above changes, we can avoid unneeded bit shifts during
+.init_compress_ctx(), and shrink the size of struct f2fs_inode_info.
+
+Signed-off-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Stable-dep-of: f5f3bd903a5d ("f2fs: set the default compress_level on ioctl")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/compress.c |  8 +++-----
+ fs/f2fs/f2fs.h     |  7 +++----
+ fs/f2fs/inode.c    | 16 +++++++++++++---
+ 3 files changed, 19 insertions(+), 12 deletions(-)
+
+diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
+index 11d9dce994dbe..d509b47381d51 100644
+--- a/fs/f2fs/compress.c
++++ b/fs/f2fs/compress.c
+@@ -241,7 +241,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
+       unsigned int size = LZ4_MEM_COMPRESS;
+ #ifdef CONFIG_F2FS_FS_LZ4HC
+-      if (F2FS_I(cc->inode)->i_compress_flag >> COMPRESS_LEVEL_OFFSET)
++      if (F2FS_I(cc->inode)->i_compress_level)
+               size = LZ4HC_MEM_COMPRESS;
+ #endif
+@@ -267,8 +267,7 @@ static void lz4_destroy_compress_ctx(struct compress_ctx *cc)
+ #ifdef CONFIG_F2FS_FS_LZ4HC
+ static int lz4hc_compress_pages(struct compress_ctx *cc)
+ {
+-      unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
+-                                              COMPRESS_LEVEL_OFFSET;
++      unsigned char level = F2FS_I(cc->inode)->i_compress_level;
+       int len;
+       if (level)
+@@ -340,8 +339,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
+       zstd_cstream *stream;
+       void *workspace;
+       unsigned int workspace_size;
+-      unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
+-                                              COMPRESS_LEVEL_OFFSET;
++      unsigned char level = F2FS_I(cc->inode)->i_compress_level;
+       if (!level)
+               level = F2FS_ZSTD_DEFAULT_CLEVEL;
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index f56abb39601ac..faf1a4953e845 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -840,7 +840,7 @@ struct f2fs_inode_info {
+       unsigned char i_compress_algorithm;     /* algorithm type */
+       unsigned char i_log_cluster_size;       /* log of cluster size */
+       unsigned char i_compress_level;         /* compress level (lz4hc,zstd) */
+-      unsigned short i_compress_flag;         /* compress flag */
++      unsigned char i_compress_flag;          /* compress flag */
+       unsigned int i_cluster_size;            /* cluster size */
+       unsigned int atomic_write_cnt;
+@@ -4339,9 +4339,8 @@ static inline int set_compress_context(struct inode *inode)
+       if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
+               F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
+                       F2FS_OPTION(sbi).compress_level)
+-              F2FS_I(inode)->i_compress_flag |=
+-                              F2FS_OPTION(sbi).compress_level <<
+-                              COMPRESS_LEVEL_OFFSET;
++              F2FS_I(inode)->i_compress_level =
++                              F2FS_OPTION(sbi).compress_level;
+       F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
+       set_inode_flag(inode, FI_COMPRESSED_FILE);
+       stat_inc_compr_inode(inode);
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 1fc7760499f10..933554985d328 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -450,11 +450,17 @@ static int do_read_inode(struct inode *inode)
+                                       (fi->i_flags & F2FS_COMPR_FL)) {
+               if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
+                                       i_log_cluster_size)) {
++                      unsigned short compress_flag;
++
+                       atomic_set(&fi->i_compr_blocks,
+                                       le64_to_cpu(ri->i_compr_blocks));
+                       fi->i_compress_algorithm = ri->i_compress_algorithm;
+                       fi->i_log_cluster_size = ri->i_log_cluster_size;
+-                      fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag);
++                      compress_flag = le16_to_cpu(ri->i_compress_flag);
++                      fi->i_compress_level = compress_flag >>
++                                              COMPRESS_LEVEL_OFFSET;
++                      fi->i_compress_flag = compress_flag &
++                                      (BIT(COMPRESS_LEVEL_OFFSET) - 1);
+                       fi->i_cluster_size = 1 << fi->i_log_cluster_size;
+                       set_inode_flag(inode, FI_COMPRESSED_FILE);
+               }
+@@ -675,13 +681,17 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
+               if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
+                       F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
+                                                       i_log_cluster_size)) {
++                      unsigned short compress_flag;
++
+                       ri->i_compr_blocks =
+                               cpu_to_le64(atomic_read(
+                                       &F2FS_I(inode)->i_compr_blocks));
+                       ri->i_compress_algorithm =
+                               F2FS_I(inode)->i_compress_algorithm;
+-                      ri->i_compress_flag =
+-                              cpu_to_le16(F2FS_I(inode)->i_compress_flag);
++                      compress_flag = F2FS_I(inode)->i_compress_flag |
++                              F2FS_I(inode)->i_compress_level <<
++                                              COMPRESS_LEVEL_OFFSET;
++                      ri->i_compress_flag = cpu_to_le16(compress_flag);
+                       ri->i_log_cluster_size =
+                               F2FS_I(inode)->i_log_cluster_size;
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.1/f2fs-convert-to-use-bitmap-api.patch b/queue-6.1/f2fs-convert-to-use-bitmap-api.patch
new file mode 100644 (file)
index 0000000..95b95b7
--- /dev/null
@@ -0,0 +1,440 @@
+From 5a0421b515853a9187b83dfe12fc55938c7eaa84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Feb 2023 21:53:24 +0800
+Subject: f2fs: convert to use bitmap API
+
+From: Yangtao Li <frank.li@vivo.com>
+
+[ Upstream commit 447286ebadaafa551550704ff0b42eb08b1d1cb2 ]
+
+Let's use BIT() and GENMASK() instead of open-coding them.
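+
+For reference, a couple of concrete values (illustration only, not part of
+the change itself):
+
+ BIT(3)        == 0x08   /* 1UL << 3 */
+ GENMASK(3, 0) == 0x0f   /* bits 3..0 set, i.e. BIT(4) - 1 */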
+
+Signed-off-by: Yangtao Li <frank.li@vivo.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Stable-dep-of: f5f3bd903a5d ("f2fs: set the default compress_level on ioctl")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/checkpoint.c    |  2 +-
+ fs/f2fs/compress.c      |  4 ++--
+ fs/f2fs/data.c          | 12 ++++++------
+ fs/f2fs/dir.c           |  2 +-
+ fs/f2fs/f2fs.h          | 26 +++++++++++++-------------
+ fs/f2fs/file.c          |  2 +-
+ fs/f2fs/inode.c         |  4 ++--
+ fs/f2fs/node.h          | 20 +++++++++-----------
+ fs/f2fs/super.c         | 16 ++++++++--------
+ fs/f2fs/sysfs.c         |  2 +-
+ include/linux/f2fs_fs.h |  9 ++++-----
+ 11 files changed, 48 insertions(+), 51 deletions(-)
+
+diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
+index 5df04ed010cae..eb4d69f53337f 100644
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -984,7 +984,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
+       cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+       if (cur_page == cp2)
+-              cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
++              cp_blk_no += BIT(le32_to_cpu(fsb->log_blocks_per_seg));
+       for (i = 1; i < cp_blks; i++) {
+               void *sit_bitmap_ptr;
+diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
+index d509b47381d51..c3ba202a7c29f 100644
+--- a/fs/f2fs/compress.c
++++ b/fs/f2fs/compress.c
+@@ -673,7 +673,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
+       cc->cbuf->clen = cpu_to_le32(cc->clen);
+-      if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)
++      if (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))
+               chksum = f2fs_crc32(F2FS_I_SB(cc->inode),
+                                       cc->cbuf->cdata, cc->clen);
+       cc->cbuf->chksum = cpu_to_le32(chksum);
+@@ -771,7 +771,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
+       ret = cops->decompress_pages(dic);
+-      if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) {
++      if (!ret && (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))) {
+               u32 provided = le32_to_cpu(dic->cbuf->chksum);
+               u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen);
+diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
+index ea05710ca9bdf..3666c1fd77a64 100644
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -95,17 +95,17 @@ static enum count_type __read_io_type(struct page *page)
+ /* postprocessing steps for read bios */
+ enum bio_post_read_step {
+ #ifdef CONFIG_FS_ENCRYPTION
+-      STEP_DECRYPT    = 1 << 0,
++      STEP_DECRYPT    = BIT(0),
+ #else
+       STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
+ #endif
+ #ifdef CONFIG_F2FS_FS_COMPRESSION
+-      STEP_DECOMPRESS = 1 << 1,
++      STEP_DECOMPRESS = BIT(1),
+ #else
+       STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
+ #endif
+ #ifdef CONFIG_FS_VERITY
+-      STEP_VERITY     = 1 << 2,
++      STEP_VERITY     = BIT(2),
+ #else
+       STEP_VERITY     = 0,    /* compile out the verity-related code */
+ #endif
+@@ -409,7 +409,7 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
+ {
+-      unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
++      unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
+       unsigned int fua_flag, meta_flag, io_flag;
+       blk_opf_t op_flags = 0;
+@@ -431,9 +431,9 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
+        *    5 |    4 |   3 |    2 |    1 |   0 |
+        * Cold | Warm | Hot | Cold | Warm | Hot |
+        */
+-      if ((1 << fio->temp) & meta_flag)
++      if (BIT(fio->temp) & meta_flag)
+               op_flags |= REQ_META;
+-      if ((1 << fio->temp) & fua_flag)
++      if (BIT(fio->temp) & fua_flag)
+               op_flags |= REQ_FUA;
+       return op_flags;
+ }
+diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
+index 8373eba3a1337..510736d2ae110 100644
+--- a/fs/f2fs/dir.c
++++ b/fs/f2fs/dir.c
+@@ -29,7 +29,7 @@ static unsigned long dir_blocks(struct inode *inode)
+ static unsigned int dir_buckets(unsigned int level, int dir_level)
+ {
+       if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
+-              return 1 << (level + dir_level);
++              return BIT(level + dir_level);
+       else
+               return MAX_DIR_BUCKETS;
+ }
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index faf1a4953e845..6fa3ac2097b27 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -64,7 +64,7 @@ enum {
+ };
+ #ifdef CONFIG_F2FS_FAULT_INJECTION
+-#define F2FS_ALL_FAULT_TYPE           ((1 << FAULT_MAX) - 1)
++#define F2FS_ALL_FAULT_TYPE           (GENMASK(FAULT_MAX - 1, 0))
+ struct f2fs_fault_info {
+       atomic_t inject_ops;
+@@ -73,7 +73,7 @@ struct f2fs_fault_info {
+ };
+ extern const char *f2fs_fault_name[FAULT_MAX];
+-#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
++#define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type))
+ #endif
+ /*
+@@ -1412,7 +1412,7 @@ static inline void set_page_private_##name(struct page *page) \
+ static inline void clear_page_private_##name(struct page *page) \
+ { \
+       clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
+-      if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { \
++      if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) { \
+               set_page_private(page, 0); \
+               if (PagePrivate(page)) { \
+                       ClearPagePrivate(page); \
+@@ -1462,8 +1462,8 @@ static inline void set_page_private_data(struct page *page, unsigned long data)
+ static inline void clear_page_private_data(struct page *page)
+ {
+-      page_private(page) &= (1 << PAGE_PRIVATE_MAX) - 1;
+-      if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) {
++      page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0);
++      if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) {
+               set_page_private(page, 0);
+               if (PagePrivate(page)) {
+                       ClearPagePrivate(page);
+@@ -2882,7 +2882,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
+       int mask;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       return mask & *addr;
+ }
+@@ -2891,7 +2891,7 @@ static inline void f2fs_set_bit(unsigned int nr, char *addr)
+       int mask;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       *addr |= mask;
+ }
+@@ -2900,7 +2900,7 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr)
+       int mask;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       *addr &= ~mask;
+ }
+@@ -2910,7 +2910,7 @@ static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
+       int ret;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       ret = mask & *addr;
+       *addr |= mask;
+       return ret;
+@@ -2922,7 +2922,7 @@ static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
+       int ret;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       ret = mask & *addr;
+       *addr &= ~mask;
+       return ret;
+@@ -2933,7 +2933,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
+       int mask;
+       addr += (nr >> 3);
+-      mask = 1 << (7 - (nr & 0x07));
++      mask = BIT(7 - (nr & 0x07));
+       *addr ^= mask;
+ }
+@@ -4333,9 +4333,9 @@ static inline int set_compress_context(struct inode *inode)
+                       F2FS_OPTION(sbi).compress_log_size;
+       F2FS_I(inode)->i_compress_flag =
+                       F2FS_OPTION(sbi).compress_chksum ?
+-                              1 << COMPRESS_CHKSUM : 0;
++                              BIT(COMPRESS_CHKSUM) : 0;
+       F2FS_I(inode)->i_cluster_size =
+-                      1 << F2FS_I(inode)->i_log_cluster_size;
++                      BIT(F2FS_I(inode)->i_log_cluster_size);
+       if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
+               F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
+                       F2FS_OPTION(sbi).compress_level)
+diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
+index d0c17366ebf48..126c074deebdc 100644
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -3983,7 +3983,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
+       F2FS_I(inode)->i_compress_algorithm = option.algorithm;
+       F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
+-      F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
++      F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
+       f2fs_mark_inode_dirty_sync(inode, true);
+       if (!f2fs_is_compress_backend_ready(inode))
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 933554985d328..0010579f17368 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -460,8 +460,8 @@ static int do_read_inode(struct inode *inode)
+                       fi->i_compress_level = compress_flag >>
+                                               COMPRESS_LEVEL_OFFSET;
+                       fi->i_compress_flag = compress_flag &
+-                                      (BIT(COMPRESS_LEVEL_OFFSET) - 1);
+-                      fi->i_cluster_size = 1 << fi->i_log_cluster_size;
++                                      GENMASK(COMPRESS_LEVEL_OFFSET - 1, 0);
++                      fi->i_cluster_size = BIT(fi->i_log_cluster_size);
+                       set_inode_flag(inode, FI_COMPRESSED_FILE);
+               }
+       }
+diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
+index 0aa48704c77a0..7068f3ac036a5 100644
+--- a/fs/f2fs/node.h
++++ b/fs/f2fs/node.h
+@@ -93,17 +93,15 @@ static inline void copy_node_info(struct node_info *dst,
+ static inline void set_nat_flag(struct nat_entry *ne,
+                               unsigned int type, bool set)
+ {
+-      unsigned char mask = 0x01 << type;
+       if (set)
+-              ne->ni.flag |= mask;
++              ne->ni.flag |= BIT(type);
+       else
+-              ne->ni.flag &= ~mask;
++              ne->ni.flag &= ~BIT(type);
+ }
+ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
+ {
+-      unsigned char mask = 0x01 << type;
+-      return ne->ni.flag & mask;
++      return ne->ni.flag & BIT(type);
+ }
+ static inline void nat_reset_flag(struct nat_entry *ne)
+@@ -224,7 +222,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       block_addr -= nm_i->nat_blkaddr;
+-      block_addr ^= 1 << sbi->log_blocks_per_seg;
++      block_addr ^= BIT(sbi->log_blocks_per_seg);
+       return block_addr + nm_i->nat_blkaddr;
+ }
+@@ -394,7 +392,7 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
+ static inline int is_node(struct page *page, int type)
+ {
+       struct f2fs_node *rn = F2FS_NODE(page);
+-      return le32_to_cpu(rn->footer.flag) & (1 << type);
++      return le32_to_cpu(rn->footer.flag) & BIT(type);
+ }
+ #define is_cold_node(page)    is_node(page, COLD_BIT_SHIFT)
+@@ -407,9 +405,9 @@ static inline void set_cold_node(struct page *page, bool is_dir)
+       unsigned int flag = le32_to_cpu(rn->footer.flag);
+       if (is_dir)
+-              flag &= ~(0x1 << COLD_BIT_SHIFT);
++              flag &= ~BIT(COLD_BIT_SHIFT);
+       else
+-              flag |= (0x1 << COLD_BIT_SHIFT);
++              flag |= BIT(COLD_BIT_SHIFT);
+       rn->footer.flag = cpu_to_le32(flag);
+ }
+@@ -418,9 +416,9 @@ static inline void set_mark(struct page *page, int mark, int type)
+       struct f2fs_node *rn = F2FS_NODE(page);
+       unsigned int flag = le32_to_cpu(rn->footer.flag);
+       if (mark)
+-              flag |= (0x1 << type);
++              flag |= BIT(type);
+       else
+-              flag &= ~(0x1 << type);
++              flag &= ~BIT(type);
+       rn->footer.flag = cpu_to_le32(flag);
+ #ifdef CONFIG_F2FS_CHECK_FS
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index 1ba85ef97cbd3..4f87e0e374c25 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -898,8 +898,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
+                       if (args->from && match_int(args, &arg))
+                               return -EINVAL;
+                       if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
+-                              f2fs_warn(sbi, "Not support %d, larger than %d",
+-                                        1 << arg, BIO_MAX_VECS);
++                              f2fs_warn(sbi, "Not support %ld, larger than %d",
++                                      BIT(arg), BIO_MAX_VECS);
+                               return -EINVAL;
+                       }
+                       F2FS_OPTION(sbi).write_io_size_bits = arg;
+@@ -1340,7 +1340,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
+ #endif
+       if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
+-              f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
++              f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO",
+                        F2FS_IO_SIZE_KB(sbi));
+               return -EINVAL;
+       }
+@@ -3356,7 +3356,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
+       total_sections = le32_to_cpu(raw_super->section_count);
+       /* blocks_per_seg should be 512, given the above check */
+-      blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
++      blocks_per_seg = BIT(le32_to_cpu(raw_super->log_blocks_per_seg));
+       if (segment_count > F2FS_MAX_SEGMENT ||
+                               segment_count < F2FS_MIN_SEGMENTS) {
+@@ -3625,9 +3625,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
+       sbi->log_sectors_per_block =
+               le32_to_cpu(raw_super->log_sectors_per_block);
+       sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
+-      sbi->blocksize = 1 << sbi->log_blocksize;
++      sbi->blocksize = BIT(sbi->log_blocksize);
+       sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+-      sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
++      sbi->blocks_per_seg = BIT(sbi->log_blocks_per_seg);
+       sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+       sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+       sbi->total_sections = le32_to_cpu(raw_super->section_count);
+@@ -3883,7 +3883,7 @@ void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
+       f2fs_down_write(&sbi->sb_lock);
+-      if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1))
++      if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
+               raw_super->s_stop_reason[reason]++;
+       err = f2fs_commit_super(sbi, false);
+@@ -4033,7 +4033,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
+                         FDEV(i).start_blk, FDEV(i).end_blk);
+       }
+       f2fs_info(sbi,
+-                "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
++                "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi));
+       return 0;
+ }
+diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
+index 3d68bfa75cf2a..751a108e612ff 100644
+--- a/fs/f2fs/sysfs.c
++++ b/fs/f2fs/sysfs.c
+@@ -451,7 +451,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
+       if (ret < 0)
+               return ret;
+ #ifdef CONFIG_F2FS_FAULT_INJECTION
+-      if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
++      if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX))
+               return -EINVAL;
+       if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
+               return -EINVAL;
+diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
+index ee0d75d9a302d..1e0df607e40c4 100644
+--- a/include/linux/f2fs_fs.h
++++ b/include/linux/f2fs_fs.h
+@@ -40,9 +40,8 @@
+ #define F2FS_ENC_UTF8_12_1    1
+-#define F2FS_IO_SIZE(sbi)     (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
+-#define F2FS_IO_SIZE_KB(sbi)  (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */
+-#define F2FS_IO_SIZE_BYTES(sbi)       (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */
++#define F2FS_IO_SIZE(sbi)     BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
++#define F2FS_IO_SIZE_KB(sbi)  BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */
+ #define F2FS_IO_SIZE_BITS(sbi)        (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
+ #define F2FS_IO_SIZE_MASK(sbi)        (F2FS_IO_SIZE(sbi) - 1)
+ #define F2FS_IO_ALIGNED(sbi)  (F2FS_IO_SIZE(sbi) > 1)
+@@ -340,7 +339,7 @@ enum {
+       OFFSET_BIT_SHIFT
+ };
+-#define OFFSET_BIT_MASK               (0x07)  /* (0x01 << OFFSET_BIT_SHIFT) - 1 */
++#define OFFSET_BIT_MASK               GENMASK(OFFSET_BIT_SHIFT - 1, 0)
+ struct node_footer {
+       __le32 nid;             /* node id */
+@@ -545,7 +544,7 @@ typedef __le32     f2fs_hash_t;
+ #define MAX_DIR_HASH_DEPTH    63
+ /* MAX buckets in one level of dir */
+-#define MAX_DIR_BUCKETS               (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
++#define MAX_DIR_BUCKETS               BIT((MAX_DIR_HASH_DEPTH / 2) - 1)
+ /*
+  * space utilization of regular dentry and inline dentry (w/o extra reservation)
+-- 
+2.43.0
+
diff --git a/queue-6.1/f2fs-set-the-default-compress_level-on-ioctl.patch b/queue-6.1/f2fs-set-the-default-compress_level-on-ioctl.patch
new file mode 100644 (file)
index 0000000..3ed08a4
--- /dev/null
@@ -0,0 +1,47 @@
+From 58e5af6fa360d2c24949fe3057b862c27142ed6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Sep 2023 15:41:42 -0700
+Subject: f2fs: set the default compress_level on ioctl
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+[ Upstream commit f5f3bd903a5d3e3b2ba89f11e0e29db25e60c048 ]
+
+Otherwise, we'll get a broken inode.
+
+ # touch $FILE
+ # f2fs_io setflags compression $FILE
+ # f2fs_io set_coption 2 8 $FILE
+
+[  112.227612] F2FS-fs (dm-51): sanity_check_compress_inode: inode (ino=8d3fe) has unsupported compress level: 0, run fsck to fix
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/file.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
+index 126c074deebdc..9b9fb3c57ec6c 100644
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -3984,6 +3984,15 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
+       F2FS_I(inode)->i_compress_algorithm = option.algorithm;
+       F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
+       F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
++      /* Set default level */
++      if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD)
++              F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
++      else
++              F2FS_I(inode)->i_compress_level = 0;
++      /* Adjust mount option level */
++      if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
++          F2FS_OPTION(sbi).compress_level)
++              F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level;
+       f2fs_mark_inode_dirty_sync(inode, true);
+       if (!f2fs_is_compress_backend_ready(inode))
+-- 
+2.43.0
+
diff --git a/queue-6.1/fbdev-imsttfb-fix-double-free-in-probe.patch b/queue-6.1/fbdev-imsttfb-fix-double-free-in-probe.patch
new file mode 100644 (file)
index 0000000..eeba022
--- /dev/null
@@ -0,0 +1,51 @@
+From 0dba7e14edb61efe1ef25501ef2902a7619970f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Oct 2023 15:04:56 +0300
+Subject: fbdev: imsttfb: fix double free in probe()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit e08c30efda21ef4c0ec084a3a9581c220b442ba9 ]
+
+The init_imstt() function calls framebuffer_release() on error and then
+the probe() function calls it again.  It should only be done in probe.
+
+Fixes: 518ecb6a209f ("fbdev: imsttfb: Fix error path of imsttfb_probe()")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imsttfb.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
+index 3d1ae5267a738..aa51cb72cbba5 100644
+--- a/drivers/video/fbdev/imsttfb.c
++++ b/drivers/video/fbdev/imsttfb.c
+@@ -1419,7 +1419,6 @@ static int init_imstt(struct fb_info *info)
+       if ((info->var.xres * info->var.yres) * (info->var.bits_per_pixel >> 3) > info->fix.smem_len
+           || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) {
+               printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel);
+-              framebuffer_release(info);
+               return -ENODEV;
+       }
+@@ -1452,14 +1451,11 @@ static int init_imstt(struct fb_info *info)
+                     FBINFO_HWACCEL_FILLRECT |
+                     FBINFO_HWACCEL_YPAN;
+-      if (fb_alloc_cmap(&info->cmap, 0, 0)) {
+-              framebuffer_release(info);
++      if (fb_alloc_cmap(&info->cmap, 0, 0))
+               return -ENODEV;
+-      }
+       if (register_framebuffer(info) < 0) {
+               fb_dealloc_cmap(&info->cmap);
+-              framebuffer_release(info);
+               return -ENODEV;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/fbdev-imsttfb-release-framebuffer-and-dealloc-cmap-o.patch b/queue-6.1/fbdev-imsttfb-release-framebuffer-and-dealloc-cmap-o.patch
new file mode 100644 (file)
index 0000000..8571648
--- /dev/null
@@ -0,0 +1,40 @@
+From 1d9d0ecc1ce7f53db132d35e037803dd1265e7a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 May 2023 11:28:36 +0200
+Subject: fbdev: imsttfb: Release framebuffer and dealloc cmap on error path
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit 5cf9a090a39c97f4506b7b53739d469b1c05a7e9 ]
+
+Add missing cleanups in the error path.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Stable-dep-of: e08c30efda21 ("fbdev: imsttfb: fix double free in probe()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imsttfb.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
+index b194e71f07bfc..3d1ae5267a738 100644
+--- a/drivers/video/fbdev/imsttfb.c
++++ b/drivers/video/fbdev/imsttfb.c
+@@ -1452,9 +1452,13 @@ static int init_imstt(struct fb_info *info)
+                     FBINFO_HWACCEL_FILLRECT |
+                     FBINFO_HWACCEL_YPAN;
+-      fb_alloc_cmap(&info->cmap, 0, 0);
++      if (fb_alloc_cmap(&info->cmap, 0, 0)) {
++              framebuffer_release(info);
++              return -ENODEV;
++      }
+       if (register_framebuffer(info) < 0) {
++              fb_dealloc_cmap(&info->cmap);
+               framebuffer_release(info);
+               return -ENODEV;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/filemap-add-a-per-mapping-stable-writes-flag.patch b/queue-6.1/filemap-add-a-per-mapping-stable-writes-flag.patch
new file mode 100644 (file)
index 0000000..8d79232
--- /dev/null
@@ -0,0 +1,103 @@
+From 28a5490b3586d1c511530d0848ade4165e206e96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Oct 2023 16:10:17 +0200
+Subject: filemap: add a per-mapping stable writes flag
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 762321dab9a72760bf9aec48362f932717c9424d ]
+
+folio_wait_stable waits for writeback to finish before modifying the
+contents of a folio again, e.g. to support checksumming of the data
+in the block integrity code.
+
+Currently this behavior is controlled by the SB_I_STABLE_WRITES flag
+on the super_block, which means it is uniform for the entire file system.
+This is wrong for the block device pseudofs which is shared by all
+block devices, or file systems that can use multiple devices like XFS
+with the RT subvolume or btrfs (although btrfs currently reimplements
+folio_wait_stable anyway).
+
+Add a per-address_space AS_STABLE_WRITES flag to control the behavior
+in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
+to initialize AS_STABLE_WRITES to the existing default which covers
+most cases.
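+
+As a usage sketch (not part of this patch; the helper name is made up), a
+filesystem or driver that needs stable pages for a particular inode could
+set the flag directly on its mapping:
+
+ #include <linux/pagemap.h>
+
+ static void example_enable_stable_writes(struct inode *inode)
+ {
+         /* wait for writeback before folio contents may be modified again */
+         mapping_set_stable_writes(inode->i_mapping);
+ }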
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20231025141020.192413-2-hch@lst.de
+Tested-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/inode.c              |  2 ++
+ include/linux/pagemap.h | 17 +++++++++++++++++
+ mm/page-writeback.c     |  2 +-
+ 3 files changed, 20 insertions(+), 1 deletion(-)
+
+diff --git a/fs/inode.c b/fs/inode.c
+index 73ad1b0d47758..8cfda7a6d5900 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
+       lockdep_set_class_and_name(&mapping->invalidate_lock,
+                                  &sb->s_type->invalidate_lock_key,
+                                  "mapping.invalidate_lock");
++      if (sb->s_iflags & SB_I_STABLE_WRITES)
++              mapping_set_stable_writes(mapping);
+       inode->i_private = NULL;
+       inode->i_mapping = mapping;
+       INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index fdbb90ae56c70..1be5a1fa6a3a8 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -200,6 +200,8 @@ enum mapping_flags {
+       AS_NO_WRITEBACK_TAGS = 5,
+       AS_LARGE_FOLIO_SUPPORT = 6,
+       AS_RELEASE_ALWAYS,      /* Call ->release_folio(), even if no private data */
++      AS_STABLE_WRITES,       /* must wait for writeback before modifying
++                                 folio contents */
+ };
+ /**
+@@ -285,6 +287,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
+       clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+ }
++static inline bool mapping_stable_writes(const struct address_space *mapping)
++{
++      return test_bit(AS_STABLE_WRITES, &mapping->flags);
++}
++
++static inline void mapping_set_stable_writes(struct address_space *mapping)
++{
++      set_bit(AS_STABLE_WRITES, &mapping->flags);
++}
++
++static inline void mapping_clear_stable_writes(struct address_space *mapping)
++{
++      clear_bit(AS_STABLE_WRITES, &mapping->flags);
++}
++
+ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
+ {
+       return mapping->gfp_mask;
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index 7e9d8d857ecca..de5f69921b946 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -3078,7 +3078,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
+  */
+ void folio_wait_stable(struct folio *folio)
+ {
+-      if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
++      if (mapping_stable_writes(folio_mapping(folio)))
+               folio_wait_writeback(folio);
+ }
+ EXPORT_SYMBOL_GPL(folio_wait_stable);
+-- 
+2.43.0
+
diff --git a/queue-6.1/firmware-arm_scmi-fix-frequency-truncation-by-promot.patch b/queue-6.1/firmware-arm_scmi-fix-frequency-truncation-by-promot.patch
new file mode 100644 (file)
index 0000000..4ab2bec
--- /dev/null
@@ -0,0 +1,54 @@
+From 0be29a4228fb46ed71ceb9d3ce17be8b03862eba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Nov 2023 20:43:42 +0000
+Subject: firmware: arm_scmi: Fix frequency truncation by promoting multiplier
+ type
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+[ Upstream commit 8e3c98d9187e09274fc000a7d1a77b070a42d259 ]
+
+Fix the possible frequency truncation for all values equal to or greater than
+4GHz on 64bit machines by updating the multiplier 'mult_factor' to
+'unsigned long' type. It is also possible that the multiplier itself can
+be greater than or equal to 2^32. So we need to also fix the equation
+computing the value of the multiplier.
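+
+As an illustration with made-up numbers (not taken from a specific platform):
+
+ sustained_freq_khz   = 5000000       /* 5 GHz */
+ sustained_perf_level = 1
+ mult_factor          = 5000000 * 1000 / 1
+
+5 * 10^9 fits neither in the old u32 'mult_factor' nor in the 32-bit
+intermediate product, hence the promotion to 'unsigned long' and the
+1000UL constant.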
+
+Fixes: a9e3fbfaa0ff ("firmware: arm_scmi: add initial support for performance protocol")
+Reported-by: Sibi Sankar <quic_sibis@quicinc.com>
+Closes: https://lore.kernel.org/all/20231129065748.19871-3-quic_sibis@quicinc.com/
+Cc: Cristian Marussi <cristian.marussi@arm.com>
+Link: https://lore.kernel.org/r/20231130204343.503076-1-sudeep.holla@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/perf.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
+index 431bda9165c3d..2775bcafe40f6 100644
+--- a/drivers/firmware/arm_scmi/perf.c
++++ b/drivers/firmware/arm_scmi/perf.c
+@@ -131,7 +131,7 @@ struct perf_dom_info {
+       u32 opp_count;
+       u32 sustained_freq_khz;
+       u32 sustained_perf_level;
+-      u32 mult_factor;
++      unsigned long mult_factor;
+       char name[SCMI_MAX_STR_SIZE];
+       struct scmi_opp opp[MAX_OPPS];
+       struct scmi_fc_info *fc_info;
+@@ -223,8 +223,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
+                       dom_info->mult_factor = 1000;
+               else
+                       dom_info->mult_factor =
+-                                      (dom_info->sustained_freq_khz * 1000) /
+-                                      dom_info->sustained_perf_level;
++                                      (dom_info->sustained_freq_khz * 1000UL)
++                                      / dom_info->sustained_perf_level;
+               strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/genirq-affinity-don-t-pass-irq_affinity_desc-array-t.patch b/queue-6.1/genirq-affinity-don-t-pass-irq_affinity_desc-array-t.patch
new file mode 100644 (file)
index 0000000..c12b7d6
--- /dev/null
@@ -0,0 +1,142 @@
+From 88fe3a4d6d033d9103e986952d44d4c647deba38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 10:29:02 +0800
+Subject: genirq/affinity: Don't pass irq_affinity_desc array to
+ irq_build_affinity_masks
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit e7bdd7f0cbd1c001bb9b4d3313edc5ee094bc3f8 ]
+
+Prepare for abstracting irq_build_affinity_masks() into a public function
+for assigning all CPUs evenly into several groups.
+
+Don't pass irq_affinity_desc array to irq_build_affinity_masks, instead
+return a cpumask array by storing each assigned group into one element of
+the array.
+
+This allows providing a generic interface for grouping all CPUs evenly
+from a NUMA and CPU locality viewpoint, and the cost is one extra allocation
+in irq_build_affinity_masks(), which should be fine since it is done via
+GFP_KERNEL and irq_build_affinity_masks() is a slow path anyway.
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/20221227022905.352674-4-ming.lei@redhat.com
+Stable-dep-of: 0263f92fadbb ("lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/affinity.c | 34 ++++++++++++++++++++++++----------
+ 1 file changed, 24 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index da6379cd27fd4..00bba1020ecb2 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -249,7 +249,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+                                     cpumask_var_t *node_to_cpumask,
+                                     const struct cpumask *cpu_mask,
+                                     struct cpumask *nmsk,
+-                                    struct irq_affinity_desc *masks)
++                                    struct cpumask *masks)
+ {
+       unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
+       unsigned int last_affv = numvecs;
+@@ -270,7 +270,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+               for_each_node_mask(n, nodemsk) {
+                       /* Ensure that only CPUs which are in both masks are set */
+                       cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+-                      cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
++                      cpumask_or(&masks[curvec], &masks[curvec], nmsk);
+                       if (++curvec == last_affv)
+                               curvec = 0;
+               }
+@@ -321,7 +321,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+                        */
+                       if (curvec >= last_affv)
+                               curvec = 0;
+-                      irq_spread_init_one(&masks[curvec].mask, nmsk,
++                      irq_spread_init_one(&masks[curvec], nmsk,
+                                               cpus_per_vec);
+               }
+               done += nv->nvectors;
+@@ -335,16 +335,16 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+  *    1) spread present CPU on these vectors
+  *    2) spread other possible CPUs on these vectors
+  */
+-static int irq_build_affinity_masks(unsigned int numvecs,
+-                                  struct irq_affinity_desc *masks)
++static struct cpumask *irq_build_affinity_masks(unsigned int numvecs)
+ {
+       unsigned int curvec = 0, nr_present = 0, nr_others = 0;
+       cpumask_var_t *node_to_cpumask;
+       cpumask_var_t nmsk, npresmsk;
+       int ret = -ENOMEM;
++      struct cpumask *masks = NULL;
+       if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+-              return ret;
++              return NULL;
+       if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
+               goto fail_nmsk;
+@@ -353,6 +353,10 @@ static int irq_build_affinity_masks(unsigned int numvecs,
+       if (!node_to_cpumask)
+               goto fail_npresmsk;
++      masks = kcalloc(numvecs, sizeof(*masks), GFP_KERNEL);
++      if (!masks)
++              goto fail_node_to_cpumask;
++
+       /* Stabilize the cpumasks */
+       cpus_read_lock();
+       build_node_to_cpumask(node_to_cpumask);
+@@ -386,6 +390,7 @@ static int irq_build_affinity_masks(unsigned int numvecs,
+       if (ret >= 0)
+               WARN_ON(nr_present + nr_others < numvecs);
++ fail_node_to_cpumask:
+       free_node_to_cpumask(node_to_cpumask);
+  fail_npresmsk:
+@@ -393,7 +398,11 @@ static int irq_build_affinity_masks(unsigned int numvecs,
+  fail_nmsk:
+       free_cpumask_var(nmsk);
+-      return ret < 0 ? ret : 0;
++      if (ret < 0) {
++              kfree(masks);
++              return NULL;
++      }
++      return masks;
+ }
+ static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+@@ -457,13 +466,18 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+        */
+       for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+               unsigned int this_vecs = affd->set_size[i];
+-              int ret;
++              int j;
++              struct cpumask *result = irq_build_affinity_masks(this_vecs);
+-              ret = irq_build_affinity_masks(this_vecs, &masks[curvec]);
+-              if (ret) {
++              if (!result) {
+                       kfree(masks);
+                       return NULL;
+               }
++
++              for (j = 0; j < this_vecs; j++)
++                      cpumask_copy(&masks[curvec + j].mask, &result[j]);
++              kfree(result);
++
+               curvec += this_vecs;
+               usedvecs += this_vecs;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/genirq-affinity-move-group_cpus_evenly-into-lib.patch b/queue-6.1/genirq-affinity-move-group_cpus_evenly-into-lib.patch
new file mode 100644 (file)
index 0000000..fb55891
--- /dev/null
@@ -0,0 +1,920 @@
+From ef4de3476be1d045915045b849fb143020fc8b84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 10:29:04 +0800
+Subject: genirq/affinity: Move group_cpus_evenly() into lib/
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit f7b3ea8cf72f3d6060fe08e461805181e7450a13 ]
+
+group_cpus_evenly() has become a generic function which can be used by
+subsystems other than the interrupt subsystem, so move it into lib/.
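+
+A minimal caller sketch (illustrative only; the group count and the logging
+are made up, not part of this patch):
+
+ #include <linux/group_cpus.h>
+ #include <linux/slab.h>
+
+ /* split the CPUs into numgrps evenly sized groups and log each mask */
+ static int example_show_groups(unsigned int numgrps)
+ {
+         struct cpumask *masks = group_cpus_evenly(numgrps);
+         unsigned int i;
+
+         if (!masks)
+                 return -ENOMEM;
+         for (i = 0; i < numgrps; i++)
+                 pr_info("group %u: %*pbl\n", i, cpumask_pr_args(&masks[i]));
+         kfree(masks);   /* the array is kcalloc()'d by group_cpus_evenly() */
+         return 0;
+ }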
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/20221227022905.352674-6-ming.lei@redhat.com
+Stable-dep-of: 0263f92fadbb ("lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS                |   2 +
+ include/linux/group_cpus.h |  14 ++
+ kernel/irq/affinity.c      | 398 +---------------------------------
+ lib/Makefile               |   2 +
+ lib/group_cpus.c           | 427 +++++++++++++++++++++++++++++++++++++
+ 5 files changed, 446 insertions(+), 397 deletions(-)
+ create mode 100644 include/linux/group_cpus.h
+ create mode 100644 lib/group_cpus.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 07a9c274c0e29..13d1078808bb5 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -10803,6 +10803,8 @@ L:     linux-kernel@vger.kernel.org
+ S:    Maintained
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
+ F:    kernel/irq/
++F:    include/linux/group_cpus.h
++F:    lib/group_cpus.c
+ IRQCHIP DRIVERS
+ M:    Thomas Gleixner <tglx@linutronix.de>
+diff --git a/include/linux/group_cpus.h b/include/linux/group_cpus.h
+new file mode 100644
+index 0000000000000..e42807ec61f6e
+--- /dev/null
++++ b/include/linux/group_cpus.h
+@@ -0,0 +1,14 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2016 Thomas Gleixner.
++ * Copyright (C) 2016-2017 Christoph Hellwig.
++ */
++
++#ifndef __LINUX_GROUP_CPUS_H
++#define __LINUX_GROUP_CPUS_H
++#include <linux/kernel.h>
++#include <linux/cpu.h>
++
++struct cpumask *group_cpus_evenly(unsigned int numgrps);
++
++#endif
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index 54083331f1bcb..44a4eba80315c 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -7,403 +7,7 @@
+ #include <linux/kernel.h>
+ #include <linux/slab.h>
+ #include <linux/cpu.h>
+-#include <linux/sort.h>
+-
+-static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+-                              unsigned int cpus_per_grp)
+-{
+-      const struct cpumask *siblmsk;
+-      int cpu, sibl;
+-
+-      for ( ; cpus_per_grp > 0; ) {
+-              cpu = cpumask_first(nmsk);
+-
+-              /* Should not happen, but I'm too lazy to think about it */
+-              if (cpu >= nr_cpu_ids)
+-                      return;
+-
+-              cpumask_clear_cpu(cpu, nmsk);
+-              cpumask_set_cpu(cpu, irqmsk);
+-              cpus_per_grp--;
+-
+-              /* If the cpu has siblings, use them first */
+-              siblmsk = topology_sibling_cpumask(cpu);
+-              for (sibl = -1; cpus_per_grp > 0; ) {
+-                      sibl = cpumask_next(sibl, siblmsk);
+-                      if (sibl >= nr_cpu_ids)
+-                              break;
+-                      if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+-                              continue;
+-                      cpumask_set_cpu(sibl, irqmsk);
+-                      cpus_per_grp--;
+-              }
+-      }
+-}
+-
+-static cpumask_var_t *alloc_node_to_cpumask(void)
+-{
+-      cpumask_var_t *masks;
+-      int node;
+-
+-      masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
+-      if (!masks)
+-              return NULL;
+-
+-      for (node = 0; node < nr_node_ids; node++) {
+-              if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
+-                      goto out_unwind;
+-      }
+-
+-      return masks;
+-
+-out_unwind:
+-      while (--node >= 0)
+-              free_cpumask_var(masks[node]);
+-      kfree(masks);
+-      return NULL;
+-}
+-
+-static void free_node_to_cpumask(cpumask_var_t *masks)
+-{
+-      int node;
+-
+-      for (node = 0; node < nr_node_ids; node++)
+-              free_cpumask_var(masks[node]);
+-      kfree(masks);
+-}
+-
+-static void build_node_to_cpumask(cpumask_var_t *masks)
+-{
+-      int cpu;
+-
+-      for_each_possible_cpu(cpu)
+-              cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
+-}
+-
+-static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
+-                              const struct cpumask *mask, nodemask_t *nodemsk)
+-{
+-      int n, nodes = 0;
+-
+-      /* Calculate the number of nodes in the supplied affinity mask */
+-      for_each_node(n) {
+-              if (cpumask_intersects(mask, node_to_cpumask[n])) {
+-                      node_set(n, *nodemsk);
+-                      nodes++;
+-              }
+-      }
+-      return nodes;
+-}
+-
+-struct node_groups {
+-      unsigned id;
+-
+-      union {
+-              unsigned ngroups;
+-              unsigned ncpus;
+-      };
+-};
+-
+-static int ncpus_cmp_func(const void *l, const void *r)
+-{
+-      const struct node_groups *ln = l;
+-      const struct node_groups *rn = r;
+-
+-      return ln->ncpus - rn->ncpus;
+-}
+-
+-/*
+- * Allocate group number for each node, so that for each node:
+- *
+- * 1) the allocated number is >= 1
+- *
+- * 2) the allocated number is <= active CPU number of this node
+- *
+- * The actual allocated total groups may be less than @numgrps when
+- * active total CPU number is less than @numgrps.
+- *
+- * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
+- * for each node.
+- */
+-static void alloc_nodes_groups(unsigned int numgrps,
+-                             cpumask_var_t *node_to_cpumask,
+-                             const struct cpumask *cpu_mask,
+-                             const nodemask_t nodemsk,
+-                             struct cpumask *nmsk,
+-                             struct node_groups *node_groups)
+-{
+-      unsigned n, remaining_ncpus = 0;
+-
+-      for (n = 0; n < nr_node_ids; n++) {
+-              node_groups[n].id = n;
+-              node_groups[n].ncpus = UINT_MAX;
+-      }
+-
+-      for_each_node_mask(n, nodemsk) {
+-              unsigned ncpus;
+-
+-              cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+-              ncpus = cpumask_weight(nmsk);
+-
+-              if (!ncpus)
+-                      continue;
+-              remaining_ncpus += ncpus;
+-              node_groups[n].ncpus = ncpus;
+-      }
+-
+-      numgrps = min_t(unsigned, remaining_ncpus, numgrps);
+-
+-      sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
+-           ncpus_cmp_func, NULL);
+-
+-      /*
+-       * Allocate groups for each node according to the ratio of this
+-       * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is
+-       * bigger than number of active numa nodes. Always start the
+-       * allocation from the node with minimized nr_cpus.
+-       *
+-       * This way guarantees that each active node gets allocated at
+-       * least one group, and the theory is simple: over-allocation
+-       * is only done when this node is assigned by one group, so
+-       * other nodes will be allocated >= 1 groups, since 'numgrps' is
+-       * bigger than number of numa nodes.
+-       *
+-       * One perfect invariant is that number of allocated groups for
+-       * each node is <= CPU count of this node:
+-       *
+-       * 1) suppose there are two nodes: A and B
+-       *      ncpu(X) is CPU count of node X
+-       *      grps(X) is the group count allocated to node X via this
+-       *      algorithm
+-       *
+-       *      ncpu(A) <= ncpu(B)
+-       *      ncpu(A) + ncpu(B) = N
+-       *      grps(A) + grps(B) = G
+-       *
+-       *      grps(A) = max(1, round_down(G * ncpu(A) / N))
+-       *      grps(B) = G - grps(A)
+-       *
+-       *      both N and G are integer, and 2 <= G <= N, suppose
+-       *      G = N - delta, and 0 <= delta <= N - 2
+-       *
+-       * 2) obviously grps(A) <= ncpu(A) because:
+-       *
+-       *      if grps(A) is 1, then grps(A) <= ncpu(A) given
+-       *      ncpu(A) >= 1
+-       *
+-       *      otherwise,
+-       *              grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N
+-       *
+-       * 3) prove how grps(B) <= ncpu(B):
+-       *
+-       *      if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be
+-       *      over-allocated, so grps(B) <= ncpu(B),
+-       *
+-       *      otherwise:
+-       *
+-       *      grps(A) =
+-       *              round_down(G * ncpu(A) / N) =
+-       *              round_down((N - delta) * ncpu(A) / N) =
+-       *              round_down((N * ncpu(A) - delta * ncpu(A)) / N)  >=
+-       *              round_down((N * ncpu(A) - delta * N) / N)        =
+-       *              cpu(A) - delta
+-       *
+-       *      then:
+-       *
+-       *      grps(A) - G >= ncpu(A) - delta - G
+-       *      =>
+-       *      G - grps(A) <= G + delta - ncpu(A)
+-       *      =>
+-       *      grps(B) <= N - ncpu(A)
+-       *      =>
+-       *      grps(B) <= cpu(B)
+-       *
+-       * For nodes >= 3, it can be thought as one node and another big
+-       * node given that is exactly what this algorithm is implemented,
+-       * and we always re-calculate 'remaining_ncpus' & 'numgrps', and
+-       * finally for each node X: grps(X) <= ncpu(X).
+-       *
+-       */
+-      for (n = 0; n < nr_node_ids; n++) {
+-              unsigned ngroups, ncpus;
+-
+-              if (node_groups[n].ncpus == UINT_MAX)
+-                      continue;
+-
+-              WARN_ON_ONCE(numgrps == 0);
+-
+-              ncpus = node_groups[n].ncpus;
+-              ngroups = max_t(unsigned, 1,
+-                               numgrps * ncpus / remaining_ncpus);
+-              WARN_ON_ONCE(ngroups > ncpus);
+-
+-              node_groups[n].ngroups = ngroups;
+-
+-              remaining_ncpus -= ncpus;
+-              numgrps -= ngroups;
+-      }
+-}
+-
+-static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
+-                             cpumask_var_t *node_to_cpumask,
+-                             const struct cpumask *cpu_mask,
+-                             struct cpumask *nmsk, struct cpumask *masks)
+-{
+-      unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
+-      unsigned int last_grp = numgrps;
+-      unsigned int curgrp = startgrp;
+-      nodemask_t nodemsk = NODE_MASK_NONE;
+-      struct node_groups *node_groups;
+-
+-      if (cpumask_empty(cpu_mask))
+-              return 0;
+-
+-      nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
+-
+-      /*
+-       * If the number of nodes in the mask is greater than or equal the
+-       * number of groups we just spread the groups across the nodes.
+-       */
+-      if (numgrps <= nodes) {
+-              for_each_node_mask(n, nodemsk) {
+-                      /* Ensure that only CPUs which are in both masks are set */
+-                      cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+-                      cpumask_or(&masks[curgrp], &masks[curgrp], nmsk);
+-                      if (++curgrp == last_grp)
+-                              curgrp = 0;
+-              }
+-              return numgrps;
+-      }
+-
+-      node_groups = kcalloc(nr_node_ids,
+-                             sizeof(struct node_groups),
+-                             GFP_KERNEL);
+-      if (!node_groups)
+-              return -ENOMEM;
+-
+-      /* allocate group number for each node */
+-      alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
+-                         nodemsk, nmsk, node_groups);
+-      for (i = 0; i < nr_node_ids; i++) {
+-              unsigned int ncpus, v;
+-              struct node_groups *nv = &node_groups[i];
+-
+-              if (nv->ngroups == UINT_MAX)
+-                      continue;
+-
+-              /* Get the cpus on this node which are in the mask */
+-              cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
+-              ncpus = cpumask_weight(nmsk);
+-              if (!ncpus)
+-                      continue;
+-
+-              WARN_ON_ONCE(nv->ngroups > ncpus);
+-
+-              /* Account for rounding errors */
+-              extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
+-
+-              /* Spread allocated groups on CPUs of the current node */
+-              for (v = 0; v < nv->ngroups; v++, curgrp++) {
+-                      cpus_per_grp = ncpus / nv->ngroups;
+-
+-                      /* Account for extra groups to compensate rounding errors */
+-                      if (extra_grps) {
+-                              cpus_per_grp++;
+-                              --extra_grps;
+-                      }
+-
+-                      /*
+-                       * wrapping has to be considered given 'startgrp'
+-                       * may start anywhere
+-                       */
+-                      if (curgrp >= last_grp)
+-                              curgrp = 0;
+-                      grp_spread_init_one(&masks[curgrp], nmsk,
+-                                              cpus_per_grp);
+-              }
+-              done += nv->ngroups;
+-      }
+-      kfree(node_groups);
+-      return done;
+-}
+-
+-/*
+- * build affinity in two stages for each group, and try to put close CPUs
+- * in viewpoint of CPU and NUMA locality into same group, and we run
+- * two-stage grouping:
+- *
+- *    1) allocate present CPUs on these groups evenly first
+- *    2) allocate other possible CPUs on these groups evenly
+- */
+-static struct cpumask *group_cpus_evenly(unsigned int numgrps)
+-{
+-      unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
+-      cpumask_var_t *node_to_cpumask;
+-      cpumask_var_t nmsk, npresmsk;
+-      int ret = -ENOMEM;
+-      struct cpumask *masks = NULL;
+-
+-      if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+-              return NULL;
+-
+-      if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
+-              goto fail_nmsk;
+-
+-      node_to_cpumask = alloc_node_to_cpumask();
+-      if (!node_to_cpumask)
+-              goto fail_npresmsk;
+-
+-      masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
+-      if (!masks)
+-              goto fail_node_to_cpumask;
+-
+-      /* Stabilize the cpumasks */
+-      cpus_read_lock();
+-      build_node_to_cpumask(node_to_cpumask);
+-
+-      /* grouping present CPUs first */
+-      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+-                                cpu_present_mask, nmsk, masks);
+-      if (ret < 0)
+-              goto fail_build_affinity;
+-      nr_present = ret;
+-
+-      /*
+-       * Allocate non present CPUs starting from the next group to be
+-       * handled. If the grouping of present CPUs already exhausted the
+-       * group space, assign the non present CPUs to the already
+-       * allocated out groups.
+-       */
+-      if (nr_present >= numgrps)
+-              curgrp = 0;
+-      else
+-              curgrp = nr_present;
+-      cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+-      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+-                                npresmsk, nmsk, masks);
+-      if (ret >= 0)
+-              nr_others = ret;
+-
+- fail_build_affinity:
+-      cpus_read_unlock();
+-
+-      if (ret >= 0)
+-              WARN_ON(nr_present + nr_others < numgrps);
+-
+- fail_node_to_cpumask:
+-      free_node_to_cpumask(node_to_cpumask);
+-
+- fail_npresmsk:
+-      free_cpumask_var(npresmsk);
+-
+- fail_nmsk:
+-      free_cpumask_var(nmsk);
+-      if (ret < 0) {
+-              kfree(masks);
+-              return NULL;
+-      }
+-      return masks;
+-}
++#include <linux/group_cpus.h>
+ static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+ {
+diff --git a/lib/Makefile b/lib/Makefile
+index 5ffe72ec99797..6f1611d053e6a 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -361,6 +361,8 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o
+ obj-$(CONFIG_PARMAN) += parman.o
++obj-y += group_cpus.o
++
+ # GCC library routines
+ obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o
+ obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o
+diff --git a/lib/group_cpus.c b/lib/group_cpus.c
+new file mode 100644
+index 0000000000000..99f08c6cb9d97
+--- /dev/null
++++ b/lib/group_cpus.c
+@@ -0,0 +1,427 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2016 Thomas Gleixner.
++ * Copyright (C) 2016-2017 Christoph Hellwig.
++ */
++#include <linux/kernel.h>
++#include <linux/slab.h>
++#include <linux/cpu.h>
++#include <linux/sort.h>
++#include <linux/group_cpus.h>
++
++static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
++                              unsigned int cpus_per_grp)
++{
++      const struct cpumask *siblmsk;
++      int cpu, sibl;
++
++      for ( ; cpus_per_grp > 0; ) {
++              cpu = cpumask_first(nmsk);
++
++              /* Should not happen, but I'm too lazy to think about it */
++              if (cpu >= nr_cpu_ids)
++                      return;
++
++              cpumask_clear_cpu(cpu, nmsk);
++              cpumask_set_cpu(cpu, irqmsk);
++              cpus_per_grp--;
++
++              /* If the cpu has siblings, use them first */
++              siblmsk = topology_sibling_cpumask(cpu);
++              for (sibl = -1; cpus_per_grp > 0; ) {
++                      sibl = cpumask_next(sibl, siblmsk);
++                      if (sibl >= nr_cpu_ids)
++                              break;
++                      if (!cpumask_test_and_clear_cpu(sibl, nmsk))
++                              continue;
++                      cpumask_set_cpu(sibl, irqmsk);
++                      cpus_per_grp--;
++              }
++      }
++}
++
++static cpumask_var_t *alloc_node_to_cpumask(void)
++{
++      cpumask_var_t *masks;
++      int node;
++
++      masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
++      if (!masks)
++              return NULL;
++
++      for (node = 0; node < nr_node_ids; node++) {
++              if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
++                      goto out_unwind;
++      }
++
++      return masks;
++
++out_unwind:
++      while (--node >= 0)
++              free_cpumask_var(masks[node]);
++      kfree(masks);
++      return NULL;
++}
++
++static void free_node_to_cpumask(cpumask_var_t *masks)
++{
++      int node;
++
++      for (node = 0; node < nr_node_ids; node++)
++              free_cpumask_var(masks[node]);
++      kfree(masks);
++}
++
++static void build_node_to_cpumask(cpumask_var_t *masks)
++{
++      int cpu;
++
++      for_each_possible_cpu(cpu)
++              cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
++}
++
++static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
++                              const struct cpumask *mask, nodemask_t *nodemsk)
++{
++      int n, nodes = 0;
++
++      /* Calculate the number of nodes in the supplied affinity mask */
++      for_each_node(n) {
++              if (cpumask_intersects(mask, node_to_cpumask[n])) {
++                      node_set(n, *nodemsk);
++                      nodes++;
++              }
++      }
++      return nodes;
++}
++
++struct node_groups {
++      unsigned id;
++
++      union {
++              unsigned ngroups;
++              unsigned ncpus;
++      };
++};
++
++static int ncpus_cmp_func(const void *l, const void *r)
++{
++      const struct node_groups *ln = l;
++      const struct node_groups *rn = r;
++
++      return ln->ncpus - rn->ncpus;
++}
++
++/*
++ * Allocate group number for each node, so that for each node:
++ *
++ * 1) the allocated number is >= 1
++ *
++ * 2) the allocated number is <= active CPU number of this node
++ *
++ * The actual allocated total groups may be less than @numgrps when
++ * active total CPU number is less than @numgrps.
++ *
++ * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
++ * for each node.
++ */
++static void alloc_nodes_groups(unsigned int numgrps,
++                             cpumask_var_t *node_to_cpumask,
++                             const struct cpumask *cpu_mask,
++                             const nodemask_t nodemsk,
++                             struct cpumask *nmsk,
++                             struct node_groups *node_groups)
++{
++      unsigned n, remaining_ncpus = 0;
++
++      for (n = 0; n < nr_node_ids; n++) {
++              node_groups[n].id = n;
++              node_groups[n].ncpus = UINT_MAX;
++      }
++
++      for_each_node_mask(n, nodemsk) {
++              unsigned ncpus;
++
++              cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
++              ncpus = cpumask_weight(nmsk);
++
++              if (!ncpus)
++                      continue;
++              remaining_ncpus += ncpus;
++              node_groups[n].ncpus = ncpus;
++      }
++
++      numgrps = min_t(unsigned, remaining_ncpus, numgrps);
++
++      sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
++           ncpus_cmp_func, NULL);
++
++      /*
++       * Allocate groups for each node according to the ratio of this
++       * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is
++       * bigger than number of active numa nodes. Always start the
++       * allocation from the node with minimized nr_cpus.
++       *
++       * This way guarantees that each active node gets allocated at
++       * least one group, and the theory is simple: over-allocation
++       * is only done when this node is assigned by one group, so
++       * other nodes will be allocated >= 1 groups, since 'numgrps' is
++       * bigger than number of numa nodes.
++       *
++       * One perfect invariant is that number of allocated groups for
++       * each node is <= CPU count of this node:
++       *
++       * 1) suppose there are two nodes: A and B
++       *      ncpu(X) is CPU count of node X
++       *      grps(X) is the group count allocated to node X via this
++       *      algorithm
++       *
++       *      ncpu(A) <= ncpu(B)
++       *      ncpu(A) + ncpu(B) = N
++       *      grps(A) + grps(B) = G
++       *
++       *      grps(A) = max(1, round_down(G * ncpu(A) / N))
++       *      grps(B) = G - grps(A)
++       *
++       *      both N and G are integer, and 2 <= G <= N, suppose
++       *      G = N - delta, and 0 <= delta <= N - 2
++       *
++       * 2) obviously grps(A) <= ncpu(A) because:
++       *
++       *      if grps(A) is 1, then grps(A) <= ncpu(A) given
++       *      ncpu(A) >= 1
++       *
++       *      otherwise,
++       *              grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N
++       *
++       * 3) prove how grps(B) <= ncpu(B):
++       *
++       *      if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be
++       *      over-allocated, so grps(B) <= ncpu(B),
++       *
++       *      otherwise:
++       *
++       *      grps(A) =
++       *              round_down(G * ncpu(A) / N) =
++       *              round_down((N - delta) * ncpu(A) / N) =
++       *              round_down((N * ncpu(A) - delta * ncpu(A)) / N)  >=
++       *              round_down((N * ncpu(A) - delta * N) / N)        =
++       *              cpu(A) - delta
++       *
++       *      then:
++       *
++       *      grps(A) - G >= ncpu(A) - delta - G
++       *      =>
++       *      G - grps(A) <= G + delta - ncpu(A)
++       *      =>
++       *      grps(B) <= N - ncpu(A)
++       *      =>
++       *      grps(B) <= cpu(B)
++       *
++       * For nodes >= 3, it can be thought as one node and another big
++       * node given that is exactly what this algorithm is implemented,
++       * and we always re-calculate 'remaining_ncpus' & 'numgrps', and
++       * finally for each node X: grps(X) <= ncpu(X).
++       *
++       */
++      for (n = 0; n < nr_node_ids; n++) {
++              unsigned ngroups, ncpus;
++
++              if (node_groups[n].ncpus == UINT_MAX)
++                      continue;
++
++              WARN_ON_ONCE(numgrps == 0);
++
++              ncpus = node_groups[n].ncpus;
++              ngroups = max_t(unsigned, 1,
++                               numgrps * ncpus / remaining_ncpus);
++              WARN_ON_ONCE(ngroups > ncpus);
++
++              node_groups[n].ngroups = ngroups;
++
++              remaining_ncpus -= ncpus;
++              numgrps -= ngroups;
++      }
++}
++
++static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
++                             cpumask_var_t *node_to_cpumask,
++                             const struct cpumask *cpu_mask,
++                             struct cpumask *nmsk, struct cpumask *masks)
++{
++      unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
++      unsigned int last_grp = numgrps;
++      unsigned int curgrp = startgrp;
++      nodemask_t nodemsk = NODE_MASK_NONE;
++      struct node_groups *node_groups;
++
++      if (cpumask_empty(cpu_mask))
++              return 0;
++
++      nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
++
++      /*
++       * If the number of nodes in the mask is greater than or equal the
++       * number of groups we just spread the groups across the nodes.
++       */
++      if (numgrps <= nodes) {
++              for_each_node_mask(n, nodemsk) {
++                      /* Ensure that only CPUs which are in both masks are set */
++                      cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
++                      cpumask_or(&masks[curgrp], &masks[curgrp], nmsk);
++                      if (++curgrp == last_grp)
++                              curgrp = 0;
++              }
++              return numgrps;
++      }
++
++      node_groups = kcalloc(nr_node_ids,
++                             sizeof(struct node_groups),
++                             GFP_KERNEL);
++      if (!node_groups)
++              return -ENOMEM;
++
++      /* allocate group number for each node */
++      alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
++                         nodemsk, nmsk, node_groups);
++      for (i = 0; i < nr_node_ids; i++) {
++              unsigned int ncpus, v;
++              struct node_groups *nv = &node_groups[i];
++
++              if (nv->ngroups == UINT_MAX)
++                      continue;
++
++              /* Get the cpus on this node which are in the mask */
++              cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
++              ncpus = cpumask_weight(nmsk);
++              if (!ncpus)
++                      continue;
++
++              WARN_ON_ONCE(nv->ngroups > ncpus);
++
++              /* Account for rounding errors */
++              extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
++
++              /* Spread allocated groups on CPUs of the current node */
++              for (v = 0; v < nv->ngroups; v++, curgrp++) {
++                      cpus_per_grp = ncpus / nv->ngroups;
++
++                      /* Account for extra groups to compensate rounding errors */
++                      if (extra_grps) {
++                              cpus_per_grp++;
++                              --extra_grps;
++                      }
++
++                      /*
++                       * wrapping has to be considered given 'startgrp'
++                       * may start anywhere
++                       */
++                      if (curgrp >= last_grp)
++                              curgrp = 0;
++                      grp_spread_init_one(&masks[curgrp], nmsk,
++                                              cpus_per_grp);
++              }
++              done += nv->ngroups;
++      }
++      kfree(node_groups);
++      return done;
++}
++
++#ifdef CONFIG_SMP
++/**
++ * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality
++ * @numgrps: number of groups
++ *
++ * Return: cpumask array if successful, NULL otherwise. And each element
++ * includes CPUs assigned to this group
++ *
++ * Try to put close CPUs from viewpoint of CPU and NUMA locality into
++ * same group, and run two-stage grouping:
++ *    1) allocate present CPUs on these groups evenly first
++ *    2) allocate other possible CPUs on these groups evenly
++ *
++ * We guarantee in the resulted grouping that all CPUs are covered, and
++ * no same CPU is assigned to multiple groups
++ */
++struct cpumask *group_cpus_evenly(unsigned int numgrps)
++{
++      unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
++      cpumask_var_t *node_to_cpumask;
++      cpumask_var_t nmsk, npresmsk;
++      int ret = -ENOMEM;
++      struct cpumask *masks = NULL;
++
++      if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
++              return NULL;
++
++      if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
++              goto fail_nmsk;
++
++      node_to_cpumask = alloc_node_to_cpumask();
++      if (!node_to_cpumask)
++              goto fail_npresmsk;
++
++      masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
++      if (!masks)
++              goto fail_node_to_cpumask;
++
++      /* Stabilize the cpumasks */
++      cpus_read_lock();
++      build_node_to_cpumask(node_to_cpumask);
++
++      /* grouping present CPUs first */
++      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
++                                cpu_present_mask, nmsk, masks);
++      if (ret < 0)
++              goto fail_build_affinity;
++      nr_present = ret;
++
++      /*
++       * Allocate non present CPUs starting from the next group to be
++       * handled. If the grouping of present CPUs already exhausted the
++       * group space, assign the non present CPUs to the already
++       * allocated out groups.
++       */
++      if (nr_present >= numgrps)
++              curgrp = 0;
++      else
++              curgrp = nr_present;
++      cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
++      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
++                                npresmsk, nmsk, masks);
++      if (ret >= 0)
++              nr_others = ret;
++
++ fail_build_affinity:
++      cpus_read_unlock();
++
++      if (ret >= 0)
++              WARN_ON(nr_present + nr_others < numgrps);
++
++ fail_node_to_cpumask:
++      free_node_to_cpumask(node_to_cpumask);
++
++ fail_npresmsk:
++      free_cpumask_var(npresmsk);
++
++ fail_nmsk:
++      free_cpumask_var(nmsk);
++      if (ret < 0) {
++              kfree(masks);
++              return NULL;
++      }
++      return masks;
++}
++#else
++struct cpumask *group_cpus_evenly(unsigned int numgrps)
++{
++      struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
++
++      if (!masks)
++              return NULL;
++
++      /* assign all CPUs(cpu 0) to the 1st group only */
++      cpumask_copy(&masks[0], cpu_possible_mask);
++      return masks;
++}
++#endif
+-- 
+2.43.0
+
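The long comment in alloc_nodes_groups() above walks through the proof that every active node gets at least one group and never more groups than it has CPUs. A minimal userspace sketch of the same ratio-based hand-out, using made-up node sizes rather than anything from the patch, shows the arithmetic in isolation:

/* Sketch of the alloc_nodes_groups() ratio math on hypothetical node
 * sizes.  Assumes nodes are already sorted by ascending CPU count, as
 * the kernel code does with sort(); illustration only, not kernel code. */
#include <stdio.h>

int main(void)
{
	unsigned int ncpus[] = { 2, 4, 10 };   /* hypothetical CPUs per node */
	unsigned int nnodes = 3;
	unsigned int numgrps = 8;              /* hypothetical group count */
	unsigned int remaining_ncpus = 0;

	for (unsigned int n = 0; n < nnodes; n++)
		remaining_ncpus += ncpus[n];

	if (numgrps > remaining_ncpus)
		numgrps = remaining_ncpus;

	/* Walk nodes from smallest to largest, handing out groups in
	 * proportion to the node's share of the remaining CPUs, but
	 * never fewer than one group per node. */
	for (unsigned int n = 0; n < nnodes; n++) {
		unsigned int ngroups = numgrps * ncpus[n] / remaining_ncpus;

		if (ngroups < 1)
			ngroups = 1;
		printf("node %u: %u cpus -> %u groups\n", n, ncpus[n], ngroups);
		remaining_ncpus -= ncpus[n];
		numgrps -= ngroups;
	}
	return 0;
}

With 16 CPUs split 2/4/10 across three nodes and 8 groups requested, the sketch assigns 1, 2 and 5 groups respectively, matching the invariant grps(X) <= ncpu(X) that the comment proves.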
diff --git a/queue-6.1/genirq-affinity-pass-affinity-managed-mask-array-to-.patch b/queue-6.1/genirq-affinity-pass-affinity-managed-mask-array-to-.patch
new file mode 100644 (file)
index 0000000..09a8df8
--- /dev/null
@@ -0,0 +1,121 @@
+From 8dadc19b3f0f31cb7d083c07257a1a72dc988e35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 10:29:01 +0800
+Subject: genirq/affinity: Pass affinity managed mask array to
+ irq_build_affinity_masks
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 1f962d91a15af54301c63febb8ac2ba07aa3654f ]
+
+Pass affinity managed mask array to irq_build_affinity_masks() so that the
+index of the first affinity managed vector is always zero.
+
+This allows the implementation to be simplified a bit.
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/20221227022905.352674-3-ming.lei@redhat.com
+Stable-dep-of: 0263f92fadbb ("lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/affinity.c | 28 ++++++++++++----------------
+ 1 file changed, 12 insertions(+), 16 deletions(-)
+
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index 3361e36ebaa1e..da6379cd27fd4 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -246,14 +246,13 @@ static void alloc_nodes_vectors(unsigned int numvecs,
+ static int __irq_build_affinity_masks(unsigned int startvec,
+                                     unsigned int numvecs,
+-                                    unsigned int firstvec,
+                                     cpumask_var_t *node_to_cpumask,
+                                     const struct cpumask *cpu_mask,
+                                     struct cpumask *nmsk,
+                                     struct irq_affinity_desc *masks)
+ {
+       unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
+-      unsigned int last_affv = firstvec + numvecs;
++      unsigned int last_affv = numvecs;
+       unsigned int curvec = startvec;
+       nodemask_t nodemsk = NODE_MASK_NONE;
+       struct node_vectors *node_vectors;
+@@ -273,7 +272,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+                       cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+                       cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
+                       if (++curvec == last_affv)
+-                              curvec = firstvec;
++                              curvec = 0;
+               }
+               return numvecs;
+       }
+@@ -321,7 +320,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+                        * may start anywhere
+                        */
+                       if (curvec >= last_affv)
+-                              curvec = firstvec;
++                              curvec = 0;
+                       irq_spread_init_one(&masks[curvec].mask, nmsk,
+                                               cpus_per_vec);
+               }
+@@ -336,11 +335,10 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+  *    1) spread present CPU on these vectors
+  *    2) spread other possible CPUs on these vectors
+  */
+-static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
++static int irq_build_affinity_masks(unsigned int numvecs,
+                                   struct irq_affinity_desc *masks)
+ {
+-      unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
+-      unsigned int firstvec = startvec;
++      unsigned int curvec = 0, nr_present = 0, nr_others = 0;
+       cpumask_var_t *node_to_cpumask;
+       cpumask_var_t nmsk, npresmsk;
+       int ret = -ENOMEM;
+@@ -360,9 +358,8 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
+       build_node_to_cpumask(node_to_cpumask);
+       /* Spread on present CPUs starting from affd->pre_vectors */
+-      ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
+-                                       node_to_cpumask, cpu_present_mask,
+-                                       nmsk, masks);
++      ret = __irq_build_affinity_masks(curvec, numvecs, node_to_cpumask,
++                                       cpu_present_mask, nmsk, masks);
+       if (ret < 0)
+               goto fail_build_affinity;
+       nr_present = ret;
+@@ -374,13 +371,12 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
+        * out vectors.
+        */
+       if (nr_present >= numvecs)
+-              curvec = firstvec;
++              curvec = 0;
+       else
+-              curvec = firstvec + nr_present;
++              curvec = nr_present;
+       cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+-      ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
+-                                       node_to_cpumask, npresmsk, nmsk,
+-                                       masks);
++      ret = __irq_build_affinity_masks(curvec, numvecs, node_to_cpumask,
++                                       npresmsk, nmsk, masks);
+       if (ret >= 0)
+               nr_others = ret;
+@@ -463,7 +459,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+               unsigned int this_vecs = affd->set_size[i];
+               int ret;
+-              ret = irq_build_affinity_masks(curvec, this_vecs, masks);
++              ret = irq_build_affinity_masks(this_vecs, &masks[curvec]);
+               if (ret) {
+                       kfree(masks);
+                       return NULL;
+-- 
+2.43.0
+
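The simplification comes from the call site passing &masks[curvec] instead of the whole array plus an offset, so the callee can always index its own set from zero and wrap back to zero. A small generic sketch of that pointer-offset pattern (illustrative names, not the kernel structures):

/* Illustration of passing a sub-array so the callee indexes from zero.
 * The names are made up; only the pointer-offset idea mirrors the patch. */
#include <stdio.h>

static void fill_set(int *masks, unsigned int nvecs, int value)
{
	/* masks[0] is the first vector of *this* set, regardless of
	 * where the set starts in the caller's full array. */
	for (unsigned int i = 0; i < nvecs; i++)
		masks[i] = value;
}

int main(void)
{
	int masks[6] = { 0 };
	unsigned int curvec = 2;

	/* Before the patch the callee received curvec plus a 'firstvec'
	 * to wrap back to; afterwards it just gets &masks[curvec]. */
	fill_set(&masks[curvec], 3, 7);

	for (unsigned int i = 0; i < 6; i++)
		printf("masks[%u] = %d\n", i, masks[i]);
	return 0;
}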
diff --git a/queue-6.1/genirq-affinity-remove-the-firstvec-parameter-from-i.patch b/queue-6.1/genirq-affinity-remove-the-firstvec-parameter-from-i.patch
new file mode 100644 (file)
index 0000000..97e95a2
--- /dev/null
@@ -0,0 +1,54 @@
+From 3f9eac627a4179298074566b0149198d817ff10c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 10:29:00 +0800
+Subject: genirq/affinity: Remove the 'firstvec' parameter from
+ irq_build_affinity_masks
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit cdf07f0ea48a3b52f924714d477366ac510ee870 ]
+
+The 'firstvec' parameter is always the same as 'startvec', so use
+'startvec' directly inside irq_build_affinity_masks().
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/20221227022905.352674-2-ming.lei@redhat.com
+Stable-dep-of: 0263f92fadbb ("lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/affinity.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index d9a5c1d65a79d..3361e36ebaa1e 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -337,10 +337,10 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+  *    2) spread other possible CPUs on these vectors
+  */
+ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
+-                                  unsigned int firstvec,
+                                   struct irq_affinity_desc *masks)
+ {
+       unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
++      unsigned int firstvec = startvec;
+       cpumask_var_t *node_to_cpumask;
+       cpumask_var_t nmsk, npresmsk;
+       int ret = -ENOMEM;
+@@ -463,8 +463,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+               unsigned int this_vecs = affd->set_size[i];
+               int ret;
+-              ret = irq_build_affinity_masks(curvec, this_vecs,
+-                                             curvec, masks);
++              ret = irq_build_affinity_masks(curvec, this_vecs, masks);
+               if (ret) {
+                       kfree(masks);
+                       return NULL;
+-- 
+2.43.0
+
diff --git a/queue-6.1/genirq-affinity-rename-irq_build_affinity_masks-as-g.patch b/queue-6.1/genirq-affinity-rename-irq_build_affinity_masks-as-g.patch
new file mode 100644 (file)
index 0000000..2e543c3
--- /dev/null
@@ -0,0 +1,485 @@
+From 2b38a67a94c19fcf3c655f12980a7a16eee4e44e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 10:29:03 +0800
+Subject: genirq/affinity: Rename irq_build_affinity_masks as group_cpus_evenly
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 523f1ea76aad9025f9bd5258d77f4406fa9dbe5d ]
+
+Map irq vectors into groups, which allows the algorithm to be abstracted
+for generic use outside of the interrupt core.
+
+Rename irq_build_affinity_masks as group_cpus_evenly, so the API can be
+reused by blk-mq to build its default queue mapping even though irq
+vectors aren't involved.
+
+No functional change, just rename vector as group.
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/20221227022905.352674-5-ming.lei@redhat.com
+Stable-dep-of: 0263f92fadbb ("lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/affinity.c | 242 +++++++++++++++++++++---------------------
+ 1 file changed, 121 insertions(+), 121 deletions(-)
+
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index 00bba1020ecb2..54083331f1bcb 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -9,13 +9,13 @@
+ #include <linux/cpu.h>
+ #include <linux/sort.h>
+-static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+-                              unsigned int cpus_per_vec)
++static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
++                              unsigned int cpus_per_grp)
+ {
+       const struct cpumask *siblmsk;
+       int cpu, sibl;
+-      for ( ; cpus_per_vec > 0; ) {
++      for ( ; cpus_per_grp > 0; ) {
+               cpu = cpumask_first(nmsk);
+               /* Should not happen, but I'm too lazy to think about it */
+@@ -24,18 +24,18 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+               cpumask_clear_cpu(cpu, nmsk);
+               cpumask_set_cpu(cpu, irqmsk);
+-              cpus_per_vec--;
++              cpus_per_grp--;
+               /* If the cpu has siblings, use them first */
+               siblmsk = topology_sibling_cpumask(cpu);
+-              for (sibl = -1; cpus_per_vec > 0; ) {
++              for (sibl = -1; cpus_per_grp > 0; ) {
+                       sibl = cpumask_next(sibl, siblmsk);
+                       if (sibl >= nr_cpu_ids)
+                               break;
+                       if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+                               continue;
+                       cpumask_set_cpu(sibl, irqmsk);
+-                      cpus_per_vec--;
++                      cpus_per_grp--;
+               }
+       }
+ }
+@@ -95,48 +95,48 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
+       return nodes;
+ }
+-struct node_vectors {
++struct node_groups {
+       unsigned id;
+       union {
+-              unsigned nvectors;
++              unsigned ngroups;
+               unsigned ncpus;
+       };
+ };
+ static int ncpus_cmp_func(const void *l, const void *r)
+ {
+-      const struct node_vectors *ln = l;
+-      const struct node_vectors *rn = r;
++      const struct node_groups *ln = l;
++      const struct node_groups *rn = r;
+       return ln->ncpus - rn->ncpus;
+ }
+ /*
+- * Allocate vector number for each node, so that for each node:
++ * Allocate group number for each node, so that for each node:
+  *
+  * 1) the allocated number is >= 1
+  *
+- * 2) the allocated numbver is <= active CPU number of this node
++ * 2) the allocated number is <= active CPU number of this node
+  *
+- * The actual allocated total vectors may be less than @numvecs when
+- * active total CPU number is less than @numvecs.
++ * The actual allocated total groups may be less than @numgrps when
++ * active total CPU number is less than @numgrps.
+  *
+  * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
+  * for each node.
+  */
+-static void alloc_nodes_vectors(unsigned int numvecs,
+-                              cpumask_var_t *node_to_cpumask,
+-                              const struct cpumask *cpu_mask,
+-                              const nodemask_t nodemsk,
+-                              struct cpumask *nmsk,
+-                              struct node_vectors *node_vectors)
++static void alloc_nodes_groups(unsigned int numgrps,
++                             cpumask_var_t *node_to_cpumask,
++                             const struct cpumask *cpu_mask,
++                             const nodemask_t nodemsk,
++                             struct cpumask *nmsk,
++                             struct node_groups *node_groups)
+ {
+       unsigned n, remaining_ncpus = 0;
+       for (n = 0; n < nr_node_ids; n++) {
+-              node_vectors[n].id = n;
+-              node_vectors[n].ncpus = UINT_MAX;
++              node_groups[n].id = n;
++              node_groups[n].ncpus = UINT_MAX;
+       }
+       for_each_node_mask(n, nodemsk) {
+@@ -148,61 +148,61 @@ static void alloc_nodes_vectors(unsigned int numvecs,
+               if (!ncpus)
+                       continue;
+               remaining_ncpus += ncpus;
+-              node_vectors[n].ncpus = ncpus;
++              node_groups[n].ncpus = ncpus;
+       }
+-      numvecs = min_t(unsigned, remaining_ncpus, numvecs);
++      numgrps = min_t(unsigned, remaining_ncpus, numgrps);
+-      sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
++      sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
+            ncpus_cmp_func, NULL);
+       /*
+-       * Allocate vectors for each node according to the ratio of this
+-       * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is
++       * Allocate groups for each node according to the ratio of this
++       * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is
+        * bigger than number of active numa nodes. Always start the
+        * allocation from the node with minimized nr_cpus.
+        *
+        * This way guarantees that each active node gets allocated at
+-       * least one vector, and the theory is simple: over-allocation
+-       * is only done when this node is assigned by one vector, so
+-       * other nodes will be allocated >= 1 vector, since 'numvecs' is
++       * least one group, and the theory is simple: over-allocation
++       * is only done when this node is assigned by one group, so
++       * other nodes will be allocated >= 1 groups, since 'numgrps' is
+        * bigger than number of numa nodes.
+        *
+-       * One perfect invariant is that number of allocated vectors for
++       * One perfect invariant is that number of allocated groups for
+        * each node is <= CPU count of this node:
+        *
+        * 1) suppose there are two nodes: A and B
+        *      ncpu(X) is CPU count of node X
+-       *      vecs(X) is the vector count allocated to node X via this
++       *      grps(X) is the group count allocated to node X via this
+        *      algorithm
+        *
+        *      ncpu(A) <= ncpu(B)
+        *      ncpu(A) + ncpu(B) = N
+-       *      vecs(A) + vecs(B) = V
++       *      grps(A) + grps(B) = G
+        *
+-       *      vecs(A) = max(1, round_down(V * ncpu(A) / N))
+-       *      vecs(B) = V - vecs(A)
++       *      grps(A) = max(1, round_down(G * ncpu(A) / N))
++       *      grps(B) = G - grps(A)
+        *
+-       *      both N and V are integer, and 2 <= V <= N, suppose
+-       *      V = N - delta, and 0 <= delta <= N - 2
++       *      both N and G are integer, and 2 <= G <= N, suppose
++       *      G = N - delta, and 0 <= delta <= N - 2
+        *
+-       * 2) obviously vecs(A) <= ncpu(A) because:
++       * 2) obviously grps(A) <= ncpu(A) because:
+        *
+-       *      if vecs(A) is 1, then vecs(A) <= ncpu(A) given
++       *      if grps(A) is 1, then grps(A) <= ncpu(A) given
+        *      ncpu(A) >= 1
+        *
+        *      otherwise,
+-       *              vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
++       *              grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N
+        *
+-       * 3) prove how vecs(B) <= ncpu(B):
++       * 3) prove how grps(B) <= ncpu(B):
+        *
+-       *      if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
+-       *      over-allocated, so vecs(B) <= ncpu(B),
++       *      if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be
++       *      over-allocated, so grps(B) <= ncpu(B),
+        *
+        *      otherwise:
+        *
+-       *      vecs(A) =
+-       *              round_down(V * ncpu(A) / N) =
++       *      grps(A) =
++       *              round_down(G * ncpu(A) / N) =
+        *              round_down((N - delta) * ncpu(A) / N) =
+        *              round_down((N * ncpu(A) - delta * ncpu(A)) / N)  >=
+        *              round_down((N * ncpu(A) - delta * N) / N)        =
+@@ -210,52 +210,50 @@ static void alloc_nodes_vectors(unsigned int numvecs,
+        *
+        *      then:
+        *
+-       *      vecs(A) - V >= ncpu(A) - delta - V
++       *      grps(A) - G >= ncpu(A) - delta - G
+        *      =>
+-       *      V - vecs(A) <= V + delta - ncpu(A)
++       *      G - grps(A) <= G + delta - ncpu(A)
+        *      =>
+-       *      vecs(B) <= N - ncpu(A)
++       *      grps(B) <= N - ncpu(A)
+        *      =>
+-       *      vecs(B) <= cpu(B)
++       *      grps(B) <= cpu(B)
+        *
+        * For nodes >= 3, it can be thought as one node and another big
+        * node given that is exactly what this algorithm is implemented,
+-       * and we always re-calculate 'remaining_ncpus' & 'numvecs', and
+-       * finally for each node X: vecs(X) <= ncpu(X).
++       * and we always re-calculate 'remaining_ncpus' & 'numgrps', and
++       * finally for each node X: grps(X) <= ncpu(X).
+        *
+        */
+       for (n = 0; n < nr_node_ids; n++) {
+-              unsigned nvectors, ncpus;
++              unsigned ngroups, ncpus;
+-              if (node_vectors[n].ncpus == UINT_MAX)
++              if (node_groups[n].ncpus == UINT_MAX)
+                       continue;
+-              WARN_ON_ONCE(numvecs == 0);
++              WARN_ON_ONCE(numgrps == 0);
+-              ncpus = node_vectors[n].ncpus;
+-              nvectors = max_t(unsigned, 1,
+-                               numvecs * ncpus / remaining_ncpus);
+-              WARN_ON_ONCE(nvectors > ncpus);
++              ncpus = node_groups[n].ncpus;
++              ngroups = max_t(unsigned, 1,
++                               numgrps * ncpus / remaining_ncpus);
++              WARN_ON_ONCE(ngroups > ncpus);
+-              node_vectors[n].nvectors = nvectors;
++              node_groups[n].ngroups = ngroups;
+               remaining_ncpus -= ncpus;
+-              numvecs -= nvectors;
++              numgrps -= ngroups;
+       }
+ }
+-static int __irq_build_affinity_masks(unsigned int startvec,
+-                                    unsigned int numvecs,
+-                                    cpumask_var_t *node_to_cpumask,
+-                                    const struct cpumask *cpu_mask,
+-                                    struct cpumask *nmsk,
+-                                    struct cpumask *masks)
++static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
++                             cpumask_var_t *node_to_cpumask,
++                             const struct cpumask *cpu_mask,
++                             struct cpumask *nmsk, struct cpumask *masks)
+ {
+-      unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
+-      unsigned int last_affv = numvecs;
+-      unsigned int curvec = startvec;
++      unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
++      unsigned int last_grp = numgrps;
++      unsigned int curgrp = startgrp;
+       nodemask_t nodemsk = NODE_MASK_NONE;
+-      struct node_vectors *node_vectors;
++      struct node_groups *node_groups;
+       if (cpumask_empty(cpu_mask))
+               return 0;
+@@ -264,34 +262,33 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+       /*
+        * If the number of nodes in the mask is greater than or equal the
+-       * number of vectors we just spread the vectors across the nodes.
++       * number of groups we just spread the groups across the nodes.
+        */
+-      if (numvecs <= nodes) {
++      if (numgrps <= nodes) {
+               for_each_node_mask(n, nodemsk) {
+                       /* Ensure that only CPUs which are in both masks are set */
+                       cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+-                      cpumask_or(&masks[curvec], &masks[curvec], nmsk);
+-                      if (++curvec == last_affv)
+-                              curvec = 0;
++                      cpumask_or(&masks[curgrp], &masks[curgrp], nmsk);
++                      if (++curgrp == last_grp)
++                              curgrp = 0;
+               }
+-              return numvecs;
++              return numgrps;
+       }
+-      node_vectors = kcalloc(nr_node_ids,
+-                             sizeof(struct node_vectors),
++      node_groups = kcalloc(nr_node_ids,
++                             sizeof(struct node_groups),
+                              GFP_KERNEL);
+-      if (!node_vectors)
++      if (!node_groups)
+               return -ENOMEM;
+-      /* allocate vector number for each node */
+-      alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
+-                          nodemsk, nmsk, node_vectors);
+-
++      /* allocate group number for each node */
++      alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
++                         nodemsk, nmsk, node_groups);
+       for (i = 0; i < nr_node_ids; i++) {
+               unsigned int ncpus, v;
+-              struct node_vectors *nv = &node_vectors[i];
++              struct node_groups *nv = &node_groups[i];
+-              if (nv->nvectors == UINT_MAX)
++              if (nv->ngroups == UINT_MAX)
+                       continue;
+               /* Get the cpus on this node which are in the mask */
+@@ -300,44 +297,47 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+               if (!ncpus)
+                       continue;
+-              WARN_ON_ONCE(nv->nvectors > ncpus);
++              WARN_ON_ONCE(nv->ngroups > ncpus);
+               /* Account for rounding errors */
+-              extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);
++              extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
+-              /* Spread allocated vectors on CPUs of the current node */
+-              for (v = 0; v < nv->nvectors; v++, curvec++) {
+-                      cpus_per_vec = ncpus / nv->nvectors;
++              /* Spread allocated groups on CPUs of the current node */
++              for (v = 0; v < nv->ngroups; v++, curgrp++) {
++                      cpus_per_grp = ncpus / nv->ngroups;
+-                      /* Account for extra vectors to compensate rounding errors */
+-                      if (extra_vecs) {
+-                              cpus_per_vec++;
+-                              --extra_vecs;
++                      /* Account for extra groups to compensate rounding errors */
++                      if (extra_grps) {
++                              cpus_per_grp++;
++                              --extra_grps;
+                       }
+                       /*
+-                       * wrapping has to be considered given 'startvec'
++                       * wrapping has to be considered given 'startgrp'
+                        * may start anywhere
+                        */
+-                      if (curvec >= last_affv)
+-                              curvec = 0;
+-                      irq_spread_init_one(&masks[curvec], nmsk,
+-                                              cpus_per_vec);
++                      if (curgrp >= last_grp)
++                              curgrp = 0;
++                      grp_spread_init_one(&masks[curgrp], nmsk,
++                                              cpus_per_grp);
+               }
+-              done += nv->nvectors;
++              done += nv->ngroups;
+       }
+-      kfree(node_vectors);
++      kfree(node_groups);
+       return done;
+ }
+ /*
+- * build affinity in two stages:
+- *    1) spread present CPU on these vectors
+- *    2) spread other possible CPUs on these vectors
++ * build affinity in two stages for each group, and try to put close CPUs
++ * in viewpoint of CPU and NUMA locality into same group, and we run
++ * two-stage grouping:
++ *
++ *    1) allocate present CPUs on these groups evenly first
++ *    2) allocate other possible CPUs on these groups evenly
+  */
+-static struct cpumask *irq_build_affinity_masks(unsigned int numvecs)
++static struct cpumask *group_cpus_evenly(unsigned int numgrps)
+ {
+-      unsigned int curvec = 0, nr_present = 0, nr_others = 0;
++      unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
+       cpumask_var_t *node_to_cpumask;
+       cpumask_var_t nmsk, npresmsk;
+       int ret = -ENOMEM;
+@@ -353,7 +353,7 @@ static struct cpumask *irq_build_affinity_masks(unsigned int numvecs)
+       if (!node_to_cpumask)
+               goto fail_npresmsk;
+-      masks = kcalloc(numvecs, sizeof(*masks), GFP_KERNEL);
++      masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
+       if (!masks)
+               goto fail_node_to_cpumask;
+@@ -361,26 +361,26 @@ static struct cpumask *irq_build_affinity_masks(unsigned int numvecs)
+       cpus_read_lock();
+       build_node_to_cpumask(node_to_cpumask);
+-      /* Spread on present CPUs starting from affd->pre_vectors */
+-      ret = __irq_build_affinity_masks(curvec, numvecs, node_to_cpumask,
+-                                       cpu_present_mask, nmsk, masks);
++      /* grouping present CPUs first */
++      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
++                                cpu_present_mask, nmsk, masks);
+       if (ret < 0)
+               goto fail_build_affinity;
+       nr_present = ret;
+       /*
+-       * Spread on non present CPUs starting from the next vector to be
+-       * handled. If the spreading of present CPUs already exhausted the
+-       * vector space, assign the non present CPUs to the already spread
+-       * out vectors.
++       * Allocate non present CPUs starting from the next group to be
++       * handled. If the grouping of present CPUs already exhausted the
++       * group space, assign the non present CPUs to the already
++       * allocated out groups.
+        */
+-      if (nr_present >= numvecs)
+-              curvec = 0;
++      if (nr_present >= numgrps)
++              curgrp = 0;
+       else
+-              curvec = nr_present;
++              curgrp = nr_present;
+       cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+-      ret = __irq_build_affinity_masks(curvec, numvecs, node_to_cpumask,
+-                                       npresmsk, nmsk, masks);
++      ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
++                                npresmsk, nmsk, masks);
+       if (ret >= 0)
+               nr_others = ret;
+@@ -388,7 +388,7 @@ static struct cpumask *irq_build_affinity_masks(unsigned int numvecs)
+       cpus_read_unlock();
+       if (ret >= 0)
+-              WARN_ON(nr_present + nr_others < numvecs);
++              WARN_ON(nr_present + nr_others < numgrps);
+  fail_node_to_cpumask:
+       free_node_to_cpumask(node_to_cpumask);
+@@ -467,7 +467,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+       for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+               unsigned int this_vecs = affd->set_size[i];
+               int j;
+-              struct cpumask *result = irq_build_affinity_masks(this_vecs);
++              struct cpumask *result = group_cpus_evenly(this_vecs);
+               if (!result) {
+                       kfree(masks);
+-- 
+2.43.0
+
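group_cpus_evenly() runs the spread in two stages: present CPUs first, then the remaining possible CPUs, continuing into the next groups so the groups stay balanced. A simplified userspace model of the two stages (plain round-robin rather than the per-node spread the kernel does, with hypothetical CPU and group counts):

/* Userspace model of the two-stage spread in group_cpus_evenly():
 * stage 1 places present CPUs, stage 2 places the remaining possible
 * CPUs, wrapping around the group array when needed.
 * CPU counts and group count are hypothetical. */
#include <stdio.h>

#define NR_CPUS   8
#define NR_GROUPS 3

int main(void)
{
	/* CPUs 0-5 are present, CPUs 6-7 are possible but not present. */
	int present[NR_CPUS] = { 1, 1, 1, 1, 1, 1, 0, 0 };
	int group_of[NR_CPUS];
	unsigned int curgrp = 0;

	/* Stage 1: spread present CPUs evenly across the groups. */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!present[cpu])
			continue;
		group_of[cpu] = curgrp;
		if (++curgrp == NR_GROUPS)
			curgrp = 0;
	}

	/* Stage 2: spread the remaining possible CPUs, continuing from
	 * wherever stage 1 stopped so group sizes stay balanced. */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (present[cpu])
			continue;
		group_of[cpu] = curgrp;
		if (++curgrp == NR_GROUPS)
			curgrp = 0;
	}

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> group %d\n", cpu, group_of[cpu]);
	return 0;
}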
diff --git a/queue-6.1/i40e-fix-filter-input-checks-to-prevent-config-with-.patch b/queue-6.1/i40e-fix-filter-input-checks-to-prevent-config-with-.patch
new file mode 100644 (file)
index 0000000..7c1ad18
--- /dev/null
@@ -0,0 +1,53 @@
+From 0ddfc8bc46129c7a83ae4cf6d0cc4063fbfc2355 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Nov 2023 11:23:11 +0100
+Subject: i40e: Fix filter input checks to prevent config with invalid values
+
+From: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
+
+[ Upstream commit 3e48041d9820c17e0a51599d12e66c6e12a8d08d ]
+
+Prevent the VF from configuring filters with unsupported actions or from
+using the REDIRECT action with an invalid TC number. The current checks
+could cause an out-of-bounds access on the PF side.
+
+Fixes: e284fc280473 ("i40e: Add and delete cloud filter")
+Reviewed-by: Andrii Staikov <andrii.staikov@intel.com>
+Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
+Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+index cb925baf72ce0..3c38129a5224a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -3451,16 +3451,16 @@ static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+       bool found = false;
+       int bkt;
+-      if (!tc_filter->action) {
++      if (tc_filter->action != VIRTCHNL_ACTION_TC_REDIRECT) {
+               dev_info(&pf->pdev->dev,
+-                       "VF %d: Currently ADq doesn't support Drop Action\n",
+-                       vf->vf_id);
++                       "VF %d: ADQ doesn't support this action (%d)\n",
++                       vf->vf_id, tc_filter->action);
+               goto err;
+       }
+       /* action_meta is TC number here to which the filter is applied */
+       if (!tc_filter->action_meta ||
+-          tc_filter->action_meta > I40E_MAX_VF_VSI) {
++          tc_filter->action_meta > vf->num_tc) {
+               dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+                        vf->vf_id, tc_filter->action_meta);
+               goto err;
+-- 
+2.43.0
+
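The tightened check only accepts the TC redirect action and bounds the target TC by the count the VF actually has configured, not the hardware maximum. A generic sketch of that validation shape, with illustrative names rather than the driver's structures:

/* Generic sketch of the tightened validation: only the redirect action
 * is accepted, and the target TC must be within the count actually
 * configured for this VF, not the hardware maximum.  Names are
 * illustrative, not the driver's structures. */
#include <stdbool.h>
#include <stdio.h>

enum filter_action { ACTION_DROP = 0, ACTION_TC_REDIRECT = 1 };

static bool filter_is_valid(enum filter_action action,
			    unsigned int target_tc, unsigned int num_tc)
{
	if (action != ACTION_TC_REDIRECT)
		return false;            /* unsupported action */
	if (target_tc == 0 || target_tc > num_tc)
		return false;            /* TC index out of range */
	return true;
}

int main(void)
{
	printf("%d\n", filter_is_valid(ACTION_TC_REDIRECT, 2, 4)); /* 1 */
	printf("%d\n", filter_is_valid(ACTION_DROP, 2, 4));        /* 0 */
	printf("%d\n", filter_is_valid(ACTION_TC_REDIRECT, 5, 4)); /* 0 */
	return 0;
}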
diff --git a/queue-6.1/i40e-fix-use-after-free-in-i40e_aqc_add_filters.patch b/queue-6.1/i40e-fix-use-after-free-in-i40e_aqc_add_filters.patch
new file mode 100644 (file)
index 0000000..c71b68f
--- /dev/null
@@ -0,0 +1,120 @@
+From 20287328081684e38abeda69f42fe548148fc294 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Dec 2023 15:08:50 +0800
+Subject: i40e: fix use-after-free in i40e_aqc_add_filters()
+
+From: Ke Xiao <xiaoke@sangfor.com.cn>
+
+[ Upstream commit 6a15584e99db8918b60e507539c7446375dcf366 ]
+
+Commit 3116f59c12bd ("i40e: fix use-after-free in
+i40e_sync_filters_subtask()") avoided use-after-free issues by
+increasing the refcount while updating the VSI filter list to the
+HW. However, it missed the unicast case.
+
+When deleting a unicast FDB entry, the i40e driver will release
+the mac_filter, while i40e_service_task concurrently requests
+firmware to add the mac_filter, which leads to the following
+use-after-free issue.
+
+Fix again for both netdev->uc and netdev->mc.
+
+BUG: KASAN: use-after-free in i40e_aqc_add_filters+0x55c/0x5b0 [i40e]
+Read of size 2 at addr ffff888eb3452d60 by task kworker/8:7/6379
+
+CPU: 8 PID: 6379 Comm: kworker/8:7 Kdump: loaded Tainted: G
+Workqueue: i40e i40e_service_task [i40e]
+Call Trace:
+ dump_stack+0x71/0xab
+ print_address_description+0x6b/0x290
+ kasan_report+0x14a/0x2b0
+ i40e_aqc_add_filters+0x55c/0x5b0 [i40e]
+ i40e_sync_vsi_filters+0x1676/0x39c0 [i40e]
+ i40e_service_task+0x1397/0x2bb0 [i40e]
+ process_one_work+0x56a/0x11f0
+ worker_thread+0x8f/0xf40
+ kthread+0x2a0/0x390
+ ret_from_fork+0x1f/0x40
+
+Allocated by task 21948:
+ kasan_kmalloc+0xa6/0xd0
+ kmem_cache_alloc_trace+0xdb/0x1c0
+ i40e_add_filter+0x11e/0x520 [i40e]
+ i40e_addr_sync+0x37/0x60 [i40e]
+ __hw_addr_sync_dev+0x1f5/0x2f0
+ i40e_set_rx_mode+0x61/0x1e0 [i40e]
+ dev_uc_add_excl+0x137/0x190
+ i40e_ndo_fdb_add+0x161/0x260 [i40e]
+ rtnl_fdb_add+0x567/0x950
+ rtnetlink_rcv_msg+0x5db/0x880
+ netlink_rcv_skb+0x254/0x380
+ netlink_unicast+0x454/0x610
+ netlink_sendmsg+0x747/0xb00
+ sock_sendmsg+0xe2/0x120
+ __sys_sendto+0x1ae/0x290
+ __x64_sys_sendto+0xdd/0x1b0
+ do_syscall_64+0xa0/0x370
+ entry_SYSCALL_64_after_hwframe+0x65/0xca
+
+Freed by task 21948:
+ __kasan_slab_free+0x137/0x190
+ kfree+0x8b/0x1b0
+ __i40e_del_filter+0x116/0x1e0 [i40e]
+ i40e_del_mac_filter+0x16c/0x300 [i40e]
+ i40e_addr_unsync+0x134/0x1b0 [i40e]
+ __hw_addr_sync_dev+0xff/0x2f0
+ i40e_set_rx_mode+0x61/0x1e0 [i40e]
+ dev_uc_del+0x77/0x90
+ rtnl_fdb_del+0x6a5/0x860
+ rtnetlink_rcv_msg+0x5db/0x880
+ netlink_rcv_skb+0x254/0x380
+ netlink_unicast+0x454/0x610
+ netlink_sendmsg+0x747/0xb00
+ sock_sendmsg+0xe2/0x120
+ __sys_sendto+0x1ae/0x290
+ __x64_sys_sendto+0xdd/0x1b0
+ do_syscall_64+0xa0/0x370
+ entry_SYSCALL_64_after_hwframe+0x65/0xca
+
+Fixes: 3116f59c12bd ("i40e: fix use-after-free in i40e_sync_filters_subtask()")
+Fixes: 41c445ff0f48 ("i40e: main driver core")
+Signed-off-by: Ke Xiao <xiaoke@sangfor.com.cn>
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Di Zhu <zhudi2@huawei.com>
+Reviewed-by: Jan Sokolowski <jan.sokolowski@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index b4157ff370a31..cdc68b78bd9ea 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -104,12 +104,18 @@ static struct workqueue_struct *i40e_wq;
+ static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
+                                 struct net_device *netdev, int delta)
+ {
++      struct netdev_hw_addr_list *ha_list;
+       struct netdev_hw_addr *ha;
+       if (!f || !netdev)
+               return;
+-      netdev_for_each_mc_addr(ha, netdev) {
++      if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr))
++              ha_list = &netdev->uc;
++      else
++              ha_list = &netdev->mc;
++
++      netdev_hw_addr_list_for_each(ha, ha_list) {
+               if (ether_addr_equal(ha->addr, f->macaddr)) {
+                       ha->refcount += delta;
+                       if (ha->refcount <= 0)
+-- 
+2.43.0
+
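The fix works because a unicast or link-local filter must be looked up on netdev->uc, while multicast filters live on netdev->mc, so the refcount adjustment has to pick the right list first. A standalone sketch of the address classification that selection relies on (the multicast bit is the least-significant bit of the first octet); the sample addresses are made up:

/* Standalone sketch of the address classification the fix relies on:
 * the multicast bit is the least-significant bit of the first octet,
 * so unicast entries are adjusted on the uc list and multicast
 * entries on the mc list.  Plain C model, not the netdev structures. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_multicast_ether(const uint8_t *addr)
{
	return addr[0] & 0x01;
}

int main(void)
{
	const uint8_t ucast[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	const uint8_t mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

	printf("ucast -> %s list\n", is_multicast_ether(ucast) ? "mc" : "uc");
	printf("mcast -> %s list\n", is_multicast_ether(mcast) ? "mc" : "uc");
	return 0;
}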
diff --git a/queue-6.1/i40e-restore-vf-msi-x-state-during-pci-reset.patch b/queue-6.1/i40e-restore-vf-msi-x-state-during-pci-reset.patch
new file mode 100644 (file)
index 0000000..3bf1bef
--- /dev/null
@@ -0,0 +1,104 @@
+From 7b1f4a98a68f67ebaea752502865a2679eea1b6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Dec 2023 14:27:35 +0100
+Subject: i40e: Restore VF MSI-X state during PCI reset
+
+From: Andrii Staikov <andrii.staikov@intel.com>
+
+[ Upstream commit 371e576ff3e8580d91d49026e5d5faebf5565558 ]
+
+During a PCI FLR the MSI-X Enable flag in the VF PCI MSI-X capability
+register will be cleared. This can lead to issues when a VF is
+assigned to a VM because in these cases the VF driver receives no
+indication of the PF PCI error/reset and additionally it is incapable
+of restoring the cleared flag in the hypervisor configuration space
+without fully reinitializing the driver interrupt functionality.
+
+Since the VF driver is unable to easily resolve this condition on its own,
+restore the VF MSI-X flag during the PF PCI reset handling.
+
+Fixes: 19b7960b2da1 ("i40e: implement split PCI error reset handler")
+Co-developed-by: Karen Ostrowska <karen.ostrowska@intel.com>
+Signed-off-by: Karen Ostrowska <karen.ostrowska@intel.com>
+Co-developed-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Andrii Staikov <andrii.staikov@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c   |  3 +++
+ .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 26 +++++++++++++++++++
+ .../ethernet/intel/i40e/i40e_virtchnl_pf.h    |  3 +++
+ 3 files changed, 32 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index cdc68b78bd9ea..63d43ef86f9b9 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -16450,6 +16450,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+               return;
+       i40e_reset_and_rebuild(pf, false, false);
++#ifdef CONFIG_PCI_IOV
++      i40e_restore_all_vfs_msi_state(pdev);
++#endif /* CONFIG_PCI_IOV */
+ }
+ /**
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+index 3c38129a5224a..c7d761426d6ce 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -152,6 +152,32 @@ void i40e_vc_notify_reset(struct i40e_pf *pf)
+                            (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+ }
++#ifdef CONFIG_PCI_IOV
++void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev)
++{
++      u16 vf_id;
++      u16 pos;
++
++      /* Continue only if this is a PF */
++      if (!pdev->is_physfn)
++              return;
++
++      if (!pci_num_vf(pdev))
++              return;
++
++      pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
++      if (pos) {
++              struct pci_dev *vf_dev = NULL;
++
++              pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
++              while ((vf_dev = pci_get_device(pdev->vendor, vf_id, vf_dev))) {
++                      if (vf_dev->is_virtfn && vf_dev->physfn == pdev)
++                              pci_restore_msi_state(vf_dev);
++              }
++      }
++}
++#endif /* CONFIG_PCI_IOV */
++
+ /**
+  * i40e_vc_notify_vf_reset
+  * @vf: pointer to the VF structure
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+index 358bbdb587951..bd497cc5303a1 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+@@ -135,6 +135,9 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
+ void i40e_vc_notify_link_state(struct i40e_pf *pf);
+ void i40e_vc_notify_reset(struct i40e_pf *pf);
++#ifdef CONFIG_PCI_IOV
++void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev);
++#endif /* CONFIG_PCI_IOV */
+ int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+                     struct ifla_vf_stats *vf_stats);
+-- 
+2.43.0
+
diff --git a/queue-6.1/ice-fix-link_down_on_close-message.patch b/queue-6.1/ice-fix-link_down_on_close-message.patch
new file mode 100644 (file)
index 0000000..e9926e9
--- /dev/null
@@ -0,0 +1,55 @@
+From 2dd7c71e40d1a2ab164d9905c6bf8e507590d539 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Dec 2023 12:01:56 +0100
+Subject: ice: Fix link_down_on_close message
+
+From: Katarzyna Wieczerzycka <katarzyna.wieczerzycka@intel.com>
+
+[ Upstream commit 6a8d8bb55e7001de2d50920381cc858f3a3e9fb7 ]
+
+The driver should not report an error message when the
+link_down_on_close flag is enabled on a medialess port and the
+physical link cannot be set down.
+
+Fixes: 8ac7132704f3 ("ice: Fix interface being down after reset with link-down-on-close flag on")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Katarzyna Wieczerzycka <katarzyna.wieczerzycka@intel.com>
+Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index f0f39364819ac..5eb3b80b293c0 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -2138,7 +2138,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
+       /* Ensure we have media as we cannot configure a medialess port */
+       if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+-              return -EPERM;
++              return -ENOMEDIUM;
+       ice_print_topo_conflict(vsi);
+@@ -9065,8 +9065,12 @@ int ice_stop(struct net_device *netdev)
+               int link_err = ice_force_phys_link_state(vsi, false);
+               if (link_err) {
+-                      netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
+-                                 vsi->vsi_num, link_err);
++                      if (link_err == -ENOMEDIUM)
++                              netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n",
++                                          vsi->vsi_num);
++                      else
++                              netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
++                                         vsi->vsi_num, link_err);
+                       return -EIO;
+               }
+       }
+-- 
+2.43.0
+
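The reworked path distinguishes an expected condition (no medium attached, now reported as -ENOMEDIUM) from a real failure, and only the latter is logged as an error. A small sketch of that errno-specific reporting, assuming the Linux ENOMEDIUM errno value and using illustrative messages:

/* Sketch of the reporting change: an expected condition (no medium)
 * is logged as information, anything else stays an error.  The errno
 * values are the standard Linux ones; the messages are illustrative. */
#include <errno.h>
#include <stdio.h>

static void report_link_down_result(int err)
{
	if (!err)
		return;
	if (err == -ENOMEDIUM)
		printf("info: skipping link reconfig - no media attached\n");
	else
		fprintf(stderr, "error: failed to set physical link down (%d)\n",
			err);
}

int main(void)
{
	report_link_down_result(0);
	report_link_down_result(-ENOMEDIUM);
	report_link_down_result(-EIO);
	return 0;
}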
diff --git a/queue-6.1/ice-shut-down-vsi-with-link-down-on-close-enabled.patch b/queue-6.1/ice-shut-down-vsi-with-link-down-on-close-enabled.patch
new file mode 100644 (file)
index 0000000..b95ffe0
--- /dev/null
@@ -0,0 +1,40 @@
+From f1ef60049882de4af95c17ef50adb9017ecbaa09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Dec 2023 12:01:57 +0100
+Subject: ice: Shut down VSI with "link-down-on-close" enabled
+
+From: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
+
+[ Upstream commit 6d05ff55ef4f4954d28551236239f297bd52ea48 ]
+
+Disabling a netdev with the ethtool private flag "link-down-on-close"
+enabled can cause a NULL pointer dereference bug. Shut down the VSI
+regardless of the "link-down-on-close" state.
+
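+Illustratively, after this change (combined with the previous ENOMEDIUM
+patch) the error path in ice_stop() behaves roughly like the sketch below
+(simplified, not the literal driver code):
+
+    link_err = ice_force_phys_link_state(vsi, false);
+    if (link_err) {
+        if (link_err == -ENOMEDIUM)
+            netdev_info(netdev, "no media, skipping link reconfig\n");
+        else
+            netdev_err(netdev, "failed to set link down: %d\n", link_err);
+        ice_vsi_close(vsi);   /* shut the VSI down regardless */
+        return -EIO;
+    }
+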
+Fixes: 8ac7132704f3 ("ice: Fix interface being down after reset with link-down-on-close flag on")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
+Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 5eb3b80b293c0..ab46cfca4028d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -9071,6 +9071,8 @@ int ice_stop(struct net_device *netdev)
+                       else
+                               netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
+                                          vsi->vsi_num, link_err);
++
++                      ice_vsi_close(vsi);
+                       return -EIO;
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/igc-check-vlan-ethertype-mask.patch b/queue-6.1/igc-check-vlan-ethertype-mask.patch
new file mode 100644 (file)
index 0000000..a79d1ce
--- /dev/null
@@ -0,0 +1,72 @@
+From e09a381b3b1d4fa9bd86c9d51bbd7c9766cc671a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Dec 2023 15:07:18 +0100
+Subject: igc: Check VLAN EtherType mask
+
+From: Kurt Kanzenbach <kurt@linutronix.de>
+
+[ Upstream commit 7afd49a38e73afd57ff62c8d1cf5af760c4d49c0 ]
+
+Currently the driver accepts VLAN EtherType steering rules regardless of
+the configured mask, and things might fail silently or with confusing error
+messages to the user. The VLAN EtherType can only be matched by a full
+mask. Therefore, add a check for that.
+
+For instance the following rule is invalid, but the driver accepts it and
+ignores the user specified mask:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 \
+|             m 0x00ff action 0
+|Added rule with ID 63
+|root@host:~# ethtool --show-ntuple enp3s0
+|4 RX rings available
+|Total 1 rules
+|
+|Filter: 63
+|        Flow Type: Raw Ethernet
+|        Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Ethertype: 0x0 mask: 0xFFFF
+|        VLAN EtherType: 0x8100 mask: 0x0
+|        VLAN: 0x0 mask: 0xffff
+|        User-defined: 0x0 mask: 0xffffffffffffffff
+|        Action: Direct to queue 0
+
+After:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 \
+|             m 0x00ff action 0
+|rmgr: Cannot insert RX class rule: Operation not supported
+
+Fixes: 2b477d057e33 ("igc: Integrate flex filter into ethtool ops")
+Suggested-by: Suman Ghosh <sumang@marvell.com>
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ethtool.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index e146357d61a8a..2bee9cace5983 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -1356,6 +1356,14 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
+               return -EOPNOTSUPP;
+       }
++      /* VLAN EtherType can only be matched by full mask. */
++      if ((fsp->flow_type & FLOW_EXT) &&
++          fsp->m_ext.vlan_etype &&
++          fsp->m_ext.vlan_etype != ETHER_TYPE_FULL_MASK) {
++              netdev_dbg(netdev, "VLAN EtherType mask not supported\n");
++              return -EOPNOTSUPP;
++      }
++
+       if (fsp->location >= IGC_MAX_RXNFC_RULES) {
+               netdev_dbg(netdev, "Invalid location\n");
+               return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.1/igc-check-vlan-tci-mask.patch b/queue-6.1/igc-check-vlan-tci-mask.patch
new file mode 100644 (file)
index 0000000..164e81e
--- /dev/null
@@ -0,0 +1,141 @@
+From ab151d4a86bceafa58b773d11dd768f176a291af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Dec 2023 08:50:43 +0100
+Subject: igc: Check VLAN TCI mask
+
+From: Kurt Kanzenbach <kurt@linutronix.de>
+
+[ Upstream commit b5063cbe148b829e8eb97672c2cbccc058835476 ]
+
+Currently the driver accepts VLAN TCI steering rules regardless of the
+configured mask, and things might fail silently or with confusing error
+messages to the user.
+
+There are two ways to handle the VLAN TCI mask:
+
+ 1. Match on the PCP field using a VLAN prio filter
+ 2. Match on complete TCI field using a flex filter
+
+Therefore, add checks and code for that.
+
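+Concretely, the accepted masks are roughly the following (illustrative
+sketch only, not the literal driver check; VLAN_PRIO_MASK is the standard
+<linux/if_vlan.h> definition):
+
+    /* 0xe000 -> PCP bits only, handled by a VLAN prio filter */
+    /* 0xffff -> full TCI match, handled by a flex filter     */
+    if (mask && mask != htons(VLAN_PRIO_MASK) && mask != htons(0xffff))
+        return -EOPNOTSUPP;   /* any other partial mask is rejected */
+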
+For instance the following rule is invalid and will be converted into a
+VLAN prio rule which is not correct:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan 0x0001 m 0xf000 \
+|             action 1
+|Added rule with ID 61
+|root@host:~# ethtool --show-ntuple enp3s0
+|4 RX rings available
+|Total 1 rules
+|
+|Filter: 61
+|        Flow Type: Raw Ethernet
+|        Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Ethertype: 0x0 mask: 0xFFFF
+|        VLAN EtherType: 0x0 mask: 0xffff
+|        VLAN: 0x1 mask: 0x1fff
+|        User-defined: 0x0 mask: 0xffffffffffffffff
+|        Action: Direct to queue 1
+
+After:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan 0x0001 m 0xf000 \
+|             action 1
+|rmgr: Cannot insert RX class rule: Operation not supported
+
+Fixes: 7991487ecb2d ("igc: Allow for Flex Filters to be installed")
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h         |  1 +
+ drivers/net/ethernet/intel/igc/igc_ethtool.c | 28 +++++++++++++++++---
+ 2 files changed, 26 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 43c05b41627f7..2a894ca49d93b 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -538,6 +538,7 @@ struct igc_nfc_filter {
+       u16 etype;
+       __be16 vlan_etype;
+       u16 vlan_tci;
++      u16 vlan_tci_mask;
+       u8 src_addr[ETH_ALEN];
+       u8 dst_addr[ETH_ALEN];
+       u8 user_data[8];
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index 51ef18060dbc4..e146357d61a8a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -957,6 +957,7 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev,
+ }
+ #define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
++#define VLAN_TCI_FULL_MASK ((__force __be16)~0)
+ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
+                                   struct ethtool_rxnfc *cmd)
+ {
+@@ -988,7 +989,7 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+               fsp->flow_type |= FLOW_EXT;
+               fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci);
+-              fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
++              fsp->m_ext.vlan_tci = htons(rule->filter.vlan_tci_mask);
+       }
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+@@ -1223,6 +1224,7 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
+       if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+               rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
++              rule->filter.vlan_tci_mask = ntohs(fsp->m_ext.vlan_tci);
+               rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
+       }
+@@ -1260,11 +1262,19 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
+               memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data));
+       }
+-      /* When multiple filter options or user data or vlan etype is set, use a
+-       * flex filter.
++      /* The i225/i226 has various different filters. Flex filters provide a
++       * way to match up to the first 128 bytes of a packet. Use them for:
++       *   a) For specific user data
++       *   b) For VLAN EtherType
++       *   c) For full TCI match
++       *   d) Or in case multiple filter criteria are set
++       *
++       * Otherwise, use the simple MAC, VLAN PRIO or EtherType filters.
+        */
+       if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) ||
+           (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) ||
++          ((rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) &&
++           rule->filter.vlan_tci_mask == ntohs(VLAN_TCI_FULL_MASK)) ||
+           (rule->filter.match_flags & (rule->filter.match_flags - 1)))
+               rule->flex = true;
+       else
+@@ -1334,6 +1344,18 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
+               return -EINVAL;
+       }
++      /* There are two ways to match the VLAN TCI:
++       *  1. Match on PCP field and use vlan prio filter for it
++       *  2. Match on complete TCI field and use flex filter for it
++       */
++      if ((fsp->flow_type & FLOW_EXT) &&
++          fsp->m_ext.vlan_tci &&
++          fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK) &&
++          fsp->m_ext.vlan_tci != VLAN_TCI_FULL_MASK) {
++              netdev_dbg(netdev, "VLAN mask not supported\n");
++              return -EOPNOTSUPP;
++      }
++
+       if (fsp->location >= IGC_MAX_RXNFC_RULES) {
+               netdev_dbg(netdev, "Invalid location\n");
+               return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.1/igc-fix-hicredit-calculation.patch b/queue-6.1/igc-fix-hicredit-calculation.patch
new file mode 100644 (file)
index 0000000..cc61a83
--- /dev/null
@@ -0,0 +1,45 @@
+From 4b3b14b400fefd4fa7447adb596675bb2e8637e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Dec 2023 15:58:16 +0100
+Subject: igc: Fix hicredit calculation
+
+From: Rodrigo Cataldo <rodrigo.cadore@l-acoustics.com>
+
+[ Upstream commit 947dfc8138dfaeb6e966e2d661de89eb203e3064 ]
+
+According to the Intel Software Manual for I225, Section 7.5.2.7,
+hicredit should be multiplied by the constant link-rate value, 0x7736.
+
+Currently, the old constant link-rate value, 0x7735, from the boards
+supported by igb is being used, most likely due to a copy'n'paste, as
+the rest of the logic is the same for both drivers.
+
+Update hicredit accordingly.
+
+Fixes: 1ab011b0bf07 ("igc: Add support for CBS offloading")
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Signed-off-by: Rodrigo Cataldo <rodrigo.cadore@l-acoustics.com>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 725db36e399d2..31ea0781b65ec 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -178,7 +178,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+                       wr32(IGC_TQAVCC(i), tqavcc);
+                       wr32(IGC_TQAVHC(i),
+-                           0x80000000 + ring->hicredit * 0x7735);
++                           0x80000000 + ring->hicredit * 0x7736);
+               } else {
+                       /* Disable any CBS for the queue */
+                       txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK);
+-- 
+2.43.0
+
diff --git a/queue-6.1/igc-report-vlan-ethertype-matching-back-to-user.patch b/queue-6.1/igc-report-vlan-ethertype-matching-back-to-user.patch
new file mode 100644 (file)
index 0000000..87852f6
--- /dev/null
@@ -0,0 +1,75 @@
+From 0d687ebbf03e0fea5331b2481ed7bc3e89afd878 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Dec 2023 08:50:42 +0100
+Subject: igc: Report VLAN EtherType matching back to user
+
+From: Kurt Kanzenbach <kurt@linutronix.de>
+
+[ Upstream commit 088464abd48cf3735aee91f9e211b32da9d81117 ]
+
+Currently the driver allows configuring matching by VLAN EtherType.
+However, the retrieval function does not report it back to the user. Add
+it.
+
+Before:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 action 0
+|Added rule with ID 63
+|root@host:~# ethtool --show-ntuple enp3s0
+|4 RX rings available
+|Total 1 rules
+|
+|Filter: 63
+|        Flow Type: Raw Ethernet
+|        Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Ethertype: 0x0 mask: 0xFFFF
+|        Action: Direct to queue 0
+
+After:
+|root@host:~# ethtool -N enp3s0 flow-type ether vlan-etype 0x8100 action 0
+|Added rule with ID 63
+|root@host:~# ethtool --show-ntuple enp3s0
+|4 RX rings available
+|Total 1 rules
+|
+|Filter: 63
+|        Flow Type: Raw Ethernet
+|        Src MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Dest MAC addr: 00:00:00:00:00:00 mask: FF:FF:FF:FF:FF:FF
+|        Ethertype: 0x0 mask: 0xFFFF
+|        VLAN EtherType: 0x8100 mask: 0x0
+|        VLAN: 0x0 mask: 0xffff
+|        User-defined: 0x0 mask: 0xffffffffffffffff
+|        Action: Direct to queue 0
+
+Fixes: 2b477d057e33 ("igc: Integrate flex filter into ethtool ops")
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ethtool.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index 81897f7a90a91..51ef18060dbc4 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -979,6 +979,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
+               fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+       }
++      if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) {
++              fsp->flow_type |= FLOW_EXT;
++              fsp->h_ext.vlan_etype = rule->filter.vlan_etype;
++              fsp->m_ext.vlan_etype = ETHER_TYPE_FULL_MASK;
++      }
++
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+               fsp->flow_type |= FLOW_EXT;
+               fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci);
+-- 
+2.43.0
+
diff --git a/queue-6.1/ipv4-ipv6-use-splice_eof-to-flush.patch b/queue-6.1/ipv4-ipv6-use-splice_eof-to-flush.patch
new file mode 100644 (file)
index 0000000..fe1c466
--- /dev/null
@@ -0,0 +1,262 @@
+From 5611af5949dfd630156868ccdfe55a978083caf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Jun 2023 19:19:13 +0100
+Subject: ipv4, ipv6: Use splice_eof() to flush
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 1d7e4538a5463faa0b0e26a7a7b6bd68c7dfdd78 ]
+
+Allow splice to undo the effects of MSG_MORE after prematurely ending a
+splice/sendfile due to getting an EOF condition (->splice_read() returned
+0) after splice had called sendmsg() with MSG_MORE set when the user didn't
+set MSG_MORE.
+
+For UDP, a pending packet will not be emitted if the socket is closed
+before it is flushed; with this change, it will be flushed by ->splice_eof().
+
+For TCP, it's not clear that MSG_MORE is actually effective.
+
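+As a rough sketch of the plumbing (the file-level hook name is an assumption
+here; only the socket/proto callbacks below are introduced by this patch):
+
+    splice()/sendfile() hits EOF after sending with MSG_MORE
+      -> file->f_op->splice_eof()             /* socket file hook, assumed */
+        -> sock->ops->splice_eof(sock)        /* inet_splice_eof()         */
+          -> READ_ONCE(sk->sk_prot)->splice_eof(sock)
+                                              /* tcp_splice_eof() or       */
+                                              /* udp_splice_eof(): push    */
+                                              /* any pending data          */
+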
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Kuniyuki Iwashima <kuniyu@amazon.com>
+cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+cc: David Ahern <dsahern@kernel.org>
+cc: Jens Axboe <axboe@kernel.dk>
+cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: a0002127cd74 ("udp: move udp->no_check6_tx to udp->udp_flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_common.h |  1 +
+ include/net/tcp.h         |  1 +
+ include/net/udp.h         |  1 +
+ net/ipv4/af_inet.c        | 18 ++++++++++++++++++
+ net/ipv4/tcp.c            | 16 ++++++++++++++++
+ net/ipv4/tcp_ipv4.c       |  1 +
+ net/ipv4/udp.c            | 16 ++++++++++++++++
+ net/ipv6/af_inet6.c       |  1 +
+ net/ipv6/tcp_ipv6.c       |  1 +
+ net/ipv6/udp.c            | 15 +++++++++++++++
+ 10 files changed, 71 insertions(+)
+
+diff --git a/include/net/inet_common.h b/include/net/inet_common.h
+index cec453c18f1d6..4673bbfd2811f 100644
+--- a/include/net/inet_common.h
++++ b/include/net/inet_common.h
+@@ -33,6 +33,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+               bool kern);
+ int inet_send_prepare(struct sock *sk);
+ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
++void inet_splice_eof(struct socket *sock);
+ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+                     size_t size, int flags);
+ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index c3d56b337f358..4c838f7290dd9 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -332,6 +332,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
+ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
+                        size_t size, struct ubuf_info *uarg);
++void tcp_splice_eof(struct socket *sock);
+ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
+                int flags);
+ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+diff --git a/include/net/udp.h b/include/net/udp.h
+index fee053bcd17c6..fa4cdbe55552c 100644
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -269,6 +269,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
+ int udp_err(struct sk_buff *, u32);
+ int udp_abort(struct sock *sk, int err);
+ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
++void udp_splice_eof(struct socket *sock);
+ int udp_push_pending_frames(struct sock *sk);
+ void udp_flush_pending_frames(struct sock *sk);
+ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 5d379df90c826..347c3768df6e8 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -838,6 +838,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ }
+ EXPORT_SYMBOL(inet_sendmsg);
++void inet_splice_eof(struct socket *sock)
++{
++      const struct proto *prot;
++      struct sock *sk = sock->sk;
++
++      if (unlikely(inet_send_prepare(sk)))
++              return;
++
++      /* IPV6_ADDRFORM can change sk->sk_prot under us. */
++      prot = READ_ONCE(sk->sk_prot);
++      if (prot->splice_eof)
++              prot->splice_eof(sock);
++}
++EXPORT_SYMBOL_GPL(inet_splice_eof);
++
+ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+                     size_t size, int flags)
+ {
+@@ -1057,6 +1072,7 @@ const struct proto_ops inet_stream_ops = {
+ #ifdef CONFIG_MMU
+       .mmap              = tcp_mmap,
+ #endif
++      .splice_eof        = inet_splice_eof,
+       .sendpage          = inet_sendpage,
+       .splice_read       = tcp_splice_read,
+       .read_sock         = tcp_read_sock,
+@@ -1091,6 +1107,7 @@ const struct proto_ops inet_dgram_ops = {
+       .read_skb          = udp_read_skb,
+       .recvmsg           = inet_recvmsg,
+       .mmap              = sock_no_mmap,
++      .splice_eof        = inet_splice_eof,
+       .sendpage          = inet_sendpage,
+       .set_peek_off      = sk_set_peek_off,
+ #ifdef CONFIG_COMPAT
+@@ -1122,6 +1139,7 @@ static const struct proto_ops inet_sockraw_ops = {
+       .sendmsg           = inet_sendmsg,
+       .recvmsg           = inet_recvmsg,
+       .mmap              = sock_no_mmap,
++      .splice_eof        = inet_splice_eof,
+       .sendpage          = inet_sendpage,
+ #ifdef CONFIG_COMPAT
+       .compat_ioctl      = inet_compat_ioctl,
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 3935451ad061e..0b7844a8d5711 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1492,6 +1492,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ }
+ EXPORT_SYMBOL(tcp_sendmsg);
++void tcp_splice_eof(struct socket *sock)
++{
++      struct sock *sk = sock->sk;
++      struct tcp_sock *tp = tcp_sk(sk);
++      int mss_now, size_goal;
++
++      if (!tcp_write_queue_tail(sk))
++              return;
++
++      lock_sock(sk);
++      mss_now = tcp_send_mss(sk, &size_goal, 0);
++      tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
++      release_sock(sk);
++}
++EXPORT_SYMBOL_GPL(tcp_splice_eof);
++
+ /*
+  *    Handle reading urgent data. BSD has very simple semantics for
+  *    this, no blocking and very strange errors 8)
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 7ebbbe561e402..be2c807eed15d 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -3067,6 +3067,7 @@ struct proto tcp_prot = {
+       .keepalive              = tcp_set_keepalive,
+       .recvmsg                = tcp_recvmsg,
+       .sendmsg                = tcp_sendmsg,
++      .splice_eof             = tcp_splice_eof,
+       .sendpage               = tcp_sendpage,
+       .backlog_rcv            = tcp_v4_do_rcv,
+       .release_cb             = tcp_release_cb,
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index b49cb3df01bb4..e8dd2880ac9aa 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1332,6 +1332,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ }
+ EXPORT_SYMBOL(udp_sendmsg);
++void udp_splice_eof(struct socket *sock)
++{
++      struct sock *sk = sock->sk;
++      struct udp_sock *up = udp_sk(sk);
++
++      if (!up->pending || READ_ONCE(up->corkflag))
++              return;
++
++      lock_sock(sk);
++      if (up->pending && !READ_ONCE(up->corkflag))
++              udp_push_pending_frames(sk);
++      release_sock(sk);
++}
++EXPORT_SYMBOL_GPL(udp_splice_eof);
++
+ int udp_sendpage(struct sock *sk, struct page *page, int offset,
+                size_t size, int flags)
+ {
+@@ -2907,6 +2922,7 @@ struct proto udp_prot = {
+       .getsockopt             = udp_getsockopt,
+       .sendmsg                = udp_sendmsg,
+       .recvmsg                = udp_recvmsg,
++      .splice_eof             = udp_splice_eof,
+       .sendpage               = udp_sendpage,
+       .release_cb             = ip4_datagram_release_cb,
+       .hash                   = udp_lib_hash,
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index b5309ae87fd79..a2f29ca516000 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -711,6 +711,7 @@ const struct proto_ops inet6_stream_ops = {
+ #ifdef CONFIG_MMU
+       .mmap              = tcp_mmap,
+ #endif
++      .splice_eof        = inet_splice_eof,
+       .sendpage          = inet_sendpage,
+       .sendmsg_locked    = tcp_sendmsg_locked,
+       .sendpage_locked   = tcp_sendpage_locked,
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 7be89dcfd5fc5..ba9a22db5805c 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -2158,6 +2158,7 @@ struct proto tcpv6_prot = {
+       .keepalive              = tcp_set_keepalive,
+       .recvmsg                = tcp_recvmsg,
+       .sendmsg                = tcp_sendmsg,
++      .splice_eof             = tcp_splice_eof,
+       .sendpage               = tcp_sendpage,
+       .backlog_rcv            = tcp_v6_do_rcv,
+       .release_cb             = tcp_release_cb,
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 7f49f69226a21..2a65136dca773 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1657,6 +1657,20 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       goto out;
+ }
++static void udpv6_splice_eof(struct socket *sock)
++{
++      struct sock *sk = sock->sk;
++      struct udp_sock *up = udp_sk(sk);
++
++      if (!up->pending || READ_ONCE(up->corkflag))
++              return;
++
++      lock_sock(sk);
++      if (up->pending && !READ_ONCE(up->corkflag))
++              udp_v6_push_pending_frames(sk);
++      release_sock(sk);
++}
++
+ void udpv6_destroy_sock(struct sock *sk)
+ {
+       struct udp_sock *up = udp_sk(sk);
+@@ -1768,6 +1782,7 @@ struct proto udpv6_prot = {
+       .getsockopt             = udpv6_getsockopt,
+       .sendmsg                = udpv6_sendmsg,
+       .recvmsg                = udpv6_recvmsg,
++      .splice_eof             = udpv6_splice_eof,
+       .release_cb             = ip6_datagram_release_cb,
+       .hash                   = udp_lib_hash,
+       .unhash                 = udp_lib_unhash,
+-- 
+2.43.0
+
diff --git a/queue-6.1/khugepage-replace-try_to_release_page-with-filemap_r.patch b/queue-6.1/khugepage-replace-try_to_release_page-with-filemap_r.patch
new file mode 100644 (file)
index 0000000..bdbdadc
--- /dev/null
@@ -0,0 +1,95 @@
+From 3d1c97b9a2cc1afdd2fa063fb59338e2a8a04818 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 23:30:53 -0800
+Subject: khugepage: replace try_to_release_page() with filemap_release_folio()
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit 64ab3195ea077eaeedc8b382939c3dc5ca56f369 ]
+
+Replace some calls with their folio equivalents.  This change removes 4
+calls to compound_head() and is in preparation for the removal of the
+try_to_release_page() wrapper.
+
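+The saving comes from resolving the head page once, roughly (illustrative
+only):
+
+    struct folio *folio = page_folio(page);  /* one compound_head() lookup */
+    folio_test_dirty(folio);                  /* vs PageDirty(page) etc.,  */
+    folio_test_writeback(folio);              /* which each resolve the    */
+    folio_isolate_lru(folio);                 /* head page again           */
+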
+Link: https://lkml.kernel.org/r/20221118073055.55694-3-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index ef72d3df4b65b..6fc7db587c453 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1818,6 +1818,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+       xas_set(&xas, start);
+       for (index = start; index < end; index++) {
+               struct page *page = xas_next(&xas);
++              struct folio *folio;
+               VM_BUG_ON(index != xas.xa_index);
+               if (is_shmem) {
+@@ -1844,8 +1845,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+                       }
+                       if (xa_is_value(page) || !PageUptodate(page)) {
+-                              struct folio *folio;
+-
+                               xas_unlock_irq(&xas);
+                               /* swap in or instantiate fallocated page */
+                               if (shmem_get_folio(mapping->host, index,
+@@ -1933,13 +1932,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+                       goto out_unlock;
+               }
+-              if (page_mapping(page) != mapping) {
++              folio = page_folio(page);
++
++              if (folio_mapping(folio) != mapping) {
+                       result = SCAN_TRUNCATED;
+                       goto out_unlock;
+               }
+-              if (!is_shmem && (PageDirty(page) ||
+-                                PageWriteback(page))) {
++              if (!is_shmem && (folio_test_dirty(folio) ||
++                                folio_test_writeback(folio))) {
+                       /*
+                        * khugepaged only works on read-only fd, so this
+                        * page is dirty because it hasn't been flushed
+@@ -1949,20 +1950,20 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+                       goto out_unlock;
+               }
+-              if (isolate_lru_page(page)) {
++              if (folio_isolate_lru(folio)) {
+                       result = SCAN_DEL_PAGE_LRU;
+                       goto out_unlock;
+               }
+-              if (page_has_private(page) &&
+-                  !try_to_release_page(page, GFP_KERNEL)) {
++              if (folio_has_private(folio) &&
++                  !filemap_release_folio(folio, GFP_KERNEL)) {
+                       result = SCAN_PAGE_HAS_PRIVATE;
+-                      putback_lru_page(page);
++                      folio_putback_lru(folio);
+                       goto out_unlock;
+               }
+-              if (page_mapped(page))
+-                      try_to_unmap(page_folio(page),
++              if (folio_mapped(folio))
++                      try_to_unmap(folio,
+                                       TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
+               xas_lock_irq(&xas);
+-- 
+2.43.0
+
diff --git a/queue-6.1/lib-group_cpus.c-avoid-acquiring-cpu-hotplug-lock-in.patch b/queue-6.1/lib-group_cpus.c-avoid-acquiring-cpu-hotplug-lock-in.patch
new file mode 100644 (file)
index 0000000..02d5271
--- /dev/null
@@ -0,0 +1,102 @@
+From f07953806fd1f09054b8a7c16085bb0faaba9aec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Nov 2023 16:35:59 +0800
+Subject: lib/group_cpus.c: avoid acquiring cpu hotplug lock in
+ group_cpus_evenly
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 0263f92fadbb9d294d5971ac57743f882c93b2b3 ]
+
+group_cpus_evenly() could be part of a storage driver's error handler
+(the nvme driver, for example), which may run during CPU hotplug, when a
+storage queue has to drain its pending IOs because all CPUs associated
+with the queue are offline and the queue is becoming inactive. Handling
+that IO needs the error handler to provide forward progress.
+
+Then deadlock is caused:
+
+1) inside CPU hotplug handler, CPU hotplug lock is held, and blk-mq's
+   handler is waiting for inflight IO
+
+2) error handler is waiting for CPU hotplug lock
+
+3) inflight IO can't be completed in blk-mq's CPU hotplug handler
+   because error handling can't provide forward progress.
+
+Solve the deadlock by not holding CPU hotplug lock in group_cpus_evenly(),
+in which two stage spreads are taken: 1) the 1st stage is over all present
+CPUs; 2) the end stage is over all other CPUs.
+
+It turns out the two-stage spread just needs a consistent 'cpu_present_mask',
+so remove the CPU hotplug lock by storing a copy of the mask in a local
+cache. This doesn't change correctness, because all CPUs are still covered.
+
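+The resulting two-stage spread then looks roughly like this (sketch,
+simplified from the hunk below):
+
+    cpumask_copy(npresmsk, data_race(cpu_present_mask));  /* local snapshot */
+    __group_cpus_evenly(..., npresmsk, ...);               /* 1st: present  */
+    cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk);
+    __group_cpus_evenly(..., npresmsk, ...);               /* 2nd: the rest */
+    /* no cpus_read_lock()/cpus_read_unlock() around any of this any more */
+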
+Link: https://lkml.kernel.org/r/20231120083559.285174-1-ming.lei@redhat.com
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reported-by: Yi Zhang <yi.zhang@redhat.com>
+Reported-by: Guangwu Zhang <guazhang@redhat.com>
+Tested-by: Guangwu Zhang <guazhang@redhat.com>
+Reviewed-by: Chengming Zhou <zhouchengming@bytedance.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Cc: Keith Busch <kbusch@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/group_cpus.c | 22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/lib/group_cpus.c b/lib/group_cpus.c
+index 99f08c6cb9d97..156b1446d2a20 100644
+--- a/lib/group_cpus.c
++++ b/lib/group_cpus.c
+@@ -365,13 +365,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
+       if (!masks)
+               goto fail_node_to_cpumask;
+-      /* Stabilize the cpumasks */
+-      cpus_read_lock();
+       build_node_to_cpumask(node_to_cpumask);
++      /*
++       * Make a local cache of 'cpu_present_mask', so the two stages
++       * spread can observe consistent 'cpu_present_mask' without holding
++       * cpu hotplug lock, then we can reduce deadlock risk with cpu
++       * hotplug code.
++       *
++       * Here CPU hotplug may happen when reading `cpu_present_mask`, and
++       * we can live with the case because it only affects that hotplug
++       * CPU is handled in the 1st or 2nd stage, and either way is correct
++       * from API user viewpoint since 2-stage spread is sort of
++       * optimization.
++       */
++      cpumask_copy(npresmsk, data_race(cpu_present_mask));
++
+       /* grouping present CPUs first */
+       ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+-                                cpu_present_mask, nmsk, masks);
++                                npresmsk, nmsk, masks);
+       if (ret < 0)
+               goto fail_build_affinity;
+       nr_present = ret;
+@@ -386,15 +398,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
+               curgrp = 0;
+       else
+               curgrp = nr_present;
+-      cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
++      cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk);
+       ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+                                 npresmsk, nmsk, masks);
+       if (ret >= 0)
+               nr_others = ret;
+  fail_build_affinity:
+-      cpus_read_unlock();
+-
+       if (ret >= 0)
+               WARN_ON(nr_present + nr_others < numgrps);
+-- 
+2.43.0
+
diff --git a/queue-6.1/media-camss-sm8250-virtual-channels-for-csid.patch b/queue-6.1/media-camss-sm8250-virtual-channels-for-csid.patch
new file mode 100644 (file)
index 0000000..7b657bf
--- /dev/null
@@ -0,0 +1,307 @@
+From e153f80eac85c4d13fc6aa0c5ddb79469a59ee34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 11:40:34 +0200
+Subject: media: camss: sm8250: Virtual channels for CSID
+
+From: Milen Mitkov <quic_mmitkov@quicinc.com>
+
+[ Upstream commit 3c4ed72a16bc6733cda9c65048af74a2e8eaa0eb ]
+
+CSID hardware on SM8250 can demux up to 4 simultaneous streams
+based on virtual channel (vc) or datatype (dt).
+The CSID subdevice entity now has 4 source ports that can be
+enabled/disabled and thus can control which virtual channels
+are enabled. Datatype demuxing not tested.
+
+In order to keep a valid internal state of the subdevice,
+implicit format propagation from the sink to the source pads
+has been preserved. However, the format on each source pad
+can be different and in that case it must be configured explicitly.
+
+CSID's s_stream is called when any stream is started or stopped.
+It will call configure_streams() that will rewrite IRQ settings to HW.
+When multiple streams are running simultaneously there is an issue
+when writing IRQ settings for one stream while another is still
+running, thus avoid re-writing settings if they were not changed
+in link setup, or by fully powering off the CSID hardware.
+
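+The pad-to-channel mapping this introduces is, roughly (illustrative
+summary of the link_setup change):
+
+    /* pad 0 = sink; source pads 1..4 map to VC 0..3 and RDI0..RDI3 */
+    if (flags & MEDIA_LNK_FL_ENABLED)
+        csid->phy.en_vc |= BIT(local->index - 1);
+    else
+        csid->phy.en_vc &= ~BIT(local->index - 1);
+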
+Signed-off-by: Milen Mitkov <quic_mmitkov@quicinc.com>
+Reviewed-by: Robert Foss <robert.foss@linaro.org>
+Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+Acked-by: Robert Foss <robert.foss@linaro.org>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Stable-dep-of: e655d1ae9703 ("media: qcom: camss: Fix set CSI2_RX_CFG1_VC_MODE when VC is greater than 3")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../platform/qcom/camss/camss-csid-gen2.c     | 54 ++++++++++++-------
+ .../media/platform/qcom/camss/camss-csid.c    | 44 ++++++++++-----
+ .../media/platform/qcom/camss/camss-csid.h    | 11 +++-
+ 3 files changed, 74 insertions(+), 35 deletions(-)
+
+diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+index 904208f6f9546..2e015e69a6ad6 100644
+--- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c
++++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+@@ -334,13 +334,14 @@ static const struct csid_format csid_formats[] = {
+       },
+ };
+-static void csid_configure_stream(struct csid_device *csid, u8 enable)
++static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc)
+ {
+       struct csid_testgen_config *tg = &csid->testgen;
+       u32 val;
+       u32 phy_sel = 0;
+       u8 lane_cnt = csid->phy.lane_cnt;
+-      struct v4l2_mbus_framefmt *input_format = &csid->fmt[MSM_CSID_PAD_SRC];
++      /* Source pads matching RDI channels on hardware. Pad 1 -> RDI0, Pad 2 -> RDI1, etc. */
++      struct v4l2_mbus_framefmt *input_format = &csid->fmt[MSM_CSID_PAD_FIRST_SRC + vc];
+       const struct csid_format *format = csid_get_fmt_entry(csid->formats, csid->nformats,
+                                                             input_format->code);
+@@ -351,8 +352,7 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+               phy_sel = csid->phy.csiphy_id;
+       if (enable) {
+-              u8 vc = 0; /* Virtual Channel 0 */
+-              u8 dt_id = vc * 4;
++              u8 dt_id = vc;
+               if (tg->enabled) {
+                       /* configure one DT, infinite frames */
+@@ -392,42 +392,42 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+               val |= format->data_type << RDI_CFG0_DATA_TYPE;
+               val |= vc << RDI_CFG0_VIRTUAL_CHANNEL;
+               val |= dt_id << RDI_CFG0_DT_ID;
+-              writel_relaxed(val, csid->base + CSID_RDI_CFG0(0));
++              writel_relaxed(val, csid->base + CSID_RDI_CFG0(vc));
+               /* CSID_TIMESTAMP_STB_POST_IRQ */
+               val = 2 << RDI_CFG1_TIMESTAMP_STB_SEL;
+-              writel_relaxed(val, csid->base + CSID_RDI_CFG1(0));
++              writel_relaxed(val, csid->base + CSID_RDI_CFG1(vc));
+               val = 1;
+-              writel_relaxed(val, csid->base + CSID_RDI_FRM_DROP_PERIOD(0));
++              writel_relaxed(val, csid->base + CSID_RDI_FRM_DROP_PERIOD(vc));
+               val = 0;
+-              writel_relaxed(val, csid->base + CSID_RDI_FRM_DROP_PATTERN(0));
++              writel_relaxed(val, csid->base + CSID_RDI_FRM_DROP_PATTERN(vc));
+               val = 1;
+-              writel_relaxed(val, csid->base + CSID_RDI_IRQ_SUBSAMPLE_PERIOD(0));
++              writel_relaxed(val, csid->base + CSID_RDI_IRQ_SUBSAMPLE_PERIOD(vc));
+               val = 0;
+-              writel_relaxed(val, csid->base + CSID_RDI_IRQ_SUBSAMPLE_PATTERN(0));
++              writel_relaxed(val, csid->base + CSID_RDI_IRQ_SUBSAMPLE_PATTERN(vc));
+               val = 1;
+-              writel_relaxed(val, csid->base + CSID_RDI_RPP_PIX_DROP_PERIOD(0));
++              writel_relaxed(val, csid->base + CSID_RDI_RPP_PIX_DROP_PERIOD(vc));
+               val = 0;
+-              writel_relaxed(val, csid->base + CSID_RDI_RPP_PIX_DROP_PATTERN(0));
++              writel_relaxed(val, csid->base + CSID_RDI_RPP_PIX_DROP_PATTERN(vc));
+               val = 1;
+-              writel_relaxed(val, csid->base + CSID_RDI_RPP_LINE_DROP_PERIOD(0));
++              writel_relaxed(val, csid->base + CSID_RDI_RPP_LINE_DROP_PERIOD(vc));
+               val = 0;
+-              writel_relaxed(val, csid->base + CSID_RDI_RPP_LINE_DROP_PATTERN(0));
++              writel_relaxed(val, csid->base + CSID_RDI_RPP_LINE_DROP_PATTERN(vc));
+               val = 0;
+-              writel_relaxed(val, csid->base + CSID_RDI_CTRL(0));
++              writel_relaxed(val, csid->base + CSID_RDI_CTRL(vc));
+-              val = readl_relaxed(csid->base + CSID_RDI_CFG0(0));
++              val = readl_relaxed(csid->base + CSID_RDI_CFG0(vc));
+               val |=  1 << RDI_CFG0_ENABLE;
+-              writel_relaxed(val, csid->base + CSID_RDI_CFG0(0));
++              writel_relaxed(val, csid->base + CSID_RDI_CFG0(vc));
+       }
+       if (tg->enabled) {
+@@ -453,7 +453,16 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+               val = HALT_CMD_RESUME_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD;
+       else
+               val = HALT_CMD_HALT_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD;
+-      writel_relaxed(val, csid->base + CSID_RDI_CTRL(0));
++      writel_relaxed(val, csid->base + CSID_RDI_CTRL(vc));
++}
++
++static void csid_configure_stream(struct csid_device *csid, u8 enable)
++{
++      u8 i;
++      /* Loop through all enabled VCs and configure stream for each */
++      for (i = 0; i < MSM_CSID_MAX_SRC_STREAMS; i++)
++              if (csid->phy.en_vc & BIT(i))
++                      __csid_configure_stream(csid, enable, i);
+ }
+ static int csid_configure_testgen_pattern(struct csid_device *csid, s32 val)
+@@ -499,6 +508,7 @@ static irqreturn_t csid_isr(int irq, void *dev)
+       struct csid_device *csid = dev;
+       u32 val;
+       u8 reset_done;
++      int i;
+       val = readl_relaxed(csid->base + CSID_TOP_IRQ_STATUS);
+       writel_relaxed(val, csid->base + CSID_TOP_IRQ_CLEAR);
+@@ -507,8 +517,12 @@ static irqreturn_t csid_isr(int irq, void *dev)
+       val = readl_relaxed(csid->base + CSID_CSI2_RX_IRQ_STATUS);
+       writel_relaxed(val, csid->base + CSID_CSI2_RX_IRQ_CLEAR);
+-      val = readl_relaxed(csid->base + CSID_CSI2_RDIN_IRQ_STATUS(0));
+-      writel_relaxed(val, csid->base + CSID_CSI2_RDIN_IRQ_CLEAR(0));
++      /* Read and clear IRQ status for each enabled RDI channel */
++      for (i = 0; i < MSM_CSID_MAX_SRC_STREAMS; i++)
++              if (csid->phy.en_vc & BIT(i)) {
++                      val = readl_relaxed(csid->base + CSID_CSI2_RDIN_IRQ_STATUS(i));
++                      writel_relaxed(val, csid->base + CSID_CSI2_RDIN_IRQ_CLEAR(i));
++              }
+       val = 1 << IRQ_CMD_CLEAR;
+       writel_relaxed(val, csid->base + CSID_IRQ_CMD);
+diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c
+index 88f188e0f7501..6360314f04a63 100644
+--- a/drivers/media/platform/qcom/camss/camss-csid.c
++++ b/drivers/media/platform/qcom/camss/camss-csid.c
+@@ -196,6 +196,8 @@ static int csid_set_power(struct v4l2_subdev *sd, int on)
+                       return ret;
+               }
++              csid->phy.need_vc_update = true;
++
+               enable_irq(csid->irq);
+               ret = csid->ops->reset(csid);
+@@ -249,7 +251,10 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable)
+                       return -ENOLINK;
+       }
+-      csid->ops->configure_stream(csid, enable);
++      if (csid->phy.need_vc_update) {
++              csid->ops->configure_stream(csid, enable);
++              csid->phy.need_vc_update = false;
++      }
+       return 0;
+ }
+@@ -460,6 +465,7 @@ static int csid_set_format(struct v4l2_subdev *sd,
+ {
+       struct csid_device *csid = v4l2_get_subdevdata(sd);
+       struct v4l2_mbus_framefmt *format;
++      int i;
+       format = __csid_get_format(csid, sd_state, fmt->pad, fmt->which);
+       if (format == NULL)
+@@ -468,14 +474,14 @@ static int csid_set_format(struct v4l2_subdev *sd,
+       csid_try_format(csid, sd_state, fmt->pad, &fmt->format, fmt->which);
+       *format = fmt->format;
+-      /* Propagate the format from sink to source */
++      /* Propagate the format from sink to source pads */
+       if (fmt->pad == MSM_CSID_PAD_SINK) {
+-              format = __csid_get_format(csid, sd_state, MSM_CSID_PAD_SRC,
+-                                         fmt->which);
++              for (i = MSM_CSID_PAD_FIRST_SRC; i < MSM_CSID_PADS_NUM; ++i) {
++                      format = __csid_get_format(csid, sd_state, i, fmt->which);
+-              *format = fmt->format;
+-              csid_try_format(csid, sd_state, MSM_CSID_PAD_SRC, format,
+-                              fmt->which);
++                      *format = fmt->format;
++                      csid_try_format(csid, sd_state, i, format, fmt->which);
++              }
+       }
+       return 0;
+@@ -738,7 +744,6 @@ static int csid_link_setup(struct media_entity *entity,
+               struct csid_device *csid;
+               struct csiphy_device *csiphy;
+               struct csiphy_lanes_cfg *lane_cfg;
+-              struct v4l2_subdev_format format = { 0 };
+               sd = media_entity_to_v4l2_subdev(entity);
+               csid = v4l2_get_subdevdata(sd);
+@@ -761,11 +766,22 @@ static int csid_link_setup(struct media_entity *entity,
+               lane_cfg = &csiphy->cfg.csi2->lane_cfg;
+               csid->phy.lane_cnt = lane_cfg->num_data;
+               csid->phy.lane_assign = csid_get_lane_assign(lane_cfg);
++      }
++      /* Decide which virtual channels to enable based on which source pads are enabled */
++      if (local->flags & MEDIA_PAD_FL_SOURCE) {
++              struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity);
++              struct csid_device *csid = v4l2_get_subdevdata(sd);
++              struct device *dev = csid->camss->dev;
++
++              if (flags & MEDIA_LNK_FL_ENABLED)
++                      csid->phy.en_vc |= BIT(local->index - 1);
++              else
++                      csid->phy.en_vc &= ~BIT(local->index - 1);
+-              /* Reset format on source pad to sink pad format */
+-              format.pad = MSM_CSID_PAD_SRC;
+-              format.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+-              csid_set_format(&csid->subdev, NULL, &format);
++              csid->phy.need_vc_update = true;
++
++              dev_dbg(dev, "%s: Enabled CSID virtual channels mask 0x%x\n",
++                      __func__, csid->phy.en_vc);
+       }
+       return 0;
+@@ -816,6 +832,7 @@ int msm_csid_register_entity(struct csid_device *csid,
+       struct v4l2_subdev *sd = &csid->subdev;
+       struct media_pad *pads = csid->pads;
+       struct device *dev = csid->camss->dev;
++      int i;
+       int ret;
+       v4l2_subdev_init(sd, &csid_v4l2_ops);
+@@ -852,7 +869,8 @@ int msm_csid_register_entity(struct csid_device *csid,
+       }
+       pads[MSM_CSID_PAD_SINK].flags = MEDIA_PAD_FL_SINK;
+-      pads[MSM_CSID_PAD_SRC].flags = MEDIA_PAD_FL_SOURCE;
++      for (i = MSM_CSID_PAD_FIRST_SRC; i < MSM_CSID_PADS_NUM; ++i)
++              pads[i].flags = MEDIA_PAD_FL_SOURCE;
+       sd->entity.function = MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER;
+       sd->entity.ops = &csid_media_ops;
+diff --git a/drivers/media/platform/qcom/camss/camss-csid.h b/drivers/media/platform/qcom/camss/camss-csid.h
+index f06040e44c515..d4b48432a0973 100644
+--- a/drivers/media/platform/qcom/camss/camss-csid.h
++++ b/drivers/media/platform/qcom/camss/camss-csid.h
+@@ -19,8 +19,13 @@
+ #include <media/v4l2-subdev.h>
+ #define MSM_CSID_PAD_SINK 0
+-#define MSM_CSID_PAD_SRC 1
+-#define MSM_CSID_PADS_NUM 2
++#define MSM_CSID_PAD_FIRST_SRC 1
++#define MSM_CSID_PADS_NUM 5
++
++#define MSM_CSID_PAD_SRC (MSM_CSID_PAD_FIRST_SRC)
++
++/* CSID hardware can demultiplex up to 4 outputs */
++#define MSM_CSID_MAX_SRC_STREAMS      4
+ #define DATA_TYPE_EMBEDDED_DATA_8BIT  0x12
+ #define DATA_TYPE_YUV420_8BIT         0x18
+@@ -81,6 +86,8 @@ struct csid_phy_config {
+       u8 csiphy_id;
+       u8 lane_cnt;
+       u32 lane_assign;
++      u32 en_vc;
++      u8 need_vc_update;
+ };
+ struct csid_device;
+-- 
+2.43.0
+
diff --git a/queue-6.1/media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-v.patch b/queue-6.1/media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-v.patch
new file mode 100644 (file)
index 0000000..a318ff4
--- /dev/null
@@ -0,0 +1,39 @@
+From b716307f6947508dbb996139baebff85b0be36ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 16:16:14 +0100
+Subject: media: qcom: camss: Fix set CSI2_RX_CFG1_VC_MODE when VC is greater
+ than 3
+
+From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+
+[ Upstream commit e655d1ae9703286cef7fda8675cad62f649dc183 ]
+
+VC_MODE = 0 implies a two-bit VC address.
+VC_MODE = 1 is required for VCs with an address wider than two bits.
+
+Fixes: eebe6d00e9bf ("media: camss: Add support for CSID hardware version Titan 170")
+Cc: stable@vger.kernel.org
+Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/media/platform/qcom/camss/camss-csid-gen2.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+index 2e015e69a6ad6..23acc387be5f0 100644
+--- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c
++++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c
+@@ -446,6 +446,8 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc)
+       writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG0);
+       val = 1 << CSI2_RX_CFG1_PACKET_ECC_CORRECTION_EN;
++      if (vc > 3)
++              val |= 1 << CSI2_RX_CFG1_VC_MODE;
+       val |= 1 << CSI2_RX_CFG1_MISR_EN;
+       writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1);
+-- 
+2.43.0
+
diff --git a/queue-6.1/memory-failure-convert-truncate_error_page-to-use-fo.patch b/queue-6.1/memory-failure-convert-truncate_error_page-to-use-fo.patch
new file mode 100644 (file)
index 0000000..35b5035
--- /dev/null
@@ -0,0 +1,47 @@
+From 86430873bd38064e37a7298e400a5f663c4efa25 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 23:30:54 -0800
+Subject: memory-failure: convert truncate_error_page() to use folio
+
+From: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+
+[ Upstream commit ac5efa782041670b63a05c36d92d02a80e50bb63 ]
+
+Replace try_to_release_page() with filemap_release_folio().  This change
+is in preparation for the removal of the try_to_release_page() wrapper.
+
+Link: https://lkml.kernel.org/r/20221118073055.55694-4-vishal.moola@gmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/memory-failure.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/mm/memory-failure.c b/mm/memory-failure.c
+index ebd717157c813..6355166a6bb28 100644
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -827,12 +827,13 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
+       int ret = MF_FAILED;
+       if (mapping->a_ops->error_remove_page) {
++              struct folio *folio = page_folio(p);
+               int err = mapping->a_ops->error_remove_page(mapping, p);
+               if (err != 0) {
+                       pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
+-              } else if (page_has_private(p) &&
+-                         !try_to_release_page(p, GFP_NOIO)) {
++              } else if (folio_has_private(folio) &&
++                         !filemap_release_folio(folio, GFP_NOIO)) {
+                       pr_info("%#lx: failed to release buffers\n", pfn);
+               } else {
+                       ret = MF_RECOVERED;
+-- 
+2.43.0
+
diff --git a/queue-6.1/mlxbf_gige-fix-receive-packet-race-condition.patch b/queue-6.1/mlxbf_gige-fix-receive-packet-race-condition.patch
new file mode 100644 (file)
index 0000000..8792e43
--- /dev/null
@@ -0,0 +1,63 @@
+From e38ef647ff2cf5958850b2c4b30eebe83d34dcaf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Dec 2023 18:47:39 -0500
+Subject: mlxbf_gige: fix receive packet race condition
+
+From: David Thompson <davthompson@nvidia.com>
+
+[ Upstream commit dcea1bd45e6d111cc8fc1aaefa7e31694089bda3 ]
+
+Under heavy traffic, the BlueField Gigabit interface can
+become unresponsive. This is due to a possible race condition
+in the mlxbf_gige_rx_packet function, where the function exits
+with producer and consumer indices equal but there are remaining
+packet(s) to be processed. In order to prevent this situation,
+read receive consumer index *before* the HW replenish so that
+the mlxbf_gige_rx_packet function returns an accurate return
+value even if a packet is received into just-replenished buffer
+prior to exiting this routine. If the just-replenished buffer
+is received and occupies the last RX ring entry, the interface
+would not recover and instead would encounter RX packet drops
+related to internal buffer shortages since the driver RX logic
+is not being triggered to drain the RX ring. This patch will
+address and prevent this "ring full" condition.
+
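+In other words, the per-packet ordering becomes roughly the following
+(sketch; the final comparison reflects how the commit message describes the
+exit condition, not a verbatim quote of the code):
+
+    rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI); /* 1. consumer */
+    rx_pi++;                              /* 2. replenish, bump producer    */
+    /* ... write the new producer index back to hardware ... */
+    return rx_pi_rem != rx_ci_rem;        /* 3. tell caller if more pending */
+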
+Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
+Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
+Signed-off-by: David Thompson <davthompson@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+index 0d5a41a2ae010..227d01cace3f0 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+@@ -267,6 +267,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
+               priv->stats.rx_truncate_errors++;
+       }
++      /* Read receive consumer index before replenish so that this routine
++       * returns accurate return value even if packet is received into
++       * just-replenished buffer prior to exiting this routine.
++       */
++      rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI);
++      rx_ci_rem = rx_ci % priv->rx_q_entries;
++
+       /* Let hardware know we've replenished one buffer */
+       rx_pi++;
+@@ -279,8 +286,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
+       rx_pi_rem = rx_pi % priv->rx_q_entries;
+       if (rx_pi_rem == 0)
+               priv->valid_polarity ^= 1;
+-      rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI);
+-      rx_ci_rem = rx_ci % priv->rx_q_entries;
+       if (skb)
+               netif_receive_skb(skb);
+-- 
+2.43.0
+
diff --git a/queue-6.1/mm-memory_hotplug-add-missing-mem_hotplug_lock.patch b/queue-6.1/mm-memory_hotplug-add-missing-mem_hotplug_lock.patch
new file mode 100644 (file)
index 0000000..dc74f92
--- /dev/null
@@ -0,0 +1,218 @@
+From 670dabf41eb1dc619547a684c591cbef6598cb48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Nov 2023 15:53:52 +0100
+Subject: mm/memory_hotplug: add missing mem_hotplug_lock
+
+From: Sumanth Korikkar <sumanthk@linux.ibm.com>
+
+[ Upstream commit 001002e73712cdf6b8d9a103648cda3040ad7647 ]
+
+From Documentation/core-api/memory-hotplug.rst:
+When adding/removing/onlining/offlining memory or adding/removing
+heterogeneous/device memory, we should always hold the mem_hotplug_lock
+in write mode to serialise memory hotplug (e.g. access to global/zone
+variables).
+
+mhp_(de)init_memmap_on_memory() functions can change zone stats and
+struct page content, but they are currently called w/o the
+mem_hotplug_lock.
+
+When memory block is being offlined and when kmemleak goes through each
+populated zone, the following theoretical race conditions could occur:
+CPU 0:                                       | CPU 1:
+memory_offline()                             |
+-> offline_pages()                           |
+       -> mem_hotplug_begin()                |
+          ...                                |
+       -> mem_hotplug_done()                 |
+                                             | kmemleak_scan()
+                                             | -> get_online_mems()
+                                             |    ...
+-> mhp_deinit_memmap_on_memory()             |
+  [not protected by mem_hotplug_begin/done()]|
+  Marks memory section as offline,           |   Retrieves zone_start_pfn
+  poisons vmemmap struct pages and updates   |   and struct page members.
+  the zone related data                      |
+                                             |    ...
+                                             | -> put_online_mems()
+
+Fix this by ensuring mem_hotplug_lock is taken before performing
+mhp_init_memmap_on_memory().  Also ensure that
+mhp_deinit_memmap_on_memory() holds the lock.
+
+online/offline_pages() are currently only called from
+memory_block_online/offline(), so it is safe to move the locking there.
+
+Link: https://lkml.kernel.org/r/20231120145354.308999-2-sumanthk@linux.ibm.com
+Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range")
+Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alexander Gordeev <agordeev@linux.ibm.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: kernel test robot <lkp@intel.com>
+Cc: <stable@vger.kernel.org>   [5.15+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/memory.c | 18 +++++++++++++++---
+ mm/memory_hotplug.c   | 13 ++++++-------
+ 2 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/base/memory.c b/drivers/base/memory.c
+index 9aa0da991cfb9..5d39f3e374dae 100644
+--- a/drivers/base/memory.c
++++ b/drivers/base/memory.c
+@@ -175,6 +175,9 @@ int memory_notify(unsigned long val, void *v)
+       return blocking_notifier_call_chain(&memory_chain, val, v);
+ }
++/*
++ * Must acquire mem_hotplug_lock in write mode.
++ */
+ static int memory_block_online(struct memory_block *mem)
+ {
+       unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+@@ -193,10 +196,11 @@ static int memory_block_online(struct memory_block *mem)
+        * stage helps to keep accounting easier to follow - e.g vmemmaps
+        * belong to the same zone as the memory they backed.
+        */
++      mem_hotplug_begin();
+       if (nr_vmemmap_pages) {
+               ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
+               if (ret)
+-                      return ret;
++                      goto out;
+       }
+       ret = online_pages(start_pfn + nr_vmemmap_pages,
+@@ -204,7 +208,7 @@ static int memory_block_online(struct memory_block *mem)
+       if (ret) {
+               if (nr_vmemmap_pages)
+                       mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+-              return ret;
++              goto out;
+       }
+       /*
+@@ -216,9 +220,14 @@ static int memory_block_online(struct memory_block *mem)
+                                         nr_vmemmap_pages);
+       mem->zone = zone;
++out:
++      mem_hotplug_done();
+       return ret;
+ }
++/*
++ * Must acquire mem_hotplug_lock in write mode.
++ */
+ static int memory_block_offline(struct memory_block *mem)
+ {
+       unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+@@ -233,6 +242,7 @@ static int memory_block_offline(struct memory_block *mem)
+        * Unaccount before offlining, such that unpopulated zone and kthreads
+        * can properly be torn down in offline_pages().
+        */
++      mem_hotplug_begin();
+       if (nr_vmemmap_pages)
+               adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+                                         -nr_vmemmap_pages);
+@@ -244,13 +254,15 @@ static int memory_block_offline(struct memory_block *mem)
+               if (nr_vmemmap_pages)
+                       adjust_present_page_count(pfn_to_page(start_pfn),
+                                                 mem->group, nr_vmemmap_pages);
+-              return ret;
++              goto out;
+       }
+       if (nr_vmemmap_pages)
+               mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+       mem->zone = NULL;
++out:
++      mem_hotplug_done();
+       return ret;
+ }
+diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
+index bd2570b4f9b7b..d02722bbfcf33 100644
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1069,6 +1069,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
+       kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
+ }
++/*
++ * Must be called with mem_hotplug_lock in write mode.
++ */
+ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                      struct zone *zone, struct memory_group *group)
+ {
+@@ -1089,7 +1092,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                        !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
+               return -EINVAL;
+-      mem_hotplug_begin();
+       /* associate pfn range with the zone */
+       move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
+@@ -1148,7 +1150,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+       writeback_set_ratelimit();
+       memory_notify(MEM_ONLINE, &arg);
+-      mem_hotplug_done();
+       return 0;
+ failed_addition:
+@@ -1157,7 +1158,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+       memory_notify(MEM_CANCEL_ONLINE, &arg);
+       remove_pfn_range_from_zone(zone, pfn, nr_pages);
+-      mem_hotplug_done();
+       return ret;
+ }
+@@ -1787,6 +1787,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
+       return 0;
+ }
++/*
++ * Must be called with mem_hotplug_lock in write mode.
++ */
+ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                       struct zone *zone, struct memory_group *group)
+ {
+@@ -1809,8 +1812,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                        !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
+               return -EINVAL;
+-      mem_hotplug_begin();
+-
+       /*
+        * Don't allow to offline memory blocks that contain holes.
+        * Consequently, memory blocks with holes can never get onlined
+@@ -1946,7 +1947,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+       memory_notify(MEM_OFFLINE, &arg);
+       remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+-      mem_hotplug_done();
+       return 0;
+ failed_removal_isolated:
+@@ -1961,7 +1961,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                (unsigned long long) start_pfn << PAGE_SHIFT,
+                ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
+                reason);
+-      mem_hotplug_done();
+       return ret;
+ }
+-- 
+2.43.0
+
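The locking movement in this patch follows a generic pattern: instead of the innermost function taking and dropping the lock around only part of the work, the single caller takes it once so that the preparation step, the main operation and the error path all sit inside one critical section. Below is a userspace sketch of that pattern with a pthread rwlock and invented names (resource_lock, prepare(), do_online()); it assumes nothing about the kernel implementation beyond the pattern itself.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER;

static int prepare(void)   { puts("prepare   (inside the lock)"); return 0; }
static int do_online(void) { puts("do_online (inside the lock)"); return 0; }

/* Caller-level locking: both steps are serialised as one unit. */
static int block_online(void)
{
        int ret;

        pthread_rwlock_wrlock(&resource_lock);
        ret = prepare();
        if (!ret)
                ret = do_online();
        pthread_rwlock_unlock(&resource_lock);   /* single unlock on all paths */
        return ret;
}

int main(void)
{
        return block_online();
}

Had prepare() taken the lock internally instead, a concurrent reader could observe the state between prepare() and do_online(), which is exactly the window the patch closes.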
diff --git a/queue-6.1/mm-memory_hotplug-fix-error-handling-in-add_memory_r.patch b/queue-6.1/mm-memory_hotplug-fix-error-handling-in-add_memory_r.patch
new file mode 100644 (file)
index 0000000..5f2af8e
--- /dev/null
@@ -0,0 +1,62 @@
+From 9345b30fdfb2604449065987afce0aa558347408 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Nov 2023 15:53:53 +0100
+Subject: mm/memory_hotplug: fix error handling in add_memory_resource()
+
+From: Sumanth Korikkar <sumanthk@linux.ibm.com>
+
+[ Upstream commit f42ce5f087eb69e47294ababd2e7e6f88a82d308 ]
+
+In add_memory_resource(), creation of memory block devices occurs after
+successful call to arch_add_memory().  However, creation of memory block
+devices could fail.  In that case, arch_remove_memory() is called to
+perform necessary cleanup.
+
+Currently with or without altmap support, arch_remove_memory() is always
+passed with altmap set to NULL during error handling.  This leads to
+freeing of struct pages using free_pages(), even though the allocation
+might have been performed with altmap support via
+altmap_alloc_block_buf().
+
+Fix the error handling by passing altmap in arch_remove_memory(). This
+ensures the following:
+* When altmap is disabled, deallocation of the struct pages array occurs
+  via free_pages().
+* When altmap is enabled, deallocation occurs via vmem_altmap_free().
+
+Link: https://lkml.kernel.org/r/20231120145354.308999-3-sumanthk@linux.ibm.com
+Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range")
+Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alexander Gordeev <agordeev@linux.ibm.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: kernel test robot <lkp@intel.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: <stable@vger.kernel.org>   [5.15+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/memory_hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
+index d02722bbfcf33..3b9d3a4b43869 100644
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1382,7 +1382,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
+       ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+                                         group);
+       if (ret) {
+-              arch_remove_memory(start, size, NULL);
++              arch_remove_memory(start, size, params.altmap);
+               goto error;
+       }
+-- 
+2.43.0
+
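The one-line fix encodes a general error-handling rule: the error path must undo the setup with the same descriptor that was used to perform it, otherwise the wrong release routine runs. A compact C sketch of that rule, with entirely hypothetical names (struct ctx, setup(), teardown(), create_devices()):

#include <stdio.h>

struct ctx { const char *backing; };

static int  setup(struct ctx *c)    { printf("setup with %s\n", c->backing);    return 0; }
static void teardown(struct ctx *c) { printf("teardown with %s\n", c->backing); }
static int  create_devices(void)    { return -1; /* force the error path */ }

static int add_resource(struct ctx *c)
{
        int ret = setup(c);

        if (ret)
                return ret;

        ret = create_devices();
        if (ret) {
                teardown(c);   /* pass the SAME context used for setup, not NULL */
                return ret;
        }
        return 0;
}

int main(void)
{
        struct ctx altmap_like = { .backing = "altmap" };

        return add_resource(&altmap_like) ? 1 : 0;
}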
diff --git a/queue-6.1/mm-merge-folio_has_private-filemap_release_folio-cal.patch b/queue-6.1/mm-merge-folio_has_private-filemap_release_folio-cal.patch
new file mode 100644 (file)
index 0000000..5ddd7a6
--- /dev/null
@@ -0,0 +1,282 @@
+From 060289f8c5d7dc83b3980d57bc014879b377c9a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 11:48:51 +0100
+Subject: mm: merge folio_has_private()/filemap_release_folio() call pairs
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 0201ebf274a306a6ebb95e5dc2d6a0a27c737cac ]
+
+Patch series "mm, netfs, fscache: Stop read optimisation when folio
+removed from pagecache", v7.
+
+This fixes an optimisation in fscache whereby we don't read from the cache
+for a particular file until we know that there's data there that we don't
+have in the pagecache.  The problem is that I'm no longer using PG_fscache
+(aka PG_private_2) to indicate that the page is cached and so I don't get
+a notification when a cached page is dropped from the pagecache.
+
+The first patch merges some folio_has_private() and
+filemap_release_folio() pairs and introduces a helper,
+folio_needs_release(), to indicate if a release is required.
+
+The second patch is the actual fix.  Following Willy's suggestions[1], it
+adds an AS_RELEASE_ALWAYS flag to an address_space that will make
+filemap_release_folio() always call ->release_folio(), even if
+PG_private/PG_private_2 aren't set.  folio_needs_release() is altered to
+add a check for this.
+
+This patch (of 2):
+
+Make filemap_release_folio() check folio_has_private().  Then, in most
+cases, where a call to folio_has_private() is immediately followed by a
+call to filemap_release_folio(), we can get rid of the test in the pair.
+
+There are a couple of sites in mm/vmscan.c where this can't so easily be
+done.  In shrink_folio_list(), there are actually three cases (something
+different is done for incompletely invalidated buffers), but
+filemap_release_folio() elides two of them.
+
+In shrink_active_list(), we don't have the folio lock yet, so the
+check allows us to avoid locking the page unnecessarily.
+
+A wrapper function to check if a folio needs release is provided for those
+places that still need to do it in the mm/ directory.  This will acquire
+additional parts to the condition in a future patch.
+
+After this, the only remaining caller of folio_has_private() outside of
+mm/ is a check in fuse.
+
+Link: https://lkml.kernel.org/r/20230628104852.3391651-1-dhowells@redhat.com
+Link: https://lkml.kernel.org/r/20230628104852.3391651-2-dhowells@redhat.com
+Reported-by: Rohith Surabattula <rohiths.msft@gmail.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Steve French <sfrench@samba.org>
+Cc: Shyam Prasad N <nspmangalore@gmail.com>
+Cc: Rohith Surabattula <rohiths.msft@gmail.com>
+Cc: Dave Wysochanski <dwysocha@redhat.com>
+Cc: Dominique Martinet <asmadeus@codewreck.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Andreas Dilger <adilger.kernel@dilger.ca>
+Cc: Xiubo Li <xiubli@redhat.com>
+Cc: Jingbo Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/move_extent.c | 12 ++++--------
+ fs/splice.c           |  3 +--
+ mm/filemap.c          |  2 ++
+ mm/huge_memory.c      |  3 +--
+ mm/internal.h         |  8 ++++++++
+ mm/khugepaged.c       |  3 +--
+ mm/memory-failure.c   |  8 +++-----
+ mm/migrate.c          |  3 +--
+ mm/truncate.c         |  6 ++----
+ mm/vmscan.c           |  8 ++++----
+ 10 files changed, 27 insertions(+), 29 deletions(-)
+
+diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
+index 8dbb87edf24c4..dedc9d445f243 100644
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -339,10 +339,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+                       ext4_double_up_write_data_sem(orig_inode, donor_inode);
+                       goto data_copy;
+               }
+-              if ((folio_has_private(folio[0]) &&
+-                   !filemap_release_folio(folio[0], 0)) ||
+-                  (folio_has_private(folio[1]) &&
+-                   !filemap_release_folio(folio[1], 0))) {
++              if (!filemap_release_folio(folio[0], 0) ||
++                  !filemap_release_folio(folio[1], 0)) {
+                       *err = -EBUSY;
+                       goto drop_data_sem;
+               }
+@@ -361,10 +359,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+       /* At this point all buffers in range are uptodate, old mapping layout
+        * is no longer required, try to drop it now. */
+-      if ((folio_has_private(folio[0]) &&
+-              !filemap_release_folio(folio[0], 0)) ||
+-          (folio_has_private(folio[1]) &&
+-              !filemap_release_folio(folio[1], 0))) {
++      if (!filemap_release_folio(folio[0], 0) ||
++          !filemap_release_folio(folio[1], 0)) {
+               *err = -EBUSY;
+               goto unlock_folios;
+       }
+diff --git a/fs/splice.c b/fs/splice.c
+index c4ae54deac42c..d0230cf8ec571 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -65,8 +65,7 @@ static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+                */
+               folio_wait_writeback(folio);
+-              if (folio_has_private(folio) &&
+-                  !filemap_release_folio(folio, GFP_KERNEL))
++              if (!filemap_release_folio(folio, GFP_KERNEL))
+                       goto out_unlock;
+               /*
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 10fe6430693bd..2809b1174f04e 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -4005,6 +4005,8 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp)
+       struct address_space * const mapping = folio->mapping;
+       BUG_ON(!folio_test_locked(folio));
++      if (!folio_needs_release(folio))
++              return true;
+       if (folio_test_writeback(folio))
+               return false;
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 2753fb54cdf38..59577946735b1 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2694,8 +2694,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+               gfp = current_gfp_context(mapping_gfp_mask(mapping) &
+                                                       GFP_RECLAIM_MASK);
+-              if (folio_test_private(folio) &&
+-                              !filemap_release_folio(folio, gfp)) {
++              if (!filemap_release_folio(folio, gfp)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+diff --git a/mm/internal.h b/mm/internal.h
+index 6b7ef495b56d3..1fefb5181ab78 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -163,6 +163,14 @@ static inline void set_page_refcounted(struct page *page)
+       set_page_count(page, 1);
+ }
++/*
++ * Return true if a folio needs ->release_folio() calling upon it.
++ */
++static inline bool folio_needs_release(struct folio *folio)
++{
++      return folio_has_private(folio);
++}
++
+ extern unsigned long highest_memmap_pfn;
+ /*
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 6fc7db587c453..65bd0b105266a 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1955,8 +1955,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
+                       goto out_unlock;
+               }
+-              if (folio_has_private(folio) &&
+-                  !filemap_release_folio(folio, GFP_KERNEL)) {
++              if (!filemap_release_folio(folio, GFP_KERNEL)) {
+                       result = SCAN_PAGE_HAS_PRIVATE;
+                       folio_putback_lru(folio);
+                       goto out_unlock;
+diff --git a/mm/memory-failure.c b/mm/memory-failure.c
+index 6355166a6bb28..5b846ed5dcbe9 100644
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -830,14 +830,12 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
+               struct folio *folio = page_folio(p);
+               int err = mapping->a_ops->error_remove_page(mapping, p);
+-              if (err != 0) {
++              if (err != 0)
+                       pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
+-              } else if (folio_has_private(folio) &&
+-                         !filemap_release_folio(folio, GFP_NOIO)) {
++              else if (!filemap_release_folio(folio, GFP_NOIO))
+                       pr_info("%#lx: failed to release buffers\n", pfn);
+-              } else {
++              else
+                       ret = MF_RECOVERED;
+-              }
+       } else {
+               /*
+                * If the file system doesn't support it just invalidate
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 91bd69c61148e..c93dd6a31c31a 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -914,8 +914,7 @@ static int fallback_migrate_folio(struct address_space *mapping,
+        * Buffers may be managed in a filesystem specific way.
+        * We must have no buffers or drop them.
+        */
+-      if (folio_test_private(src) &&
+-          !filemap_release_folio(src, GFP_KERNEL))
++      if (!filemap_release_folio(src, GFP_KERNEL))
+               return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
+       return migrate_folio(mapping, dst, src, mode);
+diff --git a/mm/truncate.c b/mm/truncate.c
+index c0be77e5c0083..0d4dd233f5187 100644
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -19,7 +19,6 @@
+ #include <linux/highmem.h>
+ #include <linux/pagevec.h>
+ #include <linux/task_io_accounting_ops.h>
+-#include <linux/buffer_head.h>        /* grr. try_to_release_page */
+ #include <linux/shmem_fs.h>
+ #include <linux/rmap.h>
+ #include "internal.h"
+@@ -276,7 +275,7 @@ static long mapping_evict_folio(struct address_space *mapping,
+       if (folio_ref_count(folio) >
+                       folio_nr_pages(folio) + folio_has_private(folio) + 1)
+               return 0;
+-      if (folio_has_private(folio) && !filemap_release_folio(folio, 0))
++      if (!filemap_release_folio(folio, 0))
+               return 0;
+       return remove_mapping(mapping, folio);
+@@ -581,8 +580,7 @@ static int invalidate_complete_folio2(struct address_space *mapping,
+       if (folio->mapping != mapping)
+               return 0;
+-      if (folio_has_private(folio) &&
+-          !filemap_release_folio(folio, GFP_KERNEL))
++      if (!filemap_release_folio(folio, GFP_KERNEL))
+               return 0;
+       spin_lock(&mapping->host->i_lock);
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 3f090faa6377f..9f3cfb7caa48d 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1992,7 +1992,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
+                * (refcount == 1) it can be freed.  Otherwise, leave
+                * the folio on the LRU so it is swappable.
+                */
+-              if (folio_has_private(folio)) {
++              if (folio_needs_release(folio)) {
+                       if (!filemap_release_folio(folio, sc->gfp_mask))
+                               goto activate_locked;
+                       if (!mapping && folio_ref_count(folio) == 1) {
+@@ -2618,9 +2618,9 @@ static void shrink_active_list(unsigned long nr_to_scan,
+               }
+               if (unlikely(buffer_heads_over_limit)) {
+-                      if (folio_test_private(folio) && folio_trylock(folio)) {
+-                              if (folio_test_private(folio))
+-                                      filemap_release_folio(folio, 0);
++                      if (folio_needs_release(folio) &&
++                          folio_trylock(folio)) {
++                              filemap_release_folio(folio, 0);
+                               folio_unlock(folio);
+                       }
+               }
+-- 
+2.43.0
+
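The merge performed by this patch is a standard refactoring: hoist a precondition that every caller repeated into the callee, and make "nothing to release" count as success so the call is always safe. A stripped-down C analogue with made-up types and names (struct folio_like, needs_release(), release()):

#include <stdbool.h>

struct folio_like { bool has_private; };

static bool needs_release(const struct folio_like *f)
{
        return f->has_private;
}

/* After the change the callee checks, so callers can call unconditionally. */
static bool release(struct folio_like *f)
{
        if (!needs_release(f))
                return true;        /* nothing to do counts as success */
        f->has_private = false;     /* drop the private data */
        return true;
}

int main(void)
{
        struct folio_like f = { .has_private = true };

        /* Callers no longer need: if (needs_release(&f) && !release(&f)) ... */
        return release(&f) ? 0 : 1;
}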
diff --git a/queue-6.1/mm-netfs-fscache-stop-read-optimisation-when-folio-r.patch b/queue-6.1/mm-netfs-fscache-stop-read-optimisation-when-folio-r.patch
new file mode 100644 (file)
index 0000000..28f6433
--- /dev/null
@@ -0,0 +1,222 @@
+From ed65a1b09f78fea9d521a21c25bb036dc802af12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 11:48:52 +0100
+Subject: mm, netfs, fscache: stop read optimisation when folio removed from
+ pagecache
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit b4fa966f03b7401ceacd4ffd7227197afb2b8376 ]
+
+Fscache has an optimisation by which reads from the cache are skipped
+until we know that (a) there's data there to be read and (b) that data
+isn't entirely covered by pages resident in the netfs pagecache.  This is
+done with two flags manipulated by fscache_note_page_release():
+
+       if (...
+           test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
+           test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
+               clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+
+where the NO_DATA_TO_READ flag causes cachefiles_prepare_read() to
+indicate that netfslib should download from the server or clear the page
+instead.
+
+The fscache_note_page_release() function is intended to be called from
+->releasepage() - but that only gets called if PG_private or PG_private_2
+is set - and currently the former is at the discretion of the network
+filesystem and the latter is only set whilst a page is being written to
+the cache, so sometimes we miss clearing the optimisation.
+
+Fix this by following Willy's suggestion[1] and adding an address_space
+flag, AS_RELEASE_ALWAYS, that causes filemap_release_folio() to always call
+->release_folio() if it's set, even if PG_private or PG_private_2 aren't
+set.
+
+Note that this would require folio_test_private() and page_has_private() to
+become more complicated.  To avoid that, in the places[*] where these are
+used to conditionalise calls to filemap_release_folio() and
+try_to_release_page(), the tests are removed, those functions are just
+called unconditionally, and the test is performed there instead.
+
+[*] There are some exceptions in vmscan.c where the check guards more than
+just a call to the releaser.  I've added a function, folio_needs_release(),
+to wrap all the checks for that.
+
+AS_RELEASE_ALWAYS should be set if a non-NULL cookie is obtained from
+fscache and cleared in ->evict_inode() before truncate_inode_pages_final()
+is called.
+
+Additionally, the FSCACHE_COOKIE_NO_DATA_TO_READ flag needs to be cleared
+and the optimisation cancelled if a cachefiles object already contains data
+when we open it.
+
+[dwysocha@redhat.com: call folio_mapping() inside folio_needs_release()]
+  Link: https://github.com/DaveWysochanskiRH/kernel/commit/902c990e311120179fa5de99d68364b2947b79ec
+Link: https://lkml.kernel.org/r/20230628104852.3391651-3-dhowells@redhat.com
+Fixes: 1f67e6d0b188 ("fscache: Provide a function to note the release of a page")
+Fixes: 047487c947e8 ("cachefiles: Implement the I/O routines")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
+Reported-by: Rohith Surabattula <rohiths.msft@gmail.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Tested-by: SeongJae Park <sj@kernel.org>
+Cc: Daire Byrne <daire.byrne@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Steve French <sfrench@samba.org>
+Cc: Shyam Prasad N <nspmangalore@gmail.com>
+Cc: Rohith Surabattula <rohiths.msft@gmail.com>
+Cc: Dave Wysochanski <dwysocha@redhat.com>
+Cc: Dominique Martinet <asmadeus@codewreck.org>
+Cc: Ilya Dryomov <idryomov@gmail.com>
+Cc: Andreas Dilger <adilger.kernel@dilger.ca>
+Cc: Jingbo Xu <jefflexu@linux.alibaba.com>
+Cc: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/9p/cache.c           |  2 ++
+ fs/afs/internal.h       |  2 ++
+ fs/cachefiles/namei.c   |  2 ++
+ fs/ceph/cache.c         |  2 ++
+ fs/nfs/fscache.c        |  3 +++
+ fs/smb/client/fscache.c |  2 ++
+ include/linux/pagemap.h | 16 ++++++++++++++++
+ mm/internal.h           |  5 ++++-
+ 8 files changed, 33 insertions(+), 1 deletion(-)
+
+diff --git a/fs/9p/cache.c b/fs/9p/cache.c
+index cebba4eaa0b57..12c0ae29f1857 100644
+--- a/fs/9p/cache.c
++++ b/fs/9p/cache.c
+@@ -68,6 +68,8 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
+                                      &path, sizeof(path),
+                                      &version, sizeof(version),
+                                      i_size_read(&v9inode->netfs.inode));
++      if (v9inode->netfs.cache)
++              mapping_set_release_always(inode->i_mapping);
+       p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
+                inode, v9fs_inode_cookie(v9inode));
+diff --git a/fs/afs/internal.h b/fs/afs/internal.h
+index fcbb598d8c85d..a25fdc3e52310 100644
+--- a/fs/afs/internal.h
++++ b/fs/afs/internal.h
+@@ -682,6 +682,8 @@ static inline void afs_vnode_set_cache(struct afs_vnode *vnode,
+ {
+ #ifdef CONFIG_AFS_FSCACHE
+       vnode->netfs.cache = cookie;
++      if (cookie)
++              mapping_set_release_always(vnode->netfs.inode.i_mapping);
+ #endif
+ }
+diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
+index 03ca8f2f657ab..50b2ee163af60 100644
+--- a/fs/cachefiles/namei.c
++++ b/fs/cachefiles/namei.c
+@@ -584,6 +584,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
+       if (ret < 0)
+               goto check_failed;
++      clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &object->cookie->flags);
++
+       object->file = file;
+       /* Always update the atime on an object we've just looked up (this is
+diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
+index 177d8e8d73fe4..de1dee46d3df7 100644
+--- a/fs/ceph/cache.c
++++ b/fs/ceph/cache.c
+@@ -36,6 +36,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
+                                      &ci->i_vino, sizeof(ci->i_vino),
+                                      &ci->i_version, sizeof(ci->i_version),
+                                      i_size_read(inode));
++      if (ci->netfs.cache)
++              mapping_set_release_always(inode->i_mapping);
+ }
+ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info *ci)
+diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
+index e731c00a9fcbc..d3c938dd2b12a 100644
+--- a/fs/nfs/fscache.c
++++ b/fs/nfs/fscache.c
+@@ -176,6 +176,9 @@ void nfs_fscache_init_inode(struct inode *inode)
+                                              &auxdata,      /* aux_data */
+                                              sizeof(auxdata),
+                                              i_size_read(inode));
++
++      if (netfs_inode(inode)->cache)
++              mapping_set_release_always(inode->i_mapping);
+ }
+ /*
+diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
+index e73625b5d0cc6..f64bad513ba6d 100644
+--- a/fs/smb/client/fscache.c
++++ b/fs/smb/client/fscache.c
+@@ -108,6 +108,8 @@ void cifs_fscache_get_inode_cookie(struct inode *inode)
+                                      &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+                                      &cd, sizeof(cd),
+                                      i_size_read(&cifsi->netfs.inode));
++      if (cifsi->netfs.cache)
++              mapping_set_release_always(inode->i_mapping);
+ }
+ void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update)
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index 03307b72de6c6..fdbb90ae56c70 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -199,6 +199,7 @@ enum mapping_flags {
+       /* writeback related tags are not used */
+       AS_NO_WRITEBACK_TAGS = 5,
+       AS_LARGE_FOLIO_SUPPORT = 6,
++      AS_RELEASE_ALWAYS,      /* Call ->release_folio(), even if no private data */
+ };
+ /**
+@@ -269,6 +270,21 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping)
+       return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
+ }
++static inline bool mapping_release_always(const struct address_space *mapping)
++{
++      return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
++}
++
++static inline void mapping_set_release_always(struct address_space *mapping)
++{
++      set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
++}
++
++static inline void mapping_clear_release_always(struct address_space *mapping)
++{
++      clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
++}
++
+ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
+ {
+       return mapping->gfp_mask;
+diff --git a/mm/internal.h b/mm/internal.h
+index 1fefb5181ab78..d01130efce5fb 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -168,7 +168,10 @@ static inline void set_page_refcounted(struct page *page)
+  */
+ static inline bool folio_needs_release(struct folio *folio)
+ {
+-      return folio_has_private(folio);
++      struct address_space *mapping = folio_mapping(folio);
++
++      return folio_has_private(folio) ||
++              (mapping && mapping_release_always(mapping));
+ }
+ extern unsigned long highest_memmap_pfn;
+-- 
+2.43.0
+
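Mechanically, the patch adds one more reason for a folio to need releasing: a flag bit kept in the owning mapping and set when a cache cookie is attached. The sketch below is a userspace approximation with invented names (struct mapping_like, RELEASE_ALWAYS, and friends); C11 atomic operations stand in for the kernel's set_bit()/test_bit() and no claim is made about the real data structures.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define RELEASE_ALWAYS (1u << 0)

struct mapping_like { _Atomic unsigned int flags; };
struct folio_like   { bool has_private; struct mapping_like *mapping; };

static void mapping_set_release_always(struct mapping_like *m)
{
        atomic_fetch_or(&m->flags, RELEASE_ALWAYS);
}

static bool mapping_release_always(struct mapping_like *m)
{
        return atomic_load(&m->flags) & RELEASE_ALWAYS;
}

/* "Needs release" is now private data OR the mapping-wide override. */
static bool folio_needs_release(struct folio_like *f)
{
        return f->has_private ||
               (f->mapping != NULL && mapping_release_always(f->mapping));
}

int main(void)
{
        struct mapping_like m = { 0 };
        struct folio_like   f = { .has_private = false, .mapping = &m };

        mapping_set_release_always(&m);   /* e.g. when a cache cookie is attached */
        return folio_needs_release(&f) ? 0 : 1;
}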
diff --git a/queue-6.1/net-annotate-data-races-around-sk-sk_bind_phc.patch b/queue-6.1/net-annotate-data-races-around-sk-sk_bind_phc.patch
new file mode 100644 (file)
index 0000000..c53afba
--- /dev/null
@@ -0,0 +1,60 @@
+From 1b799e9a0670b2cf155f5463f9b42e791668abaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:12 +0000
+Subject: net: annotate data-races around sk->sk_bind_phc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 251cd405a9e6e70b92fe5afbdd17fd5caf9d3266 ]
+
+sk->sk_bind_phc is read locklessly. Add corresponding annotations.
+
+Fixes: d463126e23f1 ("net: sock: extend SO_TIMESTAMPING for PHC binding")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yangbo Lu <yangbo.lu@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 7f6ca95d16b9 ("net: Implement missing getsockopt(SO_TIMESTAMPING_NEW)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 4 ++--
+ net/socket.c    | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 929055bc0cc7b..49b7f252ddae4 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -890,7 +890,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+       if (!match)
+               return -EINVAL;
+-      sk->sk_bind_phc = phc_index;
++      WRITE_ONCE(sk->sk_bind_phc, phc_index);
+       return 0;
+ }
+@@ -1706,7 +1706,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+       case SO_TIMESTAMPING_OLD:
+               lv = sizeof(v.timestamping);
+               v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+-              v.timestamping.bind_phc = sk->sk_bind_phc;
++              v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
+               break;
+       case SO_RCVTIMEO_OLD:
+diff --git a/net/socket.c b/net/socket.c
+index 9c1fb94b12851..07470724e7358 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -940,7 +940,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+               if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+                       hwtstamp = ptp_convert_timestamp(&hwtstamp,
+-                                                       sk->sk_bind_phc);
++                                                       READ_ONCE(sk->sk_bind_phc));
+               if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
+                       empty = 0;
+-- 
+2.43.0
+
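The READ_ONCE()/WRITE_ONCE() pairing used here marks a field that is written on the setsockopt path but read locklessly elsewhere, so the compiler cannot tear, fuse or re-read the access. The closest portable userspace analogue is a relaxed C11 atomic; the struct and function names below are made up for the example and carry no claim about the kernel's internals.

#include <stdatomic.h>
#include <stdio.h>

struct sock_like {
        _Atomic int bind_phc;   /* written by the setsockopt path, read anywhere */
};

static void set_bind_phc(struct sock_like *sk, int phc_index)
{
        /* kernel equivalent: WRITE_ONCE(sk->sk_bind_phc, phc_index); */
        atomic_store_explicit(&sk->bind_phc, phc_index, memory_order_relaxed);
}

static int get_bind_phc(struct sock_like *sk)
{
        /* kernel equivalent: READ_ONCE(sk->sk_bind_phc); */
        return atomic_load_explicit(&sk->bind_phc, memory_order_relaxed);
}

int main(void)
{
        struct sock_like sk = { .bind_phc = -1 };

        set_bind_phc(&sk, 2);
        printf("bind_phc = %d\n", get_bind_phc(&sk));
        return 0;
}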
diff --git a/queue-6.1/net-annotate-data-races-around-sk-sk_tsflags.patch b/queue-6.1/net-annotate-data-races-around-sk-sk_tsflags.patch
new file mode 100644 (file)
index 0000000..fadc546
--- /dev/null
@@ -0,0 +1,367 @@
+From e1f7cc7fc59e4d300f8a27e6ce20ed53893823db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:11 +0000
+Subject: net: annotate data-races around sk->sk_tsflags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e3390b30a5dfb112e8e802a59c0f68f947b638b2 ]
+
+sk->sk_tsflags can be read locklessly, add corresponding annotations.
+
+Fixes: b9f40e21ef42 ("net-timestamp: move timestamp flags out of sk_flags")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 7f6ca95d16b9 ("net: Implement missing getsockopt(SO_TIMESTAMPING_NEW)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h       |  2 +-
+ include/net/sock.h     | 17 ++++++++++-------
+ net/can/j1939/socket.c | 10 ++++++----
+ net/core/skbuff.c      | 10 ++++++----
+ net/core/sock.c        |  4 ++--
+ net/ipv4/ip_output.c   |  2 +-
+ net/ipv4/ip_sockglue.c |  2 +-
+ net/ipv4/tcp.c         |  4 ++--
+ net/ipv6/ip6_output.c  |  2 +-
+ net/ipv6/ping.c        |  2 +-
+ net/ipv6/raw.c         |  2 +-
+ net/ipv6/udp.c         |  2 +-
+ net/socket.c           | 13 +++++++------
+ 13 files changed, 40 insertions(+), 32 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index c286344628dba..c83c09c65623f 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -95,7 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+       ipcm_init(ipcm);
+       ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
+-      ipcm->sockc.tsflags = inet->sk.sk_tsflags;
++      ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
+       ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
+       ipcm->addr = inet->inet_saddr;
+       ipcm->protocol = inet->inet_num;
+diff --git a/include/net/sock.h b/include/net/sock.h
+index b6027b01c2455..d8ed62a8e1a3e 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1928,7 +1928,9 @@ struct sockcm_cookie {
+ static inline void sockcm_init(struct sockcm_cookie *sockc,
+                              const struct sock *sk)
+ {
+-      *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
++      *sockc = (struct sockcm_cookie) {
++              .tsflags = READ_ONCE(sk->sk_tsflags)
++      };
+ }
+ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
+@@ -2741,9 +2743,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+ static inline void
+ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+ {
+-      ktime_t kt = skb->tstamp;
+       struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+-
++      u32 tsflags = READ_ONCE(sk->sk_tsflags);
++      ktime_t kt = skb->tstamp;
+       /*
+        * generate control messages if
+        * - receive time stamping in software requested
+@@ -2751,10 +2753,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+        * - hardware time stamps available and wanted
+        */
+       if (sock_flag(sk, SOCK_RCVTSTAMP) ||
+-          (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+-          (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
++          (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
++          (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+           (hwtstamps->hwtstamp &&
+-           (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
++           (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
+               __sock_recv_timestamp(msg, sk, skb);
+       else
+               sock_write_timestamp(sk, kt);
+@@ -2776,7 +2778,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ #define TSFLAGS_ANY     (SOF_TIMESTAMPING_SOFTWARE                    | \
+                          SOF_TIMESTAMPING_RAW_HARDWARE)
+-      if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
++      if (sk->sk_flags & FLAGS_RECV_CMSGS ||
++          READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+               __sock_recv_cmsgs(msg, sk, skb);
+       else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
+               sock_write_timestamp(sk, skb->tstamp);
+diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
+index 9c828067b4481..b0be23559243c 100644
+--- a/net/can/j1939/socket.c
++++ b/net/can/j1939/socket.c
+@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       struct sock_exterr_skb *serr;
+       struct sk_buff *skb;
+       char *state = "UNK";
++      u32 tsflags;
+       int err;
+       jsk = j1939_sk(sk);
+@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+               return;
++      tsflags = READ_ONCE(sk->sk_tsflags);
+       switch (type) {
+       case J1939_ERRQUEUE_TX_ACK:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
++              if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_SCHED:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
++              if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_ABORT:
+@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       case J1939_ERRQUEUE_RX_DPO:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_ABORT:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
++              if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+                       return;
+               break;
+       default:
+@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       }
+       serr->opt_stats = true;
+-      if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++      if (tsflags & SOF_TIMESTAMPING_OPT_ID)
+               serr->ee.ee_data = session->tskey;
+       netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 73b1e0e53534e..8a819d0a7bfb0 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4913,7 +4913,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
+       serr->ee.ee_info = tstype;
+       serr->opt_stats = opt_stats;
+       serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
+-      if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
++      if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+               serr->ee.ee_data = skb_shinfo(skb)->tskey;
+               if (sk_is_tcp(sk))
+                       serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
+@@ -4969,21 +4969,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ {
+       struct sk_buff *skb;
+       bool tsonly, opt_stats = false;
++      u32 tsflags;
+       if (!sk)
+               return;
+-      if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
++      tsflags = READ_ONCE(sk->sk_tsflags);
++      if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+           skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+               return;
+-      tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
++      tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       if (!skb_may_tx_timestamp(sk, tsonly))
+               return;
+       if (tsonly) {
+ #ifdef CONFIG_INET
+-              if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
++              if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+                   sk_is_tcp(sk)) {
+                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+                                                            ack_skb);
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 4305e55dbfba4..929055bc0cc7b 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -926,7 +926,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
+                       return ret;
+       }
+-      sk->sk_tsflags = val;
++      WRITE_ONCE(sk->sk_tsflags, val);
+       sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+       if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+@@ -1705,7 +1705,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+       case SO_TIMESTAMPING_OLD:
+               lv = sizeof(v.timestamping);
+-              v.timestamping.flags = sk->sk_tsflags;
++              v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+               v.timestamping.bind_phc = sk->sk_bind_phc;
+               break;
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index d8ec802f97524..e19ef88ae181f 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -991,7 +991,7 @@ static int __ip_append_data(struct sock *sk,
+       paged = !!cork->gso_size;
+       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+-          sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++          READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+       hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index 63aa52becd880..c1fb7580ea581 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -509,7 +509,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+        * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+        */
+       info = PKTINFO_SKB_CB(skb);
+-      if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
++      if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
+           !info->ipi_ifindex)
+               return false;
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 58409ea2da0af..3935451ad061e 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2359,14 +2359,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+                       }
+               }
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
++              if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
+                       has_timestamping = true;
+               else
+                       tss->ts[0] = (struct timespec64) {0};
+       }
+       if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
++              if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
+                       has_timestamping = true;
+               else
+                       tss->ts[2] = (struct timespec64) {0};
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 04822e2cba74a..e9ae084d038d1 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1507,7 +1507,7 @@ static int __ip6_append_data(struct sock *sk,
+       orig_mtu = mtu;
+       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+-          sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++          READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+       hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 4d5a27dd9a4b2..a5d7d1915ba7e 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               return -EINVAL;
+       ipcm6_init_sk(&ipc6, np);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+       fl6.flowi6_oif = oif;
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index df3abd9e5237c..dc31752a7edcc 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -776,7 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       fl6.flowi6_uid = sk->sk_uid;
+       ipcm6_init(&ipc6);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = fl6.flowi6_mark;
+       if (sin6) {
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 64b36c2ba774a..7f49f69226a21 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1358,7 +1358,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       ipcm6_init(&ipc6);
+       ipc6.gso_size = READ_ONCE(up->gso_size);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+       /* destination address check */
+diff --git a/net/socket.c b/net/socket.c
+index 04cba91c7cbe5..9c1fb94b12851 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -826,7 +826,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+ static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
+ {
+-      bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
++      bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
+       struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+       struct net_device *orig_dev;
+       ktime_t hwtstamp;
+@@ -878,12 +878,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+       int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+       int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+       struct scm_timestamping_internal tss;
+-
+       int empty = 1, false_tstamp = 0;
+       struct skb_shared_hwtstamps *shhwtstamps =
+               skb_hwtstamps(skb);
+       int if_index;
+       ktime_t hwtstamp;
++      u32 tsflags;
+       /* Race occurred between timestamp enabling and packet
+          receiving.  Fill in the current time for now. */
+@@ -925,11 +925,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+       }
+       memset(&tss, 0, sizeof(tss));
+-      if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
++      tsflags = READ_ONCE(sk->sk_tsflags);
++      if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+           ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
+               empty = 0;
+       if (shhwtstamps &&
+-          (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
++          (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+           !skb_is_swtx_tstamp(skb, false_tstamp)) {
+               if_index = 0;
+               if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+@@ -937,14 +938,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+               else
+                       hwtstamp = shhwtstamps->hwtstamp;
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
++              if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+                       hwtstamp = ptp_convert_timestamp(&hwtstamp,
+                                                        sk->sk_bind_phc);
+               if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
+                       empty = 0;
+-                      if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
++                      if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+                           !skb_is_err_queue(skb))
+                               put_ts_pktinfo(msg, skb, if_index);
+               }
+-- 
+2.43.0
+
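Beyond the annotations themselves, this patch repeatedly applies one idiom worth naming: read the racy flags word exactly once into a local variable and test every bit against that snapshot, so all decisions made within a single call stay mutually consistent even if a writer changes the flags halfway through. A small sketch with invented flag names (TS_RX_SOFTWARE, TS_RAW_HARDWARE) and a relaxed atomic standing in for READ_ONCE():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TS_RX_SOFTWARE  (1u << 0)
#define TS_RAW_HARDWARE (1u << 1)

struct sock_like { _Atomic unsigned int tsflags; };

static void handle_timestamps(struct sock_like *sk, bool have_hw_stamp)
{
        /* One lockless read; every test below uses the same snapshot. */
        unsigned int tsflags =
                atomic_load_explicit(&sk->tsflags, memory_order_relaxed);

        if (tsflags & TS_RX_SOFTWARE)
                puts("fill in software receive timestamp");
        if (have_hw_stamp && (tsflags & TS_RAW_HARDWARE))
                puts("fill in raw hardware timestamp");
}

int main(void)
{
        struct sock_like sk = { TS_RX_SOFTWARE };

        handle_timestamps(&sk, false);
        return 0;
}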
diff --git a/queue-6.1/net-bcmgenet-fix-fcs-generation-for-fragmented-skbuf.patch b/queue-6.1/net-bcmgenet-fix-fcs-generation-for-fragmented-skbuf.patch
new file mode 100644 (file)
index 0000000..78419a9
--- /dev/null
@@ -0,0 +1,46 @@
+From 0f89a214d5bd7890cd44370aca6aade6589a47b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Dec 2023 14:56:38 +0100
+Subject: net: bcmgenet: Fix FCS generation for fragmented skbuffs
+
+From: Adrian Cinal <adriancinal@gmail.com>
+
+[ Upstream commit e584f2ff1e6cc9b1d99e8a6b0f3415940d1b3eb3 ]
+
+The flag DMA_TX_APPEND_CRC was only written to the first DMA descriptor
+in the TX path, where each descriptor corresponds to a single skbuff
+fragment (or the skbuff head). This led to packets with no FCS appearing
+on the wire if the kernel allocated the packet in fragments, which would
+always happen when using PACKET_MMAP/TPACKET (cf. tpacket_fill_skb() in
+net/packet/af_packet.c).
+
+Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
+Signed-off-by: Adrian Cinal <adriancinal1@gmail.com>
+Acked-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20231228135638.1339245-1-adriancinal1@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+index 1ae082eb9e905..c2a9913082153 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2131,8 +2131,10 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
+               /* Note: if we ever change from DMA_TX_APPEND_CRC below we
+                * will need to restore software padding of "runt" packets
+                */
++              len_stat |= DMA_TX_APPEND_CRC;
++
+               if (!i) {
+-                      len_stat |= DMA_TX_APPEND_CRC | DMA_SOP;
++                      len_stat |= DMA_SOP;
+                       if (skb->ip_summed == CHECKSUM_PARTIAL)
+                               len_stat |= DMA_TX_DO_CSUM;
+               }
+-- 
+2.43.0
+
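The bug pattern fixed here is easy to reproduce in miniature: a per-descriptor flag that was only OR'ed into the first descriptor of a multi-fragment packet. In the sketch below the descriptor array and flag values are invented; only the loop structure mirrors the fix, which moves the per-fragment flag out of the first-descriptor branch.

#include <stdio.h>

#define APPEND_CRC (1u << 0)   /* must be set on every descriptor  */
#define SOP        (1u << 1)   /* start of packet: first desc only */

int main(void)
{
        unsigned int desc_flags[3] = { 0 };
        int nfrags = 3;

        for (int i = 0; i < nfrags; i++) {
                desc_flags[i] |= APPEND_CRC;      /* per fragment, unconditional */
                if (i == 0)
                        desc_flags[i] |= SOP;     /* first fragment only */
        }

        for (int i = 0; i < nfrags; i++)
                printf("desc %d flags 0x%x\n", i, desc_flags[i]);
        return 0;
}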
diff --git a/queue-6.1/net-declare-msg_splice_pages-internal-sendmsg-flag.patch b/queue-6.1/net-declare-msg_splice_pages-internal-sendmsg-flag.patch
new file mode 100644 (file)
index 0000000..0323681
--- /dev/null
@@ -0,0 +1,94 @@
+From 75dffd6df5e444bb377e400ba3e8acf49ca982d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 13:11:10 +0100
+Subject: net: Declare MSG_SPLICE_PAGES internal sendmsg() flag
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit b841b901c452d92610f739a36e54978453528876 ]
+
+Declare MSG_SPLICE_PAGES, an internal sendmsg() flag that hints to a
+network protocol that it should splice pages from the source iterator
+rather than copying the data if it can.  This flag is added to a list that
+is cleared by sendmsg syscalls on entry.
+
+This is intended as a replacement for the ->sendpage() op, allowing a way
+to splice in several multipage folios in one go.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+cc: Jens Axboe <axboe@kernel.dk>
+cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: a0002127cd74 ("udp: move udp->no_check6_tx to udp->udp_flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/socket.h | 3 +++
+ io_uring/net.c         | 2 ++
+ net/socket.c           | 2 ++
+ 3 files changed, 7 insertions(+)
+
+diff --git a/include/linux/socket.h b/include/linux/socket.h
+index 1db29aab8f9c3..b3c58042bd254 100644
+--- a/include/linux/socket.h
++++ b/include/linux/socket.h
+@@ -324,6 +324,7 @@ struct ucred {
+                                         */
+ #define MSG_ZEROCOPY  0x4000000       /* Use user data in kernel path */
++#define MSG_SPLICE_PAGES 0x8000000    /* Splice the pages from the iterator in sendmsg() */
+ #define MSG_FASTOPEN  0x20000000      /* Send data in TCP SYN */
+ #define MSG_CMSG_CLOEXEC 0x40000000   /* Set close_on_exec for file
+                                          descriptor received through
+@@ -334,6 +335,8 @@ struct ucred {
+ #define MSG_CMSG_COMPAT       0               /* We never have 32 bit fixups */
+ #endif
++/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
++#define MSG_INTERNAL_SENDMSG_FLAGS (MSG_SPLICE_PAGES)
+ /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+ #define SOL_IP                0
+diff --git a/io_uring/net.c b/io_uring/net.c
+index 57c626cb4d1a5..67f09a40bcb21 100644
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -389,6 +389,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
++      flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
+       msg.msg_flags = flags;
+       ret = sock_sendmsg(sock, &msg);
+       if (ret < min_ret) {
+@@ -1137,6 +1138,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
+               msg_flags |= MSG_DONTWAIT;
+       if (msg_flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
++      msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
+       msg.msg_flags = msg_flags;
+       msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
+diff --git a/net/socket.c b/net/socket.c
+index 0104617b440dc..6f39f7b0cc85c 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2131,6 +2131,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
+               msg.msg_name = (struct sockaddr *)&address;
+               msg.msg_namelen = addr_len;
+       }
++      flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
+       if (sock->file->f_flags & O_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       msg.msg_flags = flags;
+@@ -2482,6 +2483,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
+               msg_sys->msg_control = ctl_buf;
+               msg_sys->msg_control_is_user = false;
+       }
++      flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
+       msg_sys->msg_flags = flags;
+       if (sock->file->f_flags & O_NONBLOCK)
+-- 
+2.43.0
+
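The defensive step this patch adds at each sendmsg() entry point generalises to any API that reserves flag bits for internal use: whatever flag word userspace supplies is masked against the internal set before it is used, so a caller can never smuggle a kernel-only hint through. A plain C sketch with arbitrary, made-up flag values:

#include <assert.h>

#define MSG_DONTWAIT_EX     (1u << 6)    /* ordinary user-visible flag */
#define MSG_SPLICE_PAGES_EX (1u << 27)   /* internal hint, kernel-only */

#define MSG_INTERNAL_FLAGS  (MSG_SPLICE_PAGES_EX)

static unsigned int sanitize_user_flags(unsigned int flags)
{
        return flags & ~MSG_INTERNAL_FLAGS;      /* cleared on entry */
}

int main(void)
{
        unsigned int from_user = MSG_DONTWAIT_EX | MSG_SPLICE_PAGES_EX;

        assert(sanitize_user_flags(from_user) == MSG_DONTWAIT_EX);
        return 0;
}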
diff --git a/queue-6.1/net-dpaa2-eth-rearrange-variable-in-dpaa2_eth_get_et.patch b/queue-6.1/net-dpaa2-eth-rearrange-variable-in-dpaa2_eth_get_et.patch
new file mode 100644 (file)
index 0000000..014b575
--- /dev/null
@@ -0,0 +1,62 @@
+From 8e5b100ede5240de3c21551e38e66faf2d685c09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 17:18:51 +0300
+Subject: net: dpaa2-eth: rearrange variable in dpaa2_eth_get_ethtool_stats
+
+From: Ioana Ciornei <ioana.ciornei@nxp.com>
+
+[ Upstream commit 3313206827678f6f036eca601a51f6c4524b559a ]
+
+Rearrange the variables in the dpaa2_eth_get_ethtool_stats() function so
+that we adhere to the reverse Christmas tree rule.
+Also, in the next patch we are adding more variables and I didn't know
+where to place them with the current ordering.
+
+Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: beb1930f966d ("dpaa2-eth: recycle the RX buffer only after all processing done")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 18 ++++++++----------
+ 1 file changed, 8 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+index eea7d7a07c007..59888826469b9 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+@@ -227,17 +227,8 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
+                                       struct ethtool_stats *stats,
+                                       u64 *data)
+ {
+-      int i = 0;
+-      int j, k, err;
+-      int num_cnt;
+-      union dpni_statistics dpni_stats;
+-      u32 fcnt, bcnt;
+-      u32 fcnt_rx_total = 0, fcnt_tx_total = 0;
+-      u32 bcnt_rx_total = 0, bcnt_tx_total = 0;
+-      u32 buf_cnt;
+       struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+-      struct dpaa2_eth_drv_stats *extras;
+-      struct dpaa2_eth_ch_stats *ch_stats;
++      union dpni_statistics dpni_stats;
+       int dpni_stats_page_size[DPNI_STATISTICS_CNT] = {
+               sizeof(dpni_stats.page_0),
+               sizeof(dpni_stats.page_1),
+@@ -247,6 +238,13 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
+               sizeof(dpni_stats.page_5),
+               sizeof(dpni_stats.page_6),
+       };
++      u32 fcnt_rx_total = 0, fcnt_tx_total = 0;
++      u32 bcnt_rx_total = 0, bcnt_tx_total = 0;
++      struct dpaa2_eth_ch_stats *ch_stats;
++      struct dpaa2_eth_drv_stats *extras;
++      int j, k, err, num_cnt, i = 0;
++      u32 fcnt, bcnt;
++      u32 buf_cnt;
+       memset(data, 0,
+              sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
+-- 
+2.43.0
+
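For reference, the "reverse Christmas tree" convention mentioned above orders local variable declarations from the longest line down to the shortest. A hypothetical, standalone illustration (the function and names are invented, not taken from the dpaa2 driver):

#include <stdint.h>
#include <stddef.h>

/* Declarations run from the longest line first to the shortest line last,
 * which is the layout the patch restores in dpaa2_eth_get_ethtool_stats().
 */
static uint64_t example_total_bytes(const uint64_t *per_queue_bytes,
				    size_t num_queues)
{
	uint64_t total_bytes_across_all_queues = 0; /* longest line first */
	const uint64_t *cursor = per_queue_bytes;
	size_t i;                                   /* shortest line last */

	for (i = 0; i < num_queues; i++)
		total_bytes_across_all_queues += cursor[i];
	return total_bytes_across_all_queues;
}
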
diff --git a/queue-6.1/net-implement-missing-getsockopt-so_timestamping_new.patch b/queue-6.1/net-implement-missing-getsockopt-so_timestamping_new.patch
new file mode 100644 (file)
index 0000000..340dbc0
--- /dev/null
@@ -0,0 +1,60 @@
+From f3ca390d856050f4a3be15ee0cec3f772f96b860 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 00:19:01 +0100
+Subject: net: Implement missing getsockopt(SO_TIMESTAMPING_NEW)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jörn-Thorben Hinz <jthinz@mailbox.tu-berlin.de>
+
+[ Upstream commit 7f6ca95d16b96567ce4cf458a2790ff17fa620c3 ]
+
+Commit 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") added the new
+socket option SO_TIMESTAMPING_NEW. Setting the option is handled in
+sk_setsockopt(), querying it was not handled in sk_getsockopt(), though.
+
+Following remarks on an earlier submission of this patch, keep the old
+behavior of getsockopt(SO_TIMESTAMPING_OLD) which returns the active
+flags even if they actually have been set through SO_TIMESTAMPING_NEW.
+
+The new getsockopt(SO_TIMESTAMPING_NEW) is stricter, returning flags
+only if they have been set through the same option.
+
+Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW")
+Link: https://lore.kernel.org/lkml/20230703175048.151683-1-jthinz@mailbox.tu-berlin.de/
+Link: https://lore.kernel.org/netdev/0d7cddc9-03fa-43db-a579-14f3e822615b@app.fastmail.com/
+Signed-off-by: Jörn-Thorben Hinz <jthinz@mailbox.tu-berlin.de>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 49b7f252ddae4..0d8754ec837dc 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1704,9 +1704,16 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case SO_TIMESTAMPING_OLD:
++      case SO_TIMESTAMPING_NEW:
+               lv = sizeof(v.timestamping);
+-              v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+-              v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
++              /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
++               * returning the flags when they were set through the same option.
++               * Don't change the behaviour for the old case SO_TIMESTAMPING_OLD.
++               */
++              if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
++                      v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
++                      v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
++              }
+               break;
+       case SO_RCVTIMEO_OLD:
+-- 
+2.43.0
+
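A rough userspace sketch of the behaviour this patch produces; it assumes a kernel carrying the fix, and the SO_TIMESTAMPING_OLD/NEW fallback values below are the asm-generic ones and differ on some architectures:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>

#ifndef SO_TIMESTAMPING_OLD
#define SO_TIMESTAMPING_OLD 37	/* asm-generic value, illustrative only */
#endif
#ifndef SO_TIMESTAMPING_NEW
#define SO_TIMESTAMPING_NEW 65	/* asm-generic value, illustrative only */
#endif

int main(void)
{
	unsigned int set = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	unsigned int got = 0;
	socklen_t len = sizeof(got);
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	/* Flags set through the old option... */
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING_OLD, &set, sizeof(set));

	/* ...are still reported by the old getter... */
	getsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING_OLD, &got, &len);
	printf("OLD getter: %#x\n", got);

	/* ...while the stricter new getter only reports flags that were set
	 * through SO_TIMESTAMPING_NEW, so this should print 0 here. */
	got = 0;
	len = sizeof(got);
	getsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING_NEW, &got, &len);
	printf("NEW getter: %#x\n", got);
	return 0;
}
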
diff --git a/queue-6.1/net-implement-missing-so_timestamping_new-cmsg-suppo.patch b/queue-6.1/net-implement-missing-so_timestamping_new-cmsg-suppo.patch
new file mode 100644 (file)
index 0000000..9dc4c47
--- /dev/null
@@ -0,0 +1,40 @@
+From e6b1f3de357f796324e9e623e65680e3c7fff48f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Jan 2024 09:57:44 +0100
+Subject: net: Implement missing SO_TIMESTAMPING_NEW cmsg support
+
+From: Thomas Lange <thomas@corelatus.se>
+
+[ Upstream commit 382a32018b74f407008615e0e831d05ed28e81cd ]
+
+Commit 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") added the new
+socket option SO_TIMESTAMPING_NEW. However, it was never implemented in
+__sock_cmsg_send thus breaking SO_TIMESTAMPING cmsg for platforms using
+SO_TIMESTAMPING_NEW.
+
+Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW")
+Link: https://lore.kernel.org/netdev/6a7281bf-bc4a-4f75-bb88-7011908ae471@app.fastmail.com/
+Signed-off-by: Thomas Lange <thomas@corelatus.se>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/20240104085744.49164-1-thomas@corelatus.se
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 0d8754ec837dc..c50a14a02edd4 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2771,6 +2771,7 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
+               sockc->mark = *(u32 *)CMSG_DATA(cmsg);
+               break;
+       case SO_TIMESTAMPING_OLD:
++      case SO_TIMESTAMPING_NEW:
+               if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+                       return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-mlx5-increase-size-of-irq-name-buffer.patch b/queue-6.1/net-mlx5-increase-size-of-irq-name-buffer.patch
new file mode 100644 (file)
index 0000000..aa683dd
--- /dev/null
@@ -0,0 +1,76 @@
+From 2a83821a4f768e3f7e4d98d0b8623c31ade327a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 13:58:43 -0800
+Subject: net/mlx5: Increase size of irq name buffer
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+[ Upstream commit 3338bebfc26a1e2cebbba82a1cf12c0159608e73 ]
+
+Without an increased buffer size, the snprintf operation writing to the
+buffer will trigger -Wformat-truncation with W=1:
+
+    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function 'mlx5_irq_alloc':
+    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:296:7: error: '@pci:' directive output may be truncated writing 5 bytes into a region of size between 1 and 32 [-Werror=format-truncation=]
+      296 |    "%s@pci:%s", name, pci_name(dev->pdev));
+          |       ^~~~~
+    drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:295:2: note: 'snprintf' output 6 or more bytes (assuming 37) into a destination of size 32
+      295 |  snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+          |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      296 |    "%s@pci:%s", name, pci_name(dev->pdev));
+          |    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Fixes: ada9f5d00797 ("IB/mlx5: Fix eq names to display nicely in /proc/interrupts")
+Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Link: https://lore.kernel.org/r/20231114215846.5902-13-saeed@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 6 +++---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 3 +++
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index d136360ac6a98..a6d3fc96e1685 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -25,7 +25,7 @@
+ struct mlx5_irq {
+       struct atomic_notifier_head nh;
+       cpumask_var_t mask;
+-      char name[MLX5_MAX_IRQ_NAME];
++      char name[MLX5_MAX_IRQ_FORMATTED_NAME];
+       struct mlx5_irq_pool *pool;
+       int refcount;
+       u32 index;
+@@ -236,8 +236,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+       else
+               irq_sf_set_name(pool, name, i);
+       ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+-      snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+-               "%s@pci:%s", name, pci_name(dev->pdev));
++      snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
++               MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
+       err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+                         &irq->nh);
+       if (err) {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+index 5c7e68bee43a0..4047179307c4a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+@@ -7,6 +7,9 @@
+ #include <linux/mlx5/driver.h>
+ #define MLX5_MAX_IRQ_NAME (32)
++#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
++#define MLX5_MAX_IRQ_FORMATTED_NAME \
++      (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
+ /* max irq_index is 2047, so four chars */
+ #define MLX5_MAX_IRQ_IDX_CHARS (4)
+ #define MLX5_EQ_REFS_PER_IRQ (2)
+-- 
+2.43.0
+
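The sizing idea above, adding sizeof() of the format-string literal on top of the name budget, can be shown with a small standalone sketch (names invented, not the mlx5 ones):

#include <stdio.h>

/* sizeof() on a string literal counts its characters plus the terminating
 * NUL, so MAX_FORMATTED_NAME reserves room for the fixed "@pci:" part of
 * the output in addition to the name budget.
 */
#define MAX_NAME           32
#define NAME_FORMAT_STR    "%s@pci:%s"
#define MAX_FORMATTED_NAME (MAX_NAME + sizeof(NAME_FORMAT_STR))

int main(void)
{
	char base[MAX_NAME] = "mlx5_comp0";
	char pci[MAX_NAME] = "0000:08:00.0";
	char formatted[MAX_FORMATTED_NAME];

	snprintf(formatted, sizeof(formatted), NAME_FORMAT_STR, base, pci);
	printf("%s\n", formatted);
	return 0;
}
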
diff --git a/queue-6.1/net-qla3xxx-fix-potential-memleak-in-ql_alloc_buffer.patch b/queue-6.1/net-qla3xxx-fix-potential-memleak-in-ql_alloc_buffer.patch
new file mode 100644 (file)
index 0000000..c35fba3
--- /dev/null
@@ -0,0 +1,44 @@
+From 9c442a6aebc6eef0931aa962bc9c2dc82e4ac4a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Dec 2023 15:02:27 +0800
+Subject: net/qla3xxx: fix potential memleak in ql_alloc_buffer_queues
+
+From: Dinghao Liu <dinghao.liu@zju.edu.cn>
+
+[ Upstream commit 89f45c30172c80e55c887f32f1af8e184124577b ]
+
+When dma_alloc_coherent() fails, we should free qdev->lrg_buf
+to prevent potential memleak.
+
+Fixes: 1357bfcf7106 ("qla3xxx: Dynamically size the rx buffer queue based on the MTU.")
+Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
+Link: https://lore.kernel.org/r/20231227070227.10527-1-dinghao.liu@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qla3xxx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
+index 0d57ffcedf0c6..fc78bc959ded8 100644
+--- a/drivers/net/ethernet/qlogic/qla3xxx.c
++++ b/drivers/net/ethernet/qlogic/qla3xxx.c
+@@ -2591,6 +2591,7 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev)
+       if (qdev->lrg_buf_q_alloc_virt_addr == NULL) {
+               netdev_err(qdev->ndev, "lBufQ failed\n");
++              kfree(qdev->lrg_buf);
+               return -ENOMEM;
+       }
+       qdev->lrg_buf_q_virt_addr = qdev->lrg_buf_q_alloc_virt_addr;
+@@ -2615,6 +2616,7 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev)
+                                 qdev->lrg_buf_q_alloc_size,
+                                 qdev->lrg_buf_q_alloc_virt_addr,
+                                 qdev->lrg_buf_q_alloc_phy_addr);
++              kfree(qdev->lrg_buf);
+               return -ENOMEM;
+       }
+-- 
+2.43.0
+
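The underlying rule, releasing every earlier allocation when a later one fails, in a hypothetical standalone sketch (struct and names invented):

#include <stdlib.h>

struct example_queues {
	void *buf;       /* stands in for qdev->lrg_buf */
	void *dma_area;  /* stands in for the coherent queue memory */
};

static int example_alloc_queues(struct example_queues *q, size_t n)
{
	q->buf = calloc(n, 64);
	if (!q->buf)
		return -1;

	q->dma_area = malloc(n * 16);
	if (!q->dma_area) {
		free(q->buf);   /* the fix: don't leak the first allocation */
		q->buf = NULL;
		return -1;
	}
	return 0;
}
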
diff --git a/queue-6.1/net-ravb-wait-for-operating-mode-to-be-applied.patch b/queue-6.1/net-ravb-wait-for-operating-mode-to-be-applied.patch
new file mode 100644 (file)
index 0000000..09181e2
--- /dev/null
@@ -0,0 +1,181 @@
+From 369ba8d2f5585f0a8e7f716d5dfd513881c4a891 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 10:13:53 +0200
+Subject: net: ravb: Wait for operating mode to be applied
+
+From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+[ Upstream commit 9039cd4c61635b2d541009a7cd5e2cc052402f28 ]
+
+CSR.OPS bits specify the current operating mode and (according to
+documentation) they are updated by HW when the operating mode change
+request is processed. To comply with this check CSR.OPS before proceeding.
+
+Commit introduces ravb_set_opmode() that does all the necessities for
+setting the operating mode (set CCC.OPC (and CCC.GAC, CCC.CSEL, if any) and
+wait for CSR.OPS) and call it where needed. This should comply with all the
+HW manuals requirements as different manual variants specify that different
+modes need to be checked in CSR.OPS when setting CCC.OPC.
+
+If gPTP active in config mode is supported and it needs to be enabled, the
+CCC.GAC and CCC.CSEL needs to be configured along with CCC.OPC in the same
+write access. For this, ravb_set_opmode() allows passing GAC and CSEL as
+part of opmode and the function updates accordingly CCC register.
+
+Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
+Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 65 +++++++++++++++---------
+ 1 file changed, 42 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
+index 68cb5616ef991..c2c56a5289caf 100644
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -68,16 +68,27 @@ int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value)
+       return -ETIMEDOUT;
+ }
+-static int ravb_config(struct net_device *ndev)
++static int ravb_set_opmode(struct net_device *ndev, u32 opmode)
+ {
++      u32 csr_ops = 1U << (opmode & CCC_OPC);
++      u32 ccc_mask = CCC_OPC;
+       int error;
+-      /* Set config mode */
+-      ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
+-      /* Check if the operating mode is changed to the config mode */
+-      error = ravb_wait(ndev, CSR, CSR_OPS, CSR_OPS_CONFIG);
+-      if (error)
+-              netdev_err(ndev, "failed to switch device to config mode\n");
++      /* If gPTP active in config mode is supported it needs to be configured
++       * along with CSEL and operating mode in the same access. This is a
++       * hardware limitation.
++       */
++      if (opmode & CCC_GAC)
++              ccc_mask |= CCC_GAC | CCC_CSEL;
++
++      /* Set operating mode */
++      ravb_modify(ndev, CCC, ccc_mask, opmode);
++      /* Check if the operating mode is changed to the requested one */
++      error = ravb_wait(ndev, CSR, CSR_OPS, csr_ops);
++      if (error) {
++              netdev_err(ndev, "failed to switch device to requested mode (%u)\n",
++                         opmode & CCC_OPC);
++      }
+       return error;
+ }
+@@ -675,7 +686,7 @@ static int ravb_dmac_init(struct net_device *ndev)
+       int error;
+       /* Set CONFIG mode */
+-      error = ravb_config(ndev);
++      error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+       if (error)
+               return error;
+@@ -684,9 +695,7 @@ static int ravb_dmac_init(struct net_device *ndev)
+               return error;
+       /* Setting the control will start the AVB-DMAC process. */
+-      ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION);
+-
+-      return 0;
++      return ravb_set_opmode(ndev, CCC_OPC_OPERATION);
+ }
+ static void ravb_get_tx_tstamp(struct net_device *ndev)
+@@ -1048,7 +1057,7 @@ static int ravb_stop_dma(struct net_device *ndev)
+               return error;
+       /* Stop AVB-DMAC process */
+-      return ravb_config(ndev);
++      return ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ }
+ /* E-MAC interrupt handler */
+@@ -2576,21 +2585,25 @@ static int ravb_set_gti(struct net_device *ndev)
+       return 0;
+ }
+-static void ravb_set_config_mode(struct net_device *ndev)
++static int ravb_set_config_mode(struct net_device *ndev)
+ {
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
++      int error;
+       if (info->gptp) {
+-              ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
++              error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
++              if (error)
++                      return error;
+               /* Set CSEL value */
+               ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+       } else if (info->ccc_gac) {
+-              ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
+-                          CCC_GAC | CCC_CSEL_HPB);
++              error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB);
+       } else {
+-              ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
++              error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+       }
++
++      return error;
+ }
+ /* Set tx and rx clock internal delay modes */
+@@ -2810,7 +2823,9 @@ static int ravb_probe(struct platform_device *pdev)
+       ndev->ethtool_ops = &ravb_ethtool_ops;
+       /* Set AVB config mode */
+-      ravb_set_config_mode(ndev);
++      error = ravb_set_config_mode(ndev);
++      if (error)
++              goto out_disable_gptp_clk;
+       if (info->gptp || info->ccc_gac) {
+               /* Set GTI value */
+@@ -2933,8 +2948,7 @@ static int ravb_remove(struct platform_device *pdev)
+       dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
+                         priv->desc_bat_dma);
+-      /* Set reset mode */
+-      ravb_write(ndev, CCC_OPC_RESET, CCC);
++      ravb_set_opmode(ndev, CCC_OPC_RESET);
+       clk_disable_unprepare(priv->gptp_clk);
+       clk_disable_unprepare(priv->refclk);
+@@ -3018,8 +3032,11 @@ static int __maybe_unused ravb_resume(struct device *dev)
+       int ret = 0;
+       /* If WoL is enabled set reset mode to rearm the WoL logic */
+-      if (priv->wol_enabled)
+-              ravb_write(ndev, CCC_OPC_RESET, CCC);
++      if (priv->wol_enabled) {
++              ret = ravb_set_opmode(ndev, CCC_OPC_RESET);
++              if (ret)
++                      return ret;
++      }
+       /* All register have been reset to default values.
+        * Restore all registers which where setup at probe time and
+@@ -3027,7 +3044,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
+        */
+       /* Set AVB config mode */
+-      ravb_set_config_mode(ndev);
++      ret = ravb_set_config_mode(ndev);
++      if (ret)
++              return ret;
+       if (info->gptp || info->ccc_gac) {
+               /* Set GTI value */
+-- 
+2.43.0
+
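A condensed, illustrative sketch of the write-then-poll pattern that ravb_set_opmode() implements; the register layout and names below are invented, not the RAVB ones:

#include <stdint.h>
#include <errno.h>

#define EX_OPC_MASK  0x3u
#define EX_MAX_POLLS 100

/* Write the requested mode to the control register, then poll the status
 * register until the hardware reports that mode, instead of assuming the
 * write took effect immediately.
 */
static int example_set_opmode(volatile uint32_t *ccc, volatile uint32_t *csr,
			      uint32_t opmode)
{
	uint32_t want_ops = 1u << (opmode & EX_OPC_MASK);
	int i;

	*ccc = (*ccc & ~EX_OPC_MASK) | (opmode & EX_OPC_MASK); /* request mode */

	for (i = 0; i < EX_MAX_POLLS; i++) {
		if (*csr & want_ops)  /* hardware confirmed the new mode */
			return 0;
		/* a real driver sleeps or delays between reads here */
	}
	return -ETIMEDOUT;
}
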
diff --git a/queue-6.1/net-save-and-restore-msg_namelen-in-sock_sendmsg.patch b/queue-6.1/net-save-and-restore-msg_namelen-in-sock_sendmsg.patch
new file mode 100644 (file)
index 0000000..49b0447
--- /dev/null
@@ -0,0 +1,55 @@
+From 1c052bf518018a0db7e7a4b8e3f63445d941d7b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Dec 2023 09:12:30 -0400
+Subject: net: Save and restore msg_namelen in sock_sendmsg
+
+From: Marc Dionne <marc.dionne@auristor.com>
+
+[ Upstream commit 01b2885d9415152bcb12ff1f7788f500a74ea0ed ]
+
+Commit 86a7e0b69bd5 ("net: prevent rewrite of msg_name in
+sock_sendmsg()") made sock_sendmsg save the incoming msg_name pointer
+and restore it before returning, to insulate the caller against
+msg_name being changed by the called code.  If the address length
+was also changed however, we may return with an inconsistent structure
+where the length doesn't match the address, and attempts to reuse it may
+lead to lost packets.
+
+For example, a kernel that doesn't have commit 1c5950fc6fe9 ("udp6: fix
+potential access to stale information") will replace a v4 mapped address
+with its ipv4 equivalent, and shorten namelen accordingly from 28 to 16.
+If the caller attempts to reuse the resulting msg structure, it will have
+the original ipv6 (v4 mapped) address but an incorrect v4 length.
+
+Fixes: 86a7e0b69bd5 ("net: prevent rewrite of msg_name in sock_sendmsg()")
+Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/socket.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/socket.c b/net/socket.c
+index 07470724e7358..0104617b440dc 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -740,6 +740,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg)
+ {
+       struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
+       struct sockaddr_storage address;
++      int save_len = msg->msg_namelen;
+       int ret;
+       if (msg->msg_name) {
+@@ -749,6 +750,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg)
+       ret = __sock_sendmsg(sock, msg);
+       msg->msg_name = save_addr;
++      msg->msg_namelen = save_len;
+       return ret;
+ }
+-- 
+2.43.0
+
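The save/restore idea in a minimal sketch, with do_send standing in for the internal __sock_sendmsg() call (an assumption for illustration only):

#include <sys/socket.h>

/* Capture both msg_name and msg_namelen before the protocol code runs and
 * restore them afterwards, so a caller reusing the msghdr never sees a
 * pointer/length pair that lower layers rewrote inconsistently.
 */
static long example_sendmsg_wrapper(long (*do_send)(struct msghdr *),
				    struct msghdr *msg)
{
	void *save_addr = msg->msg_name;
	socklen_t save_len = msg->msg_namelen;
	long ret;

	ret = do_send(msg);

	msg->msg_name = save_addr;
	msg->msg_namelen = save_len;
	return ret;
}
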
diff --git a/queue-6.1/net-sched-act_ct-fix-promotion-of-offloaded-unreplie.patch b/queue-6.1/net-sched-act_ct-fix-promotion-of-offloaded-unreplie.patch
new file mode 100644 (file)
index 0000000..19b468d
--- /dev/null
@@ -0,0 +1,158 @@
+From 14e25d537fb93353328283053064f4589dcff379 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Jun 2023 15:22:59 +0300
+Subject: net/sched: act_ct: Fix promotion of offloaded unreplied tuple
+
+From: Paul Blakey <paulb@nvidia.com>
+
+[ Upstream commit 41f2c7c342d3adb1c4dd5f2e3dd831adff16a669 ]
+
+Currently UNREPLIED and UNASSURED connections are added to the nf flow
+table. This causes the following connection packets to be processed
+by the flow table which then skips conntrack_in(), and thus such the
+connections will remain UNREPLIED and UNASSURED even if reply traffic
+is then seen. Even still, the unoffloaded reply packets are the ones
+triggering hardware update from new to established state, and if
+there aren't any to triger an update and/or previous update was
+missed, hardware can get out of sync with sw and still mark
+packets as new.
+
+Fix the above by:
+1) Not skipping conntrack_in() for UNASSURED packets, but still
+   refreshing for hardware, as before the cited patch.
+2) Trying to force a refresh by reply-direction packets that update
+   the hardware rules from new to established state.
+3) Removing any bidirectional flows that failed to update in
+   hardware, for re-insertion as bidirectional once any new packet
+   arrives.
+
+Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections")
+Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Paul Blakey <paulb@nvidia.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Link: https://lore.kernel.org/r/1686313379-117663-1-git-send-email-paulb@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h |  2 +-
+ net/netfilter/nf_flow_table_core.c    | 13 ++++++++++---
+ net/netfilter/nf_flow_table_ip.c      |  4 ++--
+ net/sched/act_ct.c                    |  9 ++++++++-
+ 4 files changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index ebb28ec5b6faf..f37f9f34430c1 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -268,7 +268,7 @@ int flow_offload_route_init(struct flow_offload *flow,
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+ void flow_offload_refresh(struct nf_flowtable *flow_table,
+-                        struct flow_offload *flow);
++                        struct flow_offload *flow, bool force);
+ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+                                                    struct flow_offload_tuple *tuple);
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index 81c26a96c30bb..baddb93a5e8cf 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -314,12 +314,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+ void flow_offload_refresh(struct nf_flowtable *flow_table,
+-                        struct flow_offload *flow)
++                        struct flow_offload *flow, bool force)
+ {
+       u32 timeout;
+       timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+-      if (timeout - READ_ONCE(flow->timeout) > HZ)
++      if (force || timeout - READ_ONCE(flow->timeout) > HZ)
+               WRITE_ONCE(flow->timeout, timeout);
+       else
+               return;
+@@ -331,6 +331,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_refresh);
++static bool nf_flow_is_outdated(const struct flow_offload *flow)
++{
++      return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
++              !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
++}
++
+ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ {
+       return nf_flow_timeout_delta(flow->timeout) <= 0;
+@@ -420,7 +426,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
+                                   struct flow_offload *flow, void *data)
+ {
+       if (nf_flow_has_expired(flow) ||
+-          nf_ct_is_dying(flow->ct))
++          nf_ct_is_dying(flow->ct) ||
++          nf_flow_is_outdated(flow))
+               flow_offload_teardown(flow);
+       if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index b350fe9d00b0b..6feaac9ab05c8 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+       if (skb_try_make_writable(skb, thoff + hdrsize))
+               return NF_DROP;
+-      flow_offload_refresh(flow_table, flow);
++      flow_offload_refresh(flow_table, flow, false);
+       nf_flow_encap_pop(skb, tuplehash);
+       thoff -= offset;
+@@ -646,7 +646,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+       if (skb_try_make_writable(skb, thoff + hdrsize))
+               return NF_DROP;
+-      flow_offload_refresh(flow_table, flow);
++      flow_offload_refresh(flow_table, flow, false);
+       nf_flow_encap_pop(skb, tuplehash);
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 3c063065f125f..b80a58d3bf0f3 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -606,6 +606,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+       struct flow_offload_tuple tuple = {};
+       enum ip_conntrack_info ctinfo;
+       struct tcphdr *tcph = NULL;
++      bool force_refresh = false;
+       struct flow_offload *flow;
+       struct nf_conn *ct;
+       u8 dir;
+@@ -643,6 +644,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+                        * established state, then don't refresh.
+                        */
+                       return false;
++              force_refresh = true;
+       }
+       if (tcph && (unlikely(tcph->fin || tcph->rst))) {
+@@ -656,7 +658,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+       else
+               ctinfo = IP_CT_ESTABLISHED_REPLY;
+-      flow_offload_refresh(nf_ft, flow);
++      flow_offload_refresh(nf_ft, flow, force_refresh);
++      if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
++              /* Process this flow in SW to allow promoting to ASSURED */
++              return false;
++      }
++
+       nf_conntrack_get(&ct->ct_general);
+       nf_ct_set(skb, ct, ctinfo);
+       if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-act_ct-offload-udp-new-connections.patch b/queue-6.1/net-sched-act_ct-offload-udp-new-connections.patch
new file mode 100644 (file)
index 0000000..86b6ea9
--- /dev/null
@@ -0,0 +1,157 @@
+From da420921aeb41458470ef982be25475a762e01c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Feb 2023 17:30:59 +0100
+Subject: net/sched: act_ct: offload UDP NEW connections
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 6a9bad0069cf306f3df6ac53cf02438d4e15f296 ]
+
+Modify the offload algorithm of UDP connections to the following:
+
+- Offload NEW connection as unidirectional.
+
+- When connection state changes to ESTABLISHED also update the hardware
+flow. However, in order to prevent act_ct from spamming the offload add wq
+for every packet coming in the reply direction in this state, verify whether
+the connection has already been updated to ESTABLISHED in the drivers. If
+that is the case, then skip the flow_table and let conntrack handle such
+packets, which will also allow conntrack to potentially promote the
+connection to ASSURED.
+
+- When connection state changes to ASSURED set the flow_table flow
+NF_FLOW_HW_BIDIRECTIONAL flag which will cause refresh mechanism to offload
+the reply direction.
+
+All other protocols have their offload algorithm preserved and are always
+offloaded as bidirectional.
+
+Note that this change tries to minimize the load on the flow_table add
+workqueue. First, it tracks the last ctinfo that was offloaded by using the
+new 'NF_FLOW_HW_ESTABLISHED' flow flag and doesn't schedule a refresh for
+reply-direction packets when the offloads have already been updated with the
+current ctinfo. Second, when the 'add' task executes on the workqueue it
+always updates the offload with the current flow state (by checking the
+'bidirectional' flow flag and obtaining the actual ctinfo/cookie through the
+meta action instead of caching any of these from the moment of scheduling
+the 'add' work), preventing the need to schedule more updates if the state
+changed concurrently while the 'add' work was pending on the workqueue.
+
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 51 +++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 39 insertions(+), 12 deletions(-)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 86d269724485a..3c063065f125f 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -365,7 +365,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
+                                 struct nf_conn *ct,
+-                                bool tcp)
++                                bool tcp, bool bidirectional)
+ {
+       struct nf_conn_act_ct_ext *act_ct_ext;
+       struct flow_offload *entry;
+@@ -384,6 +384,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
+               ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+               ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+       }
++      if (bidirectional)
++              __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
+       act_ct_ext = nf_conn_act_ct_ext_find(ct);
+       if (act_ct_ext) {
+@@ -407,26 +409,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
+                                          struct nf_conn *ct,
+                                          enum ip_conntrack_info ctinfo)
+ {
+-      bool tcp = false;
+-
+-      if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+-          !test_bit(IPS_ASSURED_BIT, &ct->status))
+-              return;
++      bool tcp = false, bidirectional = true;
+       switch (nf_ct_protonum(ct)) {
+       case IPPROTO_TCP:
+-              tcp = true;
+-              if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
++              if ((ctinfo != IP_CT_ESTABLISHED &&
++                   ctinfo != IP_CT_ESTABLISHED_REPLY) ||
++                  !test_bit(IPS_ASSURED_BIT, &ct->status) ||
++                  ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+                       return;
++
++              tcp = true;
+               break;
+       case IPPROTO_UDP:
++              if (!nf_ct_is_confirmed(ct))
++                      return;
++              if (!test_bit(IPS_ASSURED_BIT, &ct->status))
++                      bidirectional = false;
+               break;
+ #ifdef CONFIG_NF_CT_PROTO_GRE
+       case IPPROTO_GRE: {
+               struct nf_conntrack_tuple *tuple;
+-              if (ct->status & IPS_NAT_MASK)
++              if ((ctinfo != IP_CT_ESTABLISHED &&
++                   ctinfo != IP_CT_ESTABLISHED_REPLY) ||
++                  !test_bit(IPS_ASSURED_BIT, &ct->status) ||
++                  ct->status & IPS_NAT_MASK)
+                       return;
++
+               tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+               /* No support for GRE v1 */
+               if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+@@ -442,7 +452,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
+           ct->status & IPS_SEQ_ADJUST)
+               return;
+-      tcf_ct_flow_table_add(ct_ft, ct, tcp);
++      tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
+ }
+ static bool
+@@ -621,13 +631,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+       ct = flow->ct;
++      if (dir == FLOW_OFFLOAD_DIR_REPLY &&
++          !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
++              /* Only offload reply direction after connection became
++               * assured.
++               */
++              if (test_bit(IPS_ASSURED_BIT, &ct->status))
++                      set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
++              else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
++                      /* If flow_table flow has already been updated to the
++                       * established state, then don't refresh.
++                       */
++                      return false;
++      }
++
+       if (tcph && (unlikely(tcph->fin || tcph->rst))) {
+               flow_offload_teardown(flow);
+               return false;
+       }
+-      ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
+-                                                  IP_CT_ESTABLISHED_REPLY;
++      if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
++              ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
++                      IP_CT_ESTABLISHED : IP_CT_NEW;
++      else
++              ctinfo = IP_CT_ESTABLISHED_REPLY;
+       flow_offload_refresh(nf_ft, flow);
+       nf_conntrack_get(&ct->ct_general);
+-- 
+2.43.0
+
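A rough sketch of the ctinfo selection introduced above, using an invented enum rather than the kernel's ip_conntrack_info values:

#include <stdbool.h>

/* Original-direction packets of a connection that has not yet seen a reply
 * are handled as NEW, everything else as ESTABLISHED or ESTABLISHED_REPLY.
 */
enum example_ctinfo {
	EX_CT_NEW,
	EX_CT_ESTABLISHED,
	EX_CT_ESTABLISHED_REPLY,
};

static enum example_ctinfo example_pick_ctinfo(bool original_dir,
					       bool seen_reply)
{
	if (original_dir)
		return seen_reply ? EX_CT_ESTABLISHED : EX_CT_NEW;
	return EX_CT_ESTABLISHED_REPLY;
}
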
diff --git a/queue-6.1/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch b/queue-6.1/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch
new file mode 100644 (file)
index 0000000..cecac2b
--- /dev/null
@@ -0,0 +1,195 @@
+From baf1515c7f3f16801fb896b07d179e5aa3fe924b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Dec 2023 18:25:54 +0100
+Subject: net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc ]
+
+The referenced change added custom cleanup code to act_ct to delete any
+callbacks registered on the parent block when deleting the
+tcf_ct_flow_table instance. However, the underlying issue is that the
+drivers don't obtain the reference to the tcf_ct_flow_table instance when
+registering callbacks which means that not only driver callbacks may still
+be on the table when deleting it but also that the driver can still have
+pointers to its internal nf_flowtable and can use it concurrently which
+results either warning in netfilter[0] or use-after-free.
+
+Fix the issue by taking a reference to the underlying struct
+tcf_ct_flow_table instance when registering the callback and release the
+reference when unregistering. Expose new API required for such reference
+counting by adding two new callbacks to nf_flowtable_type and implementing
+them for act_ct flowtable_ct type. This fixes the issue by extending the
+lifetime of nf_flowtable until all users have unregistered.
+
+[0]:
+[106170.938634] ------------[ cut here ]------------
+[106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm
+ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis
+try overlay mlx5_core
+[106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1
+[106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
+[106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00
+[106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202
+[106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48
+[106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000
+[106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700
+[106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58
+[106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000
+[106170.951616] FS:  0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000
+[106170.952329] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0
+[106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[106170.954766] Call Trace:
+[106170.955057]  <TASK>
+[106170.955315]  ? __warn+0x79/0x120
+[106170.955648]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.956172]  ? report_bug+0x17c/0x190
+[106170.956537]  ? handle_bug+0x3c/0x60
+[106170.956891]  ? exc_invalid_op+0x14/0x70
+[106170.957264]  ? asm_exc_invalid_op+0x16/0x20
+[106170.957666]  ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core]
+[106170.958172]  ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core]
+[106170.958788]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.959339]  ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core]
+[106170.959854]  ? mapping_remove+0x154/0x1d0 [mlx5_core]
+[106170.960342]  ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core]
+[106170.960927]  mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core]
+[106170.961441]  mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core]
+[106170.962001]  mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core]
+[106170.962524]  mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core]
+[106170.963034]  mlx5e_flow_put+0x73/0xe0 [mlx5_core]
+[106170.963506]  mlx5e_put_flow_list+0x38/0x70 [mlx5_core]
+[106170.964002]  mlx5e_rep_update_flows+0xec/0x290 [mlx5_core]
+[106170.964525]  mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core]
+[106170.965056]  process_one_work+0x13a/0x2c0
+[106170.965443]  worker_thread+0x2e5/0x3f0
+[106170.965808]  ? rescuer_thread+0x410/0x410
+[106170.966192]  kthread+0xc6/0xf0
+[106170.966515]  ? kthread_complete_and_exit+0x20/0x20
+[106170.966970]  ret_from_fork+0x2d/0x50
+[106170.967332]  ? kthread_complete_and_exit+0x20/0x20
+[106170.967774]  ret_from_fork_asm+0x11/0x20
+[106170.970466]  </TASK>
+[106170.970726] ---[ end trace 0000000000000000 ]---
+
+Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h | 10 ++++++++
+ net/sched/act_ct.c                    | 34 ++++++++++++++++++++++-----
+ 2 files changed, 38 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index 0b163ead95c9f..dde4dd9c4012c 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -62,6 +62,8 @@ struct nf_flowtable_type {
+                                                 enum flow_offload_tuple_dir dir,
+                                                 struct nf_flow_rule *flow_rule);
+       void                            (*free)(struct nf_flowtable *ft);
++      void                            (*get)(struct nf_flowtable *ft);
++      void                            (*put)(struct nf_flowtable *ft);
+       nf_hookfn                       *hook;
+       struct module                   *owner;
+ };
+@@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
+       }
+       list_add_tail(&block_cb->list, &block->cb_list);
++      up_write(&flow_table->flow_block_lock);
++
++      if (flow_table->type->get)
++              flow_table->type->get(flow_table);
++      return 0;
+ unlock:
+       up_write(&flow_table->flow_block_lock);
+@@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
+               WARN_ON(true);
+       }
+       up_write(&flow_table->flow_block_lock);
++
++      if (flow_table->type->put)
++              flow_table->type->put(flow_table);
+ }
+ int flow_offload_route_init(struct flow_offload *flow,
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 4d34474f2cc0e..faf798133059b 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -280,9 +280,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
+              !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+ }
++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
++
++static void tcf_ct_nf_get(struct nf_flowtable *ft)
++{
++      struct tcf_ct_flow_table *ct_ft =
++              container_of(ft, struct tcf_ct_flow_table, nf_ft);
++
++      tcf_ct_flow_table_get_ref(ct_ft);
++}
++
++static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
++
++static void tcf_ct_nf_put(struct nf_flowtable *ft)
++{
++      struct tcf_ct_flow_table *ct_ft =
++              container_of(ft, struct tcf_ct_flow_table, nf_ft);
++
++      tcf_ct_flow_table_put(ct_ft);
++}
++
+ static struct nf_flowtable_type flowtable_ct = {
+       .gc             = tcf_ct_flow_is_outdated,
+       .action         = tcf_ct_flow_table_fill_actions,
++      .get            = tcf_ct_nf_get,
++      .put            = tcf_ct_nf_put,
+       .owner          = THIS_MODULE,
+ };
+@@ -331,9 +353,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
+       return err;
+ }
++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
++{
++      refcount_inc(&ct_ft->ref);
++}
++
+ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+ {
+-      struct flow_block_cb *block_cb, *tmp_cb;
+       struct tcf_ct_flow_table *ct_ft;
+       struct flow_block *block;
+@@ -341,13 +367,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+                            rwork);
+       nf_flow_table_free(&ct_ft->nf_ft);
+-      /* Remove any remaining callbacks before cleanup */
+       block = &ct_ft->nf_ft.flow_block;
+       down_write(&ct_ft->nf_ft.flow_block_lock);
+-      list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
+-              list_del(&block_cb->list);
+-              flow_block_cb_free(block_cb);
+-      }
++      WARN_ON(!list_empty(&block->cb_list));
+       up_write(&ct_ft->nf_ft.flow_block_lock);
+       kfree(ct_ft);
+-- 
+2.43.0
+
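The lifetime rule this patch establishes, one reference per registered callback, in a standalone sketch with invented names:

#include <assert.h>
#include <stdbool.h>

/* Registering a callback takes a reference on the table and unregistering
 * drops it, so the table outlives every registered callback.
 */
struct example_table {
	int refcount;
	bool freed;
};

static void example_table_get(struct example_table *t) /* ->get() hook */
{
	t->refcount++;
}

static void example_table_put(struct example_table *t) /* ->put() hook */
{
	if (--t->refcount == 0)
		t->freed = true;  /* real code frees and flushes offloads */
}

int main(void)
{
	struct example_table t = { .refcount = 1, .freed = false };

	example_table_get(&t);   /* driver registers a callback */
	example_table_put(&t);   /* owner drops its own reference */
	assert(!t.freed);        /* still alive: the callback holds a ref */
	example_table_put(&t);   /* callback unregistered */
	assert(t.freed);
	return 0;
}
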
diff --git a/queue-6.1/net-sched-call-tcf_ct_params_free-to-free-params-in-.patch b/queue-6.1/net-sched-call-tcf_ct_params_free-to-free-params-in-.patch
new file mode 100644 (file)
index 0000000..827f58b
--- /dev/null
@@ -0,0 +1,112 @@
+From 89293e3bc421a92dfd4935a5bec34d30ab89aba1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Nov 2022 15:34:16 -0500
+Subject: net: sched: call tcf_ct_params_free to free params in tcf_ct_init
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 1913894100ca53205f2d56091cb34b8eba1de217 ]
+
+This patch is to make the err path simple by calling tcf_ct_params_free(),
+so that it won't cause problems when more members are added into param and
+need freeing on the err path.
+
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 35 ++++++++++++++++++-----------------
+ 1 file changed, 18 insertions(+), 17 deletions(-)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 4c7f7861ea967..478cedc29b737 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -345,11 +345,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+       module_put(THIS_MODULE);
+ }
+-static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
++static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft)
+ {
+-      struct tcf_ct_flow_table *ct_ft = params->ct_ft;
+-
+-      if (refcount_dec_and_test(&params->ct_ft->ref)) {
++      if (refcount_dec_and_test(&ct_ft->ref)) {
+               rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
+               INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
+               queue_rcu_work(act_ct_wq, &ct_ft->rwork);
+@@ -832,18 +830,23 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+       return err;
+ }
+-static void tcf_ct_params_free(struct rcu_head *head)
++static void tcf_ct_params_free(struct tcf_ct_params *params)
+ {
+-      struct tcf_ct_params *params = container_of(head,
+-                                                  struct tcf_ct_params, rcu);
+-
+-      tcf_ct_flow_table_put(params);
+-
++      if (params->ct_ft)
++              tcf_ct_flow_table_put(params->ct_ft);
+       if (params->tmpl)
+               nf_ct_put(params->tmpl);
+       kfree(params);
+ }
++static void tcf_ct_params_free_rcu(struct rcu_head *head)
++{
++      struct tcf_ct_params *params;
++
++      params = container_of(head, struct tcf_ct_params, rcu);
++      tcf_ct_params_free(params);
++}
++
+ #if IS_ENABLED(CONFIG_NF_NAT)
+ /* Modelled after nf_nat_ipv[46]_fn().
+  * range is only used for new, uninitialized NAT state.
+@@ -1390,7 +1393,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
+       err = tcf_ct_flow_table_get(net, params);
+       if (err)
+-              goto cleanup_params;
++              goto cleanup;
+       spin_lock_bh(&c->tcf_lock);
+       goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+@@ -1401,17 +1404,15 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
+       if (goto_ch)
+               tcf_chain_put_by_act(goto_ch);
+       if (params)
+-              call_rcu(&params->rcu, tcf_ct_params_free);
++              call_rcu(&params->rcu, tcf_ct_params_free_rcu);
+       return res;
+-cleanup_params:
+-      if (params->tmpl)
+-              nf_ct_put(params->tmpl);
+ cleanup:
+       if (goto_ch)
+               tcf_chain_put_by_act(goto_ch);
+-      kfree(params);
++      if (params)
++              tcf_ct_params_free(params);
+       tcf_idr_release(*a, bind);
+       return err;
+ }
+@@ -1423,7 +1424,7 @@ static void tcf_ct_cleanup(struct tc_action *a)
+       params = rcu_dereference_protected(c->params, 1);
+       if (params)
+-              call_rcu(&params->rcu, tcf_ct_params_free);
++              call_rcu(&params->rcu, tcf_ct_params_free_rcu);
+ }
+ static int tcf_ct_dump_key_val(struct sk_buff *skb,
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-em_text-fix-possible-memory-leak-in-em_tex.patch b/queue-6.1/net-sched-em_text-fix-possible-memory-leak-in-em_tex.patch
new file mode 100644 (file)
index 0000000..b4907c2
--- /dev/null
@@ -0,0 +1,40 @@
+From 93c23c768858a1ae196116f045b4c1ff98e4e843 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Dec 2023 10:25:31 +0800
+Subject: net: sched: em_text: fix possible memory leak in em_text_destroy()
+
+From: Hangyu Hua <hbh25y@gmail.com>
+
+[ Upstream commit 8fcb0382af6f1ef50936f1be05b8149eb2f88496 ]
+
+m->data needs to be freed when em_text_destroy is called.
+
+Fixes: d675c989ed2d ("[PKT_SCHED]: Packet classification based on textsearch (ematch)")
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/em_text.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/em_text.c b/net/sched/em_text.c
+index 6f3c1fb2fb44c..f176afb70559e 100644
+--- a/net/sched/em_text.c
++++ b/net/sched/em_text.c
+@@ -97,8 +97,10 @@ static int em_text_change(struct net *net, void *data, int len,
+ static void em_text_destroy(struct tcf_ematch *m)
+ {
+-      if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
++      if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) {
+               textsearch_destroy(EM_TEXT_PRIV(m)->config);
++              kfree(EM_TEXT_PRIV(m));
++      }
+ }
+ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-smc-fix-invalid-link-access-in-dumping-smc-r-con.patch b/queue-6.1/net-smc-fix-invalid-link-access-in-dumping-smc-r-con.patch
new file mode 100644 (file)
index 0000000..98bc154
--- /dev/null
@@ -0,0 +1,91 @@
+From a55dfee1f458e66ebb434d93453283be3b49b991 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Dec 2023 15:40:35 +0800
+Subject: net/smc: fix invalid link access in dumping SMC-R connections
+
+From: Wen Gu <guwen@linux.alibaba.com>
+
+[ Upstream commit 9dbe086c69b8902c85cece394760ac212e9e4ccc ]
+
+A crash was found when dumping SMC-R connections. It can be reproduced
+by the following steps:
+
+- environment: two RNICs on both sides.
+- run SMC-R between two sides, now a SMC_LGR_SYMMETRIC type link group
+  will be created.
+- set the first RNIC down on either side and link group will turn to
+  SMC_LGR_ASYMMETRIC_LOCAL then.
+- run 'smcss -R' and the crash will be triggered.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000010
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 8000000101fdd067 P4D 8000000101fdd067 PUD 10ce46067 PMD 0
+ Oops: 0000 [#1] PREEMPT SMP PTI
+ CPU: 3 PID: 1810 Comm: smcss Kdump: loaded Tainted: G W   E      6.7.0-rc6+ #51
+ RIP: 0010:__smc_diag_dump.constprop.0+0x36e/0x620 [smc_diag]
+ Call Trace:
+  <TASK>
+  ? __die+0x24/0x70
+  ? page_fault_oops+0x66/0x150
+  ? exc_page_fault+0x69/0x140
+  ? asm_exc_page_fault+0x26/0x30
+  ? __smc_diag_dump.constprop.0+0x36e/0x620 [smc_diag]
+  smc_diag_dump_proto+0xd0/0xf0 [smc_diag]
+  smc_diag_dump+0x26/0x60 [smc_diag]
+  netlink_dump+0x19f/0x320
+  __netlink_dump_start+0x1dc/0x300
+  smc_diag_handler_dump+0x6a/0x80 [smc_diag]
+  ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag]
+  sock_diag_rcv_msg+0x121/0x140
+  ? __pfx_sock_diag_rcv_msg+0x10/0x10
+  netlink_rcv_skb+0x5a/0x110
+  sock_diag_rcv+0x28/0x40
+  netlink_unicast+0x22a/0x330
+  netlink_sendmsg+0x240/0x4a0
+  __sock_sendmsg+0xb0/0xc0
+  ____sys_sendmsg+0x24e/0x300
+  ? copy_msghdr_from_user+0x62/0x80
+  ___sys_sendmsg+0x7c/0xd0
+  ? __do_fault+0x34/0x1a0
+  ? do_read_fault+0x5f/0x100
+  ? do_fault+0xb0/0x110
+  __sys_sendmsg+0x4d/0x80
+  do_syscall_64+0x45/0xf0
+  entry_SYSCALL_64_after_hwframe+0x6e/0x76
+
+When the first RNIC is set down, the lgr->lnk[0] will be cleared and an
+asymmetric link will be allocated in lgr->lnk[SMC_LINKS_PER_LGR_MAX - 1]
+by smc_llc_alloc_alt_link(). Then when we try to dump SMC-R connections
+in __smc_diag_dump(), the invalid lgr->lnk[0] will be accessed, resulting
+in this issue. So fix it by accessing the right link.
+
+Fixes: f16a7dd5cf27 ("smc: netlink interface for SMC sockets")
+Reported-by: henaumars <henaumars@sina.com>
+Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7616
+Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
+Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
+Link: https://lore.kernel.org/r/1703662835-53416-1-git-send-email-guwen@linux.alibaba.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/smc_diag.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
+index 80ea7d954eceb..801044e7d1949 100644
+--- a/net/smc/smc_diag.c
++++ b/net/smc/smc_diag.c
+@@ -153,8 +153,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
+                       .lnk[0].link_id = link->link_id,
+               };
+-              memcpy(linfo.lnk[0].ibname,
+-                     smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
++              memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name,
+                      sizeof(link->smcibdev->ibdev->name));
+               smc_gid_be16_convert(linfo.lnk[0].gid, link->gid);
+               smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-timestamp-extend-sof_timestamping_opt_id-to-hw-t.patch b/queue-6.1/net-timestamp-extend-sof_timestamping_opt_id-to-hw-t.patch
new file mode 100644 (file)
index 0000000..f052b5e
--- /dev/null
@@ -0,0 +1,52 @@
+From f34a1a0c97dbe98c13f2e62a01b50c39a1ff419d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 08:07:38 -0800
+Subject: net-timestamp: extend SOF_TIMESTAMPING_OPT_ID to HW timestamps
+
+From: Vadim Fedorenko <vadfed@meta.com>
+
+[ Upstream commit 8ca5a5790b9a1ce147484d2a2c4e66d2553f3d6c ]
+
+When the feature was added it was enabled for SW timestamps only but
+with current hardware the same out-of-order timestamps can be seen.
+Let's expand the area for the feature to all types of timestamps.
+
+Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 7f6ca95d16b9 ("net: Implement missing getsockopt(SO_TIMESTAMPING_NEW)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/ip_output.c  | 2 +-
+ net/ipv6/ip6_output.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 493c679ea54f3..d8ec802f97524 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -990,7 +990,7 @@ static int __ip_append_data(struct sock *sk,
+       mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+       paged = !!cork->gso_size;
+-      if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
++      if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 3c2b2a85de367..04822e2cba74a 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1506,7 +1506,7 @@ static int __ip6_append_data(struct sock *sk,
+       mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
+       orig_mtu = mtu;
+-      if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
++      if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-flowtable-allow-unidirectional-rules.patch b/queue-6.1/netfilter-flowtable-allow-unidirectional-rules.patch
new file mode 100644 (file)
index 0000000..fcb1839
--- /dev/null
@@ -0,0 +1,76 @@
+From de10b8ea976d0c729b8d59713e03fe511557d6b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Feb 2023 17:30:56 +0100
+Subject: netfilter: flowtable: allow unidirectional rules
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 8f84780b84d645d6e35467f4a6f3236b20d7f4b2 ]
+
+Modify flow table offload to support unidirectional connections by
+extending enum nf_flow_flags with a new "NF_FLOW_HW_BIDIRECTIONAL" flag.
+Only offload the reply direction when the flag is set. This infrastructure
+change is necessary to support offloading UDP NEW connections in the
+original direction in the following patches in the series.
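+
+As a rough sketch (illustrative, not taken verbatim from this series), a
+flowtable user that wants both directions offloaded would mark the flow
+before handing it over for offload:
+
+	/* offload the reply direction as well, not just the original one */
+	__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+	flow_offload_add(flow_table, flow);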
+
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h |  1 +
+ net/netfilter/nf_flow_table_offload.c | 12 ++++++++----
+ 2 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index cd982f4a0f50c..88ab98ab41d9f 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -164,6 +164,7 @@ enum nf_flow_flags {
+       NF_FLOW_HW_DYING,
+       NF_FLOW_HW_DEAD,
+       NF_FLOW_HW_PENDING,
++      NF_FLOW_HW_BIDIRECTIONAL,
+ };
+ enum flow_offload_type {
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index 4d9b99abe37d6..8b852f10fab4b 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
+       ok_count += flow_offload_tuple_add(offload, flow_rule[0],
+                                          FLOW_OFFLOAD_DIR_ORIGINAL);
+-      ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+-                                         FLOW_OFFLOAD_DIR_REPLY);
++      if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
++              ok_count += flow_offload_tuple_add(offload, flow_rule[1],
++                                                 FLOW_OFFLOAD_DIR_REPLY);
+       if (ok_count == 0)
+               return -ENOENT;
+@@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
+ {
+       clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+       flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+-      flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
++      if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
++              flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+       set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+ }
+@@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
+       u64 lastused;
+       flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+-      flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
++      if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
++              flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
++                                       &stats[1]);
+       lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
+       offload->flow->timeout = max_t(u64, offload->flow->timeout,
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-flowtable-cache-info-of-last-offload.patch b/queue-6.1/netfilter-flowtable-cache-info-of-last-offload.patch
new file mode 100644 (file)
index 0000000..f468da9
--- /dev/null
@@ -0,0 +1,171 @@
+From aa8689eb8935d603a6f52824c4f47b3279e22da5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Feb 2023 17:30:57 +0100
+Subject: netfilter: flowtable: cache info of last offload
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 1a441a9b8be8849957a01413a144f84932c324cb ]
+
+Modify flow table offload to cache the last ct info status that was passed
+to the driver offload callbacks by extending enum nf_flow_flags with a new
+"NF_FLOW_HW_ESTABLISHED" flag. Set the flag if ctinfo was 'established'
+during the last act_ct meta actions fill call. This infrastructure change
+is necessary to optimize the promotion of UDP connections from 'new' to
+'established' in the following patches in this series.
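+
+The new flag can then be checked like any other flow flag; a later
+consumer would do something along the lines of (sketch only):
+
+	/* was the last offload done while the conntrack entry was
+	 * already established?
+	 */
+	if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+		established = true;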
+
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h |  7 ++++---
+ net/netfilter/nf_flow_table_inet.c    |  2 +-
+ net/netfilter/nf_flow_table_offload.c |  6 +++---
+ net/sched/act_ct.c                    | 12 +++++++-----
+ 4 files changed, 15 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index 88ab98ab41d9f..ebb28ec5b6faf 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -57,7 +57,7 @@ struct nf_flowtable_type {
+                                                struct net_device *dev,
+                                                enum flow_block_command cmd);
+       int                             (*action)(struct net *net,
+-                                                const struct flow_offload *flow,
++                                                struct flow_offload *flow,
+                                                 enum flow_offload_tuple_dir dir,
+                                                 struct nf_flow_rule *flow_rule);
+       void                            (*free)(struct nf_flowtable *ft);
+@@ -165,6 +165,7 @@ enum nf_flow_flags {
+       NF_FLOW_HW_DEAD,
+       NF_FLOW_HW_PENDING,
+       NF_FLOW_HW_BIDIRECTIONAL,
++      NF_FLOW_HW_ESTABLISHED,
+ };
+ enum flow_offload_type {
+@@ -313,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
+ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+                               struct net_device *dev,
+                               enum flow_block_command cmd);
+-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
+                           enum flow_offload_tuple_dir dir,
+                           struct nf_flow_rule *flow_rule);
+-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
+                           enum flow_offload_tuple_dir dir,
+                           struct nf_flow_rule *flow_rule);
+diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
+index 0ccabf3fa6aa3..9505f9d188ff2 100644
+--- a/net/netfilter/nf_flow_table_inet.c
++++ b/net/netfilter/nf_flow_table_inet.c
+@@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ }
+ static int nf_flow_rule_route_inet(struct net *net,
+-                                 const struct flow_offload *flow,
++                                 struct flow_offload *flow,
+                                  enum flow_offload_tuple_dir dir,
+                                  struct nf_flow_rule *flow_rule)
+ {
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index 8b852f10fab4b..1c26f03fc6617 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
+       return 0;
+ }
+-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
+                           enum flow_offload_tuple_dir dir,
+                           struct nf_flow_rule *flow_rule)
+ {
+@@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
+-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
+                           enum flow_offload_tuple_dir dir,
+                           struct nf_flow_rule *flow_rule)
+ {
+@@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
+ {
+       const struct nf_flowtable *flowtable = offload->flowtable;
+       const struct flow_offload_tuple *tuple, *other_tuple;
+-      const struct flow_offload *flow = offload->flow;
++      struct flow_offload *flow = offload->flow;
+       struct dst_entry *other_dst = NULL;
+       struct nf_flow_rule *flow_rule;
+       int err = -ENOMEM;
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 478cedc29b737..86d269724485a 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -168,11 +168,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
+ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
+                                             enum ip_conntrack_dir dir,
++                                            enum ip_conntrack_info ctinfo,
+                                             struct flow_action *action)
+ {
+       struct nf_conn_labels *ct_labels;
+       struct flow_action_entry *entry;
+-      enum ip_conntrack_info ctinfo;
+       u32 *act_ct_labels;
+       entry = tcf_ct_flow_table_flow_action_get_next(action);
+@@ -180,8 +180,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+       entry->ct_metadata.mark = READ_ONCE(ct->mark);
+ #endif
+-      ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
+-                                           IP_CT_ESTABLISHED_REPLY;
+       /* aligns with the CT reference on the SKB nf_ct_set */
+       entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
+       entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
+@@ -235,22 +233,26 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
+ }
+ static int tcf_ct_flow_table_fill_actions(struct net *net,
+-                                        const struct flow_offload *flow,
++                                        struct flow_offload *flow,
+                                         enum flow_offload_tuple_dir tdir,
+                                         struct nf_flow_rule *flow_rule)
+ {
+       struct flow_action *action = &flow_rule->rule->action;
+       int num_entries = action->num_entries;
+       struct nf_conn *ct = flow->ct;
++      enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       int i, err;
+       switch (tdir) {
+       case FLOW_OFFLOAD_DIR_ORIGINAL:
+               dir = IP_CT_DIR_ORIGINAL;
++              ctinfo = IP_CT_ESTABLISHED;
++              set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+               break;
+       case FLOW_OFFLOAD_DIR_REPLY:
+               dir = IP_CT_DIR_REPLY;
++              ctinfo = IP_CT_ESTABLISHED_REPLY;
+               break;
+       default:
+               return -EOPNOTSUPP;
+@@ -260,7 +262,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
+       if (err)
+               goto err_nat;
+-      tcf_ct_flow_table_add_action_meta(ct, dir, action);
++      tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
+       return 0;
+ err_nat:
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-flowtable-gc-pushes-back-packets-to-classi.patch b/queue-6.1/netfilter-flowtable-gc-pushes-back-packets-to-classi.patch
new file mode 100644 (file)
index 0000000..1c70921
--- /dev/null
@@ -0,0 +1,104 @@
+From 0449b478e6c121959ee093763e1b856302a2a0bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Oct 2023 21:09:47 +0200
+Subject: netfilter: flowtable: GC pushes back packets to classic path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 735795f68b37e9bb49f642407a0d49b1631ea1c7 ]
+
+Since 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded
+unreplied tuple"), flowtable GC pushes flows with IPS_SEEN_REPLY back
+to the classic path in every run, i.e. every second. This is because of
+a new check for NF_FLOW_HW_ESTABLISHED, which is specific to sched/act_ct.
+
+In Netfilter's flowtable case, NF_FLOW_HW_ESTABLISHED never gets set,
+and IPS_SEEN_REPLY is unreliable since users decide when to offload the
+flow; the bit might only be set at a later stage.
+
+Fix it by adding a custom .gc handler that sched/act_ct can use to
+deal with its NF_FLOW_HW_ESTABLISHED bit.
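+
+In other words, a flowtable type can now opt in to its own "is this flow
+outdated?" policy. A hedged sketch of such a hook (names are illustrative
+only; the real act_ct handler is added by the hunks below):
+
+	static bool example_flow_is_outdated(const struct flow_offload *flow)
+	{
+		/* returning true tears the flow down and pushes packets
+		 * back to the classic path
+		 */
+		return false;
+	}
+
+	static struct nf_flowtable_type example_flowtable_type = {
+		.gc	= example_flow_is_outdated,
+		.owner	= THIS_MODULE,
+		/* .action etc. omitted in this sketch */
+	};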
+
+Fixes: 41f2c7c342d3 ("net/sched: act_ct: Fix promotion of offloaded unreplied tuple")
+Reported-by: Vladimir Smelhaus <vl.sm@email.cz>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Stable-dep-of: 125f1c7f26ff ("net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h |  1 +
+ net/netfilter/nf_flow_table_core.c    | 14 +++++++-------
+ net/sched/act_ct.c                    |  7 +++++++
+ 3 files changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index f37f9f34430c1..0b163ead95c9f 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -53,6 +53,7 @@ struct nf_flowtable_type {
+       struct list_head                list;
+       int                             family;
+       int                             (*init)(struct nf_flowtable *ft);
++      bool                            (*gc)(const struct flow_offload *flow);
+       int                             (*setup)(struct nf_flowtable *ft,
+                                                struct net_device *dev,
+                                                enum flow_block_command cmd);
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index baddb93a5e8cf..c1d99cb370b44 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -331,12 +331,6 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_refresh);
+-static bool nf_flow_is_outdated(const struct flow_offload *flow)
+-{
+-      return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+-              !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+-}
+-
+ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ {
+       return nf_flow_timeout_delta(flow->timeout) <= 0;
+@@ -422,12 +416,18 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+       return err;
+ }
++static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
++                            const struct flow_offload *flow)
++{
++      return flow_table->type->gc && flow_table->type->gc(flow);
++}
++
+ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
+                                   struct flow_offload *flow, void *data)
+ {
+       if (nf_flow_has_expired(flow) ||
+           nf_ct_is_dying(flow->ct) ||
+-          nf_flow_is_outdated(flow))
++          nf_flow_custom_gc(flow_table, flow))
+               flow_offload_teardown(flow);
+       if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index b80a58d3bf0f3..4d34474f2cc0e 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -274,7 +274,14 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
+       return err;
+ }
++static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
++{
++      return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
++             !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
++}
++
+ static struct nf_flowtable_type flowtable_ct = {
++      .gc             = tcf_ct_flow_is_outdated,
+       .action         = tcf_ct_flow_table_fill_actions,
+       .owner          = THIS_MODULE,
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-set-transport-offset-from-mac-he.patch b/queue-6.1/netfilter-nf_tables-set-transport-offset-from-mac-he.patch
new file mode 100644 (file)
index 0000000..6ded120
--- /dev/null
@@ -0,0 +1,75 @@
+From 56cc1e9b5b7e464b9e998329d7173330be70efb2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Dec 2023 11:50:12 +0100
+Subject: netfilter: nf_tables: set transport offset from mac header for
+ netdev/egress
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 0ae8e4cca78781401b17721bfb72718fdf7b4912 ]
+
+Before this patch, transport offset (pkt->thoff) provides an offset
+relative to the network header. This is fine for the inet families
+because skb->data points to the network header in that case. However,
+from netdev/egress, skb->data points to the mac header (if available),
+so pkt->thoff is missing the mac header length.
+
+Add skb_network_offset() to the transport offset (pkt->thoff) for
+netdev, so transport header mangling works as expected. Adjust payload
+fast eval function to use skb->data now that pkt->thoff provides an
+absolute offset. This explains why users report that matching on
+egress/netdev works but payload mangling does not.
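+
+As a concrete illustration (assuming a plain Ethernet frame with a
+20-byte IPv4 header seen on the egress hook): skb_network_offset() is 14,
+so pkt->thoff becomes 14 + 20 = 34, which is now the transport header
+offset counted from skb->data, and payload expressions can again use
+skb->data as their base.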
+
+This patch implicitly fixes payload mangling for IPv4 packets in
+netdev/egress given skb_store_bits() requires an offset from skb->data
+to reach the transport header.
+
+I suspect that nft_exthdr and the trace infra were also broken from
+netdev/egress because they also take skb->data as start, and pkt->thoff
+was not correct.
+
+Note that IPv6 is fine because ipv6_find_hdr() already provides a
+transport offset starting from skb->data, which includes
+skb_network_offset().
+
+The bridge family also uses nft_set_pktinfo_ipv4_validate(), but there
+skb_network_offset() is zero, so the update in this patch does not alter
+the existing behaviour.
+
+Fixes: 42df6e1d221d ("netfilter: Introduce egress hook")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables_ipv4.h | 2 +-
+ net/netfilter/nf_tables_core.c         | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
+index d8f6cb47ebe37..5225d2bd1a6e9 100644
+--- a/include/net/netfilter/nf_tables_ipv4.h
++++ b/include/net/netfilter/nf_tables_ipv4.h
+@@ -30,7 +30,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
+               return -1;
+       len = iph_totlen(pkt->skb, iph);
+-      thoff = iph->ihl * 4;
++      thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4);
+       if (pkt->skb->len < len)
+               return -1;
+       else if (len < thoff)
+diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
+index cee3e4e905ec8..e0c117229ee9d 100644
+--- a/net/netfilter/nf_tables_core.c
++++ b/net/netfilter/nf_tables_core.c
+@@ -141,7 +141,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
+       else {
+               if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+                       return false;
+-              ptr = skb_network_header(skb) + nft_thoff(pkt);
++              ptr = skb->data + nft_thoff(pkt);
+       }
+       ptr += priv->offset;
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nft_immediate-drop-chain-reference-counter.patch b/queue-6.1/netfilter-nft_immediate-drop-chain-reference-counter.patch
new file mode 100644 (file)
index 0000000..adc031e
--- /dev/null
@@ -0,0 +1,36 @@
+From d39cbbf50dc98ed34532f9728b5fb98aa77c1b82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jan 2024 20:15:33 +0100
+Subject: netfilter: nft_immediate: drop chain reference counter on error
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit b29be0ca8e816119ccdf95cc7d7c7be9bde005f1 ]
+
+In the init path, nft_data_init() bumps the chain reference counter;
+decrement it on error by following the error path, which calls
+nft_data_release() to restore it.
+
+Fixes: 4bedf9eee016 ("netfilter: nf_tables: fix chain binding transaction logic")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_immediate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
+index 5f59dbab3e933..55fcf0280c5c3 100644
+--- a/net/netfilter/nft_immediate.c
++++ b/net/netfilter/nft_immediate.c
+@@ -78,7 +78,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
+               case NFT_GOTO:
+                       err = nf_tables_bind_chain(ctx, chain);
+                       if (err < 0)
+-                              return err;
++                              goto err1;
+                       break;
+               default:
+                       break;
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-use-skb_ip_totlen-and-iph_totlen.patch b/queue-6.1/netfilter-use-skb_ip_totlen-and-iph_totlen.patch
new file mode 100644 (file)
index 0000000..556c8c8
--- /dev/null
@@ -0,0 +1,97 @@
+From d9408d0e798b54be53f54b011e26b31027f18849 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 Jan 2023 10:58:34 -0500
+Subject: netfilter: use skb_ip_totlen and iph_totlen
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit a13fbf5ed5b4fc9095f12e955ca3a59b5507ff01 ]
+
+There are also quite a few places in netfilter that may process IPv4 TCP
+GSO packets; we need to replace them too.
+
+In length_mt(), we have to use u_int32_t/int to accept the skb_ip_totlen()
+return value, otherwise it may overflow and mismatch. This change will
+also help us add a selftest for IPv4 BIG TCP in the following patch.
+
+Note that we don't need to replace the one in tcpmss_tg4(), as it will
+return if there is data after tcphdr in tcpmss_mangle_packet(). The
+same applies to mangle_contents() in nf_nat_helper.c, which returns false
+when skb->len + extra > 65535 in enlarge_skb().
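+
+For reference, a rough sketch of what the helpers provide (not the exact
+implementation): for an IPv4 BIG TCP GSO packet iph->tot_len can be 0, so
+the real length has to be derived from the skb instead:
+
+	/* sketch: "total length" that also works for IPv4 BIG TCP */
+	static u32 totlen_sketch(const struct sk_buff *skb,
+				 const struct iphdr *iph)
+	{
+		u32 len = ntohs(iph->tot_len);
+
+		if (!len && skb_is_gso(skb) && skb_is_gso_tcp(skb))
+			len = skb->len - skb_network_offset(skb);
+		return len;
+	}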
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 0ae8e4cca787 ("netfilter: nf_tables: set transport offset from mac header for netdev/egress")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables_ipv4.h | 4 ++--
+ net/netfilter/ipvs/ip_vs_xmit.c        | 2 +-
+ net/netfilter/nf_log_syslog.c          | 2 +-
+ net/netfilter/xt_length.c              | 2 +-
+ 4 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
+index c4a6147b0ef8c..d8f6cb47ebe37 100644
+--- a/include/net/netfilter/nf_tables_ipv4.h
++++ b/include/net/netfilter/nf_tables_ipv4.h
+@@ -29,7 +29,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
+       if (iph->ihl < 5 || iph->version != 4)
+               return -1;
+-      len = ntohs(iph->tot_len);
++      len = iph_totlen(pkt->skb, iph);
+       thoff = iph->ihl * 4;
+       if (pkt->skb->len < len)
+               return -1;
+@@ -62,7 +62,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
+       if (iph->ihl < 5 || iph->version != 4)
+               goto inhdr_error;
+-      len = ntohs(iph->tot_len);
++      len = iph_totlen(pkt->skb, iph);
+       thoff = iph->ihl * 4;
+       if (pkt->skb->len < len) {
+               __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index 7243079ef3546..b452eb3ddcecb 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -994,7 +994,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+               old_dsfield = ipv4_get_dsfield(old_iph);
+               *ttl = old_iph->ttl;
+               if (payload_len)
+-                      *payload_len = ntohs(old_iph->tot_len);
++                      *payload_len = skb_ip_totlen(skb);
+       }
+       /* Implement full-functionality option for ECN encapsulation */
+diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
+index cb894f0d63e9d..c66689ad2b491 100644
+--- a/net/netfilter/nf_log_syslog.c
++++ b/net/netfilter/nf_log_syslog.c
+@@ -322,7 +322,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
+       /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+       nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+-                     ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
++                     iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK,
+                      ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+       /* Max length: 6 "CE DF MF " */
+diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
+index 9fbfad13176f0..ca730cedb5d41 100644
+--- a/net/netfilter/xt_length.c
++++ b/net/netfilter/xt_length.c
+@@ -21,7 +21,7 @@ static bool
+ length_mt(const struct sk_buff *skb, struct xt_action_param *par)
+ {
+       const struct xt_length_info *info = par->matchinfo;
+-      u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
++      u32 pktlen = skb_ip_totlen(skb);
+       return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfc-llcp_core-hold-a-ref-to-llcp_local-dev-when-hold.patch b/queue-6.1/nfc-llcp_core-hold-a-ref-to-llcp_local-dev-when-hold.patch
new file mode 100644 (file)
index 0000000..8eabbed
--- /dev/null
@@ -0,0 +1,128 @@
+From 931ea9a2205ca793f1dcdff5f7f215cc9d0f2826 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Dec 2023 23:19:43 +0530
+Subject: nfc: llcp_core: Hold a ref to llcp_local->dev when holding a ref to
+ llcp_local
+
+From: Siddh Raman Pant <code@siddh.me>
+
+[ Upstream commit c95f919567d6f1914f13350af61a1b044ac85014 ]
+
+llcp_sock_sendmsg() calls nfc_llcp_send_ui_frame() which in turn calls
+nfc_alloc_send_skb(), which accesses the nfc_dev from the llcp_sock for
+getting the headroom and tailroom needed for skb allocation.
+
+In parallel, the nfc_dev can be freed, as the refcount is decreased via
+nfc_free_device(), leading to a UAF reported by Syzkaller, which can
+be summarized as follows:
+
+(1) llcp_sock_sendmsg() -> nfc_llcp_send_ui_frame()
+       -> nfc_alloc_send_skb() -> Dereference *nfc_dev
+(2) virtual_ncidev_close() -> nci_free_device() -> nfc_free_device()
+       -> put_device() -> nfc_release() -> Free *nfc_dev
+
+When a reference to llcp_local is acquired, we do not acquire one for
+the nfc_dev. This allows the nfc_dev to be freed even while the
+llcp_local is in use, which is exactly what happens in the UAF described
+above.
+
+Thus, when we acquire a reference to llcp_local, we should acquire a
+reference to nfc_dev, and release the references appropriately later.
+
+The refcount for llcp_local is initialized in nfc_llcp_register_device()
+(which is called by nfc_register_device()). Thus, we should acquire a
+reference to nfc_dev there.
+
+nfc_unregister_device() calls nfc_llcp_unregister_device() which in
+turn calls nfc_llcp_local_put(). Thus, the reference to nfc_dev is
+appropriately released later.
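+
+The resulting get/put pairing, sketched for illustration (see the hunks
+below for the actual code):
+
+	/* get: taking a ref on the local now also pins the nfc_dev */
+	if (!nfc_get_device(local->dev->idx))
+		return NULL;
+	kref_get(&local->ref);
+
+	/* put: drop the local ref first, then release the device ref */
+	dev = local->dev;
+	ret = kref_put(&local->ref, local_release);
+	nfc_put_device(dev);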
+
+Reported-and-tested-by: syzbot+bbe84a4010eeea00982d@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=bbe84a4010eeea00982d
+Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when creating a socket")
+Reviewed-by: Suman Ghosh <sumang@marvell.com>
+Signed-off-by: Siddh Raman Pant <code@siddh.me>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/nfc/llcp_core.c | 39 ++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 36 insertions(+), 3 deletions(-)
+
+diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
+index 1dac28136e6a3..18be13fb9b75a 100644
+--- a/net/nfc/llcp_core.c
++++ b/net/nfc/llcp_core.c
+@@ -145,6 +145,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
+ static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
+ {
++      /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever
++       * we hold a reference to local, we also need to hold a reference to
++       * the device to avoid UAF.
++       */
++      if (!nfc_get_device(local->dev->idx))
++              return NULL;
++
+       kref_get(&local->ref);
+       return local;
+@@ -177,10 +184,18 @@ static void local_release(struct kref *ref)
+ int nfc_llcp_local_put(struct nfc_llcp_local *local)
+ {
++      struct nfc_dev *dev;
++      int ret;
++
+       if (local == NULL)
+               return 0;
+-      return kref_put(&local->ref, local_release);
++      dev = local->dev;
++
++      ret = kref_put(&local->ref, local_release);
++      nfc_put_device(dev);
++
++      return ret;
+ }
+ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
+@@ -959,8 +974,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
+       }
+       new_sock = nfc_llcp_sock(new_sk);
+-      new_sock->dev = local->dev;
++
+       new_sock->local = nfc_llcp_local_get(local);
++      if (!new_sock->local) {
++              reason = LLCP_DM_REJ;
++              sock_put(&new_sock->sk);
++              release_sock(&sock->sk);
++              sock_put(&sock->sk);
++              goto fail;
++      }
++
++      new_sock->dev = local->dev;
+       new_sock->rw = sock->rw;
+       new_sock->miux = sock->miux;
+       new_sock->nfc_protocol = sock->nfc_protocol;
+@@ -1597,7 +1621,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
+       if (local == NULL)
+               return -ENOMEM;
+-      local->dev = ndev;
++      /* As we are going to initialize local's refcount, we need to get the
++       * nfc_dev to avoid UAF, otherwise there is no point in continuing.
++       * See nfc_llcp_local_get().
++       */
++      local->dev = nfc_get_device(ndev->idx);
++      if (!local->dev) {
++              kfree(local);
++              return -ENODEV;
++      }
++
+       INIT_LIST_HEAD(&local->list);
+       kref_init(&local->ref);
+       mutex_init(&local->sdp_lock);
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeontx2-af-always-configure-nix-tx-link-credits-ba.patch b/queue-6.1/octeontx2-af-always-configure-nix-tx-link-credits-ba.patch
new file mode 100644 (file)
index 0000000..c4a7232
--- /dev/null
@@ -0,0 +1,184 @@
+From 6e614dd77c02e745841357aa5fe9f9b4b1a63b2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jan 2024 15:26:43 +0530
+Subject: octeontx2-af: Always configure NIX TX link credits based on max frame
+ size
+
+From: Naveen Mamindlapalli <naveenm@marvell.com>
+
+[ Upstream commit a0d9528f6daf7fe8de217fa80a94d2989d2a57a7 ]
+
+Currently the NIX TX link credits are initialized based on the max frame
+size that can be transmitted on a link, but when the MTU is changed, the
+NIX TX link credits are reprogrammed by the SW based on the new MTU value.
+Since the SMQ max packet length is programmed to the max frame size by
+default, there is a chance that NIX TX may stall while sending a max frame
+sized packet on a link with insufficient credits to send the packet all at
+once. This patch avoids the stall issue by not changing the link credits
+dynamically when the MTU is changed.
+
+Fixes: 1c74b89171c3 ("octeontx2-af: Wait for TX link idle for credits change")
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 110 +-----------------
+ 1 file changed, 3 insertions(+), 107 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 959f36efdc4a6..15f698020ec44 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -3923,90 +3923,18 @@ static void nix_find_link_frs(struct rvu *rvu,
+               req->minlen = minlen;
+ }
+-static int
+-nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+-                      u16 pcifunc, u64 tx_credits)
+-{
+-      struct rvu_hwinfo *hw = rvu->hw;
+-      int pf = rvu_get_pf(pcifunc);
+-      u8 cgx_id = 0, lmac_id = 0;
+-      unsigned long poll_tmo;
+-      bool restore_tx_en = 0;
+-      struct nix_hw *nix_hw;
+-      u64 cfg, sw_xoff = 0;
+-      u32 schq = 0;
+-      u32 credits;
+-      int rc;
+-
+-      nix_hw = get_nix_hw(rvu->hw, blkaddr);
+-      if (!nix_hw)
+-              return NIX_AF_ERR_INVALID_NIXBLK;
+-
+-      if (tx_credits == nix_hw->tx_credits[link])
+-              return 0;
+-
+-      /* Enable cgx tx if disabled for credits to be back */
+-      if (is_pf_cgxmapped(rvu, pf)) {
+-              rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+-              restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
+-                                                  lmac_id, true);
+-      }
+-
+-      mutex_lock(&rvu->rsrc_lock);
+-      /* Disable new traffic to link */
+-      if (hw->cap.nix_shaping) {
+-              schq = nix_get_tx_link(rvu, pcifunc);
+-              sw_xoff = rvu_read64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq));
+-              rvu_write64(rvu, blkaddr,
+-                          NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+-      }
+-
+-      rc = NIX_AF_ERR_LINK_CREDITS;
+-      poll_tmo = jiffies + usecs_to_jiffies(200000);
+-      /* Wait for credits to return */
+-      do {
+-              if (time_after(jiffies, poll_tmo))
+-                      goto exit;
+-              usleep_range(100, 200);
+-
+-              cfg = rvu_read64(rvu, blkaddr,
+-                               NIX_AF_TX_LINKX_NORM_CREDIT(link));
+-              credits = (cfg >> 12) & 0xFFFFFULL;
+-      } while (credits != nix_hw->tx_credits[link]);
+-
+-      cfg &= ~(0xFFFFFULL << 12);
+-      cfg |= (tx_credits << 12);
+-      rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
+-      rc = 0;
+-
+-      nix_hw->tx_credits[link] = tx_credits;
+-
+-exit:
+-      /* Enable traffic back */
+-      if (hw->cap.nix_shaping && !sw_xoff)
+-              rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq), 0);
+-
+-      /* Restore state of cgx tx */
+-      if (restore_tx_en)
+-              rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+-
+-      mutex_unlock(&rvu->rsrc_lock);
+-      return rc;
+-}
+-
+ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+                                   struct msg_rsp *rsp)
+ {
+       struct rvu_hwinfo *hw = rvu->hw;
+       u16 pcifunc = req->hdr.pcifunc;
+       int pf = rvu_get_pf(pcifunc);
+-      int blkaddr, schq, link = -1;
+-      struct nix_txsch *txsch;
+-      u64 cfg, lmac_fifo_len;
++      int blkaddr, link = -1;
+       struct nix_hw *nix_hw;
+       struct rvu_pfvf *pfvf;
+       u8 cgx = 0, lmac = 0;
+       u16 max_mtu;
++      u64 cfg;
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+       if (blkaddr < 0)
+@@ -4027,25 +3955,6 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+       if (req->update_minlen && req->minlen < NIC_HW_MIN_FRS)
+               return NIX_AF_ERR_FRS_INVALID;
+-      /* Check if requester wants to update SMQ's */
+-      if (!req->update_smq)
+-              goto rx_frscfg;
+-
+-      /* Update min/maxlen in each of the SMQ attached to this PF/VF */
+-      txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+-      mutex_lock(&rvu->rsrc_lock);
+-      for (schq = 0; schq < txsch->schq.max; schq++) {
+-              if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+-                      continue;
+-              cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq));
+-              cfg = (cfg & ~(0xFFFFULL << 8)) | ((u64)req->maxlen << 8);
+-              if (req->update_minlen)
+-                      cfg = (cfg & ~0x7FULL) | ((u64)req->minlen & 0x7F);
+-              rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq), cfg);
+-      }
+-      mutex_unlock(&rvu->rsrc_lock);
+-
+-rx_frscfg:
+       /* Check if config is for SDP link */
+       if (req->sdp_link) {
+               if (!hw->sdp_links)
+@@ -4068,7 +3977,6 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+       if (link < 0)
+               return NIX_AF_ERR_RX_LINK_INVALID;
+-
+ linkcfg:
+       nix_find_link_frs(rvu, req, pcifunc);
+@@ -4078,19 +3986,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+               cfg = (cfg & ~0xFFFFULL) | req->minlen;
+       rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), cfg);
+-      if (req->sdp_link || pf == 0)
+-              return 0;
+-
+-      /* Update transmit credits for CGX links */
+-      lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, lmac);
+-      if (!lmac_fifo_len) {
+-              dev_err(rvu->dev,
+-                      "%s: Failed to get CGX/RPM%d:LMAC%d FIFO size\n",
+-                      __func__, cgx, lmac);
+-              return 0;
+-      }
+-      return nix_config_link_credits(rvu, blkaddr, link, pcifunc,
+-                                     (lmac_fifo_len - req->maxlen) / 16);
++      return 0;
+ }
+ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeontx2-af-fix-marking-couple-of-structure-as-__pa.patch b/queue-6.1/octeontx2-af-fix-marking-couple-of-structure-as-__pa.patch
new file mode 100644 (file)
index 0000000..a32a0a4
--- /dev/null
@@ -0,0 +1,46 @@
+From abb25686716bdc469df9eb0f9cea42cd499e4e1e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Dec 2023 19:56:33 +0530
+Subject: octeontx2-af: Fix marking couple of structure as __packed
+
+From: Suman Ghosh <sumang@marvell.com>
+
+[ Upstream commit 0ee2384a5a0f3b4eeac8d10bb01a0609d245a4d1 ]
+
+A couple of structures were not marked as __packed. This patch
+fixes that and marks them as __packed.
+
+Fixes: 42006910b5ea ("octeontx2-af: cleanup KPU config data")
+Signed-off-by: Suman Ghosh <sumang@marvell.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/npc.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+index d027c23b8ef8e..aaff91bc7415a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+@@ -514,7 +514,7 @@ struct npc_lt_def {
+       u8      ltype_mask;
+       u8      ltype_match;
+       u8      lid;
+-};
++} __packed;
+ struct npc_lt_def_ipsec {
+       u8      ltype_mask;
+@@ -522,7 +522,7 @@ struct npc_lt_def_ipsec {
+       u8      lid;
+       u8      spi_offset;
+       u8      spi_nz;
+-};
++} __packed;
+ struct npc_lt_def_apad {
+       u8      ltype_mask;
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeontx2-af-fix-pause-frame-configuration.patch b/queue-6.1/octeontx2-af-fix-pause-frame-configuration.patch
new file mode 100644 (file)
index 0000000..0f9960f
--- /dev/null
@@ -0,0 +1,56 @@
+From c1c4d52f9e1a5f81883fafcb8b866f3bb4e20f70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Dec 2023 14:57:54 +0530
+Subject: octeontx2-af: Fix pause frame configuration
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit e307b5a845c5951dabafc48d00b6424ee64716c4 ]
+
+The current implementation's default Pause Forward setting is causing
+unnecessary network traffic. This patch disables Pause Forward to
+address this issue.
+
+Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+index a70e1153fa04b..6b4792a942d84 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+@@ -283,6 +283,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
+       cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg);
+       rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg);
++      /* Disable forward pause to driver */
++      cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
++      cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD;
++      rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
++
+       /* Enable channel mask for all LMACS */
+       rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL);
+ }
+@@ -451,12 +456,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p
+       if (rx_pause) {
+               cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
++                       RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE);
+       } else {
+               cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
++                      RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE);
+       }
+       if (tx_pause) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeontx2-af-re-enable-mac-tx-in-otx2_stop-processin.patch b/queue-6.1/octeontx2-af-re-enable-mac-tx-in-otx2_stop-processin.patch
new file mode 100644 (file)
index 0000000..a41ad10
--- /dev/null
@@ -0,0 +1,93 @@
+From a82d68a811ec60556c67c80775519e4d65f02f35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jan 2024 19:44:00 +0530
+Subject: octeontx2-af: Re-enable MAC TX in otx2_stop processing
+
+From: Naveen Mamindlapalli <naveenm@marvell.com>
+
+[ Upstream commit 818ed8933bd17bc91a9fa8b94a898189c546fc1a ]
+
+During QoS scheduling testing with multiple strict priority flows, the
+netdev tx watchdog timeout routine is invoked when a low priority QoS
+queue doesn't get a chance to transmit packets because other high
+priority flows are fully subscribing the transmit link. The netdev
+tx watchdog timeout routine stops MAC RX and TX functionality in the
+otx2_stop() routine before cleaning up the HW TX queues, which results in
+SMQ flush errors because the packets belonging to low priority queues will
+never get flushed since MAC TX is disabled. This patch fixes the issue
+by re-enabling MAC TX to ensure the packets in the HW pipeline get flushed
+properly.
+
+Fixes: a7faa68b4e7f ("octeontx2-af: Start/Stop traffic in CGX along with NPC")
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu.h |  1 +
+ .../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 17 +++++++++++++++++
+ .../net/ethernet/marvell/octeontx2/af/rvu_nix.c |  8 +++++++-
+ 3 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+index 95a7bc396e8ea..ab78e9d020751 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+@@ -850,6 +850,7 @@ u32  rvu_cgx_get_fifolen(struct rvu *rvu);
+ void *rvu_first_cgx_pdata(struct rvu *rvu);
+ int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id);
+ int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable);
++int rvu_cgx_tx_enable(struct rvu *rvu, u16 pcifunc, bool enable);
+ int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause,
+                              u16 pfc_en);
+ int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+index c60b9580ca969..fa658bd4dfb3b 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+@@ -456,6 +456,23 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
+       return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start);
+ }
++int rvu_cgx_tx_enable(struct rvu *rvu, u16 pcifunc, bool enable)
++{
++      int pf = rvu_get_pf(pcifunc);
++      struct mac_ops *mac_ops;
++      u8 cgx_id, lmac_id;
++      void *cgxd;
++
++      if (!is_cgx_config_permitted(rvu, pcifunc))
++              return LMAC_AF_ERR_PERM_DENIED;
++
++      rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
++      cgxd = rvu_cgx_pdata(cgx_id, rvu);
++      mac_ops = get_mac_ops(cgxd);
++
++      return mac_ops->mac_tx_enable(cgxd, lmac_id, enable);
++}
++
+ int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable)
+ {
+       struct mac_ops *mac_ops;
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 15f698020ec44..7f9581ce7f1fe 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -4506,7 +4506,13 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
+       pfvf = rvu_get_pfvf(rvu, pcifunc);
+       clear_bit(NIXLF_INITIALIZED, &pfvf->flags);
+-      return rvu_cgx_start_stop_io(rvu, pcifunc, false);
++      err = rvu_cgx_start_stop_io(rvu, pcifunc, false);
++      if (err)
++              return err;
++
++      rvu_cgx_tx_enable(rvu, pcifunc, true);
++
++      return 0;
+ }
+ #define RX_SA_BASE  GENMASK_ULL(52, 7)
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeontx2-af-support-variable-number-of-lmacs.patch b/queue-6.1/octeontx2-af-support-variable-number-of-lmacs.patch
new file mode 100644 (file)
index 0000000..fec38be
--- /dev/null
@@ -0,0 +1,342 @@
+From cc60721ea4736bbfd6d8355b455f0c847a4ccbe0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Dec 2022 12:35:18 +0530
+Subject: octeontx2-af: Support variable number of lmacs
+
+From: Rakesh Babu Saladi <rsaladi2@marvell.com>
+
+[ Upstream commit f2e664ad503d4e5ce7c42a0862ab164331a0ef37 ]
+
+Most of the code in the CGX/RPM driver assumes that the max number of
+lmacs per MAC is always 4 and that the number of MAC blocks is also 4.
+With this assumption, the max number of interfaces supported is
+hardcoded to 16. This creates a problem, as the next gen CN10KB silicon
+MAC supports 8 lmacs per MAC block.
+
+This patch solves the problem by using the "max lmac per MAC block"
+value from the constant CSRs and by using the cgx_cnt_max value, which is
+populated based on the number of MAC blocks supported by the silicon.
+
+Signed-off-by: Rakesh Babu Saladi <rsaladi2@marvell.com>
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: e307b5a845c5 ("octeontx2-af: Fix pause frame configuration")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/cgx.c   | 35 ++++++++-----------
+ .../net/ethernet/marvell/octeontx2/af/cgx.h   |  6 ++--
+ .../marvell/octeontx2/af/lmac_common.h        |  5 ++-
+ .../net/ethernet/marvell/octeontx2/af/rvu.h   |  2 +-
+ .../ethernet/marvell/octeontx2/af/rvu_cgx.c   | 26 ++++++++------
+ .../marvell/octeontx2/af/rvu_debugfs.c        |  2 +-
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c   |  2 +-
+ .../marvell/octeontx2/af/rvu_npc_hash.c       |  4 ++-
+ 8 files changed, 42 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+index 65c0373d34d12..90be87dc105d3 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+@@ -78,7 +78,7 @@ static bool is_dev_rpm(void *cgxd)
+ bool is_lmac_valid(struct cgx *cgx, int lmac_id)
+ {
+-      if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX)
++      if (!cgx || lmac_id < 0 || lmac_id >= cgx->max_lmac_per_mac)
+               return false;
+       return test_bit(lmac_id, &cgx->lmac_bmap);
+ }
+@@ -90,7 +90,7 @@ static int get_sequence_id_of_lmac(struct cgx *cgx, int lmac_id)
+ {
+       int tmp, id = 0;
+-      for_each_set_bit(tmp, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
++      for_each_set_bit(tmp, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
+               if (tmp == lmac_id)
+                       break;
+               id++;
+@@ -121,7 +121,7 @@ u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset)
+ struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx)
+ {
+-      if (!cgx || lmac_id >= MAX_LMAC_PER_CGX)
++      if (!cgx || lmac_id >= cgx->max_lmac_per_mac)
+               return NULL;
+       return cgx->lmac_idmap[lmac_id];
+@@ -1410,7 +1410,7 @@ int cgx_get_fwdata_base(u64 *base)
+       if (!cgx)
+               return -ENXIO;
+-      first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX);
++      first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac);
+       req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FWD_BASE, req);
+       err = cgx_fwi_cmd_generic(req, &resp, cgx, first_lmac);
+       if (!err)
+@@ -1499,7 +1499,7 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable)
+ static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx)
+ {
+-      int first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX);
++      int first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac);
+       u64 req = 0;
+       req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req);
+@@ -1537,7 +1537,7 @@ static void cgx_lmac_linkup_work(struct work_struct *work)
+       int i, err;
+       /* Do Link up for all the enabled lmacs */
+-      for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
++      for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
+               err = cgx_fwi_link_change(cgx, i, true);
+               if (err)
+                       dev_info(dev, "cgx port %d:%d Link up command failed\n",
+@@ -1557,14 +1557,6 @@ int cgx_lmac_linkup_start(void *cgxd)
+       return 0;
+ }
+-static void cgx_lmac_get_fifolen(struct cgx *cgx)
+-{
+-      u64 cfg;
+-
+-      cfg = cgx_read(cgx, 0, CGX_CONST);
+-      cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg);
+-}
+-
+ static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac,
+                                  int cnt, bool req_free)
+ {
+@@ -1619,17 +1611,14 @@ static int cgx_lmac_init(struct cgx *cgx)
+       u64 lmac_list;
+       int i, err;
+-      cgx_lmac_get_fifolen(cgx);
+-
+-      cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx);
+       /* lmac_list specifies which lmacs are enabled
+        * when bit n is set to 1, LMAC[n] is enabled
+        */
+       if (cgx->mac_ops->non_contiguous_serdes_lane)
+               lmac_list = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL;
+-      if (cgx->lmac_count > MAX_LMAC_PER_CGX)
+-              cgx->lmac_count = MAX_LMAC_PER_CGX;
++      if (cgx->lmac_count > cgx->max_lmac_per_mac)
++              cgx->lmac_count = cgx->max_lmac_per_mac;
+       for (i = 0; i < cgx->lmac_count; i++) {
+               lmac = kzalloc(sizeof(struct lmac), GFP_KERNEL);
+@@ -1707,7 +1696,7 @@ static int cgx_lmac_exit(struct cgx *cgx)
+       }
+       /* Free all lmac related resources */
+-      for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
++      for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
+               lmac = cgx->lmac_idmap[i];
+               if (!lmac)
+                       continue;
+@@ -1723,6 +1712,12 @@ static int cgx_lmac_exit(struct cgx *cgx)
+ static void cgx_populate_features(struct cgx *cgx)
+ {
++      u64 cfg;
++
++      cfg = cgx_read(cgx, 0, CGX_CONST);
++      cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg);
++      cgx->max_lmac_per_mac = FIELD_GET(CGX_CONST_MAX_LMACS, cfg);
++
+       if (is_dev_rpm(cgx))
+               cgx->hw_features = (RVU_LMAC_FEAT_DMACF | RVU_MAC_RPM |
+                                   RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+index 04338db38671b..09ddb00f63cc7 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+@@ -18,11 +18,8 @@
+ /* PCI BAR nos */
+ #define PCI_CFG_REG_BAR_NUM           0
+-#define CGX_ID_MASK                   0x7
+-#define MAX_LMAC_PER_CGX              4
++#define CGX_ID_MASK                   0xF
+ #define MAX_DMAC_ENTRIES_PER_CGX      32
+-#define CGX_FIFO_LEN                  65536 /* 64K for both Rx & Tx */
+-#define CGX_OFFSET(x)                 ((x) * MAX_LMAC_PER_CGX)
+ /* Registers */
+ #define CGXX_CMRX_CFG                 0x00
+@@ -56,6 +53,7 @@
+ #define CGXX_SCRATCH1_REG             0x1058
+ #define CGX_CONST                     0x2000
+ #define CGX_CONST_RXFIFO_SIZE         GENMASK_ULL(23, 0)
++#define CGX_CONST_MAX_LMACS           GENMASK_ULL(31, 24)
+ #define CGXX_SPUX_CONTROL1            0x10000
+ #define CGXX_SPUX_LNX_FEC_CORR_BLOCKS 0x10700
+ #define CGXX_SPUX_LNX_FEC_UNCORR_BLOCKS       0x10800
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+index 52b6016789fa4..697cfec74aa1e 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+@@ -128,7 +128,10 @@ struct cgx {
+       struct pci_dev          *pdev;
+       u8                      cgx_id;
+       u8                      lmac_count;
+-      struct lmac             *lmac_idmap[MAX_LMAC_PER_CGX];
++      /* number of LMACs per MAC could be 4 or 8 */
++      u8                      max_lmac_per_mac;
++#define MAX_LMAC_COUNT                8
++      struct lmac             *lmac_idmap[MAX_LMAC_COUNT];
+       struct                  work_struct cgx_cmd_work;
+       struct                  workqueue_struct *cgx_cmd_workq;
+       struct list_head        cgx_list;
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+index ab78e9d020751..0b76dfa979d4e 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+@@ -480,7 +480,7 @@ struct rvu {
+       u8                      cgx_mapped_pfs;
+       u8                      cgx_cnt_max;     /* CGX port count max */
+       u8                      *pf2cgxlmac_map; /* pf to cgx_lmac map */
+-      u16                     *cgxlmac2pf_map; /* bitmap of mapped pfs for
++      u64                     *cgxlmac2pf_map; /* bitmap of mapped pfs for
+                                                 * every cgx lmac port
+                                                 */
+       unsigned long           pf_notify_bmap; /* Flags for PF notification */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+index fa658bd4dfb3b..bcb4385d0621c 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+@@ -55,8 +55,9 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature)
+       return  (cgx_features_get(cgxd) & feature);
+ }
++#define CGX_OFFSET(x)                 ((x) * rvu->hw->lmac_per_cgx)
+ /* Returns bitmap of mapped PFs */
+-static u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
++static u64 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
+ {
+       return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id];
+ }
+@@ -71,7 +72,8 @@ int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id)
+       if (!pfmap)
+               return -ENODEV;
+       else
+-              return find_first_bit(&pfmap, 16);
++              return find_first_bit(&pfmap,
++                                    rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx);
+ }
+ static u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
+@@ -129,14 +131,14 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+       if (!cgx_cnt_max)
+               return 0;
+-      if (cgx_cnt_max > 0xF || MAX_LMAC_PER_CGX > 0xF)
++      if (cgx_cnt_max > 0xF || rvu->hw->lmac_per_cgx > 0xF)
+               return -EINVAL;
+       /* Alloc map table
+        * An additional entry is required since PF id starts from 1 and
+        * hence entry at offset 0 is invalid.
+        */
+-      size = (cgx_cnt_max * MAX_LMAC_PER_CGX + 1) * sizeof(u8);
++      size = (cgx_cnt_max * rvu->hw->lmac_per_cgx + 1) * sizeof(u8);
+       rvu->pf2cgxlmac_map = devm_kmalloc(rvu->dev, size, GFP_KERNEL);
+       if (!rvu->pf2cgxlmac_map)
+               return -ENOMEM;
+@@ -145,9 +147,10 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+       memset(rvu->pf2cgxlmac_map, 0xFF, size);
+       /* Reverse map table */
+-      rvu->cgxlmac2pf_map = devm_kzalloc(rvu->dev,
+-                                cgx_cnt_max * MAX_LMAC_PER_CGX * sizeof(u16),
+-                                GFP_KERNEL);
++      rvu->cgxlmac2pf_map =
++              devm_kzalloc(rvu->dev,
++                           cgx_cnt_max * rvu->hw->lmac_per_cgx * sizeof(u64),
++                           GFP_KERNEL);
+       if (!rvu->cgxlmac2pf_map)
+               return -ENOMEM;
+@@ -156,7 +159,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+               if (!rvu_cgx_pdata(cgx, rvu))
+                       continue;
+               lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
+-              for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) {
++              for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+                       lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
+                                             iter);
+                       rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
+@@ -235,7 +238,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
+       pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id);
+       do {
+-              pfid = find_first_bit(&pfmap, 16);
++              pfid = find_first_bit(&pfmap,
++                                    rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx);
+               clear_bit(pfid, &pfmap);
+               /* check if notification is enabled */
+@@ -310,7 +314,7 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu)
+               if (!cgxd)
+                       continue;
+               lmac_bmap = cgx_get_lmac_bmap(cgxd);
+-              for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) {
++              for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+                       err = cgx_lmac_evh_register(&cb, cgxd, lmac);
+                       if (err)
+                               dev_err(rvu->dev,
+@@ -396,7 +400,7 @@ int rvu_cgx_exit(struct rvu *rvu)
+               if (!cgxd)
+                       continue;
+               lmac_bmap = cgx_get_lmac_bmap(cgxd);
+-              for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX)
++              for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx)
+                       cgx_lmac_evh_unregister(cgxd, lmac);
+       }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+index 5c9dc3f9262f5..cc5d342e026c7 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+@@ -2618,7 +2618,7 @@ static void rvu_dbg_cgx_init(struct rvu *rvu)
+               rvu->rvu_dbg.cgx = debugfs_create_dir(dname,
+                                                     rvu->rvu_dbg.cgx_root);
+-              for_each_set_bit(lmac_id, &lmac_bmap, MAX_LMAC_PER_CGX) {
++              for_each_set_bit(lmac_id, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+                       /* lmac debugfs dir */
+                       sprintf(dname, "lmac%d", lmac_id);
+                       rvu->rvu_dbg.lmac =
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 7f9581ce7f1fe..bb99302eab67a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -4079,7 +4079,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr,
+               /* Get LMAC id's from bitmap */
+               lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
+-              for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) {
++              for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+                       lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, iter);
+                       if (!lmac_fifo_len) {
+                               dev_err(rvu->dev,
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+index 34fa59575fa91..54e0dfdc9d984 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+@@ -1999,7 +1999,9 @@ int rvu_npc_exact_init(struct rvu *rvu)
+       /* Install SDP drop rule */
+       drop_mcam_idx = &table->num_drop_rules;
+-      max_lmac_cnt = rvu->cgx_cnt_max * MAX_LMAC_PER_CGX + PF_CGXMAP_BASE;
++      max_lmac_cnt = rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx +
++                     PF_CGXMAP_BASE;
++
+       for (i = PF_CGXMAP_BASE; i < max_lmac_cnt; i++) {
+               if (rvu->pf2cgxlmac_map[i] == 0xFF)
+                       continue;
+-- 
+2.43.0
+
diff --git a/queue-6.1/r8169-fix-pci-error-on-system-resume.patch b/queue-6.1/r8169-fix-pci-error-on-system-resume.patch
new file mode 100644 (file)
index 0000000..c9305e5
--- /dev/null
@@ -0,0 +1,49 @@
+From 583a0fa5c6f48858b3592059eba607d74041c813 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Dec 2023 12:34:09 +0800
+Subject: r8169: Fix PCI error on system resume
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+[ Upstream commit 9c476269bff2908a20930c58085bf0b05ebd569a ]
+
+Some r8168 NICs stop working upon system resume:
+
+[  688.051096] r8169 0000:02:00.1 enp2s0f1: rtl_ep_ocp_read_cond == 0 (loop: 10, delay: 10000).
+[  688.175131] r8169 0000:02:00.1 enp2s0f1: Link is Down
+...
+[  691.534611] r8169 0000:02:00.1 enp2s0f1: PCI error (cmd = 0x0407, status_errs = 0x0000)
+
+Not sure if it's related, but those NICs have a BMC device at function
+0:
+02:00.0 Unassigned class [ff00]: Realtek Semiconductor Co., Ltd. Realtek RealManage BMC [10ec:816e] (rev 1a)
+
+Trial and error shows that increasing the loop wait on
+rtl_ep_ocp_read_cond to 30 can eliminate the issue, so let
+rtl8168ep_driver_start() wait a bit longer.
+
+Fixes: e6d6ca6e1204 ("r8169: Add support for another RTL8168FP")
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
+index d22457f2cf9cf..06663c11ca96d 100644
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -1145,7 +1145,7 @@ static void rtl8168ep_driver_start(struct rtl8169_private *tp)
+ {
+       r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
+       r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
+-      rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 10);
++      rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ }
+ static void rtl8168_driver_start(struct rtl8169_private *tp)
+-- 
+2.43.0
+
diff --git a/queue-6.1/ring-buffer-fix-32-bit-rb_time_read-race-with-rb_tim.patch b/queue-6.1/ring-buffer-fix-32-bit-rb_time_read-race-with-rb_tim.patch
new file mode 100644 (file)
index 0000000..60f6849
--- /dev/null
@@ -0,0 +1,74 @@
+From 861ba5891da49cc7768295f98827c32ed0dcd73a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 14:30:49 -0500
+Subject: ring-buffer: Fix 32-bit rb_time_read() race with rb_time_cmpxchg()
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+[ Upstream commit dec890089bf79a4954b61482715ee2d084364856 ]
+
+The following race can cause rb_time_read() to observe a corrupted time
+stamp:
+
+rb_time_cmpxchg()
+[...]
+        if (!rb_time_read_cmpxchg(&t->msb, msb, msb2))
+                return false;
+        if (!rb_time_read_cmpxchg(&t->top, top, top2))
+                return false;
+<interrupted before updating bottom>
+__rb_time_read()
+[...]
+        do {
+                c = local_read(&t->cnt);
+                top = local_read(&t->top);
+                bottom = local_read(&t->bottom);
+                msb = local_read(&t->msb);
+        } while (c != local_read(&t->cnt));
+
+        *cnt = rb_time_cnt(top);
+
+        /* If top and msb counts don't match, this interrupted a write */
+        if (*cnt != rb_time_cnt(msb))
+                return false;
+          ^ this check fails to catch that "bottom" is still not updated.
+
+So the old "bottom" value is returned, which is wrong.
+
+Fix this by checking that all three of msb, top, and bottom 2-bit cnt
+values match.
+
+The reason to favor checking all three fields over requiring a specific
+update order for both rb_time_set() and rb_time_cmpxchg() is because
+checking all three fields is more robust to handle partial failures of
+rb_time_cmpxchg() when interrupted by nested rb_time_set().
+
+Link: https://lore.kernel.org/lkml/20231211201324.652870-1-mathieu.desnoyers@efficios.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20231212193049.680122-1-mathieu.desnoyers@efficios.com
+
+Fixes: f458a1453424e ("ring-buffer: Test last update in 32bit version of __rb_time_read()")
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 06d52525407b8..71cad4f1323c6 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -646,8 +646,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
+       *cnt = rb_time_cnt(top);
+-      /* If top and msb counts don't match, this interrupted a write */
+-      if (*cnt != rb_time_cnt(msb))
++      /* If top, msb or bottom counts don't match, this interrupted a write */
++      if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom))
+               return false;
+       /* The shift to msb will lose its cnt bits */
+-- 
+2.43.0
+
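For illustration only (not part of the queued patch): a minimal userspace C sketch of the consistency check the fix enforces. The field layout, names, and 2-bit counter placement below are simplified stand-ins for the kernel's rb_time_t, not its actual encoding.

/*
 * A 64-bit value is stored as three words, each tagged with the same
 * 2-bit generation counter.  A reader must see matching counters in
 * *all three* words; checking only two (as before the fix) can accept
 * a torn value when the writer was interrupted before updating the
 * last word.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct split_time {
        uint32_t msb;    /* high bits | 2-bit cnt in the top two bits */
        uint32_t top;    /* mid bits  | 2-bit cnt */
        uint32_t bottom; /* low bits  | 2-bit cnt */
};

static unsigned int cnt_of(uint32_t w)
{
        return w >> 30;  /* top two bits carry the generation counter */
}

static bool read_consistent(const struct split_time *t)
{
        unsigned int cnt = cnt_of(t->top);

        /* The fix: require msb, top *and* bottom to agree.  Before the
         * fix only msb and top were compared, so the torn case below
         * would have been accepted. */
        return cnt == cnt_of(t->msb) && cnt == cnt_of(t->bottom);
}

int main(void)
{
        /* Writer updated msb and top (cnt = 2) but was interrupted
         * before bottom (still cnt = 1): the read must be rejected. */
        struct split_time torn = { 2u << 30, 2u << 30, 1u << 30 };

        printf("torn read accepted? %s\n",
               read_consistent(&torn) ? "yes (bug)" : "no (correct)");
        return 0;
}
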
diff --git a/queue-6.1/s390-cpumf-support-user-space-events-for-counting.patch b/queue-6.1/s390-cpumf-support-user-space-events-for-counting.patch
new file mode 100644 (file)
index 0000000..b38d22d
--- /dev/null
@@ -0,0 +1,93 @@
+From 06bb501441103a4e4dd88b341771b77debd509e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 11:03:32 +0100
+Subject: s390/cpumf: support user space events for counting
+
+From: Thomas Richter <tmricht@linux.ibm.com>
+
+[ Upstream commit 91d5364dc673fa9cf3a5b7b30cf33c70803eb3a4 ]
+
+CPU Measurement counting facility events PROBLEM_STATE_CPU_CYCLES(32)
+and PROBLEM_STATE_INSTRUCTIONS(33) are valid events. However, the device
+driver returns error -EOPNOTSUPP when these events are to be installed.
+
+Fix this and allow installation of events PROBLEM_STATE_CPU_CYCLES,
+PROBLEM_STATE_CPU_CYCLES:u, PROBLEM_STATE_INSTRUCTIONS and
+PROBLEM_STATE_INSTRUCTIONS:u.
+Counting kernel space only is still not supported by s390.
+
+Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
+Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Stable-dep-of: 09cda0a40051 ("s390/mm: add missing arch_set_page_dat() call to vmem_crst_alloc()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/perf_cpum_cf.c | 35 ++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
+index f043a7ff220b7..28fa80fd69fa0 100644
+--- a/arch/s390/kernel/perf_cpum_cf.c
++++ b/arch/s390/kernel/perf_cpum_cf.c
+@@ -2,7 +2,7 @@
+ /*
+  * Performance event support for s390x - CPU-measurement Counter Facility
+  *
+- *  Copyright IBM Corp. 2012, 2021
++ *  Copyright IBM Corp. 2012, 2022
+  *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+  *           Thomas Richter <tmricht@linux.ibm.com>
+  */
+@@ -434,6 +434,12 @@ static void cpumf_hw_inuse(void)
+       mutex_unlock(&pmc_reserve_mutex);
+ }
++static int is_userspace_event(u64 ev)
++{
++      return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
++             cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
++}
++
+ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+ {
+       struct perf_event_attr *attr = &event->attr;
+@@ -456,19 +462,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+               if (is_sampling_event(event))   /* No sampling support */
+                       return -ENOENT;
+               ev = attr->config;
+-              /* Count user space (problem-state) only */
+               if (!attr->exclude_user && attr->exclude_kernel) {
+-                      if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+-                              return -EOPNOTSUPP;
+-                      ev = cpumf_generic_events_user[ev];
+-
+-              /* No support for kernel space counters only */
++                      /*
++                       * Count user space (problem-state) only
++                       * Handle events 32 and 33 as 0:u and 1:u
++                       */
++                      if (!is_userspace_event(ev)) {
++                              if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
++                                      return -EOPNOTSUPP;
++                              ev = cpumf_generic_events_user[ev];
++                      }
+               } else if (!attr->exclude_kernel && attr->exclude_user) {
++                      /* No support for kernel space counters only */
+                       return -EOPNOTSUPP;
+-              } else {        /* Count user and kernel space */
+-                      if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+-                              return -EOPNOTSUPP;
+-                      ev = cpumf_generic_events_basic[ev];
++              } else {
++                      /* Count user and kernel space, incl. events 32 + 33 */
++                      if (!is_userspace_event(ev)) {
++                              if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
++                                      return -EOPNOTSUPP;
++                              ev = cpumf_generic_events_basic[ev];
++                      }
+               }
+               break;
+-- 
+2.43.0
+
diff --git a/queue-6.1/s390-mm-add-missing-arch_set_page_dat-call-to-vmem_c.patch b/queue-6.1/s390-mm-add-missing-arch_set_page_dat-call-to-vmem_c.patch
new file mode 100644 (file)
index 0000000..0b98ed2
--- /dev/null
@@ -0,0 +1,63 @@
+From 2c2e7b36450c06fc93a5c80dd815dfff68ba45f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Oct 2023 21:07:04 +0200
+Subject: s390/mm: add missing arch_set_page_dat() call to vmem_crst_alloc()
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 09cda0a400519b1541591c506e54c9c48e3101bf ]
+
+If the cmma no-dat feature is available all pages that are not used for
+dynamic address translation are marked as "no-dat" with the ESSA
+instruction. This information is visible to the hypervisor, so that the
+hypervisor can optimize purging of guest TLB entries. This also means that
+pages which are used for dynamic address translation must not be marked as
+"no-dat", since the hypervisor may then incorrectly not purge guest TLB
+entries.
+
+Region and segment tables allocated via vmem_crst_alloc() are incorrectly
+marked as "no-dat", as soon as slab_is_available() returns true.
+
+Such tables are allocated e.g. when kernel page tables are split, memory is
+hotplugged, or a DCSS segment is loaded.
+
+Fix this by adding the missing arch_set_page_dat() call.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/mm/vmem.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
+index 9a0ce5315f36d..3cbb461820666 100644
+--- a/arch/s390/mm/vmem.c
++++ b/arch/s390/mm/vmem.c
+@@ -11,6 +11,7 @@
+ #include <linux/list.h>
+ #include <linux/hugetlb.h>
+ #include <linux/slab.h>
++#include <asm/page-states.h>
+ #include <asm/cacheflush.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/pgalloc.h>
+@@ -44,8 +45,11 @@ void *vmem_crst_alloc(unsigned long val)
+       unsigned long *table;
+       table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+-      if (table)
+-              crst_table_init(table, val);
++      if (!table)
++              return NULL;
++      crst_table_init(table, val);
++      if (slab_is_available())
++              arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+       return table;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-bonding-do-not-set-port-down-when-adding-t.patch b/queue-6.1/selftests-bonding-do-not-set-port-down-when-adding-t.patch
new file mode 100644 (file)
index 0000000..8f55390
--- /dev/null
@@ -0,0 +1,53 @@
+From d7b27f0f9c2a4de4109f6bda4aae6edb2b4cc9b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Dec 2023 20:59:22 +0800
+Subject: selftests: bonding: do not set port down when adding to bond
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 61fa2493ca76fd7bb74e13f0205274f4ab0aa696 ]
+
+Similar to commit be809424659c ("selftests: bonding: do not set port down
+before adding to bond"). The bond-arp-interval-causes-panic test failed
+after commit a4abfa627c38 ("net: rtnetlink: Enslave device before bringing
+it up"), as the kernel now sets the port down _after_ adding it to the bond
+when the port is explicitly set down.
+
+Fix it by removing the link down operation when adding to bond.
+
+Fixes: 2ffd57327ff1 ("selftests: bonding: cause oops in bond_rr_gen_slave_id")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Tested-by: Benjamin Poirier <benjamin.poirier@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../drivers/net/bonding/bond-arp-interval-causes-panic.sh   | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
+index 71c00bfafbc99..2ff58fed76e28 100755
+--- a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
++++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
+@@ -33,16 +33,16 @@ ip netns add "client"
+ ip link set dev link1_1 netns client down name eth0
+ ip netns exec client ip link add dev bond0 down type bond mode 1 \
+       miimon 100 all_slaves_active 1
+-ip netns exec client ip link set dev eth0 down master bond0
++ip netns exec client ip link set dev eth0 master bond0
+ ip netns exec client ip link set dev bond0 up
+ ip netns exec client ip addr add ${client_ip4}/24 dev bond0
+ ip netns exec client ping -c 5 $server_ip4 >/dev/null
+-ip netns exec client ip link set dev eth0 down nomaster
++ip netns exec client ip link set dev eth0 nomaster
+ ip netns exec client ip link set dev bond0 down
+ ip netns exec client ip link set dev bond0 type bond mode 0 \
+       arp_interval 1000 arp_ip_target "+${server_ip4}"
+-ip netns exec client ip link set dev eth0 down master bond0
++ip netns exec client ip link set dev eth0 master bond0
+ ip netns exec client ip link set dev bond0 up
+ ip netns exec client ping -c 5 $server_ip4 >/dev/null
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mptcp-fix-fastclose-with-csum-failure.patch b/queue-6.1/selftests-mptcp-fix-fastclose-with-csum-failure.patch
new file mode 100644 (file)
index 0000000..d7e4aeb
--- /dev/null
@@ -0,0 +1,58 @@
+From 329325f50f83617625c97e5ba5e4dcfd74ed1e79 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 00:16:17 +0100
+Subject: selftests: mptcp: fix fastclose with csum failure
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 7cefbe5e1dacc7236caa77e9d072423f21422fe2 ]
+
+Running the mp_join selftest manually with the following command line:
+
+  ./mptcp_join.sh -z -C
+
+leads to some failures:
+
+  002 fastclose server test
+  # ...
+  rtx                                 [fail] got 1 MP_RST[s] TX expected 0
+  # ...
+  rstrx                               [fail] got 1 MP_RST[s] RX expected 0
+
+The problem is really in the wrong expectations for the RST checks
+implied by the csum validation. Note that the same check is repeated
+explicitly in the same test-case, with the correct expectation, and
+passes successfully.
+
+Address the issue by explicitly setting the correct expectation for
+the failing checks.
+
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-5-7b9cd6a7b7f4@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index e52d513009fb0..9d8dde3b5c332 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -3041,7 +3041,7 @@ fastclose_tests()
+       if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+               run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server
+-              chk_join_nr 0 0 0
++              chk_join_nr 0 0 0 0 0 0 1
+               chk_fclose_nr 1 1 invert
+               chk_rst_nr 1 1
+       fi
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-mptcp-set-failing_links-in-run_tests.patch b/queue-6.1/selftests-mptcp-set-failing_links-in-run_tests.patch
new file mode 100644 (file)
index 0000000..65556f6
--- /dev/null
@@ -0,0 +1,64 @@
+From 6465bc887b9ddfd7a4e8a118a4c52b4bf285ea3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 10:34:09 -0700
+Subject: selftests: mptcp: set FAILING_LINKS in run_tests
+
+From: Geliang Tang <geliang.tang@suse.com>
+
+[ Upstream commit be7e9786c9155c2942cd53b813e4723be67e07c4 ]
+
+Set FAILING_LINKS as an env var with a limited scope only when calling
+run_tests().
+
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Mat Martineau <martineau@kernel.org>
+Link: https://lore.kernel.org/r/20230623-send-net-next-20230623-v1-3-a883213c8ba9@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 7cefbe5e1dac ("selftests: mptcp: fix fastclose with csum failure")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 9d8dde3b5c332..2107579e2939d 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -2167,9 +2167,9 @@ link_failure_tests()
+               pm_nl_set_limits $ns1 0 2
+               pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+               pm_nl_set_limits $ns2 1 2
+-              FAILING_LINKS="1"
+               pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+-              run_tests $ns1 $ns2 10.0.1.1 1
++              FAILING_LINKS="1" \
++                      run_tests $ns1 $ns2 10.0.1.1 1
+               chk_join_nr 2 2 2
+               chk_add_nr 1 1
+               chk_link_usage $ns2 ns2eth3 $cinsent 0
+@@ -2183,8 +2183,8 @@ link_failure_tests()
+               pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+               pm_nl_set_limits $ns2 1 2
+               pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+-              FAILING_LINKS="1 2"
+-              run_tests $ns1 $ns2 10.0.1.1 1
++              FAILING_LINKS="1 2" \
++                      run_tests $ns1 $ns2 10.0.1.1 1
+               chk_join_nr 2 2 2
+               chk_add_nr 1 1
+               chk_stale_nr $ns2 2 4 2
+@@ -2199,8 +2199,8 @@ link_failure_tests()
+               pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+               pm_nl_set_limits $ns2 1 3
+               pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+-              FAILING_LINKS="1 2"
+-              run_tests $ns1 $ns2 10.0.1.1 2
++              FAILING_LINKS="1 2" \
++                      run_tests $ns1 $ns2 10.0.1.1 2
+               chk_join_nr 2 2 2
+               chk_add_nr 1 1
+               chk_stale_nr $ns2 1 -1 2
+-- 
+2.43.0
+
diff --git a/queue-6.1/selftests-secretmem-floor-the-memory-size-to-the-mul.patch b/queue-6.1/selftests-secretmem-floor-the-memory-size-to-the-mul.patch
new file mode 100644 (file)
index 0000000..b1723b8
--- /dev/null
@@ -0,0 +1,56 @@
+From 966552f614ba6fbff3836f33c83f31ff6ef93760 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Dec 2023 15:19:30 +0500
+Subject: selftests: secretmem: floor the memory size to the multiple of
+ page_size
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit 0aac13add26d546ac74c89d2883b3a5f0fbea039 ]
+
+The "locked-in-memory size" limit per process can be non-multiple of
+page_size.  The mmap() fails if we try to allocate locked-in-memory with
+same size as the allowed limit if it isn't multiple of the page_size
+because mmap() rounds off the memory size to be allocated to next multiple
+of page_size.
+
+Fix this by flooring the length to be allocated with mmap() to the
+previous multiple of the page_size.
+
+This was getting triggered on KernelCI regularly because of different
+ulimit settings which weren't a multiple of the page_size.  Find logs
+here: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/
+The bug was present from the time the test was first added.
+
+Link: https://lkml.kernel.org/r/20231214101931.1155586-1-usama.anjum@collabora.com
+Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)")
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reported-by: "kernelci.org bot" <bot@kernelci.org>
+Closes: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/
+Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
+Cc: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/memfd_secret.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c
+index 957b9e18c7295..9b298f6a04b37 100644
+--- a/tools/testing/selftests/vm/memfd_secret.c
++++ b/tools/testing/selftests/vm/memfd_secret.c
+@@ -62,6 +62,9 @@ static void test_mlock_limit(int fd)
+       char *mem;
+       len = mlock_limit_cur;
++      if (len % page_size != 0)
++              len = (len/page_size) * page_size;
++
+       mem = mmap(NULL, len, prot, mode, fd, 0);
+       if (mem == MAP_FAILED) {
+               fail("unable to mmap secret memory\n");
+-- 
+2.43.0
+
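For illustration only (not part of the queued patch): a minimal userspace C sketch of the flooring the selftest fix applies. getrlimit() and the arithmetic are real APIs; the mapping of the memfd_secret() descriptor that the selftest performs afterwards is only referenced in a comment here.

/*
 * RLIMIT_MEMLOCK may be a non-multiple of the page size, and mmap()
 * rounds mapping lengths up to whole pages, so the length is floored
 * to the previous page_size multiple before being handed to mmap().
 */
#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        struct rlimit rlim;

        if (page_size <= 0)
                return 1;
        if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0) {
                perror("getrlimit");
                return 1;
        }

        unsigned long long len = rlim.rlim_cur;

        /* Floor to the previous multiple of page_size, as the fix does. */
        if (len % page_size != 0)
                len = (len / page_size) * page_size;

        /* The selftest then mmap()s 'len' bytes of the secret memfd; a
         * non-floored length would be rounded up past the memlock limit. */
        printf("page_size=%ld  memlock limit=%llu  mmap length=%llu\n",
               page_size, (unsigned long long)rlim.rlim_cur, len);
        return 0;
}
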
index c071d30dc1dbf19f154e619fc27b1c54613b7e2f..10889fb54465e95c464600b978349c48af048ffc 100644 (file)
@@ -1,2 +1,122 @@
 keys-dns-fix-missing-size-check-of-v1-server-list-header.patch
 block-don-t-invalidate-pagecache-for-invalid-falloc-modes.patch
+wifi-iwlwifi-pcie-don-t-synchronize-irqs-from-irq.patch
+drm-bridge-ti-sn65dsi86-never-store-more-than-msg-si.patch
+netfilter-use-skb_ip_totlen-and-iph_totlen.patch
+netfilter-nf_tables-set-transport-offset-from-mac-he.patch
+nfc-llcp_core-hold-a-ref-to-llcp_local-dev-when-hold.patch
+octeontx2-af-fix-marking-couple-of-structure-as-__pa.patch
+drm-i915-dp-fix-passing-the-correct-dpcd_rev-for-drm.patch
+ice-fix-link_down_on_close-message.patch
+ice-shut-down-vsi-with-link-down-on-close-enabled.patch
+i40e-fix-filter-input-checks-to-prevent-config-with-.patch
+igc-report-vlan-ethertype-matching-back-to-user.patch
+igc-check-vlan-tci-mask.patch
+igc-check-vlan-ethertype-mask.patch
+asoc-fsl_rpmsg-fix-error-handler-with-pm_runtime_ena.patch
+asoc-mediatek-mt8186-fix-aud_pad_top-register-and-of.patch
+mlxbf_gige-fix-receive-packet-race-condition.patch
+net-sched-em_text-fix-possible-memory-leak-in-em_tex.patch
+r8169-fix-pci-error-on-system-resume.patch
+can-raw-add-support-for-so_mark.patch
+net-timestamp-extend-sof_timestamping_opt_id-to-hw-t.patch
+net-annotate-data-races-around-sk-sk_tsflags.patch
+net-annotate-data-races-around-sk-sk_bind_phc.patch
+net-implement-missing-getsockopt-so_timestamping_new.patch
+selftests-bonding-do-not-set-port-down-when-adding-t.patch
+arm-sun9i-smp-fix-array-index-out-of-bounds-read-in-.patch
+sfc-fix-a-double-free-bug-in-efx_probe_filters.patch
+net-bcmgenet-fix-fcs-generation-for-fragmented-skbuf.patch
+netfilter-nft_immediate-drop-chain-reference-counter.patch
+net-save-and-restore-msg_namelen-in-sock_sendmsg.patch
+i40e-fix-use-after-free-in-i40e_aqc_add_filters.patch
+asoc-meson-g12a-toacodec-validate-written-enum-value.patch
+asoc-meson-g12a-tohdmitx-validate-written-enum-value.patch
+asoc-meson-g12a-toacodec-fix-event-generation.patch
+asoc-meson-g12a-tohdmitx-fix-event-generation-for-s-.patch
+i40e-restore-vf-msi-x-state-during-pci-reset.patch
+igc-fix-hicredit-calculation.patch
+net-qla3xxx-fix-potential-memleak-in-ql_alloc_buffer.patch
+net-smc-fix-invalid-link-access-in-dumping-smc-r-con.patch
+octeontx2-af-always-configure-nix-tx-link-credits-ba.patch
+octeontx2-af-re-enable-mac-tx-in-otx2_stop-processin.patch
+asix-add-check-for-usbnet_get_endpoints.patch
+net-ravb-wait-for-operating-mode-to-be-applied.patch
+bnxt_en-remove-mis-applied-code-from-bnxt_cfg_ntp_fi.patch
+net-implement-missing-so_timestamping_new-cmsg-suppo.patch
+selftests-secretmem-floor-the-memory-size-to-the-mul.patch
+cpu-smt-create-topology_smt_thread_allowed.patch
+cpu-smt-make-smt-control-more-robust-against-enumera.patch
+srcu-fix-callbacks-acceleration-mishandling.patch
+bpf-x64-fix-tailcall-infinite-loop.patch
+bpf-x86-simplify-the-parsing-logic-of-structure-para.patch
+bpf-x86-save-restore-regs-with-bpf_dw-size.patch
+net-declare-msg_splice_pages-internal-sendmsg-flag.patch
+udp-convert-udp_sendpage-to-use-msg_splice_pages.patch
+splice-net-add-a-splice_eof-op-to-file-ops-and-socke.patch
+ipv4-ipv6-use-splice_eof-to-flush.patch
+udp-introduce-udp-udp_flags.patch
+udp-move-udp-no_check6_tx-to-udp-udp_flags.patch
+udp-move-udp-no_check6_rx-to-udp-udp_flags.patch
+udp-move-udp-gro_enabled-to-udp-udp_flags.patch
+udp-move-udp-accept_udp_-l4-fraglist-to-udp-udp_flag.patch
+udp-lockless-udp_encap_l2tpinudp-udp_gro.patch
+udp-annotate-data-races-around-udp-encap_type.patch
+wifi-iwlwifi-yoyo-swap-cdb-and-jacket-bits-values.patch
+arm64-dts-qcom-sdm845-align-rpmh-regulator-nodes-wit.patch
+arm64-dts-qcom-sdm845-fix-psci-power-domain-names.patch
+fbdev-imsttfb-release-framebuffer-and-dealloc-cmap-o.patch
+fbdev-imsttfb-fix-double-free-in-probe.patch
+bpf-decouple-prune-and-jump-points.patch
+bpf-remove-unnecessary-prune-and-jump-points.patch
+bpf-remove-unused-insn_cnt-argument-from-visit_-func.patch
+bpf-clean-up-visit_insn-s-instruction-processing.patch
+bpf-support-new-32bit-offset-jmp-instruction.patch
+bpf-handle-ldimm64-properly-in-check_cfg.patch
+bpf-fix-precision-backtracking-instruction-iteration.patch
+blk-mq-make-sure-active-queue-usage-is-held-for-bio_.patch
+net-mlx5-increase-size-of-irq-name-buffer.patch
+s390-mm-add-missing-arch_set_page_dat-call-to-vmem_c.patch
+s390-cpumf-support-user-space-events-for-counting.patch
+f2fs-clean-up-i_compress_flag-and-i_compress_level-u.patch
+f2fs-convert-to-use-bitmap-api.patch
+f2fs-assign-default-compression-level.patch
+f2fs-set-the-default-compress_level-on-ioctl.patch
+selftests-mptcp-fix-fastclose-with-csum-failure.patch
+selftests-mptcp-set-failing_links-in-run_tests.patch
+media-camss-sm8250-virtual-channels-for-csid.patch
+media-qcom-camss-fix-set-csi2_rx_cfg1_vc_mode-when-v.patch
+ext4-convert-move_extent_per_page-to-use-folios.patch
+khugepage-replace-try_to_release_page-with-filemap_r.patch
+memory-failure-convert-truncate_error_page-to-use-fo.patch
+mm-merge-folio_has_private-filemap_release_folio-cal.patch
+mm-netfs-fscache-stop-read-optimisation-when-folio-r.patch
+filemap-add-a-per-mapping-stable-writes-flag.patch
+block-update-the-stable_writes-flag-in-bdev_add.patch
+smb-client-fix-missing-mode-bits-for-smb-symlinks.patch
+net-dpaa2-eth-rearrange-variable-in-dpaa2_eth_get_et.patch
+dpaa2-eth-recycle-the-rx-buffer-only-after-all-proce.patch
+ethtool-don-t-propagate-eopnotsupp-from-dumps.patch
+bpf-sockmap-af_unix-stream-sockets-need-to-hold-ref-.patch
+firmware-arm_scmi-fix-frequency-truncation-by-promot.patch
+alsa-hda-realtek-add-quirk-for-lenovo-yoga-pro-7.patch
+genirq-affinity-remove-the-firstvec-parameter-from-i.patch
+genirq-affinity-pass-affinity-managed-mask-array-to-.patch
+genirq-affinity-don-t-pass-irq_affinity_desc-array-t.patch
+genirq-affinity-rename-irq_build_affinity_masks-as-g.patch
+genirq-affinity-move-group_cpus_evenly-into-lib.patch
+lib-group_cpus.c-avoid-acquiring-cpu-hotplug-lock-in.patch
+mm-memory_hotplug-add-missing-mem_hotplug_lock.patch
+mm-memory_hotplug-fix-error-handling-in-add_memory_r.patch
+net-sched-call-tcf_ct_params_free-to-free-params-in-.patch
+netfilter-flowtable-allow-unidirectional-rules.patch
+netfilter-flowtable-cache-info-of-last-offload.patch
+net-sched-act_ct-offload-udp-new-connections.patch
+net-sched-act_ct-fix-promotion-of-offloaded-unreplie.patch
+netfilter-flowtable-gc-pushes-back-packets-to-classi.patch
+net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch
+octeontx2-af-fix-pause-frame-configuration.patch
+octeontx2-af-support-variable-number-of-lmacs.patch
+btrfs-fix-qgroup_free_reserved_data-int-overflow.patch
+btrfs-mark-the-len-field-in-struct-btrfs_ordered_sum.patch
+ring-buffer-fix-32-bit-rb_time_read-race-with-rb_tim.patch
diff --git a/queue-6.1/sfc-fix-a-double-free-bug-in-efx_probe_filters.patch b/queue-6.1/sfc-fix-a-double-free-bug-in-efx_probe_filters.patch
new file mode 100644 (file)
index 0000000..296cc23
--- /dev/null
@@ -0,0 +1,51 @@
+From 8c1095d26a73f2833e1f8e43056a8371a254b8f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Dec 2023 19:29:14 +0800
+Subject: sfc: fix a double-free bug in efx_probe_filters
+
+From: Zhipeng Lu <alexious@zju.edu.cn>
+
+[ Upstream commit d5a306aedba34e640b11d7026dbbafb78ee3a5f6 ]
+
+In efx_probe_filters, the channel->rps_flow_id is freed in an
+efx_for_each_channel macro when success equals 0.
+However, after the following call chain:
+
+ef100_net_open
+  |-> efx_probe_filters
+  |-> ef100_net_stop
+        |-> efx_remove_filters
+
+The channel->rps_flow_id is freed again in the efx_for_each_channel of
+efx_remove_filters, triggering a double-free bug.
+
+Fixes: a9dc3d5612ce ("sfc_ef100: RX filter table management and related gubbins")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
+Link: https://lore.kernel.org/r/20231225112915.3544581-1-alexious@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/rx_common.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
+index 9220afeddee81..3f290791df1c4 100644
+--- a/drivers/net/ethernet/sfc/rx_common.c
++++ b/drivers/net/ethernet/sfc/rx_common.c
+@@ -820,8 +820,10 @@ int efx_probe_filters(struct efx_nic *efx)
+               }
+               if (!success) {
+-                      efx_for_each_channel(channel, efx)
++                      efx_for_each_channel(channel, efx) {
+                               kfree(channel->rps_flow_id);
++                              channel->rps_flow_id = NULL;
++                      }
+                       efx->type->filter_table_remove(efx);
+                       rc = -ENOMEM;
+                       goto out_unlock;
+-- 
+2.43.0
+
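For illustration only (not part of the queued patch): a minimal userspace C sketch of the pattern the fix applies, with free() standing in for kfree(). The struct and field names are borrowed from the driver purely for readability.

/*
 * NULL the pointer right after freeing it so a second cleanup path that
 * frees the same field becomes a harmless no-op (free(NULL), like
 * kfree(NULL), does nothing).
 */
#include <stdlib.h>

struct channel {
        unsigned int *rps_flow_id;
};

static void probe_error_path(struct channel *ch)
{
        free(ch->rps_flow_id);
        ch->rps_flow_id = NULL;      /* the added line prevents a double free */
}

static void remove_path(struct channel *ch)
{
        free(ch->rps_flow_id);       /* now a no-op if probe already freed it */
        ch->rps_flow_id = NULL;
}

int main(void)
{
        struct channel ch = { .rps_flow_id = malloc(64 * sizeof(unsigned int)) };

        probe_error_path(&ch);       /* error unwinding during probe */
        remove_path(&ch);            /* later teardown: no double free */
        return 0;
}
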
diff --git a/queue-6.1/smb-client-fix-missing-mode-bits-for-smb-symlinks.patch b/queue-6.1/smb-client-fix-missing-mode-bits-for-smb-symlinks.patch
new file mode 100644 (file)
index 0000000..68b97b5
--- /dev/null
@@ -0,0 +1,36 @@
+From 9ab12c755c1e01616c2dcfea64cf55fb2c382d0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 25 Nov 2023 23:55:10 -0300
+Subject: smb: client: fix missing mode bits for SMB symlinks
+
+From: Paulo Alcantara <pc@manguebit.com>
+
+[ Upstream commit ef22bb800d967616c7638d204bc1b425beac7f5f ]
+
+When instantiating inodes for SMB symlinks, add the mode bits from
+@cifs_sb->ctx->file_mode as we already do for the other special files.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
+index 7be51f9d2fa18..5343898bac8a6 100644
+--- a/fs/smb/client/inode.c
++++ b/fs/smb/client/inode.c
+@@ -264,7 +264,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
+               fattr->cf_dtype = DT_REG;
+               break;
+       case UNIX_SYMLINK:
+-              fattr->cf_mode |= S_IFLNK;
++              fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
+               fattr->cf_dtype = DT_LNK;
+               break;
+       case UNIX_DIR:
+-- 
+2.43.0
+
diff --git a/queue-6.1/splice-net-add-a-splice_eof-op-to-file-ops-and-socke.patch b/queue-6.1/splice-net-add-a-splice_eof-op-to-file-ops-and-socke.patch
new file mode 100644 (file)
index 0000000..1ec5fdd
--- /dev/null
@@ -0,0 +1,212 @@
+From 4539a0bb3af7906c4281c52b7c7e1f6ccdebe5e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Jun 2023 19:19:10 +0100
+Subject: splice, net: Add a splice_eof op to file-ops and socket-ops
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2bfc66850952b6921b2033b09729ec59eabbc81d ]
+
+Add an optional method, ->splice_eof(), to allow splice to indicate the
+premature termination of a splice to struct file_operations and struct
+proto_ops.
+
+This is called if sendfile() or splice() encounters all of the following
+conditions inside splice_direct_to_actor():
+
+ (1) the user did not set SPLICE_F_MORE (splice only), and
+
+ (2) an EOF condition occurred (->splice_read() returned 0), and
+
+ (3) we haven't read enough to fulfill the request (ie. len > 0 still), and
+
+ (4) we have already spliced at least one byte.
+
+A further patch will modify the behaviour of SPLICE_F_MORE to always be
+passed to the actor if either the user set it or we haven't yet read
+sufficient data to fulfill the request.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+cc: Jens Axboe <axboe@kernel.dk>
+cc: Christoph Hellwig <hch@lst.de>
+cc: Al Viro <viro@zeniv.linux.org.uk>
+cc: Matthew Wilcox <willy@infradead.org>
+cc: Jan Kara <jack@suse.cz>
+cc: Jeff Layton <jlayton@kernel.org>
+cc: David Hildenbrand <david@redhat.com>
+cc: Christian Brauner <brauner@kernel.org>
+cc: Chuck Lever <chuck.lever@oracle.com>
+cc: Boris Pismenny <borisp@nvidia.com>
+cc: John Fastabend <john.fastabend@gmail.com>
+cc: linux-mm@kvack.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: a0002127cd74 ("udp: move udp->no_check6_tx to udp->udp_flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/splice.c            | 31 ++++++++++++++++++++++++++++++-
+ include/linux/fs.h     |  1 +
+ include/linux/net.h    |  1 +
+ include/linux/splice.h |  1 +
+ include/net/sock.h     |  1 +
+ net/socket.c           | 10 ++++++++++
+ 6 files changed, 44 insertions(+), 1 deletion(-)
+
+diff --git a/fs/splice.c b/fs/splice.c
+index 5969b7a1d353a..c4ae54deac42c 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -764,6 +764,17 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+       return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ }
++/*
++ * Indicate to the caller that there was a premature EOF when reading from the
++ * source and the caller didn't indicate they would be sending more data after
++ * this.
++ */
++static void do_splice_eof(struct splice_desc *sd)
++{
++      if (sd->splice_eof)
++              sd->splice_eof(sd);
++}
++
+ /*
+  * Attempt to initiate a splice from a file to a pipe.
+  */
+@@ -864,7 +875,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+               ret = do_splice_to(in, &pos, pipe, len, flags);
+               if (unlikely(ret <= 0))
+-                      goto out_release;
++                      goto read_failure;
+               read_len = ret;
+               sd->total_len = read_len;
+@@ -904,6 +915,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+       file_accessed(in);
+       return bytes;
++read_failure:
++      /*
++       * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that
++       * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a
++       * "->splice_in()" that returned EOF (ie zero) *and* we have sent at
++       * least 1 byte *then* we will also do the ->splice_eof() call.
++       */
++      if (ret == 0 && !more && len > 0 && bytes)
++              do_splice_eof(sd);
+ out_release:
+       /*
+        * If we did an incomplete transfer we must release
+@@ -932,6 +952,14 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
+                             sd->flags);
+ }
++static void direct_file_splice_eof(struct splice_desc *sd)
++{
++      struct file *file = sd->u.file;
++
++      if (file->f_op->splice_eof)
++              file->f_op->splice_eof(file);
++}
++
+ /**
+  * do_splice_direct - splices data directly between two files
+  * @in:               file to splice from
+@@ -957,6 +985,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+               .flags          = flags,
+               .pos            = *ppos,
+               .u.file         = out,
++              .splice_eof     = direct_file_splice_eof,
+               .opos           = opos,
+       };
+       long ret;
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index b6af6abc7a77f..4a1911dcf834b 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2177,6 +2177,7 @@ struct file_operations {
+       int (*flock) (struct file *, int, struct file_lock *);
+       ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+       ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
++      void (*splice_eof)(struct file *file);
+       int (*setlease)(struct file *, long, struct file_lock **, void **);
+       long (*fallocate)(struct file *file, int mode, loff_t offset,
+                         loff_t len);
+diff --git a/include/linux/net.h b/include/linux/net.h
+index 18d942bbdf6e0..25baca60f6cba 100644
+--- a/include/linux/net.h
++++ b/include/linux/net.h
+@@ -209,6 +209,7 @@ struct proto_ops {
+                                     int offset, size_t size, int flags);
+       ssize_t         (*splice_read)(struct socket *sock,  loff_t *ppos,
+                                      struct pipe_inode_info *pipe, size_t len, unsigned int flags);
++      void            (*splice_eof)(struct socket *sock);
+       int             (*set_peek_off)(struct sock *sk, int val);
+       int             (*peek_len)(struct socket *sock);
+diff --git a/include/linux/splice.h b/include/linux/splice.h
+index a55179fd60fc3..41a70687be853 100644
+--- a/include/linux/splice.h
++++ b/include/linux/splice.h
+@@ -38,6 +38,7 @@ struct splice_desc {
+               struct file *file;      /* file to read/write */
+               void *data;             /* cookie */
+       } u;
++      void (*splice_eof)(struct splice_desc *sd); /* Unexpected EOF handler */
+       loff_t pos;                     /* file position */
+       loff_t *opos;                   /* sendfile: output position */
+       size_t num_spliced;             /* number of bytes already spliced */
+diff --git a/include/net/sock.h b/include/net/sock.h
+index d8ed62a8e1a3e..9de9f070537cc 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1279,6 +1279,7 @@ struct proto {
+                                          size_t len, int flags, int *addr_len);
+       int                     (*sendpage)(struct sock *sk, struct page *page,
+                                       int offset, size_t size, int flags);
++      void                    (*splice_eof)(struct socket *sock);
+       int                     (*bind)(struct sock *sk,
+                                       struct sockaddr *addr, int addr_len);
+       int                     (*bind_add)(struct sock *sk,
+diff --git a/net/socket.c b/net/socket.c
+index 6f39f7b0cc85c..639d76f20384e 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -130,6 +130,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
+ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+                               struct pipe_inode_info *pipe, size_t len,
+                               unsigned int flags);
++static void sock_splice_eof(struct file *file);
+ #ifdef CONFIG_PROC_FS
+ static void sock_show_fdinfo(struct seq_file *m, struct file *f)
+@@ -164,6 +165,7 @@ static const struct file_operations socket_file_ops = {
+       .sendpage =     sock_sendpage,
+       .splice_write = generic_splice_sendpage,
+       .splice_read =  sock_splice_read,
++      .splice_eof =   sock_splice_eof,
+       .show_fdinfo =  sock_show_fdinfo,
+ };
+@@ -1091,6 +1093,14 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+       return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+ }
++static void sock_splice_eof(struct file *file)
++{
++      struct socket *sock = file->private_data;
++
++      if (sock->ops->splice_eof)
++              sock->ops->splice_eof(sock);
++}
++
+ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ {
+       struct file *file = iocb->ki_filp;
+-- 
+2.43.0
+
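For illustration only (not part of the queued patch): a minimal userspace C sketch of the optional-callback pattern the patch introduces. The types and names are simplified stand-ins for the kernel's file_operations/proto_ops plumbing, not the actual interfaces.

/*
 * An ops table gains an optional ->splice_eof() hook; callers invoke it
 * only if the implementation provides one, so existing ops tables keep
 * working unchanged.
 */
#include <stdio.h>

struct ops {
        void (*splice_eof)(void *priv);   /* optional */
};

static void call_splice_eof(const struct ops *ops, void *priv)
{
        if (ops->splice_eof)              /* tolerate sockets without the hook */
                ops->splice_eof(priv);
}

static void tcp_like_splice_eof(void *priv)
{
        printf("flushing buffered data for %p\n", priv);
}

int main(void)
{
        struct ops with_hook = { .splice_eof = tcp_like_splice_eof };
        struct ops without_hook = { 0 };
        int sock;

        call_splice_eof(&with_hook, &sock);    /* hook runs */
        call_splice_eof(&without_hook, &sock); /* silently skipped */
        return 0;
}
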
diff --git a/queue-6.1/srcu-fix-callbacks-acceleration-mishandling.patch b/queue-6.1/srcu-fix-callbacks-acceleration-mishandling.patch
new file mode 100644 (file)
index 0000000..8223bed
--- /dev/null
@@ -0,0 +1,157 @@
+From 87882bb82acf16fc4e9d159032c1e6e7a25a3f87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Oct 2023 01:28:59 +0200
+Subject: srcu: Fix callbacks acceleration mishandling
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit 4a8e65b0c348e42107c64381e692e282900be361 ]
+
+SRCU callbacks acceleration might fail if the preceding callbacks
+advance also fails. This can happen when the following steps are met:
+
+1) The RCU_WAIT_TAIL segment has callbacks (say for gp_num 8) and the
+   RCU_NEXT_READY_TAIL also has callbacks (say for gp_num 12).
+
+2) The grace period for RCU_WAIT_TAIL is observed as started but not yet
+   completed so rcu_seq_current() returns 4 + SRCU_STATE_SCAN1 = 5.
+
+3) This value is passed to rcu_segcblist_advance() which can't move
+   any segment forward and fails.
+
+4) srcu_gp_start_if_needed() still proceeds with callback acceleration.
+   But then the call to rcu_seq_snap() observes the grace period for the
+   RCU_WAIT_TAIL segment (gp_num 8) as completed and the subsequent one
+   for the RCU_NEXT_READY_TAIL segment as started
+   (ie: 8 + SRCU_STATE_SCAN1 = 9) so it returns a snapshot of the
+   next grace period, which is 16.
+
+5) The value of 16 is passed to rcu_segcblist_accelerate() but the
+   freshly enqueued callback in RCU_NEXT_TAIL can't move to
+   RCU_NEXT_READY_TAIL which already has callbacks for a previous grace
+   period (gp_num = 12). So acceleration fails.
+
+6) Note that in all these steps, srcu_invoke_callbacks() hadn't had a
+   chance to run.
+
+Then some very bad outcome may happen if the following happens:
+
+7) Some other CPU races and starts the grace period number 16 before the
+   CPU handling previous steps had a chance. Therefore srcu_gp_start()
+   isn't called on the latter sdp to fix the acceleration leak from
+   previous steps with a new pair of call to advance/accelerate.
+
+8) The grace period 16 completes and srcu_invoke_callbacks() is finally
+   called. All the callbacks from previous grace periods (8 and 12) are
+   correctly advanced and executed but callbacks in RCU_NEXT_READY_TAIL
+   still remain. Then rcu_segcblist_accelerate() is called with a
+   snaphot of 20.
+
+9) Since nothing started the grace period number 20, callbacks stay
+   unhandled.
+
+This has been reported under real load:
+
+       [3144162.608392] INFO: task kworker/136:12:252684 blocked for more
+       than 122 seconds.
+       [3144162.615986]       Tainted: G           O  K   5.4.203-1-tlinux4-0011.1 #1
+       [3144162.623053] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
+       disables this message.
+       [3144162.631162] kworker/136:12  D    0 252684      2 0x90004000
+       [3144162.631189] Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
+       [3144162.631192] Call Trace:
+       [3144162.631202]  __schedule+0x2ee/0x660
+       [3144162.631206]  schedule+0x33/0xa0
+       [3144162.631209]  schedule_timeout+0x1c4/0x340
+       [3144162.631214]  ? update_load_avg+0x82/0x660
+       [3144162.631217]  ? raw_spin_rq_lock_nested+0x1f/0x30
+       [3144162.631218]  wait_for_completion+0x119/0x180
+       [3144162.631220]  ? wake_up_q+0x80/0x80
+       [3144162.631224]  __synchronize_srcu.part.19+0x81/0xb0
+       [3144162.631226]  ? __bpf_trace_rcu_utilization+0x10/0x10
+       [3144162.631227]  synchronize_srcu+0x5f/0xc0
+       [3144162.631236]  irqfd_shutdown+0x3c/0xb0 [kvm]
+       [3144162.631239]  ? __schedule+0x2f6/0x660
+       [3144162.631243]  process_one_work+0x19a/0x3a0
+       [3144162.631244]  worker_thread+0x37/0x3a0
+       [3144162.631247]  kthread+0x117/0x140
+       [3144162.631247]  ? process_one_work+0x3a0/0x3a0
+       [3144162.631248]  ? __kthread_cancel_work+0x40/0x40
+       [3144162.631250]  ret_from_fork+0x1f/0x30
+
+Fix this with taking the snapshot for acceleration _before_ the read
+of the current grace period number.
+
+The only side effect of this solution is that callbacks advancing happen
+then _after_ the full barrier in rcu_seq_snap(). This is not a problem
+because that barrier only cares about:
+
+1) Ordering accesses of the update side before call_srcu() so they don't
+   bleed.
+2) See all the accesses prior to the grace period of the current gp_num
+
+The only things callbacks advancing need to be ordered against are
+carried by snp locking.
+
+Reported-by: Yong He <alexyonghe@tencent.com>
+Co-developed-by: Yong He <alexyonghe@tencent.com>
+Signed-off-by: Yong He <alexyonghe@tencent.com>
+Co-developed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by:  Joel Fernandes (Google) <joel@joelfernandes.org>
+Co-developed-by: Neeraj upadhyay <Neeraj.Upadhyay@amd.com>
+Signed-off-by: Neeraj upadhyay <Neeraj.Upadhyay@amd.com>
+Link: http://lore.kernel.org/CANZk6aR+CqZaqmMWrC2eRRPY12qAZnDZLwLnHZbNi=xXMB401g@mail.gmail.com
+Fixes: da915ad5cf25 ("srcu: Parallelize callback handling")
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/srcutree.c | 31 +++++++++++++++++++++++++++++--
+ 1 file changed, 29 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
+index 8fdf076720384..929dcbc04d29c 100644
+--- a/kernel/rcu/srcutree.c
++++ b/kernel/rcu/srcutree.c
+@@ -1100,10 +1100,37 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
+       spin_lock_irqsave_sdp_contention(sdp, &flags);
+       if (rhp)
+               rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
++      /*
++       * The snapshot for acceleration must be taken _before_ the read of the
++       * current gp sequence used for advancing, otherwise advancing may fail
++       * and acceleration may then fail too.
++       *
++       * This could happen if:
++       *
++       *  1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the
++       *     RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8).
++       *
++       *  2) The grace period for RCU_WAIT_TAIL is seen as started but not
++       *     completed so rcu_seq_current() returns X + SRCU_STATE_SCAN1.
++       *
++       *  3) This value is passed to rcu_segcblist_advance() which can't move
++       *     any segment forward and fails.
++       *
++       *  4) srcu_gp_start_if_needed() still proceeds with callback acceleration.
++       *     But then the call to rcu_seq_snap() observes the grace period for the
++       *     RCU_WAIT_TAIL segment as completed and the subsequent one for the
++       *     RCU_NEXT_READY_TAIL segment as started (ie: X + 4 + SRCU_STATE_SCAN1)
++       *     so it returns a snapshot of the next grace period, which is X + 12.
++       *
++       *  5) The value of X + 12 is passed to rcu_segcblist_accelerate() but the
++       *     freshly enqueued callback in RCU_NEXT_TAIL can't move to
++       *     RCU_NEXT_READY_TAIL which already has callbacks for a previous grace
++       *     period (gp_num = X + 8). So acceleration fails.
++       */
++      s = rcu_seq_snap(&ssp->srcu_gp_seq);
+       rcu_segcblist_advance(&sdp->srcu_cblist,
+                             rcu_seq_current(&ssp->srcu_gp_seq));
+-      s = rcu_seq_snap(&ssp->srcu_gp_seq);
+-      (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
++      WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s) && rhp);
+       if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
+               sdp->srcu_gp_seq_needed = s;
+               needgp = true;
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-annotate-data-races-around-udp-encap_type.patch b/queue-6.1/udp-annotate-data-races-around-udp-encap_type.patch
new file mode 100644 (file)
index 0000000..681e051
--- /dev/null
@@ -0,0 +1,205 @@
+From be36ec0d25d48e42c708f492bd350b9e4cc0e19e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:28 +0000
+Subject: udp: annotate data-races around udp->encap_type
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 70a36f571362a8de8b8c02d21ae524fc776287f2 ]
+
+syzbot/KCSAN complained about UDP_ENCAP_L2TPINUDP setsockopt() racing.
+
+Add READ_ONCE()/WRITE_ONCE() to document races on this lockless field.
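+
+For illustration only (the pattern is lifted from the hunks below), the
+lockless accesses are now annotated like this:
+
+        /* setsockopt() path */
+        WRITE_ONCE(up->encap_type, val);
+
+        /* receive and error paths */
+        switch (READ_ONCE(udp_sk(sk)->encap_type)) {
+        ...
+        }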
+
+syzbot report was:
+BUG: KCSAN: data-race in udp_lib_setsockopt / udp_lib_setsockopt
+
+read-write to 0xffff8881083603fa of 1 bytes by task 16557 on cpu 0:
+udp_lib_setsockopt+0x682/0x6c0
+udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2779
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read-write to 0xffff8881083603fa of 1 bytes by task 16554 on cpu 1:
+udp_lib_setsockopt+0x682/0x6c0
+udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2779
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x01 -> 0x05
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 16554 Comm: syz-executor.5 Not tainted 6.5.0-rc7-syzkaller-00004-gf7757129e3de #0
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c      | 4 ++--
+ net/ipv4/udp.c         | 9 +++++----
+ net/ipv4/xfrm4_input.c | 4 ++--
+ net/ipv6/udp.c         | 5 +++--
+ net/ipv6/xfrm6_input.c | 4 ++--
+ net/l2tp/l2tp_core.c   | 6 +++---
+ 6 files changed, 17 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 477b4d4f860bd..bace989591f75 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -629,7 +629,7 @@ static void __gtp_encap_destroy(struct sock *sk)
+                       gtp->sk0 = NULL;
+               else
+                       gtp->sk1u = NULL;
+-              udp_sk(sk)->encap_type = 0;
++              WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+               rcu_assign_sk_user_data(sk, NULL);
+               release_sock(sk);
+               sock_put(sk);
+@@ -681,7 +681,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
+       netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
+-      switch (udp_sk(sk)->encap_type) {
++      switch (READ_ONCE(udp_sk(sk)->encap_type)) {
+       case UDP_ENCAP_GTP0:
+               netdev_dbg(gtp->dev, "received GTP0 packet\n");
+               ret = gtp0_udp_encap_recv(gtp, skb);
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 267f77633a8f3..5672d9a86c5d2 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -733,7 +733,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
+                              iph->saddr, uh->source, skb->dev->ifindex,
+                              inet_sdif(skb), udptable, NULL);
+-      if (!sk || udp_sk(sk)->encap_type) {
++      if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
+               /* No socket for error: try tunnels before discarding */
+               if (static_branch_unlikely(&udp_encap_needed_key)) {
+                       sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
+@@ -2114,7 +2114,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
+       }
+       nf_reset_ct(skb);
+-      if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
++      if (static_branch_unlikely(&udp_encap_needed_key) &&
++          READ_ONCE(up->encap_type)) {
+               int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+               /*
+@@ -2699,7 +2700,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+ #endif
+                       fallthrough;
+               case UDP_ENCAP_L2TPINUDP:
+-                      up->encap_type = val;
++                      WRITE_ONCE(up->encap_type, val);
+                       udp_tunnel_encap_enable(sk);
+                       break;
+               default:
+@@ -2800,7 +2801,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_ENCAP:
+-              val = up->encap_type;
++              val = READ_ONCE(up->encap_type);
+               break;
+       case UDP_NO_CHECK6_TX:
+diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
+index eac206a290d05..183f6dc372429 100644
+--- a/net/ipv4/xfrm4_input.c
++++ b/net/ipv4/xfrm4_input.c
+@@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+       struct udphdr *uh;
+       struct iphdr *iph;
+       int iphlen, len;
+-
+       __u8 *udpdata;
+       __be32 *udpdata32;
+-      __u16 encap_type = up->encap_type;
++      u16 encap_type;
++      encap_type = READ_ONCE(up->encap_type);
+       /* if this is not encapsulated socket, then just return now */
+       if (!encap_type)
+               return 1;
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 5b7c4f8e2ed03..961106eda69d0 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+       sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
+                              inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
+-      if (!sk || udp_sk(sk)->encap_type) {
++      if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
+               /* No socket for error: try tunnels before discarding */
+               if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+                       sk = __udp6_lib_err_encap(net, hdr, offset, uh,
+@@ -712,7 +712,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
+       }
+       nf_reset_ct(skb);
+-      if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
++      if (static_branch_unlikely(&udpv6_encap_needed_key) &&
++          READ_ONCE(up->encap_type)) {
+               int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+               /*
+diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
+index 4907ab241d6be..4156387248e40 100644
+--- a/net/ipv6/xfrm6_input.c
++++ b/net/ipv6/xfrm6_input.c
+@@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+       struct ipv6hdr *ip6h;
+       int len;
+       int ip6hlen = sizeof(struct ipv6hdr);
+-
+       __u8 *udpdata;
+       __be32 *udpdata32;
+-      __u16 encap_type = up->encap_type;
++      u16 encap_type;
+       if (skb->protocol == htons(ETH_P_IP))
+               return xfrm4_udp_encap_rcv(sk, skb);
++      encap_type = READ_ONCE(up->encap_type);
+       /* if this is not encapsulated socket, then just return now */
+       if (!encap_type)
+               return 1;
+diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
+index 03608d3ded4b8..8d21ff25f1602 100644
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk)
+       switch (tunnel->encap) {
+       case L2TP_ENCAPTYPE_UDP:
+               /* No longer an encapsulation socket. See net/ipv4/udp.c */
+-              (udp_sk(sk))->encap_type = 0;
+-              (udp_sk(sk))->encap_rcv = NULL;
+-              (udp_sk(sk))->encap_destroy = NULL;
++              WRITE_ONCE(udp_sk(sk)->encap_type, 0);
++              udp_sk(sk)->encap_rcv = NULL;
++              udp_sk(sk)->encap_destroy = NULL;
+               break;
+       case L2TP_ENCAPTYPE_IP:
+               break;
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-convert-udp_sendpage-to-use-msg_splice_pages.patch b/queue-6.1/udp-convert-udp_sendpage-to-use-msg_splice_pages.patch
new file mode 100644 (file)
index 0000000..7bd883f
--- /dev/null
@@ -0,0 +1,95 @@
+From 18625f6ea3f1ce6d5e70c59bd187fa0323530c26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 13:11:22 +0100
+Subject: udp: Convert udp_sendpage() to use MSG_SPLICE_PAGES
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 7ac7c987850c3ec617c778f7bd871804dc1c648d ]
+
+Convert udp_sendpage() to use sendmsg() with MSG_SPLICE_PAGES rather than
+directly splicing in the pages itself.
+
+This allows ->sendpage() to be replaced by something that can handle
+multiple multipage folios in a single transaction.
+
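+The converted helper essentially becomes (sketch, see the net/ipv4/udp.c
+hunk below):
+
+        struct bio_vec bvec;
+        struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
+
+        if (flags & MSG_SENDPAGE_NOTLAST)
+                msg.msg_flags |= MSG_MORE;
+
+        bvec_set_page(&bvec, page, size, offset);
+        iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+        return udp_sendmsg(sk, &msg, size);
+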
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+cc: David Ahern <dsahern@kernel.org>
+cc: Jens Axboe <axboe@kernel.dk>
+cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: a0002127cd74 ("udp: move udp->no_check6_tx to udp->udp_flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 51 ++++++--------------------------------------------
+ 1 file changed, 6 insertions(+), 45 deletions(-)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 65abc92a81bd0..b49cb3df01bb4 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1335,54 +1335,15 @@ EXPORT_SYMBOL(udp_sendmsg);
+ int udp_sendpage(struct sock *sk, struct page *page, int offset,
+                size_t size, int flags)
+ {
+-      struct inet_sock *inet = inet_sk(sk);
+-      struct udp_sock *up = udp_sk(sk);
+-      int ret;
++      struct bio_vec bvec;
++      struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
+       if (flags & MSG_SENDPAGE_NOTLAST)
+-              flags |= MSG_MORE;
+-
+-      if (!up->pending) {
+-              struct msghdr msg = {   .msg_flags = flags|MSG_MORE };
+-
+-              /* Call udp_sendmsg to specify destination address which
+-               * sendpage interface can't pass.
+-               * This will succeed only when the socket is connected.
+-               */
+-              ret = udp_sendmsg(sk, &msg, 0);
+-              if (ret < 0)
+-                      return ret;
+-      }
+-
+-      lock_sock(sk);
++              msg.msg_flags |= MSG_MORE;
+-      if (unlikely(!up->pending)) {
+-              release_sock(sk);
+-
+-              net_dbg_ratelimited("cork failed\n");
+-              return -EINVAL;
+-      }
+-
+-      ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
+-                           page, offset, size, flags);
+-      if (ret == -EOPNOTSUPP) {
+-              release_sock(sk);
+-              return sock_no_sendpage(sk->sk_socket, page, offset,
+-                                      size, flags);
+-      }
+-      if (ret < 0) {
+-              udp_flush_pending_frames(sk);
+-              goto out;
+-      }
+-
+-      up->len += size;
+-      if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
+-              ret = udp_push_pending_frames(sk);
+-      if (!ret)
+-              ret = size;
+-out:
+-      release_sock(sk);
+-      return ret;
++      bvec_set_page(&bvec, page, size, offset);
++      iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
++      return udp_sendmsg(sk, &msg, size);
+ }
+ #define UDP_SKB_IS_STATELESS 0x80000000
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-introduce-udp-udp_flags.patch b/queue-6.1/udp-introduce-udp-udp_flags.patch
new file mode 100644 (file)
index 0000000..6dc6007
--- /dev/null
@@ -0,0 +1,171 @@
+From ceb0fec094adcb6586e574c81ad754f61512c4eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:21 +0000
+Subject: udp: introduce udp->udp_flags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 81b36803ac139827538ac5ce4028e750a3c53f53 ]
+
+According to syzbot, it is time to use proper atomic flags
+for various UDP flags.
+
+Add a udp_flags field, and convert udp->corkflag to the first
+bit in it.
+
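+A sketch of the resulting accessors (lifted from the include/linux/udp.h
+hunk below); the flags live in an unsigned long manipulated with atomic
+bitops:
+
+        #define udp_test_bit(nr, sk) \
+                test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+        #define udp_set_bit(nr, sk) \
+                set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+
+        /* e.g. the former READ_ONCE(up->corkflag) test becomes: */
+        int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
+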
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: a0002127cd74 ("udp: move udp->no_check6_tx to udp->udp_flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h | 28 +++++++++++++++++++++-------
+ net/ipv4/udp.c      | 12 ++++++------
+ net/ipv6/udp.c      |  6 +++---
+ 3 files changed, 30 insertions(+), 16 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index e96da4157d04d..10b56b8231e3c 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -30,14 +30,20 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
+       return (num + net_hash_mix(net)) & mask;
+ }
++enum {
++      UDP_FLAGS_CORK,         /* Cork is required */
++};
++
+ struct udp_sock {
+       /* inet_sock has to be the first member */
+       struct inet_sock inet;
+ #define udp_port_hash         inet.sk.__sk_common.skc_u16hashes[0]
+ #define udp_portaddr_hash     inet.sk.__sk_common.skc_u16hashes[1]
+ #define udp_portaddr_node     inet.sk.__sk_common.skc_portaddr_node
++
++      unsigned long    udp_flags;
++
+       int              pending;       /* Any pending frames ? */
+-      unsigned int     corkflag;      /* Cork is required */
+       __u8             encap_type;    /* Is this an Encapsulation socket? */
+       unsigned char    no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
+                        no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+@@ -49,6 +55,11 @@ struct udp_sock {
+                        gro_enabled:1, /* Request GRO aggregation */
+                        accept_udp_l4:1,
+                        accept_udp_fraglist:1;
++/* indicator bits used by pcflag: */
++#define UDPLITE_BIT      0x1                  /* set by udplite proto init function */
++#define UDPLITE_SEND_CC  0x2                  /* set via udplite setsockopt         */
++#define UDPLITE_RECV_CC  0x4          /* set via udplite setsocktopt        */
++      __u8             pcflag;        /* marks socket as UDP-Lite if > 0    */
+       /*
+        * Following member retains the information to create a UDP header
+        * when the socket is uncorked.
+@@ -60,12 +71,6 @@ struct udp_sock {
+        */
+       __u16            pcslen;
+       __u16            pcrlen;
+-/* indicator bits used by pcflag: */
+-#define UDPLITE_BIT      0x1                  /* set by udplite proto init function */
+-#define UDPLITE_SEND_CC  0x2                  /* set via udplite setsockopt         */
+-#define UDPLITE_RECV_CC  0x4          /* set via udplite setsocktopt        */
+-      __u8             pcflag;        /* marks socket as UDP-Lite if > 0    */
+-      __u8             unused[3];
+       /*
+        * For encapsulation sockets.
+        */
+@@ -89,6 +94,15 @@ struct udp_sock {
+       int             forward_deficit;
+ };
++#define udp_test_bit(nr, sk)                  \
++      test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
++#define udp_set_bit(nr, sk)                   \
++      set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
++#define udp_clear_bit(nr, sk)                 \
++      clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
++#define udp_assign_bit(nr, sk, val)           \
++      assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
++
+ #define UDP_MAX_SEGMENTS      (1 << 6UL)
+ static inline struct udp_sock *udp_sk(const struct sock *sk)
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index e8dd2880ac9aa..60a754477efb2 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1068,7 +1068,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       __be16 dport;
+       u8  tos;
+       int err, is_udplite = IS_UDPLITE(sk);
+-      int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
++      int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
+       int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+       struct sk_buff *skb;
+       struct ip_options_data opt_copy;
+@@ -1337,11 +1337,11 @@ void udp_splice_eof(struct socket *sock)
+       struct sock *sk = sock->sk;
+       struct udp_sock *up = udp_sk(sk);
+-      if (!up->pending || READ_ONCE(up->corkflag))
++      if (!up->pending || udp_test_bit(CORK, sk))
+               return;
+       lock_sock(sk);
+-      if (up->pending && !READ_ONCE(up->corkflag))
++      if (up->pending && !udp_test_bit(CORK, sk))
+               udp_push_pending_frames(sk);
+       release_sock(sk);
+ }
+@@ -2673,9 +2673,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+       switch (optname) {
+       case UDP_CORK:
+               if (val != 0) {
+-                      WRITE_ONCE(up->corkflag, 1);
++                      udp_set_bit(CORK, sk);
+               } else {
+-                      WRITE_ONCE(up->corkflag, 0);
++                      udp_clear_bit(CORK, sk);
+                       lock_sock(sk);
+                       push_pending_frames(sk);
+                       release_sock(sk);
+@@ -2800,7 +2800,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+       switch (optname) {
+       case UDP_CORK:
+-              val = READ_ONCE(up->corkflag);
++              val = udp_test_bit(CORK, sk);
+               break;
+       case UDP_ENCAP:
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 2a65136dca773..85653e3a04fe8 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1351,7 +1351,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       int addr_len = msg->msg_namelen;
+       bool connected = false;
+       int ulen = len;
+-      int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
++      int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
+       int err;
+       int is_udplite = IS_UDPLITE(sk);
+       int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+@@ -1662,11 +1662,11 @@ static void udpv6_splice_eof(struct socket *sock)
+       struct sock *sk = sock->sk;
+       struct udp_sock *up = udp_sk(sk);
+-      if (!up->pending || READ_ONCE(up->corkflag))
++      if (!up->pending || udp_test_bit(CORK, sk))
+               return;
+       lock_sock(sk);
+-      if (up->pending && !READ_ONCE(up->corkflag))
++      if (up->pending && !udp_test_bit(CORK, sk))
+               udp_v6_push_pending_frames(sk);
+       release_sock(sk);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-lockless-udp_encap_l2tpinudp-udp_gro.patch b/queue-6.1/udp-lockless-udp_encap_l2tpinudp-udp_gro.patch
new file mode 100644 (file)
index 0000000..8665533
--- /dev/null
@@ -0,0 +1,154 @@
+From d2f165afbbc9ce0af6beddcde9af3f3d368908f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:27 +0000
+Subject: udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ac9a7f4ce5dda1472e8f44096f33066c6ec1a3b4 ]
+
+Move udp->encap_enabled to udp->udp_flags.
+
+Add a udp_test_and_set_bit() helper to allow a lockless
+udp_tunnel_encap_enable() implementation.
+
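+A sketch of the result (lifted from the include/net/udp_tunnel.h hunk
+below); the atomic test_and_set_bit() guarantees the enable work runs
+only once without holding the socket lock:
+
+        static inline void udp_tunnel_encap_enable(struct sock *sk)
+        {
+                if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
+                        return;
+        #if IS_ENABLED(CONFIG_IPV6)
+                if (READ_ONCE(sk->sk_family) == PF_INET6)
+                        ipv6_stub->udpv6_encap_enable();
+        #endif
+                udp_encap_enable();
+        }
+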
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 70a36f571362 ("udp: annotate data-races around udp->encap_type")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h        |  9 ++++-----
+ include/net/udp_tunnel.h   |  9 +++------
+ net/ipv4/udp.c             | 10 +++-------
+ net/ipv4/udp_tunnel_core.c |  2 +-
+ net/ipv6/udp.c             |  2 +-
+ 5 files changed, 12 insertions(+), 20 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index 0e6880856246a..efd9ab6df3797 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -37,6 +37,7 @@ enum {
+       UDP_FLAGS_GRO_ENABLED,  /* Request GRO aggregation */
+       UDP_FLAGS_ACCEPT_FRAGLIST,
+       UDP_FLAGS_ACCEPT_L4,
++      UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
+ };
+ struct udp_sock {
+@@ -50,11 +51,7 @@ struct udp_sock {
+       int              pending;       /* Any pending frames ? */
+       __u8             encap_type;    /* Is this an Encapsulation socket? */
+-      unsigned char    encap_enabled:1; /* This socket enabled encap
+-                                         * processing; UDP tunnels and
+-                                         * different encapsulation layer set
+-                                         * this
+-                                         */
++
+ /* indicator bits used by pcflag: */
+ #define UDPLITE_BIT      0x1                  /* set by udplite proto init function */
+ #define UDPLITE_SEND_CC  0x2                  /* set via udplite setsockopt         */
+@@ -98,6 +95,8 @@ struct udp_sock {
+       test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+ #define udp_set_bit(nr, sk)                   \
+       set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
++#define udp_test_and_set_bit(nr, sk)          \
++      test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+ #define udp_clear_bit(nr, sk)                 \
+       clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+ #define udp_assign_bit(nr, sk, val)           \
+diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
+index 72394f441dad8..e5f81710b18f4 100644
+--- a/include/net/udp_tunnel.h
++++ b/include/net/udp_tunnel.h
+@@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
+ }
+ #endif
+-static inline void udp_tunnel_encap_enable(struct socket *sock)
++static inline void udp_tunnel_encap_enable(struct sock *sk)
+ {
+-      struct udp_sock *up = udp_sk(sock->sk);
+-
+-      if (up->encap_enabled)
++      if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
+               return;
+-      up->encap_enabled = 1;
+ #if IS_ENABLED(CONFIG_IPV6)
+-      if (sock->sk->sk_family == PF_INET6)
++      if (READ_ONCE(sk->sk_family) == PF_INET6)
+               ipv6_stub->udpv6_encap_enable();
+ #endif
+       udp_encap_enable();
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index df0ea45b8b8f2..267f77633a8f3 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2645,7 +2645,7 @@ void udp_destroy_sock(struct sock *sk)
+                       if (encap_destroy)
+                               encap_destroy(sk);
+               }
+-              if (up->encap_enabled)
++              if (udp_test_bit(ENCAP_ENABLED, sk))
+                       static_branch_dec(&udp_encap_needed_key);
+       }
+ }
+@@ -2700,9 +2700,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+                       fallthrough;
+               case UDP_ENCAP_L2TPINUDP:
+                       up->encap_type = val;
+-                      lock_sock(sk);
+-                      udp_tunnel_encap_enable(sk->sk_socket);
+-                      release_sock(sk);
++                      udp_tunnel_encap_enable(sk);
+                       break;
+               default:
+                       err = -ENOPROTOOPT;
+@@ -2725,14 +2723,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_GRO:
+-              lock_sock(sk);
+               /* when enabling GRO, accept the related GSO packet type */
+               if (valbool)
+-                      udp_tunnel_encap_enable(sk->sk_socket);
++                      udp_tunnel_encap_enable(sk);
+               udp_assign_bit(GRO_ENABLED, sk, valbool);
+               udp_assign_bit(ACCEPT_L4, sk, valbool);
+-              release_sock(sk);
+               break;
+       /*
+diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
+index 5f8104cf082d0..732e21b75ba28 100644
+--- a/net/ipv4/udp_tunnel_core.c
++++ b/net/ipv4/udp_tunnel_core.c
+@@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+       udp_sk(sk)->gro_receive = cfg->gro_receive;
+       udp_sk(sk)->gro_complete = cfg->gro_complete;
+-      udp_tunnel_encap_enable(sock);
++      udp_tunnel_encap_enable(sk);
+ }
+ EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index ddd17b5ea4259..5b7c4f8e2ed03 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1688,7 +1688,7 @@ void udpv6_destroy_sock(struct sock *sk)
+                       if (encap_destroy)
+                               encap_destroy(sk);
+               }
+-              if (up->encap_enabled) {
++              if (udp_test_bit(ENCAP_ENABLED, sk)) {
+                       static_branch_dec(&udpv6_encap_needed_key);
+                       udp_encap_disable();
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-move-udp-accept_udp_-l4-fraglist-to-udp-udp_flag.patch b/queue-6.1/udp-move-udp-accept_udp_-l4-fraglist-to-udp-udp_flag.patch
new file mode 100644 (file)
index 0000000..c3353f0
--- /dev/null
@@ -0,0 +1,91 @@
+From f8848188eeb61db01317edf7b603cd83e93eef38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:26 +0000
+Subject: udp: move udp->accept_udp_{l4|fraglist} to udp->udp_flags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit f5f52f0884a595ff99ab1a608643fe4025fca2d5 ]
+
+These are read locklessly; move them to udp_flags to fix data-races.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 70a36f571362 ("udp: annotate data-races around udp->encap_type")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h | 16 +++++++++-------
+ net/ipv4/udp.c      |  2 +-
+ 2 files changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index f87e2123fe7b0..0e6880856246a 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -35,6 +35,8 @@ enum {
+       UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+       UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
+       UDP_FLAGS_GRO_ENABLED,  /* Request GRO aggregation */
++      UDP_FLAGS_ACCEPT_FRAGLIST,
++      UDP_FLAGS_ACCEPT_L4,
+ };
+ struct udp_sock {
+@@ -48,13 +50,11 @@ struct udp_sock {
+       int              pending;       /* Any pending frames ? */
+       __u8             encap_type;    /* Is this an Encapsulation socket? */
+-      unsigned char    encap_enabled:1, /* This socket enabled encap
++      unsigned char    encap_enabled:1; /* This socket enabled encap
+                                          * processing; UDP tunnels and
+                                          * different encapsulation layer set
+                                          * this
+                                          */
+-                       accept_udp_l4:1,
+-                       accept_udp_fraglist:1;
+ /* indicator bits used by pcflag: */
+ #define UDPLITE_BIT      0x1                  /* set by udplite proto init function */
+ #define UDPLITE_SEND_CC  0x2                  /* set via udplite setsockopt         */
+@@ -146,10 +146,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
+       if (!skb_is_gso(skb))
+               return false;
+-      if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
++      if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
++          !udp_test_bit(ACCEPT_L4, sk))
+               return true;
+-      if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
++      if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST &&
++          !udp_test_bit(ACCEPT_FRAGLIST, sk))
+               return true;
+       return false;
+@@ -157,8 +159,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
+ static inline void udp_allow_gso(struct sock *sk)
+ {
+-      udp_sk(sk)->accept_udp_l4 = 1;
+-      udp_sk(sk)->accept_udp_fraglist = 1;
++      udp_set_bit(ACCEPT_L4, sk);
++      udp_set_bit(ACCEPT_FRAGLIST, sk);
+ }
+ #define udp_portaddr_for_each_entry(__sk, list) \
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 28292fcf07075..df0ea45b8b8f2 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2731,7 +2731,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+               if (valbool)
+                       udp_tunnel_encap_enable(sk->sk_socket);
+               udp_assign_bit(GRO_ENABLED, sk, valbool);
+-              up->accept_udp_l4 = valbool;
++              udp_assign_bit(ACCEPT_L4, sk, valbool);
+               release_sock(sk);
+               break;
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-move-udp-gro_enabled-to-udp-udp_flags.patch b/queue-6.1/udp-move-udp-gro_enabled-to-udp-udp_flags.patch
new file mode 100644 (file)
index 0000000..4aae43c
--- /dev/null
@@ -0,0 +1,109 @@
+From a7beff020a1a4657b4250f8b820c5cfbd77d49a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:24 +0000
+Subject: udp: move udp->gro_enabled to udp->udp_flags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e1dc0615c6b08ef36414f08c011965b8fb56198b ]
+
+syzbot reported that udp->gro_enabled can be read locklessly.
+Use one atomic bit from udp->udp_flags.
+
+Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h    | 2 +-
+ net/ipv4/udp.c         | 6 +++---
+ net/ipv4/udp_offload.c | 4 ++--
+ net/ipv6/udp.c         | 2 +-
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index e6cd46e2b0831..f87e2123fe7b0 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -34,6 +34,7 @@ enum {
+       UDP_FLAGS_CORK,         /* Cork is required */
+       UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+       UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
++      UDP_FLAGS_GRO_ENABLED,  /* Request GRO aggregation */
+ };
+ struct udp_sock {
+@@ -52,7 +53,6 @@ struct udp_sock {
+                                          * different encapsulation layer set
+                                          * this
+                                          */
+-                       gro_enabled:1, /* Request GRO aggregation */
+                        accept_udp_l4:1,
+                        accept_udp_fraglist:1;
+ /* indicator bits used by pcflag: */
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 01e74919885ad..28292fcf07075 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1901,7 +1901,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+                                                     (struct sockaddr *)sin);
+       }
+-      if (udp_sk(sk)->gro_enabled)
++      if (udp_test_bit(GRO_ENABLED, sk))
+               udp_cmsg_recv(msg, sk, skb);
+       if (inet->cmsg_flags)
+@@ -2730,7 +2730,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+               /* when enabling GRO, accept the related GSO packet type */
+               if (valbool)
+                       udp_tunnel_encap_enable(sk->sk_socket);
+-              up->gro_enabled = valbool;
++              udp_assign_bit(GRO_ENABLED, sk, valbool);
+               up->accept_udp_l4 = valbool;
+               release_sock(sk);
+               break;
+@@ -2820,7 +2820,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_GRO:
+-              val = up->gro_enabled;
++              val = udp_test_bit(GRO_ENABLED, sk);
+               break;
+       /* The following two cannot be changed on UDP sockets, the return is
+diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
+index 6d1a4bec2614d..8096576fd9bde 100644
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -549,10 +549,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+       NAPI_GRO_CB(skb)->is_flist = 0;
+       if (!sk || !udp_sk(sk)->gro_receive) {
+               if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+-                      NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;
++                      NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
+               if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
+-                  (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
++                  (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist)
+                       return call_gro_receive(udp_gro_receive_segment, head, skb);
+               /* no GRO, be sure flush the current packet */
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index ae4f7f983f951..ddd17b5ea4259 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -440,7 +440,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+                                                     (struct sockaddr *)sin6);
+       }
+-      if (udp_sk(sk)->gro_enabled)
++      if (udp_test_bit(GRO_ENABLED, sk))
+               udp_cmsg_recv(msg, sk, skb);
+       if (np->rxopt.all)
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-move-udp-no_check6_rx-to-udp-udp_flags.patch b/queue-6.1/udp-move-udp-no_check6_rx-to-udp-udp_flags.patch
new file mode 100644 (file)
index 0000000..9a420d6
--- /dev/null
@@ -0,0 +1,123 @@
+From e1834f0244ebec827e6c0f8f4cf0bce3dc679841 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:23 +0000
+Subject: udp: move udp->no_check6_rx to udp->udp_flags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit bcbc1b1de884647aa0318bf74eb7f293d72a1e40 ]
+
+syzbot reported that udp->no_check6_rx can be read locklessly.
+Use one atomic bit from udp->udp_flags.
+
+Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h | 10 +++++-----
+ net/ipv4/udp.c      |  4 ++--
+ net/ipv6/udp.c      |  6 +++---
+ 3 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index b5ca5760ae34b..e6cd46e2b0831 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -33,6 +33,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
+ enum {
+       UDP_FLAGS_CORK,         /* Cork is required */
+       UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
++      UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
+ };
+ struct udp_sock {
+@@ -46,8 +47,7 @@ struct udp_sock {
+       int              pending;       /* Any pending frames ? */
+       __u8             encap_type;    /* Is this an Encapsulation socket? */
+-      unsigned char    no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+-                       encap_enabled:1, /* This socket enabled encap
++      unsigned char    encap_enabled:1, /* This socket enabled encap
+                                          * processing; UDP tunnels and
+                                          * different encapsulation layer set
+                                          * this
+@@ -117,7 +117,7 @@ static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
+ static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+ {
+-      udp_sk(sk)->no_check6_rx = val;
++      udp_assign_bit(NO_CHECK6_RX, sk, val);
+ }
+ static inline bool udp_get_no_check6_tx(const struct sock *sk)
+@@ -125,9 +125,9 @@ static inline bool udp_get_no_check6_tx(const struct sock *sk)
+       return udp_test_bit(NO_CHECK6_TX, sk);
+ }
+-static inline bool udp_get_no_check6_rx(struct sock *sk)
++static inline bool udp_get_no_check6_rx(const struct sock *sk)
+ {
+-      return udp_sk(sk)->no_check6_rx;
++      return udp_test_bit(NO_CHECK6_RX, sk);
+ }
+ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 513035e83a820..01e74919885ad 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2715,7 +2715,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_NO_CHECK6_RX:
+-              up->no_check6_rx = valbool;
++              udp_set_no_check6_rx(sk, valbool);
+               break;
+       case UDP_SEGMENT:
+@@ -2812,7 +2812,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_NO_CHECK6_RX:
+-              val = up->no_check6_rx;
++              val = udp_get_no_check6_rx(sk);
+               break;
+       case UDP_SEGMENT:
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index c6e20293c521f..ae4f7f983f951 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -882,7 +882,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+               /* If zero checksum and no_check is not on for
+                * the socket then skip it.
+                */
+-              if (!uh->check && !udp_sk(sk)->no_check6_rx)
++              if (!uh->check && !udp_get_no_check6_rx(sk))
+                       continue;
+               if (!first) {
+                       first = sk;
+@@ -1000,7 +1000,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+               if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
+                       udp6_sk_rx_dst_set(sk, dst);
+-              if (!uh->check && !udp_sk(sk)->no_check6_rx) {
++              if (!uh->check && !udp_get_no_check6_rx(sk)) {
+                       if (refcounted)
+                               sock_put(sk);
+                       goto report_csum_error;
+@@ -1022,7 +1022,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+       /* Unicast */
+       sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+       if (sk) {
+-              if (!uh->check && !udp_sk(sk)->no_check6_rx)
++              if (!uh->check && !udp_get_no_check6_rx(sk))
+                       goto report_csum_error;
+               return udp6_unicast_rcv_skb(sk, skb, uh);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/udp-move-udp-no_check6_tx-to-udp-udp_flags.patch b/queue-6.1/udp-move-udp-no_check6_tx-to-udp-udp_flags.patch
new file mode 100644 (file)
index 0000000..0b5f161
--- /dev/null
@@ -0,0 +1,114 @@
+From db4859b6d666990e4f3bb767f895153034944c21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 09:17:22 +0000
+Subject: udp: move udp->no_check6_tx to udp->udp_flags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a0002127cd746fcaa182ad3386ef6931c37f3bda ]
+
+syzbot reported that udp->no_check6_tx can be read locklessly.
+Use one atomic bit from udp->udp_flags.
+
+Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/udp.h | 10 +++++-----
+ net/ipv4/udp.c      |  4 ++--
+ net/ipv6/udp.c      |  4 ++--
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index 10b56b8231e3c..b5ca5760ae34b 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -32,6 +32,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
+ enum {
+       UDP_FLAGS_CORK,         /* Cork is required */
++      UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+ };
+ struct udp_sock {
+@@ -45,8 +46,7 @@ struct udp_sock {
+       int              pending;       /* Any pending frames ? */
+       __u8             encap_type;    /* Is this an Encapsulation socket? */
+-      unsigned char    no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
+-                       no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
++      unsigned char    no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+                        encap_enabled:1, /* This socket enabled encap
+                                          * processing; UDP tunnels and
+                                          * different encapsulation layer set
+@@ -112,7 +112,7 @@ static inline struct udp_sock *udp_sk(const struct sock *sk)
+ static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
+ {
+-      udp_sk(sk)->no_check6_tx = val;
++      udp_assign_bit(NO_CHECK6_TX, sk, val);
+ }
+ static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+@@ -120,9 +120,9 @@ static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+       udp_sk(sk)->no_check6_rx = val;
+ }
+-static inline bool udp_get_no_check6_tx(struct sock *sk)
++static inline bool udp_get_no_check6_tx(const struct sock *sk)
+ {
+-      return udp_sk(sk)->no_check6_tx;
++      return udp_test_bit(NO_CHECK6_TX, sk);
+ }
+ static inline bool udp_get_no_check6_rx(struct sock *sk)
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 60a754477efb2..513035e83a820 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2711,7 +2711,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_NO_CHECK6_TX:
+-              up->no_check6_tx = valbool;
++              udp_set_no_check6_tx(sk, valbool);
+               break;
+       case UDP_NO_CHECK6_RX:
+@@ -2808,7 +2808,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case UDP_NO_CHECK6_TX:
+-              val = up->no_check6_tx;
++              val = udp_get_no_check6_tx(sk);
+               break;
+       case UDP_NO_CHECK6_RX:
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 85653e3a04fe8..c6e20293c521f 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1260,7 +1260,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
+-              if (udp_sk(sk)->no_check6_tx) {
++              if (udp_get_no_check6_tx(sk)) {
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
+@@ -1281,7 +1281,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+       if (is_udplite)
+               csum = udplite_csum(skb);
+-      else if (udp_sk(sk)->no_check6_tx) {   /* UDP csum disabled */
++      else if (udp_get_no_check6_tx(sk)) {   /* UDP csum disabled */
+               skb->ip_summed = CHECKSUM_NONE;
+               goto send;
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-iwlwifi-pcie-don-t-synchronize-irqs-from-irq.patch b/queue-6.1/wifi-iwlwifi-pcie-don-t-synchronize-irqs-from-irq.patch
new file mode 100644 (file)
index 0000000..96697b7
--- /dev/null
@@ -0,0 +1,170 @@
+From 011b4aa30e6bcbdc4307b512c1563b39d38981b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Dec 2023 11:13:34 +0100
+Subject: wifi: iwlwifi: pcie: don't synchronize IRQs from IRQ
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 400f6ebbc175286576c7f7fddf3c347d09d12310 ]
+
+On older devices (before unified image!) we can end up calling
+stop_device from an rfkill interrupt. However, in stop_device
+we attempt to synchronize IRQs, which then of course deadlocks.
+
+Avoid this by checking the context: if running from the IRQ
+thread, don't synchronize. This wouldn't be correct on a
+new device since RSS is supported, but older devices only have
+a single interrupt/queue.
+
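+The core of the change (sketch, lifted from the trans.c hunk below) is to
+skip the synchronize step when stop_device runs from the IRQ thread:
+
+        if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
+                ...
+                if (!from_irq)
+                        iwl_pcie_synchronize_irqs(trans);
+                ...
+        }
+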
+Fixes: 37fb29bd1f90 ("wifi: iwlwifi: pcie: synchronize IRQs before NAPI")
+Reviewed-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://msgid.link/20231215111335.59aab00baed7.Iadfe154d6248e7f9dfd69522e5429dbbd72925d7@changeid
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/wireless/intel/iwlwifi/pcie/internal.h  |  4 ++--
+ drivers/net/wireless/intel/iwlwifi/pcie/rx.c    |  8 ++++----
+ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 17 +++++++++--------
+ 3 files changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+index 69b95ad5993b0..2ec4ee8ab317c 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+@@ -745,7 +745,7 @@ static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
+       }
+ }
+-void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans);
++void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans, bool from_irq);
+ static inline bool iwl_is_rfkill_set(struct iwl_trans *trans)
+ {
+@@ -792,7 +792,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans)
+       return (trans->dbg.dest_tlv || iwl_trans_dbg_ini_valid(trans));
+ }
+-void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state);
++void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state, bool from_irq);
+ void iwl_trans_pcie_dump_regs(struct iwl_trans *trans);
+ #ifdef CONFIG_IWLWIFI_DEBUGFS
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+index 90a46faaaffdf..57a11ee05bc36 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+@@ -1781,7 +1781,7 @@ static u32 iwl_pcie_int_cause_ict(struct iwl_trans *trans)
+       return inta;
+ }
+-void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans)
++void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans, bool from_irq)
+ {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct isr_statistics *isr_stats = &trans_pcie->isr_stats;
+@@ -1805,7 +1805,7 @@ void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans)
+       isr_stats->rfkill++;
+       if (prev != report)
+-              iwl_trans_pcie_rf_kill(trans, report);
++              iwl_trans_pcie_rf_kill(trans, report, from_irq);
+       mutex_unlock(&trans_pcie->mutex);
+       if (hw_rfkill) {
+@@ -1945,7 +1945,7 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id)
+       /* HW RF KILL switch toggled */
+       if (inta & CSR_INT_BIT_RF_KILL) {
+-              iwl_pcie_handle_rfkill_irq(trans);
++              iwl_pcie_handle_rfkill_irq(trans, true);
+               handled |= CSR_INT_BIT_RF_KILL;
+       }
+@@ -2362,7 +2362,7 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
+       /* HW RF KILL switch toggled */
+       if (inta_hw & MSIX_HW_INT_CAUSES_REG_RF_KILL)
+-              iwl_pcie_handle_rfkill_irq(trans);
++              iwl_pcie_handle_rfkill_irq(trans, true);
+       if (inta_hw & MSIX_HW_INT_CAUSES_REG_HW_ERR) {
+               IWL_ERR(trans,
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+index 796972f224326..c7ed35b3dd8d5 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+@@ -1080,7 +1080,7 @@ bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans)
+       report = test_bit(STATUS_RFKILL_OPMODE, &trans->status);
+       if (prev != report)
+-              iwl_trans_pcie_rf_kill(trans, report);
++              iwl_trans_pcie_rf_kill(trans, report, false);
+       return hw_rfkill;
+ }
+@@ -1234,7 +1234,7 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie)
+       trans_pcie->hw_mask = trans_pcie->hw_init_mask;
+ }
+-static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans)
++static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool from_irq)
+ {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+@@ -1261,7 +1261,8 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans)
+       if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
+               IWL_DEBUG_INFO(trans,
+                              "DEVICE_ENABLED bit was set and is now cleared\n");
+-              iwl_pcie_synchronize_irqs(trans);
++              if (!from_irq)
++                      iwl_pcie_synchronize_irqs(trans);
+               iwl_pcie_rx_napi_sync(trans);
+               iwl_pcie_tx_stop(trans);
+               iwl_pcie_rx_stop(trans);
+@@ -1451,7 +1452,7 @@ void iwl_trans_pcie_handle_stop_rfkill(struct iwl_trans *trans,
+               clear_bit(STATUS_RFKILL_OPMODE, &trans->status);
+       }
+       if (hw_rfkill != was_in_rfkill)
+-              iwl_trans_pcie_rf_kill(trans, hw_rfkill);
++              iwl_trans_pcie_rf_kill(trans, hw_rfkill, false);
+ }
+ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
+@@ -1466,12 +1467,12 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
+       mutex_lock(&trans_pcie->mutex);
+       trans_pcie->opmode_down = true;
+       was_in_rfkill = test_bit(STATUS_RFKILL_OPMODE, &trans->status);
+-      _iwl_trans_pcie_stop_device(trans);
++      _iwl_trans_pcie_stop_device(trans, false);
+       iwl_trans_pcie_handle_stop_rfkill(trans, was_in_rfkill);
+       mutex_unlock(&trans_pcie->mutex);
+ }
+-void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state)
++void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state, bool from_irq)
+ {
+       struct iwl_trans_pcie __maybe_unused *trans_pcie =
+               IWL_TRANS_GET_PCIE_TRANS(trans);
+@@ -1484,7 +1485,7 @@ void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state)
+               if (trans->trans_cfg->gen2)
+                       _iwl_trans_pcie_gen2_stop_device(trans);
+               else
+-                      _iwl_trans_pcie_stop_device(trans);
++                      _iwl_trans_pcie_stop_device(trans, from_irq);
+       }
+ }
+@@ -2815,7 +2816,7 @@ static ssize_t iwl_dbgfs_rfkill_write(struct file *file,
+       IWL_WARN(trans, "changing debug rfkill %d->%d\n",
+                trans_pcie->debug_rfkill, new_value);
+       trans_pcie->debug_rfkill = new_value;
+-      iwl_pcie_handle_rfkill_irq(trans);
++      iwl_pcie_handle_rfkill_irq(trans, false);
+       return count;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/wifi-iwlwifi-yoyo-swap-cdb-and-jacket-bits-values.patch b/queue-6.1/wifi-iwlwifi-yoyo-swap-cdb-and-jacket-bits-values.patch
new file mode 100644 (file)
index 0000000..f493be6
--- /dev/null
@@ -0,0 +1,40 @@
+From 11af298a892bfd2816d2ccff3eaa3db927072f70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Oct 2023 12:36:22 +0300
+Subject: wifi: iwlwifi: yoyo: swap cdb and jacket bits values
+
+From: Rotem Saado <rotem.saado@intel.com>
+
+[ Upstream commit 65008777b9dcd2002414ddb2c2158293a6e2fd6f ]
+
+The bits are wrong: the jacket bit should be 5 and the cdb bit 4.
+Fix it.
+
+Fixes: 1f171f4f1437 ("iwlwifi: Add support for getting rf id with blank otp")
+Signed-off-by: Rotem Saado <rotem.saado@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20231004123422.356d8dacda2f.I349ab888b43a11baa2453a1d6978a6a703e422f0@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+index 157d1f31c4871..c5a306b01fe20 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+@@ -348,8 +348,8 @@
+ #define RFIC_REG_RD                   0xAD0470
+ #define WFPM_CTRL_REG                 0xA03030
+ #define WFPM_OTP_CFG1_ADDR            0x00a03098
+-#define WFPM_OTP_CFG1_IS_JACKET_BIT   BIT(4)
+-#define WFPM_OTP_CFG1_IS_CDB_BIT      BIT(5)
++#define WFPM_OTP_CFG1_IS_JACKET_BIT   BIT(5)
++#define WFPM_OTP_CFG1_IS_CDB_BIT      BIT(4)
+ #define WFPM_GP2                      0xA030B4
+-- 
+2.43.0
+