From 17f6c32ea7269089e7f26ca5c579da46d1c4ec15 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 23 Jul 2023 21:25:06 -0400 Subject: [PATCH] Fixes for 6.1 Signed-off-by: Sasha Levin --- ...acklight-native-dmi-quirk-for-dell-s.patch | 46 ++ ...l-up-loops-in-dsp-setup-code-for-aud.patch | 155 +++++ ...-fix-generic-fixup-definition-for-cs.patch | 82 +++ ...-for-invalid-dai-id-handling-in-acp_.patch | 63 ++ ...d938x-fix-db-range-for-hphl-and-hphr.patch | 51 ++ ...-wcd938x-fix-mbhc-impedance-loglevel.patch | 43 ++ ...do-not-close-gpr-port-before-closing.patch | 60 ++ ...race-uninitialized-data-in-dfsentry_.patch | 60 ++ ...ent-call-disconnect-callback-before-.patch | 168 +++++ ...nc-avoid-use-after-free-in-dbg-for-h.patch | 60 ++ ...x-iso_conn-related-locking-and-valid.patch | 292 +++++++++ ...u-for-hci_conn_params-and-iterate-sa.patch | 594 ++++++++++++++++++ ...address-kcsan-report-on-bpf_lru_list.patch | 177 ++++++ ...i-type-used-for-freplace-attached-fu.patch | 55 ++ ...g-idx-logic-in-check_max_stack_depth.patch | 75 +++ ...ing-only-if-writing-to-unprivileged_.patch | 47 ++ ..._max_stack_depth-for-async-callbacks.patch | 102 +++ ...id-taking-fast-sock-lock-in-iterator.patch | 152 +++++ ...k-warning-when-enabling-stp-in-netns.patch | 71 +++ ...ore-careful-when-setting-mirror_num_.patch | 50 ++ ...k-during-reconnection-after-timeout-.patch | 100 +++ ...devlink_port_type_warn-source-device.patch | 77 +++ ...nteger-overflow-in-radeon_cs_parser_.patch | 43 ++ ...x-do-a-final-check-before-timing-out.patch | 69 ++ ...ix-missing-irq-check-in-au1200fb_drv.patch | 40 ++ ...-removed-unneeded-release_mem_region.patch | 36 ++ ...warn-about-invalid-left-right-margin.patch | 43 ++ ...-read-only-mounted-filesystem-in-txb.patch | 41 ++ ...s-fix-null-ptr-deref-read-in-txbegin.patch | 46 ++ ...-array-index-out-of-bounds-in-dballo.patch | 88 +++ ...r-03f0-464a-hp-elite-presenter-mouse.patch | 56 ++ ...ock-caused-by-rtnl-and-driver-s-lock.patch | 342 ++++++++++ 
...bounds-when-setting-channels-on-remo.patch | 160 +++++ ...fix-reset-task-race-with-iavf_remove.patch | 190 ++++++ ...vf-fix-use-after-free-in-free_netdev.patch | 215 +++++++ ...make-functions-static-where-possible.patch | 223 +++++++ ...v_update_features-into-watchdog-task.patch | 95 +++ ...-vlan-offloading-caps-once-after-vfr.patch | 66 ++ ...-internal-state-to-free-traffic-irqs.patch | 65 ++ ...-reset-in-callbacks-which-trigger-it.patch | 253 ++++++++ ...ix-igb_down-hung-on-surprise-removal.patch | 89 +++ ...avoid-transmit-queue-timeout-for-xdp.patch | 61 ++ ...t-garbled-tx-queue-with-xdp-zerocopy.patch | 79 +++ ...ly-sequence-symbols-when-config_lto_.patch | 151 +++++ ...-the-performance-of-kallsyms_lookup_.patch | 241 +++++++ ...to-only-suffixes-from-promoted-globa.patch | 104 +++ ...on-t-drop-packet-from-non-root-netns.patch | 50 ++ ...c-prom-address-warray-bounds-warning.patch | 56 ++ ...p-correct-ksz8795-static-mac-table-a.patch | 94 +++ ...p-ksz8-make-ksz8_r_sta_mac_table-sta.patch | 54 ++ ...p-ksz8-separate-static-mac-table-ope.patch | 111 ++++ ...p-ksz8_r_sta_mac_table-avoid-using-e.patch | 154 +++++ ...t-litex-add-support-for-64-bit-stats.patch | 82 +++ ...et-mtk_eth_soc-handle-probe-deferral.patch | 86 +++ ...cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch | 78 +++ ...ncpy-not-using-dest-buf-length-as-le.patch | 140 +++++ ...sistent-txhash-in-time_wait-and-syn_.patch | 134 ++++ ...use-kfree_sensitive-instead-of-kfree.patch | 38 ++ ...ipv6-check-return-value-of-pskb_trim.patch | 39 ++ ...stale-pointer-dereference-in-phy_ini.patch | 74 +++ ...f-undo-tcf_bind_filter-in-case-of-an.patch | 165 +++++ ...tchall-undo-tcf_bind_filter-in-case-.patch | 98 +++ ...2-undo-refcount-decrement-in-case-up.patch | 49 ++ ...2-undo-tcf_bind_filter-if-u32_replac.patch | 122 ++++ ...les-can-t-schedule-in-nft_chain_vali.patch | 64 ++ ...les-fix-spurious-set-element-inserti.patch | 49 ++ ...les-skip-bound-chain-in-netns-releas.patch | 37 ++ 
...ables-skip-bound-chain-on-rule-flush.patch | 43 ++ ...t_pipapo-fix-improper-element-remova.patch | 63 ++ ...nt-allocate-bpids-for-lbk-interfaces.patch | 43 ++ ...nd-offset-of-struct-vfsmount-in-ovl_.patch | 63 ++ ...ibrary-not-found-error-when-using-cs.patch | 94 +++ ...rzg2l-handle-non-unique-subnode-name.patch | 118 ++++ ...rzv2m-handle-non-unique-subnode-name.patch | 116 ++++ queue-6.1/quota-fix-warning-in-dqgrab.patch | 105 ++++ ...isable-quotas-when-add_dquot_ref-fai.patch | 45 ++ ...nal-concurrent-load-from-cpu_no_qs.b.patch | 76 +++ ...pr_info-with-spin-lock-in-cblist_ini.patch | 91 +++ ...-the-lookup-process-failing-to-get-s.patch | 113 ++++ ...-balance-task-to-its-current-running.patch | 96 +++ ...e-recent_used_cpu-to-test-p-cpus_ptr.patch | 41 ++ ...-unprivileged-polling-of-n-2s-period.patch | 434 +++++++++++++ ...-extract-update_triggers-side-effect.patch | 91 +++ ...ix-avgs_work-re-arm-in-psi_avgs_work.patch | 141 +++++ ...earrange-polling-code-in-preparation.patch | 247 ++++++++ ...-existing-poll-members-in-preparatio.patch | 432 +++++++++++++ ...rnfs-polling-functions-for-psi-trigg.patch | 176 ++++++ ...keys-modify-mismatched-function-name.patch | 40 ++ queue-6.1/series | 113 ++++ .../spi-bcm63xx-fix-max-prepend-length.patch | 47 ++ ...compatible-for-intel-mount-evans-soc.patch | 81 +++ ...ear-loopback-bit-after-loopback-test.patch | 40 ++ ...data-races-around-fastopenq.max_qlen.patch | 77 +++ ...a-races-around-icsk-icsk_syn_retries.patch | 69 ++ ...a-races-around-icsk-icsk_user_timeou.patch | 54 ++ ...-data-races-around-rskq_defer_accept.patch | 53 ++ ...a-races-around-tcp_rsk-req-ts_recent.patch | 184 ++++++ ...data-races-around-tcp_rsk-req-txhash.patch | 170 +++++ ...data-races-around-tp-keepalive_intvl.patch | 68 ++ ...ata-races-around-tp-keepalive_probes.patch | 69 ++ ...-data-races-around-tp-keepalive_time.patch | 58 ++ ...nnotate-data-races-around-tp-linger2.patch | 52 ++ ...e-data-races-around-tp-notsent_lowat.patch | 64 ++ 
...te-data-races-around-tp-tcp_tx_delay.patch | 46 ++ ...notate-data-races-around-tp-tsoffset.patch | 63 ++ ...lized-array-access-for-some-pathname.patch | 41 ++ ...support-default-regdb-while-searchin.patch | 137 ++++ ...ix-memory-leak-in-wmi-firmware-stats.patch | 63 ++ ...registration-of-6ghz-only-phy-withou.patch | 71 +++ ...i-iwlwifi-add-support-for-new-pci-id.patch | 43 ++ ...mvm-avoid-baid-size-integer-overflow.patch | 47 ++ ...e-add-device-id-51f1-for-killer-1675.patch | 38 ++ ..._hwsim-fix-possible-null-dereference.patch | 46 ++ ...ix-wstringop-overflow-warning-in-ioc.patch | 71 +++ 114 files changed, 11683 insertions(+) create mode 100644 queue-6.1/acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch create mode 100644 queue-6.1/alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch create mode 100644 queue-6.1/alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch create mode 100644 queue-6.1/asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch create mode 100644 queue-6.1/asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch create mode 100644 queue-6.1/asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch create mode 100644 queue-6.1/asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch create mode 100644 queue-6.1/asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch create mode 100644 queue-6.1/bluetooth-hci_event-call-disconnect-callback-before-.patch create mode 100644 queue-6.1/bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch create mode 100644 queue-6.1/bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch create mode 100644 queue-6.1/bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch create mode 100644 queue-6.1/bpf-address-kcsan-report-on-bpf_lru_list.patch create mode 100644 queue-6.1/bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch create mode 100644 queue-6.1/bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch create mode 100644 
queue-6.1/bpf-print-a-warning-only-if-writing-to-unprivileged_.patch create mode 100644 queue-6.1/bpf-repeat-check_max_stack_depth-for-async-callbacks.patch create mode 100644 queue-6.1/bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch create mode 100644 queue-6.1/bridge-add-extack-warning-when-enabling-stp-in-netns.patch create mode 100644 queue-6.1/btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch create mode 100644 queue-6.1/cifs-fix-mid-leak-during-reconnection-after-timeout-.patch create mode 100644 queue-6.1/devlink-report-devlink_port_type_warn-source-device.patch create mode 100644 queue-6.1/drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch create mode 100644 queue-6.1/dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch create mode 100644 queue-6.1/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch create mode 100644 queue-6.1/fbdev-imxfb-removed-unneeded-release_mem_region.patch create mode 100644 queue-6.1/fbdev-imxfb-warn-about-invalid-left-right-margin.patch create mode 100644 queue-6.1/fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch create mode 100644 queue-6.1/fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch create mode 100644 queue-6.1/fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch create mode 100644 queue-6.1/hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch create mode 100644 queue-6.1/iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch create mode 100644 queue-6.1/iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch create mode 100644 queue-6.1/iavf-fix-reset-task-race-with-iavf_remove.patch create mode 100644 queue-6.1/iavf-fix-use-after-free-in-free_netdev.patch create mode 100644 queue-6.1/iavf-make-functions-static-where-possible.patch create mode 100644 queue-6.1/iavf-move-netdev_update_features-into-watchdog-task.patch create mode 100644 queue-6.1/iavf-send-vlan-offloading-caps-once-after-vfr.patch create mode 100644 
queue-6.1/iavf-use-internal-state-to-free-traffic-irqs.patch create mode 100644 queue-6.1/iavf-wait-for-reset-in-callbacks-which-trigger-it.patch create mode 100644 queue-6.1/igb-fix-igb_down-hung-on-surprise-removal.patch create mode 100644 queue-6.1/igc-avoid-transmit-queue-timeout-for-xdp.patch create mode 100644 queue-6.1/igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch create mode 100644 queue-6.1/kallsyms-correctly-sequence-symbols-when-config_lto_.patch create mode 100644 queue-6.1/kallsyms-improve-the-performance-of-kallsyms_lookup_.patch create mode 100644 queue-6.1/kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch create mode 100644 queue-6.1/llc-don-t-drop-packet-from-non-root-netns.patch create mode 100644 queue-6.1/mips-dec-prom-address-warray-bounds-warning.patch create mode 100644 queue-6.1/net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch create mode 100644 queue-6.1/net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch create mode 100644 queue-6.1/net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch create mode 100644 queue-6.1/net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch create mode 100644 queue-6.1/net-ethernet-litex-add-support-for-64-bit-stats.patch create mode 100644 queue-6.1/net-ethernet-mtk_eth_soc-handle-probe-deferral.patch create mode 100644 queue-6.1/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch create mode 100644 queue-6.1/net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch create mode 100644 queue-6.1/net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch create mode 100644 queue-6.1/net-ipv4-use-kfree_sensitive-instead-of-kfree.patch create mode 100644 queue-6.1/net-ipv6-check-return-value-of-pskb_trim.patch create mode 100644 queue-6.1/net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch create mode 100644 queue-6.1/net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch create mode 100644 
queue-6.1/net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch create mode 100644 queue-6.1/net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch create mode 100644 queue-6.1/net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch create mode 100644 queue-6.1/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch create mode 100644 queue-6.1/netfilter-nf_tables-fix-spurious-set-element-inserti.patch create mode 100644 queue-6.1/netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch create mode 100644 queue-6.1/netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch create mode 100644 queue-6.1/netfilter-nft_set_pipapo-fix-improper-element-remova.patch create mode 100644 queue-6.1/octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch create mode 100644 queue-6.1/ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch create mode 100644 queue-6.1/perf-build-fix-library-not-found-error-when-using-cs.patch create mode 100644 queue-6.1/pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch create mode 100644 queue-6.1/pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch create mode 100644 queue-6.1/quota-fix-warning-in-dqgrab.patch create mode 100644 queue-6.1/quota-properly-disable-quotas-when-add_dquot_ref-fai.patch create mode 100644 queue-6.1/rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch create mode 100644 queue-6.1/rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch create mode 100644 queue-6.1/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch create mode 100644 queue-6.1/sched-fair-don-t-balance-task-to-its-current-running.patch create mode 100644 queue-6.1/sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch create mode 100644 queue-6.1/sched-psi-allow-unprivileged-polling-of-n-2s-period.patch create mode 100644 queue-6.1/sched-psi-extract-update_triggers-side-effect.patch create mode 100644 queue-6.1/sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch create mode 100644 
queue-6.1/sched-psi-rearrange-polling-code-in-preparation.patch create mode 100644 queue-6.1/sched-psi-rename-existing-poll-members-in-preparatio.patch create mode 100644 queue-6.1/sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch create mode 100644 queue-6.1/security-keys-modify-mismatched-function-name.patch create mode 100644 queue-6.1/spi-bcm63xx-fix-max-prepend-length.patch create mode 100644 queue-6.1/spi-dw-add-compatible-for-intel-mount-evans-soc.patch create mode 100644 queue-6.1/spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-fastopenq.max_qlen.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-rskq_defer_accept.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-keepalive_intvl.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-keepalive_probes.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-keepalive_time.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-linger2.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-notsent_lowat.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch create mode 100644 queue-6.1/tcp-annotate-data-races-around-tp-tsoffset.patch create mode 100644 queue-6.1/udf-fix-uninitialized-array-access-for-some-pathname.patch create mode 100644 queue-6.1/wifi-ath11k-add-support-default-regdb-while-searchin.patch create mode 100644 queue-6.1/wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch create mode 100644 
queue-6.1/wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch create mode 100644 queue-6.1/wifi-iwlwifi-add-support-for-new-pci-id.patch create mode 100644 queue-6.1/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch create mode 100644 queue-6.1/wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch create mode 100644 queue-6.1/wifi-mac80211_hwsim-fix-possible-null-dereference.patch create mode 100644 queue-6.1/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch diff --git a/queue-6.1/acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch b/queue-6.1/acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch new file mode 100644 index 00000000000..9ccde5f3384 --- /dev/null +++ b/queue-6.1/acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch @@ -0,0 +1,46 @@ +From 92bf9e7e60ec477f33e9520a2f8ed58c717a4f9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 20:45:04 +0200 +Subject: ACPI: video: Add backlight=native DMI quirk for Dell Studio 1569 + +From: Hans de Goede + +[ Upstream commit 23d28cc0444be3f694eb986cd653b6888b78431d ] + +The Dell Studio 1569 predates Windows 8, so it defaults to using +acpi_video# for backlight control, but this is non functional on +this model. + +Add a DMI quirk to use the native intel_backlight interface which +does work properly. + +Reported-by: raycekarneal +Signed-off-by: Hans de Goede +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/video_detect.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c +index 4a77e7e6e3fa0..c8dd7f7407da2 100644 +--- a/drivers/acpi/video_detect.c ++++ b/drivers/acpi/video_detect.c +@@ -526,6 +526,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), + }, + }, ++ { ++ .callback = video_detect_force_native, ++ /* Dell Studio 1569 */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1569"), ++ }, ++ }, + { + .callback = video_detect_force_native, + /* Acer Aspire 3830TG */ +-- +2.39.2 + diff --git a/queue-6.1/alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch b/queue-6.1/alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch new file mode 100644 index 00000000000..9aaf0c9504e --- /dev/null +++ b/queue-6.1/alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch @@ -0,0 +1,155 @@ +From af0f59a65f332284ca2bf7579e4158dff37dc62d Mon Sep 17 00:00:00 2001 +From: Oswald Buddenhagen +Date: Wed, 10 May 2023 19:39:05 +0200 +Subject: [PATCH AUTOSEL 4.19 02/11] ALSA: emu10k1: roll up loops in DSP setup + code for Audigy +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 8cabf83c7aa54530e699be56249fb44f9505c4f3 ] + +There is no apparent reason for the massive code duplication. 
+ +Signed-off-by: Oswald Buddenhagen +Link: https://lore.kernel.org/r/20230510173917.3073107-3-oswald.buddenhagen@gmx.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/emu10k1/emufx.c | 112 +++----------------------------------- + 1 file changed, 9 insertions(+), 103 deletions(-) + +diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c +index 1f25e6d029d82..84d98c098b744 100644 +--- a/sound/pci/emu10k1/emufx.c ++++ b/sound/pci/emu10k1/emufx.c +@@ -1550,14 +1550,8 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) + gpr += 2; + + /* Master volume (will be renamed later) */ +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS)); +- A_OP(icode, &ptr, iMAC0, A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS)); ++ for (z = 0; z < 8; z++) ++ A_OP(icode, &ptr, iMAC0, A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, 
A_GPR(gpr), A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS)); + snd_emu10k1_init_mono_control(&controls[nctl++], "Wave Master Playback Volume", gpr, 0); + gpr += 2; + +@@ -1641,102 +1635,14 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input)) + dev_dbg(emu->card->dev, "emufx.c: gpr=0x%x, tmp=0x%x\n", + gpr, tmp); + */ +- /* For the EMU1010: How to get 32bit values from the DSP. High 16bits into L, low 16bits into R. */ +- /* A_P16VIN(0) is delayed by one sample, +- * so all other A_P16VIN channels will need to also be delayed +- */ +- /* Left ADC in. 1 of 2 */ + snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_P16VIN(0x0), A_FXBUS2(0) ); +- /* Right ADC in 1 of 2 */ +- gpr_map[gpr++] = 0x00000000; +- /* Delaying by one sample: instead of copying the input +- * value A_P16VIN to output A_FXBUS2 as in the first channel, +- * we use an auxiliary register, delaying the value by one +- * sample +- */ +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(2) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x1), A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(4) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x2), A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(6) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x3), A_C_00000000, A_C_00000000); +- /* For 96kHz mode */ +- /* Left ADC in. 
2 of 2 */ +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0x8) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x4), A_C_00000000, A_C_00000000); +- /* Right ADC in 2 of 2 */ +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xa) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x5), A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xc) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x6), A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xe) ); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x7), A_C_00000000, A_C_00000000); +- /* Pavel Hofman - we still have voices, A_FXBUS2s, and +- * A_P16VINs available - +- * let's add 8 more capture channels - total of 16 +- */ +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x10)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x8), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x12)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x9), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x14)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xa), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x16)); +- 
A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xb), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x18)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xc), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x1a)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xd), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x1c)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xe), +- A_C_00000000, A_C_00000000); +- gpr_map[gpr++] = 0x00000000; +- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp, +- bit_shifter16, +- A_GPR(gpr - 1), +- A_FXBUS2(0x1e)); +- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xf), +- A_C_00000000, A_C_00000000); ++ /* A_P16VIN(0) is delayed by one sample, so all other A_P16VIN channels ++ * will need to also be delayed; we use an auxiliary register for that. 
*/ ++ for (z = 1; z < 0x10; z++) { ++ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr), A_FXBUS2(z * 2) ); ++ A_OP(icode, &ptr, iACC3, A_GPR(gpr), A_P16VIN(z), A_C_00000000, A_C_00000000); ++ gpr_map[gpr++] = 0x00000000; ++ } + } + + #if 0 +-- +2.39.2 + diff --git a/queue-6.1/alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch b/queue-6.1/alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch new file mode 100644 index 00000000000..3f4c3ac4924 --- /dev/null +++ b/queue-6.1/alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch @@ -0,0 +1,82 @@ +From 3d60fd0a504a6c9938b831d63bf6bc1a74979fdf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 09:20:21 +0100 +Subject: ALSA: hda/realtek: Fix generic fixup definition for cs35l41 amp + +From: Vitaly Rodionov + +[ Upstream commit f7b069cf08816252f494d193b9ecdff172bf9aa1 ] + +Generic fixup for CS35L41 amplifies should not have vendor specific +chained fixup. For ThinkPad laptops with led issue, we can just add +specific fixup. 
+ +Fixes: a6ac60b36dade (ALSA: hda/realtek: Fix mute led issue on thinkpad with cs35l41 s-codec) +Signed-off-by: Vitaly Rodionov +Link: https://lore.kernel.org/r/20230720082022.13033-1-vitalyr@opensource.cirrus.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 1a8ca119ffe45..cb34a62075b13 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7220,6 +7220,7 @@ enum { + ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, + ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, + ALC236_FIXUP_DELL_DUAL_CODECS, ++ ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, + }; + + /* A special fixup for Lenovo C940 and Yoga Duet 7; +@@ -9090,8 +9091,6 @@ static const struct hda_fixup alc269_fixups[] = { + [ALC287_FIXUP_CS35L41_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, +- .chained = true, +- .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + }, + [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, +@@ -9228,6 +9227,12 @@ static const struct hda_fixup alc269_fixups[] = { + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + }, ++ [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = cs35l41_fixup_i2c_two, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_THINKPAD_ACPI, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -9750,14 +9755,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), +- SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", 
ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2), +- SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2), ++ SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), ++ SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), +-- +2.39.2 + diff --git a/queue-6.1/asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch b/queue-6.1/asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch new file mode 100644 index 00000000000..dc7aa29a72f --- /dev/null +++ b/queue-6.1/asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch @@ -0,0 +1,63 @@ +From 01fe45bc121655c2ea7d823e3442f3c388fb23b1 Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 16:23:54 +0530 +Subject: ASoC: amd: acp: fix for invalid dai id handling in + acp_get_byte_count() + +From: Vijendar Mukunda + +[ Upstream commit 85aeab362201cf52c34cd429e4f6c75a0b42f9a3 ] + +For invalid dai id, instead of returning -EINVAL +return bytes count as zero in acp_get_byte_count() function. + +Fixes: 623621a9f9e1 ("ASoC: amd: Add common framework to support I2S on ACP SOC") + +Signed-off-by: Vijendar Mukunda +Link: https://lore.kernel.org/r/20230626105356.2580125-6-Vijendar.Mukunda@amd.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/amd/acp/amd.h | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/sound/soc/amd/acp/amd.h b/sound/soc/amd/acp/amd.h +index 5f2119f422715..12a176a50fd6e 100644 +--- a/sound/soc/amd/acp/amd.h ++++ b/sound/soc/amd/acp/amd.h +@@ -173,7 +173,7 @@ int snd_amd_acp_find_config(struct pci_dev *pci); + + static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int direction) + { +- u64 byte_count, low = 0, high = 0; ++ u64 byte_count = 0, low = 0, high = 0; + + if (direction == SNDRV_PCM_STREAM_PLAYBACK) { + switch (dai_id) { +@@ -191,7 +191,7 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int + break; + default: + dev_err(adata->dev, "Invalid dai id %x\n", dai_id); +- return -EINVAL; ++ goto POINTER_RETURN_BYTES; + } + } else { + switch (dai_id) { +@@ -213,12 +213,13 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int + break; + default: + dev_err(adata->dev, "Invalid dai id %x\n", dai_id); +- return -EINVAL; ++ goto POINTER_RETURN_BYTES; + } + } + /* Get 64 bit value from two 32 bit registers */ + byte_count = (high << 32) | low; + ++POINTER_RETURN_BYTES: + return byte_count; + } + +-- +2.39.2 + diff --git a/queue-6.1/asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch b/queue-6.1/asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch 
new file mode 100644 index 00000000000..2f4c267613e --- /dev/null +++ b/queue-6.1/asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch @@ -0,0 +1,51 @@ +From 8fdb4c209948ee94e6e06e178741f29d84f4e4d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 13:57:23 +0100 +Subject: ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR + +From: Srinivas Kandagatla + +[ Upstream commit c03226ba15fe3c42d13907ec7d8536396602557b ] + +dB range for HPHL and HPHR gains are from +6dB to -30dB in steps of +1.5dB with register values range from 0 to 24. + +Current code maps these dB ranges incorrectly, fix them to allow proper +volume setting. + +Fixes: e8ba1e05bdc0 ("ASoC: codecs: wcd938x: add basic controls") +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20230705125723.40464-1-srinivas.kandagatla@linaro.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/wcd938x.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c +index 7715040383840..2316481c2541b 100644 +--- a/sound/soc/codecs/wcd938x.c ++++ b/sound/soc/codecs/wcd938x.c +@@ -210,7 +210,7 @@ struct wcd938x_priv { + }; + + static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); +-static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(line_gain, 600, -3000); ++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); + static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); + + struct wcd938x_mbhc_zdet_param { +@@ -2662,8 +2662,8 @@ static const struct snd_kcontrol_new wcd938x_snd_controls[] = { + wcd938x_get_swr_port, wcd938x_set_swr_port), + SOC_SINGLE_EXT("DSD_R Switch", WCD938X_DSD_R, 0, 1, 0, + wcd938x_get_swr_port, wcd938x_set_swr_port), +- SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 0, line_gain), +- SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 0, line_gain), ++ SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 1, 
line_gain), ++ SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 1, line_gain), + WCD938X_EAR_PA_GAIN_TLV("EAR_PA Volume", WCD938X_ANA_EAR_COMPANDER_CTL, + 2, 0x10, 0, ear_pa_gain), + SOC_SINGLE_EXT("ADC1 Switch", WCD938X_ADC1, 1, 1, 0, +-- +2.39.2 + diff --git a/queue-6.1/asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch b/queue-6.1/asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch new file mode 100644 index 00000000000..5a1143b5bf8 --- /dev/null +++ b/queue-6.1/asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch @@ -0,0 +1,43 @@ +From 6837fd2094a0338619e2fbd26039c39ad53d3cf8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Jun 2023 16:27:13 +0200 +Subject: ASoC: codecs: wcd938x: fix mbhc impedance loglevel + +From: Johan Hovold + +[ Upstream commit e5ce198bd5c6923b6a51e1493b1401f84c24b26d ] + +Demote the MBHC impedance measurement printk, which is not an error +message, from error to debug level. + +While at it, fix the capitalisation of "ohm" and add the missing space +before the opening parenthesis. 
+ +Fixes: bcee7ed09b8e ("ASoC: codecs: wcd938x: add Multi Button Headset Control support") +Signed-off-by: Johan Hovold +Reviewed-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20230630142717.5314-2-johan+linaro@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/wcd938x.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c +index df0b3ac7f1321..7715040383840 100644 +--- a/sound/soc/codecs/wcd938x.c ++++ b/sound/soc/codecs/wcd938x.c +@@ -2165,8 +2165,8 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x, + else if (x1 < minCode_param[noff]) + *zdet = WCD938X_ZDET_FLOATING_IMPEDANCE; + +- pr_err("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n", +- __func__, d1, c1, x1, *zdet); ++ pr_debug("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n", ++ __func__, d1, c1, x1, *zdet); + ramp_down: + i = 0; + while (x1) { +-- +2.39.2 + diff --git a/queue-6.1/asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch b/queue-6.1/asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch new file mode 100644 index 00000000000..a14f4ebf759 --- /dev/null +++ b/queue-6.1/asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch @@ -0,0 +1,60 @@ +From 4b2b48aa8c43caaeef24802e4265e3ba2daa7ba5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 14:18:42 +0100 +Subject: ASoC: qcom: q6apm: do not close GPR port before closing graph + +From: Srinivas Kandagatla + +[ Upstream commit c1be62923d4d86e7c06b1224626e27eb8d9ab32e ] + +Closing GPR port before graph close can result in unhandled notifications +from DSP, this results in spam of errors from GPR driver as there is no +one to handle these notifications at that point in time. + +Fix this by closing GPR port after graph close is finished.
+ +Fixes: 5477518b8a0e ("ASoC: qdsp6: audioreach: add q6apm support") +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20230705131842.41584-1-srinivas.kandagatla@linaro.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/qcom/qdsp6/q6apm.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c +index 794019286c704..16acdf3a99e1c 100644 +--- a/sound/soc/qcom/qdsp6/q6apm.c ++++ b/sound/soc/qcom/qdsp6/q6apm.c +@@ -515,6 +515,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op) + + switch (hdr->opcode) { + case DATA_CMD_RSP_WR_SH_MEM_EP_DATA_BUFFER_DONE_V2: ++ if (!graph->ar_graph) ++ break; + client_event = APM_CLIENT_EVENT_DATA_WRITE_DONE; + mutex_lock(&graph->lock); + token = hdr->token & APM_WRITE_TOKEN_MASK; +@@ -548,6 +550,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op) + wake_up(&graph->cmd_wait); + break; + case DATA_CMD_RSP_RD_SH_MEM_EP_DATA_BUFFER_V2: ++ if (!graph->ar_graph) ++ break; + client_event = APM_CLIENT_EVENT_DATA_READ_DONE; + mutex_lock(&graph->lock); + rd_done = data->payload; +@@ -650,8 +654,9 @@ int q6apm_graph_close(struct q6apm_graph *graph) + { + struct audioreach_graph *ar_graph = graph->ar_graph; + +- gpr_free_port(graph->port); ++ graph->ar_graph = NULL; + kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph); ++ gpr_free_port(graph->port); + kfree(graph); + + return 0; +-- +2.39.2 + diff --git a/queue-6.1/asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch b/queue-6.1/asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch new file mode 100644 index 00000000000..15bf7cc98a3 --- /dev/null +++ b/queue-6.1/asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch @@ -0,0 +1,60 @@ +From f51906ec30b0242c56247bae4862008fd7ae2eeb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 14:25:23 +0300 +Subject: ASoC: SOF: ipc3-dtrace: 
uninitialized data in + dfsentry_trace_filter_write() + +From: Dan Carpenter + +[ Upstream commit 469e2f28c2cbee2430058c1c9bb6d1675d7195fb ] + +This doesn't check how many bytes the simple_write_to_buffer() writes to +the buffer. The only thing that we know is that the first byte is +initialized and the last byte of the buffer is set to NUL. However +the middle bytes could be uninitialized. + +There is no need to use simple_write_to_buffer(). This code does not +support partial writes but instead passes "pos = 0" as the starting +offset regardless of what the user passed as "*ppos". Just use the +copy_from_user() function and initialize the whole buffer. + +Fixes: 671e0b90051e ("ASoC: SOF: Clone the trace code to ipc3-dtrace as fw_tracing implementation") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/74148292-ce4d-4e01-a1a7-921e6767da14@moroto.mountain +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/ipc3-dtrace.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c +index b815b0244d9e4..8cf421577378c 100644 +--- a/sound/soc/sof/ipc3-dtrace.c ++++ b/sound/soc/sof/ipc3-dtrace.c +@@ -187,7 +187,6 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user + struct snd_sof_dfsentry *dfse = file->private_data; + struct sof_ipc_trace_filter_elem *elems = NULL; + struct snd_sof_dev *sdev = dfse->sdev; +- loff_t pos = 0; + int num_elems; + char *string; + int ret; +@@ -202,11 +201,11 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user + if (!string) + return -ENOMEM; + +- /* assert null termination */ +- string[count] = 0; +- ret = simple_write_to_buffer(string, count, &pos, from, count); +- if (ret < 0) ++ if (copy_from_user(string, from, count)) { ++ ret = -EFAULT; + goto error; ++ } ++ string[count] = '\0'; + + ret = trace_filter_parse(sdev, string, &num_elems, &elems); + if (ret < 0) +-- 
+2.39.2 + diff --git a/queue-6.1/bluetooth-hci_event-call-disconnect-callback-before-.patch b/queue-6.1/bluetooth-hci_event-call-disconnect-callback-before-.patch new file mode 100644 index 00000000000..625180f5a80 --- /dev/null +++ b/queue-6.1/bluetooth-hci_event-call-disconnect-callback-before-.patch @@ -0,0 +1,168 @@ +From f56314f8f520be77c9344013ed73653e992d3600 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Jun 2023 01:04:32 +0300 +Subject: Bluetooth: hci_event: call disconnect callback before deleting conn + +From: Pauli Virtanen + +[ Upstream commit 7f7cfcb6f0825652973b780f248603e23f16ee90 ] + +In hci_cs_disconnect, we do hci_conn_del even if disconnection failed. + +ISO, L2CAP and SCO connections refer to the hci_conn without +hci_conn_get, so disconn_cfm must be called so they can clean up their +conn, otherwise use-after-free occurs. + +ISO: +========================================================== +iso_sock_connect:880: sk 00000000eabd6557 +iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da +... +iso_conn_add:140: hcon 000000001696f1fd conn 00000000b6251073 +hci_dev_put:1487: hci0 orig refcnt 17 +__iso_chan_add:214: conn 00000000b6251073 +iso_sock_clear_timer:117: sock 00000000eabd6557 state 3 +... +hci_rx_work:4085: hci0 Event packet +hci_event_packet:7601: hci0: event 0x0f +hci_cmd_status_evt:4346: hci0: opcode 0x0406 +hci_cs_disconnect:2760: hci0: status 0x0c +hci_sent_cmd_data:3107: hci0 opcode 0x0406 +hci_conn_del:1151: hci0 hcon 000000001696f1fd handle 2560 +hci_conn_unlink:1102: hci0: hcon 000000001696f1fd +hci_conn_drop:1451: hcon 00000000d8521aaf orig refcnt 2 +hci_chan_list_flush:2780: hcon 000000001696f1fd +hci_dev_put:1487: hci0 orig refcnt 21 +hci_dev_put:1487: hci0 orig refcnt 20 +hci_req_cmd_complete:3978: opcode 0x0406 status 0x0c +... ... 
+iso_sock_sendmsg:1098: sock 00000000dea5e2e0, sk 00000000eabd6557 +BUG: kernel NULL pointer dereference, address: 0000000000000668 +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +RIP: 0010:iso_sock_sendmsg (net/bluetooth/iso.c:1112) bluetooth +========================================================== + +L2CAP: +================================================================== +hci_cmd_status_evt:4359: hci0: opcode 0x0406 +hci_cs_disconnect:2760: hci0: status 0x0c +hci_sent_cmd_data:3085: hci0 opcode 0x0406 +hci_conn_del:1151: hci0 hcon ffff88800c999000 handle 3585 +hci_conn_unlink:1102: hci0: hcon ffff88800c999000 +hci_chan_list_flush:2780: hcon ffff88800c999000 +hci_chan_del:2761: hci0 hcon ffff88800c999000 chan ffff888018ddd280 +... +BUG: KASAN: slab-use-after-free in hci_send_acl+0x2d/0x540 [bluetooth] +Read of size 8 at addr ffff888018ddd298 by task bluetoothd/1175 + +CPU: 0 PID: 1175 Comm: bluetoothd Tainted: G E 6.4.0-rc4+ #2 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +Call Trace: + + dump_stack_lvl+0x5b/0x90 + print_report+0xcf/0x670 + ? __virt_addr_valid+0xf8/0x180 + ? hci_send_acl+0x2d/0x540 [bluetooth] + kasan_report+0xa8/0xe0 + ? hci_send_acl+0x2d/0x540 [bluetooth] + hci_send_acl+0x2d/0x540 [bluetooth] + ? __pfx___lock_acquire+0x10/0x10 + l2cap_chan_send+0x1fd/0x1300 [bluetooth] + ? l2cap_sock_sendmsg+0xf2/0x170 [bluetooth] + ? __pfx_l2cap_chan_send+0x10/0x10 [bluetooth] + ? lock_release+0x1d5/0x3c0 + ? mark_held_locks+0x1a/0x90 + l2cap_sock_sendmsg+0x100/0x170 [bluetooth] + sock_write_iter+0x275/0x280 + ? __pfx_sock_write_iter+0x10/0x10 + ? __pfx___lock_acquire+0x10/0x10 + do_iter_readv_writev+0x176/0x220 + ? __pfx_do_iter_readv_writev+0x10/0x10 + ? find_held_lock+0x83/0xa0 + ? selinux_file_permission+0x13e/0x210 + do_iter_write+0xda/0x340 + vfs_writev+0x1b4/0x400 + ? __pfx_vfs_writev+0x10/0x10 + ? __seccomp_filter+0x112/0x750 + ? 
populate_seccomp_data+0x182/0x220 + ? __fget_light+0xdf/0x100 + ? do_writev+0x19d/0x210 + do_writev+0x19d/0x210 + ? __pfx_do_writev+0x10/0x10 + ? mark_held_locks+0x1a/0x90 + do_syscall_64+0x60/0x90 + ? lockdep_hardirqs_on_prepare+0x149/0x210 + ? do_syscall_64+0x6c/0x90 + ? lockdep_hardirqs_on_prepare+0x149/0x210 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7ff45cb23e64 +Code: 15 d1 1f 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 80 3d 9d a7 0d 00 00 74 13 b8 14 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89 +RSP: 002b:00007fff21ae09b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000014 +RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007ff45cb23e64 +RDX: 0000000000000001 RSI: 00007fff21ae0aa0 RDI: 0000000000000017 +RBP: 00007fff21ae0aa0 R08: 000000000095a8a0 R09: 0000607000053f40 +R10: 0000000000000001 R11: 0000000000000202 R12: 00007fff21ae0ac0 +R13: 00000fffe435c150 R14: 00007fff21ae0a80 R15: 000060f000000040 + + +Allocated by task 771: + kasan_save_stack+0x33/0x60 + kasan_set_track+0x25/0x30 + __kasan_kmalloc+0xaa/0xb0 + hci_chan_create+0x67/0x1b0 [bluetooth] + l2cap_conn_add.part.0+0x17/0x590 [bluetooth] + l2cap_connect_cfm+0x266/0x6b0 [bluetooth] + hci_le_remote_feat_complete_evt+0x167/0x310 [bluetooth] + hci_event_packet+0x38d/0x800 [bluetooth] + hci_rx_work+0x287/0xb20 [bluetooth] + process_one_work+0x4f7/0x970 + worker_thread+0x8f/0x620 + kthread+0x17f/0x1c0 + ret_from_fork+0x2c/0x50 + +Freed by task 771: + kasan_save_stack+0x33/0x60 + kasan_set_track+0x25/0x30 + kasan_save_free_info+0x2e/0x50 + ____kasan_slab_free+0x169/0x1c0 + slab_free_freelist_hook+0x9e/0x1c0 + __kmem_cache_free+0xc0/0x310 + hci_chan_list_flush+0x46/0x90 [bluetooth] + hci_conn_cleanup+0x7d/0x330 [bluetooth] + hci_cs_disconnect+0x35d/0x530 [bluetooth] + hci_cmd_status_evt+0xef/0x2b0 [bluetooth] + hci_event_packet+0x38d/0x800 [bluetooth] + hci_rx_work+0x287/0xb20 [bluetooth] + process_one_work+0x4f7/0x970 + 
worker_thread+0x8f/0x620 + kthread+0x17f/0x1c0 + ret_from_fork+0x2c/0x50 +================================================================== + +Fixes: b8d290525e39 ("Bluetooth: clean up connection in hci_cs_disconnect") +Signed-off-by: Pauli Virtanen +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index ec9b0612f2761..83eaf25ece465 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -2789,6 +2789,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) + hci_enable_advertising(hdev); + } + ++ /* Inform sockets conn is gone before we delete it */ ++ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED); ++ + goto done; + } + +-- +2.39.2 + diff --git a/queue-6.1/bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch b/queue-6.1/bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch new file mode 100644 index 00000000000..f4cce427f91 --- /dev/null +++ b/queue-6.1/bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch @@ -0,0 +1,60 @@ +From 37d8d1ea773870a99ffb70e4fb61facc4b296dfc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Jun 2023 15:33:14 -0700 +Subject: Bluetooth: hci_sync: Avoid use-after-free in dbg for + hci_remove_adv_monitor() + +From: Douglas Anderson + +[ Upstream commit de6dfcefd107667ce2dbedf4d9337f5ed557a4a1 ] + +KASAN reports that there's a use-after-free in +hci_remove_adv_monitor(). Trawling through the disassembly, you can +see that the complaint is from the access in bt_dev_dbg() under the +HCI_ADV_MONITOR_EXT_MSFT case. The problem case happens because +msft_remove_monitor() can end up freeing the monitor +structure. 
Specifically: + hci_remove_adv_monitor() -> + msft_remove_monitor() -> + msft_remove_monitor_sync() -> + msft_le_cancel_monitor_advertisement_cb() -> + hci_free_adv_monitor() + +Let's fix the problem by just stashing the relevant data when it's +still valid. + +Fixes: 7cf5c2978f23 ("Bluetooth: hci_sync: Refactor remove Adv Monitor") +Signed-off-by: Douglas Anderson +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_core.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index be0e6865b340f..d034bf2a999e1 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor) + { + int status = 0; ++ int handle; + + switch (hci_get_adv_monitor_offload_ext(hdev)) { + case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */ +@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev, + goto free_monitor; + + case HCI_ADV_MONITOR_EXT_MSFT: ++ handle = monitor->handle; + status = msft_remove_monitor(hdev, monitor); + bt_dev_dbg(hdev, "%s remove monitor %d msft status %d", +- hdev->name, monitor->handle, status); ++ hdev->name, handle, status); + break; + } + +-- +2.39.2 + diff --git a/queue-6.1/bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch b/queue-6.1/bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch new file mode 100644 index 00000000000..997d943298e --- /dev/null +++ b/queue-6.1/bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch @@ -0,0 +1,292 @@ +From 1bba473b620234ccdcf3a2b08e021f5b27202ce4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Jun 2023 01:04:33 +0300 +Subject: Bluetooth: ISO: fix iso_conn related locking and validity issues + +From: Pauli Virtanen + +[ Upstream commit d40ae85ee62e3666f45bc61864b22121346f88ef ] + +sk->sk_state indicates whether iso_pi(sk)->conn 
is valid. Operations +that check/update sk_state and access conn should hold lock_sock, +otherwise they can race. + +The order of taking locks is hci_dev_lock > lock_sock > iso_conn_lock, +which is how it is in connect/disconnect_cfm -> iso_conn_del -> +iso_chan_del. + +Fix locking in iso_connect_cis/bis and sendmsg/recvmsg to take lock_sock +around updating sk_state and conn. + +iso_conn_del must not occur during iso_connect_cis/bis, as it frees the +iso_conn. Hold hdev->lock longer to prevent that. + +This should not reintroduce the issue fixed in commit 241f51931c35 +("Bluetooth: ISO: Avoid circular locking dependency"), since we +acquire locks in order. We retain the fix in iso_sock_connect to release +lock_sock before iso_connect_* acquires hdev->lock. + +Similarly for commit 6a5ad251b7cd ("Bluetooth: ISO: Fix possible +circular locking dependency"). We retain the fix in iso_conn_ready to +not acquire iso_conn_lock before lock_sock. + +iso_conn_add shall return iso_conn with valid hcon. Make it so also when +reusing an old CIS connection waiting for disconnect timeout (see +__iso_sock_close where conn->hcon is set to NULL).
+ +Trace with iso_conn_del after iso_chan_add in iso_connect_cis: +=============================================================== +iso_sock_create:771: sock 00000000be9b69b7 +iso_sock_init:693: sk 000000004dff667e +iso_sock_bind:827: sk 000000004dff667e 70:1a:b8:98:ff:a2 type 1 +iso_sock_setsockopt:1289: sk 000000004dff667e +iso_sock_setsockopt:1289: sk 000000004dff667e +iso_sock_setsockopt:1289: sk 000000004dff667e +iso_sock_connect:875: sk 000000004dff667e +iso_connect_cis:353: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da +hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da +hci_conn_add:1005: hci0 dst 28:3d:c2:4a:7e:da +iso_conn_add:140: hcon 000000007b65d182 conn 00000000daf8625e +__iso_chan_add:214: conn 00000000daf8625e +iso_connect_cfm:1700: hcon 000000007b65d182 bdaddr 28:3d:c2:4a:7e:da status 12 +iso_conn_del:187: hcon 000000007b65d182 conn 00000000daf8625e, err 16 +iso_sock_clear_timer:117: sock 000000004dff667e state 3 + +iso_chan_del:153: sk 000000004dff667e, conn 00000000daf8625e, err 16 +hci_conn_del:1151: hci0 hcon 000000007b65d182 handle 65535 +hci_conn_unlink:1102: hci0: hcon 000000007b65d182 +hci_chan_list_flush:2780: hcon 000000007b65d182 +iso_sock_getsockopt:1376: sk 000000004dff667e +iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e +iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e +iso_sock_getsockopt:1376: sk 000000004dff667e +iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e +iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e +iso_sock_shutdown:1434: sock 00000000be9b69b7, sk 000000004dff667e, how 1 +__iso_sock_close:632: sk 000000004dff667e state 5 socket 00000000be9b69b7 + +BUG: kernel NULL pointer dereference, address: 0000000000000000 +PGD 8000000006467067 P4D 8000000006467067 PUD 3f5f067 PMD 0 +Oops: 0000 [#1] PREEMPT SMP PTI +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +RIP: 0010:__iso_sock_close (net/bluetooth/iso.c:664) bluetooth 
+=============================================================== + +Trace with iso_conn_del before iso_chan_add in iso_connect_cis: +=============================================================== +iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da +... +iso_conn_add:140: hcon 0000000093bc551f conn 00000000768ae504 +hci_dev_put:1487: hci0 orig refcnt 21 +hci_event_packet:7607: hci0: event 0x0e +hci_cmd_complete_evt:4231: hci0: opcode 0x2062 +hci_cc_le_set_cig_params:3846: hci0: status 0x07 +hci_sent_cmd_data:3107: hci0 opcode 0x2062 +iso_connect_cfm:1703: hcon 0000000093bc551f bdaddr 28:3d:c2:4a:7e:da status 7 +iso_conn_del:187: hcon 0000000093bc551f conn 00000000768ae504, err 12 +hci_conn_del:1151: hci0 hcon 0000000093bc551f handle 65535 +hci_conn_unlink:1102: hci0: hcon 0000000093bc551f +hci_chan_list_flush:2780: hcon 0000000093bc551f +__iso_chan_add:214: conn 00000000768ae504 + +iso_sock_clear_timer:117: sock 0000000098323f95 state 3 +general protection fault, probably for non-canonical address 0x30b29c630930aec8: 0000 [#1] PREEMPT SMP PTI +CPU: 1 PID: 1920 Comm: bluetoothd Tainted: G E 6.3.0-rc7+ #4 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +RIP: 0010:detach_if_pending+0x28/0xd0 +Code: 90 90 0f 1f 44 00 00 48 8b 47 08 48 85 c0 0f 84 ad 00 00 00 55 89 d5 53 48 83 3f 00 48 89 fb 74 7d 66 90 48 8b 03 48 8b 53 08 <> +RSP: 0018:ffffb90841a67d08 EFLAGS: 00010007 +RAX: 0000000000000000 RBX: ffff9141bd5061b8 RCX: 0000000000000000 +RDX: 30b29c630930aec8 RSI: ffff9141fdd21e80 RDI: ffff9141bd5061b8 +RBP: 0000000000000001 R08: 0000000000000000 R09: ffffb90841a67b88 +R10: 0000000000000003 R11: ffffffff8613f558 R12: ffff9141fdd21e80 +R13: 0000000000000000 R14: ffff9141b5976010 R15: ffff914185755338 +FS: 00007f45768bd840(0000) GS:ffff9141fdd00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000619000424074 CR3: 0000000009f5e005 CR4: 0000000000170ee0 +Call Trace: + + 
timer_delete+0x48/0x80 + try_to_grab_pending+0xdf/0x170 + __cancel_work+0x37/0xb0 + iso_connect_cis+0x141/0x400 [bluetooth] +=============================================================== + +Trace with NULL conn->hcon in state BT_CONNECT: +=============================================================== +__iso_sock_close:619: sk 00000000f7c71fc5 state 1 socket 00000000d90c5fe5 +... +__iso_sock_close:619: sk 00000000f7c71fc5 state 8 socket 00000000d90c5fe5 +iso_chan_del:153: sk 00000000f7c71fc5, conn 0000000022c03a7e, err 104 +... +iso_sock_connect:862: sk 00000000129b56c3 +iso_connect_cis:348: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a +hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a +hci_dev_hold:1495: hci0 orig refcnt 19 +__iso_chan_add:214: conn 0000000022c03a7e + +iso_sock_clear_timer:117: sock 00000000129b56c3 state 3 +... +iso_sock_ready:1485: sk 00000000129b56c3 +... +iso_sock_sendmsg:1077: sock 00000000e5013966, sk 00000000129b56c3 +BUG: kernel NULL pointer dereference, address: 00000000000006a8 +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 1 PID: 1403 Comm: wireplumber Tainted: G E 6.3.0-rc7+ #4 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +RIP: 0010:iso_sock_sendmsg+0x63/0x2a0 [bluetooth] +=============================================================== + +Fixes: 241f51931c35 ("Bluetooth: ISO: Avoid circular locking dependency") +Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") +Signed-off-by: Pauli Virtanen +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/iso.c | 53 ++++++++++++++++++++++++++------------------- + 1 file changed, 31 insertions(+), 22 deletions(-) + +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index cb959e8eac185..699e4f400df29 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -116,8 +116,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon) + { + struct iso_conn *conn = 
hcon->iso_data; + +- if (conn) ++ if (conn) { ++ if (!conn->hcon) ++ conn->hcon = hcon; + return conn; ++ } + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) +@@ -285,14 +288,13 @@ static int iso_connect_bis(struct sock *sk) + goto unlock; + } + +- hci_dev_unlock(hdev); +- hci_dev_put(hdev); ++ lock_sock(sk); + + err = iso_chan_add(conn, sk, NULL); +- if (err) +- return err; +- +- lock_sock(sk); ++ if (err) { ++ release_sock(sk); ++ goto unlock; ++ } + + /* Update source addr of the socket */ + bacpy(&iso_pi(sk)->src, &hcon->src); +@@ -306,7 +308,6 @@ static int iso_connect_bis(struct sock *sk) + } + + release_sock(sk); +- return err; + + unlock: + hci_dev_unlock(hdev); +@@ -367,14 +368,13 @@ static int iso_connect_cis(struct sock *sk) + goto unlock; + } + +- hci_dev_unlock(hdev); +- hci_dev_put(hdev); ++ lock_sock(sk); + + err = iso_chan_add(conn, sk, NULL); +- if (err) +- return err; +- +- lock_sock(sk); ++ if (err) { ++ release_sock(sk); ++ goto unlock; ++ } + + /* Update source addr of the socket */ + bacpy(&iso_pi(sk)->src, &hcon->src); +@@ -391,7 +391,6 @@ static int iso_connect_cis(struct sock *sk) + } + + release_sock(sk); +- return err; + + unlock: + hci_dev_unlock(hdev); +@@ -1036,8 +1035,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) + { + struct sock *sk = sock->sk; +- struct iso_conn *conn = iso_pi(sk)->conn; + struct sk_buff *skb, **frag; ++ size_t mtu; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); +@@ -1049,11 +1048,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + +- if (sk->sk_state != BT_CONNECTED) ++ lock_sock(sk); ++ ++ if (sk->sk_state != BT_CONNECTED) { ++ release_sock(sk); + return -ENOTCONN; ++ } ++ ++ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu; ++ ++ release_sock(sk); + +- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu, +- HCI_ISO_DATA_HDR_SIZE, 0); ++ skb = bt_skb_sendmsg(sk, msg, len, mtu, 
HCI_ISO_DATA_HDR_SIZE, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); + +@@ -1066,8 +1072,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, + while (len) { + struct sk_buff *tmp; + +- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu, +- 0, 0); ++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0); + if (IS_ERR(tmp)) { + kfree_skb(skb); + return PTR_ERR(tmp); +@@ -1122,15 +1127,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, + BT_DBG("sk %p", sk); + + if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { ++ lock_sock(sk); + switch (sk->sk_state) { + case BT_CONNECT2: +- lock_sock(sk); + iso_conn_defer_accept(pi->conn->hcon); + sk->sk_state = BT_CONFIG; + release_sock(sk); + return 0; + case BT_CONNECT: ++ release_sock(sk); + return iso_connect_cis(sk); ++ default: ++ release_sock(sk); ++ break; + } + } + +-- +2.39.2 + diff --git a/queue-6.1/bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch b/queue-6.1/bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch new file mode 100644 index 00000000000..8a341ebde67 --- /dev/null +++ b/queue-6.1/bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch @@ -0,0 +1,594 @@ +From 6fa1ac47040a970b9823dd880eeff4a1f5d2c7a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Jun 2023 01:04:31 +0300 +Subject: Bluetooth: use RCU for hci_conn_params and iterate safely in hci_sync + +From: Pauli Virtanen + +[ Upstream commit 195ef75e19287b4bc413da3e3e3722b030ac881e ] + +hci_update_accept_list_sync iterates over hdev->pend_le_conns and +hdev->pend_le_reports, and waits for controller events in the loop body, +without holding hdev lock. + +Meanwhile, these lists and the items may be modified e.g. by +le_scan_cleanup. This can invalidate the list cursor or any other item +in the list, resulting in invalid behavior (e.g. use-after-free). + +Use RCU for the hci_conn_params action lists.
Since the loop bodies in +hci_sync block and we cannot use RCU or hdev->lock for the whole loop, +copy list items first and then iterate on the copy. Only the flags field +is written from elsewhere, so READ_ONCE/WRITE_ONCE should guarantee we +read valid values. + +Free params everywhere with hci_conn_params_free so the cleanup is +guaranteed to be done properly. + +This fixes the following, which can be triggered e.g. by BlueZ new +mgmt-tester case "Add + Remove Device Nowait - Success", or by changing +hci_le_set_cig_params to always return false, and running iso-tester: + +================================================================== +BUG: KASAN: slab-use-after-free in hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841) +Read of size 8 at addr ffff888001265018 by task kworker/u3:0/32 + +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 +Workqueue: hci0 hci_cmd_sync_work +Call Trace: + +dump_stack_lvl (./arch/x86/include/asm/irqflags.h:134 lib/dump_stack.c:107) +print_report (mm/kasan/report.c:320 mm/kasan/report.c:430) +? __virt_addr_valid (./include/linux/mmzone.h:1915 ./include/linux/mmzone.h:2011 arch/x86/mm/physaddr.c:65) +? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841) +kasan_report (mm/kasan/report.c:538) +? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841) +hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841) +? __pfx_hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2780) +? mutex_lock (kernel/locking/mutex.c:282) +? __pfx_mutex_lock (kernel/locking/mutex.c:282) +? __pfx_mutex_unlock (kernel/locking/mutex.c:538) +? 
__pfx_update_passive_scan_sync (net/bluetooth/hci_sync.c:2861) +hci_cmd_sync_work (net/bluetooth/hci_sync.c:306) +process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399) +worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538) +? __pfx_worker_thread (kernel/workqueue.c:2480) +kthread (kernel/kthread.c:376) +? __pfx_kthread (kernel/kthread.c:331) +ret_from_fork (arch/x86/entry/entry_64.S:314) + + +Allocated by task 31: +kasan_save_stack (mm/kasan/common.c:46) +kasan_set_track (mm/kasan/common.c:52) +__kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383) +hci_conn_params_add (./include/linux/slab.h:580 ./include/linux/slab.h:720 net/bluetooth/hci_core.c:2277) +hci_connect_le_scan (net/bluetooth/hci_conn.c:1419 net/bluetooth/hci_conn.c:1589) +hci_connect_cis (net/bluetooth/hci_conn.c:2266) +iso_connect_cis (net/bluetooth/iso.c:390) +iso_sock_connect (net/bluetooth/iso.c:899) +__sys_connect (net/socket.c:2003 net/socket.c:2020) +__x64_sys_connect (net/socket.c:2027) +do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) + +Freed by task 15: +kasan_save_stack (mm/kasan/common.c:46) +kasan_set_track (mm/kasan/common.c:52) +kasan_save_free_info (mm/kasan/generic.c:523) +__kasan_slab_free (mm/kasan/common.c:238 mm/kasan/common.c:200 mm/kasan/common.c:244) +__kmem_cache_free (mm/slub.c:1807 mm/slub.c:3787 mm/slub.c:3800) +hci_conn_params_del (net/bluetooth/hci_core.c:2323) +le_scan_cleanup (net/bluetooth/hci_conn.c:202) +process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399) +worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538) +kthread (kernel/kthread.c:376) +ret_from_fork (arch/x86/entry/entry_64.S:314) +================================================================== + +Fixes: e8907f76544f ("Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 3") +Signed-off-by: Pauli Virtanen +Signed-off-by: Luiz Augusto von 
Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 5 ++ + net/bluetooth/hci_conn.c | 10 +-- + net/bluetooth/hci_core.c | 38 ++++++++-- + net/bluetooth/hci_event.c | 12 ++-- + net/bluetooth/hci_sync.c | 117 ++++++++++++++++++++++++++++--- + net/bluetooth/mgmt.c | 26 +++---- + 6 files changed, 164 insertions(+), 44 deletions(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index 84c5ce57eab69..ddbcbf9ccb2ce 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -807,6 +807,7 @@ struct hci_conn_params { + + struct hci_conn *conn; + bool explicit_connect; ++ /* Accessed without hdev->lock: */ + hci_conn_flags_t flags; + u8 privacy_mode; + }; +@@ -1536,7 +1537,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, + bdaddr_t *addr, u8 addr_type); + void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); + void hci_conn_params_clear_disabled(struct hci_dev *hdev); ++void hci_conn_params_free(struct hci_conn_params *param); + ++void hci_pend_le_list_del_init(struct hci_conn_params *param); ++void hci_pend_le_list_add(struct hci_conn_params *param, ++ struct list_head *list); + struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, + bdaddr_t *addr, + u8 addr_type); +diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c +index fef09d2121384..61059571c8779 100644 +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -117,7 +117,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) + */ + params->explicit_connect = false; + +- list_del_init(&params->action); ++ hci_pend_le_list_del_init(params); + + switch (params->auto_connect) { + case HCI_AUTO_CONN_EXPLICIT: +@@ -126,10 +126,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) + return; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: +- list_add(&params->action, &hdev->pend_le_conns); ++
hci_pend_le_list_add(params, &hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: +- list_add(&params->action, &hdev->pend_le_reports); ++ hci_pend_le_list_add(params, &hdev->pend_le_reports); + break; + default: + break; +@@ -1398,8 +1398,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, + if (params->auto_connect == HCI_AUTO_CONN_DISABLED || + params->auto_connect == HCI_AUTO_CONN_REPORT || + params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { +- list_del_init(&params->action); +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_del_init(params); ++ hci_pend_le_list_add(params, &hdev->pend_le_conns); + } + + params->explicit_connect = true; +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index ca42129f8f91a..be0e6865b340f 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -2249,21 +2249,45 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, + return NULL; + } + +-/* This function requires the caller holds hdev->lock */ ++/* This function requires the caller holds hdev->lock or rcu_read_lock */ + struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, + bdaddr_t *addr, u8 addr_type) + { + struct hci_conn_params *param; + +- list_for_each_entry(param, list, action) { ++ rcu_read_lock(); ++ ++ list_for_each_entry_rcu(param, list, action) { + if (bacmp(&param->addr, addr) == 0 && +- param->addr_type == addr_type) ++ param->addr_type == addr_type) { ++ rcu_read_unlock(); + return param; ++ } + } + ++ rcu_read_unlock(); ++ + return NULL; + } + ++/* This function requires the caller holds hdev->lock */ ++void hci_pend_le_list_del_init(struct hci_conn_params *param) ++{ ++ if (list_empty(&param->action)) ++ return; ++ ++ list_del_rcu(&param->action); ++ synchronize_rcu(); ++ INIT_LIST_HEAD(&param->action); ++} ++ ++/* This function requires the caller holds hdev->lock */ ++void hci_pend_le_list_add(struct hci_conn_params *param, ++ struct list_head *list) ++{ ++
list_add_rcu(&param->action, list); ++} ++ + /* This function requires the caller holds hdev->lock */ + struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, + bdaddr_t *addr, u8 addr_type) +@@ -2297,14 +2321,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, + return params; + } + +-static void hci_conn_params_free(struct hci_conn_params *params) ++void hci_conn_params_free(struct hci_conn_params *params) + { ++ hci_pend_le_list_del_init(params); ++ + if (params->conn) { + hci_conn_drop(params->conn); + hci_conn_put(params->conn); + } + +- list_del(&params->action); + list_del(&params->list); + kfree(params); + } +@@ -2342,8 +2367,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev) + continue; + } + +- list_del(&params->list); +- kfree(params); ++ hci_conn_params_free(params); + } + + BT_DBG("All LE disabled connection parameters were removed"); +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index b272cc1f36481..ec9b0612f2761 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -1558,7 +1558,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data, + + params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type); + if (params) +- params->privacy_mode = cp->mode; ++ WRITE_ONCE(params->privacy_mode, cp->mode); + + hci_dev_unlock(hdev); + +@@ -2809,8 +2809,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: +- list_del_init(&params->action); +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_del_init(params); ++ hci_pend_le_list_add(params, &hdev->pend_le_conns); + break; + + default: +@@ -3428,8 +3428,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: +- list_del_init(&params->action); +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_del_init(params); ++ hci_pend_le_list_add(params,
&hdev->pend_le_conns); + hci_update_passive_scan(hdev); + break; + +@@ -5952,7 +5952,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, + conn->dst_type); + if (params) { +- list_del_init(&params->action); ++ hci_pend_le_list_del_init(params); + if (params->conn) { + hci_conn_drop(params->conn); + hci_conn_put(params->conn); +diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c +index 37131a36700a1..2ae038dfc39f7 100644 +--- a/net/bluetooth/hci_sync.c ++++ b/net/bluetooth/hci_sync.c +@@ -2139,15 +2139,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev, + return 0; + } + ++struct conn_params { ++ bdaddr_t addr; ++ u8 addr_type; ++ hci_conn_flags_t flags; ++ u8 privacy_mode; ++}; ++ + /* Adds connection to resolve list if needed. + * Setting params to NULL programs local hdev->irk + */ + static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, +- struct hci_conn_params *params) ++ struct conn_params *params) + { + struct hci_cp_le_add_to_resolv_list cp; + struct smp_irk *irk; + struct bdaddr_list_with_irk *entry; ++ struct hci_conn_params *p; + + if (!use_ll_privacy(hdev)) + return 0; +@@ -2182,6 +2190,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, + /* Default privacy mode is always Network */ + params->privacy_mode = HCI_NETWORK_PRIVACY; + ++ rcu_read_lock(); ++ p = hci_pend_le_action_lookup(&hdev->pend_le_conns, ++ &params->addr, params->addr_type); ++ if (!p) ++ p = hci_pend_le_action_lookup(&hdev->pend_le_reports, ++ &params->addr, params->addr_type); ++ if (p) ++ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY); ++ rcu_read_unlock(); ++ + done: + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) + memcpy(cp.local_irk, hdev->irk, 16); +@@ -2194,7 +2212,7 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, + + /* Set Device Privacy Mode.
*/ + static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev, +- struct hci_conn_params *params) ++ struct conn_params *params) + { + struct hci_cp_le_set_privacy_mode cp; + struct smp_irk *irk; +@@ -2219,6 +2237,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev, + bacpy(&cp.bdaddr, &irk->bdaddr); + cp.mode = HCI_DEVICE_PRIVACY; + ++ /* Note: params->privacy_mode is not updated since it is a copy */ ++ + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); + } +@@ -2228,7 +2248,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev, + * properly set the privacy mode. + */ + static int hci_le_add_accept_list_sync(struct hci_dev *hdev, +- struct hci_conn_params *params, ++ struct conn_params *params, + u8 *num_entries) + { + struct hci_cp_le_add_to_accept_list cp; +@@ -2426,6 +2446,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, + return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk); + } + ++static struct conn_params *conn_params_copy(struct list_head *list, size_t *n) ++{ ++ struct hci_conn_params *params; ++ struct conn_params *p; ++ size_t i; ++ ++ rcu_read_lock(); ++ ++ i = 0; ++ list_for_each_entry_rcu(params, list, action) ++ ++i; ++ *n = i; ++ ++ rcu_read_unlock(); ++ ++ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL); ++ if (!p) ++ return NULL; ++ ++ rcu_read_lock(); ++ ++ i = 0; ++ list_for_each_entry_rcu(params, list, action) { ++ /* Racing adds are handled in next scan update */ ++ if (i >= *n) ++ break; ++ ++ /* No hdev->lock, but: addr, addr_type are immutable. ++ * privacy_mode is only written by us or in ++ * hci_cc_le_set_privacy_mode that we wait for. ++ * We should be idempotent so MGMT updating flags ++ * while we are processing is OK. 
++ */ ++ bacpy(&p[i].addr, &params->addr); ++ p[i].addr_type = params->addr_type; ++ p[i].flags = READ_ONCE(params->flags); ++ p[i].privacy_mode = READ_ONCE(params->privacy_mode); ++ ++i; ++ } ++ ++ rcu_read_unlock(); ++ ++ *n = i; ++ return p; ++} ++ + /* Device must not be scanning when updating the accept list. + * + * Update is done using the following sequence: +@@ -2445,11 +2511,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, + */ + static u8 hci_update_accept_list_sync(struct hci_dev *hdev) + { +- struct hci_conn_params *params; ++ struct conn_params *params; + struct bdaddr_list *b, *t; + u8 num_entries = 0; + bool pend_conn, pend_report; + u8 filter_policy; ++ size_t i, n; + int err; + + /* Pause advertising if resolving list can be used as controllers +@@ -2483,6 +2550,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) + if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type)) + continue; + ++ /* Pointers not dereferenced, no locks needed */ + pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, + b->bdaddr_type); +@@ -2511,23 +2579,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) + * available accept list entries in the controller, then + * just abort and return filer policy value to not use the + * accept list. ++ * ++ * The list and params may be mutated while we wait for events, ++ * so make a copy and iterate it.
+ */ +- list_for_each_entry(params, &hdev->pend_le_conns, action) { +- err = hci_le_add_accept_list_sync(hdev, params, &num_entries); +- if (err) ++ ++ params = conn_params_copy(&hdev->pend_le_conns, &n); ++ if (!params) { ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ for (i = 0; i < n; ++i) { ++ err = hci_le_add_accept_list_sync(hdev, &params[i], ++ &num_entries); ++ if (err) { ++ kvfree(params); + goto done; ++ } + } + ++ kvfree(params); ++ + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * accept list if there is still space. Abort if space runs out. + */ +- list_for_each_entry(params, &hdev->pend_le_reports, action) { +- err = hci_le_add_accept_list_sync(hdev, params, &num_entries); +- if (err) ++ ++ params = conn_params_copy(&hdev->pend_le_reports, &n); ++ if (!params) { ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ for (i = 0; i < n; ++i) { ++ err = hci_le_add_accept_list_sync(hdev, &params[i], ++ &num_entries); ++ if (err) { ++ kvfree(params); + goto done; ++ } + } + ++ kvfree(params); ++ + /* Use the allowlist unless the following conditions are all true: + * - We are not currently suspending + * - There are 1 or more ADV monitors registered and it's not offloaded +@@ -4778,12 +4873,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) + struct hci_conn_params *p; + + list_for_each_entry(p, &hdev->le_conn_params, list) { ++ hci_pend_le_list_del_init(p); + if (p->conn) { + hci_conn_drop(p->conn); + hci_conn_put(p->conn); + p->conn = NULL; + } +- list_del_init(&p->action); + } + + BT_DBG("All LE pending actions cleared"); +diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c +index 815f2abe918ef..89c94f3e96bc3 100644 +--- a/net/bluetooth/mgmt.c ++++ b/net/bluetooth/mgmt.c +@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev) + /* Needed for AUTO_OFF case where might not "really" + * have been powered off.
+ */ +- list_del_init(&p->action); ++ hci_pend_le_list_del_init(p); + + switch (p->auto_connect) { + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: +- list_add(&p->action, &hdev->pend_le_conns); ++ hci_pend_le_list_add(p, &hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: +- list_add(&p->action, &hdev->pend_le_reports); ++ hci_pend_le_list_add(p, &hdev->pend_le_reports); + break; + default: + break; +@@ -5161,7 +5161,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + goto unlock; + } + +- params->flags = current_flags; ++ WRITE_ONCE(params->flags, current_flags); + status = MGMT_STATUS_SUCCESS; + + /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY +@@ -7573,7 +7573,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, + if (params->auto_connect == auto_connect) + return 0; + +- list_del_init(&params->action); ++ hci_pend_le_list_del_init(params); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: +@@ -7582,18 +7582,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, + * connect to device, keep connecting.
+ */ + if (params->explicit_connect) +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_add(params, &hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: + if (params->explicit_connect) +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_add(params, &hdev->pend_le_conns); + else +- list_add(&params->action, &hdev->pend_le_reports); ++ hci_pend_le_list_add(params, &hdev->pend_le_reports); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) +- list_add(&params->action, &hdev->pend_le_conns); ++ hci_pend_le_list_add(params, &hdev->pend_le_conns); + break; + } + +@@ -7816,9 +7816,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, + goto unlock; + } + +- list_del(&params->action); +- list_del(&params->list); +- kfree(params); ++ hci_conn_params_free(params); + + device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); + } else { +@@ -7849,9 +7847,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, + p->auto_connect = HCI_AUTO_CONN_EXPLICIT; + continue; + } +- list_del(&p->action); +- list_del(&p->list); +- kfree(p); ++ hci_conn_params_free(p); + } + + bt_dev_dbg(hdev, "All LE connection parameters were removed"); +-- +2.39.2 + diff --git a/queue-6.1/bpf-address-kcsan-report-on-bpf_lru_list.patch b/queue-6.1/bpf-address-kcsan-report-on-bpf_lru_list.patch new file mode 100644 index 00000000000..9da0f1b277e --- /dev/null +++ b/queue-6.1/bpf-address-kcsan-report-on-bpf_lru_list.patch @@ -0,0 +1,177 @@ +From ccf4979c64a589eed4428fcc3fc6a92a8627c659 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 May 2023 21:37:48 -0700 +Subject: bpf: Address KCSAN report on bpf_lru_list + +From: Martin KaFai Lau + +[ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ] + +KCSAN reported a data-race when accessing node->ref.
+Although node->ref does not have to be accurate, +take this chance to use a more common READ_ONCE() and WRITE_ONCE() +pattern instead of data_race(). + +There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref(). +This patch also adds bpf_lru_node_clear_ref() to do the +WRITE_ONCE(node->ref, 0) also. + +================================================================== +BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem + +write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1: +__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline] +__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline] +__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240 +bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline] +bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline] +bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499 +prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline] +__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316 +bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 +bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 +generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 +bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 +__sys_bpf+0x338/0x810 +__do_sys_bpf kernel/bpf/syscall.c:5096 [inline] +__se_sys_bpf kernel/bpf/syscall.c:5094 [inline] +__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0: +bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline] +__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332 +bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 +bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 +generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 +__sys_bpf+0x338/0x810 +__do_sys_bpf kernel/bpf/syscall.c:5096 [inline] +__se_sys_bpf kernel/bpf/syscall.c:5094 [inline] +__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x01 -> 0x00 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023 +================================================================== + +Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com +Signed-off-by: Martin KaFai Lau +Acked-by: Yonghong Song +Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/bpf_lru_list.c | 21 +++++++++++++-------- + kernel/bpf/bpf_lru_list.h | 7 ++----- + 2 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c +index d99e89f113c43..3dabdd137d102 100644 +--- a/kernel/bpf/bpf_lru_list.c ++++ b/kernel/bpf/bpf_lru_list.c +@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) + /* bpf_lru_node helpers */ + static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) + { +- return node->ref; ++ return READ_ONCE(node->ref); ++} ++ ++static void bpf_lru_node_clear_ref(struct bpf_lru_node *node) ++{ ++ WRITE_ONCE(node->ref, 0); + } + + static void bpf_lru_list_count_inc(struct bpf_lru_list *l, +@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l, + + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_move(&node->list, &l->lists[tgt_type]); + } + +@@ -110,7 +115,7 @@ 
static void __bpf_lru_node_move(struct bpf_lru_list *l, + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; + } +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + + /* If the moving node is the next_inactive_rotation candidate, + * move the next_inactive_rotation pointer also. +@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru, + *(u32 *)((void *)node + lru->hash_offset) = hash; + node->cpu = cpu; + node->type = BPF_LRU_LOCAL_LIST_T_PENDING; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, local_pending_list(loc_l)); + } + +@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, + if (!list_empty(free_list)) { + node = list_first_entry(free_list, struct bpf_lru_node, list); + *(u32 *)((void *)node + lru->hash_offset) = hash; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); + } + +@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru, + } + + node->type = BPF_LRU_LOCAL_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_move(&node->list, local_free_list(loc_l)); + + raw_spin_unlock_irqrestore(&loc_l->lock, flags); +@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, + + node = (struct bpf_lru_node *)(buf + node_offset); + node->type = BPF_LRU_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); + buf += elem_size; + } +@@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, + node = (struct bpf_lru_node *)(buf + node_offset); + node->cpu = cpu; + node->type = BPF_LRU_LIST_T_FREE; +- node->ref = 0; ++ bpf_lru_node_clear_ref(node); + list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); + i++; + buf += elem_size; +diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h +index 4ea227c9c1ade..8f3c8b2b4490e 100644 +--- 
a/kernel/bpf/bpf_lru_list.h ++++ b/kernel/bpf/bpf_lru_list.h +@@ -64,11 +64,8 @@ struct bpf_lru { + + static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) + { +- /* ref is an approximation on access frequency. It does not +- * have to be very accurate. Hence, no protection is used. +- */ +- if (!node->ref) +- node->ref = 1; ++ if (!READ_ONCE(node->ref)) ++ WRITE_ONCE(node->ref, 1); + } + + int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, +-- +2.39.2 + diff --git a/queue-6.1/bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch b/queue-6.1/bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch new file mode 100644 index 00000000000..c3a7b30b4e4 --- /dev/null +++ b/queue-6.1/bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch @@ -0,0 +1,55 @@ +From 0a9f7c72db338d808de8b35708d487940038ce8f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 09:49:31 -0700 +Subject: bpf, arm64: Fix BTI type used for freplace attached functions + +From: Alexander Duyck + +[ Upstream commit a3f25d614bc73b45e8f02adc6769876dfd16ca84 ] + +When running an freplace attached bpf program on an arm64 system we were +seeing the following issue: + Unhandled 64-bit el1h sync exception on CPU47, ESR 0x0000000036000003 -- BTI + +After a bit of work to track it down I determined that what appeared to be +happening is that the 'bti c' at the start of the program was somehow being +reached after a 'br' instruction. Further digging pointed me toward the +fact that the function was attached via freplace. This in turn led me to +build_plt which I believe is invoking the long jump which is triggering +this error. + +To resolve it we can replace the 'bti c' with 'bti jc' and add a comment +explaining why this has to be modified as such.
+ +Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") +Signed-off-by: Alexander Duyck +Acked-by: Xu Kuohai +Link: https://lore.kernel.org/r/168926677665.316237.9953845318337455525.stgit@ahduyck-xeon-server.home.arpa +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + arch/arm64/net/bpf_jit_comp.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c +index 8f16217c111c8..14134fd34ff79 100644 +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -322,7 +322,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) + * + */ + +- emit_bti(A64_BTI_C, ctx); ++ /* bpf function may be invoked by 3 instruction types: ++ * 1. bl, attached via freplace to bpf prog via short jump ++ * 2. br, attached via freplace to bpf prog via long jump ++ * 3. blr, working as a function pointer, used by emit_call. ++ * So BTI_JC should used here to support both br and blr. ++ */ ++ emit_bti(A64_BTI_JC, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); +-- +2.39.2 + diff --git a/queue-6.1/bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch b/queue-6.1/bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch new file mode 100644 index 00000000000..fce380e970d --- /dev/null +++ b/queue-6.1/bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch @@ -0,0 +1,75 @@ +From 6136de53109de1a3979843917ce4f9c78823e3e1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 21:45:28 +0530 +Subject: bpf: Fix subprog idx logic in check_max_stack_depth + +From: Kumar Kartikeya Dwivedi + +[ Upstream commit ba7b3e7d5f9014be65879ede8fd599cb222901c9 ] + +The assignment to idx in check_max_stack_depth happens once we see a +bpf_pseudo_call or bpf_pseudo_func. 
This is not an issue as the rest of +the code performs a few checks and then pushes the frame to the frame +stack, except the case of async callbacks. If the async callback case +causes the loop iteration to be skipped, the idx assignment will be +incorrect on the next iteration of the loop. The value stored in the +frame stack (as the subprogno of the current subprog) will be incorrect. + +This leads to incorrect checks and incorrect tail_call_reachable +marking. Save the target subprog in a new variable and only assign to +idx once we are done with the is_async_cb check which may skip pushing +of frame to the frame stack and subsequent stack depth checks and tail +call markings. + +Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.") +Signed-off-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/r/20230717161530.1238-2-memxor@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 8c3ededef3172..fdba4086881b3 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4336,7 +4336,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) + continue_func: + subprog_end = subprog[idx + 1].start; + for (; i < subprog_end; i++) { +- int next_insn; ++ int next_insn, sidx; + + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) + continue; +@@ -4346,14 +4346,14 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) + + /* find the callee */ + next_insn = i + insn[i].imm + 1; +- idx = find_subprog(env, next_insn); +- if (idx < 0) { ++ sidx = find_subprog(env, next_insn); ++ if (sidx < 0) { + WARN_ONCE(1, "verifier bug. 
No program starts at insn %d\n", + next_insn); + return -EFAULT; + } +- if (subprog[idx].is_async_cb) { +- if (subprog[idx].has_tail_call) { ++ if (subprog[sidx].is_async_cb) { ++ if (subprog[sidx].has_tail_call) { + verbose(env, "verifier bug. subprog has tail_call and async cb\n"); + return -EFAULT; + } +@@ -4362,6 +4362,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) + continue; + } + i = next_insn; ++ idx = sidx; + + if (subprog[idx].has_tail_call) + tail_call_reachable = true; +-- +2.39.2 + diff --git a/queue-6.1/bpf-print-a-warning-only-if-writing-to-unprivileged_.patch b/queue-6.1/bpf-print-a-warning-only-if-writing-to-unprivileged_.patch new file mode 100644 index 00000000000..c1133994d09 --- /dev/null +++ b/queue-6.1/bpf-print-a-warning-only-if-writing-to-unprivileged_.patch @@ -0,0 +1,47 @@ +From cb24f938e033cedcefaf283a9d5f44beb406005c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 May 2023 11:14:18 -0700 +Subject: bpf: Print a warning only if writing to unprivileged_bpf_disabled. + +From: Kui-Feng Lee + +[ Upstream commit fedf99200ab086c42a572fca1d7266b06cdc3e3f ] + +Only print the warning message if you are writing to +"/proc/sys/kernel/unprivileged_bpf_disabled". + +The kernel may print an annoying warning when you read +"/proc/sys/kernel/unprivileged_bpf_disabled" saying + + WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible + via Spectre v2 BHB attacks! + +However, this message is only meaningful when the feature is +disabled or enabled. 
+ +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20230502181418.308479-1-kuifeng@meta.com +Signed-off-by: Sasha Levin +--- + kernel/bpf/syscall.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 8633ec4f92df3..0c44a716f0a24 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -5289,7 +5289,8 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, + *(int *)table->data = unpriv_enable; + } + +- unpriv_ebpf_notify(unpriv_enable); ++ if (write) ++ unpriv_ebpf_notify(unpriv_enable); + + return ret; + } +-- +2.39.2 + diff --git a/queue-6.1/bpf-repeat-check_max_stack_depth-for-async-callbacks.patch b/queue-6.1/bpf-repeat-check_max_stack_depth-for-async-callbacks.patch new file mode 100644 index 00000000000..80144d50777 --- /dev/null +++ b/queue-6.1/bpf-repeat-check_max_stack_depth-for-async-callbacks.patch @@ -0,0 +1,102 @@ +From 765e8a472e267495e5ef26af7754684c76f6627f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 21:45:29 +0530 +Subject: bpf: Repeat check_max_stack_depth for async callbacks + +From: Kumar Kartikeya Dwivedi + +[ Upstream commit b5e9ad522c4ccd32d322877515cff8d47ed731b9 ] + +While the check_max_stack_depth function explores call chains emanating +from the main prog, which is typically enough to cover all possible call +chains, it doesn't explore those rooted at async callbacks unless the +async callback will have been directly called, since unlike non-async +callbacks it skips their instruction exploration as they don't +contribute to stack depth. + +It could be the case that the async callback leads to a callchain which +exceeds the stack depth, but this is never reachable while only +exploring the entry point from main subprog. 
Hence, repeat the check for +the main subprog *and* all async callbacks marked by the symbolic +execution pass of the verifier, as execution of the program may begin at +any of them. + +Consider functions with following stack depths: +main: 256 +async: 256 +foo: 256 + +main: + rX = async + bpf_timer_set_callback(...) + +async: + foo() + +Here, async is not descended as it does not contribute to stack depth of +main (since it is referenced using bpf_pseudo_func and not +bpf_pseudo_call). However, when async is invoked asynchronously, it will +end up breaching the MAX_BPF_STACK limit by calling foo. + +Hence, in addition to main, we also need to explore call chains +beginning at all async callback subprogs in a program. + +Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.") +Signed-off-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/r/20230717161530.1238-3-memxor@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index fdba4086881b3..f25ce959fae64 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4288,16 +4288,17 @@ static int update_stack_depth(struct bpf_verifier_env *env, + * Since recursion is prevented by check_cfg() this algorithm + * only needs a local stack of MAX_CALL_FRAMES to remember callsites + */ +-static int check_max_stack_depth(struct bpf_verifier_env *env) ++static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) + { +- int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn *insn = env->prog->insnsi; ++ int depth = 0, frame = 0, i, subprog_end; + bool tail_call_reachable = false; + int ret_insn[MAX_CALL_FRAMES]; + int ret_prog[MAX_CALL_FRAMES]; + int j; + ++ i = subprog[idx].start; + process_func: + /* protect 
against potential stack overflow that might happen when + * bpf2bpf calls get combined with tailcalls. Limit the caller's stack +@@ -4398,6 +4399,22 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) + goto continue_func; + } + ++static int check_max_stack_depth(struct bpf_verifier_env *env) ++{ ++ struct bpf_subprog_info *si = env->subprog_info; ++ int ret; ++ ++ for (int i = 0; i < env->subprog_cnt; i++) { ++ if (!i || si[i].is_async_cb) { ++ ret = check_max_stack_depth_subprog(env, i); ++ if (ret < 0) ++ return ret; ++ } ++ continue; ++ } ++ return 0; ++} ++ + #ifndef CONFIG_BPF_JIT_ALWAYS_ON + static int get_callee_stack_depth(struct bpf_verifier_env *env, + const struct bpf_insn *insn, int idx) +-- +2.39.2 + diff --git a/queue-6.1/bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch b/queue-6.1/bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch new file mode 100644 index 00000000000..2d88a8a5300 --- /dev/null +++ b/queue-6.1/bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch @@ -0,0 +1,152 @@ +From 76b79c254cf2d798a26a7e99c73226b2df0ff1bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 May 2023 22:51:49 +0000 +Subject: bpf: tcp: Avoid taking fast sock lock in iterator + +From: Aditi Ghag + +[ Upstream commit 9378096e8a656fb5c4099b26b1370c56f056eab9 ] + +This is a preparatory commit to replace `lock_sock_fast` with +`lock_sock`,and facilitate BPF programs executed from the TCP sockets +iterator to be able to destroy TCP sockets using the bpf_sock_destroy +kfunc (implemented in follow-up commits). + +Previously, BPF TCP iterator was acquiring the sock lock with BH +disabled. This led to scenarios where the sockets hash table bucket lock +can be acquired with BH enabled in some path versus disabled in other. +In such situation, kernel issued a warning since it thinks that in the +BH enabled path the same bucket lock *might* be acquired again in the +softirq context (BH disabled), which will lead to a potential dead lock. 
+Since bpf_sock_destroy also happens in a process context, the potential +deadlock warning is likely a false alarm. + +Here is a snippet of annotated stack trace that motivated this change: + +``` + +Possible interrupt unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&h->lhash2[i].lock); + local_bh_disable(); + lock(&h->lhash2[i].lock); +kernel imagined possible scenario: + local_bh_disable(); /* Possible softirq */ + lock(&h->lhash2[i].lock); +*** Potential Deadlock *** + +process context: + +lock_acquire+0xcd/0x330 +_raw_spin_lock+0x33/0x40 +------> Acquire (bucket) lhash2.lock with BH enabled +__inet_hash+0x4b/0x210 +inet_csk_listen_start+0xe6/0x100 +inet_listen+0x95/0x1d0 +__sys_listen+0x69/0xb0 +__x64_sys_listen+0x14/0x20 +do_syscall_64+0x3c/0x90 +entry_SYSCALL_64_after_hwframe+0x72/0xdc + +bpf_sock_destroy run from iterator: + +lock_acquire+0xcd/0x330 +_raw_spin_lock+0x33/0x40 +------> Acquire (bucket) lhash2.lock with BH disabled +inet_unhash+0x9a/0x110 +tcp_set_state+0x6a/0x210 +tcp_abort+0x10d/0x200 +bpf_prog_6793c5ca50c43c0d_iter_tcp6_server+0xa4/0xa9 +bpf_iter_run_prog+0x1ff/0x340 +------> lock_sock_fast that acquires sock lock with BH disabled +bpf_iter_tcp_seq_show+0xca/0x190 +bpf_seq_read+0x177/0x450 + +``` + +Also, Yonghong reported a deadlock for non-listening TCP sockets that +this change resolves. Previously, `lock_sock_fast` held the sock spin +lock with BH which was again being acquired in `tcp_abort`: + +``` +watchdog: BUG: soft lockup - CPU#0 stuck for 86s! [test_progs:2331] +RIP: 0010:queued_spin_lock_slowpath+0xd8/0x500 +Call Trace: + + _raw_spin_lock+0x84/0x90 + tcp_abort+0x13c/0x1f0 + bpf_prog_88539c5453a9dd47_iter_tcp6_client+0x82/0x89 + bpf_iter_run_prog+0x1aa/0x2c0 + ? preempt_count_sub+0x1c/0xd0 + ? 
from_kuid_munged+0x1c8/0x210 + bpf_iter_tcp_seq_show+0x14e/0x1b0 + bpf_seq_read+0x36c/0x6a0 + +bpf_iter_tcp_seq_show + lock_sock_fast + __lock_sock_fast + spin_lock_bh(&sk->sk_lock.slock); + /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held.* */ + + ... + tcp_abort + local_bh_disable(); + spin_lock(&((sk)->sk_lock.slock)); // from bh_lock_sock(sk) + +``` + +With the switch to `lock_sock`, it calls `spin_unlock_bh` before returning: + +``` +lock_sock + lock_sock_nested + spin_lock_bh(&sk->sk_lock.slock); + : + spin_unlock_bh(&sk->sk_lock.slock); +``` + +Acked-by: Yonghong Song +Acked-by: Stanislav Fomichev +Signed-off-by: Aditi Ghag +Link: https://lore.kernel.org/r/20230519225157.760788-2-aditi.ghag@isovalent.com +Signed-off-by: Martin KaFai Lau +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index b37c1bcb15097..a7de5ba74e7f7 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -2911,7 +2911,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) + struct bpf_iter_meta meta; + struct bpf_prog *prog; + struct sock *sk = v; +- bool slow; + uid_t uid; + int ret; + +@@ -2919,7 +2918,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) + return 0; + + if (sk_fullsock(sk)) +- slow = lock_sock_fast(sk); ++ lock_sock(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; +@@ -2943,7 +2942,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) + + unlock: + if (sk_fullsock(sk)) +- unlock_sock_fast(sk, slow); ++ release_sock(sk); + return ret; + + } +-- +2.39.2 + diff --git a/queue-6.1/bridge-add-extack-warning-when-enabling-stp-in-netns.patch b/queue-6.1/bridge-add-extack-warning-when-enabling-stp-in-netns.patch new file mode 100644 index 00000000000..b6461aa64a5 --- /dev/null +++ b/queue-6.1/bridge-add-extack-warning-when-enabling-stp-in-netns.patch @@ -0,0 
+1,71 @@ +From 5841124edbf8b166987956c008ec9eafe491d36b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jul 2023 08:44:49 -0700 +Subject: bridge: Add extack warning when enabling STP in netns. + +From: Kuniyuki Iwashima + +[ Upstream commit 56a16035bb6effb37177867cea94c13a8382f745 ] + +When we create an L2 loop on a bridge in netns, we will see packets storm +even if STP is enabled. + + # unshare -n + # ip link add br0 type bridge + # ip link add veth0 type veth peer name veth1 + # ip link set veth0 master br0 up + # ip link set veth1 master br0 up + # ip link set br0 type bridge stp_state 1 + # ip link set br0 up + # sleep 30 + # ip -s link show br0 + 2: br0: mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000 + link/ether b6:61:98:1c:1c:b5 brd ff:ff:ff:ff:ff:ff + RX: bytes packets errors dropped missed mcast + 956553768 12861249 0 0 0 12861249 <-. Keep + TX: bytes packets errors dropped carrier collsns | increasing + 1027834 11951 0 0 0 0 <-' rapidly + +This is because llc_rcv() drops all packets in non-root netns and BPDU +is dropped. + +Let's add extack warning when enabling STP in netns. + + # unshare -n + # ip link add br0 type bridge + # ip link set br0 type bridge stp_state 1 + Warning: bridge: STP does not work in non-root netns. + +Note this commit will be reverted later when we namespacify the whole LLC +infra. + +Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe") +Suggested-by: Harry Coin +Link: https://lore.kernel.org/netdev/0f531295-e289-022d-5add-5ceffa0df9bc@quietfountain.com/ +Suggested-by: Ido Schimmel +Signed-off-by: Kuniyuki Iwashima +Acked-by: Nikolay Aleksandrov +Reviewed-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/bridge/br_stp_if.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c +index 75204d36d7f90..b65962682771f 100644 +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -201,6 +201,9 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val, + { + ASSERT_RTNL(); + ++ if (!net_eq(dev_net(br->dev), &init_net)) ++ NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns"); ++ + if (br_mrp_enabled(br)) { + NL_SET_ERR_MSG_MOD(extack, + "STP can't be enabled if MRP is already enabled"); +-- +2.39.2 + diff --git a/queue-6.1/btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch b/queue-6.1/btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch new file mode 100644 index 00000000000..893e406609d --- /dev/null +++ b/queue-6.1/btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch @@ -0,0 +1,50 @@ +From 34038040cc781e64ecfa341e776b1d3ca1839d8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Jun 2023 08:13:23 +0200 +Subject: btrfs: be a bit more careful when setting mirror_num_ret in + btrfs_map_block + +From: Christoph Hellwig + +[ Upstream commit 4e7de35eb7d1a1d4f2dda15f39fbedd4798a0b8d ] + +The mirror_num_ret is allowed to be NULL, although it has to be set when +smap is set. Unfortunately that is not a well enough specifiable +invariant for static type checkers, so add a NULL check to make sure they +are fine. 
+ +Fixes: 03793cbbc80f ("btrfs: add fast path for single device io in __btrfs_map_block") +Reported-by: Dan Carpenter +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Signed-off-by: Christoph Hellwig +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/volumes.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 7433ae929fdcb..2e0832d70406c 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -6595,11 +6595,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, + if (patch_the_first_stripe_for_dev_replace) { + smap->dev = dev_replace->tgtdev; + smap->physical = physical_to_patch_in_first_stripe; +- *mirror_num_ret = map->num_stripes + 1; ++ if (mirror_num_ret) ++ *mirror_num_ret = map->num_stripes + 1; + } else { + set_io_stripe(smap, map, stripe_index, stripe_offset, + stripe_nr); +- *mirror_num_ret = mirror_num; ++ if (mirror_num_ret) ++ *mirror_num_ret = mirror_num; + } + *bioc_ret = NULL; + ret = 0; +-- +2.39.2 + diff --git a/queue-6.1/cifs-fix-mid-leak-during-reconnection-after-timeout-.patch b/queue-6.1/cifs-fix-mid-leak-during-reconnection-after-timeout-.patch new file mode 100644 index 00000000000..7a2c897f51d --- /dev/null +++ b/queue-6.1/cifs-fix-mid-leak-during-reconnection-after-timeout-.patch @@ -0,0 +1,100 @@ +From 7a8eaa17077746c57f6fa160701348e82e480ae9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jul 2023 08:56:33 +0000 +Subject: cifs: fix mid leak during reconnection after timeout threshold + +From: Shyam Prasad N + +[ Upstream commit 69cba9d3c1284e0838ae408830a02c4a063104bc ] + +When the number of responses with status of STATUS_IO_TIMEOUT +exceeds a specified threshold (NUM_STATUS_IO_TIMEOUT), we reconnect +the connection. But we do not return the mid, or the credits +returned for the mid, or reduce the number of in-flight requests. 
+ +This bug could result in the server->in_flight count to go bad, +and also cause a leak in the mids. + +This change moves the check to a few lines below where the +response is decrypted, even of the response is read from the +transform header. This way, the code for returning the mids +can be reused. + +Also, the cifs_reconnect was reconnecting just the transport +connection before. In case of multi-channel, this may not be +what we want to do after several timeouts. Changed that to +reconnect the session and the tree too. + +Also renamed NUM_STATUS_IO_TIMEOUT to a more appropriate name +MAX_STATUS_IO_TIMEOUT. + +Fixes: 8e670f77c4a5 ("Handle STATUS_IO_TIMEOUT gracefully") +Signed-off-by: Shyam Prasad N +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/connect.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c +index 935fe198a4baf..cbe08948baf4a 100644 +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -59,7 +59,7 @@ extern bool disable_legacy_dialects; + #define TLINK_IDLE_EXPIRE (600 * HZ) + + /* Drop the connection to not overload the server */ +-#define NUM_STATUS_IO_TIMEOUT 5 ++#define MAX_STATUS_IO_TIMEOUT 5 + + struct mount_ctx { + struct cifs_sb_info *cifs_sb; +@@ -1162,6 +1162,7 @@ cifs_demultiplex_thread(void *p) + struct mid_q_entry *mids[MAX_COMPOUND]; + char *bufs[MAX_COMPOUND]; + unsigned int noreclaim_flag, num_io_timeout = 0; ++ bool pending_reconnect = false; + + noreclaim_flag = memalloc_noreclaim_save(); + cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current)); +@@ -1201,6 +1202,8 @@ cifs_demultiplex_thread(void *p) + cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length); + if (!is_smb_response(server, buf[0])) + continue; ++ ++ pending_reconnect = false; + next_pdu: + server->pdu_size = pdu_length; + +@@ -1258,10 +1261,13 @@ cifs_demultiplex_thread(void *p) + if (server->ops->is_status_io_timeout && + 
server->ops->is_status_io_timeout(buf)) { + num_io_timeout++; +- if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) { +- cifs_reconnect(server, false); ++ if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) { ++ cifs_server_dbg(VFS, ++ "Number of request timeouts exceeded %d. Reconnecting", ++ MAX_STATUS_IO_TIMEOUT); ++ ++ pending_reconnect = true; + num_io_timeout = 0; +- continue; + } + } + +@@ -1308,6 +1314,11 @@ cifs_demultiplex_thread(void *p) + buf = server->smallbuf; + goto next_pdu; + } ++ ++ /* do this reconnect at the very end after processing all MIDs */ ++ if (pending_reconnect) ++ cifs_reconnect(server, true); ++ + } /* end while !EXITING */ + + /* buffer usually freed in free_mid - need to free it here on exit */ +-- +2.39.2 + diff --git a/queue-6.1/devlink-report-devlink_port_type_warn-source-device.patch b/queue-6.1/devlink-report-devlink_port_type_warn-source-device.patch new file mode 100644 index 00000000000..d6552021503 --- /dev/null +++ b/queue-6.1/devlink-report-devlink_port_type_warn-source-device.patch @@ -0,0 +1,77 @@ +From 4aca3a9686777cc7cbeeafbea29e9349e546bc92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 11:54:47 +0200 +Subject: devlink: report devlink_port_type_warn source device + +From: Petr Oros + +[ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ] + +devlink_port_type_warn is scheduled for port devlink and warning +when the port type is not set. But from this warning it is not easy +found out which device (driver) has no devlink port set. + +[ 3709.975552] Type was not set for devlink port. 
+[ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20 +[ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm +[ 3709.994030] crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse +[ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1 +[ 3710.108435] Hardware name: Dell Inc. 
PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022 +[ 3710.108437] Workqueue: events devlink_port_type_warn +[ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20 +[ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87 +[ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282 +[ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027 +[ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8 +[ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18 +[ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600 +[ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905 +[ 3710.108452] FS: 0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000 +[ 3710.108453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0 +[ 3710.108456] PKRU: 55555554 +[ 3710.108457] Call Trace: +[ 3710.108458] +[ 3710.108459] process_one_work+0x1e2/0x3b0 +[ 3710.108466] ? rescuer_thread+0x390/0x390 +[ 3710.108468] worker_thread+0x50/0x3a0 +[ 3710.108471] ? rescuer_thread+0x390/0x390 +[ 3710.108473] kthread+0xdd/0x100 +[ 3710.108477] ? kthread_complete_and_exit+0x20/0x20 +[ 3710.108479] ret_from_fork+0x1f/0x30 +[ 3710.108485] +[ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]--- + +After patch: +[ 402.473064] ice 0000:41:00.0: Type was not set for devlink port. +[ 402.473064] ice 0000:41:00.1: Type was not set for devlink port. 
+ +Signed-off-by: Petr Oros +Reviewed-by: Pavan Chebbi +Reviewed-by: Jakub Kicinski +Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/devlink.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/core/devlink.c b/net/core/devlink.c +index 2aa77d4b80d0a..5a4a4b34ac15c 100644 +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -9826,7 +9826,10 @@ EXPORT_SYMBOL_GPL(devlink_free); + + static void devlink_port_type_warn(struct work_struct *work) + { +- WARN(true, "Type was not set for devlink port."); ++ struct devlink_port *port = container_of(to_delayed_work(work), ++ struct devlink_port, ++ type_warn_dw); ++ dev_warn(port->devlink->dev, "Type was not set for devlink port."); + } + + static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) +-- +2.39.2 + diff --git a/queue-6.1/drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch b/queue-6.1/drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch new file mode 100644 index 00000000000..4c2b917abd6 --- /dev/null +++ b/queue-6.1/drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch @@ -0,0 +1,43 @@ +From 20d5e3268aeb5cd2827f61521d33a0203f680509 Mon Sep 17 00:00:00 2001 +From: hackyzh002 +Date: Wed, 19 Apr 2023 20:20:58 +0800 +Subject: [PATCH AUTOSEL 4.19 01/11] drm/radeon: Fix integer overflow in + radeon_cs_parser_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit f828b681d0cd566f86351c0b913e6cb6ed8c7b9c ] + +The type of size is unsigned, if size is 0x40000000, there will be an +integer overflow, size will be zero after size *= sizeof(uint32_t), +will cause uninitialized memory to be referenced later + +Reviewed-by: Christian König +Signed-off-by: hackyzh002 +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + 
drivers/gpu/drm/radeon/radeon_cs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c +index 1ae31dbc61c64..5e61abb3dce5c 100644 +--- a/drivers/gpu/drm/radeon/radeon_cs.c ++++ b/drivers/gpu/drm/radeon/radeon_cs.c +@@ -265,7 +265,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) + { + struct drm_radeon_cs *cs = data; + uint64_t *chunk_array_ptr; +- unsigned size, i; ++ u64 size; ++ unsigned i; + u32 ring = RADEON_CS_RING_GFX; + s32 priority = 0; + +-- +2.39.2 + diff --git a/queue-6.1/dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch b/queue-6.1/dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch new file mode 100644 index 00000000000..70d64a56f2e --- /dev/null +++ b/queue-6.1/dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch @@ -0,0 +1,69 @@ +From 9bbaa84ecaeca40ae4d2d1cd4ab363546113da7a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 00:34:05 +0200 +Subject: dsa: mv88e6xxx: Do a final check before timing out + +From: Linus Walleij + +[ Upstream commit 95ce158b6c93b28842b54b42ad1cb221b9844062 ] + +I get sporadic timeouts from the driver when using the +MV88E6352. Reading the status again after the loop fixes the +problem: the operation is successful but goes undetected. + +Some added prints show things like this: + +[ 58.356209] mv88e6085 mdio_mux-0.1:00: Timeout while waiting + for switch, addr 1b reg 0b, mask 8000, val 0000, data c000 +[ 58.367487] mv88e6085 mdio_mux-0.1:00: Timeout waiting for + ATU op 4000, fid 0001 +(...) 
+[ 61.826293] mv88e6085 mdio_mux-0.1:00: Timeout while waiting + for switch, addr 1c reg 18, mask 8000, val 0000, data 9860 +[ 61.837560] mv88e6085 mdio_mux-0.1:00: Timeout waiting + for PHY command 1860 to complete + +The reason is probably not the commands: I think those are +mostly fine with the 50+50ms timeout, but the problem +appears when OpenWrt brings up several interfaces in +parallel on a system with 7 populated ports: if one of +them take more than 50 ms and waits one or more of the +others can get stuck on the mutex for the switch and then +this can easily multiply. + +As we sleep and wait, the function loop needs a final +check after exiting the loop if we were successful. + +Suggested-by: Andrew Lunn +Cc: Tobias Waldekranz +Fixes: 35da1dfd9484 ("net: dsa: mv88e6xxx: Improve performance of busy bit polling") +Signed-off-by: Linus Walleij +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20230712223405.861899-1-linus.walleij@linaro.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 4db1652015d1d..b69bd44ada1f2 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -109,6 +109,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg, + usleep_range(1000, 2000); + } + ++ err = mv88e6xxx_read(chip, addr, reg, &data); ++ if (err) ++ return err; ++ ++ if ((data & mask) == val) ++ return 0; ++ + dev_err(chip->dev, "Timeout while waiting for switch\n"); + return -ETIMEDOUT; + } +-- +2.39.2 + diff --git a/queue-6.1/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch b/queue-6.1/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch new file mode 100644 index 00000000000..0e0a727fd33 --- /dev/null +++ b/queue-6.1/fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch @@ -0,0 +1,40 @@ +From 
3f351b5e8558e6d06eb00f3a0b3ce2ac4d1bd613 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 15 Jul 2023 16:16:56 +0800 +Subject: fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe + +From: Zhang Shurong + +[ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ] + +This func misses checking for platform_get_irq()'s call and may pass +negative error codes to request_irq(), which takes unsigned IRQ #, +causing it to fail with -EINVAL, overriding an original error code. + +Fix this by not calling request_irq() with invalid IRQ #s. + +Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ") +Signed-off-by: Zhang Shurong +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/au1200fb.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c +index b6b22fa4a8a01..fd3ff398d234a 100644 +--- a/drivers/video/fbdev/au1200fb.c ++++ b/drivers/video/fbdev/au1200fb.c +@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev) + + /* Now hook interrupt too */ + irq = platform_get_irq(dev, 0); ++ if (irq < 0) ++ return irq; ++ + ret = request_irq(irq, au1200fb_handle_irq, + IRQF_SHARED, "lcd", (void *)dev); + if (ret) { +-- +2.39.2 + diff --git a/queue-6.1/fbdev-imxfb-removed-unneeded-release_mem_region.patch b/queue-6.1/fbdev-imxfb-removed-unneeded-release_mem_region.patch new file mode 100644 index 00000000000..4ced25e8975 --- /dev/null +++ b/queue-6.1/fbdev-imxfb-removed-unneeded-release_mem_region.patch @@ -0,0 +1,36 @@ +From 37392063869cec1e0f260e3d3edc86270b958c95 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 21:19:58 +0800 +Subject: fbdev: imxfb: Removed unneeded release_mem_region + +From: Yangtao Li + +[ Upstream commit 45fcc058a75bf5d65cf4c32da44a252fbe873cd4 ] + +Remove unnecessary release_mem_region from the error path to prevent +mem region from being released twice, which could avoid resource leak +or other 
unexpected issues. + +Fixes: b083c22d5114 ("video: fbdev: imxfb: Convert request_mem_region + ioremap to devm_ioremap_resource") +Signed-off-by: Yangtao Li +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/imxfb.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c +index 61731921011d5..36ada87b49a49 100644 +--- a/drivers/video/fbdev/imxfb.c ++++ b/drivers/video/fbdev/imxfb.c +@@ -1043,7 +1043,6 @@ static int imxfb_probe(struct platform_device *pdev) + failed_map: + failed_ioremap: + failed_getclock: +- release_mem_region(res->start, resource_size(res)); + failed_of_parse: + kfree(info->pseudo_palette); + failed_init: +-- +2.39.2 + diff --git a/queue-6.1/fbdev-imxfb-warn-about-invalid-left-right-margin.patch b/queue-6.1/fbdev-imxfb-warn-about-invalid-left-right-margin.patch new file mode 100644 index 00000000000..5efab428be1 --- /dev/null +++ b/queue-6.1/fbdev-imxfb-warn-about-invalid-left-right-margin.patch @@ -0,0 +1,43 @@ +From c6e2909b7334117823ea14b1738ea3584813e756 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 15:24:37 +0200 +Subject: fbdev: imxfb: warn about invalid left/right margin + +From: Martin Kaiser + +[ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ] + +Warn about invalid var->left_margin or var->right_margin. Their values +are read from the device tree. + +We store var->left_margin-3 and var->right_margin-1 in register +fields. These fields should be >= 0. 
+ +Fixes: 7e8549bcee00 ("imxfb: Fix margin settings") +Signed-off-by: Martin Kaiser +Signed-off-by: Helge Deller +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/imxfb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c +index 51fde1b2a7938..61731921011d5 100644 +--- a/drivers/video/fbdev/imxfb.c ++++ b/drivers/video/fbdev/imxfb.c +@@ -613,10 +613,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf + if (var->hsync_len < 1 || var->hsync_len > 64) + printk(KERN_ERR "%s: invalid hsync_len %d\n", + info->fix.id, var->hsync_len); +- if (var->left_margin > 255) ++ if (var->left_margin < 3 || var->left_margin > 255) + printk(KERN_ERR "%s: invalid left_margin %d\n", + info->fix.id, var->left_margin); +- if (var->right_margin > 255) ++ if (var->right_margin < 1 || var->right_margin > 255) + printk(KERN_ERR "%s: invalid right_margin %d\n", + info->fix.id, var->right_margin); + if (var->yres < 1 || var->yres > ymax_mask) +-- +2.39.2 + diff --git a/queue-6.1/fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch b/queue-6.1/fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch new file mode 100644 index 00000000000..4331148e36a --- /dev/null +++ b/queue-6.1/fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch @@ -0,0 +1,41 @@ +From ffb509c36e5b36da98c9fb1f8f539f0cbf606665 Mon Sep 17 00:00:00 2001 +From: Immad Mir +Date: Fri, 23 Jun 2023 19:17:08 +0530 +Subject: [PATCH AUTOSEL 4.19 11/11] FS: JFS: Check for read-only mounted + filesystem in txBegin +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 95e2b352c03b0a86c5717ba1d24ea20969abcacc ] + + This patch adds a check for read-only mounted filesystem + in txBegin before starting a transaction potentially saving + from NULL pointer deref. 
+ +Signed-off-by: Immad Mir +Signed-off-by: Dave Kleikamp +Signed-off-by: Sasha Levin +--- + fs/jfs/jfs_txnmgr.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c +index 78789c5ed36b0..e10db01f253b8 100644 +--- a/fs/jfs/jfs_txnmgr.c ++++ b/fs/jfs/jfs_txnmgr.c +@@ -367,6 +367,11 @@ tid_t txBegin(struct super_block *sb, int flag) + jfs_info("txBegin: flag = 0x%x", flag); + log = JFS_SBI(sb)->log; + ++ if (!log) { ++ jfs_error(sb, "read-only filesystem\n"); ++ return 0; ++ } ++ + TXN_LOCK(); + + INCREMENT(TxStat.txBegin); +-- +2.39.2 + diff --git a/queue-6.1/fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch b/queue-6.1/fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch new file mode 100644 index 00000000000..bccceacdac9 --- /dev/null +++ b/queue-6.1/fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch @@ -0,0 +1,46 @@ +From ced92b3b30ff868a14d5763842e5299bdad70edb Mon Sep 17 00:00:00 2001 +From: Immad Mir +Date: Fri, 23 Jun 2023 19:14:01 +0530 +Subject: [PATCH AUTOSEL 4.19 10/11] FS: JFS: Fix null-ptr-deref Read in + txBegin +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 47cfdc338d674d38f4b2f22b7612cc6a2763ba27 ] + + Syzkaller reported an issue where txBegin may be called + on a superblock in a read-only mounted filesystem which leads + to NULL pointer deref. This could be solved by checking if + the filesystem is read-only before calling txBegin, and returning + with appropriate error code. 
+ +Reported-By: syzbot+f1faa20eec55e0c8644c@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=be7e52c50c5182cc09a09ea6fc456446b2039de3 + +Signed-off-by: Immad Mir +Signed-off-by: Dave Kleikamp +Signed-off-by: Sasha Levin +--- + fs/jfs/namei.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c +index 14528c0ffe635..c2c439acbb780 100644 +--- a/fs/jfs/namei.c ++++ b/fs/jfs/namei.c +@@ -811,6 +811,11 @@ static int jfs_link(struct dentry *old_dentry, + if (rc) + goto out; + ++ if (isReadOnly(ip)) { ++ jfs_error(ip->i_sb, "read-only filesystem\n"); ++ return -EROFS; ++ } ++ + tid = txBegin(ip->i_sb, 0); + + mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); +-- +2.39.2 + diff --git a/queue-6.1/fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch b/queue-6.1/fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch new file mode 100644 index 00000000000..0676bb0e0af --- /dev/null +++ b/queue-6.1/fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch @@ -0,0 +1,88 @@ +From 35a29fcb694a5f3ee27d66f57f19795b367fd883 Mon Sep 17 00:00:00 2001 +From: Yogesh +Date: Thu, 22 Jun 2023 00:07:03 +0530 +Subject: [PATCH AUTOSEL 4.19 08/11] fs: jfs: Fix UBSAN: + array-index-out-of-bounds in dbAllocDmapLev +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 4e302336d5ca1767a06beee7596a72d3bdc8d983 ] + +Syzkaller reported the following issue: + +UBSAN: array-index-out-of-bounds in fs/jfs/jfs_dmap.c:1965:6 +index -84 is out of range for type 's8[341]' (aka 'signed char[341]') +CPU: 1 PID: 4995 Comm: syz-executor146 Not tainted 6.4.0-rc6-syzkaller-00037-gb6dad5178cea #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 + ubsan_epilogue lib/ubsan.c:217 [inline] + __ubsan_handle_out_of_bounds+0x11c/0x150 
lib/ubsan.c:348 + dbAllocDmapLev+0x3e5/0x430 fs/jfs/jfs_dmap.c:1965 + dbAllocCtl+0x113/0x920 fs/jfs/jfs_dmap.c:1809 + dbAllocAG+0x28f/0x10b0 fs/jfs/jfs_dmap.c:1350 + dbAlloc+0x658/0xca0 fs/jfs/jfs_dmap.c:874 + dtSplitUp fs/jfs/jfs_dtree.c:974 [inline] + dtInsert+0xda7/0x6b00 fs/jfs/jfs_dtree.c:863 + jfs_create+0x7b6/0xbb0 fs/jfs/namei.c:137 + lookup_open fs/namei.c:3492 [inline] + open_last_lookups fs/namei.c:3560 [inline] + path_openat+0x13df/0x3170 fs/namei.c:3788 + do_filp_open+0x234/0x490 fs/namei.c:3818 + do_sys_openat2+0x13f/0x500 fs/open.c:1356 + do_sys_open fs/open.c:1372 [inline] + __do_sys_openat fs/open.c:1388 [inline] + __se_sys_openat fs/open.c:1383 [inline] + __x64_sys_openat+0x247/0x290 fs/open.c:1383 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f1f4e33f7e9 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007ffc21129578 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1f4e33f7e9 +RDX: 000000000000275a RSI: 0000000020000040 RDI: 00000000ffffff9c +RBP: 00007f1f4e2ff080 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1f4e2ff110 +R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 + + +The bug occurs when the dbAllocDmapLev()function attempts to access +dp->tree.stree[leafidx + LEAFIND] while the leafidx value is negative. + +To rectify this, the patch introduces a safeguard within the +dbAllocDmapLev() function. A check has been added to verify if leafidx is +negative. If it is, the function immediately returns an I/O error, preventing +any further execution that could potentially cause harm. + +Tested via syzbot. 
+ +Reported-by: syzbot+853a6f4dfa3cf37d3aea@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?extid=ae2f5a27a07ae44b0f17 +Signed-off-by: Yogesh +Signed-off-by: Dave Kleikamp +Signed-off-by: Sasha Levin +--- + fs/jfs/jfs_dmap.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c +index 3ad0a33e0443b..6fdf18caf241d 100644 +--- a/fs/jfs/jfs_dmap.c ++++ b/fs/jfs/jfs_dmap.c +@@ -2034,6 +2034,9 @@ dbAllocDmapLev(struct bmap * bmp, + if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) + return -ENOSPC; + ++ if (leafidx < 0) ++ return -EIO; ++ + /* determine the block number within the file system corresponding + * to the leaf at which free space was found. + */ +-- +2.39.2 + diff --git a/queue-6.1/hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch b/queue-6.1/hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch new file mode 100644 index 00000000000..f0aa875f044 --- /dev/null +++ b/queue-6.1/hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch @@ -0,0 +1,56 @@ +From dc3ca84683c4bb50761998adaf575f383748ba73 Mon Sep 17 00:00:00 2001 +From: Marco Morandini +Date: Tue, 30 May 2023 15:40:08 +0200 +Subject: [PATCH AUTOSEL 4.19 05/11] HID: add quirk for 03f0:464a HP Elite + Presenter Mouse +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 0db117359e47750d8bd310d19f13e1c4ef7fc26a ] + +HP Elite Presenter Mouse HID Record Descriptor shows +two mouses (Report ID 0x1 and 0x2), one keypad (Report ID 0x5), +two Consumer Controls (Report IDs 0x6 and 0x3). +Previous to this commit it registers one mouse, one keypad +and one Consumer Control, and it was usable only as a +digital laser pointer (one of the two mouses). This patch defines +the 464a USB device ID and enables the HID_QUIRK_MULTI_INPUT +quirk for it, allowing to use the device both as a mouse +and a digital laser pointer.
+ +Signed-off-by: Marco Morandini +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-ids.h | 1 + + drivers/hid/hid-quirks.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index c0ba8d6f4978f..a9d6f8acf70b5 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -571,6 +571,7 @@ + #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030 + + #define USB_VENDOR_ID_HP 0x03f0 ++#define USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A 0x464a + #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a + #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a + #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a +diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c +index 8de294aa3184a..a2ab338166e61 100644 +--- a/drivers/hid/hid-quirks.c ++++ b/drivers/hid/hid-quirks.c +@@ -98,6 +98,7 @@ static const struct hid_device_id hid_quirks[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A293), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A), HID_QUIRK_ALWAYS_POLL }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL }, +-- +2.39.2 + diff --git a/queue-6.1/iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch b/queue-6.1/iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch new file mode 100644 index 00000000000..85904bae1b5 ---
/dev/null +++ b/queue-6.1/iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch @@ -0,0 +1,342 @@ +From d67f7140ec52c786fa3e1e17d5a41330d5965e52 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Jun 2023 10:52:25 -0400 +Subject: iavf: fix a deadlock caused by rtnl and driver's lock circular + dependencies + +From: Ahmed Zaki + +[ Upstream commit d1639a17319ba78a018280cd2df6577a7e5d9fab ] + +A driver's lock (crit_lock) is used to serialize all the driver's tasks. +Lockdep, however, shows a circular dependency between rtnl and +crit_lock. This happens when an ndo that already holds the rtnl requests +the driver to reset, since the reset task (in some paths) tries to grab +rtnl to either change real number of queues or update netdev features. + + [566.241851] ====================================================== + [566.241893] WARNING: possible circular locking dependency detected + [566.241936] 6.2.14-100.fc36.x86_64+debug #1 Tainted: G OE + [566.241984] ------------------------------------------------------ + [566.242025] repro.sh/2604 is trying to acquire lock: + [566.242061] ffff9280fc5ceee8 (&adapter->crit_lock){+.+.}-{3:3}, at: iavf_close+0x3c/0x240 [iavf] + [566.242167] + but task is already holding lock: + [566.242209] ffffffff9976d350 (rtnl_mutex){+.+.}-{3:3}, at: iavf_remove+0x6b5/0x730 [iavf] + [566.242300] + which lock already depends on the new lock.
+ + [566.242353] + the existing dependency chain (in reverse order) is: + [566.242401] + -> #1 (rtnl_mutex){+.+.}-{3:3}: + [566.242451] __mutex_lock+0xc1/0xbb0 + [566.242489] iavf_init_interrupt_scheme+0x179/0x440 [iavf] + [566.242560] iavf_watchdog_task+0x80b/0x1400 [iavf] + [566.242627] process_one_work+0x2b3/0x560 + [566.242663] worker_thread+0x4f/0x3a0 + [566.242696] kthread+0xf2/0x120 + [566.242730] ret_from_fork+0x29/0x50 + [566.242763] + -> #0 (&adapter->crit_lock){+.+.}-{3:3}: + [566.242815] __lock_acquire+0x15ff/0x22b0 + [566.242869] lock_acquire+0xd2/0x2c0 + [566.242901] __mutex_lock+0xc1/0xbb0 + [566.242934] iavf_close+0x3c/0x240 [iavf] + [566.242997] __dev_close_many+0xac/0x120 + [566.243036] dev_close_many+0x8b/0x140 + [566.243071] unregister_netdevice_many_notify+0x165/0x7c0 + [566.243116] unregister_netdevice_queue+0xd3/0x110 + [566.243157] iavf_remove+0x6c1/0x730 [iavf] + [566.243217] pci_device_remove+0x33/0xa0 + [566.243257] device_release_driver_internal+0x1bc/0x240 + [566.243299] pci_stop_bus_device+0x6c/0x90 + [566.243338] pci_stop_and_remove_bus_device+0xe/0x20 + [566.243380] pci_iov_remove_virtfn+0xd1/0x130 + [566.243417] sriov_disable+0x34/0xe0 + [566.243448] ice_free_vfs+0x2da/0x330 [ice] + [566.244383] ice_sriov_configure+0x88/0xad0 [ice] + [566.245353] sriov_numvfs_store+0xde/0x1d0 + [566.246156] kernfs_fop_write_iter+0x15e/0x210 + [566.246921] vfs_write+0x288/0x530 + [566.247671] ksys_write+0x74/0xf0 + [566.248408] do_syscall_64+0x58/0x80 + [566.249145] entry_SYSCALL_64_after_hwframe+0x72/0xdc + [566.249886] + other info that might help us debug this: + + [566.252014] Possible unsafe locking scenario: + + [566.253432] CPU0 CPU1 + [566.254118] ---- ---- + [566.254800] lock(rtnl_mutex); + [566.255514] lock(&adapter->crit_lock); + [566.256233] lock(rtnl_mutex); + [566.256897] lock(&adapter->crit_lock); + [566.257388] + *** DEADLOCK *** + +The deadlock can be triggered by a script that is continuously resetting +the VF adapter while doing 
other operations requiring RTNL, e.g: + + while :; do + ip link set $VF up + ethtool --set-channels $VF combined 2 + ip link set $VF down + ip link set $VF up + ethtool --set-channels $VF combined 4 + ip link set $VF down + done + +Any operation that triggers a reset can substitute "ethtool --set-channels" + +As a fix, add a new task "finish_config" that does all the work which +needs rtnl lock. With the exception of iavf_remove(), all work that +requires rtnl should be called from this task. + +As for iavf_remove(), at the point where we need to call +unregister_netdevice() (and grab rtnl_lock), we make sure the finish_config +task is not running (cancel_work_sync()) to safely grab rtnl. Subsequent +finish_config work cannot restart after that since the task is guarded +by the __IAVF_IN_REMOVE_TASK bit in iavf_schedule_finish_config(). + +Fixes: 5ac49f3c2702 ("iavf: use mutexes for locking of critical sections") +Signed-off-by: Ahmed Zaki +Signed-off-by: Mateusz Palczewski +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf.h | 2 + + drivers/net/ethernet/intel/iavf/iavf_main.c | 114 +++++++++++++----- + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 + + 3 files changed, 85 insertions(+), 32 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h +index 2fe44e865d0a2..305675042fe55 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf.h ++++ b/drivers/net/ethernet/intel/iavf/iavf.h +@@ -255,6 +255,7 @@ struct iavf_adapter { + struct workqueue_struct *wq; + struct work_struct reset_task; + struct work_struct adminq_task; ++ struct work_struct finish_config; + struct delayed_work client_task; + wait_queue_head_t down_waitqueue; + wait_queue_head_t reset_waitqueue; +@@ -521,6 +522,7 @@ int iavf_process_config(struct iavf_adapter *adapter); + int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); + void iavf_schedule_reset(struct
iavf_adapter *adapter); + void iavf_schedule_request_stats(struct iavf_adapter *adapter); ++void iavf_schedule_finish_config(struct iavf_adapter *adapter); + void iavf_reset(struct iavf_adapter *adapter); + void iavf_set_ethtool_ops(struct net_device *netdev); + void iavf_update_stats(struct iavf_adapter *adapter); +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index c2739071149de..0e201d690f0dd 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -1702,10 +1702,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter) + adapter->msix_entries[vector].entry = vector; + + err = iavf_acquire_msix_vectors(adapter, v_budget); ++ if (!err) ++ iavf_schedule_finish_config(adapter); + + out: +- netif_set_real_num_rx_queues(adapter->netdev, pairs); +- netif_set_real_num_tx_queues(adapter->netdev, pairs); + return err; + } + +@@ -1925,9 +1925,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) + goto err_alloc_queues; + } + +- rtnl_lock(); + err = iavf_set_interrupt_capability(adapter); +- rtnl_unlock(); + if (err) { + dev_err(&adapter->pdev->dev, + "Unable to setup interrupt capabilities\n"); +@@ -2013,6 +2011,78 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni + return err; + } + ++/** ++ * iavf_finish_config - do all netdev work that needs RTNL ++ * @work: our work_struct ++ * ++ * Do work that needs both RTNL and crit_lock. 
++ **/ ++static void iavf_finish_config(struct work_struct *work) ++{ ++ struct iavf_adapter *adapter; ++ int pairs, err; ++ ++ adapter = container_of(work, struct iavf_adapter, finish_config); ++ ++ /* Always take RTNL first to prevent circular lock dependency */ ++ rtnl_lock(); ++ mutex_lock(&adapter->crit_lock); ++ ++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && ++ adapter->netdev_registered && ++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { ++ netdev_update_features(adapter->netdev); ++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; ++ } ++ ++ switch (adapter->state) { ++ case __IAVF_DOWN: ++ if (!adapter->netdev_registered) { ++ err = register_netdevice(adapter->netdev); ++ if (err) { ++ dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", ++ err); ++ ++ /* go back and try again.*/ ++ iavf_free_rss(adapter); ++ iavf_free_misc_irq(adapter); ++ iavf_reset_interrupt_capability(adapter); ++ iavf_change_state(adapter, ++ __IAVF_INIT_CONFIG_ADAPTER); ++ goto out; ++ } ++ adapter->netdev_registered = true; ++ } ++ ++ /* Set the real number of queues when reset occurs while ++ * state == __IAVF_DOWN ++ */ ++ fallthrough; ++ case __IAVF_RUNNING: ++ pairs = adapter->num_active_queues; ++ netif_set_real_num_rx_queues(adapter->netdev, pairs); ++ netif_set_real_num_tx_queues(adapter->netdev, pairs); ++ break; ++ ++ default: ++ break; ++ } ++ ++out: ++ mutex_unlock(&adapter->crit_lock); ++ rtnl_unlock(); ++} ++ ++/** ++ * iavf_schedule_finish_config - Set the flags and schedule a reset event ++ * @adapter: board private structure ++ **/ ++void iavf_schedule_finish_config(struct iavf_adapter *adapter) ++{ ++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) ++ queue_work(adapter->wq, &adapter->finish_config); ++} ++ + /** + * iavf_process_aq_command - process aq_required flags + * and sends aq command +@@ -2650,22 +2720,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) + + 
netif_carrier_off(netdev); + adapter->link_up = false; +- +- /* set the semaphore to prevent any callbacks after device registration +- * up to time when state of driver will be set to __IAVF_DOWN +- */ +- rtnl_lock(); +- if (!adapter->netdev_registered) { +- err = register_netdevice(netdev); +- if (err) { +- rtnl_unlock(); +- goto err_register; +- } +- } +- +- adapter->netdev_registered = true; +- + netif_tx_stop_all_queues(netdev); ++ + if (CLIENT_ALLOWED(adapter)) { + err = iavf_lan_add_device(adapter); + if (err) +@@ -2678,7 +2734,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) + + iavf_change_state(adapter, __IAVF_DOWN); + set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); +- rtnl_unlock(); + + iavf_misc_irq_enable(adapter); + wake_up(&adapter->down_waitqueue); +@@ -2698,10 +2753,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) + /* request initial VLAN offload settings */ + iavf_set_vlan_offload_features(adapter, 0, netdev->features); + ++ iavf_schedule_finish_config(adapter); + return; ++ + err_mem: + iavf_free_rss(adapter); +-err_register: + iavf_free_misc_irq(adapter); + err_sw_init: + iavf_reset_interrupt_capability(adapter); +@@ -2728,15 +2784,6 @@ static void iavf_watchdog_task(struct work_struct *work) + goto restart_watchdog; + } + +- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && +- adapter->netdev_registered && +- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && +- rtnl_trylock()) { +- netdev_update_features(adapter->netdev); +- rtnl_unlock(); +- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; +- } +- + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + iavf_change_state(adapter, __IAVF_COMM_FAILED); + +@@ -4980,6 +5027,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + INIT_WORK(&adapter->reset_task, iavf_reset_task); + INIT_WORK(&adapter->adminq_task, iavf_adminq_task); ++ INIT_WORK(&adapter->finish_config, iavf_finish_config); + 
INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); + INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, +@@ -5123,13 +5171,15 @@ static void iavf_remove(struct pci_dev *pdev) + usleep_range(500, 1000); + } + cancel_delayed_work_sync(&adapter->watchdog_task); ++ cancel_work_sync(&adapter->finish_config); + ++ rtnl_lock(); + if (adapter->netdev_registered) { +- rtnl_lock(); + unregister_netdevice(netdev); + adapter->netdev_registered = false; +- rtnl_unlock(); + } ++ rtnl_unlock(); ++ + if (CLIENT_ALLOWED(adapter)) { + err = iavf_lan_del_device(adapter); + if (err) +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index eec7ac3b7f6ee..35419673b6987 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -2237,6 +2237,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + + iavf_process_config(adapter); + adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; ++ iavf_schedule_finish_config(adapter); + + iavf_set_queue_vlan_tag_loc(adapter); + +-- +2.39.2 + diff --git a/queue-6.1/iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch b/queue-6.1/iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch new file mode 100644 index 00000000000..ce0bd2c31df --- /dev/null +++ b/queue-6.1/iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch @@ -0,0 +1,160 @@ +From cc55115bcb0aa7ee5bb38c780a6de7795ff2f2b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 19:11:48 +0800 +Subject: iavf: Fix out-of-bounds when setting channels on remove + +From: Ding Hui + +[ Upstream commit 7c4bced3caa749ce468b0c5de711c98476b23a52 ] + +If we set channels greater during iavf_remove(), and waiting reset done +would be timeout, then returned with error but changed num_active_queues +directly, that will lead to OOB like the following logs. 
Because the +num_active_queues is greater than tx/rx_rings[] allocated actually. + +Reproducer: + + [root@host ~]# cat repro.sh + #!/bin/bash + + pf_dbsf="0000:41:00.0" + vf0_dbsf="0000:41:02.0" + g_pids=() + + function do_set_numvf() + { + echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + } + + function do_set_channel() + { + local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) + [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } + ifconfig $nic 192.168.18.5 netmask 255.255.255.0 + ifconfig $nic up + ethtool -L $nic combined 1 + ethtool -L $nic combined 4 + sleep $((RANDOM%3)) + } + + function on_exit() + { + local pid + for pid in "${g_pids[@]}"; do + kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null + done + g_pids=() + } + + trap "on_exit; exit" EXIT + + while :; do do_set_numvf ; done & + g_pids+=($!) + while :; do do_set_channel ; done & + g_pids+=($!) + + wait + +Result: + +[ 3506.152887] iavf 0000:41:02.0: Removing device +[ 3510.400799] ================================================================== +[ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] +[ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536 +[ 3510.400823] +[ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 +[ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 +[ 3510.400835] Call Trace: +[ 3510.400851] dump_stack+0x71/0xab +[ 3510.400860] print_address_description+0x6b/0x290 +[ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf] +[ 3510.400868] kasan_report+0x14a/0x2b0 +[ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf] +[ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf] +[ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf] +[ 3510.400891] ? wait_woken+0x1d0/0x1d0 +[ 3510.400895] ? 
notifier_call_chain+0xc1/0x130 +[ 3510.400903] pci_device_remove+0xa8/0x1f0 +[ 3510.400910] device_release_driver_internal+0x1c6/0x460 +[ 3510.400916] pci_stop_bus_device+0x101/0x150 +[ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20 +[ 3510.400924] pci_iov_remove_virtfn+0x187/0x420 +[ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10 +[ 3510.400929] ? pci_get_subsys+0x90/0x90 +[ 3510.400932] sriov_disable+0xed/0x3e0 +[ 3510.400936] ? bus_find_device+0x12d/0x1a0 +[ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e] +[ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e] +[ 3510.400968] ? pci_get_device+0x7c/0x90 +[ 3510.400970] ? pci_get_subsys+0x90/0x90 +[ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210 +[ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10 +[ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] +[ 3510.401001] sriov_numvfs_store+0x214/0x290 +[ 3510.401005] ? sriov_totalvfs_show+0x30/0x30 +[ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10 +[ 3510.401011] ? __check_object_size+0x15a/0x350 +[ 3510.401018] kernfs_fop_write+0x280/0x3f0 +[ 3510.401022] vfs_write+0x145/0x440 +[ 3510.401025] ksys_write+0xab/0x160 +[ 3510.401028] ? __ia32_sys_read+0xb0/0xb0 +[ 3510.401031] ? fput_many+0x1a/0x120 +[ 3510.401032] ? filp_close+0xf0/0x130 +[ 3510.401038] do_syscall_64+0xa0/0x370 +[ 3510.401041] ? 
page_fault+0x8/0x30 +[ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca +[ 3510.401073] RIP: 0033:0x7f3a9bb842c0 +[ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 +[ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0 +[ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001 +[ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700 +[ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 +[ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001 +[ 3510.401090] +[ 3510.401093] Allocated by task 76795: +[ 3510.401098] kasan_kmalloc+0xa6/0xd0 +[ 3510.401099] __kmalloc+0xfb/0x200 +[ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf] +[ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf] +[ 3510.401114] process_one_work+0x56a/0x11f0 +[ 3510.401115] worker_thread+0x8f/0xf40 +[ 3510.401117] kthread+0x2a0/0x390 +[ 3510.401119] ret_from_fork+0x1f/0x40 +[ 3510.401122] 0xffffffffffffffff +[ 3510.401123] + +In timeout handling, we should keep the original num_active_queues +and reset num_req_queues to 0. 
+ +Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") +Signed-off-by: Ding Hui +Cc: Donglin Peng +Cc: Huang Cun +Reviewed-by: Leon Romanovsky +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 83cfc54a47062..4746ee517c75a 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -1863,7 +1863,7 @@ static int iavf_set_channels(struct net_device *netdev, + } + if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; +- adapter->num_active_queues = num_req; ++ adapter->num_req_queues = 0; + return -EOPNOTSUPP; + } + +-- +2.39.2 + diff --git a/queue-6.1/iavf-fix-reset-task-race-with-iavf_remove.patch b/queue-6.1/iavf-fix-reset-task-race-with-iavf_remove.patch new file mode 100644 index 00000000000..0e837151f9f --- /dev/null +++ b/queue-6.1/iavf-fix-reset-task-race-with-iavf_remove.patch @@ -0,0 +1,190 @@ +From 045d5f68bcd8b2284e19c86bfd77bc8ae236d467 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Jun 2023 10:52:26 -0400 +Subject: iavf: fix reset task race with iavf_remove() + +From: Ahmed Zaki + +[ Upstream commit c34743daca0eb1dc855831a5210f0800a850088e ] + +The reset task is currently scheduled from the watchdog or adminq tasks. +First, all direct calls to schedule the reset task are replaced with the +iavf_schedule_reset(), which is modified to accept the flag showing the +type of reset. + +To prevent the reset task from starting once iavf_remove() starts, we need +to check the __IAVF_IN_REMOVE_TASK bit before we schedule it. This is now +easily added to iavf_schedule_reset(). + +Finally, remove the check for IAVF_FLAG_RESET_NEEDED in the watchdog task. 
+It is redundant since all callers who set the flag immediately schedules +the reset task. + +Fixes: 3ccd54ef44eb ("iavf: Fix init state closure on remove") +Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage") +Signed-off-by: Ahmed Zaki +Signed-off-by: Mateusz Palczewski +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf.h | 2 +- + .../net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 32 +++++++------------ + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 3 +- + 4 files changed, 16 insertions(+), 29 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h +index 305675042fe55..543931c06bb17 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf.h ++++ b/drivers/net/ethernet/intel/iavf/iavf.h +@@ -520,7 +520,7 @@ int iavf_up(struct iavf_adapter *adapter); + void iavf_down(struct iavf_adapter *adapter); + int iavf_process_config(struct iavf_adapter *adapter); + int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); +-void iavf_schedule_reset(struct iavf_adapter *adapter); ++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags); + void iavf_schedule_request_stats(struct iavf_adapter *adapter); + void iavf_schedule_finish_config(struct iavf_adapter *adapter); + void iavf_reset(struct iavf_adapter *adapter); +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 73219c5069290..fd6d6f6263f66 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -532,8 +532,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) + /* issue a reset to force legacy-rx change to take effect */ + if (changed_flags & IAVF_FLAG_LEGACY_RX) { + if (netif_running(netdev)) { +- adapter->flags |= IAVF_FLAG_RESET_NEEDED; +- queue_work(adapter->wq, 
&adapter->reset_task); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); +@@ -676,8 +675,7 @@ static int iavf_set_ringparam(struct net_device *netdev, + } + + if (netif_running(netdev)) { +- adapter->flags |= IAVF_FLAG_RESET_NEEDED; +- queue_work(adapter->wq, &adapter->reset_task); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); +@@ -1860,7 +1858,7 @@ static int iavf_set_channels(struct net_device *netdev, + + adapter->num_req_queues = num_req; + adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; +- iavf_schedule_reset(adapter); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + + ret = iavf_wait_for_reset(adapter); + if (ret) +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 0e201d690f0dd..c1f91c55e1ca7 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -309,12 +309,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) + /** + * iavf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure ++ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED + **/ +-void iavf_schedule_reset(struct iavf_adapter *adapter) ++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags) + { +- if (!(adapter->flags & +- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { +- adapter->flags |= IAVF_FLAG_RESET_NEEDED; ++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && ++ !(adapter->flags & ++ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { ++ adapter->flags |= flags; + queue_work(adapter->wq, &adapter->reset_task); + } + } +@@ -342,7 +344,7 @@ static void 
iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) + struct iavf_adapter *adapter = netdev_priv(netdev); + + adapter->tx_timeout_count++; +- iavf_schedule_reset(adapter); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + } + + /** +@@ -2490,7 +2492,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) + adapter->vsi_res->num_queue_pairs); + adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED; + adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; +- iavf_schedule_reset(adapter); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + + return -EAGAIN; + } +@@ -2787,14 +2789,6 @@ static void iavf_watchdog_task(struct work_struct *work) + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + iavf_change_state(adapter, __IAVF_COMM_FAILED); + +- if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { +- adapter->aq_required = 0; +- adapter->current_op = VIRTCHNL_OP_UNKNOWN; +- mutex_unlock(&adapter->crit_lock); +- queue_work(adapter->wq, &adapter->reset_task); +- return; +- } +- + switch (adapter->state) { + case __IAVF_STARTUP: + iavf_startup(adapter); +@@ -2922,11 +2916,10 @@ static void iavf_watchdog_task(struct work_struct *work) + /* check for hw reset */ + reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; + if (!reg_val) { +- adapter->flags |= IAVF_FLAG_RESET_PENDING; + adapter->aq_required = 0; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); +- queue_work(adapter->wq, &adapter->reset_task); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(adapter->wq, + &adapter->watchdog_task, HZ * 2); +@@ -3324,9 +3317,7 @@ static void iavf_adminq_task(struct work_struct *work) + } while (pending); + mutex_unlock(&adapter->crit_lock); + +- if ((adapter->flags & +- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || +- adapter->state == __IAVF_RESETTING) ++ if (iavf_is_reset_in_progress(adapter)) + goto 
freedom; + + /* check for error indications */ +@@ -4423,8 +4414,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) + } + + if (netif_running(netdev)) { +- adapter->flags |= IAVF_FLAG_RESET_NEEDED; +- queue_work(adapter->wq, &adapter->reset_task); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret < 0) + netdev_warn(netdev, "MTU change interrupted waiting for reset"); +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 35419673b6987..2fc8e60ef6afb 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + case VIRTCHNL_EVENT_RESET_IMPENDING: + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { +- adapter->flags |= IAVF_FLAG_RESET_PENDING; + dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); +- queue_work(adapter->wq, &adapter->reset_task); ++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); + } + break; + default: +-- +2.39.2 + diff --git a/queue-6.1/iavf-fix-use-after-free-in-free_netdev.patch b/queue-6.1/iavf-fix-use-after-free-in-free_netdev.patch new file mode 100644 index 00000000000..4191b7d0987 --- /dev/null +++ b/queue-6.1/iavf-fix-use-after-free-in-free_netdev.patch @@ -0,0 +1,215 @@ +From 65df986e4dd0e7534d9caca118a4603cfb45336b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 19:11:47 +0800 +Subject: iavf: Fix use-after-free in free_netdev + +From: Ding Hui + +[ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ] + +We do netif_napi_add() for all allocated q_vectors[], but potentially +do netif_napi_del() for part of them, then kfree q_vectors and leave +invalid pointers at dev->napi_list. 
+ +Reproducer: + + [root@host ~]# cat repro.sh + #!/bin/bash + + pf_dbsf="0000:41:00.0" + vf0_dbsf="0000:41:02.0" + g_pids=() + + function do_set_numvf() + { + echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs + sleep $((RANDOM%3+1)) + } + + function do_set_channel() + { + local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) + [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } + ifconfig $nic 192.168.18.5 netmask 255.255.255.0 + ifconfig $nic up + ethtool -L $nic combined 1 + ethtool -L $nic combined 4 + sleep $((RANDOM%3)) + } + + function on_exit() + { + local pid + for pid in "${g_pids[@]}"; do + kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null + done + g_pids=() + } + + trap "on_exit; exit" EXIT + + while :; do do_set_numvf ; done & + g_pids+=($!) + while :; do do_set_channel ; done & + g_pids+=($!) + + wait + +Result: + +[ 4093.900222] ================================================================== +[ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390 +[ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699 +[ 4093.900233] +[ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 +[ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 +[ 4093.900239] Call Trace: +[ 4093.900244] dump_stack+0x71/0xab +[ 4093.900249] print_address_description+0x6b/0x290 +[ 4093.900251] ? free_netdev+0x308/0x390 +[ 4093.900252] kasan_report+0x14a/0x2b0 +[ 4093.900254] free_netdev+0x308/0x390 +[ 4093.900261] iavf_remove+0x825/0xd20 [iavf] +[ 4093.900265] pci_device_remove+0xa8/0x1f0 +[ 4093.900268] device_release_driver_internal+0x1c6/0x460 +[ 4093.900271] pci_stop_bus_device+0x101/0x150 +[ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20 +[ 4093.900275] pci_iov_remove_virtfn+0x187/0x420 +[ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10 +[ 4093.900278] ? 
pci_get_subsys+0x90/0x90 +[ 4093.900280] sriov_disable+0xed/0x3e0 +[ 4093.900282] ? bus_find_device+0x12d/0x1a0 +[ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e] +[ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e] +[ 4093.900299] ? pci_get_device+0x7c/0x90 +[ 4093.900300] ? pci_get_subsys+0x90/0x90 +[ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210 +[ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10 +[ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] +[ 4093.900318] sriov_numvfs_store+0x214/0x290 +[ 4093.900320] ? sriov_totalvfs_show+0x30/0x30 +[ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10 +[ 4093.900323] ? __check_object_size+0x15a/0x350 +[ 4093.900326] kernfs_fop_write+0x280/0x3f0 +[ 4093.900329] vfs_write+0x145/0x440 +[ 4093.900330] ksys_write+0xab/0x160 +[ 4093.900332] ? __ia32_sys_read+0xb0/0xb0 +[ 4093.900334] ? fput_many+0x1a/0x120 +[ 4093.900335] ? filp_close+0xf0/0x130 +[ 4093.900338] do_syscall_64+0xa0/0x370 +[ 4093.900339] ? page_fault+0x8/0x30 +[ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca +[ 4093.900357] RIP: 0033:0x7f16ad4d22c0 +[ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 +[ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0 +[ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001 +[ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700 +[ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 +[ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001 +[ 4093.900367] +[ 4093.900368] Allocated by task 820: +[ 4093.900371] kasan_kmalloc+0xa6/0xd0 +[ 4093.900373] __kmalloc+0xfb/0x200 +[ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf] +[ 
4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf] +[ 4093.900382] process_one_work+0x56a/0x11f0 +[ 4093.900383] worker_thread+0x8f/0xf40 +[ 4093.900384] kthread+0x2a0/0x390 +[ 4093.900385] ret_from_fork+0x1f/0x40 +[ 4093.900387] 0xffffffffffffffff +[ 4093.900387] +[ 4093.900388] Freed by task 6699: +[ 4093.900390] __kasan_slab_free+0x137/0x190 +[ 4093.900391] kfree+0x8b/0x1b0 +[ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf] +[ 4093.900397] iavf_remove+0x35a/0xd20 [iavf] +[ 4093.900399] pci_device_remove+0xa8/0x1f0 +[ 4093.900400] device_release_driver_internal+0x1c6/0x460 +[ 4093.900401] pci_stop_bus_device+0x101/0x150 +[ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20 +[ 4093.900403] pci_iov_remove_virtfn+0x187/0x420 +[ 4093.900404] sriov_disable+0xed/0x3e0 +[ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e] +[ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] +[ 4093.900416] sriov_numvfs_store+0x214/0x290 +[ 4093.900417] kernfs_fop_write+0x280/0x3f0 +[ 4093.900418] vfs_write+0x145/0x440 +[ 4093.900419] ksys_write+0xab/0x160 +[ 4093.900420] do_syscall_64+0xa0/0x370 +[ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca +[ 4093.900422] 0xffffffffffffffff +[ 4093.900422] +[ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200 + which belongs to the cache kmalloc-8k of size 8192 +[ 4093.900425] The buggy address is located 5184 bytes inside of + 8192-byte region [ffff88b4dc144200, ffff88b4dc146200) +[ 4093.900425] The buggy address belongs to the page: +[ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0 +[ 4093.900430] flags: 0x10000000008100(slab|head) +[ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80 +[ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 +[ 4093.900434] page dumped because: kasan: bad access detected +[ 4093.900435] +[ 4093.900435] Memory state around the buggy address: +[ 
4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900438] ^ +[ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 4093.900440] ================================================================== + +Although the patch #2 (of 2) can avoid the issue triggered by this +repro.sh, there still are other potential risks that if num_active_queues +is changed to less than allocated q_vectors[] by unexpected, the +mismatched netif_napi_add/del() can also cause UAF. + +Since we actually call netif_napi_add() for all allocated q_vectors +unconditionally in iavf_alloc_q_vectors(), so we should fix it by +letting netif_napi_del() match to netif_napi_add(). + +Fixes: 5eae00c57f5e ("i40evf: main driver core") +Signed-off-by: Ding Hui +Cc: Donglin Peng +Cc: Huang Cun +Reviewed-by: Simon Horman +Reviewed-by: Madhu Chittim +Reviewed-by: Leon Romanovsky +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 965d02d7ff80f..81676c3af4b36 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -1840,19 +1840,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) + static void iavf_free_q_vectors(struct iavf_adapter *adapter) + { + int q_idx, num_q_vectors; +- int napi_vectors; + + if (!adapter->q_vectors) + return; + + num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; +- napi_vectors = adapter->num_active_queues; + + for (q_idx = 0; q_idx < num_q_vectors; 
q_idx++) { + struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; + +- if (q_idx < napi_vectors) +- netif_napi_del(&q_vector->napi); ++ netif_napi_del(&q_vector->napi); + } + kfree(adapter->q_vectors); + adapter->q_vectors = NULL; +-- +2.39.2 + diff --git a/queue-6.1/iavf-make-functions-static-where-possible.patch b/queue-6.1/iavf-make-functions-static-where-possible.patch new file mode 100644 index 00000000000..4105b0d4bab --- /dev/null +++ b/queue-6.1/iavf-make-functions-static-where-possible.patch @@ -0,0 +1,223 @@ +From 97d8a9e529256a00151bc682e79efba868de17a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Jun 2023 08:54:05 -0700 +Subject: iavf: make functions static where possible + +From: Przemek Kitszel + +[ Upstream commit a4aadf0f5905661cd25c366b96cc1c840f05b756 ] + +Make all possible functions static. + +Move iavf_force_wb() up to avoid forward declaration. + +Suggested-by: Maciej Fijalkowski +Reviewed-by: Maciej Fijalkowski +Signed-off-by: Przemek Kitszel +Signed-off-by: Tony Nguyen +Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf.h | 10 ----- + drivers/net/ethernet/intel/iavf/iavf_main.c | 14 +++---- + drivers/net/ethernet/intel/iavf/iavf_txrx.c | 43 ++++++++++----------- + drivers/net/ethernet/intel/iavf/iavf_txrx.h | 4 -- + 4 files changed, 28 insertions(+), 43 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h +index 6625625f91e47..a716ed6bb787d 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf.h ++++ b/drivers/net/ethernet/intel/iavf/iavf.h +@@ -523,9 +523,6 @@ void iavf_schedule_request_stats(struct iavf_adapter *adapter); + void iavf_reset(struct iavf_adapter *adapter); + void iavf_set_ethtool_ops(struct net_device *netdev); + void iavf_update_stats(struct iavf_adapter *adapter); +-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); +-int 
iavf_init_interrupt_scheme(struct iavf_adapter *adapter); +-void iavf_irq_enable_queues(struct iavf_adapter *adapter); + void iavf_free_all_tx_resources(struct iavf_adapter *adapter); + void iavf_free_all_rx_resources(struct iavf_adapter *adapter); + +@@ -579,17 +576,10 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); + void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); + void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); + void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); +-int iavf_replace_primary_mac(struct iavf_adapter *adapter, +- const u8 *new_mac); +-void +-iavf_set_vlan_offload_features(struct iavf_adapter *adapter, +- netdev_features_t prev_features, +- netdev_features_t features); + void iavf_add_fdir_filter(struct iavf_adapter *adapter); + void iavf_del_fdir_filter(struct iavf_adapter *adapter); + void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); + void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); + struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, + const u8 *macaddr); +-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs); + #endif /* _IAVF_H_ */ +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 68e951fe5e210..d5b1dcfe0ccdd 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -253,7 +253,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, + * + * Returns 0 on success, negative on failure + **/ +-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) ++static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) + { + unsigned int wait, delay = 10; + +@@ -362,7 +362,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) + * iavf_irq_enable_queues - Enable interrupt for all queues + * @adapter: board private structure + **/ +-void 
iavf_irq_enable_queues(struct iavf_adapter *adapter) ++static void iavf_irq_enable_queues(struct iavf_adapter *adapter) + { + struct iavf_hw *hw = &adapter->hw; + int i; +@@ -1003,8 +1003,8 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, + * + * Do not call this with mac_vlan_list_lock! + **/ +-int iavf_replace_primary_mac(struct iavf_adapter *adapter, +- const u8 *new_mac) ++static int iavf_replace_primary_mac(struct iavf_adapter *adapter, ++ const u8 *new_mac) + { + struct iavf_hw *hw = &adapter->hw; + struct iavf_mac_filter *f; +@@ -1860,7 +1860,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter) + * @adapter: board private structure + * + **/ +-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) ++static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) + { + if (!adapter->msix_entries) + return; +@@ -1875,7 +1875,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) + * @adapter: board private structure to initialize + * + **/ +-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) ++static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) + { + int err; + +@@ -2174,7 +2174,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) + * the watchdog if any changes are requested to expedite the request via + * virtchnl. 
+ **/ +-void ++static void + iavf_set_vlan_offload_features(struct iavf_adapter *adapter, + netdev_features_t prev_features, + netdev_features_t features) +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index e989feda133c1..8c5f6096b0022 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, + * iavf_clean_tx_ring - Free any empty Tx buffers + * @tx_ring: ring to be cleaned + **/ +-void iavf_clean_tx_ring(struct iavf_ring *tx_ring) ++static void iavf_clean_tx_ring(struct iavf_ring *tx_ring) + { + unsigned long bi_size; + u16 i; +@@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) + * Since there is no access to the ring head register + * in XL710, we need to use our local copies + **/ +-u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) ++static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) + { + u32 head, tail; + +@@ -127,6 +127,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) + return 0; + } + ++/** ++ * iavf_force_wb - Issue SW Interrupt so HW does a wb ++ * @vsi: the VSI we care about ++ * @q_vector: the vector on which to force writeback ++ **/ ++static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) ++{ ++ u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | ++ IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ ++ IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | ++ IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK ++ /* allow 00 to be written to the index */; ++ ++ wr32(&vsi->back->hw, ++ IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), ++ val); ++} ++ + /** + * iavf_detect_recover_hung - Function to detect and recover hung_queues + * @vsi: pointer to vsi struct with tx queues +@@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi, + q_vector->arm_wb_state = true; + } + +-/** +- * 
iavf_force_wb - Issue SW Interrupt so HW does a wb +- * @vsi: the VSI we care about +- * @q_vector: the vector on which to force writeback +- * +- **/ +-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) +-{ +- u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | +- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ +- IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | +- IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK +- /* allow 00 to be written to the index */; +- +- wr32(&vsi->back->hw, +- IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), +- val); +-} +- + static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, + struct iavf_ring_container *rc) + { +@@ -687,7 +686,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) + * iavf_clean_rx_ring - Free Rx buffers + * @rx_ring: ring to be cleaned + **/ +-void iavf_clean_rx_ring(struct iavf_ring *rx_ring) ++static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) + { + unsigned long bi_size; + u16 i; +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h +index 2624bf6d009e3..7e6ee32d19b69 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h +@@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) + + bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); + netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +-void iavf_clean_tx_ring(struct iavf_ring *tx_ring); +-void iavf_clean_rx_ring(struct iavf_ring *rx_ring); + int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); + int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring); + void iavf_free_tx_resources(struct iavf_ring *tx_ring); + void iavf_free_rx_resources(struct iavf_ring *rx_ring); + int iavf_napi_poll(struct napi_struct *napi, int budget); +-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector); +-u32 iavf_get_tx_pending(struct iavf_ring *ring, 
bool in_sw); + void iavf_detect_recover_hung(struct iavf_vsi *vsi); + int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); + bool __iavf_chk_linearize(struct sk_buff *skb); +-- +2.39.2 + diff --git a/queue-6.1/iavf-move-netdev_update_features-into-watchdog-task.patch b/queue-6.1/iavf-move-netdev_update_features-into-watchdog-task.patch new file mode 100644 index 00000000000..8927af5c4e9 --- /dev/null +++ b/queue-6.1/iavf-move-netdev_update_features-into-watchdog-task.patch @@ -0,0 +1,95 @@ +From 5491562d5578b2fc118790482f43fbde751e023f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 17:42:27 +0100 +Subject: iavf: Move netdev_update_features() into watchdog task + +From: Marcin Szycik + +[ Upstream commit 7598f4b40bd60e4a4280de645eb2893eea80b59d ] + +Remove netdev_update_features() from iavf_adminq_task(), as it can cause +deadlocks due to needing rtnl_lock. Instead use the +IAVF_FLAG_SETUP_NETDEV_FEATURES flag to indicate that netdev features need +to be updated in the watchdog task. iavf_set_vlan_offload_features() +and iavf_set_queue_vlan_tag_loc() can be called directly from +iavf_virtchnl_completion(). 
+ +Suggested-by: Phani Burra +Signed-off-by: Marcin Szycik +Reviewed-by: Alexander Lobakin +Tested-by: Marek Szlosek +Signed-off-by: Tony Nguyen +Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 27 +++++++------------ + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 8 ++++++ + 2 files changed, 17 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 104de9a071449..68e951fe5e210 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -2689,6 +2689,15 @@ static void iavf_watchdog_task(struct work_struct *work) + goto restart_watchdog; + } + ++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && ++ adapter->netdev_registered && ++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && ++ rtnl_trylock()) { ++ netdev_update_features(adapter->netdev); ++ rtnl_unlock(); ++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; ++ } ++ + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + iavf_change_state(adapter, __IAVF_COMM_FAILED); + +@@ -3228,24 +3237,6 @@ static void iavf_adminq_task(struct work_struct *work) + } while (pending); + mutex_unlock(&adapter->crit_lock); + +- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) { +- if (adapter->netdev_registered || +- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { +- struct net_device *netdev = adapter->netdev; +- +- rtnl_lock(); +- netdev_update_features(netdev); +- rtnl_unlock(); +- /* Request VLAN offload settings */ +- if (VLAN_V2_ALLOWED(adapter)) +- iavf_set_vlan_offload_features +- (adapter, 0, netdev->features); +- +- iavf_set_queue_vlan_tag_loc(adapter); +- } +- +- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; +- } + if ((adapter->flags & + (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || + adapter->state == 
__IAVF_RESETTING) +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 00dccdd290dce..07d37402a0df5 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -2237,6 +2237,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + + iavf_process_config(adapter); + adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; ++ ++ /* Request VLAN offload settings */ ++ if (VLAN_V2_ALLOWED(adapter)) ++ iavf_set_vlan_offload_features(adapter, 0, ++ netdev->features); ++ ++ iavf_set_queue_vlan_tag_loc(adapter); ++ + was_mac_changed = !ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr); + +-- +2.39.2 + diff --git a/queue-6.1/iavf-send-vlan-offloading-caps-once-after-vfr.patch b/queue-6.1/iavf-send-vlan-offloading-caps-once-after-vfr.patch new file mode 100644 index 00000000000..1ee405d4c13 --- /dev/null +++ b/queue-6.1/iavf-send-vlan-offloading-caps-once-after-vfr.patch @@ -0,0 +1,66 @@ +From c45878593282d7f12a92cae3b219aeb3889e32f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Apr 2023 12:09:39 -0600 +Subject: iavf: send VLAN offloading caps once after VFR + +From: Ahmed Zaki + +[ Upstream commit 7dcbdf29282fbcdb646dc785e8a57ed2c2fec8ba ] + +When the user disables rxvlan offloading and then changes the number of +channels, all VLAN ports are unable to receive traffic. + +Changing the number of channels triggers a VFR reset. 
During re-init, when +VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS is received, we do: +1 - set the IAVF_FLAG_SETUP_NETDEV_FEATURES flag +2 - call + iavf_set_vlan_offload_features(adapter, 0, netdev->features); + +The second step sends to the PF the __default__ features, in this case +aq_required |= IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING + +While the first step forces the watchdog task to call +netdev_update_features() -> iavf_set_features() -> +iavf_set_vlan_offload_features(adapter, netdev->features, features). +Since the user disabled the "rxvlan", this sets: +aq_required |= IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING + +When we start processing the AQ commands, both flags are enabled. Since we +process DISABLE_XTAG first then ENABLE_XTAG, this results in the PF +enabling the rxvlan offload. This breaks all communications on the VLAN +net devices. + +Fix by removing the call to iavf_set_vlan_offload_features() (second +step). Calling netdev_update_features() from watchdog task is enough for +both init and reset paths. 
+ +Fixes: 7598f4b40bd6 ("iavf: Move netdev_update_features() into watchdog task") +Signed-off-by: Ahmed Zaki +Tested-by: Rafal Romanowski +Reviewed-by: Leon Romanovsky +Signed-off-by: Tony Nguyen +Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 07d37402a0df5..7b34111fd4eb1 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -2238,11 +2238,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + iavf_process_config(adapter); + adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + +- /* Request VLAN offload settings */ +- if (VLAN_V2_ALLOWED(adapter)) +- iavf_set_vlan_offload_features(adapter, 0, +- netdev->features); +- + iavf_set_queue_vlan_tag_loc(adapter); + + was_mac_changed = !ether_addr_equal(netdev->dev_addr, +-- +2.39.2 + diff --git a/queue-6.1/iavf-use-internal-state-to-free-traffic-irqs.patch b/queue-6.1/iavf-use-internal-state-to-free-traffic-irqs.patch new file mode 100644 index 00000000000..a24bcc616ba --- /dev/null +++ b/queue-6.1/iavf-use-internal-state-to-free-traffic-irqs.patch @@ -0,0 +1,65 @@ +From 7af6ff049c18a0c4e3e4a80b523c331617b48a6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 May 2023 15:46:02 -0600 +Subject: iavf: use internal state to free traffic IRQs + +From: Ahmed Zaki + +[ Upstream commit a77ed5c5b768e9649be240a2d864e5cd9c6a2015 ] + +If the system tries to close the netdev while iavf_reset_task() is +running, __LINK_STATE_START will be cleared and netif_running() will +return false in iavf_reinit_interrupt_scheme(). 
This will result in +iavf_free_traffic_irqs() not being called and a leak as follows: + + [7632.489326] remove_proc_entry: removing non-empty directory 'irq/999', leaking at least 'iavf-enp24s0f0v0-TxRx-0' + [7632.490214] WARNING: CPU: 0 PID: 10 at fs/proc/generic.c:718 remove_proc_entry+0x19b/0x1b0 + +is shown when pci_disable_msix() is later called. Fix by using the +internal adapter state. The traffic IRQs will always exist if +state == __IAVF_RUNNING. + +Fixes: 5b36e8d04b44 ("i40evf: Enable VF to request an alternate queue allocation") +Signed-off-by: Ahmed Zaki +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 81676c3af4b36..104de9a071449 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -1941,15 +1941,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter) + /** + * iavf_reinit_interrupt_scheme - Reallocate queues and vectors + * @adapter: board private structure ++ * @running: true if adapter->state == __IAVF_RUNNING + * + * Returns 0 on success, negative on failure + **/ +-static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) ++static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running) + { + struct net_device *netdev = adapter->netdev; + int err; + +- if (netif_running(netdev)) ++ if (running) + iavf_free_traffic_irqs(adapter); + iavf_free_misc_irq(adapter); + iavf_reset_interrupt_capability(adapter); +@@ -3056,7 +3057,7 @@ static void iavf_reset_task(struct work_struct *work) + + if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) || + (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) { +- err = iavf_reinit_interrupt_scheme(adapter); ++ err = iavf_reinit_interrupt_scheme(adapter, running); + if 
(err) + goto reset_err; + } +-- +2.39.2 + diff --git a/queue-6.1/iavf-wait-for-reset-in-callbacks-which-trigger-it.patch b/queue-6.1/iavf-wait-for-reset-in-callbacks-which-trigger-it.patch new file mode 100644 index 00000000000..4ff53643af2 --- /dev/null +++ b/queue-6.1/iavf-wait-for-reset-in-callbacks-which-trigger-it.patch @@ -0,0 +1,253 @@ +From 666e6a1e4dfcf28dffd3be1e4128f2dde21ee8cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Jun 2023 10:52:22 -0400 +Subject: iavf: Wait for reset in callbacks which trigger it + +From: Marcin Szycik + +[ Upstream commit c2ed2403f12c74a74a0091ed5d830e72c58406e8 ] + +There was a fail when trying to add the interface to bonding +right after changing the MTU on the interface. It was caused +by bonding interface unable to open the interface due to +interface being in __RESETTING state because of MTU change. + +Add new reset_waitqueue to indicate that reset has finished. + +Add waiting for reset to finish in callbacks which trigger hw reset: +iavf_set_priv_flags(), iavf_change_mtu() and iavf_set_ringparam(). +We use a 5000ms timeout period because on Hyper-V based systems, +this operation takes around 3000-4000ms. In normal circumstances, +it doesn't take more than 500ms to complete. + +Add a function iavf_wait_for_reset() to reuse waiting for reset code and +use it also in iavf_set_channels(), which already waits for reset. +We don't use error handling in iavf_set_channels() as this could +cause the device to be in incorrect state if the reset was scheduled +but hit timeout or the waiting function was interrupted by a signal. 
+ +Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") +Signed-off-by: Marcin Szycik +Co-developed-by: Dawid Wesierski +Signed-off-by: Dawid Wesierski +Signed-off-by: Sylwester Dziedziuch +Signed-off-by: Kamil Maziarz +Signed-off-by: Mateusz Palczewski +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf.h | 2 + + .../net/ethernet/intel/iavf/iavf_ethtool.c | 31 ++++++----- + drivers/net/ethernet/intel/iavf/iavf_main.c | 51 ++++++++++++++++++- + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 + + 4 files changed, 68 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h +index a716ed6bb787d..2fe44e865d0a2 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf.h ++++ b/drivers/net/ethernet/intel/iavf/iavf.h +@@ -257,6 +257,7 @@ struct iavf_adapter { + struct work_struct adminq_task; + struct delayed_work client_task; + wait_queue_head_t down_waitqueue; ++ wait_queue_head_t reset_waitqueue; + wait_queue_head_t vc_waitqueue; + struct iavf_q_vector *q_vectors; + struct list_head vlan_filter_list; +@@ -582,4 +583,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); + void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); + struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, + const u8 *macaddr); ++int iavf_wait_for_reset(struct iavf_adapter *adapter); + #endif /* _IAVF_H_ */ +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 4746ee517c75a..73219c5069290 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) + { + struct iavf_adapter *adapter = netdev_priv(netdev); + u32 orig_flags, new_flags, changed_flags; ++ int ret = 0; + u32 i; + + orig_flags = 
READ_ONCE(adapter->flags); +@@ -533,10 +534,13 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(adapter->wq, &adapter->reset_task); ++ ret = iavf_wait_for_reset(adapter); ++ if (ret) ++ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); + } + } + +- return 0; ++ return ret; + } + + /** +@@ -627,6 +631,7 @@ static int iavf_set_ringparam(struct net_device *netdev, + { + struct iavf_adapter *adapter = netdev_priv(netdev); + u32 new_rx_count, new_tx_count; ++ int ret = 0; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; +@@ -673,9 +678,12 @@ static int iavf_set_ringparam(struct net_device *netdev, + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(adapter->wq, &adapter->reset_task); ++ ret = iavf_wait_for_reset(adapter); ++ if (ret) ++ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); + } + +- return 0; ++ return ret; + } + + /** +@@ -1830,7 +1838,7 @@ static int iavf_set_channels(struct net_device *netdev, + { + struct iavf_adapter *adapter = netdev_priv(netdev); + u32 num_req = ch->combined_count; +- int i; ++ int ret = 0; + + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { +@@ -1854,20 +1862,11 @@ static int iavf_set_channels(struct net_device *netdev, + adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; + iavf_schedule_reset(adapter); + +- /* wait for the reset is done */ +- for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { +- msleep(IAVF_RESET_WAIT_MS); +- if (adapter->flags & IAVF_FLAG_RESET_PENDING) +- continue; +- break; +- } +- if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { +- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; +- adapter->num_req_queues = 0; +- return -EOPNOTSUPP; +- } ++ ret = iavf_wait_for_reset(adapter); ++ if (ret) ++ netdev_warn(netdev, "Changing channel 
count timeout or interrupted waiting for reset"); + +- return 0; ++ return ret; + } + + /** +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index d5b1dcfe0ccdd..c2739071149de 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -166,6 +166,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev) + return netdev_priv(pci_get_drvdata(pdev)); + } + ++/** ++ * iavf_is_reset_in_progress - Check if a reset is in progress ++ * @adapter: board private structure ++ */ ++static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter) ++{ ++ if (adapter->state == __IAVF_RESETTING || ++ adapter->flags & (IAVF_FLAG_RESET_PENDING | ++ IAVF_FLAG_RESET_NEEDED)) ++ return true; ++ ++ return false; ++} ++ ++/** ++ * iavf_wait_for_reset - Wait for reset to finish. ++ * @adapter: board private structure ++ * ++ * Returns 0 if reset finished successfully, negative on timeout or interrupt. ++ */ ++int iavf_wait_for_reset(struct iavf_adapter *adapter) ++{ ++ int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue, ++ !iavf_is_reset_in_progress(adapter), ++ msecs_to_jiffies(5000)); ++ ++ /* If ret < 0 then it means wait was interrupted. ++ * If ret == 0 then it means we got a timeout while waiting ++ * for reset to finish. ++ * If ret > 0 it means reset has finished. 
++ */ ++ if (ret > 0) ++ return 0; ++ else if (ret < 0) ++ return -EINTR; ++ else ++ return -EBUSY; ++} ++ + /** + * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code + * @hw: pointer to the HW structure +@@ -3161,6 +3200,7 @@ static void iavf_reset_task(struct work_struct *work) + + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + ++ wake_up(&adapter->reset_waitqueue); + mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); + +@@ -4325,6 +4365,7 @@ static int iavf_close(struct net_device *netdev) + static int iavf_change_mtu(struct net_device *netdev, int new_mtu) + { + struct iavf_adapter *adapter = netdev_priv(netdev); ++ int ret = 0; + + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); +@@ -4337,9 +4378,14 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(adapter->wq, &adapter->reset_task); ++ ret = iavf_wait_for_reset(adapter); ++ if (ret < 0) ++ netdev_warn(netdev, "MTU change interrupted waiting for reset"); ++ else if (ret) ++ netdev_warn(netdev, "MTU change timed out waiting for reset"); + } + +- return 0; ++ return ret; + } + + #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ +@@ -4942,6 +4988,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + /* Setup the wait queue for indicating transition to down status */ + init_waitqueue_head(&adapter->down_waitqueue); + ++ /* Setup the wait queue for indicating transition to running state */ ++ init_waitqueue_head(&adapter->reset_waitqueue); ++ + /* Setup the wait queue for indicating virtchannel events */ + init_waitqueue_head(&adapter->vc_waitqueue); + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 7b34111fd4eb1..eec7ac3b7f6ee 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ 
b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + case VIRTCHNL_OP_ENABLE_QUEUES: + /* enable transmits */ + iavf_irq_enable(adapter, true); ++ wake_up(&adapter->reset_waitqueue); + adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; + break; + case VIRTCHNL_OP_DISABLE_QUEUES: +-- +2.39.2 + diff --git a/queue-6.1/igb-fix-igb_down-hung-on-surprise-removal.patch b/queue-6.1/igb-fix-igb_down-hung-on-surprise-removal.patch new file mode 100644 index 00000000000..0017c58f975 --- /dev/null +++ b/queue-6.1/igb-fix-igb_down-hung-on-surprise-removal.patch @@ -0,0 +1,89 @@ +From 1fce30757b3c297f96e47f71e0c036d447f63664 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 10:47:32 -0700 +Subject: igb: Fix igb_down hung on surprise removal + +From: Ying Hsu + +[ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ] + +In a setup where a Thunderbolt hub connects to Ethernet and a display +through USB Type-C, users may experience a hung task timeout when they +remove the cable between the PC and the Thunderbolt hub. +This is because the igb_down function is called multiple times when +the Thunderbolt hub is unplugged. For example, the igb_io_error_detected +triggers the first call, and the igb_remove triggers the second call. +The second call to igb_down will block at napi_synchronize. +Here's the call trace: + __schedule+0x3b0/0xddb + ? __mod_timer+0x164/0x5d3 + schedule+0x44/0xa8 + schedule_timeout+0xb2/0x2a4 + ? 
run_local_timers+0x4e/0x4e + msleep+0x31/0x38 + igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4] + __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4] + igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4] + __dev_close_many+0x95/0xec + dev_close_many+0x6e/0x103 + unregister_netdevice_many+0x105/0x5b1 + unregister_netdevice_queue+0xc2/0x10d + unregister_netdev+0x1c/0x23 + igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4] + pci_device_remove+0x3f/0x9c + device_release_driver_internal+0xfe/0x1b4 + pci_stop_bus_device+0x5b/0x7f + pci_stop_bus_device+0x30/0x7f + pci_stop_bus_device+0x30/0x7f + pci_stop_and_remove_bus_device+0x12/0x19 + pciehp_unconfigure_device+0x76/0xe9 + pciehp_disable_slot+0x6e/0x131 + pciehp_handle_presence_or_link_change+0x7a/0x3f7 + pciehp_ist+0xbe/0x194 + irq_thread_fn+0x22/0x4d + ? irq_thread+0x1fd/0x1fd + irq_thread+0x17b/0x1fd + ? irq_forced_thread_fn+0x5f/0x5f + kthread+0x142/0x153 + ? __irq_get_irqchip_state+0x46/0x46 + ? kthread_associate_blkcg+0x71/0x71 + ret_from_fork+0x1f/0x30 + +In this case, igb_io_error_detected detaches the network interface +and requests a PCIE slot reset, however, the PCIE reset callback is +not being invoked and thus the Ethernet connection breaks down. +As the PCIE error in this case is a non-fatal one, requesting a +slot reset can be avoided. +This patch fixes the task hung issue and preserves Ethernet +connection by ignoring non-fatal PCIE errors. 
+ +Signed-off-by: Ying Hsu +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index 18ffbc892f86c..3e0444354632d 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -9585,6 +9585,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + ++ if (state == pci_channel_io_normal) { ++ dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); ++ return PCI_ERS_RESULT_CAN_RECOVER; ++ } ++ + netif_device_detach(netdev); + + if (state == pci_channel_io_perm_failure) +-- +2.39.2 + diff --git a/queue-6.1/igc-avoid-transmit-queue-timeout-for-xdp.patch b/queue-6.1/igc-avoid-transmit-queue-timeout-for-xdp.patch new file mode 100644 index 00000000000..5aadd1a85b6 --- /dev/null +++ b/queue-6.1/igc-avoid-transmit-queue-timeout-for-xdp.patch @@ -0,0 +1,61 @@ +From c01002df2d8dadbc072d6f4a641153969ae81dc1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Apr 2023 09:36:11 +0200 +Subject: igc: Avoid transmit queue timeout for XDP + +From: Kurt Kanzenbach + +[ Upstream commit 95b681485563c64585de78662ee52d06b7fa47d9 ] + +High XDP load triggers the netdev watchdog: + +|NETDEV WATCHDOG: enp3s0 (igc): transmit queue 2 timed out + +The reason is the Tx queue transmission start (txq->trans_start) is not updated +in XDP code path. Therefore, add it for all XDP transmission functions. 
+ +Signed-off-by: Kurt Kanzenbach +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Stable-dep-of: 78adb4bcf99e ("igc: Prevent garbled TX queue with XDP ZEROCOPY") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 273941f90f066..ade4bde47c65a 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -2402,6 +2402,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); ++ /* Avoid transmit queue timeout since we share it with the slow path */ ++ txq_trans_cond_update(nq); + res = igc_xdp_init_tx_descriptor(ring, xdpf); + __netif_tx_unlock(nq); + return res; +@@ -2804,6 +2806,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) + + __netif_tx_lock(nq, cpu); + ++ /* Avoid transmit queue timeout since we share it with the slow path */ ++ txq_trans_cond_update(nq); ++ + budget = igc_desc_unused(ring); + + while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { +@@ -6297,6 +6302,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, + + __netif_tx_lock(nq, cpu); + ++ /* Avoid transmit queue timeout since we share it with the slow path */ ++ txq_trans_cond_update(nq); ++ + drops = 0; + for (i = 0; i < num_frames; i++) { + int err; +-- +2.39.2 + diff --git a/queue-6.1/igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch b/queue-6.1/igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch new file mode 100644 index 00000000000..4254f230b5d --- /dev/null +++ b/queue-6.1/igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch @@ -0,0 +1,79 @@ +From d6a3517285a333ba4076b9e7721da2053a4d7dd2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 10:54:44 -0700 +Subject: igc: Prevent garbled TX queue with XDP ZEROCOPY + +From: Florian Kauer + 
+[ Upstream commit 78adb4bcf99effbb960c5f9091e2e062509d1030 ] + +In normal operation, each populated queue item has +next_to_watch pointing to the last TX desc of the packet, +while each cleaned item has it set to 0. In particular, +next_to_use that points to the next (necessarily clean) +item to use has next_to_watch set to 0. + +When the TX queue is used both by an application using +AF_XDP with ZEROCOPY as well as a second non-XDP application +generating high traffic, the queue pointers can get in +an invalid state where next_to_use points to an item +where next_to_watch is NOT set to 0. + +However, the implementation assumes at several places +that this is never the case, so if it does hold, +bad things happen. In particular, within the loop inside +of igc_clean_tx_irq(), next_to_clean can overtake next_to_use. +Finally, this prevents any further transmission via +this queue and it never gets unblocked or signaled. +Secondly, if the queue is in this garbled state, +the inner loop of igc_clean_tx_ring() will never terminate, +completely hogging a CPU core. + +The reason is that igc_xdp_xmit_zc() reads next_to_use +before acquiring the lock, and writing it back +(potentially unmodified) later. If it got modified +before locking, the outdated next_to_use is written +pointing to an item that was already used elsewhere +(and thus next_to_watch got written). 
+ +Fixes: 9acf59a752d4 ("igc: Enable TX via AF_XDP zero-copy") +Signed-off-by: Florian Kauer +Reviewed-by: Kurt Kanzenbach +Tested-by: Kurt Kanzenbach +Acked-by: Vinicius Costa Gomes +Reviewed-by: Simon Horman +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Link: https://lore.kernel.org/r/20230717175444.3217831-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index ade4bde47c65a..2e091a4a065e7 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -2797,9 +2797,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) + struct netdev_queue *nq = txring_txq(ring); + union igc_adv_tx_desc *tx_desc = NULL; + int cpu = smp_processor_id(); +- u16 ntu = ring->next_to_use; + struct xdp_desc xdp_desc; +- u16 budget; ++ u16 budget, ntu; + + if (!netif_carrier_ok(ring->netdev)) + return; +@@ -2809,6 +2808,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + ++ ntu = ring->next_to_use; + budget = igc_desc_unused(ring); + + while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { +-- +2.39.2 + diff --git a/queue-6.1/kallsyms-correctly-sequence-symbols-when-config_lto_.patch b/queue-6.1/kallsyms-correctly-sequence-symbols-when-config_lto_.patch new file mode 100644 index 00000000000..5ee0e2c26ff --- /dev/null +++ b/queue-6.1/kallsyms-correctly-sequence-symbols-when-config_lto_.patch @@ -0,0 +1,151 @@ +From 84ac2024e94e7308d618a49933dee91acc662e7c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 16:49:15 +0800 +Subject: kallsyms: Correctly sequence symbols when CONFIG_LTO_CLANG=y + +From: Zhen Lei + +[ Upstream commit 
010a0aad39fccceba4a07d30d163158a39c704f3 ] + +LLVM appends various suffixes for local functions and variables, suffixes +observed: + - foo.llvm.[0-9a-f]+ + - foo.[0-9a-f]+ + +Therefore, when CONFIG_LTO_CLANG=y, kallsyms_lookup_name() needs to +truncate the suffix of the symbol name before comparing the local function +or variable name. + +Old implementation code: +- if (strcmp(namebuf, name) == 0) +- return kallsyms_sym_address(i); +- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0) +- return kallsyms_sym_address(i); + +The preceding process is traversed by address from low to high. That is, +for those with the same name after the suffix is removed, the one with +the smallest address is returned first. Therefore, when sorting in the +tool, if the raw names are the same, they should be sorted by address in +ascending order. + +ASCII[.] = 2e +ASCII[0-9] = 30,39 +ASCII[A-Z] = 41,5a +ASCII[_] = 5f +ASCII[a-z] = 61,7a + +According to the preceding ASCII code values, the following sorting result +is strictly followed. + --------------------------------- +| main-key | sub-key | +|---------------------------------| +| | addr_lowest | +| | ... | +| . | ... | +| | addr_highest | +|---------------------------------| +| ? | | //? 
is [_A-Za-z0-9] + --------------------------------- + +Signed-off-by: Zhen Lei +Signed-off-by: Luis Chamberlain +Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") +Signed-off-by: Sasha Levin +--- + scripts/kallsyms.c | 36 ++++++++++++++++++++++++++++++++++-- + scripts/link-vmlinux.sh | 4 ++++ + 2 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c +index dcb744a067e5e..67ef9aa14a770 100644 +--- a/scripts/kallsyms.c ++++ b/scripts/kallsyms.c +@@ -78,6 +78,7 @@ static unsigned int table_size, table_cnt; + static int all_symbols; + static int absolute_percpu; + static int base_relative; ++static int lto_clang; + + static int token_profit[0x10000]; + +@@ -89,7 +90,7 @@ static unsigned char best_table_len[256]; + static void usage(void) + { + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " +- "[--base-relative] in.map > out.S\n"); ++ "[--base-relative] [--lto-clang] in.map > out.S\n"); + exit(1); + } + +@@ -411,6 +412,34 @@ static int symbol_absolute(const struct sym_entry *s) + return s->percpu_absolute; + } + ++static char * s_name(char *buf) ++{ ++ /* Skip the symbol type */ ++ return buf + 1; ++} ++ ++static void cleanup_symbol_name(char *s) ++{ ++ char *p; ++ ++ if (!lto_clang) ++ return; ++ ++ /* ++ * ASCII[.] = 2e ++ * ASCII[0-9] = 30,39 ++ * ASCII[A-Z] = 41,5a ++ * ASCII[_] = 5f ++ * ASCII[a-z] = 61,7a ++ * ++ * As above, replacing '.' with '\0' does not affect the main sorting, ++ * but it helps us with subsorting. 
++ */ ++ p = strchr(s, '.'); ++ if (p) ++ *p = '\0'; ++} ++ + static int compare_names(const void *a, const void *b) + { + int ret; +@@ -421,7 +450,9 @@ static int compare_names(const void *a, const void *b) + + expand_symbol(sa->sym, sa->len, sa_namebuf); + expand_symbol(sb->sym, sb->len, sb_namebuf); +- ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]); ++ cleanup_symbol_name(s_name(sa_namebuf)); ++ cleanup_symbol_name(s_name(sb_namebuf)); ++ ret = strcmp(s_name(sa_namebuf), s_name(sb_namebuf)); + if (!ret) { + if (sa->addr > sb->addr) + return 1; +@@ -855,6 +886,7 @@ int main(int argc, char **argv) + {"all-symbols", no_argument, &all_symbols, 1}, + {"absolute-percpu", no_argument, &absolute_percpu, 1}, + {"base-relative", no_argument, &base_relative, 1}, ++ {"lto-clang", no_argument, &lto_clang, 1}, + {}, + }; + +diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh +index 918470d768e9c..32e573943cf03 100755 +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -156,6 +156,10 @@ kallsyms() + kallsymopt="${kallsymopt} --base-relative" + fi + ++ if is_enabled CONFIG_LTO_CLANG; then ++ kallsymopt="${kallsymopt} --lto-clang" ++ fi ++ + info KSYMS ${2} + scripts/kallsyms ${kallsymopt} ${1} > ${2} + } +-- +2.39.2 + diff --git a/queue-6.1/kallsyms-improve-the-performance-of-kallsyms_lookup_.patch b/queue-6.1/kallsyms-improve-the-performance-of-kallsyms_lookup_.patch new file mode 100644 index 00000000000..9b63380a315 --- /dev/null +++ b/queue-6.1/kallsyms-improve-the-performance-of-kallsyms_lookup_.patch @@ -0,0 +1,241 @@ +From 0abbf42237e70e5ca1bdbcd75de6eed8c1bd4077 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 16:49:14 +0800 +Subject: kallsyms: Improve the performance of kallsyms_lookup_name() + +From: Zhen Lei + +[ Upstream commit 60443c88f3a89fd303a9e8c0e84895910675c316 ] + +Currently, to search for a symbol, we need to expand the symbols in +'kallsyms_names' one by one, and then use the expanded string for +comparison.
It's O(n). + +If we sort names in ascending order like addresses, we can also use +binary search. It's O(log(n)). + +In order not to change the implementation of "/proc/kallsyms", the table +kallsyms_names[] is still stored in a one-to-one correspondence with the +address in ascending order. + +Add array kallsyms_seqs_of_names[], it's indexed by the sequence number +of the sorted names, and the corresponding content is the sequence number +of the sorted addresses. For example: +Assume that the index of NameX in array kallsyms_seqs_of_names[] is 'i', +the content of kallsyms_seqs_of_names[i] is 'k', then the corresponding +address of NameX is kallsyms_addresses[k]. The offset in kallsyms_names[] +is get_symbol_offset(k). + +Note that the memory usage will increase by (4 * kallsyms_num_syms) +bytes, the next two patches will reduce (1 * kallsyms_num_syms) bytes +and properly handle the case CONFIG_LTO_CLANG=y. + +Performance test results: (x86) +Before: +min=234, max=10364402, avg=5206926 +min=267, max=11168517, avg=5207587 +After: +min=1016, max=90894, avg=7272 +min=1014, max=93470, avg=7293 + +The average lookup performance of kallsyms_lookup_name() improved 715x. 
+ +Signed-off-by: Zhen Lei +Signed-off-by: Luis Chamberlain +Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions") +Signed-off-by: Sasha Levin +--- + kernel/kallsyms.c | 86 +++++++++++++++++++++++++++++++++----- + kernel/kallsyms_internal.h | 1 + + scripts/kallsyms.c | 37 ++++++++++++++++ + 3 files changed, 113 insertions(+), 11 deletions(-) + +diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c +index 60c20f301a6ba..ba351dfa109b6 100644 +--- a/kernel/kallsyms.c ++++ b/kernel/kallsyms.c +@@ -187,26 +187,90 @@ static bool cleanup_symbol_name(char *s) + return false; + } + ++static int compare_symbol_name(const char *name, char *namebuf) ++{ ++ int ret; ++ ++ ret = strcmp(name, namebuf); ++ if (!ret) ++ return ret; ++ ++ if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf)) ++ return 0; ++ ++ return ret; ++} ++ ++static int kallsyms_lookup_names(const char *name, ++ unsigned int *start, ++ unsigned int *end) ++{ ++ int ret; ++ int low, mid, high; ++ unsigned int seq, off; ++ char namebuf[KSYM_NAME_LEN]; ++ ++ low = 0; ++ high = kallsyms_num_syms - 1; ++ ++ while (low <= high) { ++ mid = low + (high - low) / 2; ++ seq = kallsyms_seqs_of_names[mid]; ++ off = get_symbol_offset(seq); ++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); ++ ret = compare_symbol_name(name, namebuf); ++ if (ret > 0) ++ low = mid + 1; ++ else if (ret < 0) ++ high = mid - 1; ++ else ++ break; ++ } ++ ++ if (low > high) ++ return -ESRCH; ++ ++ low = mid; ++ while (low) { ++ seq = kallsyms_seqs_of_names[low - 1]; ++ off = get_symbol_offset(seq); ++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); ++ if (compare_symbol_name(name, namebuf)) ++ break; ++ low--; ++ } ++ *start = low; ++ ++ if (end) { ++ high = mid; ++ while (high < kallsyms_num_syms - 1) { ++ seq = kallsyms_seqs_of_names[high + 1]; ++ off = get_symbol_offset(seq); ++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); ++ if (compare_symbol_name(name, namebuf)) 
++ break; ++ high++; ++ } ++ *end = high; ++ } ++ ++ return 0; ++} ++ + /* Lookup the address for this symbol. Returns 0 if not found. */ + unsigned long kallsyms_lookup_name(const char *name) + { +- char namebuf[KSYM_NAME_LEN]; +- unsigned long i; +- unsigned int off; ++ int ret; ++ unsigned int i; + + /* Skip the search for empty string. */ + if (!*name) + return 0; + +- for (i = 0, off = 0; i < kallsyms_num_syms; i++) { +- off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); +- +- if (strcmp(namebuf, name) == 0) +- return kallsyms_sym_address(i); ++ ret = kallsyms_lookup_names(name, &i, NULL); ++ if (!ret) ++ return kallsyms_sym_address(kallsyms_seqs_of_names[i]); + +- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0) +- return kallsyms_sym_address(i); +- } + return module_kallsyms_lookup_name(name); + } + +diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h +index 2d0c6f2f0243a..a04b7a5cb1e3e 100644 +--- a/kernel/kallsyms_internal.h ++++ b/kernel/kallsyms_internal.h +@@ -26,5 +26,6 @@ extern const char kallsyms_token_table[] __weak; + extern const u16 kallsyms_token_index[] __weak; + + extern const unsigned int kallsyms_markers[] __weak; ++extern const unsigned int kallsyms_seqs_of_names[] __weak; + + #endif // LINUX_KALLSYMS_INTERNAL_H_ +diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c +index 03fa07ad45d95..dcb744a067e5e 100644 +--- a/scripts/kallsyms.c ++++ b/scripts/kallsyms.c +@@ -49,6 +49,7 @@ _Static_assert( + struct sym_entry { + unsigned long long addr; + unsigned int len; ++ unsigned int seq; + unsigned int start_pos; + unsigned int percpu_absolute; + unsigned char sym[]; +@@ -410,6 +411,35 @@ static int symbol_absolute(const struct sym_entry *s) + return s->percpu_absolute; + } + ++static int compare_names(const void *a, const void *b) ++{ ++ int ret; ++ char sa_namebuf[KSYM_NAME_LEN]; ++ char sb_namebuf[KSYM_NAME_LEN]; ++ const struct sym_entry *sa = *(const struct sym_entry **)a; ++ const struct 
sym_entry *sb = *(const struct sym_entry **)b; ++ ++ expand_symbol(sa->sym, sa->len, sa_namebuf); ++ expand_symbol(sb->sym, sb->len, sb_namebuf); ++ ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]); ++ if (!ret) { ++ if (sa->addr > sb->addr) ++ return 1; ++ else if (sa->addr < sb->addr) ++ return -1; ++ ++ /* keep old order */ ++ return (int)(sa->seq - sb->seq); ++ } ++ ++ return ret; ++} ++ ++static void sort_symbols_by_name(void) ++{ ++ qsort(table, table_cnt, sizeof(table[0]), compare_names); ++} ++ + static void write_src(void) + { + unsigned int i, k, off; +@@ -495,6 +525,7 @@ static void write_src(void) + for (i = 0; i < table_cnt; i++) { + if ((i & 0xFF) == 0) + markers[i >> 8] = off; ++ table[i]->seq = i; + + /* There cannot be any symbol of length zero. */ + if (table[i]->len == 0) { +@@ -535,6 +566,12 @@ static void write_src(void) + + free(markers); + ++ sort_symbols_by_name(); ++ output_label("kallsyms_seqs_of_names"); ++ for (i = 0; i < table_cnt; i++) ++ printf("\t.long\t%u\n", table[i]->seq); ++ printf("\n"); ++ + output_label("kallsyms_token_table"); + off = 0; + for (i = 0; i < 256; i++) { +-- +2.39.2 + diff --git a/queue-6.1/kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch b/queue-6.1/kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch new file mode 100644 index 00000000000..e74c07b91eb --- /dev/null +++ b/queue-6.1/kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch @@ -0,0 +1,104 @@ +From 8ed9d429c7185d4b3fe9ef6360e3f9e6f63265c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 11:19:26 -0700 +Subject: kallsyms: strip LTO-only suffixes from promoted global functions + +From: Yonghong Song + +[ Upstream commit 8cc32a9bbf2934d90762d9de0187adcb5ad46a11 ] + +Commit 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions") +stripped all function/variable suffixes started with '.' regardless +of whether those suffixes are generated at LTO mode or not. 
In fact, +as far as I know, in LTO mode, when a static function/variable is +promoted to the global scope, '.llvm.<...>' suffix is added. + +The existing mechanism breaks live patch for a LTO kernel even if +no .llvm.<...> symbols are involved. For example, for the following +kernel symbols: + $ grep bpf_verifier_vlog /proc/kallsyms + ffffffff81549f60 t bpf_verifier_vlog + ffffffff8268b430 d bpf_verifier_vlog._entry + ffffffff8282a958 d bpf_verifier_vlog._entry_ptr + ffffffff82e12a1f d bpf_verifier_vlog.__already_done +'bpf_verifier_vlog' is a static function. '_entry', '_entry_ptr' and +'__already_done' are static variables used inside 'bpf_verifier_vlog', +so llvm promotes them to file-level static with prefix 'bpf_verifier_vlog.'. +Note that the func-level to file-level static function promotion also +happens without LTO. + +Given a symbol name 'bpf_verifier_vlog', with LTO kernel, current mechanism will +return 4 symbols to live patch subsystem which current live patching +subsystem cannot handle it. With non-LTO kernel, only one symbol +is returned. + +In [1], we have a lengthy discussion, the suggestion is to separate two +cases: + (1). new symbols with suffix which are generated regardless of whether + LTO is enabled or not, and + (2). new symbols with suffix generated only when LTO is enabled. + +The cleanup_symbol_name() should only remove suffixes for case (2). +Case (1) should not be changed so it can work uniformly with or without LTO. + +This patch removed LTO-only suffix '.llvm.<...>' so live patching and +tracing should work the same way for non-LTO kernel. +The cleanup_symbol_name() in scripts/kallsyms.c is also changed to have the same +filtering pattern so both kernel and kallsyms tool have the same +expectation on the order of symbols. 
+ + [1] https://lore.kernel.org/live-patching/20230615170048.2382735-1-song@kernel.org/T/#u + +Fixes: 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions") +Reported-by: Song Liu +Signed-off-by: Yonghong Song +Reviewed-by: Zhen Lei +Reviewed-by: Nick Desaulniers +Acked-by: Song Liu +Link: https://lore.kernel.org/r/20230628181926.4102448-1-yhs@fb.com +Signed-off-by: Kees Cook +Signed-off-by: Sasha Levin +--- + kernel/kallsyms.c | 5 ++--- + scripts/kallsyms.c | 6 +++--- + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c +index ba351dfa109b6..676328a7c8c75 100644 +--- a/kernel/kallsyms.c ++++ b/kernel/kallsyms.c +@@ -174,11 +174,10 @@ static bool cleanup_symbol_name(char *s) + * LLVM appends various suffixes for local functions and variables that + * must be promoted to global scope as part of LTO. This can break + * hooking of static functions with kprobes. '.' is not a valid +- * character in an identifier in C. Suffixes observed: ++ * character in an identifier in C. Suffixes only in LLVM LTO observed: + * - foo.llvm.[0-9a-f]+ +- * - foo.[0-9a-f]+ + */ +- res = strchr(s, '.'); ++ res = strstr(s, ".llvm."); + if (res) { + *res = '\0'; + return true; +diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c +index 67ef9aa14a770..51edc73e2ebf8 100644 +--- a/scripts/kallsyms.c ++++ b/scripts/kallsyms.c +@@ -432,10 +432,10 @@ static void cleanup_symbol_name(char *s) + * ASCII[_] = 5f + * ASCII[a-z] = 61,7a + * +- * As above, replacing '.' with '\0' does not affect the main sorting, +- * but it helps us with subsorting. ++ * As above, replacing the first '.' in ".llvm." with '\0' does not ++ * affect the main sorting, but it helps us with subsorting. 
+ */ +- p = strchr(s, '.'); ++ p = strstr(s, ".llvm."); + if (p) + *p = '\0'; + } +-- +2.39.2 + diff --git a/queue-6.1/llc-don-t-drop-packet-from-non-root-netns.patch b/queue-6.1/llc-don-t-drop-packet-from-non-root-netns.patch new file mode 100644 index 00000000000..f12f3fb002b --- /dev/null +++ b/queue-6.1/llc-don-t-drop-packet-from-non-root-netns.patch @@ -0,0 +1,50 @@ +From e9fa3eef2ea63154cf4655e320d9deee9b91fb21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jul 2023 10:41:51 -0700 +Subject: llc: Don't drop packet from non-root netns. + +From: Kuniyuki Iwashima + +[ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ] + +Now these upper layer protocol handlers can be called from llc_rcv() +as sap->rcv_func(), which is registered by llc_sap_open(). + + * function which is passed to register_8022_client() + -> no in-kernel user calls register_8022_client(). + + * snap_rcv() + `- proto->rcvfunc() : registered by register_snap_client() + -> aarp_rcv() and atalk_rcv() drop packets from non-root netns + + * stp_pdu_rcv() + `- garp_protos[]->rcv() : registered by stp_proto_register() + -> garp_pdu_rcv() and br_stp_rcv() are netns-aware + +So, we can safely remove the netns restriction in llc_rcv(). + +Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/llc/llc_input.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c +index c309b72a58779..7cac441862e21 100644 +--- a/net/llc/llc_input.c ++++ b/net/llc/llc_input.c +@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); + +- if (!net_eq(dev_net(dev), &init_net)) +- goto drop; +- + /* + * When the interface is in promisc. mode, drop all the crap that it + * receives, do not try to analyse it. 
+-- +2.39.2 + diff --git a/queue-6.1/mips-dec-prom-address-warray-bounds-warning.patch b/queue-6.1/mips-dec-prom-address-warray-bounds-warning.patch new file mode 100644 index 00000000000..3ae68615e08 --- /dev/null +++ b/queue-6.1/mips-dec-prom-address-warray-bounds-warning.patch @@ -0,0 +1,56 @@ +From ef01382e1c734299b56bde7f6a5678e14939f8a4 Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Thu, 22 Jun 2023 17:43:57 -0600 +Subject: [PATCH AUTOSEL 4.19 09/11] MIPS: dec: prom: Address -Warray-bounds + warning +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 7b191b9b55df2a844bd32d1d380f47a7df1c2896 ] + +Zero-length arrays are deprecated, and we are replacing them with flexible +array members instead. So, replace zero-length array with flexible-array +member in struct memmap. + +Address the following warning found after building (with GCC-13) mips64 +with decstation_64_defconfig: +In function 'rex_setup_memory_region', + inlined from 'prom_meminit' at arch/mips/dec/prom/memory.c:91:3: +arch/mips/dec/prom/memory.c:72:31: error: array subscript i is outside array bounds of 'unsigned char[0]' [-Werror=array-bounds=] + 72 | if (bm->bitmap[i] == 0xff) + | ~~~~~~~~~~^~~ +In file included from arch/mips/dec/prom/memory.c:16: +./arch/mips/include/asm/dec/prom.h: In function 'prom_meminit': +./arch/mips/include/asm/dec/prom.h:73:23: note: while referencing 'bitmap' + 73 | unsigned char bitmap[0]; + +This helps with the ongoing efforts to globally enable -Warray-bounds. + +This results in no differences in binary output. + +Link: https://github.com/KSPP/linux/issues/79 +Link: https://github.com/KSPP/linux/issues/323 +Signed-off-by: Gustavo A. R. 
Silva +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Sasha Levin +--- + arch/mips/include/asm/dec/prom.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h +index 09538ff5e9245..6f0405ba27d6d 100644 +--- a/arch/mips/include/asm/dec/prom.h ++++ b/arch/mips/include/asm/dec/prom.h +@@ -74,7 +74,7 @@ static inline bool prom_is_rex(u32 magic) + */ + typedef struct { + int pagesize; +- unsigned char bitmap[0]; ++ unsigned char bitmap[]; + } memmap; + + +-- +2.39.2 + diff --git a/queue-6.1/net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch b/queue-6.1/net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch new file mode 100644 index 00000000000..a4550bdb088 --- /dev/null +++ b/queue-6.1/net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch @@ -0,0 +1,94 @@ +From 2ad98a4006851a288ac932c2345ea6a91933390c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 17:46:22 -0700 +Subject: net: dsa: microchip: correct KSZ8795 static MAC table access + +From: Tristram Ha + +[ Upstream commit 4bdf79d686b49ac49373b36466acfb93972c7d7c ] + +The KSZ8795 driver code was modified to use on KSZ8863/73, which has +different register definitions. Some of the new KSZ8795 register +information are wrong compared to previous code. + +KSZ8795 also behaves differently in that the STATIC_MAC_TABLE_USE_FID +and STATIC_MAC_TABLE_FID bits are off by 1 when doing MAC table reading +than writing. To compensate that a special code was added to shift the +register value by 1 before applying those bits. This is wrong when the +code is running on KSZ8863, so this special code is only executed when +KSZ8795 is detected. + +Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips") +Signed-off-by: Tristram Ha +Reviewed-by: Horatiu Vultur +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz8795.c | 8 +++++++- + drivers/net/dsa/microchip/ksz_common.c | 8 ++++---- + drivers/net/dsa/microchip/ksz_common.h | 7 +++++++ + 3 files changed, 18 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index 6639fae56da7f..c63e082dc57dc 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -437,7 +437,13 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, + (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >> + shifts[STATIC_MAC_FWD_PORTS]; + alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0; +- data_hi >>= 1; ++ ++ /* KSZ8795 family switches have STATIC_MAC_TABLE_USE_FID and ++ * STATIC_MAC_TABLE_FID definitions off by 1 when doing read on the ++ * static MAC table compared to doing write. ++ */ ++ if (ksz_is_ksz87xx(dev)) ++ data_hi >>= 1; + alu->is_static = true; + alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 
1 : 0; + alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >> +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 3d59298eaa5cf..8c492d56d2c36 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -286,13 +286,13 @@ static const u32 ksz8795_masks[] = { + [STATIC_MAC_TABLE_VALID] = BIT(21), + [STATIC_MAC_TABLE_USE_FID] = BIT(23), + [STATIC_MAC_TABLE_FID] = GENMASK(30, 24), +- [STATIC_MAC_TABLE_OVERRIDE] = BIT(26), +- [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(24, 20), ++ [STATIC_MAC_TABLE_OVERRIDE] = BIT(22), ++ [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(20, 16), + [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(6, 0), +- [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(8), ++ [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7), + [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7), + [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 29), +- [DYNAMIC_MAC_TABLE_FID] = GENMASK(26, 20), ++ [DYNAMIC_MAC_TABLE_FID] = GENMASK(22, 16), + [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(26, 24), + [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(28, 27), + [P_MII_TX_FLOW_CTRL] = BIT(5), +diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h +index 9cfa179575ce8..d1b2db8e65331 100644 +--- a/drivers/net/dsa/microchip/ksz_common.h ++++ b/drivers/net/dsa/microchip/ksz_common.h +@@ -512,6 +512,13 @@ static inline void ksz_regmap_unlock(void *__mtx) + mutex_unlock(mtx); + } + ++static inline bool ksz_is_ksz87xx(struct ksz_device *dev) ++{ ++ return dev->chip_id == KSZ8795_CHIP_ID || ++ dev->chip_id == KSZ8794_CHIP_ID || ++ dev->chip_id == KSZ8765_CHIP_ID; ++} ++ + static inline bool ksz_is_ksz88x3(struct ksz_device *dev) + { + return dev->chip_id == KSZ8830_CHIP_ID; +-- +2.39.2 + diff --git a/queue-6.1/net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch b/queue-6.1/net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch new file mode 100644 index 00000000000..394b25198f6 --- /dev/null +++ 
b/queue-6.1/net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch @@ -0,0 +1,54 @@ +From 25ba53cf4a6b0cb809c74f265b2e1cd0d00ea850 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Apr 2023 12:18:38 +0200 +Subject: net: dsa: microchip: ksz8: Make ksz8_r_sta_mac_table() static + +From: Oleksij Rempel + +[ Upstream commit b5751cdd7dbe618a03951bdd4c982a71ba448b1b ] + +As ksz8_r_sta_mac_table() is only used within ksz8795.c, there is no need +to export it. Make the function static for better encapsulation. + +Signed-off-by: Oleksij Rempel +Reviewed-by: Vladimir Oltean +Acked-by: Arun Ramadoss +Signed-off-by: Paolo Abeni +Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz8.h | 2 -- + drivers/net/dsa/microchip/ksz8795.c | 4 ++-- + 2 files changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h +index 8582b4b67d989..28137c4bf2928 100644 +--- a/drivers/net/dsa/microchip/ksz8.h ++++ b/drivers/net/dsa/microchip/ksz8.h +@@ -21,8 +21,6 @@ int ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val); + int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val); + int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr, + u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries); +-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, +- struct alu_struct *alu); + void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr, + struct alu_struct *alu); + void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt); +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index 38fd9b8e0287a..a2f67be66b97d 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -406,8 +406,8 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr, + return rc; + } + +-int 
ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, +- struct alu_struct *alu) ++static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, ++ struct alu_struct *alu) + { + u32 data_hi, data_lo; + const u8 *shifts; +-- +2.39.2 + diff --git a/queue-6.1/net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch b/queue-6.1/net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch new file mode 100644 index 00000000000..61558ee997e --- /dev/null +++ b/queue-6.1/net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch @@ -0,0 +1,111 @@ +From 07866a478229526bd65ea5676f89ffc143c3e040 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Apr 2023 12:18:36 +0200 +Subject: net: dsa: microchip: ksz8: Separate static MAC table operations for + code reuse + +From: Oleksij Rempel + +[ Upstream commit f6636ff69ec4f2c94a5ee1d032b21cfe1e0a5678 ] + +Move static MAC table operations to separate functions in order to reuse +the code for add/del_fdb. This is needed to address kernel warnings +caused by the lack of fdb add function support in the current driver. 
+ +Signed-off-by: Oleksij Rempel +Reviewed-by: Vladimir Oltean +Signed-off-by: Paolo Abeni +Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz8795.c | 34 +++++++++++++++++++---------- + 1 file changed, 23 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index 22250ae222b5b..38fd9b8e0287a 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -926,8 +926,8 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port, + return ret; + } + +-int ksz8_mdb_add(struct ksz_device *dev, int port, +- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) ++static int ksz8_add_sta_mac(struct ksz_device *dev, int port, ++ const unsigned char *addr, u16 vid) + { + struct alu_struct alu; + int index; +@@ -937,8 +937,8 @@ int ksz8_mdb_add(struct ksz_device *dev, int port, + for (index = 0; index < dev->info->num_statics; index++) { + if (!ksz8_r_sta_mac_table(dev, index, &alu)) { + /* Found one already in static MAC table. */ +- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && +- alu.fid == mdb->vid) ++ if (!memcmp(alu.mac, addr, ETH_ALEN) && ++ alu.fid == vid) + break; + /* Remember the first empty entry. */ + } else if (!empty) { +@@ -954,23 +954,23 @@ int ksz8_mdb_add(struct ksz_device *dev, int port, + if (index == dev->info->num_statics) { + index = empty - 1; + memset(&alu, 0, sizeof(alu)); +- memcpy(alu.mac, mdb->addr, ETH_ALEN); ++ memcpy(alu.mac, addr, ETH_ALEN); + alu.is_static = true; + } + alu.port_forward |= BIT(port); +- if (mdb->vid) { ++ if (vid) { + alu.is_use_fid = true; + + /* Need a way to map VID to FID. 
*/ +- alu.fid = mdb->vid; ++ alu.fid = vid; + } + ksz8_w_sta_mac_table(dev, index, &alu); + + return 0; + } + +-int ksz8_mdb_del(struct ksz_device *dev, int port, +- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) ++static int ksz8_del_sta_mac(struct ksz_device *dev, int port, ++ const unsigned char *addr, u16 vid) + { + struct alu_struct alu; + int index; +@@ -978,8 +978,8 @@ int ksz8_mdb_del(struct ksz_device *dev, int port, + for (index = 0; index < dev->info->num_statics; index++) { + if (!ksz8_r_sta_mac_table(dev, index, &alu)) { + /* Found one already in static MAC table. */ +- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && +- alu.fid == mdb->vid) ++ if (!memcmp(alu.mac, addr, ETH_ALEN) && ++ alu.fid == vid) + break; + } + } +@@ -998,6 +998,18 @@ int ksz8_mdb_del(struct ksz_device *dev, int port, + return 0; + } + ++int ksz8_mdb_add(struct ksz_device *dev, int port, ++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) ++{ ++ return ksz8_add_sta_mac(dev, port, mdb->addr, mdb->vid); ++} ++ ++int ksz8_mdb_del(struct ksz_device *dev, int port, ++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) ++{ ++ return ksz8_del_sta_mac(dev, port, mdb->addr, mdb->vid); ++} ++ + int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag, + struct netlink_ext_ack *extack) + { +-- +2.39.2 + diff --git a/queue-6.1/net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch b/queue-6.1/net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch new file mode 100644 index 00000000000..7ffbd3f1702 --- /dev/null +++ b/queue-6.1/net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch @@ -0,0 +1,154 @@ +From fe300e7a9fd658eb7004931d40d174aea1c803a0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Apr 2023 12:18:39 +0200 +Subject: net: dsa: microchip: ksz8_r_sta_mac_table(): Avoid using error code + for empty entries + +From: Oleksij Rempel + +[ Upstream commit 559901b46810e82ba5321a5e789f994b65d3bc3d ] + +Prepare for the next 
patch by ensuring that ksz8_r_sta_mac_table() does +not use error codes for empty entries. This change will enable better +handling of read/write errors in the upcoming patch. + +Signed-off-by: Oleksij Rempel +Reviewed-by: Vladimir Oltean +Signed-off-by: Paolo Abeni +Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz8795.c | 87 +++++++++++++++++------------ + 1 file changed, 50 insertions(+), 37 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c +index a2f67be66b97d..6639fae56da7f 100644 +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -407,7 +407,7 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr, + } + + static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, +- struct alu_struct *alu) ++ struct alu_struct *alu, bool *valid) + { + u32 data_hi, data_lo; + const u8 *shifts; +@@ -420,28 +420,32 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, + ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data); + data_hi = data >> 32; + data_lo = (u32)data; +- if (data_hi & (masks[STATIC_MAC_TABLE_VALID] | +- masks[STATIC_MAC_TABLE_OVERRIDE])) { +- alu->mac[5] = (u8)data_lo; +- alu->mac[4] = (u8)(data_lo >> 8); +- alu->mac[3] = (u8)(data_lo >> 16); +- alu->mac[2] = (u8)(data_lo >> 24); +- alu->mac[1] = (u8)data_hi; +- alu->mac[0] = (u8)(data_hi >> 8); +- alu->port_forward = +- (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >> +- shifts[STATIC_MAC_FWD_PORTS]; +- alu->is_override = +- (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0; +- data_hi >>= 1; +- alu->is_static = true; +- alu->is_use_fid = +- (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 
1 : 0; +- alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >> +- shifts[STATIC_MAC_FID]; ++ ++ if (!(data_hi & (masks[STATIC_MAC_TABLE_VALID] | ++ masks[STATIC_MAC_TABLE_OVERRIDE]))) { ++ *valid = false; + return 0; + } +- return -ENXIO; ++ ++ alu->mac[5] = (u8)data_lo; ++ alu->mac[4] = (u8)(data_lo >> 8); ++ alu->mac[3] = (u8)(data_lo >> 16); ++ alu->mac[2] = (u8)(data_lo >> 24); ++ alu->mac[1] = (u8)data_hi; ++ alu->mac[0] = (u8)(data_hi >> 8); ++ alu->port_forward = ++ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >> ++ shifts[STATIC_MAC_FWD_PORTS]; ++ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0; ++ data_hi >>= 1; ++ alu->is_static = true; ++ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0; ++ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >> ++ shifts[STATIC_MAC_FID]; ++ ++ *valid = true; ++ ++ return 0; + } + + void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr, +@@ -930,20 +934,25 @@ static int ksz8_add_sta_mac(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid) + { + struct alu_struct alu; +- int index; ++ int index, ret; + int empty = 0; + + alu.port_forward = 0; + for (index = 0; index < dev->info->num_statics; index++) { +- if (!ksz8_r_sta_mac_table(dev, index, &alu)) { +- /* Found one already in static MAC table. */ +- if (!memcmp(alu.mac, addr, ETH_ALEN) && +- alu.fid == vid) +- break; +- /* Remember the first empty entry. */ +- } else if (!empty) { +- empty = index + 1; ++ bool valid; ++ ++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid); ++ if (ret) ++ return ret; ++ if (!valid) { ++ /* Remember the first empty entry. 
*/ ++ if (!empty) ++ empty = index + 1; ++ continue; + } ++ ++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid) ++ break; + } + + /* no available entry */ +@@ -973,15 +982,19 @@ static int ksz8_del_sta_mac(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid) + { + struct alu_struct alu; +- int index; ++ int index, ret; + + for (index = 0; index < dev->info->num_statics; index++) { +- if (!ksz8_r_sta_mac_table(dev, index, &alu)) { +- /* Found one already in static MAC table. */ +- if (!memcmp(alu.mac, addr, ETH_ALEN) && +- alu.fid == vid) +- break; +- } ++ bool valid; ++ ++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid); ++ if (ret) ++ return ret; ++ if (!valid) ++ continue; ++ ++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid) ++ break; + } + + /* no available entry */ +-- +2.39.2 + diff --git a/queue-6.1/net-ethernet-litex-add-support-for-64-bit-stats.patch b/queue-6.1/net-ethernet-litex-add-support-for-64-bit-stats.patch new file mode 100644 index 00000000000..a4b0da3e2df --- /dev/null +++ b/queue-6.1/net-ethernet-litex-add-support-for-64-bit-stats.patch @@ -0,0 +1,82 @@ +From d4038c95e83f7d2c42f76634c0bd1e407d38b652 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 00:20:35 +0800 +Subject: net: ethernet: litex: add support for 64 bit stats + +From: Jisheng Zhang + +[ Upstream commit 18da174d865a87d47d2f33f5b0a322efcf067728 ] + +Implement 64 bit per cpu stats to fix the overflow of netdev->stats +on 32 bit platforms. To simplify the code, we use net core +pcpu_sw_netstats infrastructure. One small drawback is some memory +overhead because litex uses just one queue, but we allocate the +counters per cpu. 
+ +Signed-off-by: Jisheng Zhang +Reviewed-by: Simon Horman +Acked-by: Gabriel Somlo +Link: https://lore.kernel.org/r/20230614162035.300-1-jszhang@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/litex/litex_liteeth.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c +index 35f24e0f09349..ffa96059079c6 100644 +--- a/drivers/net/ethernet/litex/litex_liteeth.c ++++ b/drivers/net/ethernet/litex/litex_liteeth.c +@@ -78,8 +78,7 @@ static int liteeth_rx(struct net_device *netdev) + memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len); + skb->protocol = eth_type_trans(skb, netdev); + +- netdev->stats.rx_packets++; +- netdev->stats.rx_bytes += len; ++ dev_sw_netstats_rx_add(netdev, len); + + return netif_rx(skb); + +@@ -185,8 +184,7 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, + litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len); + litex_write8(priv->base + LITEETH_READER_START, 1); + +- netdev->stats.tx_bytes += skb->len; +- netdev->stats.tx_packets++; ++ dev_sw_netstats_tx_add(netdev, 1, skb->len); + + priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots; + dev_kfree_skb_any(skb); +@@ -194,9 +192,17 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, + return NETDEV_TX_OK; + } + ++static void ++liteeth_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) ++{ ++ netdev_stats_to_stats64(stats, &netdev->stats); ++ dev_fetch_sw_netstats(stats, netdev->tstats); ++} ++ + static const struct net_device_ops liteeth_netdev_ops = { + .ndo_open = liteeth_open, + .ndo_stop = liteeth_stop, ++ .ndo_get_stats64 = liteeth_get_stats64, + .ndo_start_xmit = liteeth_start_xmit, + }; + +@@ -242,6 +248,11 @@ static int liteeth_probe(struct platform_device *pdev) + priv->netdev = netdev; + priv->dev = &pdev->dev; + ++ netdev->tstats = 
devm_netdev_alloc_pcpu_stats(&pdev->dev, ++ struct pcpu_sw_netstats); ++ if (!netdev->tstats) ++ return -ENOMEM; ++ + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; +-- +2.39.2 + diff --git a/queue-6.1/net-ethernet-mtk_eth_soc-handle-probe-deferral.patch b/queue-6.1/net-ethernet-mtk_eth_soc-handle-probe-deferral.patch new file mode 100644 index 00000000000..418095fc532 --- /dev/null +++ b/queue-6.1/net-ethernet-mtk_eth_soc-handle-probe-deferral.patch @@ -0,0 +1,86 @@ +From c3465911da1e9d1a7b64a1ed1f446f1ef9666ff2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 03:42:29 +0100 +Subject: net: ethernet: mtk_eth_soc: handle probe deferral + +From: Daniel Golle + +[ Upstream commit 1d6d537dc55d1f42d16290f00157ac387985b95b ] + +Move the call to of_get_ethdev_address to mtk_add_mac which is part of +the probe function and can hence itself return -EPROBE_DEFER should +of_get_ethdev_address return -EPROBE_DEFER. This allows us to entirely +get rid of the mtk_init function. + +The problem of of_get_ethdev_address returning -EPROBE_DEFER surfaced +in situations in which the NVMEM provider holding the MAC address has +not yet be loaded at the time mtk_eth_soc is initially probed. In this +case probing of mtk_eth_soc should be deferred instead of falling back +to use a random MAC address, so once the NVMEM provider becomes +available probing can be repeated. + +Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet") +Signed-off-by: Daniel Golle +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 ++++++++------------- + 1 file changed, 11 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 49975924e2426..7e318133423a9 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -3425,23 +3425,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth) + return 0; + } + +-static int __init mtk_init(struct net_device *dev) +-{ +- struct mtk_mac *mac = netdev_priv(dev); +- struct mtk_eth *eth = mac->hw; +- int ret; +- +- ret = of_get_ethdev_address(mac->of_node, dev); +- if (ret) { +- /* If the mac address is invalid, use random mac address */ +- eth_hw_addr_random(dev); +- dev_err(eth->dev, "generated random MAC address %pM\n", +- dev->dev_addr); +- } +- +- return 0; +-} +- + static void mtk_uninit(struct net_device *dev) + { + struct mtk_mac *mac = netdev_priv(dev); +@@ -3789,7 +3772,6 @@ static const struct ethtool_ops mtk_ethtool_ops = { + }; + + static const struct net_device_ops mtk_netdev_ops = { +- .ndo_init = mtk_init, + .ndo_uninit = mtk_uninit, + .ndo_open = mtk_open, + .ndo_stop = mtk_stop, +@@ -3845,6 +3827,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) + mac->hw = eth; + mac->of_node = np; + ++ err = of_get_ethdev_address(mac->of_node, eth->netdev[id]); ++ if (err == -EPROBE_DEFER) ++ return err; ++ ++ if (err) { ++ /* If the mac address is invalid, use random mac address */ ++ eth_hw_addr_random(eth->netdev[id]); ++ dev_err(eth->dev, "generated random MAC address %pM\n", ++ eth->netdev[id]->dev_addr); ++ } ++ + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); + mac->hwlro_ip_cnt = 0; + +-- +2.39.2 + diff --git a/queue-6.1/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch b/queue-6.1/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch new file mode 100644 index 
00000000000..52f517cfd5f --- /dev/null +++ b/queue-6.1/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch @@ -0,0 +1,78 @@ +From c809a11a4b6d3cfd988c7fb48576f8544d3b1d7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jul 2023 16:36:57 +0530 +Subject: net: ethernet: ti: cpsw_ale: Fix + cpsw_ale_get_field()/cpsw_ale_set_field() + +From: Tanmay Patil + +[ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ] + +CPSW ALE has 75 bit ALE entries which are stored within three 32 bit words. +The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the +field will be strictly contained within one word. However, this is not +guaranteed to be the case and it is possible for ALE field entries to span +across up to two words at the most. + +Fix the methods to handle getting/setting fields spanning up to two words. + +Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support") +Signed-off-by: Tanmay Patil +[s-vadapalli@ti.com: rephrased commit message and added Fixes tag] +Signed-off-by: Siddharth Vadapalli +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c +index 231370e9a8017..2647c18d40d95 100644 +--- a/drivers/net/ethernet/ti/cpsw_ale.c ++++ b/drivers/net/ethernet/ti/cpsw_ale.c +@@ -106,23 +106,37 @@ struct cpsw_ale_dev_id { + + static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) + { +- int idx; ++ int idx, idx2; ++ u32 hi_val = 0; + + idx = start / 32; ++ idx2 = (start + bits - 1) / 32; ++ /* Check if bits to be fetched exceed a word */ ++ if (idx != idx2) { ++ idx2 = 2 - idx2; /* flip */ ++ hi_val = ale_entry[idx2] << ((idx2 * 32) - start); ++ } + start -= idx * 32; + idx = 2 - idx; /* flip */ +- return (ale_entry[idx] >> start) & BITMASK(bits); ++ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits); + } + + static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, + u32 value) + { +- int idx; ++ int idx, idx2; + + value &= BITMASK(bits); +- idx = start / 32; ++ idx = start / 32; ++ idx2 = (start + bits - 1) / 32; ++ /* Check if bits to be set exceed a word */ ++ if (idx != idx2) { ++ idx2 = 2 - idx2; /* flip */ ++ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); ++ ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); ++ } + start -= idx * 32; +- idx = 2 - idx; /* flip */ ++ idx = 2 - idx; /* flip */ + ale_entry[idx] &= ~(BITMASK(bits) << start); + ale_entry[idx] |= (value << start); + } +-- +2.39.2 + diff --git a/queue-6.1/net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch b/queue-6.1/net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch new file mode 100644 index 00000000000..1779fb5be73 --- /dev/null +++ b/queue-6.1/net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch @@ -0,0 +1,140 @@ +From c7bac058c0b91ef65d58a3020117d8bad2853616 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Jun 
2023 20:33:08 +0800 +Subject: net: hns3: fix strncpy() not using dest-buf length as length issue + +From: Hao Chen + +[ Upstream commit 1cf3d5567f273a8746d1bade00633a93204f80f0 ] + +Now, strncpy() in hns3_dbg_fill_content() use src-length as copy-length, +it may result in dest-buf overflow. + +This patch is to fix intel compile warning for csky-linux-gcc (GCC) 12.1.0 +compiler. + +The warning reports as below: + +hclge_debugfs.c:92:25: warning: 'strncpy' specified bound depends on +the length of the source argument [-Wstringop-truncation] + +strncpy(pos, items[i].name, strlen(items[i].name)); + +hclge_debugfs.c:90:25: warning: 'strncpy' output truncated before +terminating nul copying as many bytes from a string as its length +[-Wstringop-truncation] + +strncpy(pos, result[i], strlen(result[i])); + +strncpy() use src-length as copy-length, it may result in +dest-buf overflow. + +So,this patch add some values check to avoid this issue. + +Signed-off-by: Hao Chen +Reported-by: kernel test robot +Closes: https://lore.kernel.org/lkml/202207170606.7WtHs9yS-lkp@intel.com/T/ +Signed-off-by: Hao Lan +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/hisilicon/hns3/hns3_debugfs.c | 31 ++++++++++++++----- + .../hisilicon/hns3/hns3pf/hclge_debugfs.c | 29 ++++++++++++++--- + 2 files changed, 48 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +index bcccd82a2620f..f6ededec5a4fa 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +@@ -435,19 +435,36 @@ static void hns3_dbg_fill_content(char *content, u16 len, + const struct hns3_dbg_item *items, + const char **result, u16 size) + { ++#define HNS3_DBG_LINE_END_LEN 2 + char *pos = content; ++ u16 item_len; + u16 i; + ++ if (!len) { ++ return; ++ } else if (len <= HNS3_DBG_LINE_END_LEN) { ++ *pos++ = '\0'; ++ return; ++ } ++ + 
memset(content, ' ', len); +- for (i = 0; i < size; i++) { +- if (result) +- strncpy(pos, result[i], strlen(result[i])); +- else +- strncpy(pos, items[i].name, strlen(items[i].name)); ++ len -= HNS3_DBG_LINE_END_LEN; + +- pos += strlen(items[i].name) + items[i].interval; ++ for (i = 0; i < size; i++) { ++ item_len = strlen(items[i].name) + items[i].interval; ++ if (len < item_len) ++ break; ++ ++ if (result) { ++ if (item_len < strlen(result[i])) ++ break; ++ strscpy(pos, result[i], strlen(result[i])); ++ } else { ++ strscpy(pos, items[i].name, strlen(items[i].name)); ++ } ++ pos += item_len; ++ len -= item_len; + } +- + *pos++ = '\n'; + *pos++ = '\0'; + } +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +index 142415c84c6b2..0ebc21401b7c2 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +@@ -87,16 +87,35 @@ static void hclge_dbg_fill_content(char *content, u16 len, + const struct hclge_dbg_item *items, + const char **result, u16 size) + { ++#define HCLGE_DBG_LINE_END_LEN 2 + char *pos = content; ++ u16 item_len; + u16 i; + ++ if (!len) { ++ return; ++ } else if (len <= HCLGE_DBG_LINE_END_LEN) { ++ *pos++ = '\0'; ++ return; ++ } ++ + memset(content, ' ', len); ++ len -= HCLGE_DBG_LINE_END_LEN; ++ + for (i = 0; i < size; i++) { +- if (result) +- strncpy(pos, result[i], strlen(result[i])); +- else +- strncpy(pos, items[i].name, strlen(items[i].name)); +- pos += strlen(items[i].name) + items[i].interval; ++ item_len = strlen(items[i].name) + items[i].interval; ++ if (len < item_len) ++ break; ++ ++ if (result) { ++ if (item_len < strlen(result[i])) ++ break; ++ strscpy(pos, result[i], strlen(result[i])); ++ } else { ++ strscpy(pos, items[i].name, strlen(items[i].name)); ++ } ++ pos += item_len; ++ len -= item_len; + } + *pos++ = '\n'; + *pos++ = '\0'; +-- +2.39.2 + diff --git 
a/queue-6.1/net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch b/queue-6.1/net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch new file mode 100644 index 00000000000..3645eb7a502 --- /dev/null +++ b/queue-6.1/net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch @@ -0,0 +1,134 @@ +From d2d9a97443c3d363ac55a22c42cc9e677b12faa3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 May 2023 18:14:52 +0200 +Subject: net: ipv4: use consistent txhash in TIME_WAIT and SYN_RECV + +From: Antoine Tenart + +[ Upstream commit c0a8966e2bc7d31f77a7246947ebc09c1ff06066 ] + +When using IPv4/TCP, skb->hash comes from sk->sk_txhash except in +TIME_WAIT and SYN_RECV where it's not set in the reply skb from +ip_send_unicast_reply. Those packets will have a mismatched hash with +others from the same flow as their hashes will be 0. IPv6 does not have +the same issue as the hash is set from the socket txhash in those cases. + +This commits sets the hash in the reply skb from ip_send_unicast_reply, +which makes the IPv4 code behaving like IPv6. 
+ +Signed-off-by: Antoine Tenart +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Stable-dep-of: 5e5265522a9a ("tcp: annotate data-races around tcp_rsk(req)->txhash") +Signed-off-by: Sasha Levin +--- + include/net/ip.h | 2 +- + net/ipv4/ip_output.c | 4 +++- + net/ipv4/tcp_ipv4.c | 14 +++++++++----- + 3 files changed, 13 insertions(+), 7 deletions(-) + +diff --git a/include/net/ip.h b/include/net/ip.h +index acec504c469a0..83a1a9bc3ceb1 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -282,7 +282,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + const struct ip_options *sopt, + __be32 daddr, __be32 saddr, + const struct ip_reply_arg *arg, +- unsigned int len, u64 transmit_time); ++ unsigned int len, u64 transmit_time, u32 txhash); + + #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) + #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index 2a07588265c70..7b4ab545c06e0 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1691,7 +1691,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + const struct ip_options *sopt, + __be32 daddr, __be32 saddr, + const struct ip_reply_arg *arg, +- unsigned int len, u64 transmit_time) ++ unsigned int len, u64 transmit_time, u32 txhash) + { + struct ip_options_data replyopts; + struct ipcm_cookie ipc; +@@ -1754,6 +1754,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + arg->csum)); + nskb->ip_summed = CHECKSUM_NONE; + nskb->mono_delivery_time = !!transmit_time; ++ if (txhash) ++ skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); + ip_push_pending_frames(sk, &fl4); + } + out: +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index a7de5ba74e7f7..ef740983a1222 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) + 
u64 transmit_time = 0; + struct sock *ctl_sk; + struct net *net; ++ u32 txhash = 0; + + /* Never send a reset in response to a reset. */ + if (th->rst) +@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) + inet_twsk(sk)->tw_priority : sk->sk_priority; + transmit_time = tcp_transmit_time(sk); + xfrm_sk_clone_policy(ctl_sk, sk); ++ txhash = (sk->sk_state == TCP_TIME_WAIT) ? ++ inet_twsk(sk)->tw_txhash : sk->sk_txhash; + } else { + ctl_sk->sk_mark = 0; + ctl_sk->sk_priority = 0; +@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) + skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, + &arg, arg.iov[0].iov_len, +- transmit_time); ++ transmit_time, txhash); + + xfrm_sk_free_policy(ctl_sk); + sock_net_set(ctl_sk, &init_net); +@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk, + struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, int oif, + struct tcp_md5sig_key *key, +- int reply_flags, u8 tos) ++ int reply_flags, u8 tos, u32 txhash) + { + const struct tcphdr *th = tcp_hdr(skb); + struct { +@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk, + skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, + &arg, arg.iov[0].iov_len, +- transmit_time); ++ transmit_time, txhash); + + sock_net_set(ctl_sk, &init_net); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); +@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) + tw->tw_bound_dev_if, + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, +- tw->tw_tos ++ tw->tw_tos, ++ tw->tw_txhash + ); + + inet_twsk_put(tw); +@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + 0, + tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + inet_rsk(req)->no_srccheck ? 
IP_REPLY_ARG_NOSRCCHECK : 0, +- ip_hdr(skb)->tos); ++ ip_hdr(skb)->tos, tcp_rsk(req)->txhash); + } + + /* +-- +2.39.2 + diff --git a/queue-6.1/net-ipv4-use-kfree_sensitive-instead-of-kfree.patch b/queue-6.1/net-ipv4-use-kfree_sensitive-instead-of-kfree.patch new file mode 100644 index 00000000000..db0b541de2a --- /dev/null +++ b/queue-6.1/net-ipv4-use-kfree_sensitive-instead-of-kfree.patch @@ -0,0 +1,38 @@ +From 9ba17b30e66744d6805871a41ff330f6594f1806 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 17:59:19 +0800 +Subject: net: ipv4: Use kfree_sensitive instead of kfree + +From: Wang Ming + +[ Upstream commit daa751444fd9d4184270b1479d8af49aaf1a1ee6 ] + +key might contain private part of the key, so better use +kfree_sensitive to free it. + +Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP") +Signed-off-by: Wang Ming +Reviewed-by: Tariq Toukan +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/esp4.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c +index 52c8047efedbb..2d094d417ecae 100644 +--- a/net/ipv4/esp4.c ++++ b/net/ipv4/esp4.c +@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x, + err = crypto_aead_setkey(aead, key, keylen); + + free_key: +- kfree(key); ++ kfree_sensitive(key); + + error: + return err; +-- +2.39.2 + diff --git a/queue-6.1/net-ipv6-check-return-value-of-pskb_trim.patch b/queue-6.1/net-ipv6-check-return-value-of-pskb_trim.patch new file mode 100644 index 00000000000..21fad0bb8fb --- /dev/null +++ b/queue-6.1/net-ipv6-check-return-value-of-pskb_trim.patch @@ -0,0 +1,39 @@ +From d40157f8faa30cf97d32dde6d80704d5d0898f75 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 22:45:19 +0800 +Subject: net:ipv6: check return value of pskb_trim() + +From: Yuanjun Gong + +[ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ] + +goto tx_err if an unexpected result 
is returned by pskb_trim() +in ip6erspan_tunnel_xmit(). + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Yuanjun Gong +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6_gre.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 216b40ccadae0..d3fba7d8dec4e 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -977,7 +977,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + goto tx_err; + + if (skb->len > dev->mtu + dev->hard_header_len) { +- pskb_trim(skb, dev->mtu + dev->hard_header_len); ++ if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) ++ goto tx_err; + truncate = true; + } + +-- +2.39.2 + diff --git a/queue-6.1/net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch b/queue-6.1/net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch new file mode 100644 index 00000000000..45e4500a7d9 --- /dev/null +++ b/queue-6.1/net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch @@ -0,0 +1,74 @@ +From 5cd4f073ef92600361ab34604f85b132f284a528 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 03:02:31 +0300 +Subject: net: phy: prevent stale pointer dereference in phy_init() + +From: Vladimir Oltean + +[ Upstream commit 1c613beaf877c0c0d755853dc62687e2013e55c4 ] + +mdio_bus_init() and phy_driver_register() both have error paths, and if +those are ever hit, ethtool will have a stale pointer to the +phy_ethtool_phy_ops stub structure, which references memory from a +module that failed to load (phylib). + +It is probably hard to force an error in this code path even manually, +but the error teardown path of phy_init() should be the same as +phy_exit(), which is now simply not the case.
+ +Fixes: 55d8f053ce1b ("net: phy: Register ethtool PHY operations") +Link: https://lore.kernel.org/netdev/ZLaiJ4G6TaJYGJyU@shell.armlinux.org.uk/ +Suggested-by: Russell King (Oracle) +Signed-off-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20230720000231.1939689-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phy_device.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 7fbb0904b3c0f..82f74f96eba29 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -3252,23 +3252,30 @@ static int __init phy_init(void) + { + int rc; + ++ ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); ++ + rc = mdio_bus_init(); + if (rc) +- return rc; ++ goto err_ethtool_phy_ops; + +- ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); + features_init(); + + rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE); + if (rc) +- goto err_c45; ++ goto err_mdio_bus; + + rc = phy_driver_register(&genphy_driver, THIS_MODULE); +- if (rc) { +- phy_driver_unregister(&genphy_c45_driver); ++ if (rc) ++ goto err_c45; ++ ++ return 0; ++ + err_c45: +- mdio_bus_exit(); +- } ++ phy_driver_unregister(&genphy_c45_driver); ++err_mdio_bus: ++ mdio_bus_exit(); ++err_ethtool_phy_ops: ++ ethtool_set_ethtool_phy_ops(NULL); + + return rc; + } +-- +2.39.2 + diff --git a/queue-6.1/net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch b/queue-6.1/net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch new file mode 100644 index 00000000000..fca333f2ee6 --- /dev/null +++ b/queue-6.1/net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch @@ -0,0 +1,165 @@ +From 80ba7d3f04c1dd00e5a8cdab662fc9acf1a3b2b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 15:05:13 -0300 +Subject: net: sched: cls_bpf: Undo tcf_bind_filter in case of an error + +From: Victor Nogueira + +[ Upstream commit 
26a22194927e8521e304ed75c2f38d8068d55fc7 ] + +If cls_bpf_offload errors out, we must also undo tcf_bind_filter that +was done before the error. + +Fix that by calling tcf_unbind_filter in errout_parms. + +Fixes: eadb41489fd2 ("net: cls_bpf: add support for marking filters as hardware-only") +Signed-off-by: Victor Nogueira +Acked-by: Jamal Hadi Salim +Reviewed-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_bpf.c | 99 +++++++++++++++++++++------------------------ + 1 file changed, 47 insertions(+), 52 deletions(-) + +diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c +index bc317b3eac124..0320e11eb248b 100644 +--- a/net/sched/cls_bpf.c ++++ b/net/sched/cls_bpf.c +@@ -404,56 +404,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, + return 0; + } + +-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, +- struct cls_bpf_prog *prog, unsigned long base, +- struct nlattr **tb, struct nlattr *est, u32 flags, +- struct netlink_ext_ack *extack) +-{ +- bool is_bpf, is_ebpf, have_exts = false; +- u32 gen_flags = 0; +- int ret; +- +- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; +- is_ebpf = tb[TCA_BPF_FD]; +- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) +- return -EINVAL; +- +- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags, +- extack); +- if (ret < 0) +- return ret; +- +- if (tb[TCA_BPF_FLAGS]) { +- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); +- +- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) +- return -EINVAL; +- +- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; +- } +- if (tb[TCA_BPF_FLAGS_GEN]) { +- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); +- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || +- !tc_flags_valid(gen_flags)) +- return -EINVAL; +- } +- +- prog->exts_integrated = have_exts; +- prog->gen_flags = gen_flags; +- +- ret = is_bpf ? 
cls_bpf_prog_from_ops(tb, prog) : +- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); +- if (ret < 0) +- return ret; +- +- if (tb[TCA_BPF_CLASSID]) { +- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); +- tcf_bind_filter(tp, &prog->res, base); +- } +- +- return 0; +-} +- + static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, +@@ -461,9 +411,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, + struct netlink_ext_ack *extack) + { + struct cls_bpf_head *head = rtnl_dereference(tp->root); ++ bool is_bpf, is_ebpf, have_exts = false; + struct cls_bpf_prog *oldprog = *arg; + struct nlattr *tb[TCA_BPF_MAX + 1]; ++ bool bound_to_filter = false; + struct cls_bpf_prog *prog; ++ u32 gen_flags = 0; + int ret; + + if (tca[TCA_OPTIONS] == NULL) +@@ -502,11 +455,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, + goto errout; + prog->handle = handle; + +- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags, +- extack); ++ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; ++ is_ebpf = tb[TCA_BPF_FD]; ++ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { ++ ret = -EINVAL; ++ goto errout_idr; ++ } ++ ++ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts, ++ flags, extack); ++ if (ret < 0) ++ goto errout_idr; ++ ++ if (tb[TCA_BPF_FLAGS]) { ++ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); ++ ++ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { ++ ret = -EINVAL; ++ goto errout_idr; ++ } ++ ++ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; ++ } ++ if (tb[TCA_BPF_FLAGS_GEN]) { ++ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); ++ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || ++ !tc_flags_valid(gen_flags)) { ++ ret = -EINVAL; ++ goto errout_idr; ++ } ++ } ++ ++ prog->exts_integrated = have_exts; ++ prog->gen_flags = gen_flags; ++ ++ ret = is_bpf ? 
cls_bpf_prog_from_ops(tb, prog) : ++ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); + if (ret < 0) + goto errout_idr; + ++ if (tb[TCA_BPF_CLASSID]) { ++ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); ++ tcf_bind_filter(tp, &prog->res, base); ++ bound_to_filter = true; ++ } ++ + ret = cls_bpf_offload(tp, prog, oldprog, extack); + if (ret) + goto errout_parms; +@@ -528,6 +521,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, + return 0; + + errout_parms: ++ if (bound_to_filter) ++ tcf_unbind_filter(tp, &prog->res); + cls_bpf_free_parms(prog); + errout_idr: + if (!oldprog) +-- +2.39.2 + diff --git a/queue-6.1/net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch b/queue-6.1/net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch new file mode 100644 index 00000000000..892c64519e3 --- /dev/null +++ b/queue-6.1/net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch @@ -0,0 +1,98 @@ +From df17b2737c98c54588b1108cd709109a4a053d7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 15:05:10 -0300 +Subject: net: sched: cls_matchall: Undo tcf_bind_filter in case of failure + after mall_set_parms + +From: Victor Nogueira + +[ Upstream commit b3d0e0489430735e2e7626aa37e6462cdd136e9d ] + +In case an error occurred after mall_set_parms executed successfully, we +must undo the tcf_bind_filter call it issues. + +Fix that by calling tcf_unbind_filter in err_replace_hw_filter label. + +Fixes: ec2507d2a306 ("net/sched: cls_matchall: Fix error path") +Signed-off-by: Victor Nogueira +Acked-by: Jamal Hadi Salim +Reviewed-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_matchall.c | 35 ++++++++++++----------------------- + 1 file changed, 12 insertions(+), 23 deletions(-) + +diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c +index 39a5d9c170def..43f8df5847414 100644 +--- a/net/sched/cls_matchall.c ++++ b/net/sched/cls_matchall.c +@@ -157,26 +157,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, + }; + +-static int mall_set_parms(struct net *net, struct tcf_proto *tp, +- struct cls_mall_head *head, +- unsigned long base, struct nlattr **tb, +- struct nlattr *est, u32 flags, u32 fl_flags, +- struct netlink_ext_ack *extack) +-{ +- int err; +- +- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags, +- fl_flags, extack); +- if (err < 0) +- return err; +- +- if (tb[TCA_MATCHALL_CLASSID]) { +- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); +- tcf_bind_filter(tp, &head->res, base); +- } +- return 0; +-} +- + static int mall_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, +@@ -185,6 +165,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + { + struct cls_mall_head *head = rtnl_dereference(tp->root); + struct nlattr *tb[TCA_MATCHALL_MAX + 1]; ++ bool bound_to_filter = false; + struct cls_mall_head *new; + u32 userflags = 0; + int err; +@@ -224,11 +205,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + goto err_alloc_percpu; + } + +- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], +- flags, new->flags, extack); +- if (err) ++ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE], ++ &new->exts, flags, new->flags, extack); ++ if (err < 0) + goto err_set_parms; + ++ if (tb[TCA_MATCHALL_CLASSID]) { ++ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); ++ tcf_bind_filter(tp, &new->res, base); ++ bound_to_filter = true; ++ } ++ + if 
(!tc_skip_hw(new->flags)) { + err = mall_replace_hw_filter(tp, new, (unsigned long)new, + extack); +@@ -244,6 +231,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, + return 0; + + err_replace_hw_filter: ++ if (bound_to_filter) ++ tcf_unbind_filter(tp, &new->res); + err_set_parms: + free_percpu(new->pf); + err_alloc_percpu: +-- +2.39.2 + diff --git a/queue-6.1/net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch b/queue-6.1/net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch new file mode 100644 index 00000000000..644fb9b107b --- /dev/null +++ b/queue-6.1/net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch @@ -0,0 +1,49 @@ +From 2565a1a811821f66ba1cd9a3bb9496fbecdc80e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 15:05:12 -0300 +Subject: net: sched: cls_u32: Undo refcount decrement in case update failed + +From: Victor Nogueira + +[ Upstream commit e8d3d78c19be0264a5692bed477c303523aead31 ] + +In the case of an update, when TCA_U32_LINK is set, u32_set_parms will +decrement the refcount of the ht_down (struct tc_u_hnode) pointer +present in the older u32 filter which we are replacing. However, if +u32_replace_hw_knode errors out, the update command fails and that +ht_down pointer continues decremented. To fix that, when +u32_replace_hw_knode fails, check if ht_down's refcount was decremented +and undo the decrement. + +Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.") +Signed-off-by: Victor Nogueira +Acked-by: Jamal Hadi Salim +Reviewed-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index 7cfbcd5180841..1280736a7b92e 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -926,6 +926,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + if (err) { + u32_unbind_filter(tp, new, tb); + ++ if (tb[TCA_U32_LINK]) { ++ struct tc_u_hnode *ht_old; ++ ++ ht_old = rtnl_dereference(n->ht_down); ++ if (ht_old) ++ ht_old->refcnt++; ++ } + __u32_destroy_key(new); + return err; + } +-- +2.39.2 + diff --git a/queue-6.1/net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch b/queue-6.1/net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch new file mode 100644 index 00000000000..b118e643cf0 --- /dev/null +++ b/queue-6.1/net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch @@ -0,0 +1,122 @@ +From 66d4c485e832ee7c6d50709763bfdf4c14e821d0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 15:05:11 -0300 +Subject: net: sched: cls_u32: Undo tcf_bind_filter if u32_replace_hw_knode + +From: Victor Nogueira + +[ Upstream commit 9cb36faedeafb9720ac236aeae2ea57091d90a09 ] + +When u32_replace_hw_knode fails, we need to undo the tcf_bind_filter +operation done at u32_set_parms. + +Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.") +Signed-off-by: Victor Nogueira +Acked-by: Jamal Hadi Salim +Reviewed-by: Pedro Tammela +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 41 ++++++++++++++++++++++++++++++----------- + 1 file changed, 30 insertions(+), 11 deletions(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index a3477537c102b..7cfbcd5180841 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -710,8 +710,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { + [TCA_U32_FLAGS] = { .type = NLA_U32 }, + }; + ++static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n, ++ struct nlattr **tb) ++{ ++ if (tb[TCA_U32_CLASSID]) ++ tcf_unbind_filter(tp, &n->res); ++} ++ ++static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n, ++ unsigned long base, struct nlattr **tb) ++{ ++ if (tb[TCA_U32_CLASSID]) { ++ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); ++ tcf_bind_filter(tp, &n->res, base); ++ } ++} ++ + static int u32_set_parms(struct net *net, struct tcf_proto *tp, +- unsigned long base, + struct tc_u_knode *n, struct nlattr **tb, + struct nlattr *est, u32 flags, u32 fl_flags, + struct netlink_ext_ack *extack) +@@ -758,10 +773,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, + if (ht_old) + ht_old->refcnt--; + } +- if (tb[TCA_U32_CLASSID]) { +- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); +- tcf_bind_filter(tp, &n->res, base); +- } + + if (ifindex >= 0) + n->ifindex = ifindex; +@@ -901,17 +912,20 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + if (!new) + return -ENOMEM; + +- err = u32_set_parms(net, tp, base, new, tb, +- tca[TCA_RATE], flags, new->flags, +- extack); ++ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE], ++ flags, new->flags, extack); + + if (err) { + __u32_destroy_key(new); + return err; + } + ++ u32_bind_filter(tp, new, base, tb); ++ + err = u32_replace_hw_knode(tp, new, flags, extack); + if (err) { ++ u32_unbind_filter(tp, new, tb); ++ + __u32_destroy_key(new); + return err; + } +@@ -1072,15 +1086,18 @@ static int u32_change(struct 
net *net, struct sk_buff *in_skb, + } + #endif + +- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ++ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE], + flags, n->flags, extack); ++ ++ u32_bind_filter(tp, n, base, tb); ++ + if (err == 0) { + struct tc_u_knode __rcu **ins; + struct tc_u_knode *pins; + + err = u32_replace_hw_knode(tp, n, flags, extack); + if (err) +- goto errhw; ++ goto errunbind; + + if (!tc_in_hw(n->flags)) + n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; +@@ -1098,7 +1115,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + return 0; + } + +-errhw: ++errunbind: ++ u32_unbind_filter(tp, n, tb); ++ + #ifdef CONFIG_CLS_U32_MARK + free_percpu(n->pcpu_success); + #endif +-- +2.39.2 + diff --git a/queue-6.1/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch b/queue-6.1/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch new file mode 100644 index 00000000000..e9e644e643b --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch @@ -0,0 +1,64 @@ +From 93023625146793635d96beb87c81594cb326e47c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jul 2023 01:30:33 +0200 +Subject: netfilter: nf_tables: can't schedule in nft_chain_validate + +From: Florian Westphal + +[ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ] + +Can be called via nft set element list iteration, which may acquire +rcu and/or bh read lock (depends on set type). 
+ +BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353 +in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft +preempt_count: 0, expected: 0 +RCU nest depth: 1, expected: 0 +2 locks held by nft/1232: + #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid + #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire +Call Trace: + nft_chain_validate + nft_lookup_validate_setelem + nft_pipapo_walk + nft_lookup_validate + nft_chain_validate + nft_immediate_validate + nft_chain_validate + nf_tables_validate + nf_tables_abort + +No choice but to move it to nf_tables_validate(). + +Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 58f14e4ef63d4..0bb1cc7ed5e99 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3500,8 +3500,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) + if (err < 0) + return err; + } +- +- cond_resched(); + } + + return 0; +@@ -3525,6 +3523,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) + err = nft_chain_validate(&ctx, chain); + if (err < 0) + return err; ++ ++ cond_resched(); + } + + return 0; +-- +2.39.2 + diff --git a/queue-6.1/netfilter-nf_tables-fix-spurious-set-element-inserti.patch b/queue-6.1/netfilter-nf_tables-fix-spurious-set-element-inserti.patch new file mode 100644 index 00000000000..d9dbd340acc --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-fix-spurious-set-element-inserti.patch @@ -0,0 +1,49 @@ +From 447b7e2bbc060e4f8293f9e084a379b95e8bf78b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 00:29:58 +0200 +Subject: netfilter: 
nf_tables: fix spurious set element insertion failure + +From: Florian Westphal + +[ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ] + +On some platforms there is a padding hole in the nft_verdict +structure, between the verdict code and the chain pointer. + +On element insertion, if the new element clashes with an existing one and +NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as +the data associated with duplicated element is the same as the existing +one. The data equality check uses memcmp. + +For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT +padding area leads to spurious failure even if the verdict data is the +same. + +This then makes the insertion fail with 'already exists' error, even +though the new "key : data" matches an existing entry and userspace +told the kernel that it doesn't want to receive an error indication. + +Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 938cfa9a3adb6..58f14e4ef63d4 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -10114,6 +10114,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; ++ ++ /* zero padding hole for memcmp */ ++ memset(data, 0, sizeof(*data)); + data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict.code) { +-- +2.39.2 + diff --git a/queue-6.1/netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch b/queue-6.1/netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch new file mode 100644 index 00000000000..240214ec93d --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch @@ -0,0 +1,37 @@ +From 
2de006dd895fa8e0d71406e0293e4e0caa40e552 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 20:19:43 +0200 +Subject: netfilter: nf_tables: skip bound chain in netns release path + +From: Pablo Neira Ayuso + +[ Upstream commit 751d460ccff3137212f47d876221534bf0490996 ] + +Skip bound chain from netns release path, the rule that owns this chain +releases these objects. + +Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 0bb1cc7ed5e99..f621c5e48747b 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -10398,6 +10398,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { ++ if (nft_chain_is_bound(chain)) ++ continue; ++ + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); +-- +2.39.2 + diff --git a/queue-6.1/netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch b/queue-6.1/netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch new file mode 100644 index 00000000000..9aff1bc6b86 --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch @@ -0,0 +1,43 @@ +From 00af5d0ed7436d8d334b78b70165969fd0c0dde3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 09:17:21 +0200 +Subject: netfilter: nf_tables: skip bound chain on rule flush + +From: Pablo Neira Ayuso + +[ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ] + +Skip bound chain when flushing table rules, the rule that owns this +chain releases these objects. 
+ +Otherwise, the following warning is triggered: + + WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] + CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1 + RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] + +Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") +Reported-by: Kevin Rich +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index f621c5e48747b..ecde497368ec4 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3892,6 +3892,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; ++ if (nft_chain_is_bound(chain)) ++ continue; + + ctx.chain = chain; + err = nft_delrule_by_chain(&ctx); +-- +2.39.2 + diff --git a/queue-6.1/netfilter-nft_set_pipapo-fix-improper-element-remova.patch b/queue-6.1/netfilter-nft_set_pipapo-fix-improper-element-remova.patch new file mode 100644 index 00000000000..91dcec1dda0 --- /dev/null +++ b/queue-6.1/netfilter-nft_set_pipapo-fix-improper-element-remova.patch @@ -0,0 +1,63 @@ +From 83c0d8d2e1df2dea06f0b2bf34a73af311411a76 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:08:21 +0200 +Subject: netfilter: nft_set_pipapo: fix improper element removal + +From: Florian Westphal + +[ Upstream commit 87b5a5c209405cb6b57424cdfa226a6dbd349232 ] + +end key should be equal to start unless NFT_SET_EXT_KEY_END is present. + +It's possible to add elements that only have a start key +("{ 1.0.0.0 . 2.0.0.0 }") without an interval end.
+ +Insertion treats this via: + +if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) + end = (const u8 *)nft_set_ext_key_end(ext)->data; +else + end = start; + +but removal side always uses nft_set_ext_key_end(). +This is wrong and leads to garbage remaining in the set after removal +next lookup/insert attempt will give: + +BUG: KASAN: slab-use-after-free in pipapo_get+0x8eb/0xb90 +Read of size 1 at addr ffff888100d50586 by task nft-pipapo_uaf_/1399 +Call Trace: + kasan_report+0x105/0x140 + pipapo_get+0x8eb/0xb90 + nft_pipapo_insert+0x1dc/0x1710 + nf_tables_newsetelem+0x31f5/0x4e00 + .. + +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Reported-by: lonial con +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_set_pipapo.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 0452ee586c1cc..a81829c10feab 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -1930,7 +1930,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, + int i, start, rules_fx; + + match_start = data; +- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data; ++ ++ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END)) ++ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data; ++ else ++ match_end = data; + + start = first_rule; + rules_fx = rules_f0; +-- +2.39.2 + diff --git a/queue-6.1/octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch b/queue-6.1/octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch new file mode 100644 index 00000000000..27c97b9ed07 --- /dev/null +++ b/queue-6.1/octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch @@ -0,0 +1,43 @@ +From b8bfbeb43ba95b6189f76448167e05a0545f9706 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 16 Jul 2023 15:07:41 +0530 +Subject: octeontx2-pf: Dont allocate 
BPIDs for LBK interfaces + +From: Geetha sowjanya + +[ Upstream commit 8fcd7c7b3a38ab5e452f542fda8f7940e77e479a ] + +Current driver enables backpressure for LBK interfaces. +But these interfaces do not support this feature. +Hence, this patch fixes the issue by skipping the +backpressure configuration for these interfaces. + +Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool"). +Signed-off-by: Geetha sowjanya +Signed-off-by: Sunil Goutham +Link: https://lore.kernel.org/r/20230716093741.28063-1-gakula@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index ed911d9946277..c236dba80ff1a 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1452,8 +1452,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) + if (err) + goto err_free_npa_lf; + +- /* Enable backpressure */ +- otx2_nix_config_bp(pf, true); ++ /* Enable backpressure for CGX mapped PF/VFs */ ++ if (!is_otx2_lbkvf(pf->pdev)) ++ otx2_nix_config_bp(pf, true); + + /* Init Auras and pools used by NIX RQ, for free buffer ptrs */ + err = otx2_rq_aura_pool_init(pf); +-- +2.39.2 + diff --git a/queue-6.1/ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch b/queue-6.1/ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch new file mode 100644 index 00000000000..430fc7ff309 --- /dev/null +++ b/queue-6.1/ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch @@ -0,0 +1,63 @@ +From 2c90078841a0854ee8bf4c7fa749f54fbd044f83 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Tue, 13 Jun 2023 10:13:37 +0200 +Subject: [PATCH AUTOSEL 4.19 06/11] ovl: check type and offset of struct + vfsmount in ovl_entry +X-stable: review 
+X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit f723edb8a532cd26e1ff0a2b271d73762d48f762 ] + +Porting overlayfs to the new mount api I started experiencing random +crashes that couldn't be explained easily. So after much debugging and +reasoning it became clear that struct ovl_entry requires the pointer to +struct vfsmount to be the first member and of type struct vfsmount. + +During the port I added a new member at the beginning of struct +ovl_entry which broke all over the place in the form of random crashes +and cache corruptions. While there's a comment in ovl_free_fs() to the +effect of "Hack! Reuse ofs->layers as a vfsmount array before freeing +it" there's no such comment on struct ovl_entry which makes this easy to +trip over. + +Add a comment and two static asserts for both the offset and the type of +pointer in struct ovl_entry. + +Signed-off-by: Christian Brauner +Signed-off-by: Amir Goldstein +Signed-off-by: Sasha Levin +--- + fs/overlayfs/ovl_entry.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h +index 1a1adc697c553..3d34ce992f0d8 100644 +--- a/fs/overlayfs/ovl_entry.h ++++ b/fs/overlayfs/ovl_entry.h +@@ -28,6 +28,7 @@ struct ovl_sb { + }; + + struct ovl_layer { ++ /* ovl_free_fs() relies on @mnt being the first member! */ + struct vfsmount *mnt; + /* Trap in ovl inode cache */ + struct inode *trap; +@@ -38,6 +39,14 @@ struct ovl_layer { + int fsid; + }; + ++/* ++ * ovl_free_fs() relies on @mnt being the first member when unmounting ++ * the private mounts created for each layer. Let's check both the ++ * offset and type. 
++ */ ++static_assert(offsetof(struct ovl_layer, mnt) == 0); ++static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *)); ++ + struct ovl_path { + struct ovl_layer *layer; + struct dentry *dentry; +-- +2.39.2 + diff --git a/queue-6.1/perf-build-fix-library-not-found-error-when-using-cs.patch b/queue-6.1/perf-build-fix-library-not-found-error-when-using-cs.patch new file mode 100644 index 00000000000..985a8b231b1 --- /dev/null +++ b/queue-6.1/perf-build-fix-library-not-found-error-when-using-cs.patch @@ -0,0 +1,94 @@ +From 680f36a4f5e7d831b67c91dafe4f6c7797e53475 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 16:45:46 +0100 +Subject: perf build: Fix library not found error when using CSLIBS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: James Clark + +[ Upstream commit 1feece2780ac2f8de45177fe53979726cee4b3d1 ] + +-L only specifies the search path for libraries directly provided in the +link line with -l. Because -lopencsd isn't specified, it's only linked +because it's a dependency of -lopencsd_c_api. Dependencies like this are +resolved using the default system search paths or -rpath-link=... rather +than -L. This means that compilation only works if OpenCSD is installed +to the system rather than provided with the CSLIBS (-L) option. + +This could be fixed by adding -Wl,-rpath-link=$(CSLIBS) but that is less +conventional than just adding -lopencsd to the link line so that it uses +-L. -lopencsd seems to have been removed in commit ed17b1914978eddb +("perf tools: Drop requirement for libstdc++.so for libopencsd check") +because it was thought that there was a chance compilation would work +even if it didn't exist, but I think that only applies to libstdc++ so +there is no harm to add it back. libopencsd.so and libopencsd_c_api.so +would always exist together. 
+ +Testing +======= + +The following scenarios now all work: + + * Cross build with OpenCSD installed + * Cross build using CSLIBS=... + * Native build with OpenCSD installed + * Native build using CSLIBS=... + * Static cross build with OpenCSD installed + * Static cross build with CSLIBS=... + +Committer testing: + + ⬢[acme@toolbox perf-tools]$ alias m + alias m='make -k BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools -C tools/perf install-bin && git status && perf test python ; perf record -o /dev/null sleep 0.01 ; perf stat --null sleep 0.01' + ⬢[acme@toolbox perf-tools]$ ldd ~/bin/perf | grep csd + libopencsd_c_api.so.1 => /lib64/libopencsd_c_api.so.1 (0x00007fd49c44e000) + libopencsd.so.1 => /lib64/libopencsd.so.1 (0x00007fd49bd56000) + ⬢[acme@toolbox perf-tools]$ cat /etc/redhat-release + Fedora release 36 (Thirty Six) + ⬢[acme@toolbox perf-tools]$ + +Fixes: ed17b1914978eddb ("perf tools: Drop requirement for libstdc++.so for libopencsd check") +Reported-by: Radhey Shyam Pandey +Signed-off-by: James Clark +Tested-by: Arnaldo Carvalho de Melo +Tested-by: Radhey Shyam Pandey +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Uwe Kleine-König +Cc: coresight@lists.linaro.org +Closes: https://lore.kernel.org/linux-arm-kernel/56905d7a-a91e-883a-b707-9d5f686ba5f1@arm.com/ +Link: https://lore.kernel.org/all/36cc4dc6-bf4b-1093-1c0a-876e368af183@kleine-koenig.org/ +Link: https://lore.kernel.org/r/20230707154546.456720-1-james.clark@arm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/Makefile.config | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config +index 898226ea8cadc..fac6ba07eacdb 100644 +--- a/tools/perf/Makefile.config ++++ b/tools/perf/Makefile.config +@@ -149,9 +149,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto + ifdef CSINCLUDES + 
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) + endif +-OPENCSDLIBS := -lopencsd_c_api ++OPENCSDLIBS := -lopencsd_c_api -lopencsd + ifeq ($(findstring -static,${LDFLAGS}),-static) +- OPENCSDLIBS += -lopencsd -lstdc++ ++ OPENCSDLIBS += -lstdc++ + endif + ifdef CSLIBS + LIBOPENCSD_LDFLAGS := -L$(CSLIBS) +-- +2.39.2 + diff --git a/queue-6.1/pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch b/queue-6.1/pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch new file mode 100644 index 00000000000..51b77397bbf --- /dev/null +++ b/queue-6.1/pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch @@ -0,0 +1,118 @@ +From 726cf612acdfe280e96ebb1977b1ec50b8c6ec28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Jul 2023 12:18:58 +0100 +Subject: pinctrl: renesas: rzg2l: Handle non-unique subnode names + +From: Biju Das + +[ Upstream commit bfc374a145ae133613e05b9b89be561f169cb58d ] + +Currently, sd1 and sd0 have unique subnode names 'sd1_mux' and 'sd0_mux'. +If we change these to non-unique subnode names such as 'mux' this can +lead to the below conflict as the RZ/G2L pin control driver considers +only the names of the subnodes. + + pinctrl-rzg2l 11030000.pinctrl: pin P47_0 already requested by 11c00000.mmc; cannot claim for 11c10000.mmc + pinctrl-rzg2l 11030000.pinctrl: pin-376 (11c10000.mmc) status -22 + pinctrl-rzg2l 11030000.pinctrl: could not request pin 376 (P47_0) from group mux on device pinctrl-rzg2l + renesas_sdhi_internal_dmac 11c10000.mmc: Error applying setting, reverse things back + +Fix this by constructing unique names from the node names of both the +pin control configuration node and its child node, where appropriate. + +Based on the work done by Geert for the RZ/V2M pinctrl driver. 
+ +Fixes: c4c4637eb57f ("pinctrl: renesas: Add RZ/G2L pin and gpio controller driver") +Signed-off-by: Biju Das +Reviewed-by: Geert Uytterhoeven +Link: https://lore.kernel.org/r/20230704111858.215278-1-biju.das.jz@bp.renesas.com +Signed-off-by: Geert Uytterhoeven +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/renesas/pinctrl-rzg2l.c | 28 ++++++++++++++++++------- + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c +index ca6303fc41f98..fd11d28e5a1e4 100644 +--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c ++++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c +@@ -246,6 +246,7 @@ static int rzg2l_map_add_config(struct pinctrl_map *map, + + static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, + struct device_node *np, ++ struct device_node *parent, + struct pinctrl_map **map, + unsigned int *num_maps, + unsigned int *index) +@@ -263,6 +264,7 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, + struct property *prop; + int ret, gsel, fsel; + const char **pin_fn; ++ const char *name; + const char *pin; + + pinmux = of_find_property(np, "pinmux", NULL); +@@ -346,8 +348,19 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, + psel_val[i] = MUX_FUNC(value); + } + ++ if (parent) { ++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn", ++ parent, np); ++ if (!name) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ } else { ++ name = np->name; ++ } ++ + /* Register a single pin group listing all the pins we read from DT */ +- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL); ++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL); + if (gsel < 0) { + ret = gsel; + goto done; +@@ -357,17 +370,16 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, + * Register a single group function where the 'data' is an array PSEL + * register values read from DT. 
+ */ +- pin_fn[0] = np->name; +- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1, +- psel_val); ++ pin_fn[0] = name; ++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val); + if (fsel < 0) { + ret = fsel; + goto remove_group; + } + + maps[idx].type = PIN_MAP_TYPE_MUX_GROUP; +- maps[idx].data.mux.group = np->name; +- maps[idx].data.mux.function = np->name; ++ maps[idx].data.mux.group = name; ++ maps[idx].data.mux.function = name; + idx++; + + dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux); +@@ -414,7 +426,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev, + index = 0; + + for_each_child_of_node(np, child) { +- ret = rzg2l_dt_subnode_to_map(pctldev, child, map, ++ ret = rzg2l_dt_subnode_to_map(pctldev, child, np, map, + num_maps, &index); + if (ret < 0) { + of_node_put(child); +@@ -423,7 +435,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev, + } + + if (*num_maps == 0) { +- ret = rzg2l_dt_subnode_to_map(pctldev, np, map, ++ ret = rzg2l_dt_subnode_to_map(pctldev, np, NULL, map, + num_maps, &index); + if (ret < 0) + goto done; +-- +2.39.2 + diff --git a/queue-6.1/pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch b/queue-6.1/pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch new file mode 100644 index 00000000000..b84aa528fc0 --- /dev/null +++ b/queue-6.1/pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch @@ -0,0 +1,116 @@ +From 825d0cfe089333f10e47c7657c16035ce33865d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jul 2023 17:07:06 +0200 +Subject: pinctrl: renesas: rzv2m: Handle non-unique subnode names + +From: Geert Uytterhoeven + +[ Upstream commit f46a0b47cc0829acd050213194c5a77351e619b2 ] + +The eMMC and SDHI pin control configuration nodes in DT have subnodes +with the same names ("data" and "ctrl"). 
As the RZ/V2M pin control +driver considers only the names of the subnodes, this leads to +conflicts: + + pinctrl-rzv2m b6250000.pinctrl: pin P8_2 already requested by 85000000.mmc; cannot claim for 85020000.mmc + pinctrl-rzv2m b6250000.pinctrl: pin-130 (85020000.mmc) status -22 + renesas_sdhi_internal_dmac 85020000.mmc: Error applying setting, reverse things back + +Fix this by constructing unique names from the node names of both the +pin control configuration node and its child node, where appropriate. + +Reported-by: Fabrizio Castro + +Fixes: 92a9b825257614af ("pinctrl: renesas: Add RZ/V2M pin and gpio controller driver") +Signed-off-by: Geert Uytterhoeven +Tested-by: Fabrizio Castro +Link: https://lore.kernel.org/r/607bd6ab4905b0b1b119a06ef953fa1184505777.1688396717.git.geert+renesas@glider.be +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/renesas/pinctrl-rzv2m.c | 28 ++++++++++++++++++------- + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c +index e8c18198bebd2..35f382b055e83 100644 +--- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c ++++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c +@@ -207,6 +207,7 @@ static int rzv2m_map_add_config(struct pinctrl_map *map, + + static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, + struct device_node *np, ++ struct device_node *parent, + struct pinctrl_map **map, + unsigned int *num_maps, + unsigned int *index) +@@ -224,6 +225,7 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, + struct property *prop; + int ret, gsel, fsel; + const char **pin_fn; ++ const char *name; + const char *pin; + + pinmux = of_find_property(np, "pinmux", NULL); +@@ -307,8 +309,19 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, + psel_val[i] = MUX_FUNC(value); + } + ++ if (parent) { ++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn", ++ parent, np); ++ if (!name) { ++ ret = -ENOMEM; ++ goto done; 
++ } ++ } else { ++ name = np->name; ++ } ++ + /* Register a single pin group listing all the pins we read from DT */ +- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL); ++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL); + if (gsel < 0) { + ret = gsel; + goto done; +@@ -318,17 +331,16 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, + * Register a single group function where the 'data' is an array PSEL + * register values read from DT. + */ +- pin_fn[0] = np->name; +- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1, +- psel_val); ++ pin_fn[0] = name; ++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val); + if (fsel < 0) { + ret = fsel; + goto remove_group; + } + + maps[idx].type = PIN_MAP_TYPE_MUX_GROUP; +- maps[idx].data.mux.group = np->name; +- maps[idx].data.mux.function = np->name; ++ maps[idx].data.mux.group = name; ++ maps[idx].data.mux.function = name; + idx++; + + dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux); +@@ -375,7 +387,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev, + index = 0; + + for_each_child_of_node(np, child) { +- ret = rzv2m_dt_subnode_to_map(pctldev, child, map, ++ ret = rzv2m_dt_subnode_to_map(pctldev, child, np, map, + num_maps, &index); + if (ret < 0) { + of_node_put(child); +@@ -384,7 +396,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev, + } + + if (*num_maps == 0) { +- ret = rzv2m_dt_subnode_to_map(pctldev, np, map, ++ ret = rzv2m_dt_subnode_to_map(pctldev, np, NULL, map, + num_maps, &index); + if (ret < 0) + goto done; +-- +2.39.2 + diff --git a/queue-6.1/quota-fix-warning-in-dqgrab.patch b/queue-6.1/quota-fix-warning-in-dqgrab.patch new file mode 100644 index 00000000000..033ffc0a758 --- /dev/null +++ b/queue-6.1/quota-fix-warning-in-dqgrab.patch @@ -0,0 +1,105 @@ +From 1da38321c1da0aea4122e574000e2a97ee3d2378 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Mon, 5 Jun 2023 
22:07:31 +0800 +Subject: [PATCH AUTOSEL 4.19 04/11] quota: fix warning in dqgrab() +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit d6a95db3c7ad160bc16b89e36449705309b52bcb ] + +There's issue as follows when do fault injection: +WARNING: CPU: 1 PID: 14870 at include/linux/quotaops.h:51 dquot_disable+0x13b7/0x18c0 +Modules linked in: +CPU: 1 PID: 14870 Comm: fsconfig Not tainted 6.3.0-next-20230505-00006-g5107a9c821af-dirty #541 +RIP: 0010:dquot_disable+0x13b7/0x18c0 +RSP: 0018:ffffc9000acc79e0 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88825e41b980 +RDX: 0000000000000000 RSI: ffff88825e41b980 RDI: 0000000000000002 +RBP: ffff888179f68000 R08: ffffffff82087ca7 R09: 0000000000000000 +R10: 0000000000000001 R11: ffffed102f3ed026 R12: ffff888179f68130 +R13: ffff888179f68110 R14: dffffc0000000000 R15: ffff888179f68118 +FS: 00007f450a073740(0000) GS:ffff88882fc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007ffe96f2efd8 CR3: 000000025c8ad000 CR4: 00000000000006e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + dquot_load_quota_sb+0xd53/0x1060 + dquot_resume+0x172/0x230 + ext4_reconfigure+0x1dc6/0x27b0 + reconfigure_super+0x515/0xa90 + __x64_sys_fsconfig+0xb19/0xd20 + do_syscall_64+0x39/0xb0 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Above issue may happens as follows: +ProcessA ProcessB ProcessC +sys_fsconfig + vfs_fsconfig_locked + reconfigure_super + ext4_remount + dquot_suspend -> suspend all type quota + + sys_fsconfig + vfs_fsconfig_locked + reconfigure_super + ext4_remount + dquot_resume + ret = dquot_load_quota_sb + add_dquot_ref + do_open -> open file O_RDWR + vfs_open + do_dentry_open + get_write_access + atomic_inc_unless_negative(&inode->i_writecount) + ext4_file_open + dquot_file_open + dquot_initialize + __dquot_initialize + dqget + 
atomic_inc(&dquot->dq_count); + + __dquot_initialize + __dquot_initialize + dqget + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + ext4_acquire_dquot + -> Return error DQ_ACTIVE_B flag isn't set + dquot_disable + invalidate_dquots + if (atomic_read(&dquot->dq_count)) + dqgrab + WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + -> Trigger warning + +In the above scenario, 'dquot->dq_flags' has no DQ_ACTIVE_B is normal when +dqgrab(). +To solve above issue just replace the dqgrab() use in invalidate_dquots() with +atomic_inc(&dquot->dq_count). + +Signed-off-by: Ye Bin +Signed-off-by: Jan Kara +Message-Id: <20230605140731.2427629-3-yebin10@huawei.com> +Signed-off-by: Sasha Levin +--- + fs/quota/dquot.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c +index 0d3ffc727bb00..303987d29b9c9 100644 +--- a/fs/quota/dquot.c ++++ b/fs/quota/dquot.c +@@ -540,7 +540,7 @@ static void invalidate_dquots(struct super_block *sb, int type) + continue; + /* Wait for dquot users */ + if (atomic_read(&dquot->dq_count)) { +- dqgrab(dquot); ++ atomic_inc(&dquot->dq_count); + spin_unlock(&dq_list_lock); + /* + * Once dqput() wakes us up, we know it's time to free +-- +2.39.2 + diff --git a/queue-6.1/quota-properly-disable-quotas-when-add_dquot_ref-fai.patch b/queue-6.1/quota-properly-disable-quotas-when-add_dquot_ref-fai.patch new file mode 100644 index 00000000000..b7e21bb5ca1 --- /dev/null +++ b/queue-6.1/quota-properly-disable-quotas-when-add_dquot_ref-fai.patch @@ -0,0 +1,45 @@ +From 3e9e30aa708b3b8cb0485725964206a7b72d1f9b Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 5 Jun 2023 22:07:30 +0800 +Subject: [PATCH AUTOSEL 4.19 03/11] quota: Properly disable quotas when + add_dquot_ref() fails +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 6a4e3363792e30177cc3965697e34ddcea8b900b ] + +When add_dquot_ref() fails (usually due to IO error or ENOMEM), we want +to disable 
quotas we are trying to enable. However dquot_disable() call +was passed just the flags we are enabling so in case flags == +DQUOT_USAGE_ENABLED dquot_disable() call will just fail with EINVAL +instead of properly disabling quotas. Fix the problem by always passing +DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED to dquot_disable() in this +case. + +Reported-and-tested-by: Ye Bin +Reported-by: syzbot+e633c79ceaecbf479854@syzkaller.appspotmail.com +Signed-off-by: Jan Kara +Message-Id: <20230605140731.2427629-2-yebin10@huawei.com> +Signed-off-by: Sasha Levin +--- + fs/quota/dquot.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c +index 770a2b1434856..0d3ffc727bb00 100644 +--- a/fs/quota/dquot.c ++++ b/fs/quota/dquot.c +@@ -2407,7 +2407,8 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, + + error = add_dquot_ref(sb, type); + if (error) +- dquot_disable(sb, type, flags); ++ dquot_disable(sb, type, ++ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + + return error; + out_fmt: +-- +2.39.2 + diff --git a/queue-6.1/rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch b/queue-6.1/rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch new file mode 100644 index 00000000000..7735a7471ff --- /dev/null +++ b/queue-6.1/rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch @@ -0,0 +1,76 @@ +From 4d3360fe4eb403c4add5725291d2c102bad4db73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Apr 2023 16:05:38 -0700 +Subject: rcu: Mark additional concurrent load from ->cpu_no_qs.b.exp + +From: Paul E. McKenney + +[ Upstream commit 9146eb25495ea8bfb5010192e61e3ed5805ce9ef ] + +The per-CPU rcu_data structure's ->cpu_no_qs.b.exp field is updated +only on the instance corresponding to the current CPU, but can be read +more widely. Unmarked accesses are OK from the corresponding CPU, but +only if interrupts are disabled, given that interrupt handlers can and +do modify this field. 
+ +Unfortunately, although the load from rcu_preempt_deferred_qs() is always +carried out from the corresponding CPU, interrupts are not necessarily +disabled. This commit therefore upgrades this load to READ_ONCE. + +Similarly, the diagnostic access from synchronize_rcu_expedited_wait() +might run with interrupts disabled and from some other CPU. This commit +therefore marks this load with data_race(). + +Finally, the C-language access in rcu_preempt_ctxt_queue() is OK as +is because interrupts are disabled and this load is always from the +corresponding CPU. This commit adds a comment giving the rationale for +this access being safe. + +This data race was reported by KCSAN. Not appropriate for backporting +due to failure being unlikely. + +Signed-off-by: Paul E. McKenney +Signed-off-by: Sasha Levin +--- + kernel/rcu/tree_exp.h | 2 +- + kernel/rcu/tree_plugin.h | 4 +++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h +index e25321dbb068e..aa3ec3c3b9f75 100644 +--- a/kernel/rcu/tree_exp.h ++++ b/kernel/rcu/tree_exp.h +@@ -641,7 +641,7 @@ static void synchronize_rcu_expedited_wait(void) + "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask & rnp->expmaskinit)], + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)], +- "D."[!!(rdp->cpu_no_qs.b.exp)]); ++ "D."[!!data_race(rdp->cpu_no_qs.b.exp)]); + } + } + pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", +diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h +index e3142ee35fc6a..044026abfdd7f 100644 +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -257,6 +257,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) + * GP should not be able to end until we report, so there should be + * no need to check for a subsequent expedited GP. (Though we are + * still in a quiescent state in any case.) ++ * ++ * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change. 
+ */ + if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp) + rcu_report_exp_rdp(rdp); +@@ -941,7 +943,7 @@ notrace void rcu_preempt_deferred_qs(struct task_struct *t) + { + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + +- if (rdp->cpu_no_qs.b.exp) ++ if (READ_ONCE(rdp->cpu_no_qs.b.exp)) + rcu_report_exp_rdp(rdp); + } + +-- +2.39.2 + diff --git a/queue-6.1/rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch b/queue-6.1/rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch new file mode 100644 index 00000000000..a6c062917c4 --- /dev/null +++ b/queue-6.1/rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch @@ -0,0 +1,91 @@ +From aef95e1bb3b2e697dd8a92a4b03466862cd224fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Aug 2022 01:22:05 +0900 +Subject: rcu-tasks: Avoid pr_info() with spin lock in cblist_init_generic() + +From: Shigeru Yoshida + +[ Upstream commit 5fc8cbe4cf0fd34ded8045c385790c3bf04f6785 ] + +pr_info() is called with rtp->cbs_gbl_lock spin lock locked. Because +pr_info() calls printk() that might sleep, this will result in BUG +like below: + +[ 0.206455] cblist_init_generic: Setting adjustable number of callback queues. 
+[ 0.206463] +[ 0.206464] ============================= +[ 0.206464] [ BUG: Invalid wait context ] +[ 0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted +[ 0.206466] ----------------------------- +[ 0.206466] swapper/0/1 is trying to lock: +[ 0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0 +[ 0.206473] other info that might help us debug this: +[ 0.206473] context-{5:5} +[ 0.206474] 3 locks held by swapper/0/1: +[ 0.206474] #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0 +[ 0.206478] #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e +[ 0.206482] #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330 +[ 0.206485] stack backtrace: +[ 0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5 +[ 0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 +[ 0.206489] Call Trace: +[ 0.206490] +[ 0.206491] dump_stack_lvl+0x6a/0x9f +[ 0.206493] __lock_acquire.cold+0x2d7/0x2fe +[ 0.206496] ? stack_trace_save+0x46/0x70 +[ 0.206497] lock_acquire+0xd1/0x2f0 +[ 0.206499] ? serial8250_console_write+0x327/0x4a0 +[ 0.206500] ? __lock_acquire+0x5c7/0x2720 +[ 0.206502] _raw_spin_lock_irqsave+0x3d/0x90 +[ 0.206504] ? serial8250_console_write+0x327/0x4a0 +[ 0.206506] serial8250_console_write+0x327/0x4a0 +[ 0.206508] console_emit_next_record.constprop.0+0x180/0x330 +[ 0.206511] console_unlock+0xf7/0x1f0 +[ 0.206512] vprintk_emit+0xf7/0x330 +[ 0.206514] _printk+0x63/0x7e +[ 0.206516] cblist_init_generic.constprop.0.cold+0x24/0x32 +[ 0.206518] rcu_init_tasks_generic+0x5/0xd9 +[ 0.206522] kernel_init_freeable+0x15b/0x2a2 +[ 0.206523] ? rest_init+0x160/0x160 +[ 0.206526] kernel_init+0x11/0x120 +[ 0.206527] ret_from_fork+0x1f/0x30 +[ 0.206530] +[ 0.207018] cblist_init_generic: Setting shift to 1 and lim to 1. 
+ +This patch moves pr_info() so that it is called without +rtp->cbs_gbl_lock locked. + +Signed-off-by: Shigeru Yoshida +Tested-by: "Zhang, Qiang1" +Signed-off-by: Paul E. McKenney +Signed-off-by: Sasha Levin +--- + kernel/rcu/tasks.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h +index df968321feada..c1f18c63b9b14 100644 +--- a/kernel/rcu/tasks.h ++++ b/kernel/rcu/tasks.h +@@ -233,7 +233,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp) + if (rcu_task_enqueue_lim < 0) { + rcu_task_enqueue_lim = 1; + rcu_task_cb_adjust = true; +- pr_info("%s: Setting adjustable number of callback queues.\n", __func__); + } else if (rcu_task_enqueue_lim == 0) { + rcu_task_enqueue_lim = 1; + } +@@ -264,6 +263,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp) + raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled. + } + raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); ++ ++ if (rcu_task_cb_adjust) ++ pr_info("%s: Setting adjustable number of callback queues.\n", __func__); ++ + pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim)); + } + +-- +2.39.2 + diff --git a/queue-6.1/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch b/queue-6.1/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch new file mode 100644 index 00000000000..8812a74d9c6 --- /dev/null +++ b/queue-6.1/revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch @@ -0,0 +1,113 @@ +From 242c82c4047048b1d67da8284935b57fc6abaa12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 14:59:18 -0700 +Subject: Revert "tcp: avoid the lookup process failing to get sk in ehash + table" + +From: Kuniyuki Iwashima + +[ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ] + +This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043. 
+ +Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in +ehash table") reversed the order in how a socket is inserted into ehash +to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are +swapped. However, it introduced another lookup failure. + +The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU +and does not have SOCK_RCU_FREE, so the socket could be reused even while +it is being referenced on another CPU doing RCU lookup. + +Let's say a socket is reused and inserted into the same hash bucket during +lookup. After the blamed commit, a new socket is inserted at the end of +the list. If that happens, we will skip sockets placed after the previous +position of the reused socket, resulting in ehash lookup failure. + +As described in Documentation/RCU/rculist_nulls.rst, we should insert a +new socket at the head of the list to avoid such an issue. + +This issue, the swap-lookup-failure, and another variant reported in [0] +can all be handled properly by adding a locked ehash lookup suggested by +Eric Dumazet [1]. + +However, this issue could occur for every packet, thus more likely than +the other two races, so let's revert the change for now. 
+ +Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0] +Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1] +Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table") +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 17 ++--------------- + net/ipv4/inet_timewait_sock.c | 8 ++++---- + 2 files changed, 6 insertions(+), 19 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index e8734ffca85a8..c19b462662ad0 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + spin_lock(lock); + if (osk) { + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); +- ret = sk_hashed(osk); +- if (ret) { +- /* Before deleting the node, we insert a new one to make +- * sure that the look-up-sk process would not miss either +- * of them and that at least one node would exist in ehash +- * table all the time. Otherwise there's a tiny chance +- * that lookup process could find nothing in ehash table. 
+- */ +- __sk_nulls_add_node_tail_rcu(sk, list); +- sk_nulls_del_node_init_rcu(osk); +- } +- goto unlock; +- } +- if (found_dup_sk) { ++ ret = sk_nulls_del_node_init_rcu(osk); ++ } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; +@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + if (ret) + __sk_nulls_add_node_rcu(sk, list); + +-unlock: + spin_unlock(lock); + + return ret; +diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c +index beed32fff4841..1d77d992e6e77 100644 +--- a/net/ipv4/inet_timewait_sock.c ++++ b/net/ipv4/inet_timewait_sock.c +@@ -91,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) + } + EXPORT_SYMBOL_GPL(inet_twsk_put); + +-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, +- struct hlist_nulls_head *list) ++static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, ++ struct hlist_nulls_head *list) + { +- hlist_nulls_add_tail_rcu(&tw->tw_node, list); ++ hlist_nulls_add_head_rcu(&tw->tw_node, list); + } + + static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, +@@ -147,7 +147,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + + spin_lock(lock); + +- inet_twsk_add_node_tail_rcu(tw, &ehead->chain); ++ inet_twsk_add_node_rcu(tw, &ehead->chain); + + /* Step 3: Remove SK from hash chain */ + if (__sk_nulls_del_node_init_rcu(sk)) +-- +2.39.2 + diff --git a/queue-6.1/sched-fair-don-t-balance-task-to-its-current-running.patch b/queue-6.1/sched-fair-don-t-balance-task-to-its-current-running.patch new file mode 100644 index 00000000000..7ea3c58721b --- /dev/null +++ b/queue-6.1/sched-fair-don-t-balance-task-to-its-current-running.patch @@ -0,0 +1,96 @@ +From 8455627afba0715ac09ca4e31fd0ca55986494f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 May 2023 16:25:07 +0800 +Subject: sched/fair: Don't balance task to its current running 
CPU + +From: Yicong Yang + +[ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ] + +We've run into the case that the balancer tries to balance a migration +disabled task and trigger the warning in set_task_cpu() like below: + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240 + Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip> + CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G O 6.1.0-rc4+ #1 + Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021 + pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : set_task_cpu+0x188/0x240 + lr : load_balance+0x5d0/0xc60 + sp : ffff80000803bc70 + x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040 + x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001 + x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78 + x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000 + x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000 + x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000 + x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530 + x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e + x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a + x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001 + Call trace: + set_task_cpu+0x188/0x240 + load_balance+0x5d0/0xc60 + rebalance_domains+0x26c/0x380 + _nohz_idle_balance.isra.0+0x1e0/0x370 + run_rebalance_domains+0x6c/0x80 + __do_softirq+0x128/0x3d8 + ____do_softirq+0x18/0x24 + call_on_irq_stack+0x2c/0x38 + do_softirq_own_stack+0x24/0x3c + __irq_exit_rcu+0xcc/0xf4 + irq_exit_rcu+0x18/0x24 + el1_interrupt+0x4c/0xe4 + el1h_64_irq_handler+0x18/0x2c + el1h_64_irq+0x74/0x78 + arch_cpu_idle+0x18/0x4c + default_idle_call+0x58/0x194 + do_idle+0x244/0x2b0 + cpu_startup_entry+0x30/0x3c + secondary_start_kernel+0x14c/0x190 + 
__secondary_switched+0xb0/0xb4 + ---[ end trace 0000000000000000 ]--- + +Further investigation shows that the warning is superfluous: the migration +disabled task is just going to be migrated to its current running CPU. +This is because on load balance, if the dst_cpu is not allowed by the +task, we'll re-select a new_dst_cpu as a candidate. If no task can be +balanced to dst_cpu we'll try to balance the task to the new_dst_cpu +instead. In this case, when the migration disabled task is not on CPU, it is +only allowed to run on its current CPU, so load balance will select its +current CPU as new_dst_cpu and later trigger the warning above. + +The new_dst_cpu is chosen from the env->dst_grpmask. Currently it +contains CPUs in sched_group_span() and if we have overlapped groups it's +possible to run into this case. This patch sets env->dst_grpmask to +group_balance_mask(), which excludes any CPUs from the busiest group and +solves the issue. For balancing in a domain with no overlapped groups +the behaviour stays the same as before. 
+ +Suggested-by: Vincent Guittot +Signed-off-by: Yicong Yang +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index fa33c441ae867..57d39de0962d7 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10556,7 +10556,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, + .sd = sd, + .dst_cpu = this_cpu, + .dst_rq = this_rq, +- .dst_grpmask = sched_group_span(sd->groups), ++ .dst_grpmask = group_balance_mask(sd->groups), + .idle = idle, + .loop_break = SCHED_NR_MIGRATE_BREAK, + .cpus = cpus, +-- +2.39.2 + diff --git a/queue-6.1/sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch b/queue-6.1/sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch new file mode 100644 index 00000000000..9b8cfc75250 --- /dev/null +++ b/queue-6.1/sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch @@ -0,0 +1,41 @@ +From 87c0b2894b5bff97a3b231e21a5467e96e6ba324 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 16:07:47 +0800 +Subject: sched/fair: Use recent_used_cpu to test p->cpus_ptr + +From: Miaohe Lin + +[ Upstream commit ae2ad293d6be143ad223f5f947cca07bcbe42595 ] + +When checking whether a recently used CPU can be a potential idle +candidate, recent_used_cpu should be used to test p->cpus_ptr as +p->recent_used_cpu is not equal to recent_used_cpu and candidate +decision is made based on recent_used_cpu here. 
+ +Fixes: 89aafd67f28c ("sched/fair: Use prev instead of new target as recent_used_cpu") +Signed-off-by: Miaohe Lin +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Phil Auld +Acked-by: Mel Gorman +Link: https://lore.kernel.org/r/20230620080747.359122-1-linmiaohe@huawei.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 57d39de0962d7..5e5aea2360a87 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6935,7 +6935,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + recent_used_cpu != target && + cpus_share_cache(recent_used_cpu, target) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && +- cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && ++ cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && + asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { + return recent_used_cpu; + } +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-allow-unprivileged-polling-of-n-2s-period.patch b/queue-6.1/sched-psi-allow-unprivileged-polling-of-n-2s-period.patch new file mode 100644 index 00000000000..71bccffd238 --- /dev/null +++ b/queue-6.1/sched-psi-allow-unprivileged-polling-of-n-2s-period.patch @@ -0,0 +1,434 @@ +From 24ad138c2ace2a7a5bc0ceccb0055be994ccc3ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Mar 2023 12:54:18 +0200 +Subject: sched/psi: Allow unprivileged polling of N*2s period + +From: Domenico Cerasuolo + +[ Upstream commit d82caa273565b45fcf103148950549af76c314b0 ] + +PSI offers 2 mechanisms to get information about a specific resource +pressure. One is reading from /proc/pressure/, which gives +average pressures aggregated every 2s. The other is creating a pollable +fd for a specific resource and cgroup. 
+ +The trigger creation requires CAP_SYS_RESOURCE, and gives the +possibility to pick a specific time window and threshold, spawning an RT +thread to aggregate the data. + +Systemd would like to provide containers the option to monitor pressure +on their own cgroup and sub-cgroups. For example, if systemd launches a +container that itself then launches services, the container should have +the ability to poll() for pressure in individual services. But neither +the container nor the services are privileged. + +This patch implements a mechanism to allow unprivileged users to create +pressure triggers. The difference with privileged triggers creation is +that unprivileged ones must have a time window that's a multiple of 2s. +This is so that we can avoid unrestricted spawning of rt threads, and +use instead the same aggregation mechanism done for the averages, which +runs independently of any triggers. + +Suggested-by: Johannes Weiner +Signed-off-by: Domenico Cerasuolo +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Johannes Weiner +Link: https://lore.kernel.org/r/20230330105418.77061-5-cerasuolodomenico@gmail.com +Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling") +Signed-off-by: Sasha Levin +--- + Documentation/accounting/psi.rst | 4 + + include/linux/psi.h | 2 +- + include/linux/psi_types.h | 7 ++ + kernel/cgroup/cgroup.c | 2 +- + kernel/sched/psi.c | 175 +++++++++++++++++++------------ + 5 files changed, 121 insertions(+), 69 deletions(-) + +diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst +index 5e40b3f437f90..df6062eb3abbc 100644 +--- a/Documentation/accounting/psi.rst ++++ b/Documentation/accounting/psi.rst +@@ -105,6 +105,10 @@ prevent overly frequent polling. Max limit is chosen as a high enough number + after which monitors are most likely not needed and psi averages can be used + instead. 
+ ++Unprivileged users can also create monitors, with the only limitation that the ++window size must be a multiple of 2s, in order to prevent excessive resource ++usage. ++ + When activated, psi monitor stays active for at least the duration of one + tracking window to avoid repeated activations/deactivations when system is + bouncing in and out of the stall state. +diff --git a/include/linux/psi.h b/include/linux/psi.h +index b029a847def1e..ab26200c28033 100644 +--- a/include/linux/psi.h ++++ b/include/linux/psi.h +@@ -24,7 +24,7 @@ void psi_memstall_leave(unsigned long *flags); + + int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); + struct psi_trigger *psi_trigger_create(struct psi_group *group, +- char *buf, enum psi_res res); ++ char *buf, enum psi_res res, struct file *file); + void psi_trigger_destroy(struct psi_trigger *t); + + __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, +diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h +index 1819afa8b1987..040c089581c6c 100644 +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -151,6 +151,9 @@ struct psi_trigger { + + /* Deferred event(s) from previous ratelimit window */ + bool pending_event; ++ ++ /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */ ++ enum psi_aggregators aggregator; + }; + + struct psi_group { +@@ -171,6 +174,10 @@ struct psi_group { + /* Aggregator work control */ + struct delayed_work avgs_work; + ++ /* Unprivileged triggers against N*PSI_FREQ windows */ ++ struct list_head avg_triggers; ++ u32 avg_nr_triggers[NR_PSI_STATES - 1]; ++ + /* Total stall times and sampled pressure averages */ + u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; + unsigned long avg[NR_PSI_STATES - 1][3]; +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 2380c4daef33d..c35efae566a4b 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct 
kernfs_open_file *of, char *buf, + } + + psi = cgroup_psi(cgrp); +- new = psi_trigger_create(psi, buf, res); ++ new = psi_trigger_create(psi, buf, res, of->file); + if (IS_ERR(new)) { + cgroup_put(cgrp); + return PTR_ERR(new); +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index f3df6a8ff493c..e072f6b31bf30 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -186,9 +186,14 @@ static void group_init(struct psi_group *group) + seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); + group->avg_last_update = sched_clock(); + group->avg_next_update = group->avg_last_update + psi_period; +- INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); + mutex_init(&group->avgs_lock); +- /* Init trigger-related members */ ++ ++ /* Init avg trigger-related members */ ++ INIT_LIST_HEAD(&group->avg_triggers); ++ memset(group->avg_nr_triggers, 0, sizeof(group->avg_nr_triggers)); ++ INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); ++ ++ /* Init rtpoll trigger-related members */ + atomic_set(&group->rtpoll_scheduled, 0); + mutex_init(&group->rtpoll_trigger_lock); + INIT_LIST_HEAD(&group->rtpoll_triggers); +@@ -430,21 +435,32 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) + return growth; + } + +-static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total) ++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total, ++ enum psi_aggregators aggregator) + { + struct psi_trigger *t; +- u64 *total = group->total[PSI_POLL]; ++ u64 *total = group->total[aggregator]; ++ struct list_head *triggers; ++ u64 *aggregator_total; + *update_total = false; + ++ if (aggregator == PSI_AVGS) { ++ triggers = &group->avg_triggers; ++ aggregator_total = group->avg_total; ++ } else { ++ triggers = &group->rtpoll_triggers; ++ aggregator_total = group->rtpoll_total; ++ } ++ + /* + * On subsequent updates, calculate growth deltas and let + * watchers know when their specified thresholds are exceeded. 
+ */ +- list_for_each_entry(t, &group->rtpoll_triggers, node) { ++ list_for_each_entry(t, triggers, node) { + u64 growth; + bool new_stall; + +- new_stall = group->rtpoll_total[t->state] != total[t->state]; ++ new_stall = aggregator_total[t->state] != total[t->state]; + + /* Check for stall activity or a previous threshold breach */ + if (!new_stall && !t->pending_event) +@@ -546,6 +562,7 @@ static void psi_avgs_work(struct work_struct *work) + struct delayed_work *dwork; + struct psi_group *group; + u32 changed_states; ++ bool update_total; + u64 now; + + dwork = to_delayed_work(work); +@@ -563,8 +580,10 @@ static void psi_avgs_work(struct work_struct *work) + * Once restarted, we'll catch up the running averages in one + * go - see calc_avgs() and missed_periods. + */ +- if (now >= group->avg_next_update) ++ if (now >= group->avg_next_update) { ++ update_triggers(group, now, &update_total, PSI_AVGS); + group->avg_next_update = update_averages(group, now); ++ } + + if (changed_states & PSI_STATE_RESCHEDULE) { + schedule_delayed_work(dwork, nsecs_to_jiffies( +@@ -574,7 +593,7 @@ static void psi_avgs_work(struct work_struct *work) + mutex_unlock(&group->avgs_lock); + } + +-static void init_triggers(struct psi_group *group, u64 now) ++static void init_rtpoll_triggers(struct psi_group *group, u64 now) + { + struct psi_trigger *t; + +@@ -667,7 +686,7 @@ static void psi_rtpoll_work(struct psi_group *group) + if (changed_states & group->rtpoll_states) { + /* Initialize trigger windows when entering polling mode */ + if (now > group->rtpoll_until) +- init_triggers(group, now); ++ init_rtpoll_triggers(group, now); + + /* + * Keep the monitor active for at least the duration of the +@@ -684,7 +703,7 @@ static void psi_rtpoll_work(struct psi_group *group) + } + + if (now >= group->rtpoll_next_update) { +- group->rtpoll_next_update = update_triggers(group, now, &update_total); ++ group->rtpoll_next_update = update_triggers(group, now, &update_total, PSI_POLL); + if 
(update_total) + memcpy(group->rtpoll_total, group->total[PSI_POLL], + sizeof(group->rtpoll_total)); +@@ -1254,16 +1273,23 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) + } + + struct psi_trigger *psi_trigger_create(struct psi_group *group, +- char *buf, enum psi_res res) ++ char *buf, enum psi_res res, struct file *file) + { + struct psi_trigger *t; + enum psi_states state; + u32 threshold_us; ++ bool privileged; + u32 window_us; + + if (static_branch_likely(&psi_disabled)) + return ERR_PTR(-EOPNOTSUPP); + ++ /* ++ * Checking the privilege here on file->f_cred implies that a privileged user ++ * could open the file and delegate the write to an unprivileged one. ++ */ ++ privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE); ++ + if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2) + state = PSI_IO_SOME + res * 2; + else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2) +@@ -1283,6 +1309,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, + window_us > WINDOW_MAX_US) + return ERR_PTR(-EINVAL); + ++ /* ++ * Unprivileged users can only use 2s windows so that averages aggregation ++ * work is used, and no RT threads need to be spawned. ++ */ ++ if (!privileged && window_us % 2000000) ++ return ERR_PTR(-EINVAL); ++ + /* Check threshold */ + if (threshold_us == 0 || threshold_us > window_us) + return ERR_PTR(-EINVAL); +@@ -1302,31 +1335,40 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, + t->last_event_time = 0; + init_waitqueue_head(&t->event_wait); + t->pending_event = false; ++ t->aggregator = privileged ? 
PSI_POLL : PSI_AVGS; + +- mutex_lock(&group->rtpoll_trigger_lock); ++ if (privileged) { ++ mutex_lock(&group->rtpoll_trigger_lock); + +- if (!rcu_access_pointer(group->rtpoll_task)) { +- struct task_struct *task; ++ if (!rcu_access_pointer(group->rtpoll_task)) { ++ struct task_struct *task; + +- task = kthread_create(psi_rtpoll_worker, group, "psimon"); +- if (IS_ERR(task)) { +- kfree(t); +- mutex_unlock(&group->rtpoll_trigger_lock); +- return ERR_CAST(task); ++ task = kthread_create(psi_rtpoll_worker, group, "psimon"); ++ if (IS_ERR(task)) { ++ kfree(t); ++ mutex_unlock(&group->rtpoll_trigger_lock); ++ return ERR_CAST(task); ++ } ++ atomic_set(&group->rtpoll_wakeup, 0); ++ wake_up_process(task); ++ rcu_assign_pointer(group->rtpoll_task, task); + } +- atomic_set(&group->rtpoll_wakeup, 0); +- wake_up_process(task); +- rcu_assign_pointer(group->rtpoll_task, task); +- } + +- list_add(&t->node, &group->rtpoll_triggers); +- group->rtpoll_min_period = min(group->rtpoll_min_period, +- div_u64(t->win.size, UPDATES_PER_WINDOW)); +- group->rtpoll_nr_triggers[t->state]++; +- group->rtpoll_states |= (1 << t->state); ++ list_add(&t->node, &group->rtpoll_triggers); ++ group->rtpoll_min_period = min(group->rtpoll_min_period, ++ div_u64(t->win.size, UPDATES_PER_WINDOW)); ++ group->rtpoll_nr_triggers[t->state]++; ++ group->rtpoll_states |= (1 << t->state); + +- mutex_unlock(&group->rtpoll_trigger_lock); ++ mutex_unlock(&group->rtpoll_trigger_lock); ++ } else { ++ mutex_lock(&group->avgs_lock); ++ ++ list_add(&t->node, &group->avg_triggers); ++ group->avg_nr_triggers[t->state]++; + ++ mutex_unlock(&group->avgs_lock); ++ } + return t; + } + +@@ -1350,34 +1392,41 @@ void psi_trigger_destroy(struct psi_trigger *t) + */ + wake_up_pollfree(&t->event_wait); + +- mutex_lock(&group->rtpoll_trigger_lock); +- +- if (!list_empty(&t->node)) { +- struct psi_trigger *tmp; +- u64 period = ULLONG_MAX; +- +- list_del(&t->node); +- group->rtpoll_nr_triggers[t->state]--; +- if 
(!group->rtpoll_nr_triggers[t->state]) +- group->rtpoll_states &= ~(1 << t->state); +- /* reset min update period for the remaining triggers */ +- list_for_each_entry(tmp, &group->rtpoll_triggers, node) +- period = min(period, div_u64(tmp->win.size, +- UPDATES_PER_WINDOW)); +- group->rtpoll_min_period = period; +- /* Destroy rtpoll_task when the last trigger is destroyed */ +- if (group->rtpoll_states == 0) { +- group->rtpoll_until = 0; +- task_to_destroy = rcu_dereference_protected( +- group->rtpoll_task, +- lockdep_is_held(&group->rtpoll_trigger_lock)); +- rcu_assign_pointer(group->rtpoll_task, NULL); +- del_timer(&group->rtpoll_timer); ++ if (t->aggregator == PSI_AVGS) { ++ mutex_lock(&group->avgs_lock); ++ if (!list_empty(&t->node)) { ++ list_del(&t->node); ++ group->avg_nr_triggers[t->state]--; + } ++ mutex_unlock(&group->avgs_lock); ++ } else { ++ mutex_lock(&group->rtpoll_trigger_lock); ++ if (!list_empty(&t->node)) { ++ struct psi_trigger *tmp; ++ u64 period = ULLONG_MAX; ++ ++ list_del(&t->node); ++ group->rtpoll_nr_triggers[t->state]--; ++ if (!group->rtpoll_nr_triggers[t->state]) ++ group->rtpoll_states &= ~(1 << t->state); ++ /* reset min update period for the remaining triggers */ ++ list_for_each_entry(tmp, &group->rtpoll_triggers, node) ++ period = min(period, div_u64(tmp->win.size, ++ UPDATES_PER_WINDOW)); ++ group->rtpoll_min_period = period; ++ /* Destroy rtpoll_task when the last trigger is destroyed */ ++ if (group->rtpoll_states == 0) { ++ group->rtpoll_until = 0; ++ task_to_destroy = rcu_dereference_protected( ++ group->rtpoll_task, ++ lockdep_is_held(&group->rtpoll_trigger_lock)); ++ rcu_assign_pointer(group->rtpoll_task, NULL); ++ del_timer(&group->rtpoll_timer); ++ } ++ } ++ mutex_unlock(&group->rtpoll_trigger_lock); + } + +- mutex_unlock(&group->rtpoll_trigger_lock); +- + /* + * Wait for psi_schedule_rtpoll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the +@@ -1437,27 +1486,19 @@ 
static int psi_cpu_show(struct seq_file *m, void *v) + return psi_show(m, &psi_system, PSI_CPU); + } + +-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) +-{ +- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) +- return -EPERM; +- +- return single_open(file, psi_show, NULL); +-} +- + static int psi_io_open(struct inode *inode, struct file *file) + { +- return psi_open(file, psi_io_show); ++ return single_open(file, psi_io_show, NULL); + } + + static int psi_memory_open(struct inode *inode, struct file *file) + { +- return psi_open(file, psi_memory_show); ++ return single_open(file, psi_memory_show, NULL); + } + + static int psi_cpu_open(struct inode *inode, struct file *file) + { +- return psi_open(file, psi_cpu_show); ++ return single_open(file, psi_cpu_show, NULL); + } + + static ssize_t psi_write(struct file *file, const char __user *user_buf, +@@ -1491,7 +1532,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, + return -EBUSY; + } + +- new = psi_trigger_create(&psi_system, buf, res); ++ new = psi_trigger_create(&psi_system, buf, res, file); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); +@@ -1571,7 +1612,7 @@ static int psi_irq_show(struct seq_file *m, void *v) + + static int psi_irq_open(struct inode *inode, struct file *file) + { +- return psi_open(file, psi_irq_show); ++ return single_open(file, psi_irq_show, NULL); + } + + static ssize_t psi_irq_write(struct file *file, const char __user *user_buf, +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-extract-update_triggers-side-effect.patch b/queue-6.1/sched-psi-extract-update_triggers-side-effect.patch new file mode 100644 index 00000000000..8244dd63ad8 --- /dev/null +++ b/queue-6.1/sched-psi-extract-update_triggers-side-effect.patch @@ -0,0 +1,91 @@ +From 3d78ff2fdc7f963507676dadc4a58e7433f61819 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Mar 2023 12:54:17 +0200 +Subject: sched/psi: Extract 
update_triggers side effect + +From: Domenico Cerasuolo + +[ Upstream commit 4468fcae49f08e88fbbffe05b29496192df89991 ] + +This change moves update_total flag out of update_triggers function, +currently called only in psi_poll_work. +In the next patch, update_triggers will be called also in psi_avgs_work, +but the total update information is specific to psi_poll_work. +Returning update_total value to the caller let us avoid differentiating +the implementation of update_triggers for different aggregators. + +Suggested-by: Johannes Weiner +Signed-off-by: Domenico Cerasuolo +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Johannes Weiner +Link: https://lore.kernel.org/r/20230330105418.77061-4-cerasuolodomenico@gmail.com +Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling") +Signed-off-by: Sasha Levin +--- + kernel/sched/psi.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index a3d0b5cf797ab..f3df6a8ff493c 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -430,11 +430,11 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) + return growth; + } + +-static u64 update_triggers(struct psi_group *group, u64 now) ++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total) + { + struct psi_trigger *t; +- bool update_total = false; + u64 *total = group->total[PSI_POLL]; ++ *update_total = false; + + /* + * On subsequent updates, calculate growth deltas and let +@@ -462,7 +462,7 @@ static u64 update_triggers(struct psi_group *group, u64 now) + * been through all of them. Also remember to extend the + * polling time if we see new stall activity. 
+ */ +- update_total = true; ++ *update_total = true; + + /* Calculate growth since last update */ + growth = window_update(&t->win, now, total[t->state]); +@@ -485,10 +485,6 @@ static u64 update_triggers(struct psi_group *group, u64 now) + t->pending_event = false; + } + +- if (update_total) +- memcpy(group->rtpoll_total, total, +- sizeof(group->rtpoll_total)); +- + return now + group->rtpoll_min_period; + } + +@@ -622,6 +618,7 @@ static void psi_rtpoll_work(struct psi_group *group) + { + bool force_reschedule = false; + u32 changed_states; ++ bool update_total; + u64 now; + + mutex_lock(&group->rtpoll_trigger_lock); +@@ -686,8 +683,12 @@ static void psi_rtpoll_work(struct psi_group *group) + goto out; + } + +- if (now >= group->rtpoll_next_update) +- group->rtpoll_next_update = update_triggers(group, now); ++ if (now >= group->rtpoll_next_update) { ++ group->rtpoll_next_update = update_triggers(group, now, &update_total); ++ if (update_total) ++ memcpy(group->rtpoll_total, group->total[PSI_POLL], ++ sizeof(group->rtpoll_total)); ++ } + + psi_schedule_rtpoll_work(group, + nsecs_to_jiffies(group->rtpoll_next_update - now) + 1, +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch b/queue-6.1/sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch new file mode 100644 index 00000000000..811894df2de --- /dev/null +++ b/queue-6.1/sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch @@ -0,0 +1,141 @@ +From cd6a5ae395de7987446d45c2944bc8de4a8917f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Oct 2022 19:05:51 +0800 +Subject: sched/psi: Fix avgs_work re-arm in psi_avgs_work() + +From: Chengming Zhou + +[ Upstream commit 2fcd7bbae90a6d844da8660a9d27079281dfbba2 ] + +Pavan reported a problem that PSI avgs_work idle shutoff is not +working at all. Because PSI_NONIDLE condition would be observed in +psi_avgs_work()->collect_percpu_times()->get_recent_times() even if +only the kworker running avgs_work on the CPU. 
+ +Although commit 1b69ac6b40eb ("psi: fix aggregation idle shut-off") +avoided the ping-pong wake problem when the worker sleeps, psi_avgs_work() +will still always re-arm the avgs_work, so shutoff is not working. + +This patch changes to use PSI_STATE_RESCHEDULE to flag whether to +re-arm avgs_work in get_recent_times(). For the current CPU, we re-arm +avgs_work only when (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0); +for other CPUs we can just check PSI_NONIDLE delta. The new flag +is only used in psi_avgs_work(), so we check in get_recent_times() +that current_work() is avgs_work. + +One potential problem is that the brief period of non-idle time +incurred between the aggregation run and the kworker's dequeue will +be stranded in the per-cpu buckets until avgs_work runs next time. +The buckets can hold 4s worth of time, and future activity will wake +the avgs_work with a 2s delay, giving us 2s worth of data we can leave +behind when shutting off the avgs_work. If the kworker runs other work after +avgs_work is shut off and doesn't have any scheduler activities for 2s, +this may be a problem. 
+ +Reported-by: Pavan Kondeti +Signed-off-by: Chengming Zhou +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Johannes Weiner +Acked-by: Suren Baghdasaryan +Tested-by: Chengming Zhou +Link: https://lore.kernel.org/r/20221014110551.22695-1-zhouchengming@bytedance.com +Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling") +Signed-off-by: Sasha Levin +--- + include/linux/psi_types.h | 3 +++ + kernel/sched/psi.c | 30 +++++++++++++++++++++++++++--- + 2 files changed, 30 insertions(+), 3 deletions(-) + +diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h +index 14a1ebb74e11f..1e0a0d7ace3af 100644 +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -72,6 +72,9 @@ enum psi_states { + /* Use one bit in the state mask to track TSK_ONCPU */ + #define PSI_ONCPU (1 << NR_PSI_STATES) + ++/* Flag whether to re-arm avgs_work, see details in get_recent_times() */ ++#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1)) ++ + enum psi_aggregators { + PSI_AVGS = 0, + PSI_POLL, +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index e83c321461cf4..02e011cabe917 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -243,6 +243,8 @@ static void get_recent_times(struct psi_group *group, int cpu, + u32 *pchanged_states) + { + struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); ++ int current_cpu = raw_smp_processor_id(); ++ unsigned int tasks[NR_PSI_TASK_COUNTS]; + u64 now, state_start; + enum psi_states s; + unsigned int seq; +@@ -257,6 +259,8 @@ static void get_recent_times(struct psi_group *group, int cpu, + memcpy(times, groupc->times, sizeof(groupc->times)); + state_mask = groupc->state_mask; + state_start = groupc->state_start; ++ if (cpu == current_cpu) ++ memcpy(tasks, groupc->tasks, sizeof(groupc->tasks)); + } while (read_seqcount_retry(&groupc->seq, seq)); + + /* Calculate state time deltas against the previous snapshot */ +@@ -281,6 +285,28 @@ static void get_recent_times(struct 
psi_group *group, int cpu, + if (delta) + *pchanged_states |= (1 << s); + } ++ ++ /* ++ * When collect_percpu_times() from the avgs_work, we don't want to ++ * re-arm avgs_work when all CPUs are IDLE. But the current CPU running ++ * this avgs_work is never IDLE, cause avgs_work can't be shut off. ++ * So for the current CPU, we need to re-arm avgs_work only when ++ * (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0), for other CPUs ++ * we can just check PSI_NONIDLE delta. ++ */ ++ if (current_work() == &group->avgs_work.work) { ++ bool reschedule; ++ ++ if (cpu == current_cpu) ++ reschedule = tasks[NR_RUNNING] + ++ tasks[NR_IOWAIT] + ++ tasks[NR_MEMSTALL] > 1; ++ else ++ reschedule = *pchanged_states & (1 << PSI_NONIDLE); ++ ++ if (reschedule) ++ *pchanged_states |= PSI_STATE_RESCHEDULE; ++ } + } + + static void calc_avgs(unsigned long avg[3], int missed_periods, +@@ -416,7 +442,6 @@ static void psi_avgs_work(struct work_struct *work) + struct delayed_work *dwork; + struct psi_group *group; + u32 changed_states; +- bool nonidle; + u64 now; + + dwork = to_delayed_work(work); +@@ -427,7 +452,6 @@ static void psi_avgs_work(struct work_struct *work) + now = sched_clock(); + + collect_percpu_times(group, PSI_AVGS, &changed_states); +- nonidle = changed_states & (1 << PSI_NONIDLE); + /* + * If there is task activity, periodically fold the per-cpu + * times and feed samples into the running averages. 
If things +@@ -438,7 +462,7 @@ static void psi_avgs_work(struct work_struct *work) + if (now >= group->avg_next_update) + group->avg_next_update = update_averages(group, now); + +- if (nonidle) { ++ if (changed_states & PSI_STATE_RESCHEDULE) { + schedule_delayed_work(dwork, nsecs_to_jiffies( + group->avg_next_update - now) + 1); + } +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-rearrange-polling-code-in-preparation.patch b/queue-6.1/sched-psi-rearrange-polling-code-in-preparation.patch new file mode 100644 index 00000000000..2763aad0412 --- /dev/null +++ b/queue-6.1/sched-psi-rearrange-polling-code-in-preparation.patch @@ -0,0 +1,247 @@ +From c64ea43f91987426ad1c79576bec5a3f7421d28d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Mar 2023 12:54:15 +0200 +Subject: sched/psi: Rearrange polling code in preparation + +From: Domenico Cerasuolo + +[ Upstream commit 7fab21fa0d000a0ea32d73ce8eec68557c6c268b ] + +Move a few functions up in the file to avoid forward declaration needed +in the patch implementing unprivileged PSI triggers. 
+ +Suggested-by: Johannes Weiner +Signed-off-by: Domenico Cerasuolo +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Johannes Weiner +Link: https://lore.kernel.org/r/20230330105418.77061-2-cerasuolodomenico@gmail.com +Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling") +Signed-off-by: Sasha Levin +--- + kernel/sched/psi.c | 196 ++++++++++++++++++++++----------------------- + 1 file changed, 98 insertions(+), 98 deletions(-) + +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index 02e011cabe917..fe9269f1d2a46 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -384,92 +384,6 @@ static void collect_percpu_times(struct psi_group *group, + *pchanged_states = changed_states; + } + +-static u64 update_averages(struct psi_group *group, u64 now) +-{ +- unsigned long missed_periods = 0; +- u64 expires, period; +- u64 avg_next_update; +- int s; +- +- /* avgX= */ +- expires = group->avg_next_update; +- if (now - expires >= psi_period) +- missed_periods = div_u64(now - expires, psi_period); +- +- /* +- * The periodic clock tick can get delayed for various +- * reasons, especially on loaded systems. To avoid clock +- * drift, we schedule the clock in fixed psi_period intervals. +- * But the deltas we sample out of the per-cpu buckets above +- * are based on the actual time elapsing between clock ticks. +- */ +- avg_next_update = expires + ((1 + missed_periods) * psi_period); +- period = now - (group->avg_last_update + (missed_periods * psi_period)); +- group->avg_last_update = now; +- +- for (s = 0; s < NR_PSI_STATES - 1; s++) { +- u32 sample; +- +- sample = group->total[PSI_AVGS][s] - group->avg_total[s]; +- /* +- * Due to the lockless sampling of the time buckets, +- * recorded time deltas can slip into the next period, +- * which under full pressure can result in samples in +- * excess of the period length. 
+- * +- * We don't want to report non-sensical pressures in +- * excess of 100%, nor do we want to drop such events +- * on the floor. Instead we punt any overage into the +- * future until pressure subsides. By doing this we +- * don't underreport the occurring pressure curve, we +- * just report it delayed by one period length. +- * +- * The error isn't cumulative. As soon as another +- * delta slips from a period P to P+1, by definition +- * it frees up its time T in P. +- */ +- if (sample > period) +- sample = period; +- group->avg_total[s] += sample; +- calc_avgs(group->avg[s], missed_periods, sample, period); +- } +- +- return avg_next_update; +-} +- +-static void psi_avgs_work(struct work_struct *work) +-{ +- struct delayed_work *dwork; +- struct psi_group *group; +- u32 changed_states; +- u64 now; +- +- dwork = to_delayed_work(work); +- group = container_of(dwork, struct psi_group, avgs_work); +- +- mutex_lock(&group->avgs_lock); +- +- now = sched_clock(); +- +- collect_percpu_times(group, PSI_AVGS, &changed_states); +- /* +- * If there is task activity, periodically fold the per-cpu +- * times and feed samples into the running averages. If things +- * are idle and there is no data to process, stop the clock. +- * Once restarted, we'll catch up the running averages in one +- * go - see calc_avgs() and missed_periods. 
+- */ +- if (now >= group->avg_next_update) +- group->avg_next_update = update_averages(group, now); +- +- if (changed_states & PSI_STATE_RESCHEDULE) { +- schedule_delayed_work(dwork, nsecs_to_jiffies( +- group->avg_next_update - now) + 1); +- } +- +- mutex_unlock(&group->avgs_lock); +-} +- + /* Trigger tracking window manipulations */ + static void window_reset(struct psi_window *win, u64 now, u64 value, + u64 prev_growth) +@@ -516,18 +430,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) + return growth; + } + +-static void init_triggers(struct psi_group *group, u64 now) +-{ +- struct psi_trigger *t; +- +- list_for_each_entry(t, &group->triggers, node) +- window_reset(&t->win, now, +- group->total[PSI_POLL][t->state], 0); +- memcpy(group->polling_total, group->total[PSI_POLL], +- sizeof(group->polling_total)); +- group->polling_next_update = now + group->poll_min_period; +-} +- + static u64 update_triggers(struct psi_group *group, u64 now) + { + struct psi_trigger *t; +@@ -590,6 +492,104 @@ static u64 update_triggers(struct psi_group *group, u64 now) + return now + group->poll_min_period; + } + ++static u64 update_averages(struct psi_group *group, u64 now) ++{ ++ unsigned long missed_periods = 0; ++ u64 expires, period; ++ u64 avg_next_update; ++ int s; ++ ++ /* avgX= */ ++ expires = group->avg_next_update; ++ if (now - expires >= psi_period) ++ missed_periods = div_u64(now - expires, psi_period); ++ ++ /* ++ * The periodic clock tick can get delayed for various ++ * reasons, especially on loaded systems. To avoid clock ++ * drift, we schedule the clock in fixed psi_period intervals. ++ * But the deltas we sample out of the per-cpu buckets above ++ * are based on the actual time elapsing between clock ticks. 
++ */ ++ avg_next_update = expires + ((1 + missed_periods) * psi_period); ++ period = now - (group->avg_last_update + (missed_periods * psi_period)); ++ group->avg_last_update = now; ++ ++ for (s = 0; s < NR_PSI_STATES - 1; s++) { ++ u32 sample; ++ ++ sample = group->total[PSI_AVGS][s] - group->avg_total[s]; ++ /* ++ * Due to the lockless sampling of the time buckets, ++ * recorded time deltas can slip into the next period, ++ * which under full pressure can result in samples in ++ * excess of the period length. ++ * ++ * We don't want to report non-sensical pressures in ++ * excess of 100%, nor do we want to drop such events ++ * on the floor. Instead we punt any overage into the ++ * future until pressure subsides. By doing this we ++ * don't underreport the occurring pressure curve, we ++ * just report it delayed by one period length. ++ * ++ * The error isn't cumulative. As soon as another ++ * delta slips from a period P to P+1, by definition ++ * it frees up its time T in P. ++ */ ++ if (sample > period) ++ sample = period; ++ group->avg_total[s] += sample; ++ calc_avgs(group->avg[s], missed_periods, sample, period); ++ } ++ ++ return avg_next_update; ++} ++ ++static void psi_avgs_work(struct work_struct *work) ++{ ++ struct delayed_work *dwork; ++ struct psi_group *group; ++ u32 changed_states; ++ u64 now; ++ ++ dwork = to_delayed_work(work); ++ group = container_of(dwork, struct psi_group, avgs_work); ++ ++ mutex_lock(&group->avgs_lock); ++ ++ now = sched_clock(); ++ ++ collect_percpu_times(group, PSI_AVGS, &changed_states); ++ /* ++ * If there is task activity, periodically fold the per-cpu ++ * times and feed samples into the running averages. If things ++ * are idle and there is no data to process, stop the clock. ++ * Once restarted, we'll catch up the running averages in one ++ * go - see calc_avgs() and missed_periods. 
++ */ ++ if (now >= group->avg_next_update) ++ group->avg_next_update = update_averages(group, now); ++ ++ if (changed_states & PSI_STATE_RESCHEDULE) { ++ schedule_delayed_work(dwork, nsecs_to_jiffies( ++ group->avg_next_update - now) + 1); ++ } ++ ++ mutex_unlock(&group->avgs_lock); ++} ++ ++static void init_triggers(struct psi_group *group, u64 now) ++{ ++ struct psi_trigger *t; ++ ++ list_for_each_entry(t, &group->triggers, node) ++ window_reset(&t->win, now, ++ group->total[PSI_POLL][t->state], 0); ++ memcpy(group->polling_total, group->total[PSI_POLL], ++ sizeof(group->polling_total)); ++ group->polling_next_update = now + group->poll_min_period; ++} ++ + /* Schedule polling if it's not already scheduled or forced. */ + static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay, + bool force) +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-rename-existing-poll-members-in-preparatio.patch b/queue-6.1/sched-psi-rename-existing-poll-members-in-preparatio.patch new file mode 100644 index 00000000000..63cf15f6166 --- /dev/null +++ b/queue-6.1/sched-psi-rename-existing-poll-members-in-preparatio.patch @@ -0,0 +1,432 @@ +From 0970d615d9b33fac51e3ce6bebe313abcf75dfe9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Mar 2023 12:54:16 +0200 +Subject: sched/psi: Rename existing poll members in preparation + +From: Domenico Cerasuolo + +[ Upstream commit 65457b74aa9437418e552e8d52d7112d4f9901a6 ] + +Renaming in PSI implementation to make a clear distinction between +privileged and unprivileged triggers code to be implemented in the +next patch. 
+ +Suggested-by: Johannes Weiner +Signed-off-by: Domenico Cerasuolo +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Johannes Weiner +Link: https://lore.kernel.org/r/20230330105418.77061-3-cerasuolodomenico@gmail.com +Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling") +Signed-off-by: Sasha Levin +--- + include/linux/psi_types.h | 36 ++++----- + kernel/sched/psi.c | 163 +++++++++++++++++++------------------- + 2 files changed, 100 insertions(+), 99 deletions(-) + +diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h +index 1e0a0d7ace3af..1819afa8b1987 100644 +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -175,26 +175,26 @@ struct psi_group { + u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; + unsigned long avg[NR_PSI_STATES - 1][3]; + +- /* Monitor work control */ +- struct task_struct __rcu *poll_task; +- struct timer_list poll_timer; +- wait_queue_head_t poll_wait; +- atomic_t poll_wakeup; +- atomic_t poll_scheduled; ++ /* Monitor RT polling work control */ ++ struct task_struct __rcu *rtpoll_task; ++ struct timer_list rtpoll_timer; ++ wait_queue_head_t rtpoll_wait; ++ atomic_t rtpoll_wakeup; ++ atomic_t rtpoll_scheduled; + + /* Protects data used by the monitor */ +- struct mutex trigger_lock; +- +- /* Configured polling triggers */ +- struct list_head triggers; +- u32 nr_triggers[NR_PSI_STATES - 1]; +- u32 poll_states; +- u64 poll_min_period; +- +- /* Total stall times at the start of monitor activation */ +- u64 polling_total[NR_PSI_STATES - 1]; +- u64 polling_next_update; +- u64 polling_until; ++ struct mutex rtpoll_trigger_lock; ++ ++ /* Configured RT polling triggers */ ++ struct list_head rtpoll_triggers; ++ u32 rtpoll_nr_triggers[NR_PSI_STATES - 1]; ++ u32 rtpoll_states; ++ u64 rtpoll_min_period; ++ ++ /* Total stall times at the start of RT polling monitor activation */ ++ u64 rtpoll_total[NR_PSI_STATES - 1]; ++ u64 rtpoll_next_update; ++ u64 rtpoll_until; + }; + + #else 
/* CONFIG_PSI */ +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index fe9269f1d2a46..a3d0b5cf797ab 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -189,14 +189,14 @@ static void group_init(struct psi_group *group) + INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); + mutex_init(&group->avgs_lock); + /* Init trigger-related members */ +- atomic_set(&group->poll_scheduled, 0); +- mutex_init(&group->trigger_lock); +- INIT_LIST_HEAD(&group->triggers); +- group->poll_min_period = U32_MAX; +- group->polling_next_update = ULLONG_MAX; +- init_waitqueue_head(&group->poll_wait); +- timer_setup(&group->poll_timer, poll_timer_fn, 0); +- rcu_assign_pointer(group->poll_task, NULL); ++ atomic_set(&group->rtpoll_scheduled, 0); ++ mutex_init(&group->rtpoll_trigger_lock); ++ INIT_LIST_HEAD(&group->rtpoll_triggers); ++ group->rtpoll_min_period = U32_MAX; ++ group->rtpoll_next_update = ULLONG_MAX; ++ init_waitqueue_head(&group->rtpoll_wait); ++ timer_setup(&group->rtpoll_timer, poll_timer_fn, 0); ++ rcu_assign_pointer(group->rtpoll_task, NULL); + } + + void __init psi_init(void) +@@ -440,11 +440,11 @@ static u64 update_triggers(struct psi_group *group, u64 now) + * On subsequent updates, calculate growth deltas and let + * watchers know when their specified thresholds are exceeded. 
+ */ +- list_for_each_entry(t, &group->triggers, node) { ++ list_for_each_entry(t, &group->rtpoll_triggers, node) { + u64 growth; + bool new_stall; + +- new_stall = group->polling_total[t->state] != total[t->state]; ++ new_stall = group->rtpoll_total[t->state] != total[t->state]; + + /* Check for stall activity or a previous threshold breach */ + if (!new_stall && !t->pending_event) +@@ -486,10 +486,10 @@ static u64 update_triggers(struct psi_group *group, u64 now) + } + + if (update_total) +- memcpy(group->polling_total, total, +- sizeof(group->polling_total)); ++ memcpy(group->rtpoll_total, total, ++ sizeof(group->rtpoll_total)); + +- return now + group->poll_min_period; ++ return now + group->rtpoll_min_period; + } + + static u64 update_averages(struct psi_group *group, u64 now) +@@ -582,53 +582,53 @@ static void init_triggers(struct psi_group *group, u64 now) + { + struct psi_trigger *t; + +- list_for_each_entry(t, &group->triggers, node) ++ list_for_each_entry(t, &group->rtpoll_triggers, node) + window_reset(&t->win, now, + group->total[PSI_POLL][t->state], 0); +- memcpy(group->polling_total, group->total[PSI_POLL], +- sizeof(group->polling_total)); +- group->polling_next_update = now + group->poll_min_period; ++ memcpy(group->rtpoll_total, group->total[PSI_POLL], ++ sizeof(group->rtpoll_total)); ++ group->rtpoll_next_update = now + group->rtpoll_min_period; + } + + /* Schedule polling if it's not already scheduled or forced. */ +-static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay, ++static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay, + bool force) + { + struct task_struct *task; + + /* + * atomic_xchg should be called even when !force to provide a +- * full memory barrier (see the comment inside psi_poll_work). ++ * full memory barrier (see the comment inside psi_rtpoll_work). 
+ */ +- if (atomic_xchg(&group->poll_scheduled, 1) && !force) ++ if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force) + return; + + rcu_read_lock(); + +- task = rcu_dereference(group->poll_task); ++ task = rcu_dereference(group->rtpoll_task); + /* + * kworker might be NULL in case psi_trigger_destroy races with + * psi_task_change (hotpath) which can't use locks + */ + if (likely(task)) +- mod_timer(&group->poll_timer, jiffies + delay); ++ mod_timer(&group->rtpoll_timer, jiffies + delay); + else +- atomic_set(&group->poll_scheduled, 0); ++ atomic_set(&group->rtpoll_scheduled, 0); + + rcu_read_unlock(); + } + +-static void psi_poll_work(struct psi_group *group) ++static void psi_rtpoll_work(struct psi_group *group) + { + bool force_reschedule = false; + u32 changed_states; + u64 now; + +- mutex_lock(&group->trigger_lock); ++ mutex_lock(&group->rtpoll_trigger_lock); + + now = sched_clock(); + +- if (now > group->polling_until) { ++ if (now > group->rtpoll_until) { + /* + * We are either about to start or might stop polling if no + * state change was recorded. Resetting poll_scheduled leaves +@@ -638,7 +638,7 @@ static void psi_poll_work(struct psi_group *group) + * should be negligible and polling_next_update still keeps + * updates correctly on schedule. + */ +- atomic_set(&group->poll_scheduled, 0); ++ atomic_set(&group->rtpoll_scheduled, 0); + /* + * A task change can race with the poll worker that is supposed to + * report on it. 
To avoid missing events, ensure ordering between +@@ -667,9 +667,9 @@ static void psi_poll_work(struct psi_group *group) + + collect_percpu_times(group, PSI_POLL, &changed_states); + +- if (changed_states & group->poll_states) { ++ if (changed_states & group->rtpoll_states) { + /* Initialize trigger windows when entering polling mode */ +- if (now > group->polling_until) ++ if (now > group->rtpoll_until) + init_triggers(group, now); + + /* +@@ -677,50 +677,50 @@ static void psi_poll_work(struct psi_group *group) + * minimum tracking window as long as monitor states are + * changing. + */ +- group->polling_until = now + +- group->poll_min_period * UPDATES_PER_WINDOW; ++ group->rtpoll_until = now + ++ group->rtpoll_min_period * UPDATES_PER_WINDOW; + } + +- if (now > group->polling_until) { +- group->polling_next_update = ULLONG_MAX; ++ if (now > group->rtpoll_until) { ++ group->rtpoll_next_update = ULLONG_MAX; + goto out; + } + +- if (now >= group->polling_next_update) +- group->polling_next_update = update_triggers(group, now); ++ if (now >= group->rtpoll_next_update) ++ group->rtpoll_next_update = update_triggers(group, now); + +- psi_schedule_poll_work(group, +- nsecs_to_jiffies(group->polling_next_update - now) + 1, ++ psi_schedule_rtpoll_work(group, ++ nsecs_to_jiffies(group->rtpoll_next_update - now) + 1, + force_reschedule); + + out: +- mutex_unlock(&group->trigger_lock); ++ mutex_unlock(&group->rtpoll_trigger_lock); + } + +-static int psi_poll_worker(void *data) ++static int psi_rtpoll_worker(void *data) + { + struct psi_group *group = (struct psi_group *)data; + + sched_set_fifo_low(current); + + while (true) { +- wait_event_interruptible(group->poll_wait, +- atomic_cmpxchg(&group->poll_wakeup, 1, 0) || ++ wait_event_interruptible(group->rtpoll_wait, ++ atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) || + kthread_should_stop()); + if (kthread_should_stop()) + break; + +- psi_poll_work(group); ++ psi_rtpoll_work(group); + } + return 0; + } + + static void 
poll_timer_fn(struct timer_list *t) + { +- struct psi_group *group = from_timer(group, t, poll_timer); ++ struct psi_group *group = from_timer(group, t, rtpoll_timer); + +- atomic_set(&group->poll_wakeup, 1); +- wake_up_interruptible(&group->poll_wait); ++ atomic_set(&group->rtpoll_wakeup, 1); ++ wake_up_interruptible(&group->rtpoll_wait); + } + + static void record_times(struct psi_group_cpu *groupc, u64 now) +@@ -851,8 +851,8 @@ static void psi_group_change(struct psi_group *group, int cpu, + + write_seqcount_end(&groupc->seq); + +- if (state_mask & group->poll_states) +- psi_schedule_poll_work(group, 1, false); ++ if (state_mask & group->rtpoll_states) ++ psi_schedule_rtpoll_work(group, 1, false); + + if (wake_clock && !delayed_work_pending(&group->avgs_work)) + schedule_delayed_work(&group->avgs_work, PSI_FREQ); +@@ -1005,8 +1005,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta) + + write_seqcount_end(&groupc->seq); + +- if (group->poll_states & (1 << PSI_IRQ_FULL)) +- psi_schedule_poll_work(group, 1, false); ++ if (group->rtpoll_states & (1 << PSI_IRQ_FULL)) ++ psi_schedule_rtpoll_work(group, 1, false); + } while ((group = group->parent)); + } + #endif +@@ -1101,7 +1101,7 @@ void psi_cgroup_free(struct cgroup *cgroup) + cancel_delayed_work_sync(&cgroup->psi->avgs_work); + free_percpu(cgroup->psi->pcpu); + /* All triggers must be removed by now */ +- WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n"); ++ WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n"); + kfree(cgroup->psi); + } + +@@ -1302,29 +1302,29 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, + init_waitqueue_head(&t->event_wait); + t->pending_event = false; + +- mutex_lock(&group->trigger_lock); ++ mutex_lock(&group->rtpoll_trigger_lock); + +- if (!rcu_access_pointer(group->poll_task)) { ++ if (!rcu_access_pointer(group->rtpoll_task)) { + struct task_struct *task; + +- task = kthread_create(psi_poll_worker, group, "psimon"); ++ task = 
kthread_create(psi_rtpoll_worker, group, "psimon"); + if (IS_ERR(task)) { + kfree(t); +- mutex_unlock(&group->trigger_lock); ++ mutex_unlock(&group->rtpoll_trigger_lock); + return ERR_CAST(task); + } +- atomic_set(&group->poll_wakeup, 0); ++ atomic_set(&group->rtpoll_wakeup, 0); + wake_up_process(task); +- rcu_assign_pointer(group->poll_task, task); ++ rcu_assign_pointer(group->rtpoll_task, task); + } + +- list_add(&t->node, &group->triggers); +- group->poll_min_period = min(group->poll_min_period, ++ list_add(&t->node, &group->rtpoll_triggers); ++ group->rtpoll_min_period = min(group->rtpoll_min_period, + div_u64(t->win.size, UPDATES_PER_WINDOW)); +- group->nr_triggers[t->state]++; +- group->poll_states |= (1 << t->state); ++ group->rtpoll_nr_triggers[t->state]++; ++ group->rtpoll_states |= (1 << t->state); + +- mutex_unlock(&group->trigger_lock); ++ mutex_unlock(&group->rtpoll_trigger_lock); + + return t; + } +@@ -1349,51 +1349,52 @@ void psi_trigger_destroy(struct psi_trigger *t) + */ + wake_up_pollfree(&t->event_wait); + +- mutex_lock(&group->trigger_lock); ++ mutex_lock(&group->rtpoll_trigger_lock); + + if (!list_empty(&t->node)) { + struct psi_trigger *tmp; + u64 period = ULLONG_MAX; + + list_del(&t->node); +- group->nr_triggers[t->state]--; +- if (!group->nr_triggers[t->state]) +- group->poll_states &= ~(1 << t->state); ++ group->rtpoll_nr_triggers[t->state]--; ++ if (!group->rtpoll_nr_triggers[t->state]) ++ group->rtpoll_states &= ~(1 << t->state); + /* reset min update period for the remaining triggers */ +- list_for_each_entry(tmp, &group->triggers, node) ++ list_for_each_entry(tmp, &group->rtpoll_triggers, node) + period = min(period, div_u64(tmp->win.size, + UPDATES_PER_WINDOW)); +- group->poll_min_period = period; +- /* Destroy poll_task when the last trigger is destroyed */ +- if (group->poll_states == 0) { +- group->polling_until = 0; ++ group->rtpoll_min_period = period; ++ /* Destroy rtpoll_task when the last trigger is destroyed */ ++ if 
(group->rtpoll_states == 0) { ++ group->rtpoll_until = 0; + task_to_destroy = rcu_dereference_protected( +- group->poll_task, +- lockdep_is_held(&group->trigger_lock)); +- rcu_assign_pointer(group->poll_task, NULL); +- del_timer(&group->poll_timer); ++ group->rtpoll_task, ++ lockdep_is_held(&group->rtpoll_trigger_lock)); ++ rcu_assign_pointer(group->rtpoll_task, NULL); ++ del_timer(&group->rtpoll_timer); + } + } + +- mutex_unlock(&group->trigger_lock); ++ mutex_unlock(&group->rtpoll_trigger_lock); + + /* +- * Wait for psi_schedule_poll_work RCU to complete its read-side ++ * Wait for psi_schedule_rtpoll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the +- * poll_task. ++ * rtpoll_task. + */ + synchronize_rcu(); + /* +- * Stop kthread 'psimon' after releasing trigger_lock to prevent a +- * deadlock while waiting for psi_poll_work to acquire trigger_lock ++ * Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent ++ * a deadlock while waiting for psi_rtpoll_work to acquire ++ * rtpoll_trigger_lock + */ + if (task_to_destroy) { + /* + * After the RCU grace period has expired, the worker +- * can no longer be found through group->poll_task. ++ * can no longer be found through group->rtpoll_task. 
+ */ + kthread_stop(task_to_destroy); +- atomic_set(&group->poll_scheduled, 0); ++ atomic_set(&group->rtpoll_scheduled, 0); + } + kfree(t); + } +-- +2.39.2 + diff --git a/queue-6.1/sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch b/queue-6.1/sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch new file mode 100644 index 00000000000..2f9c6baea91 --- /dev/null +++ b/queue-6.1/sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch @@ -0,0 +1,176 @@ +From cc4a5d27580aad5472ec624bab19f12d4556982c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Jun 2023 17:56:12 -0700 +Subject: sched/psi: use kernfs polling functions for PSI trigger polling + +From: Suren Baghdasaryan + +[ Upstream commit aff037078ecaecf34a7c2afab1341815f90fba5e ] + +Destroying psi trigger in cgroup_file_release causes UAF issues when +a cgroup is removed from under a polling process. This is happening +because cgroup removal causes a call to cgroup_file_release while the +actual file is still alive. Destroying the trigger at this point would +also destroy its waitqueue head and if there is still a polling process +on that file accessing the waitqueue, it will step on the freed pointer: + +do_select + vfs_poll + do_rmdir + cgroup_rmdir + kernfs_drain_open_files + cgroup_file_release + cgroup_pressure_release + psi_trigger_destroy + wake_up_pollfree(&t->event_wait) +// vfs_poll is unblocked + synchronize_rcu + kfree(t) + poll_freewait -> UAF access to the trigger's waitqueue head + +Patch [1] fixed this issue for epoll() case using wake_up_pollfree(), +however the same issue exists for synchronous poll() case. +The root cause of this issue is that the lifecycles of the psi trigger's +waitqueue and of the file associated with the trigger are different. Fix +this by using kernfs_generic_poll function when polling on cgroup-specific +psi triggers. It internally uses kernfs_open_node->poll waitqueue head +with its lifecycle tied to the file's lifecycle. 
This also renders the +fix in [1] obsolete, so revert it. + +[1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()") + +Fixes: 0e94682b73bf ("psi: introduce psi monitor") +Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@huawei.com/ +Reported-by: Lu Jialin +Signed-off-by: Suren Baghdasaryan +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20230630005612.1014540-1-surenb@google.com +Signed-off-by: Sasha Levin +--- + include/linux/psi.h | 5 +++-- + include/linux/psi_types.h | 3 +++ + kernel/cgroup/cgroup.c | 2 +- + kernel/sched/psi.c | 29 +++++++++++++++++++++-------- + 4 files changed, 28 insertions(+), 11 deletions(-) + +diff --git a/include/linux/psi.h b/include/linux/psi.h +index ab26200c28033..e0745873e3f26 100644 +--- a/include/linux/psi.h ++++ b/include/linux/psi.h +@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags); + void psi_memstall_leave(unsigned long *flags); + + int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); +-struct psi_trigger *psi_trigger_create(struct psi_group *group, +- char *buf, enum psi_res res, struct file *file); ++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, ++ enum psi_res res, struct file *file, ++ struct kernfs_open_file *of); + void psi_trigger_destroy(struct psi_trigger *t); + + __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, +diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h +index 040c089581c6c..f1fd3a8044e0e 100644 +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -137,6 +137,9 @@ struct psi_trigger { + /* Wait queue for polling */ + wait_queue_head_t event_wait; + ++ /* Kernfs file for cgroup triggers */ ++ struct kernfs_open_file *of; ++ + /* Pending event flag */ + int event; + +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index c35efae566a4b..73f11e4db3a4d 100644 +--- a/kernel/cgroup/cgroup.c ++++ 
b/kernel/cgroup/cgroup.c +@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, + } + + psi = cgroup_psi(cgrp); +- new = psi_trigger_create(psi, buf, res, of->file); ++ new = psi_trigger_create(psi, buf, res, of->file, of); + if (IS_ERR(new)) { + cgroup_put(cgrp); + return PTR_ERR(new); +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c +index e072f6b31bf30..80d8c10e93638 100644 +--- a/kernel/sched/psi.c ++++ b/kernel/sched/psi.c +@@ -494,8 +494,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total, + continue; + + /* Generate an event */ +- if (cmpxchg(&t->event, 0, 1) == 0) +- wake_up_interruptible(&t->event_wait); ++ if (cmpxchg(&t->event, 0, 1) == 0) { ++ if (t->of) ++ kernfs_notify(t->of->kn); ++ else ++ wake_up_interruptible(&t->event_wait); ++ } + t->last_event_time = now; + /* Reset threshold breach flag once event got generated */ + t->pending_event = false; +@@ -1272,8 +1276,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) + return 0; + } + +-struct psi_trigger *psi_trigger_create(struct psi_group *group, +- char *buf, enum psi_res res, struct file *file) ++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, ++ enum psi_res res, struct file *file, ++ struct kernfs_open_file *of) + { + struct psi_trigger *t; + enum psi_states state; +@@ -1333,7 +1338,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, + + t->event = 0; + t->last_event_time = 0; +- init_waitqueue_head(&t->event_wait); ++ t->of = of; ++ if (!of) ++ init_waitqueue_head(&t->event_wait); + t->pending_event = false; + t->aggregator = privileged ? PSI_POLL : PSI_AVGS; + +@@ -1390,7 +1397,10 @@ void psi_trigger_destroy(struct psi_trigger *t) + * being accessed later. Can happen if cgroup is deleted from under a + * polling process. 
+ */ +- wake_up_pollfree(&t->event_wait); ++ if (t->of) ++ kernfs_notify(t->of->kn); ++ else ++ wake_up_interruptible(&t->event_wait); + + if (t->aggregator == PSI_AVGS) { + mutex_lock(&group->avgs_lock); +@@ -1462,7 +1472,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr, + if (!t) + return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; + +- poll_wait(file, &t->event_wait, wait); ++ if (t->of) ++ kernfs_generic_poll(t->of, wait); ++ else ++ poll_wait(file, &t->event_wait, wait); + + if (cmpxchg(&t->event, 1, 0) == 1) + ret |= EPOLLPRI; +@@ -1532,7 +1545,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, + return -EBUSY; + } + +- new = psi_trigger_create(&psi_system, buf, res, file); ++ new = psi_trigger_create(&psi_system, buf, res, file, NULL); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); +-- +2.39.2 + diff --git a/queue-6.1/security-keys-modify-mismatched-function-name.patch b/queue-6.1/security-keys-modify-mismatched-function-name.patch new file mode 100644 index 00000000000..964df76e0b9 --- /dev/null +++ b/queue-6.1/security-keys-modify-mismatched-function-name.patch @@ -0,0 +1,40 @@ +From d5bcc1aba8ad5267a2fd8d1da3794a97630d9c16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Jun 2023 10:18:25 +0800 +Subject: security: keys: Modify mismatched function name + +From: Jiapeng Chong + +[ Upstream commit 2a4152742025c5f21482e8cebc581702a0fa5b01 ] + +No functional modification involved. + +security/keys/trusted-keys/trusted_tpm2.c:203: warning: expecting prototype for tpm_buf_append_auth(). Prototype was for tpm2_buf_append_auth() instead. 
+ +Fixes: 2e19e10131a0 ("KEYS: trusted: Move TPM2 trusted keys code") +Reported-by: Abaci Robot +Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5524 +Signed-off-by: Jiapeng Chong +Reviewed-by: Paul Moore +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Sasha Levin +--- + security/keys/trusted-keys/trusted_tpm2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c +index 2b2c8eb258d5b..bc700f85f80be 100644 +--- a/security/keys/trusted-keys/trusted_tpm2.c ++++ b/security/keys/trusted-keys/trusted_tpm2.c +@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen, + } + + /** +- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer. ++ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer. + * + * @buf: an allocated tpm_buf instance + * @session_handle: session handle +-- +2.39.2 + diff --git a/queue-6.1/series b/queue-6.1/series index e7bc4dd2231..6df3335dfa7 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -49,3 +49,116 @@ asoc-codecs-wcd934x-fix-resource-leaks-on-component-remove.patch asoc-codecs-wcd938x-fix-codec-initialisation-race.patch asoc-codecs-wcd938x-fix-soundwire-initialisation-race.patch ext4-correct-inline-offset-when-handling-xattrs-in-inode-body.patch +drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch +alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch +quota-properly-disable-quotas-when-add_dquot_ref-fai.patch +quota-fix-warning-in-dqgrab.patch +hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch +ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch +udf-fix-uninitialized-array-access-for-some-pathname.patch +fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch +mips-dec-prom-address-warray-bounds-warning.patch +fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch +fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch 
+acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch +rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch +rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch +sched-fair-don-t-balance-task-to-its-current-running.patch +wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch +bpf-print-a-warning-only-if-writing-to-unprivileged_.patch +bpf-address-kcsan-report-on-bpf_lru_list.patch +bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch +wifi-ath11k-add-support-default-regdb-while-searchin.patch +wifi-mac80211_hwsim-fix-possible-null-dereference.patch +spi-dw-add-compatible-for-intel-mount-evans-soc.patch +wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch +net-ethernet-litex-add-support-for-64-bit-stats.patch +devlink-report-devlink_port_type_warn-source-device.patch +wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch +wifi-iwlwifi-add-support-for-new-pci-id.patch +wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch +wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch +igb-fix-igb_down-hung-on-surprise-removal.patch +net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch +asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch +asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch +asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch +asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch +sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch +sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch +sched-psi-rearrange-polling-code-in-preparation.patch +sched-psi-rename-existing-poll-members-in-preparatio.patch +sched-psi-extract-update_triggers-side-effect.patch +sched-psi-allow-unprivileged-polling-of-n-2s-period.patch +sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch +pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch +pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch +spi-bcm63xx-fix-max-prepend-length.patch +fbdev-imxfb-warn-about-invalid-left-right-margin.patch 
+fbdev-imxfb-removed-unneeded-release_mem_region.patch +perf-build-fix-library-not-found-error-when-using-cs.patch +btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch +spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch +kallsyms-improve-the-performance-of-kallsyms_lookup_.patch +kallsyms-correctly-sequence-symbols-when-config_lto_.patch +kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch +dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch +net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch +bridge-add-extack-warning-when-enabling-stp-in-netns.patch +net-ethernet-mtk_eth_soc-handle-probe-deferral.patch +cifs-fix-mid-leak-during-reconnection-after-timeout-.patch +asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch +net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch +net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch +net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch +net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch +net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch +net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch +net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch +net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch +iavf-fix-use-after-free-in-free_netdev.patch +iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch +iavf-use-internal-state-to-free-traffic-irqs.patch +iavf-move-netdev_update_features-into-watchdog-task.patch +iavf-send-vlan-offloading-caps-once-after-vfr.patch +iavf-make-functions-static-where-possible.patch +iavf-wait-for-reset-in-callbacks-which-trigger-it.patch +iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch +iavf-fix-reset-task-race-with-iavf_remove.patch +security-keys-modify-mismatched-function-name.patch +octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch +bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch +bpf-repeat-check_max_stack_depth-for-async-callbacks.patch 
+bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch +igc-avoid-transmit-queue-timeout-for-xdp.patch +igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch +net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch +tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch +tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch +net-ipv4-use-kfree_sensitive-instead-of-kfree.patch +net-ipv6-check-return-value-of-pskb_trim.patch +revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch +fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch +llc-don-t-drop-packet-from-non-root-netns.patch +alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch +netfilter-nf_tables-fix-spurious-set-element-inserti.patch +netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch +netfilter-nft_set_pipapo-fix-improper-element-remova.patch +netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch +netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch +bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch +bluetooth-hci_event-call-disconnect-callback-before-.patch +bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch +bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch +tcp-annotate-data-races-around-tp-tcp_tx_delay.patch +tcp-annotate-data-races-around-tp-tsoffset.patch +tcp-annotate-data-races-around-tp-keepalive_time.patch +tcp-annotate-data-races-around-tp-keepalive_intvl.patch +tcp-annotate-data-races-around-tp-keepalive_probes.patch +tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch +tcp-annotate-data-races-around-tp-linger2.patch +tcp-annotate-data-races-around-rskq_defer_accept.patch +tcp-annotate-data-races-around-tp-notsent_lowat.patch +tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch +tcp-annotate-data-races-around-fastopenq.max_qlen.patch +net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch diff --git a/queue-6.1/spi-bcm63xx-fix-max-prepend-length.patch b/queue-6.1/spi-bcm63xx-fix-max-prepend-length.patch new 
file mode 100644 index 00000000000..378e34a46b9 --- /dev/null +++ b/queue-6.1/spi-bcm63xx-fix-max-prepend-length.patch @@ -0,0 +1,47 @@ +From cf5e36388cb882c6653cd3159ae15b19b12d882e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Jun 2023 09:14:52 +0200 +Subject: spi: bcm63xx: fix max prepend length + +From: Jonas Gorski + +[ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ] + +The command word is defined as following: + + /* Command */ + #define SPI_CMD_COMMAND_SHIFT 0 + #define SPI_CMD_DEVICE_ID_SHIFT 4 + #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT 8 + #define SPI_CMD_ONE_BYTE_SHIFT 11 + #define SPI_CMD_ONE_WIRE_SHIFT 12 + +If the prepend byte count field starts at bit 8, and the next defined +bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and +thus the max value is 7, not 15. + +Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up") +Signed-off-by: Jonas Gorski +Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-bcm63xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c +index 80fa0ef8909ca..147199002df1e 100644 +--- a/drivers/spi/spi-bcm63xx.c ++++ b/drivers/spi/spi-bcm63xx.c +@@ -126,7 +126,7 @@ enum bcm63xx_regs_spi { + SPI_MSG_DATA_SIZE, + }; + +-#define BCM63XX_SPI_MAX_PREPEND 15 ++#define BCM63XX_SPI_MAX_PREPEND 7 + + #define BCM63XX_SPI_MAX_CS 8 + #define BCM63XX_SPI_BUS_NUM 0 +-- +2.39.2 + diff --git a/queue-6.1/spi-dw-add-compatible-for-intel-mount-evans-soc.patch b/queue-6.1/spi-dw-add-compatible-for-intel-mount-evans-soc.patch new file mode 100644 index 00000000000..26ebd33b46c --- /dev/null +++ b/queue-6.1/spi-dw-add-compatible-for-intel-mount-evans-soc.patch @@ -0,0 +1,81 @@ +From a47a909fedf766372d2d6e58a2e2e2694d9e1dfe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 07:54:01 -0700 +Subject: spi: dw: 
Add compatible for Intel Mount Evans SoC + +From: Abe Kohandel + +[ Upstream commit 0760d5d0e9f0c0e2200a0323a61d1995bb745dee ] + +The Intel Mount Evans SoC's Integrated Management Complex uses the SPI +controller for access to a NOR SPI FLASH. However, the SoC doesn't +provide a mechanism to override the native chip select signal. + +This driver doesn't use DMA for memory operations when a chip select +override is not provided due to the native chip select timing behavior. +As a result no DMA configuration is done for the controller and this +configuration is not tested. + +The controller also has an errata where a full TX FIFO can result in +data corruption. The suggested workaround is to never completely fill +the FIFO. The TX FIFO has a size of 32 so the fifo_len is set to 31. + +Signed-off-by: Abe Kohandel +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20230606145402.474866-2-abe.kohandel@intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-dw-mmio.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c +index 26c40ea6dd129..7e8478ad74e55 100644 +--- a/drivers/spi/spi-dw-mmio.c ++++ b/drivers/spi/spi-dw-mmio.c +@@ -222,6 +222,31 @@ static int dw_spi_intel_init(struct platform_device *pdev, + return 0; + } + ++/* ++ * The Intel Mount Evans SoC's Integrated Management Complex uses the ++ * SPI controller for access to a NOR SPI FLASH. However, the SoC doesn't ++ * provide a mechanism to override the native chip select signal. ++ * ++ * This driver doesn't use DMA for memory operations when a chip select ++ * override is not provided due to the native chip select timing behavior. ++ * As a result no DMA configuration is done for the controller and this ++ * configuration is not tested. 
++ */ ++static int dw_spi_mountevans_imc_init(struct platform_device *pdev, ++ struct dw_spi_mmio *dwsmmio) ++{ ++ /* ++ * The Intel Mount Evans SoC's Integrated Management Complex DW ++ * apb_ssi_v4.02a controller has an errata where a full TX FIFO can ++ * result in data corruption. The suggested workaround is to never ++ * completely fill the FIFO. The TX FIFO has a size of 32 so the ++ * fifo_len is set to 31. ++ */ ++ dwsmmio->dws.fifo_len = 31; ++ ++ return 0; ++} ++ + static int dw_spi_canaan_k210_init(struct platform_device *pdev, + struct dw_spi_mmio *dwsmmio) + { +@@ -350,6 +375,10 @@ static const struct of_device_id dw_spi_mmio_of_match[] = { + { .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_hssi_init}, + { .compatible = "intel,keembay-ssi", .data = dw_spi_intel_init}, + { .compatible = "intel,thunderbay-ssi", .data = dw_spi_intel_init}, ++ { ++ .compatible = "intel,mountevans-imc-ssi", ++ .data = dw_spi_mountevans_imc_init, ++ }, + { .compatible = "microchip,sparx5-spi", dw_spi_mscc_sparx5_init}, + { .compatible = "canaan,k210-spi", dw_spi_canaan_k210_init}, + { /* end of table */} +-- +2.39.2 + diff --git a/queue-6.1/spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch b/queue-6.1/spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch new file mode 100644 index 00000000000..8843429f8cc --- /dev/null +++ b/queue-6.1/spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch @@ -0,0 +1,40 @@ +From f832b5453eead49443949271d5828c464703455b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 17:20:20 +0900 +Subject: spi: s3c64xx: clear loopback bit after loopback test + +From: Jaewon Kim + +[ Upstream commit 9ec3c5517e22a12d2ff1b71e844f7913641460c6 ] + +When SPI loopback transfer is performed, S3C64XX_SPI_MODE_SELF_LOOPBACK +bit still remained. It works as loopback even if the next transfer is +not spi loopback mode. +If not SPI_LOOP, needs to clear S3C64XX_SPI_MODE_SELF_LOOPBACK bit. 
+ +Signed-off-by: Jaewon Kim +Fixes: ffb7bcd3b27e ("spi: s3c64xx: support loopback mode") +Reviewed-by: Chanho Park +Link: https://lore.kernel.org/r/20230711082020.138165-1-jaewon02.kim@samsung.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-s3c64xx.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c +index 71d324ec9a70a..1480df7b43b3f 100644 +--- a/drivers/spi/spi-s3c64xx.c ++++ b/drivers/spi/spi-s3c64xx.c +@@ -668,6 +668,8 @@ static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd) + + if ((sdd->cur_mode & SPI_LOOP) && sdd->port_conf->has_loopback) + val |= S3C64XX_SPI_MODE_SELF_LOOPBACK; ++ else ++ val &= ~S3C64XX_SPI_MODE_SELF_LOOPBACK; + + writel(val, regs + S3C64XX_SPI_MODE_CFG); + +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-fastopenq.max_qlen.patch b/queue-6.1/tcp-annotate-data-races-around-fastopenq.max_qlen.patch new file mode 100644 index 00000000000..8d091d79b80 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-fastopenq.max_qlen.patch @@ -0,0 +1,77 @@ +From 7035bedf31a88876c025d69b93d6ebb0256f36f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:57 +0000 +Subject: tcp: annotate data-races around fastopenq.max_qlen + +From: Eric Dumazet + +[ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ] + +This field can be read locklessly. 
+ +Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/linux/tcp.h | 2 +- + net/ipv4/tcp.c | 2 +- + net/ipv4/tcp_fastopen.c | 6 ++++-- + 3 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/include/linux/tcp.h b/include/linux/tcp.h +index 41b1da621a458..9cd289ad3f5b5 100644 +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -510,7 +510,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); + +- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); ++ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn)); + } + + static inline void tcp_move_syn(struct tcp_sock *tp, +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index b3a5ff311567b..fab25d4f3a6f1 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -4247,7 +4247,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_FASTOPEN: +- val = icsk->icsk_accept_queue.fastopenq.max_qlen; ++ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); + break; + + case TCP_FASTOPEN_CONNECT: +diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c +index 45cc7f1ca2961..85e4953f11821 100644 +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, + static bool tcp_fastopen_queue_check(struct sock *sk) + { + struct fastopen_queue *fastopenq; ++ int max_qlen; + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying +@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk) + * temporarily 
vs a server not supporting Fast Open at all. + */ + fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; +- if (fastopenq->max_qlen == 0) ++ max_qlen = READ_ONCE(fastopenq->max_qlen); ++ if (max_qlen == 0) + return false; + +- if (fastopenq->qlen >= fastopenq->max_qlen) { ++ if (fastopenq->qlen >= max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch b/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch new file mode 100644 index 00000000000..abaaf2ef0ca --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch @@ -0,0 +1,69 @@ +From ae744dd736807b48f042d785128b2d771387f69c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:52 +0000 +Subject: tcp: annotate data-races around icsk->icsk_syn_retries + +From: Eric Dumazet + +[ Upstream commit 3a037f0f3c4bfe44518f2fbb478aa2f99a9cd8bb ] + +do_tcp_getsockopt() and reqsk_timer_handler() read +icsk->icsk_syn_retries while another cpu might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-7-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_connection_sock.c | 2 +- + net/ipv4/tcp.c | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 8e35ea66d930a..62a3b103f258a 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -1016,7 +1016,7 @@ static void reqsk_timer_handler(struct timer_list *t) + + icsk = inet_csk(sk_listener); + net = sock_net(sk_listener); +- max_syn_ack_retries = icsk->icsk_syn_retries ? : ++ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? 
: + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); + /* Normally all the openreqs are young and become mature + * (i.e. converted to established socket) for first timeout. +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 7d75928ea0f9c..ffa9717293358 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3397,7 +3397,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val) + return -EINVAL; + + lock_sock(sk); +- inet_csk(sk)->icsk_syn_retries = val; ++ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val); + release_sock(sk); + return 0; + } +@@ -3678,7 +3678,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + if (val < 1 || val > MAX_TCP_SYNCNT) + err = -EINVAL; + else +- icsk->icsk_syn_retries = val; ++ WRITE_ONCE(icsk->icsk_syn_retries, val); + break; + + case TCP_SAVE_SYN: +@@ -4095,7 +4095,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + val = keepalive_probes(tp); + break; + case TCP_SYNCNT: +- val = icsk->icsk_syn_retries ? : ++ val = READ_ONCE(icsk->icsk_syn_retries) ? 
: + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); + break; + case TCP_LINGER2: +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch b/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch new file mode 100644 index 00000000000..1840f3aa1b1 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch @@ -0,0 +1,54 @@ +From 7efbdf0a8a4d26103224e8eb9779b4b5c48a11c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:56 +0000 +Subject: tcp: annotate data-races around icsk->icsk_user_timeout + +From: Eric Dumazet + +[ Upstream commit 26023e91e12c68669db416b97234328a03d8e499 ] + +This field can be read locklessly from do_tcp_getsockopt() + +Fixes: dca43c75e7e5 ("tcp: Add TCP_USER_TIMEOUT socket option.") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-11-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 6f3a494b965ae..b3a5ff311567b 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3406,7 +3406,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt); + void tcp_sock_set_user_timeout(struct sock *sk, u32 val) + { + lock_sock(sk); +- inet_csk(sk)->icsk_user_timeout = val; ++ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); + release_sock(sk); + } + EXPORT_SYMBOL(tcp_sock_set_user_timeout); +@@ -3726,7 +3726,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + if (val < 0) + err = -EINVAL; + else +- icsk->icsk_user_timeout = val; ++ WRITE_ONCE(icsk->icsk_user_timeout, val); + break; + + case TCP_FASTOPEN: +@@ -4243,7 +4243,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_USER_TIMEOUT: +- val = icsk->icsk_user_timeout; ++ val = READ_ONCE(icsk->icsk_user_timeout); + break; + + case TCP_FASTOPEN: +-- +2.39.2 + diff --git 
a/queue-6.1/tcp-annotate-data-races-around-rskq_defer_accept.patch b/queue-6.1/tcp-annotate-data-races-around-rskq_defer_accept.patch new file mode 100644 index 00000000000..11e7afc0472 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-rskq_defer_accept.patch @@ -0,0 +1,53 @@ +From 7cb1fa4e8fc2528b3c95ebf4367b85eaf269c0e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:54 +0000 +Subject: tcp: annotate data-races around rskq_defer_accept + +From: Eric Dumazet + +[ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ] + +do_tcp_getsockopt() reads rskq_defer_accept while another cpu +might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 363535b6ece83..bc3ad48f92389 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3700,9 +3700,9 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + + case TCP_DEFER_ACCEPT: + /* Translate value in seconds to number of retransmits */ +- icsk->icsk_accept_queue.rskq_defer_accept = +- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, +- TCP_RTO_MAX / HZ); ++ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, ++ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, ++ TCP_RTO_MAX / HZ)); + break; + + case TCP_WINDOW_CLAMP: +@@ -4104,8 +4104,9 @@ int do_tcp_getsockopt(struct sock *sk, int level, + val = (val ? 
: READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; + break; + case TCP_DEFER_ACCEPT: +- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, +- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); ++ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); ++ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, ++ TCP_RTO_MAX / HZ); + break; + case TCP_WINDOW_CLAMP: + val = tp->window_clamp; +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch b/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch new file mode 100644 index 00000000000..ec6abdae945 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch @@ -0,0 +1,184 @@ +From 2a19bb80f620e9115ee081f89944c9fc3882cceb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 14:44:45 +0000 +Subject: tcp: annotate data-races around tcp_rsk(req)->ts_recent + +From: Eric Dumazet + +[ Upstream commit eba20811f32652bc1a52d5e7cc403859b86390d9 ] + +TCP request sockets are lockless, tcp_rsk(req)->ts_recent +can change while being read by another cpu as syzbot noticed. + +This is harmless, but we should annotate the known races. + +Note that tcp_check_req() changes req->ts_recent a bit early, +we might change this in the future. 
+ +BUG: KCSAN: data-race in tcp_check_req / tcp_check_req + +write to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 1: +tcp_check_req+0x694/0xc70 net/ipv4/tcp_minisocks.c:762 +tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071 +ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205 +ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233 +NF_HOOK include/linux/netfilter.h:303 [inline] +ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254 +dst_input include/net/dst.h:468 [inline] +ip_rcv_finish net/ipv4/ip_input.c:449 [inline] +NF_HOOK include/linux/netfilter.h:303 [inline] +ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569 +__netif_receive_skb_one_core net/core/dev.c:5493 [inline] +__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607 +process_backlog+0x21f/0x380 net/core/dev.c:5935 +__napi_poll+0x60/0x3b0 net/core/dev.c:6498 +napi_poll net/core/dev.c:6565 [inline] +net_rx_action+0x32b/0x750 net/core/dev.c:6698 +__do_softirq+0xc1/0x265 kernel/softirq.c:571 +do_softirq+0x7e/0xb0 kernel/softirq.c:472 +__local_bh_enable_ip+0x64/0x70 kernel/softirq.c:396 +local_bh_enable+0x1f/0x20 include/linux/bottom_half.h:33 +rcu_read_unlock_bh include/linux/rcupdate.h:843 [inline] +__dev_queue_xmit+0xabb/0x1d10 net/core/dev.c:4271 +dev_queue_xmit include/linux/netdevice.h:3088 [inline] +neigh_hh_output include/net/neighbour.h:528 [inline] +neigh_output include/net/neighbour.h:542 [inline] +ip_finish_output2+0x700/0x840 net/ipv4/ip_output.c:229 +ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:317 +NF_HOOK_COND include/linux/netfilter.h:292 [inline] +ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:431 +dst_output include/net/dst.h:458 [inline] +ip_local_out net/ipv4/ip_output.c:126 [inline] +__ip_queue_xmit+0xa4d/0xa70 net/ipv4/ip_output.c:533 +ip_queue_xmit+0x38/0x40 net/ipv4/ip_output.c:547 +__tcp_transmit_skb+0x1194/0x16e0 net/ipv4/tcp_output.c:1399 +tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline] +tcp_write_xmit+0x13ff/0x2fd0 net/ipv4/tcp_output.c:2693 
+__tcp_push_pending_frames+0x6a/0x1a0 net/ipv4/tcp_output.c:2877 +tcp_push_pending_frames include/net/tcp.h:1952 [inline] +__tcp_sock_set_cork net/ipv4/tcp.c:3336 [inline] +tcp_sock_set_cork+0xe8/0x100 net/ipv4/tcp.c:3343 +rds_tcp_xmit_path_complete+0x3b/0x40 net/rds/tcp_send.c:52 +rds_send_xmit+0xf8d/0x1420 net/rds/send.c:422 +rds_send_worker+0x42/0x1d0 net/rds/threads.c:200 +process_one_work+0x3e6/0x750 kernel/workqueue.c:2408 +worker_thread+0x5f2/0xa10 kernel/workqueue.c:2555 +kthread+0x1d7/0x210 kernel/kthread.c:379 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 + +read to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 0: +tcp_check_req+0x32a/0xc70 net/ipv4/tcp_minisocks.c:622 +tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071 +ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205 +ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233 +NF_HOOK include/linux/netfilter.h:303 [inline] +ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254 +dst_input include/net/dst.h:468 [inline] +ip_rcv_finish net/ipv4/ip_input.c:449 [inline] +NF_HOOK include/linux/netfilter.h:303 [inline] +ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569 +__netif_receive_skb_one_core net/core/dev.c:5493 [inline] +__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607 +process_backlog+0x21f/0x380 net/core/dev.c:5935 +__napi_poll+0x60/0x3b0 net/core/dev.c:6498 +napi_poll net/core/dev.c:6565 [inline] +net_rx_action+0x32b/0x750 net/core/dev.c:6698 +__do_softirq+0xc1/0x265 kernel/softirq.c:571 +run_ksoftirqd+0x17/0x20 kernel/softirq.c:939 +smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164 +kthread+0x1d7/0x210 kernel/kthread.c:379 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 + +value changed: 0x1cd237f1 -> 0x1cd237f2 + +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230717144445.653164-3-edumazet@google.com +Signed-off-by: Jakub 
Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 2 +- + net/ipv4/tcp_minisocks.c | 9 ++++++--- + net/ipv4/tcp_output.c | 2 +- + net/ipv6/tcp_ipv6.c | 2 +- + 4 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index e5df50b3e23a0..d49a66b271d52 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -988,7 +988,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, +- req->ts_recent, ++ READ_ONCE(req->ts_recent), + 0, + tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index f281eab7fd125..42844d20da020 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, + newtp->max_window = newtp->snd_wnd; + + if (newtp->rx_opt.tstamp_ok) { +- newtp->rx_opt.ts_recent = req->ts_recent; ++ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); + newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); + newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { +@@ -601,7 +601,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); + + if (tmp_opt.saw_tstamp) { +- tmp_opt.ts_recent = req->ts_recent; ++ tmp_opt.ts_recent = READ_ONCE(req->ts_recent); + if (tmp_opt.rcv_tsecr) + tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; + /* We do not store true stamp, but it is not required, +@@ -740,8 +740,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + + /* In sequence, PAWS is OK. */ + ++ /* TODO: We probably should defer ts_recent change once ++ * we take ownership of @req. 
++ */ + if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) +- req->ts_recent = tmp_opt.rcv_tsval; ++ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); + + if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { + /* Truncate SYN, it is out of window starting +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 693a29d3f43bd..26bd039f9296f 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -876,7 +876,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, + if (likely(ireq->tstamp_ok)) { + opts->options |= OPTION_TS; + opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; +- opts->tsecr = req->ts_recent; ++ opts->tsecr = READ_ONCE(req->ts_recent); + remaining -= TCPOLEN_TSTAMP_ALIGNED; + } + if (likely(ireq->sack_ok)) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 0dcb06a1fe044..d9253aa764fae 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1130,7 +1130,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, +- req->ts_recent, sk->sk_bound_dev_if, ++ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), + ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, + READ_ONCE(tcp_rsk(req)->txhash)); +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch b/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch new file mode 100644 index 00000000000..7cee347686d --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch @@ -0,0 +1,170 @@ +From d29e41820d443947afb2314e6e9891e047903726 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jul 2023 14:44:44 +0000 +Subject: tcp: annotate data-races around tcp_rsk(req)->txhash + +From: Eric Dumazet + +[ Upstream commit 
5e5265522a9a7f91d1b0bd411d634bdaf16c80cd ] + +TCP request sockets are lockless, some of their fields +can change while being read by another cpu as syzbot noticed. + +This is usually harmless, but we should annotate the known +races. + +This patch takes care of tcp_rsk(req)->txhash, +a separate one is needed for tcp_rsk(req)->ts_recent. + +BUG: KCSAN: data-race in tcp_make_synack / tcp_rtx_synack + +write to 0xffff8881362304bc of 4 bytes by task 32083 on cpu 1: +tcp_rtx_synack+0x9d/0x2a0 net/ipv4/tcp_output.c:4213 +inet_rtx_syn_ack+0x38/0x80 net/ipv4/inet_connection_sock.c:880 +tcp_check_req+0x379/0xc70 net/ipv4/tcp_minisocks.c:665 +tcp_v6_rcv+0x125b/0x1b20 net/ipv6/tcp_ipv6.c:1673 +ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437 +ip6_input_finish net/ipv6/ip6_input.c:482 [inline] +NF_HOOK include/linux/netfilter.h:303 [inline] +ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491 +dst_input include/net/dst.h:468 [inline] +ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79 +NF_HOOK include/linux/netfilter.h:303 [inline] +ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309 +__netif_receive_skb_one_core net/core/dev.c:5452 [inline] +__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566 +netif_receive_skb_internal net/core/dev.c:5652 [inline] +netif_receive_skb+0x4a/0x310 net/core/dev.c:5711 +tun_rx_batched+0x3bf/0x400 +tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997 +tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043 +call_write_iter include/linux/fs.h:1871 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x4ab/0x7d0 fs/read_write.c:584 +ksys_write+0xeb/0x1a0 fs/read_write.c:637 +__do_sys_write fs/read_write.c:649 [inline] +__se_sys_write fs/read_write.c:646 [inline] +__x64_sys_write+0x42/0x50 fs/read_write.c:646 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff8881362304bc of 4 bytes by task 32078 on cpu 0: +tcp_make_synack+0x367/0xb40 
net/ipv4/tcp_output.c:3663 +tcp_v6_send_synack+0x72/0x420 net/ipv6/tcp_ipv6.c:544 +tcp_conn_request+0x11a8/0x1560 net/ipv4/tcp_input.c:7059 +tcp_v6_conn_request+0x13f/0x180 net/ipv6/tcp_ipv6.c:1175 +tcp_rcv_state_process+0x156/0x1de0 net/ipv4/tcp_input.c:6494 +tcp_v6_do_rcv+0x98a/0xb70 net/ipv6/tcp_ipv6.c:1509 +tcp_v6_rcv+0x17b8/0x1b20 net/ipv6/tcp_ipv6.c:1735 +ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437 +ip6_input_finish net/ipv6/ip6_input.c:482 [inline] +NF_HOOK include/linux/netfilter.h:303 [inline] +ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491 +dst_input include/net/dst.h:468 [inline] +ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79 +NF_HOOK include/linux/netfilter.h:303 [inline] +ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309 +__netif_receive_skb_one_core net/core/dev.c:5452 [inline] +__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566 +netif_receive_skb_internal net/core/dev.c:5652 [inline] +netif_receive_skb+0x4a/0x310 net/core/dev.c:5711 +tun_rx_batched+0x3bf/0x400 +tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997 +tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043 +call_write_iter include/linux/fs.h:1871 [inline] +new_sync_write fs/read_write.c:491 [inline] +vfs_write+0x4ab/0x7d0 fs/read_write.c:584 +ksys_write+0xeb/0x1a0 fs/read_write.c:637 +__do_sys_write fs/read_write.c:649 [inline] +__se_sys_write fs/read_write.c:646 [inline] +__x64_sys_write+0x42/0x50 fs/read_write.c:646 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x91d25731 -> 0xe79325cd + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 32078 Comm: syz-executor.4 Not tainted 6.5.0-rc1-syzkaller-00033-geb26cbb1a754 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023 + +Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Reviewed-by: Kuniyuki 
Iwashima +Link: https://lore.kernel.org/r/20230717144445.653164-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 3 ++- + net/ipv4/tcp_minisocks.c | 2 +- + net/ipv4/tcp_output.c | 4 ++-- + net/ipv6/tcp_ipv6.c | 2 +- + 4 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index ef740983a1222..e5df50b3e23a0 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -992,7 +992,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + 0, + tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, +- ip_hdr(skb)->tos, tcp_rsk(req)->txhash); ++ ip_hdr(skb)->tos, ++ READ_ONCE(tcp_rsk(req)->txhash)); + } + + /* +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index 7f37e7da64671..f281eab7fd125 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, + newicsk->icsk_ack.lrcvtime = tcp_jiffies32; + + newtp->lsndtime = tcp_jiffies32; +- newsk->sk_txhash = treq->txhash; ++ newsk->sk_txhash = READ_ONCE(treq->txhash); + newtp->total_retrans = req->num_retrans; + + tcp_init_xmit_timers(newsk); +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 925594dbeb929..693a29d3f43bd 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3581,7 +3581,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, + rcu_read_lock(); + md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); + #endif +- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); ++ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4); + /* bpf program will be interested in the tcp_flags */ + TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK; + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, +@@ -4124,7 
+4124,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) + + /* Paired with WRITE_ONCE() in sock_setsockopt() */ + if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED) +- tcp_rsk(req)->txhash = net_tx_rndhash(); ++ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash()); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, + NULL); + if (!res) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 8d61efeab9c99..0dcb06a1fe044 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1133,7 +1133,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + req->ts_recent, sk->sk_bound_dev_if, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), + ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, +- tcp_rsk(req)->txhash); ++ READ_ONCE(tcp_rsk(req)->txhash)); + } + + +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_intvl.patch b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_intvl.patch new file mode 100644 index 00000000000..5dfc88a4ed2 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_intvl.patch @@ -0,0 +1,68 @@ +From 078902bb3940caf45e1f58470e88e8184a16486d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:50 +0000 +Subject: tcp: annotate data-races around tp->keepalive_intvl + +From: Eric Dumazet + +[ Upstream commit 5ecf9d4f52ff2f1d4d44c9b68bc75688e82f13b4 ] + +do_tcp_getsockopt() reads tp->keepalive_intvl while another cpu +might change its value. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-5-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 9 +++++++-- + net/ipv4/tcp.c | 4 ++-- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 397c248102415..f39c44cbdfe62 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1511,9 +1511,14 @@ void tcp_leave_memory_pressure(struct sock *sk); + static inline int keepalive_intvl_when(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); ++ int val; ++ ++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl() ++ * and do_tcp_setsockopt(). ++ */ ++ val = READ_ONCE(tp->keepalive_intvl); + +- return tp->keepalive_intvl ? : +- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); ++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); + } + + static inline int keepalive_time_when(const struct tcp_sock *tp) +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index c0d7b226bca1a..d19cfeb78392d 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3451,7 +3451,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val) + return -EINVAL; + + lock_sock(sk); +- tcp_sk(sk)->keepalive_intvl = val * HZ; ++ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); + release_sock(sk); + return 0; + } +@@ -3665,7 +3665,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + if (val < 1 || val > MAX_TCP_KEEPINTVL) + err = -EINVAL; + else +- tp->keepalive_intvl = val * HZ; ++ WRITE_ONCE(tp->keepalive_intvl, val * HZ); + break; + case TCP_KEEPCNT: + if (val < 1 || val > MAX_TCP_KEEPCNT) +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_probes.patch b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_probes.patch new file mode 100644 index 00000000000..8df99735c91 --- /dev/null +++ 
b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_probes.patch @@ -0,0 +1,69 @@ +From 8b50db4f550c9b4fa395cb961dd7c9ab6b4ac010 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:51 +0000 +Subject: tcp: annotate data-races around tp->keepalive_probes + +From: Eric Dumazet + +[ Upstream commit 6e5e1de616bf5f3df1769abc9292191dfad9110a ] + +do_tcp_getsockopt() reads tp->keepalive_probes while another cpu +might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-6-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 9 +++++++-- + net/ipv4/tcp.c | 5 +++-- + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index f39c44cbdfe62..9733d8e4f10af 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1535,9 +1535,14 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) + static inline int keepalive_probes(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); ++ int val; ++ ++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt() ++ * and do_tcp_setsockopt(). ++ */ ++ val = READ_ONCE(tp->keepalive_probes); + +- return tp->keepalive_probes ? : +- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); ++ return val ? 
: READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); + } + + static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index d19cfeb78392d..7d75928ea0f9c 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3463,7 +3463,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val) + return -EINVAL; + + lock_sock(sk); +- tcp_sk(sk)->keepalive_probes = val; ++ /* Paired with READ_ONCE() in keepalive_probes() */ ++ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); + release_sock(sk); + return 0; + } +@@ -3671,7 +3672,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + if (val < 1 || val > MAX_TCP_KEEPCNT) + err = -EINVAL; + else +- tp->keepalive_probes = val; ++ WRITE_ONCE(tp->keepalive_probes, val); + break; + case TCP_SYNCNT: + if (val < 1 || val > MAX_TCP_SYNCNT) +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_time.patch b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_time.patch new file mode 100644 index 00000000000..5c5aa55e06b --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-keepalive_time.patch @@ -0,0 +1,58 @@ +From 9121aedbe1355d93c6f3ab514d0878a9099021f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:49 +0000 +Subject: tcp: annotate data-races around tp->keepalive_time + +From: Eric Dumazet + +[ Upstream commit 4164245c76ff906c9086758e1c3f87082a7f5ef5 ] + +do_tcp_getsockopt() reads tp->keepalive_time while another cpu +might change its value. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-4-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 7 +++++-- + net/ipv4/tcp.c | 3 ++- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 5eedd476a38d7..397c248102415 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1519,9 +1519,12 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) + static inline int keepalive_time_when(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); ++ int val; + +- return tp->keepalive_time ? : +- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); ++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */ ++ val = READ_ONCE(tp->keepalive_time); ++ ++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + } + + static inline int keepalive_probes(const struct tcp_sock *tp) +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 3edf7a1c5cbd2..c0d7b226bca1a 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3418,7 +3418,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) + if (val < 1 || val > MAX_TCP_KEEPIDLE) + return -EINVAL; + +- tp->keepalive_time = val * HZ; ++ /* Paired with READ_ONCE() in keepalive_time_when() */ ++ WRITE_ONCE(tp->keepalive_time, val * HZ); + if (sock_flag(sk, SOCK_KEEPOPEN) && + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + u32 elapsed = keepalive_time_elapsed(tp); +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-linger2.patch b/queue-6.1/tcp-annotate-data-races-around-tp-linger2.patch new file mode 100644 index 00000000000..4c9751d2f34 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-linger2.patch @@ -0,0 +1,52 @@ +From 3d98c816d1920605a924d0ead6bf2be144e81749 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:53 +0000
+Subject: tcp: annotate data-races around tp->linger2 + +From: Eric Dumazet + +[ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ] + +do_tcp_getsockopt() reads tp->linger2 while another cpu +might change its value. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index ffa9717293358..363535b6ece83 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3691,11 +3691,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + + case TCP_LINGER2: + if (val < 0) +- tp->linger2 = -1; ++ WRITE_ONCE(tp->linger2, -1); + else if (val > TCP_FIN_TIMEOUT_MAX / HZ) +- tp->linger2 = TCP_FIN_TIMEOUT_MAX; ++ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); + else +- tp->linger2 = val * HZ; ++ WRITE_ONCE(tp->linger2, val * HZ); + break; + + case TCP_DEFER_ACCEPT: +@@ -4099,7 +4099,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); + break; + case TCP_LINGER2: +- val = tp->linger2; ++ val = READ_ONCE(tp->linger2); + if (val >= 0) + val = (val ? 
: READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; + break; +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-notsent_lowat.patch b/queue-6.1/tcp-annotate-data-races-around-tp-notsent_lowat.patch new file mode 100644 index 00000000000..76a913e6334 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-notsent_lowat.patch @@ -0,0 +1,64 @@ +From e13aeaa389758176f64c75eeb7dd1bf6ebee1871 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:55 +0000 +Subject: tcp: annotate data-races around tp->notsent_lowat + +From: Eric Dumazet + +[ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ] + +tp->notsent_lowat can be read locklessly from do_tcp_getsockopt() +and tcp_poll(). + +Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 6 +++++- + net/ipv4/tcp.c | 4 ++-- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 9733d8e4f10af..e9c8f88f47696 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -2059,7 +2059,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); + static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); +- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); ++ u32 val; ++ ++ val = READ_ONCE(tp->notsent_lowat); ++ ++ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + } + + bool tcp_stream_memory_free(const struct sock *sk, int wake); +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index bc3ad48f92389..6f3a494b965ae 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3770,7 +3770,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + err = tcp_repair_set_window(tp, optval, optlen); + break; + case 
TCP_NOTSENT_LOWAT: +- tp->notsent_lowat = val; ++ WRITE_ONCE(tp->notsent_lowat, val); + sk->sk_write_space(sk); + break; + case TCP_INQ: +@@ -4266,7 +4266,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); + break; + case TCP_NOTSENT_LOWAT: +- val = tp->notsent_lowat; ++ val = READ_ONCE(tp->notsent_lowat); + break; + case TCP_INQ: + val = tp->recvmsg_inq; +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch b/queue-6.1/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch new file mode 100644 index 00000000000..89755e23176 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-tcp_tx_delay.patch @@ -0,0 +1,46 @@ +From acc05127977764c50f101313e03fed5dd0b7728e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:47 +0000 +Subject: tcp: annotate data-races around tp->tcp_tx_delay + +From: Eric Dumazet + +[ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ] + +do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu +might change its value. 
+ +Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 0bd0be3c63d22..5e4bc80dc0ae5 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3780,7 +3780,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + case TCP_TX_DELAY: + if (val) + tcp_enable_tx_delay(); +- tp->tcp_tx_delay = val; ++ WRITE_ONCE(tp->tcp_tx_delay, val); + break; + default: + err = -ENOPROTOOPT; +@@ -4256,7 +4256,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_TX_DELAY: +- val = tp->tcp_tx_delay; ++ val = READ_ONCE(tp->tcp_tx_delay); + break; + + case TCP_TIMESTAMP: +-- +2.39.2 + diff --git a/queue-6.1/tcp-annotate-data-races-around-tp-tsoffset.patch b/queue-6.1/tcp-annotate-data-races-around-tp-tsoffset.patch new file mode 100644 index 00000000000..b1de5b67a70 --- /dev/null +++ b/queue-6.1/tcp-annotate-data-races-around-tp-tsoffset.patch @@ -0,0 +1,63 @@ +From 5cb5df7c5c218e8bc062747711555eb97a17ceb0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jul 2023 21:28:48 +0000 +Subject: tcp: annotate data-races around tp->tsoffset + +From: Eric Dumazet + +[ Upstream commit dd23c9f1e8d5c1d2e3d29393412385ccb9c7a948 ] + +do_tcp_getsockopt() reads tp->tsoffset while another cpu +might change its value. 
+ +Fixes: 93be6ce0e91b ("tcp: set and get per-socket timestamp") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230719212857.3943972-3-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 4 ++-- + net/ipv4/tcp_ipv4.c | 5 +++-- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 5e4bc80dc0ae5..3edf7a1c5cbd2 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3762,7 +3762,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, + if (!tp->repair) + err = -EPERM; + else +- tp->tsoffset = val - tcp_time_stamp_raw(); ++ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw()); + break; + case TCP_REPAIR_WINDOW: + err = tcp_repair_set_window(tp, optval, optlen); +@@ -4260,7 +4260,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, + break; + + case TCP_TIMESTAMP: +- val = tcp_time_stamp_raw() + tp->tsoffset; ++ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); + break; + case TCP_NOTSENT_LOWAT: + val = tp->notsent_lowat; +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index d49a66b271d52..9a8d59e9303a0 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -307,8 +307,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + inet->inet_daddr, + inet->inet_sport, + usin->sin_port)); +- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr, +- inet->inet_daddr); ++ WRITE_ONCE(tp->tsoffset, ++ secure_tcp_ts_off(net, inet->inet_saddr, ++ inet->inet_daddr)); + } + + inet->inet_id = get_random_u16(); +-- +2.39.2 + diff --git a/queue-6.1/udf-fix-uninitialized-array-access-for-some-pathname.patch b/queue-6.1/udf-fix-uninitialized-array-access-for-some-pathname.patch new file mode 100644 index 00000000000..c51ebdbd8e4 --- /dev/null +++ b/queue-6.1/udf-fix-uninitialized-array-access-for-some-pathname.patch @@ -0,0 +1,41 @@ +From 3af33ea1ad72a1fc6ed5074f0ce9e16cc52c818e Mon Sep 17 00:00:00 2001 +From: 
Jan Kara +Date: Wed, 21 Jun 2023 11:32:35 +0200 +Subject: [PATCH AUTOSEL 4.19 07/11] udf: Fix uninitialized array access for + some pathnames +X-stable: review +X-Patchwork-Hint: Ignore +X-stable-base: Linux 4.19.288 + +[ Upstream commit 028f6055c912588e6f72722d89c30b401bbcf013 ] + +For filenames that begin with . and are between 2 and 5 characters long, +UDF charset conversion code would read uninitialized memory in the +output buffer. The only practical impact is that the name may be prepended a +"unification hash" when it is not actually needed but still it is good +to fix this. + +Reported-by: syzbot+cd311b1e43cc25f90d18@syzkaller.appspotmail.com +Link: https://lore.kernel.org/all/000000000000e2638a05fe9dc8f9@google.com +Signed-off-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/udf/unicode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c +index 5fcfa96463ebb..85521d6b02370 100644 +--- a/fs/udf/unicode.c ++++ b/fs/udf/unicode.c +@@ -247,7 +247,7 @@ static int udf_name_from_CS0(struct super_block *sb, + } + + if (translate) { +- if (str_o_len <= 2 && str_o[0] == '.' && ++ if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' 
&& + (str_o_len == 1 || str_o[1] == '.')) + needsCRC = 1; + if (needsCRC) { +-- +2.39.2 + diff --git a/queue-6.1/wifi-ath11k-add-support-default-regdb-while-searchin.patch b/queue-6.1/wifi-ath11k-add-support-default-regdb-while-searchin.patch new file mode 100644 index 00000000000..0a2b80985d3 --- /dev/null +++ b/queue-6.1/wifi-ath11k-add-support-default-regdb-while-searchin.patch @@ -0,0 +1,137 @@ +From 1c0a043a5b5d55b841bdb8e72a4e7dbded64e33b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 May 2023 12:41:06 +0300 +Subject: wifi: ath11k: add support default regdb while searching board-2.bin + for WCN6855 + +From: Wen Gong + +[ Upstream commit 88ca89202f8e8afb5225eb5244d79cd67c15d744 ] + +Sometimes board-2.bin does not have the regdb data which matched the +parameters such as vendor, device, subsystem-vendor, subsystem-device +and etc. Add default regdb data with 'bus=%s' into board-2.bin for +WCN6855, then ath11k use 'bus=pci' to search regdb data in board-2.bin +for WCN6855. 
+ +kernel: [ 122.515808] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262' +kernel: [ 122.517240] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564 +kernel: [ 122.517280] ath11k_pci 0000:03:00.0: failed to fetch regdb data for bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262 from ath11k/WCN6855/hw2.0/board-2.bin +kernel: [ 122.517464] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci' +kernel: [ 122.518901] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564 +kernel: [ 122.518915] ath11k_pci 0000:03:00.0: board name +kernel: [ 122.518917] ath11k_pci 0000:03:00.0: 00000000: 62 75 73 3d 70 63 69 bus=pci +kernel: [ 122.518918] ath11k_pci 0000:03:00.0: boot found match regdb data for name 'bus=pci' +kernel: [ 122.518920] ath11k_pci 0000:03:00.0: boot found regdb data for 'bus=pci' +kernel: [ 122.518921] ath11k_pci 0000:03:00.0: fetched regdb + +Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 + +Signed-off-by: Wen Gong +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20230517133959.8224-1-quic_wgong@quicinc.com +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/ath/ath11k/core.c | 53 +++++++++++++++++++------- + 1 file changed, 40 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c +index b99180bc81723..893fefadbba96 100644 +--- a/drivers/net/wireless/ath/ath11k/core.c ++++ b/drivers/net/wireless/ath/ath11k/core.c +@@ -870,7 +870,8 @@ int ath11k_core_check_dt(struct ath11k_base *ab) + } + + static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name, +- size_t name_len, bool with_variant) ++ size_t name_len, bool with_variant, ++ bool bus_type_mode) + { + /* strlen(',variant=') + 
strlen(ab->qmi.target.bdf_ext) */ + char variant[9 + ATH11K_QMI_BDF_EXT_STR_LENGTH] = { 0 }; +@@ -881,15 +882,20 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name, + + switch (ab->id.bdf_search) { + case ATH11K_BDF_SEARCH_BUS_AND_BOARD: +- scnprintf(name, name_len, +- "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s", +- ath11k_bus_str(ab->hif.bus), +- ab->id.vendor, ab->id.device, +- ab->id.subsystem_vendor, +- ab->id.subsystem_device, +- ab->qmi.target.chip_id, +- ab->qmi.target.board_id, +- variant); ++ if (bus_type_mode) ++ scnprintf(name, name_len, ++ "bus=%s", ++ ath11k_bus_str(ab->hif.bus)); ++ else ++ scnprintf(name, name_len, ++ "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s", ++ ath11k_bus_str(ab->hif.bus), ++ ab->id.vendor, ab->id.device, ++ ab->id.subsystem_vendor, ++ ab->id.subsystem_device, ++ ab->qmi.target.chip_id, ++ ab->qmi.target.board_id, ++ variant); + break; + default: + scnprintf(name, name_len, +@@ -908,13 +914,19 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name, + static int ath11k_core_create_board_name(struct ath11k_base *ab, char *name, + size_t name_len) + { +- return __ath11k_core_create_board_name(ab, name, name_len, true); ++ return __ath11k_core_create_board_name(ab, name, name_len, true, false); + } + + static int ath11k_core_create_fallback_board_name(struct ath11k_base *ab, char *name, + size_t name_len) + { +- return __ath11k_core_create_board_name(ab, name, name_len, false); ++ return __ath11k_core_create_board_name(ab, name, name_len, false, false); ++} ++ ++static int ath11k_core_create_bus_type_board_name(struct ath11k_base *ab, char *name, ++ size_t name_len) ++{ ++ return __ath11k_core_create_board_name(ab, name, name_len, false, true); + } + + const struct firmware *ath11k_core_firmware_request(struct ath11k_base *ab, +@@ -1218,7 +1230,7 @@ 
int ath11k_core_fetch_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd) + + int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd) + { +- char boardname[BOARD_NAME_SIZE]; ++ char boardname[BOARD_NAME_SIZE], default_boardname[BOARD_NAME_SIZE]; + int ret; + + ret = ath11k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE); +@@ -1235,6 +1247,21 @@ int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd + if (!ret) + goto exit; + ++ ret = ath11k_core_create_bus_type_board_name(ab, default_boardname, ++ BOARD_NAME_SIZE); ++ if (ret) { ++ ath11k_dbg(ab, ATH11K_DBG_BOOT, ++ "failed to create default board name for regdb: %d", ret); ++ goto exit; ++ } ++ ++ ret = ath11k_core_fetch_board_data_api_n(ab, bd, default_boardname, ++ ATH11K_BD_IE_REGDB, ++ ATH11K_BD_IE_REGDB_NAME, ++ ATH11K_BD_IE_REGDB_DATA); ++ if (!ret) ++ goto exit; ++ + ret = ath11k_core_fetch_board_data_api_1(ab, bd, ATH11K_REGDB_FILE_NAME); + if (ret) + ath11k_dbg(ab, ATH11K_DBG_BOOT, "failed to fetch %s from %s\n", +-- +2.39.2 + diff --git a/queue-6.1/wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch b/queue-6.1/wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch new file mode 100644 index 00000000000..94851f54743 --- /dev/null +++ b/queue-6.1/wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch @@ -0,0 +1,63 @@ +From d4bcf71d3c456ca0656ec111454eda83581a3d2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 14:41:28 +0530 +Subject: wifi: ath11k: fix memory leak in WMI firmware stats + +From: P Praneesh + +[ Upstream commit 6aafa1c2d3e3fea2ebe84c018003f2a91722e607 ] + +Memory allocated for firmware pdev, vdev and beacon statistics +are not released during rmmod. + +Fix it by calling ath11k_fw_stats_free() function before hardware +unregister. 
+ +While at it, avoid calling ath11k_fw_stats_free() while processing +the firmware stats received in the WMI event because the local list +is getting spliced and reinitialised and hence there are no elements +in the list after splicing. + +Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 + +Signed-off-by: P Praneesh +Signed-off-by: Aditya Kumar Singh +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20230606091128.14202-1-quic_adisi@quicinc.com +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/ath/ath11k/mac.c | 1 + + drivers/net/wireless/ath/ath11k/wmi.c | 5 +++++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c +index b19d44b3f5dfb..cb77dd6ce9665 100644 +--- a/drivers/net/wireless/ath/ath11k/mac.c ++++ b/drivers/net/wireless/ath/ath11k/mac.c +@@ -9279,6 +9279,7 @@ void ath11k_mac_destroy(struct ath11k_base *ab) + if (!ar) + continue; + ++ ath11k_fw_stats_free(&ar->fw_stats); + ieee80211_free_hw(ar->hw); + pdev->ar = NULL; + } +diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c +index fad9f8d308a20..3e0a47f4a3ebd 100644 +--- a/drivers/net/wireless/ath/ath11k/wmi.c ++++ b/drivers/net/wireless/ath/ath11k/wmi.c +@@ -7590,6 +7590,11 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk + rcu_read_unlock(); + spin_unlock_bh(&ar->data_lock); + ++ /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised ++ * at this point, no need to free the individual list. 
++ */ ++ return; ++ + free: + ath11k_fw_stats_free(&stats); + } +-- +2.39.2 + diff --git a/queue-6.1/wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch b/queue-6.1/wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch new file mode 100644 index 00000000000..38a06246e6d --- /dev/null +++ b/queue-6.1/wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch @@ -0,0 +1,71 @@ +From 885bcbfa0c9659fa068668223c2f45c63640b4c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 21 Apr 2023 16:54:45 +0200 +Subject: wifi: ath11k: fix registration of 6Ghz-only phy without the full + channel range + +From: Maxime Bizon + +[ Upstream commit e2ceb1de2f83aafd8003f0b72dfd4b7441e97d14 ] + +Because of what seems to be a typo, a 6Ghz-only phy for which the BDF +does not allow the 7115Mhz channel will fail to register: + + WARNING: CPU: 2 PID: 106 at net/wireless/core.c:907 wiphy_register+0x914/0x954 + Modules linked in: ath11k_pci sbsa_gwdt + CPU: 2 PID: 106 Comm: kworker/u8:5 Not tainted 6.3.0-rc7-next-20230418-00549-g1e096a17625a-dirty #9 + Hardware name: Freebox V7R Board (DT) + Workqueue: ath11k_qmi_driver_event ath11k_qmi_driver_event_work + pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : wiphy_register+0x914/0x954 + lr : ieee80211_register_hw+0x67c/0xc10 + sp : ffffff800b123aa0 + x29: ffffff800b123aa0 x28: 0000000000000000 x27: 0000000000000000 + x26: 0000000000000000 x25: 0000000000000006 x24: ffffffc008d51418 + x23: ffffffc008cb0838 x22: ffffff80176c2460 x21: 0000000000000168 + x20: ffffff80176c0000 x19: ffffff80176c03e0 x18: 0000000000000014 + x17: 00000000cbef338c x16: 00000000d2a26f21 x15: 00000000ad6bb85f + x14: 0000000000000020 x13: 0000000000000020 x12: 00000000ffffffbd + x11: 0000000000000208 x10: 00000000fffffdf7 x9 : ffffffc009394718 + x8 : ffffff80176c0528 x7 : 000000007fffffff x6 : 0000000000000006 + x5 : 0000000000000005 x4 : ffffff800b304284 x3 : ffffff800b304284 + x2 : ffffff800b304d98 x1 : 0000000000000000 x0 : 
0000000000000000 + Call trace: + wiphy_register+0x914/0x954 + ieee80211_register_hw+0x67c/0xc10 + ath11k_mac_register+0x7c4/0xe10 + ath11k_core_qmi_firmware_ready+0x1f4/0x570 + ath11k_qmi_driver_event_work+0x198/0x590 + process_one_work+0x1b8/0x328 + worker_thread+0x6c/0x414 + kthread+0x100/0x104 + ret_from_fork+0x10/0x20 + ---[ end trace 0000000000000000 ]--- + ath11k_pci 0002:01:00.0: ieee80211 registration failed: -22 + ath11k_pci 0002:01:00.0: failed register the radio with mac80211: -22 + ath11k_pci 0002:01:00.0: failed to create pdev core: -22 + +Signed-off-by: Maxime Bizon +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20230421145445.2612280-1-mbizon@freebox.fr +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/ath/ath11k/mac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c +index ef7617802491e..b19d44b3f5dfb 100644 +--- a/drivers/net/wireless/ath/ath11k/mac.c ++++ b/drivers/net/wireless/ath/ath11k/mac.c +@@ -8715,7 +8715,7 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar, + } + + if (supported_bands & WMI_HOST_WLAN_5G_CAP) { +- if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) { ++ if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) { + channels = kmemdup(ath11k_6ghz_channels, + sizeof(ath11k_6ghz_channels), GFP_KERNEL); + if (!channels) { +-- +2.39.2 + diff --git a/queue-6.1/wifi-iwlwifi-add-support-for-new-pci-id.patch b/queue-6.1/wifi-iwlwifi-add-support-for-new-pci-id.patch new file mode 100644 index 00000000000..f23938ad5d1 --- /dev/null +++ b/queue-6.1/wifi-iwlwifi-add-support-for-new-pci-id.patch @@ -0,0 +1,43 @@ +From 1a37162f09f199864048ac62ae05cc6310aef58f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 13:03:59 +0300 +Subject: wifi: iwlwifi: Add support for new PCI Id + +From: Mukesh Sisodiya + +[ Upstream commit 35bd6f1d043d089fcb60450e1287cc65f0095787 ] + +Add support for the PCI Id 51F1 
without IMR support. + +Signed-off-by: Mukesh Sisodiya +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230620125813.9800e652e789.Ic06a085832ac3f988c8ef07d856c8e281563295d@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +index f6872b2a0d9d0..d5bd869086458 100644 +--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +@@ -495,6 +495,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { + {IWL_PCI_DEVICE(0x7AF0, PCI_ANY_ID, iwl_so_trans_cfg)}, + {IWL_PCI_DEVICE(0x51F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)}, + {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_imr_trans_cfg)}, ++ {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)}, + {IWL_PCI_DEVICE(0x54F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)}, + {IWL_PCI_DEVICE(0x7F70, PCI_ANY_ID, iwl_so_trans_cfg)}, + +@@ -543,6 +544,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { + IWL_DEV_INFO(0x51F0, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_160_name), + IWL_DEV_INFO(0x51F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x51F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), ++ IWL_DEV_INFO(0x51F1, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x54F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x54F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7A70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), +-- +2.39.2 + diff --git a/queue-6.1/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch b/queue-6.1/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch new file mode 100644 index 
00000000000..bbc97894d10 --- /dev/null +++ b/queue-6.1/wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch @@ -0,0 +1,47 @@ +From dd01d6d149a5c58b8f2f7d9e9211ce28c8befd64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 13:04:02 +0300 +Subject: wifi: iwlwifi: mvm: avoid baid size integer overflow + +From: Johannes Berg + +[ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ] + +Roee reported various hard-to-debug crashes with pings in +EHT aggregation scenarios. Enabling KASAN showed that we +access the BAID allocation out of bounds, and looking at +the code a bit shows that since the reorder buffer entry +(struct iwl_mvm_reorder_buf_entry) is 128 bytes if debug +such as lockdep is enabled, then starting from an agg size +512 we overflow the size calculation, and allocate a much +smaller structure than we should, causing slab corruption +once we initialize this. + +Fix this by simply using u32 instead of u16. + +Reported-by: Roee Goldfiner +Signed-off-by: Johannes Berg +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +index 013aca70c3d3b..6b52afcf02721 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +@@ -2738,7 +2738,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, + } + + if (iwl_mvm_has_new_rx_api(mvm) && start) { +- u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); ++ u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); + + /* sparse doesn't like the __align() so don't check */ + #ifndef __CHECKER__ +-- +2.39.2 + diff --git
a/queue-6.1/wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch b/queue-6.1/wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch new file mode 100644 index 00000000000..5b4e16636a3 --- /dev/null +++ b/queue-6.1/wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch @@ -0,0 +1,38 @@ +From 80c181a4bc2b86eb00ab6e09dcbcdda26aa6fc13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Jun 2023 13:12:20 +0300 +Subject: wifi: iwlwifi: pcie: add device id 51F1 for killer 1675 + +From: Yi Kuo + +[ Upstream commit f4daceae4087bbb3e9a56044b44601d520d009d2 ] + +Intel Killer AX1675i/s with device id 51f1 would show +"No config found for PCI dev 51f1/1672" in dmesg and refuse to work. +Add the new device id 51F1 for 1675i/s to fix the issue. + +Signed-off-by: Yi Kuo +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230621130444.ee224675380b.I921c905e21e8d041ad808def8f454f27b5ebcd8b@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +index d5bd869086458..4d4db5f6836be 100644 +--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +@@ -683,6 +683,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { + IWL_DEV_INFO(0x2726, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name), + IWL_DEV_INFO(0x51F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name), + IWL_DEV_INFO(0x51F0, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name), ++ IWL_DEV_INFO(0x51F1, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name), ++ IWL_DEV_INFO(0x51F1, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name), + IWL_DEV_INFO(0x54F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name), + IWL_DEV_INFO(0x54F0, 0x1672, 
iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name), + IWL_DEV_INFO(0x7A70, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name), +-- +2.39.2 + diff --git a/queue-6.1/wifi-mac80211_hwsim-fix-possible-null-dereference.patch b/queue-6.1/wifi-mac80211_hwsim-fix-possible-null-dereference.patch new file mode 100644 index 00000000000..3a94dfeda97 --- /dev/null +++ b/queue-6.1/wifi-mac80211_hwsim-fix-possible-null-dereference.patch @@ -0,0 +1,46 @@ +From a7163d690f5af8b426d97da0807e07b334cb5bdb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 4 Jun 2023 12:11:27 +0300 +Subject: wifi: mac80211_hwsim: Fix possible NULL dereference + +From: Ilan Peer + +[ Upstream commit 0cc80943ef518a1c51a1111e9346d1daf11dd545 ] + +In a call to mac80211_hwsim_select_tx_link() the sta pointer might +be NULL, thus need to check that it is not NULL before accessing it. + +Signed-off-by: Ilan Peer +Signed-off-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230604120651.f4d889fc98c4.Iae85f527ed245a37637a874bb8b8c83d79812512@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/mac80211_hwsim.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c +index 0d81098c7b45c..da5c355405f68 100644 +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -4,7 +4,7 @@ + * Copyright (c) 2008, Jouni Malinen + * Copyright (c) 2011, Javier Lopez + * Copyright (c) 2016 - 2017 Intel Deutschland GmbH +- * Copyright (C) 2018 - 2022 Intel Corporation ++ * Copyright (C) 2018 - 2023 Intel Corporation + */ + + /* +@@ -1753,7 +1753,7 @@ mac80211_hwsim_select_tx_link(struct mac80211_hwsim_data *data, + + WARN_ON(is_multicast_ether_addr(hdr->addr1)); + +- if (WARN_ON_ONCE(!sta->valid_links)) ++ if (WARN_ON_ONCE(!sta || !sta->valid_links)) + return &vif->bss_conf; + + for (i = 0; i < ARRAY_SIZE(vif->link_conf); i++) { 
+-- +2.39.2 + diff --git a/queue-6.1/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch b/queue-6.1/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch new file mode 100644 index 00000000000..2ed2e2602ab --- /dev/null +++ b/queue-6.1/wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch @@ -0,0 +1,71 @@ +From 683ebdf526ff6b7d1a58030e79ed32ee6779a0ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 12:04:07 -0600 +Subject: wifi: wext-core: Fix -Wstringop-overflow warning in + ioctl_standard_iw_point() + +From: Gustavo A. R. Silva + +[ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ] + +-Wstringop-overflow is legitimately warning us about extra_size +potentially being zero at some point, hence potentially ending +up _allocating_ zero bytes of memory for extra pointer and then +trying to access such object in a call to copy_from_user(). + +Fix this by adding a sanity check to ensure we never end up +trying to allocate zero bytes of data for extra pointer, before +continuing to execute the rest of the code in the function.
+ +Address the following -Wstringop-overflow warning seen when building for the +m68k architecture with allyesconfig configuration: + from net/wireless/wext-core.c:11: +In function '_copy_from_user', + inlined from 'copy_from_user' at include/linux/uaccess.h:183:7, + inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7: +arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] + 48 | #define memset(d, c, n) __builtin_memset(d, c, n) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +include/linux/uaccess.h:153:17: note: in expansion of macro 'memset' + 153 | memset(to + (n - res), 0, res); + | ^~~~~~ +In function 'kmalloc', + inlined from 'kzalloc' at include/linux/slab.h:694:9, + inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10: +include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc' + 577 | return __kmalloc(size, flags); + | ^~~~~~~~~~~~~~~~~~~~~~ + +This helps with the ongoing efforts to globally enable +-Wstringop-overflow. + +Link: https://github.com/KSPP/linux/issues/315 +Signed-off-by: Gustavo A. R. Silva +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/wext-core.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c +index fe8765c4075d3..8a4b85f96a13a 100644 +--- a/net/wireless/wext-core.c ++++ b/net/wireless/wext-core.c +@@ -799,6 +799,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, + } + } + ++ /* Sanity-check to ensure we never end up _allocating_ zero ++ * bytes of data for extra. ++ */ ++ if (extra_size <= 0) ++ return -EFAULT; ++ + /* kzalloc() ensures NULL-termination for essid_compat. */ + extra = kzalloc(extra_size, GFP_KERNEL); + if (!extra) +-- +2.39.2 + -- 2.47.3