From b84c4843d0afc89b7fc121f4462855d7c8af6481 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 19 Feb 2024 20:05:22 +0100 Subject: [PATCH] 6.7-stable patches added patches: arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch asoc-tas2781-add-module-parameter-to-tascodec_init.patch blk-wbt-fix-detection-of-dirty-throttled-tasks.patch can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch ceph-prevent-use-after-free-in-encode_cap_msg.patch crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch docs-kernel_feat.py-fix-build-error-for-missing-files.patch exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch fs-relax-mount_setattr-permission-checks.patch hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch kvm-arm64-fix-circular-locking-dependency.patch kvm-s390-vsie-fix-race-during-shadow-creation.patch loongarch-fix-earlycon-parameter-if-kasan-enabled.patch mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch 
mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch net-stmmac-protect-updates-of-64-bit-statistics-counters.patch nfp-enable-netdev_xdp_act_redirect-feature-flag.patch nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch nfp-use-correct-macro-for-lengthselect-in-bar-config.patch nouveau-gsp-use-correct-size-for-registry-rpc.patch of-property-fix-typo-in-io-channels.patch pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch tools-rtla-exit-with-exit_success-when-help-is-invoked.patch tools-rtla-fix-clang-warning-about-mount_point-var-size.patch tools-rtla-fix-makefile-compiler-options-for-clang.patch tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch tools-rtla-remove-unused-sched_getattr-function.patch tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch tools-rv-fix-curr_reactor-uninitialized-variable.patch tools-rv-fix-makefile-compiler-options-for-clang.patch wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch wifi-iwlwifi-fix-double-free-bug.patch wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch xen-events-close-evtchn-after-mapping-cleanup.patch zonefs-improve-error-handling.patch --- ...hat-tif_sve-means-we-saved-sve-state.patch | 58 ++ ...cobalt-100-to-arm-neoverse-n2-errata.patch | 94 +++ 
...uirk-for-lenovo-ideapad-pro-5-16arp8.patch | 39 + ...f-ipc3-fix-message-bounds-on-ipc-ops.patch | 43 + ...dd-module-parameter-to-tascodec_init.patch | 84 ++ ...x-detection-of-dirty-throttled-tasks.patch | 104 +++ ...er-during-setsockopt-so_j1939_filter.patch | 194 +++++ ...-changing-j1939_socks_lock-to-rwlock.patch | 153 ++++ ...ulation-using-the-old-data-bittiming.patch | 35 + ...ent-use-after-free-in-encode_cap_msg.patch | 53 ++ ...s-sgl-free-on-zero-length-error-path.patch | 52 ++ ...ce-in-__sev_platform_shutdown_locked.patch | 118 +++ ...py-fix-build-error-for-missing-files.patch | 50 ++ ...nger-necessary-spin_lock_irq-siglock.patch | 59 ++ ...r-dereference-in-hugetlbs_fill_super.patch | 156 ++++ ...djusted-outside-of-lock_task_sighand.patch | 76 ++ ...to-gather-the-threads-children-stats.patch | 124 +++ ...elax-mount_setattr-permission-checks.patch | 59 ++ ..._probe-if-net_device_register-missed.patch | 179 +++++ ...-its-fix-gicv4.1-vpe-affinity-update.patch | 80 ++ ...quirk-probing-for-acpi-based-systems.patch | 48 ++ ...add-write-memory-barrier-before-exit.patch | 63 ++ ...rm64-fix-circular-locking-dependency.patch | 84 ++ ...vsie-fix-race-during-shadow-creation.patch | 59 ++ ...-earlycon-parameter-if-kasan-enabled.patch | 34 + ...s-tried-regions-update-timeout-setup.patch | 37 + ...e-reserved-by-shmat-if-shm_noreserve.patch | 103 +++ ...-that-disk-can-t-be-detected-by-bios.patch | 69 ++ ...sw-enable-mac_managed_pm-to-fix-mdio.patch | 62 ++ ...ew-enable-mac_managed_pm-to-fix-mdio.patch | 63 ++ ...pdates-of-64-bit-statistics-counters.patch | 740 ++++++++++++++++++ ...netdev_xdp_act_redirect-feature-flag.patch | 33 + ...-re-adding-mac-index-for-bonded-port.patch | 50 ++ ...macro-for-lengthselect-in-bar-config.patch | 46 ++ ...sp-use-correct-size-for-registry-rpc.patch | 41 + .../of-property-fix-typo-in-io-channels.patch | 35 + ...bit-bug-in-dw_pcie_ep_raise_msix_irq.patch | 52 ++ ...e-unused-cleanup-to-a-_sync-initcall.patch | 34 + 
...-r8a77980-sysc-cr7-must-be-always-on.patch | 42 + ...e-gp-relative-addressing-is-not-used.patch | 34 + ...s-of-l3-ip-in-case-of-network-issues.patch | 70 ++ queue-6.7/series | 58 ++ ...d-and-cruid-for-multiuser-automounts.patch | 56 ++ ...andard-maximum-write-size-negotiated.patch | 85 ++ ...x-setting-the-cns-bit-in-router_cs_5.patch | 46 ++ ...th-exit_success-when-help-is-invoked.patch | 90 +++ ...g-warning-about-mount_point-var-size.patch | 52 ++ ...-makefile-compiler-options-for-clang.patch | 72 ++ ...ized-bucket-data-bucket_size-warning.patch | 107 +++ ...remove-unused-sched_getattr-function.patch | 52 ++ ...tting-prio-with-nice-for-sched_other.patch | 55 ++ ...-curr_reactor-uninitialized-variable.patch | 80 ++ ...-makefile-compiler-options-for-clang.patch | 67 ++ ...0211-fix-wiphy-delayed-work-queueing.patch | 46 ++ .../wifi-iwlwifi-fix-double-free-bug.patch | 41 + ...-a-crash-when-we-run-out-of-stations.patch | 59 ++ ...info-pointer-in-ieee80211_tx_dequeue.patch | 51 ++ ...s-close-evtchn-after-mapping-cleanup.patch | 166 ++++ queue-6.7/zonefs-improve-error-handling.patch | 252 ++++++ 59 files changed, 5044 insertions(+) create mode 100644 queue-6.7/arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch create mode 100644 queue-6.7/arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch create mode 100644 queue-6.7/asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch create mode 100644 queue-6.7/asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch create mode 100644 queue-6.7/asoc-tas2781-add-module-parameter-to-tascodec_init.patch create mode 100644 queue-6.7/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch create mode 100644 queue-6.7/can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch create mode 100644 queue-6.7/can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch create mode 100644 queue-6.7/can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch 
create mode 100644 queue-6.7/ceph-prevent-use-after-free-in-encode_cap_msg.patch create mode 100644 queue-6.7/crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch create mode 100644 queue-6.7/crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch create mode 100644 queue-6.7/docs-kernel_feat.py-fix-build-error-for-missing-files.patch create mode 100644 queue-6.7/exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch create mode 100644 queue-6.7/fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch create mode 100644 queue-6.7/fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch create mode 100644 queue-6.7/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch create mode 100644 queue-6.7/fs-relax-mount_setattr-permission-checks.patch create mode 100644 queue-6.7/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch create mode 100644 queue-6.7/irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch create mode 100644 queue-6.7/irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch create mode 100644 queue-6.7/irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch create mode 100644 queue-6.7/kvm-arm64-fix-circular-locking-dependency.patch create mode 100644 queue-6.7/kvm-s390-vsie-fix-race-during-shadow-creation.patch create mode 100644 queue-6.7/loongarch-fix-earlycon-parameter-if-kasan-enabled.patch create mode 100644 queue-6.7/mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch create mode 100644 queue-6.7/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch create mode 100644 queue-6.7/mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch create mode 100644 queue-6.7/net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch create mode 100644 
queue-6.7/net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch create mode 100644 queue-6.7/net-stmmac-protect-updates-of-64-bit-statistics-counters.patch create mode 100644 queue-6.7/nfp-enable-netdev_xdp_act_redirect-feature-flag.patch create mode 100644 queue-6.7/nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch create mode 100644 queue-6.7/nfp-use-correct-macro-for-lengthselect-in-bar-config.patch create mode 100644 queue-6.7/nouveau-gsp-use-correct-size-for-registry-rpc.patch create mode 100644 queue-6.7/of-property-fix-typo-in-io-channels.patch create mode 100644 queue-6.7/pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch create mode 100644 queue-6.7/pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch create mode 100644 queue-6.7/pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch create mode 100644 queue-6.7/riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch create mode 100644 queue-6.7/s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch create mode 100644 queue-6.7/smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch create mode 100644 queue-6.7/smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch create mode 100644 queue-6.7/thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch create mode 100644 queue-6.7/tools-rtla-exit-with-exit_success-when-help-is-invoked.patch create mode 100644 queue-6.7/tools-rtla-fix-clang-warning-about-mount_point-var-size.patch create mode 100644 queue-6.7/tools-rtla-fix-makefile-compiler-options-for-clang.patch create mode 100644 queue-6.7/tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch create mode 100644 queue-6.7/tools-rtla-remove-unused-sched_getattr-function.patch create mode 100644 queue-6.7/tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch create mode 100644 queue-6.7/tools-rv-fix-curr_reactor-uninitialized-variable.patch create mode 100644 
queue-6.7/tools-rv-fix-makefile-compiler-options-for-clang.patch create mode 100644 queue-6.7/wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch create mode 100644 queue-6.7/wifi-iwlwifi-fix-double-free-bug.patch create mode 100644 queue-6.7/wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch create mode 100644 queue-6.7/wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch create mode 100644 queue-6.7/xen-events-close-evtchn-after-mapping-cleanup.patch create mode 100644 queue-6.7/zonefs-improve-error-handling.patch diff --git a/queue-6.7/arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch b/queue-6.7/arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch new file mode 100644 index 00000000000..09f9394ad11 --- /dev/null +++ b/queue-6.7/arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch @@ -0,0 +1,58 @@ +From 61da7c8e2a602f66be578cbbcebe8638c10e0f48 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Tue, 30 Jan 2024 15:43:53 +0000 +Subject: arm64/signal: Don't assume that TIF_SVE means we saved SVE state + +From: Mark Brown + +commit 61da7c8e2a602f66be578cbbcebe8638c10e0f48 upstream. + +When we are in a syscall we will only save the FPSIMD subset even though +the task still has access to the full register set, and on context switch +we will only remove TIF_SVE when loading the register state. This means +that the signal handling code should not assume that TIF_SVE means that +the register state is stored in SVE format, it should instead check the +format that was recorded during save. 
+ +Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") +Signed-off-by: Mark Brown +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240130-arm64-sve-signal-regs-v2-1-9fc6f9502782@kernel.org +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/fpsimd.c | 2 +- + arch/arm64/kernel/signal.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -1628,7 +1628,7 @@ void fpsimd_preserve_current_state(void) + void fpsimd_signal_preserve_current_state(void) + { + fpsimd_preserve_current_state(); +- if (test_thread_flag(TIF_SVE)) ++ if (current->thread.fp_type == FP_STATE_SVE) + sve_to_fpsimd(current); + } + +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -242,7 +242,7 @@ static int preserve_sve_context(struct s + vl = task_get_sme_vl(current); + vq = sve_vq_from_vl(vl); + flags |= SVE_SIG_FLAG_SM; +- } else if (test_thread_flag(TIF_SVE)) { ++ } else if (current->thread.fp_type == FP_STATE_SVE) { + vq = sve_vq_from_vl(vl); + } + +@@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct + if (system_supports_sve() || system_supports_sme()) { + unsigned int vq = 0; + +- if (add_all || test_thread_flag(TIF_SVE) || ++ if (add_all || current->thread.fp_type == FP_STATE_SVE || + thread_sm_enabled(&current->thread)) { + int vl = max(sve_max_vl(), sme_max_vl()); + diff --git a/queue-6.7/arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch b/queue-6.7/arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch new file mode 100644 index 00000000000..5d103d966bd --- /dev/null +++ b/queue-6.7/arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch @@ -0,0 +1,94 @@ +From fb091ff394792c018527b3211bbdfae93ea4ac02 Mon Sep 17 00:00:00 2001 +From: Easwar Hariharan +Date: Wed, 14 Feb 2024 17:55:18 +0000 +Subject: arm64: Subscribe Microsoft Azure Cobalt 
100 to ARM Neoverse N2 errata + +From: Easwar Hariharan + +commit fb091ff394792c018527b3211bbdfae93ea4ac02 upstream. + +Add the MIDR value of Microsoft Azure Cobalt 100, which is a Microsoft +implemented CPU based on r0p0 of the ARM Neoverse N2 CPU, and therefore +suffers from all the same errata. + +CC: stable@vger.kernel.org # 5.15+ +Signed-off-by: Easwar Hariharan +Reviewed-by: Anshuman Khandual +Acked-by: Mark Rutland +Acked-by: Marc Zyngier +Reviewed-by: Oliver Upton +Link: https://lore.kernel.org/r/20240214175522.2457857-1-eahariha@linux.microsoft.com +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arch/arm64/silicon-errata.rst | 7 +++++++ + arch/arm64/include/asm/cputype.h | 4 ++++ + arch/arm64/kernel/cpu_errata.c | 3 +++ + 3 files changed, 14 insertions(+) + +--- a/Documentation/arch/arm64/silicon-errata.rst ++++ b/Documentation/arch/arm64/silicon-errata.rst +@@ -235,3 +235,10 @@ stable kernels. + +----------------+-----------------+-----------------+-----------------------------+ + | ASR | ASR8601 | #8601001 | N/A | + +----------------+-----------------+-----------------+-----------------------------+ +++----------------+-----------------+-----------------+-----------------------------+ ++| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | +++----------------+-----------------+-----------------+-----------------------------+ ++| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | +++----------------+-----------------+-----------------+-----------------------------+ ++| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | +++----------------+-----------------+-----------------+-----------------------------+ +--- a/arch/arm64/include/asm/cputype.h ++++ b/arch/arm64/include/asm/cputype.h +@@ -61,6 +61,7 @@ + #define ARM_CPU_IMP_HISI 0x48 + #define ARM_CPU_IMP_APPLE 0x61 + #define ARM_CPU_IMP_AMPERE 0xC0 ++#define ARM_CPU_IMP_MICROSOFT 0x6D + + #define ARM_CPU_PART_AEM_V8 0xD0F + 
#define ARM_CPU_PART_FOUNDATION 0xD00 +@@ -135,6 +136,8 @@ + + #define AMPERE_CPU_PART_AMPERE1 0xAC3 + ++#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ ++ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) + #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) +@@ -193,6 +196,7 @@ + #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) + #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) + #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) ++#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) + + /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ + #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -374,6 +374,7 @@ static const struct midr_range erratum_1 + static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { + #ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), ++ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + #endif + #ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +@@ -387,6 +388,7 @@ static const struct midr_range trbe_over + static const struct midr_range tsb_flush_fail_cpus[] = { + #ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), ++ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + #endif + #ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +@@ -399,6 +401,7 @@ static const struct midr_range tsb_flush + static struct midr_range trbe_write_out_of_range_cpus[] = { + #ifdef CONFIG_ARM64_ERRATUM_2253138 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), ++ 
MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + #endif + #ifdef CONFIG_ARM64_ERRATUM_2224489 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), diff --git a/queue-6.7/asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch b/queue-6.7/asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch new file mode 100644 index 00000000000..033e655037e --- /dev/null +++ b/queue-6.7/asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch @@ -0,0 +1,39 @@ +From 610010737f74482a61896596a0116876ecf9e65c Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Mon, 5 Feb 2024 15:48:53 -0600 +Subject: ASoC: amd: yc: Add DMI quirk for Lenovo Ideapad Pro 5 16ARP8 + +From: Mario Limonciello + +commit 610010737f74482a61896596a0116876ecf9e65c upstream. + +The laptop requires a quirk ID to enable its internal microphone. Add +it to the DMI quirk table. + +Reported-by: Stanislav Petrov +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216925 +Cc: stable@vger.kernel.org +Signed-off-by: Mario Limonciello +Link: https://lore.kernel.org/r/20240205214853.2689-1-mario.limonciello@amd.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/sound/soc/amd/yc/acp6x-mach.c ++++ b/sound/soc/amd/yc/acp6x-mach.c +@@ -251,6 +251,13 @@ static const struct dmi_system_id yc_acp + { + .driver_data = &acp6x_card, + .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), ++ } ++ }, ++ { ++ .driver_data = &acp6x_card, ++ .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), + } diff --git a/queue-6.7/asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch b/queue-6.7/asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch new file mode 100644 index 00000000000..4a1da4b4a1a --- /dev/null +++ b/queue-6.7/asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch @@ -0,0 +1,43 @@ +From 
fcbe4873089c84da641df75cda9cac2e9addbb4b Mon Sep 17 00:00:00 2001 +From: Curtis Malainey +Date: Tue, 13 Feb 2024 14:38:34 +0200 +Subject: ASoC: SOF: IPC3: fix message bounds on ipc ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Curtis Malainey + +commit fcbe4873089c84da641df75cda9cac2e9addbb4b upstream. + +commit 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") +introduced a new allocation before the upper bounds check in +do_rx_work. As a result A DSP can cause bad allocations if spewing +garbage. + +Fixes: 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") +Reported-by: Tim Van Patten +Cc: stable@vger.kernel.org +Signed-off-by: Curtis Malainey +Reviewed-by: Péter Ujfalusi +Reviewed-by: Daniel Baluta +Reviewed-by: Pierre-Louis Bossart +Signed-off-by: Peter Ujfalusi +Link: https://msgid.link/r/20240213123834.4827-1-peter.ujfalusi@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/sof/ipc3.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/sound/soc/sof/ipc3.c ++++ b/sound/soc/sof/ipc3.c +@@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_s + return; + } + +- if (hdr.size < sizeof(hdr)) { ++ if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) { + dev_err(sdev->dev, "The received message size is invalid\n"); + return; + } diff --git a/queue-6.7/asoc-tas2781-add-module-parameter-to-tascodec_init.patch b/queue-6.7/asoc-tas2781-add-module-parameter-to-tascodec_init.patch new file mode 100644 index 00000000000..e9d7c8c2b4e --- /dev/null +++ b/queue-6.7/asoc-tas2781-add-module-parameter-to-tascodec_init.patch @@ -0,0 +1,84 @@ +From 34a1066981a967eab619938e7b35a9be6b4c34e1 Mon Sep 17 00:00:00 2001 +From: Gergo Koteles +Date: Sun, 4 Feb 2024 21:01:17 +0100 +Subject: ASoC: tas2781: add module parameter to tascodec_init() + +From: Gergo Koteles + +commit 34a1066981a967eab619938e7b35a9be6b4c34e1 upstream. 
+ +The tascodec_init() of the snd-soc-tas2781-comlib module is called from +snd-soc-tas2781-i2c and snd-hda-scodec-tas2781-i2c modules. It calls +request_firmware_nowait() with parameter THIS_MODULE and a cont/callback +from the latter modules. + +The latter modules can be removed while their callbacks are running, +resulting in a general protection failure. + +Add module parameter to tascodec_init() so request_firmware_nowait() can +be called with the module of the callback. + +Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") +CC: stable@vger.kernel.org +Signed-off-by: Gergo Koteles +Link: https://lore.kernel.org/r/118dad922cef50525e5aab09badef2fa0eb796e5.1707076603.git.soyer@irl.hu +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + include/sound/tas2781.h | 1 + + sound/pci/hda/tas2781_hda_i2c.c | 2 +- + sound/soc/codecs/tas2781-comlib.c | 3 ++- + sound/soc/codecs/tas2781-i2c.c | 2 +- + 4 files changed, 5 insertions(+), 3 deletions(-) + +--- a/include/sound/tas2781.h ++++ b/include/sound/tas2781.h +@@ -135,6 +135,7 @@ struct tasdevice_priv { + + void tas2781_reset(struct tasdevice_priv *tas_dev); + int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, ++ struct module *module, + void (*cont)(const struct firmware *fw, void *context)); + struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); + int tasdevice_init(struct tasdevice_priv *tas_priv); +--- a/sound/pci/hda/tas2781_hda_i2c.c ++++ b/sound/pci/hda/tas2781_hda_i2c.c +@@ -627,7 +627,7 @@ static int tas2781_hda_bind(struct devic + + strscpy(comps->name, dev_name(dev), sizeof(comps->name)); + +- ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready); ++ ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready); + if (!ret) + comps->playback_hook = tas2781_hda_playback_hook; + +--- a/sound/soc/codecs/tas2781-comlib.c ++++ b/sound/soc/codecs/tas2781-comlib.c +@@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv + 
EXPORT_SYMBOL_GPL(tas2781_reset); + + int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, ++ struct module *module, + void (*cont)(const struct firmware *fw, void *context)) + { + int ret = 0; +@@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv + tas_priv->dev_name, tas_priv->ndev); + crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL); + tas_priv->codec = codec; +- ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, ++ ret = request_firmware_nowait(module, FW_ACTION_UEVENT, + tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv, + cont); + if (ret) +--- a/sound/soc/codecs/tas2781-i2c.c ++++ b/sound/soc/codecs/tas2781-i2c.c +@@ -564,7 +564,7 @@ static int tasdevice_codec_probe(struct + { + struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec); + +- return tascodec_init(tas_priv, codec, tasdevice_fw_ready); ++ return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready); + } + + static void tasdevice_deinit(void *context) diff --git a/queue-6.7/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch b/queue-6.7/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch new file mode 100644 index 00000000000..96bcdfd10fb --- /dev/null +++ b/queue-6.7/blk-wbt-fix-detection-of-dirty-throttled-tasks.patch @@ -0,0 +1,104 @@ +From f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 23 Jan 2024 18:58:26 +0100 +Subject: blk-wbt: Fix detection of dirty-throttled tasks + +From: Jan Kara + +commit f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 upstream. + +The detection of dirty-throttled tasks in blk-wbt has been subtly broken +since its beginning in 2016. Namely if we are doing cgroup writeback and +the throttled task is not in the root cgroup, balance_dirty_pages() will +set dirty_sleep for the non-root bdi_writeback structure. However +blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback +structure. 
Thus detection of recently throttled tasks is not working in +this case (we noticed this when we switched to cgroup v2 and suddenly +writeback was slow). + +Since blk-wbt has no easy way to get to proper bdi_writeback and +furthermore its intention has always been to work on the whole device +rather than on individual cgroups, just move the dirty_sleep timestamp +from bdi_writeback to backing_dev_info. That fixes the checking for +recently throttled task and saves memory for everybody as a bonus. + +CC: stable@vger.kernel.org +Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz +[axboe: fixup indentation errors] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-wbt.c | 4 ++-- + include/linux/backing-dev-defs.h | 7 +++++-- + mm/backing-dev.c | 2 +- + mm/page-writeback.c | 2 +- + 4 files changed, 9 insertions(+), 6 deletions(-) + +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -165,9 +165,9 @@ static void wb_timestamp(struct rq_wb *r + */ + static bool wb_recent_wait(struct rq_wb *rwb) + { +- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; ++ struct backing_dev_info *bdi = rwb->rqos.disk->bdi; + +- return time_before(jiffies, wb->dirty_sleep + HZ); ++ return time_before(jiffies, bdi->last_bdp_sleep + HZ); + } + + static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, +--- a/include/linux/backing-dev-defs.h ++++ b/include/linux/backing-dev-defs.h +@@ -141,8 +141,6 @@ struct bdi_writeback { + struct delayed_work dwork; /* work item used for writeback */ + struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ + +- unsigned long dirty_sleep; /* last wait */ +- + struct list_head bdi_node; /* anchored at bdi->wb_list */ + + #ifdef CONFIG_CGROUP_WRITEBACK +@@ -179,6 +177,11 @@ struct backing_dev_info { + * any dirty wbs, which is depended upon by bdi_has_dirty(). 
+ */ + atomic_long_t tot_write_bandwidth; ++ /* ++ * Jiffies when last process was dirty throttled on this bdi. Used by ++ * blk-wbt. ++ */ ++ unsigned long last_bdp_sleep; + + struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct list_head wb_list; /* list of all wbs */ +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback + INIT_LIST_HEAD(&wb->work_list); + INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); +- wb->dirty_sleep = jiffies; + + err = fprop_local_init_percpu(&wb->completions, gfp); + if (err) +@@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bd + INIT_LIST_HEAD(&bdi->bdi_list); + INIT_LIST_HEAD(&bdi->wb_list); + init_waitqueue_head(&bdi->wb_waitq); ++ bdi->last_bdp_sleep = jiffies; + + return cgwb_bdi_init(bdi); + } +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1921,7 +1921,7 @@ pause: + break; + } + __set_current_state(TASK_KILLABLE); +- wb->dirty_sleep = now; ++ bdi->last_bdp_sleep = jiffies; + io_schedule_timeout(pause); + + current->dirty_paused_when = now + pause; diff --git a/queue-6.7/can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch b/queue-6.7/can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch new file mode 100644 index 00000000000..f740f983827 --- /dev/null +++ b/queue-6.7/can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch @@ -0,0 +1,194 @@ +From efe7cf828039aedb297c1f9920b638fffee6aabc Mon Sep 17 00:00:00 2001 +From: Oleksij Rempel +Date: Fri, 20 Oct 2023 15:38:14 +0200 +Subject: can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER) + +From: Oleksij Rempel + +commit efe7cf828039aedb297c1f9920b638fffee6aabc upstream. + +Lock jsk->sk to prevent UAF when setsockopt(..., SO_J1939_FILTER, ...) +modifies jsk->filters while receiving packets. 
+ +Following trace was seen on affected system: + ================================================================== + BUG: KASAN: slab-use-after-free in j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] + Read of size 4 at addr ffff888012144014 by task j1939/350 + + CPU: 0 PID: 350 Comm: j1939 Tainted: G W OE 6.5.0-rc5 #1 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 + Call Trace: + print_report+0xd3/0x620 + ? kasan_complete_mode_report_info+0x7d/0x200 + ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] + kasan_report+0xc2/0x100 + ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] + __asan_load4+0x84/0xb0 + j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] + j1939_sk_recv+0x20b/0x320 [can_j1939] + ? __kasan_check_write+0x18/0x20 + ? __pfx_j1939_sk_recv+0x10/0x10 [can_j1939] + ? j1939_simple_recv+0x69/0x280 [can_j1939] + ? j1939_ac_recv+0x5e/0x310 [can_j1939] + j1939_can_recv+0x43f/0x580 [can_j1939] + ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] + ? raw_rcv+0x42/0x3c0 [can_raw] + ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] + can_rcv_filter+0x11f/0x350 [can] + can_receive+0x12f/0x190 [can] + ? __pfx_can_rcv+0x10/0x10 [can] + can_rcv+0xdd/0x130 [can] + ? __pfx_can_rcv+0x10/0x10 [can] + __netif_receive_skb_one_core+0x13d/0x150 + ? __pfx___netif_receive_skb_one_core+0x10/0x10 + ? __kasan_check_write+0x18/0x20 + ? _raw_spin_lock_irq+0x8c/0xe0 + __netif_receive_skb+0x23/0xb0 + process_backlog+0x107/0x260 + __napi_poll+0x69/0x310 + net_rx_action+0x2a1/0x580 + ? __pfx_net_rx_action+0x10/0x10 + ? __pfx__raw_spin_lock+0x10/0x10 + ? handle_irq_event+0x7d/0xa0 + __do_softirq+0xf3/0x3f8 + do_softirq+0x53/0x80 + + + __local_bh_enable_ip+0x6e/0x70 + netif_rx+0x16b/0x180 + can_send+0x32b/0x520 [can] + ? __pfx_can_send+0x10/0x10 [can] + ? __check_object_size+0x299/0x410 + raw_sendmsg+0x572/0x6d0 [can_raw] + ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] + ? apparmor_socket_sendmsg+0x2f/0x40 + ? 
__pfx_raw_sendmsg+0x10/0x10 [can_raw] + sock_sendmsg+0xef/0x100 + sock_write_iter+0x162/0x220 + ? __pfx_sock_write_iter+0x10/0x10 + ? __rtnl_unlock+0x47/0x80 + ? security_file_permission+0x54/0x320 + vfs_write+0x6ba/0x750 + ? __pfx_vfs_write+0x10/0x10 + ? __fget_light+0x1ca/0x1f0 + ? __rcu_read_unlock+0x5b/0x280 + ksys_write+0x143/0x170 + ? __pfx_ksys_write+0x10/0x10 + ? __kasan_check_read+0x15/0x20 + ? fpregs_assert_state_consistent+0x62/0x70 + __x64_sys_write+0x47/0x60 + do_syscall_64+0x60/0x90 + ? do_syscall_64+0x6d/0x90 + ? irqentry_exit+0x3f/0x50 + ? exc_page_fault+0x79/0xf0 + entry_SYSCALL_64_after_hwframe+0x6e/0xd8 + + Allocated by task 348: + kasan_save_stack+0x2a/0x50 + kasan_set_track+0x29/0x40 + kasan_save_alloc_info+0x1f/0x30 + __kasan_kmalloc+0xb5/0xc0 + __kmalloc_node_track_caller+0x67/0x160 + j1939_sk_setsockopt+0x284/0x450 [can_j1939] + __sys_setsockopt+0x15c/0x2f0 + __x64_sys_setsockopt+0x6b/0x80 + do_syscall_64+0x60/0x90 + entry_SYSCALL_64_after_hwframe+0x6e/0xd8 + + Freed by task 349: + kasan_save_stack+0x2a/0x50 + kasan_set_track+0x29/0x40 + kasan_save_free_info+0x2f/0x50 + __kasan_slab_free+0x12e/0x1c0 + __kmem_cache_free+0x1b9/0x380 + kfree+0x7a/0x120 + j1939_sk_setsockopt+0x3b2/0x450 [can_j1939] + __sys_setsockopt+0x15c/0x2f0 + __x64_sys_setsockopt+0x6b/0x80 + do_syscall_64+0x60/0x90 + entry_SYSCALL_64_after_hwframe+0x6e/0xd8 + +Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") +Reported-by: Sili Luo +Suggested-by: Sili Luo +Acked-by: Oleksij Rempel +Cc: stable@vger.kernel.org +Signed-off-by: Oleksij Rempel +Link: https://lore.kernel.org/all/20231020133814.383996-1-o.rempel@pengutronix.de +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/j1939-priv.h | 1 + + net/can/j1939/socket.c | 22 ++++++++++++++++++---- + 2 files changed, 19 insertions(+), 4 deletions(-) + +--- a/net/can/j1939/j1939-priv.h ++++ b/net/can/j1939/j1939-priv.h +@@ -301,6 +301,7 @@ struct j1939_sock { + + int ifindex; + 
struct j1939_addr addr; ++ spinlock_t filters_lock; + struct j1939_filter *filters; + int nfilters; + pgn_t pgn_rx_filter; +--- a/net/can/j1939/socket.c ++++ b/net/can/j1939/socket.c +@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1 + static bool j1939_sk_match_filter(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) + { +- const struct j1939_filter *f = jsk->filters; +- int nfilter = jsk->nfilters; ++ const struct j1939_filter *f; ++ int nfilter; ++ ++ spin_lock_bh(&jsk->filters_lock); ++ ++ f = jsk->filters; ++ nfilter = jsk->nfilters; + + if (!nfilter) + /* receive all when no filters are assigned */ +- return true; ++ goto filter_match_found; + + for (; nfilter; ++f, --nfilter) { + if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) +@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct + continue; + if ((skcb->addr.src_name & f->name_mask) != f->name) + continue; +- return true; ++ goto filter_match_found; + } ++ ++ spin_unlock_bh(&jsk->filters_lock); + return false; ++ ++filter_match_found: ++ spin_unlock_bh(&jsk->filters_lock); ++ return true; + } + + static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, +@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk + atomic_set(&jsk->skb_pending, 0); + spin_lock_init(&jsk->sk_session_queue_lock); + INIT_LIST_HEAD(&jsk->sk_session_queue); ++ spin_lock_init(&jsk->filters_lock); + + /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ + sock_set_flag(sk, SOCK_RCU_FREE); +@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct so + } + + lock_sock(&jsk->sk); ++ spin_lock_bh(&jsk->filters_lock); + ofilters = jsk->filters; + jsk->filters = filters; + jsk->nfilters = count; ++ spin_unlock_bh(&jsk->filters_lock); + release_sock(&jsk->sk); + kfree(ofilters); + return 0; diff --git a/queue-6.7/can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch b/queue-6.7/can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch new file mode 100644 index 
00000000000..d3cab9af053 --- /dev/null +++ b/queue-6.7/can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch @@ -0,0 +1,153 @@ +From 6cdedc18ba7b9dacc36466e27e3267d201948c8d Mon Sep 17 00:00:00 2001 +From: Ziqi Zhao +Date: Fri, 21 Jul 2023 09:22:26 -0700 +Subject: can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock + +From: Ziqi Zhao + +commit 6cdedc18ba7b9dacc36466e27e3267d201948c8d upstream. + +The following 3 locks would race against each other, causing the +deadlock situation in the Syzbot bug report: + +- j1939_socks_lock +- active_session_list_lock +- sk_session_queue_lock + +A reasonable fix is to change j1939_socks_lock to an rwlock, since in +the rare situations where a write lock is required for the linked list +that j1939_socks_lock is protecting, the code does not attempt to +acquire any more locks. This would break the circular lock dependency, +where, for example, the current thread already locks j1939_socks_lock +and attempts to acquire sk_session_queue_lock, and at the same time, +another thread attempts to acquire j1939_socks_lock while holding +sk_session_queue_lock. + +NOTE: This patch alone does not fix the unregister_netdevice bug +reported by Syzbot; instead, it solves a deadlock situation to prepare +for one or more further patches to actually fix the Syzbot bug, which +appears to be a reference counting problem within the j1939 codebase. 
+ +Reported-by: +Signed-off-by: Ziqi Zhao +Reviewed-by: Oleksij Rempel +Acked-by: Oleksij Rempel +Link: https://lore.kernel.org/all/20230721162226.8639-1-astrajoan@yahoo.com +[mkl: remove unrelated newline change] +Cc: stable@vger.kernel.org +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/j1939-priv.h | 2 +- + net/can/j1939/main.c | 2 +- + net/can/j1939/socket.c | 24 ++++++++++++------------ + 3 files changed, 14 insertions(+), 14 deletions(-) + +--- a/net/can/j1939/j1939-priv.h ++++ b/net/can/j1939/j1939-priv.h +@@ -86,7 +86,7 @@ struct j1939_priv { + unsigned int tp_max_packet_size; + + /* lock for j1939_socks list */ +- spinlock_t j1939_socks_lock; ++ rwlock_t j1939_socks_lock; + struct list_head j1939_socks; + + struct kref rx_kref; +--- a/net/can/j1939/main.c ++++ b/net/can/j1939/main.c +@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(st + return ERR_PTR(-ENOMEM); + + j1939_tp_init(priv); +- spin_lock_init(&priv->j1939_socks_lock); ++ rwlock_init(&priv->j1939_socks_lock); + INIT_LIST_HEAD(&priv->j1939_socks); + + mutex_lock(&j1939_netdev_lock); +--- a/net/can/j1939/socket.c ++++ b/net/can/j1939/socket.c +@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_p + jsk->state |= J1939_SOCK_BOUND; + j1939_priv_get(priv); + +- spin_lock_bh(&priv->j1939_socks_lock); ++ write_lock_bh(&priv->j1939_socks_lock); + list_add_tail(&jsk->list, &priv->j1939_socks); +- spin_unlock_bh(&priv->j1939_socks_lock); ++ write_unlock_bh(&priv->j1939_socks_lock); + } + + static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) + { +- spin_lock_bh(&priv->j1939_socks_lock); ++ write_lock_bh(&priv->j1939_socks_lock); + list_del_init(&jsk->list); +- spin_unlock_bh(&priv->j1939_socks_lock); ++ write_unlock_bh(&priv->j1939_socks_lock); + + j1939_priv_put(priv); + jsk->state &= ~J1939_SOCK_BOUND; +@@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_pr + struct j1939_sock *jsk; + bool match = false; + +- 
spin_lock_bh(&priv->j1939_socks_lock); ++ read_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + match = j1939_sk_recv_match_one(jsk, skcb); + if (match) + break; + } +- spin_unlock_bh(&priv->j1939_socks_lock); ++ read_unlock_bh(&priv->j1939_socks_lock); + + return match; + } +@@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *pr + { + struct j1939_sock *jsk; + +- spin_lock_bh(&priv->j1939_socks_lock); ++ read_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + j1939_sk_recv_one(jsk, skb); + } +- spin_unlock_bh(&priv->j1939_socks_lock); ++ read_unlock_bh(&priv->j1939_socks_lock); + } + + static void j1939_sk_sock_destruct(struct sock *sk) +@@ -1080,12 +1080,12 @@ void j1939_sk_errqueue(struct j1939_sess + } + + /* spread RX notifications to all sockets subscribed to this session */ +- spin_lock_bh(&priv->j1939_socks_lock); ++ read_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + if (j1939_sk_recv_match_one(jsk, &session->skcb)) + __j1939_sk_errqueue(session, &jsk->sk, type); + } +- spin_unlock_bh(&priv->j1939_socks_lock); ++ read_unlock_bh(&priv->j1939_socks_lock); + }; + + void j1939_sk_send_loop_abort(struct sock *sk, int err) +@@ -1273,7 +1273,7 @@ void j1939_sk_netdev_event_netdown(struc + struct j1939_sock *jsk; + int error_code = ENETDOWN; + +- spin_lock_bh(&priv->j1939_socks_lock); ++ read_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + jsk->sk.sk_err = error_code; + if (!sock_flag(&jsk->sk, SOCK_DEAD)) +@@ -1281,7 +1281,7 @@ void j1939_sk_netdev_event_netdown(struc + + j1939_sk_queue_drop_all(priv, jsk, error_code); + } +- spin_unlock_bh(&priv->j1939_socks_lock); ++ read_unlock_bh(&priv->j1939_socks_lock); + } + + static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, diff --git a/queue-6.7/can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch 
b/queue-6.7/can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch new file mode 100644 index 00000000000..820cee3004f --- /dev/null +++ b/queue-6.7/can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch @@ -0,0 +1,35 @@ +From 2aa0a5e65eae27dbd96faca92c84ecbf6f492d42 Mon Sep 17 00:00:00 2001 +From: Maxime Jayat +Date: Mon, 6 Nov 2023 19:01:58 +0100 +Subject: can: netlink: Fix TDCO calculation using the old data bittiming + +From: Maxime Jayat + +commit 2aa0a5e65eae27dbd96faca92c84ecbf6f492d42 upstream. + +The TDCO calculation was done using the currently applied data bittiming, +instead of the newly computed data bittiming, which means that the TDCO +had an invalid value unless setting the same data bittiming twice. + +Fixes: d99755f71a80 ("can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC)") +Signed-off-by: Maxime Jayat +Reviewed-by: Vincent Mailhol +Link: https://lore.kernel.org/all/40579c18-63c0-43a4-8d4c-f3a6c1c0b417@munic.io +Cc: stable@vger.kernel.org +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/dev/netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/can/dev/netlink.c ++++ b/drivers/net/can/dev/netlink.c +@@ -346,7 +346,7 @@ static int can_changelink(struct net_dev + /* Neither of TDC parameters nor TDC flags are + * provided: do calculation + */ +- can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, ++ can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, + &priv->ctrlmode, priv->ctrlmode_supported); + } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly + * turned off. 
TDC is disabled: do nothing diff --git a/queue-6.7/ceph-prevent-use-after-free-in-encode_cap_msg.patch b/queue-6.7/ceph-prevent-use-after-free-in-encode_cap_msg.patch new file mode 100644 index 00000000000..87ad4aeb7a9 --- /dev/null +++ b/queue-6.7/ceph-prevent-use-after-free-in-encode_cap_msg.patch @@ -0,0 +1,53 @@ +From cda4672da1c26835dcbd7aec2bfed954eda9b5ef Mon Sep 17 00:00:00 2001 +From: Rishabh Dave +Date: Thu, 1 Feb 2024 17:07:16 +0530 +Subject: ceph: prevent use-after-free in encode_cap_msg() + +From: Rishabh Dave + +commit cda4672da1c26835dcbd7aec2bfed954eda9b5ef upstream. + +In fs/ceph/caps.c, in encode_cap_msg(), "use after free" error was +caught by KASAN at this line - 'ceph_buffer_get(arg->xattr_buf);'. This +implies before the refcount could be incremented here, it was freed. + +In same file, in "handle_cap_grant()" refcount is decremented by this +line - 'ceph_buffer_put(ci->i_xattrs.blob);'. It appears that a race +occurred and resource was freed by the latter line before the former +line could increment it. + +encode_cap_msg() is called by __send_cap() and __send_cap() is called by +ceph_check_caps() after calling __prep_cap(). __prep_cap() is where +arg->xattr_buf is set to ci->i_xattrs.blob. This is the spot where +the refcount must be increased to prevent "use after free" error. 
+ +Cc: stable@vger.kernel.org +Link: https://tracker.ceph.com/issues/59259 +Signed-off-by: Rishabh Dave +Reviewed-by: Jeff Layton +Reviewed-by: Xiubo Li +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/caps.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_ar + if (flushing & CEPH_CAP_XATTR_EXCL) { + arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); + arg->xattr_version = ci->i_xattrs.version; +- arg->xattr_buf = ci->i_xattrs.blob; ++ arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); + } else { + arg->xattr_buf = NULL; + arg->old_xattr_buf = NULL; +@@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_ar + encode_cap_msg(msg, arg); + ceph_con_send(&arg->session->s_con, msg); + ceph_buffer_put(arg->old_xattr_buf); ++ ceph_buffer_put(arg->xattr_buf); + if (arg->wake) + wake_up_all(&ci->i_cap_wq); + } diff --git a/queue-6.7/crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch b/queue-6.7/crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch new file mode 100644 index 00000000000..e4cf95021f4 --- /dev/null +++ b/queue-6.7/crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch @@ -0,0 +1,52 @@ +From 24c890dd712f6345e382256cae8c97abb0406b70 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Thu, 1 Feb 2024 13:49:09 +0800 +Subject: crypto: algif_hash - Remove bogus SGL free on zero-length error path + +From: Herbert Xu + +commit 24c890dd712f6345e382256cae8c97abb0406b70 upstream. + +When a zero-length message is hashed by algif_hash, and an error +is triggered, it tries to free an SG list that was never allocated +in the first place. Fix this by not freeing the SG list on the +zero-length error path. 
+ +Reported-by: Shigeru Yoshida +Reported-by: xingwei lee +Fixes: b6d972f68983 ("crypto: af_alg/hash: Fix recvmsg() after sendmsg(MSG_MORE)") +Cc: +Signed-off-by: Herbert Xu +Reported-by: syzbot+3266db0c26d1fbbe3abb@syzkaller.appspotmail.com +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + crypto/algif_hash.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/crypto/algif_hash.c ++++ b/crypto/algif_hash.c +@@ -91,13 +91,13 @@ static int hash_sendmsg(struct socket *s + if (!(msg->msg_flags & MSG_MORE)) { + err = hash_alloc_result(sk, ctx); + if (err) +- goto unlock_free; ++ goto unlock_free_result; + ahash_request_set_crypt(&ctx->req, NULL, + ctx->result, 0); + err = crypto_wait_req(crypto_ahash_final(&ctx->req), + &ctx->wait); + if (err) +- goto unlock_free; ++ goto unlock_free_result; + } + goto done_more; + } +@@ -170,6 +170,7 @@ unlock: + + unlock_free: + af_alg_free_sg(&ctx->sgl); ++unlock_free_result: + hash_free_result(sk, ctx); + ctx->more = false; + goto unlock; diff --git a/queue-6.7/crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch b/queue-6.7/crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch new file mode 100644 index 00000000000..6996253fff7 --- /dev/null +++ b/queue-6.7/crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch @@ -0,0 +1,118 @@ +From ccb88e9549e7cfd8bcd511c538f437e20026e983 Mon Sep 17 00:00:00 2001 +From: Kim Phillips +Date: Thu, 25 Jan 2024 17:12:53 -0600 +Subject: crypto: ccp - Fix null pointer dereference in __sev_platform_shutdown_locked + +From: Kim Phillips + +commit ccb88e9549e7cfd8bcd511c538f437e20026e983 upstream. + +The SEV platform device can be shutdown with a null psp_master, +e.g., using DEBUG_TEST_DRIVER_REMOVE. 
Found using KASAN: + +[ 137.148210] ccp 0000:23:00.1: enabling device (0000 -> 0002) +[ 137.162647] ccp 0000:23:00.1: no command queues available +[ 137.170598] ccp 0000:23:00.1: sev enabled +[ 137.174645] ccp 0000:23:00.1: psp enabled +[ 137.178890] general protection fault, probably for non-canonical address 0xdffffc000000001e: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 137.182693] KASAN: null-ptr-deref in range [0x00000000000000f0-0x00000000000000f7] +[ 137.182693] CPU: 93 PID: 1 Comm: swapper/0 Not tainted 6.8.0-rc1+ #311 +[ 137.182693] RIP: 0010:__sev_platform_shutdown_locked+0x51/0x180 +[ 137.182693] Code: 08 80 3c 08 00 0f 85 0e 01 00 00 48 8b 1d 67 b6 01 08 48 b8 00 00 00 00 00 fc ff df 48 8d bb f0 00 00 00 48 89 f9 48 c1 e9 03 <80> 3c 01 00 0f 85 fe 00 00 00 48 8b 9b f0 00 00 00 48 85 db 74 2c +[ 137.182693] RSP: 0018:ffffc900000cf9b0 EFLAGS: 00010216 +[ 137.182693] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 000000000000001e +[ 137.182693] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000000000f0 +[ 137.182693] RBP: ffffc900000cf9c8 R08: 0000000000000000 R09: fffffbfff58f5a66 +[ 137.182693] R10: ffffc900000cf9c8 R11: ffffffffac7ad32f R12: ffff8881e5052c28 +[ 137.182693] R13: ffff8881e5052c28 R14: ffff8881758e43e8 R15: ffffffffac64abf8 +[ 137.182693] FS: 0000000000000000(0000) GS:ffff889de7000000(0000) knlGS:0000000000000000 +[ 137.182693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 137.182693] CR2: 0000000000000000 CR3: 0000001cf7c7e000 CR4: 0000000000350ef0 +[ 137.182693] Call Trace: +[ 137.182693] +[ 137.182693] ? show_regs+0x6c/0x80 +[ 137.182693] ? __die_body+0x24/0x70 +[ 137.182693] ? die_addr+0x4b/0x80 +[ 137.182693] ? exc_general_protection+0x126/0x230 +[ 137.182693] ? asm_exc_general_protection+0x2b/0x30 +[ 137.182693] ? 
__sev_platform_shutdown_locked+0x51/0x180 +[ 137.182693] sev_firmware_shutdown.isra.0+0x1e/0x80 +[ 137.182693] sev_dev_destroy+0x49/0x100 +[ 137.182693] psp_dev_destroy+0x47/0xb0 +[ 137.182693] sp_destroy+0xbb/0x240 +[ 137.182693] sp_pci_remove+0x45/0x60 +[ 137.182693] pci_device_remove+0xaa/0x1d0 +[ 137.182693] device_remove+0xc7/0x170 +[ 137.182693] really_probe+0x374/0xbe0 +[ 137.182693] ? srso_return_thunk+0x5/0x5f +[ 137.182693] __driver_probe_device+0x199/0x460 +[ 137.182693] driver_probe_device+0x4e/0xd0 +[ 137.182693] __driver_attach+0x191/0x3d0 +[ 137.182693] ? __pfx___driver_attach+0x10/0x10 +[ 137.182693] bus_for_each_dev+0x100/0x190 +[ 137.182693] ? __pfx_bus_for_each_dev+0x10/0x10 +[ 137.182693] ? __kasan_check_read+0x15/0x20 +[ 137.182693] ? srso_return_thunk+0x5/0x5f +[ 137.182693] ? _raw_spin_unlock+0x27/0x50 +[ 137.182693] driver_attach+0x41/0x60 +[ 137.182693] bus_add_driver+0x2a8/0x580 +[ 137.182693] driver_register+0x141/0x480 +[ 137.182693] __pci_register_driver+0x1d6/0x2a0 +[ 137.182693] ? srso_return_thunk+0x5/0x5f +[ 137.182693] ? esrt_sysfs_init+0x1cd/0x5d0 +[ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 +[ 137.182693] sp_pci_init+0x22/0x30 +[ 137.182693] sp_mod_init+0x14/0x30 +[ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 +[ 137.182693] do_one_initcall+0xd1/0x470 +[ 137.182693] ? __pfx_do_one_initcall+0x10/0x10 +[ 137.182693] ? parameq+0x80/0xf0 +[ 137.182693] ? srso_return_thunk+0x5/0x5f +[ 137.182693] ? __kmalloc+0x3b0/0x4e0 +[ 137.182693] ? kernel_init_freeable+0x92d/0x1050 +[ 137.182693] ? kasan_populate_vmalloc_pte+0x171/0x190 +[ 137.182693] ? srso_return_thunk+0x5/0x5f +[ 137.182693] kernel_init_freeable+0xa64/0x1050 +[ 137.182693] ? __pfx_kernel_init+0x10/0x10 +[ 137.182693] kernel_init+0x24/0x160 +[ 137.182693] ? __switch_to_asm+0x3e/0x70 +[ 137.182693] ret_from_fork+0x40/0x80 +[ 137.182693] ? 
__pfx_kernel_init+0x10/0x10 +[ 137.182693] ret_from_fork_asm+0x1b/0x30 +[ 137.182693] +[ 137.182693] Modules linked in: +[ 137.538483] ---[ end trace 0000000000000000 ]--- + +Fixes: 1b05ece0c931 ("crypto: ccp - During shutdown, check SEV data pointer before using") +Cc: stable@vger.kernel.org +Reviewed-by: Mario Limonciello +Signed-off-by: Kim Phillips +Reviewed-by: Liam Merwick +Acked-by: John Allen +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/ccp/sev-dev.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/drivers/crypto/ccp/sev-dev.c ++++ b/drivers/crypto/ccp/sev-dev.c +@@ -534,10 +534,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); + + static int __sev_platform_shutdown_locked(int *error) + { +- struct sev_device *sev = psp_master->sev_data; ++ struct psp_device *psp = psp_master; ++ struct sev_device *sev; + int ret; + +- if (!sev || sev->state == SEV_STATE_UNINIT) ++ if (!psp || !psp->sev_data) ++ return 0; ++ ++ sev = psp->sev_data; ++ ++ if (sev->state == SEV_STATE_UNINIT) + return 0; + + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); diff --git a/queue-6.7/docs-kernel_feat.py-fix-build-error-for-missing-files.patch b/queue-6.7/docs-kernel_feat.py-fix-build-error-for-missing-files.patch new file mode 100644 index 00000000000..9db350e8665 --- /dev/null +++ b/queue-6.7/docs-kernel_feat.py-fix-build-error-for-missing-files.patch @@ -0,0 +1,50 @@ +From c23de7ceae59e4ca5894c3ecf4f785c50c0fa428 Mon Sep 17 00:00:00 2001 +From: Vegard Nossum +Date: Mon, 5 Feb 2024 18:51:26 +0100 +Subject: docs: kernel_feat.py: fix build error for missing files + +From: Vegard Nossum + +commit c23de7ceae59e4ca5894c3ecf4f785c50c0fa428 upstream. + +If the directory passed to the '.. 
kernel-feat::' directive does not +exist or the get_feat.pl script does not find any files to extract +features from, Sphinx will report the following error: + + Sphinx parallel build error: + UnboundLocalError: local variable 'fname' referenced before assignment + make[2]: *** [Documentation/Makefile:102: htmldocs] Error 2 + +This is due to how I changed the script in c48a7c44a1d0 ("docs: +kernel_feat.py: fix potential command injection"). Before that, the +filename passed along to self.nestedParse() in this case was weirdly +just the whole get_feat.pl invocation. + +We can fix it by doing what kernel_abi.py does -- just pass +self.arguments[0] as 'fname'. + +Fixes: c48a7c44a1d0 ("docs: kernel_feat.py: fix potential command injection") +Cc: Justin Forbes +Cc: Salvatore Bonaccorso +Cc: Jani Nikula +Cc: Mauro Carvalho Chehab +Cc: stable@vger.kernel.org +Signed-off-by: Vegard Nossum +Link: https://lore.kernel.org/r/20240205175133.774271-2-vegard.nossum@oracle.com +Signed-off-by: Jonathan Corbet +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/sphinx/kernel_feat.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/sphinx/kernel_feat.py ++++ b/Documentation/sphinx/kernel_feat.py +@@ -109,7 +109,7 @@ class KernelFeat(Directive): + else: + out_lines += line + "\n" + +- nodeList = self.nestedParse(out_lines, fname) ++ nodeList = self.nestedParse(out_lines, self.arguments[0]) + return nodeList + + def nestedParse(self, lines, fname): diff --git a/queue-6.7/exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch b/queue-6.7/exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch new file mode 100644 index 00000000000..51596fe392d --- /dev/null +++ b/queue-6.7/exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch @@ -0,0 +1,59 @@ +From c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 23 Jan 2024 16:34:00 +0100 +Subject: 
exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock) + +From: Oleg Nesterov + +commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 upstream. + +After the recent changes nobody uses siglock to read the values protected +by stats_lock, we can kill spin_lock_irq(&current->sighand->siglock) and +update the comment. + +With this patch only __exit_signal() and thread_group_start_cputime() take +stats_lock under siglock. + +Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/exit.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1125,17 +1125,14 @@ static int wait_task_zombie(struct wait_ + * and nobody can change them. + * + * psig->stats_lock also protects us from our sub-threads +- * which can reap other children at the same time. Until +- * we change k_getrusage()-like users to rely on this lock +- * we have to take ->siglock as well. ++ * which can reap other children at the same time. + * + * We use thread_group_cputime_adjusted() to get times for + * the thread group, which consolidates times for all threads + * in the group including the group leader. 
+ */ + thread_group_cputime_adjusted(p, &tgutime, &tgstime); +- spin_lock_irq(&current->sighand->siglock); +- write_seqlock(&psig->stats_lock); ++ write_seqlock_irq(&psig->stats_lock); + psig->cutime += tgutime + sig->cutime; + psig->cstime += tgstime + sig->cstime; + psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; +@@ -1158,8 +1155,7 @@ static int wait_task_zombie(struct wait_ + psig->cmaxrss = maxrss; + task_io_accounting_add(&psig->ioac, &p->ioac); + task_io_accounting_add(&psig->ioac, &sig->ioac); +- write_sequnlock(&psig->stats_lock); +- spin_unlock_irq(&current->sighand->siglock); ++ write_sequnlock_irq(&psig->stats_lock); + } + + if (wo->wo_rusage) diff --git a/queue-6.7/fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch b/queue-6.7/fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch new file mode 100644 index 00000000000..44c55a58460 --- /dev/null +++ b/queue-6.7/fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch @@ -0,0 +1,156 @@ +From 79d72c68c58784a3e1cd2378669d51bfd0cb7498 Mon Sep 17 00:00:00 2001 +From: Oscar Salvador +Date: Tue, 30 Jan 2024 22:04:18 +0100 +Subject: fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super + +From: Oscar Salvador + +commit 79d72c68c58784a3e1cd2378669d51bfd0cb7498 upstream. + +When configuring a hugetlb filesystem via the fsconfig() syscall, there is +a possible NULL dereference in hugetlbfs_fill_super() caused by assigning +NULL to ctx->hstate in hugetlbfs_parse_param() when the requested pagesize +is invalid. + +E.g: Taking the following steps: + + fd = fsopen("hugetlbfs", FSOPEN_CLOEXEC); + fsconfig(fd, FSCONFIG_SET_STRING, "pagesize", "1024", 0); + fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + +Given that the requested "pagesize" is invalid, ctx->hstate will be replaced +with NULL, losing its previous value, and we will print an error: + + ... + ... 
+ case Opt_pagesize: + ps = memparse(param->string, &rest); + ctx->hstate = size_to_hstate(ps); + if (!ctx->hstate) { + pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); + return -EINVAL; + } + return 0; + ... + ... + +This is a problem because later on, we will dereference ctx->hstate in +hugetlbfs_fill_super() + + ... + ... + sb->s_blocksize = huge_page_size(ctx->hstate); + ... + ... + +Causing below Oops. + +Fix this by replacing ctx->hstate value only when the pagesize is known +to be valid. + + kernel: hugetlbfs: Unsupported page size 0 MB + kernel: BUG: kernel NULL pointer dereference, address: 0000000000000028 + kernel: #PF: supervisor read access in kernel mode + kernel: #PF: error_code(0x0000) - not-present page + kernel: PGD 800000010f66c067 P4D 800000010f66c067 PUD 1b22f8067 PMD 0 + kernel: Oops: 0000 [#1] PREEMPT SMP PTI + kernel: CPU: 4 PID: 5659 Comm: syscall Tainted: G E 6.8.0-rc2-default+ #22 5a47c3fef76212addcc6eb71344aabc35190ae8f + kernel: Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 + kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 + kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 + kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 + kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 + kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 + kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 + kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 + kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 + kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 + kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 + kernel: Call Trace: + 
kernel: + kernel: ? __die_body+0x1a/0x60 + kernel: ? page_fault_oops+0x16f/0x4a0 + kernel: ? search_bpf_extables+0x65/0x70 + kernel: ? fixup_exception+0x22/0x310 + kernel: ? exc_page_fault+0x69/0x150 + kernel: ? asm_exc_page_fault+0x22/0x30 + kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 + kernel: ? hugetlbfs_fill_super+0xb4/0x1a0 + kernel: ? hugetlbfs_fill_super+0x28/0x1a0 + kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 + kernel: vfs_get_super+0x40/0xa0 + kernel: ? __pfx_bpf_lsm_capable+0x10/0x10 + kernel: vfs_get_tree+0x25/0xd0 + kernel: vfs_cmd_create+0x64/0xe0 + kernel: __x64_sys_fsconfig+0x395/0x410 + kernel: do_syscall_64+0x80/0x160 + kernel: ? syscall_exit_to_user_mode+0x82/0x240 + kernel: ? do_syscall_64+0x8d/0x160 + kernel: ? syscall_exit_to_user_mode+0x82/0x240 + kernel: ? do_syscall_64+0x8d/0x160 + kernel: ? exc_page_fault+0x69/0x150 + kernel: entry_SYSCALL_64_after_hwframe+0x6e/0x76 + kernel: RIP: 0033:0x7ffbc0cb87c9 + kernel: Code: 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 96 0d 00 f7 d8 64 89 01 48 + kernel: RSP: 002b:00007ffc29d2f388 EFLAGS: 00000206 ORIG_RAX: 00000000000001af + kernel: RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffbc0cb87c9 + kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 + kernel: RBP: 00007ffc29d2f3b0 R08: 0000000000000000 R09: 0000000000000000 + kernel: R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 + kernel: R13: 00007ffc29d2f4c0 R14: 0000000000000000 R15: 0000000000000000 + kernel: + kernel: Modules linked in: rpcsec_gss_krb5(E) auth_rpcgss(E) nfsv4(E) dns_resolver(E) nfs(E) lockd(E) grace(E) sunrpc(E) netfs(E) af_packet(E) bridge(E) stp(E) llc(E) iscsi_ibft(E) iscsi_boot_sysfs(E) intel_rapl_msr(E) intel_rapl_common(E) iTCO_wdt(E) intel_pmc_bxt(E) sb_edac(E) iTCO_vendor_support(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) 
rfkill(E) ipmi_ssif(E) kvm(E) acpi_ipmi(E) irqbypass(E) pcspkr(E) igb(E) ipmi_si(E) mei_me(E) i2c_i801(E) joydev(E) intel_pch_thermal(E) i2c_smbus(E) dca(E) lpc_ich(E) mei(E) ipmi_devintf(E) ipmi_msghandler(E) acpi_pad(E) tiny_power_button(E) button(E) fuse(E) efi_pstore(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) sd_mod(E) t10_pi(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) polyval_clmulni(E) ahci(E) xhci_pci(E) polyval_generic(E) gf128mul(E) ghash_clmulni_intel(E) sha512_ssse3(E) sha256_ssse3(E) xhci_pci_renesas(E) libahci(E) ehci_pci(E) sha1_ssse3(E) xhci_hcd(E) ehci_hcd(E) libata(E) + kernel: mgag200(E) i2c_algo_bit(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) aesni_intel(E) crypto_simd(E) cryptd(E) + kernel: Unloaded tainted modules: acpi_cpufreq(E):1 fjes(E):1 + kernel: CR2: 0000000000000028 + kernel: ---[ end trace 0000000000000000 ]--- + kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 + kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 + kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 + kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 + kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 + kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 + kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 + kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 + kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 + kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 + +Link: https://lkml.kernel.org/r/20240130210418.3771-1-osalvador@suse.de +Fixes: 
32021982a324 ("hugetlbfs: Convert to fs_context") +Signed-off-by: Michal Hocko +Signed-off-by: Oscar Salvador +Acked-by: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -1354,6 +1354,7 @@ static int hugetlbfs_parse_param(struct + { + struct hugetlbfs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; ++ struct hstate *h; + char *rest; + unsigned long ps; + int opt; +@@ -1398,11 +1399,12 @@ static int hugetlbfs_parse_param(struct + + case Opt_pagesize: + ps = memparse(param->string, &rest); +- ctx->hstate = size_to_hstate(ps); +- if (!ctx->hstate) { ++ h = size_to_hstate(ps); ++ if (!h) { + pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); + return -EINVAL; + } ++ ctx->hstate = h; + return 0; + + case Opt_min_size: diff --git a/queue-6.7/fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch b/queue-6.7/fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch new file mode 100644 index 00000000000..ebd189b572b --- /dev/null +++ b/queue-6.7/fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch @@ -0,0 +1,76 @@ +From 60f92acb60a989b14e4b744501a0df0f82ef30a3 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 23 Jan 2024 16:33:55 +0100 +Subject: fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand() + +From: Oleg Nesterov + +commit 60f92acb60a989b14e4b744501a0df0f82ef30a3 upstream. + +Patch series "fs/proc: do_task_stat: use sig->stats_". + +do_task_stat() has the same problem as getrusage() had before "getrusage: +use sig->stats_lock rather than lock_task_sighand()": a hard lockup. 
If +NR_CPUS threads call lock_task_sighand() at the same time and the process +has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * +NR_THREADS) time. + + +This patch (of 3): + +thread_group_cputime() does its own locking, we can safely shift +thread_group_cputime_adjusted() which does another for_each_thread loop +outside of ->siglock protected section. + +Not only this removes for_each_thread() from the critical section with +irqs disabled, this removes another case when stats_lock is taken with +siglock held. We want to remove this dependency, then we can change the +users of stats_lock to not disable irqs. + +Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com +Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/array.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file + + sigemptyset(&sigign); + sigemptyset(&sigcatch); +- cutime = cstime = utime = stime = 0; ++ cutime = cstime = 0; + cgtime = gtime = 0; + + if (lock_task_sighand(task, &flags)) { +@@ -546,7 +546,6 @@ static int do_task_stat(struct seq_file + + min_flt += sig->min_flt; + maj_flt += sig->maj_flt; +- thread_group_cputime_adjusted(task, &utime, &stime); + gtime += sig->gtime; + + if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) +@@ -562,10 +561,13 @@ static int do_task_stat(struct seq_file + + if (permitted && (!whole || num_threads < 2)) + wchan = !task_is_running(task); +- if (!whole) { ++ ++ if (whole) { ++ thread_group_cputime_adjusted(task, &utime, &stime); ++ } else { ++ task_cputime_adjusted(task, &utime, &stime); + min_flt = task->min_flt; + maj_flt = task->maj_flt; +- task_cputime_adjusted(task, &utime, &stime); + gtime = task_gtime(task); + } + 
diff --git a/queue-6.7/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch b/queue-6.7/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch new file mode 100644 index 00000000000..9f6dd355b4a --- /dev/null +++ b/queue-6.7/fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch @@ -0,0 +1,124 @@ +From 7601df8031fd67310af891897ef6cc0df4209305 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 23 Jan 2024 16:33:57 +0100 +Subject: fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats + +From: Oleg Nesterov + +commit 7601df8031fd67310af891897ef6cc0df4209305 upstream. + +lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call +do_task_stat() at the same time and the process has NR_THREADS, it will +spin with irqs disabled O(NR_CPUS * NR_THREADS) time. + +Change do_task_stat() to use sig->stats_lock to gather the statistics +outside of ->siglock protected section, in the likely case this code will +run lockless. + +Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. 
Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/array.c | 58 ++++++++++++++++++++++++++++++-------------------------- + 1 file changed, 32 insertions(+), 26 deletions(-) + +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file + int permitted; + struct mm_struct *mm; + unsigned long long start_time; +- unsigned long cmin_flt = 0, cmaj_flt = 0; +- unsigned long min_flt = 0, maj_flt = 0; +- u64 cutime, cstime, utime, stime; +- u64 cgtime, gtime; ++ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; ++ u64 cutime, cstime, cgtime, utime, stime, gtime; + unsigned long rsslim = 0; + unsigned long flags; + int exit_code = task->exit_code; ++ struct signal_struct *sig = task->signal; ++ unsigned int seq = 1; + + state = *get_task_state(task); + vsize = eip = esp = 0; +@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file + + sigemptyset(&sigign); + sigemptyset(&sigcatch); +- cutime = cstime = 0; +- cgtime = gtime = 0; + + if (lock_task_sighand(task, &flags)) { +- struct signal_struct *sig = task->signal; +- + if (sig->tty) { + struct pid *pgrp = tty_get_pgrp(sig->tty); + tty_pgrp = pid_nr_ns(pgrp, ns); +@@ -527,27 +523,9 @@ static int do_task_stat(struct seq_file + num_threads = get_nr_threads(task); + collect_sigign_sigcatch(task, &sigign, &sigcatch); + +- cmin_flt = sig->cmin_flt; +- cmaj_flt = sig->cmaj_flt; +- cutime = sig->cutime; +- cstime = sig->cstime; +- cgtime = sig->cgtime; + rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); + +- /* add up live thread stats at the group level */ + if (whole) { +- struct task_struct *t; +- +- __for_each_thread(sig, t) { +- min_flt += t->min_flt; +- maj_flt += t->maj_flt; +- gtime += task_gtime(t); +- } +- +- min_flt += sig->min_flt; +- maj_flt += sig->maj_flt; +- gtime += sig->gtime; +- + if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) + exit_code = sig->group_exit_code; + } +@@ -562,6 +540,34 @@ static int 
do_task_stat(struct seq_file + if (permitted && (!whole || num_threads < 2)) + wchan = !task_is_running(task); + ++ do { ++ seq++; /* 2 on the 1st/lockless path, otherwise odd */ ++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); ++ ++ cmin_flt = sig->cmin_flt; ++ cmaj_flt = sig->cmaj_flt; ++ cutime = sig->cutime; ++ cstime = sig->cstime; ++ cgtime = sig->cgtime; ++ ++ if (whole) { ++ struct task_struct *t; ++ ++ min_flt = sig->min_flt; ++ maj_flt = sig->maj_flt; ++ gtime = sig->gtime; ++ ++ rcu_read_lock(); ++ __for_each_thread(sig, t) { ++ min_flt += t->min_flt; ++ maj_flt += t->maj_flt; ++ gtime += task_gtime(t); ++ } ++ rcu_read_unlock(); ++ } ++ } while (need_seqretry(&sig->stats_lock, seq)); ++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags); ++ + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { diff --git a/queue-6.7/fs-relax-mount_setattr-permission-checks.patch b/queue-6.7/fs-relax-mount_setattr-permission-checks.patch new file mode 100644 index 00000000000..24d745f91fe --- /dev/null +++ b/queue-6.7/fs-relax-mount_setattr-permission-checks.patch @@ -0,0 +1,59 @@ +From 46f5ab762d048dad224436978315cbc2fa79c630 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Tue, 6 Feb 2024 11:22:09 +0100 +Subject: fs: relax mount_setattr() permission checks + +From: Christian Brauner + +commit 46f5ab762d048dad224436978315cbc2fa79c630 upstream. + +When we added mount_setattr() I added additional checks compared to the +legacy do_reconfigure_mnt() and do_change_type() helpers used by regular +mount(2). If that mount had a parent then verify that the caller and the +mount namespace the mount is attached to match and if not make sure that +it's an anonymous mount. + +The real rootfs falls into neither category. It is neither an anonymous +mount because it is obviously attached to the initial mount namespace +but it also obviously doesn't have a parent mount.
So that means legacy +mount(2) allows changing mount properties on the real rootfs but +mount_setattr(2) blocks this. I never thought much about this but of +course someone on this planet of earth changes properties on the real +rootfs as can be seen in [1]. + +Since util-linux finally switched to the new mount api in 2.39 not so +long ago it also relies on mount_setattr() and that surfaced this issue +when Fedora 39 finally switched to it. Fix this. + +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843 +Link: https://lore.kernel.org/r/20240206-vfs-mount-rootfs-v1-1-19b335eee133@kernel.org +Reviewed-by: Jan Kara +Reported-by: Karel Zak +Cc: stable@vger.kernel.org # v5.12+ +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/namespace.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path + /* + * If this is an attached mount make sure it's located in the callers + * mount namespace. If it's not don't let the caller interact with it. +- * If this is a detached mount make sure it has an anonymous mount +- * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. ++ * ++ * If this mount doesn't have a parent it's most often simply a ++ * detached mount with an anonymous mount namespace. IOW, something ++ * that's simply not attached yet. But there are apparently also users ++ * that do change mount properties on the rootfs itself. That obviously ++ * neither has a parent nor is it a detached mount so we cannot ++ * unconditionally check for detached mounts. + */ +- if (!(mnt_has_parent(mnt) ? 
check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) ++ if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) + goto out; + + /* diff --git a/queue-6.7/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch b/queue-6.7/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch new file mode 100644 index 00000000000..175651e1139 --- /dev/null +++ b/queue-6.7/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch @@ -0,0 +1,179 @@ +From 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 Mon Sep 17 00:00:00 2001 +From: Shradha Gupta +Date: Thu, 1 Feb 2024 20:40:38 -0800 +Subject: hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed + +From: Shradha Gupta + +commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 upstream. + +If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER +handler cannot perform VF register successfully as the register call +is received before netvsc_probe is finished. This is because we +register register_netdevice_notifier() very early( even before +vmbus_driver_register()). +To fix this, we try to register each such matching VF( if it is visible +as a netdevice) at the end of netvsc_probe. + +Cc: stable@vger.kernel.org +Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") +Suggested-by: Dexuan Cui +Signed-off-by: Shradha Gupta +Reviewed-by: Haiyang Zhang +Reviewed-by: Dexuan Cui +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 82 ++++++++++++++++++++++++++++++---------- + 1 file changed, 62 insertions(+), 20 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -42,6 +42,10 @@ + #define LINKCHANGE_INT (2 * HZ) + #define VF_TAKEOVER_INT (HZ / 10) + ++/* Macros to define the context of vf registration */ ++#define VF_REG_IN_PROBE 1 ++#define VF_REG_IN_NOTIFIER 2 ++ + static unsigned int ring_size __ro_after_init = 128; + module_param(ring_size, uint, 0444); + MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); +@@ -2183,7 +2187,7 @@ static rx_handler_result_t netvsc_vf_han + } + + static int netvsc_vf_join(struct net_device *vf_netdev, +- struct net_device *ndev) ++ struct net_device *ndev, int context) + { + struct net_device_context *ndev_ctx = netdev_priv(ndev); + int ret; +@@ -2206,7 +2210,11 @@ static int netvsc_vf_join(struct net_dev + goto upper_link_failed; + } + +- schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); ++ /* If this registration is called from probe context vf_takeover ++ * is taken care of later in probe itself. 
++ */ ++ if (context == VF_REG_IN_NOTIFIER) ++ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + + call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); + +@@ -2344,7 +2352,7 @@ static int netvsc_prepare_bonding(struct + return NOTIFY_DONE; + } + +-static int netvsc_register_vf(struct net_device *vf_netdev) ++static int netvsc_register_vf(struct net_device *vf_netdev, int context) + { + struct net_device_context *net_device_ctx; + struct netvsc_device *netvsc_dev; +@@ -2384,7 +2392,7 @@ static int netvsc_register_vf(struct net + + netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); + +- if (netvsc_vf_join(vf_netdev, ndev) != 0) ++ if (netvsc_vf_join(vf_netdev, ndev, context) != 0) + return NOTIFY_DONE; + + dev_hold(vf_netdev); +@@ -2482,10 +2490,31 @@ static int netvsc_unregister_vf(struct n + return NOTIFY_OK; + } + ++static int check_dev_is_matching_vf(struct net_device *event_ndev) ++{ ++ /* Skip NetVSC interfaces */ ++ if (event_ndev->netdev_ops == &device_ops) ++ return -ENODEV; ++ ++ /* Avoid non-Ethernet type devices */ ++ if (event_ndev->type != ARPHRD_ETHER) ++ return -ENODEV; ++ ++ /* Avoid Vlan dev with same MAC registering as VF */ ++ if (is_vlan_dev(event_ndev)) ++ return -ENODEV; ++ ++ /* Avoid Bonding master dev with same MAC registering as VF */ ++ if (netif_is_bond_master(event_ndev)) ++ return -ENODEV; ++ ++ return 0; ++} ++ + static int netvsc_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) + { +- struct net_device *net = NULL; ++ struct net_device *net = NULL, *vf_netdev; + struct net_device_context *net_device_ctx; + struct netvsc_device_info *device_info = NULL; + struct netvsc_device *nvdev; +@@ -2597,6 +2626,30 @@ static int netvsc_probe(struct hv_device + } + + list_add(&net_device_ctx->list, &netvsc_dev_list); ++ ++ /* When the hv_netvsc driver is unloaded and reloaded, the ++ * NET_DEVICE_REGISTER for the vf device is replayed before probe ++ * is complete. 
This is because register_netdevice_notifier() gets ++ * registered before vmbus_driver_register() so that callback func ++ * is set before probe and we don't miss events like NETDEV_POST_INIT ++ * So, in this section we try to register the matching vf device that ++ * is present as a netdevice, knowing that its register call is not ++ * processed in the netvsc_netdev_notifier(as probing is progress and ++ * get_netvsc_byslot fails). ++ */ ++ for_each_netdev(dev_net(net), vf_netdev) { ++ ret = check_dev_is_matching_vf(vf_netdev); ++ if (ret != 0) ++ continue; ++ ++ if (net != get_netvsc_byslot(vf_netdev)) ++ continue; ++ ++ netvsc_prepare_bonding(vf_netdev); ++ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); ++ __netvsc_vf_setup(net, vf_netdev); ++ break; ++ } + rtnl_unlock(); + + netvsc_devinfo_put(device_info); +@@ -2752,28 +2805,17 @@ static int netvsc_netdev_event(struct no + unsigned long event, void *ptr) + { + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); ++ int ret = 0; + +- /* Skip our own events */ +- if (event_dev->netdev_ops == &device_ops) +- return NOTIFY_DONE; +- +- /* Avoid non-Ethernet type devices */ +- if (event_dev->type != ARPHRD_ETHER) +- return NOTIFY_DONE; +- +- /* Avoid Vlan dev with same MAC registering as VF */ +- if (is_vlan_dev(event_dev)) +- return NOTIFY_DONE; +- +- /* Avoid Bonding master dev with same MAC registering as VF */ +- if (netif_is_bond_master(event_dev)) ++ ret = check_dev_is_matching_vf(event_dev); ++ if (ret != 0) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_POST_INIT: + return netvsc_prepare_bonding(event_dev); + case NETDEV_REGISTER: +- return netvsc_register_vf(event_dev); ++ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); + case NETDEV_UNREGISTER: + return netvsc_unregister_vf(event_dev); + case NETDEV_UP: diff --git a/queue-6.7/irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch b/queue-6.7/irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch new file mode 100644 
index 00000000000..9da13dcdc65 --- /dev/null +++ b/queue-6.7/irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch @@ -0,0 +1,80 @@ +From af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 13 Feb 2024 10:12:06 +0000 +Subject: irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update + +From: Marc Zyngier + +commit af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 upstream. + +When updating the affinity of a VPE, the VMOVP command is currently skipped +if the two CPUs are part of the same VPE affinity. + +But this is wrong, as the doorbell corresponding to this VPE is still +delivered on the 'old' CPU, which screws up the balancing. Furthermore, +offlining that 'old' CPU results in doorbell interrupts generated for this +VPE being discarded. + +The harsh reality is that VMOVP cannot be elided when a set_affinity() +request occurs. It needs to be obeyed, and if an optimisation is to be +made, it is at the point where the affinity change request is made (such as +in KVM). + +Drop the VMOVP elision altogether, and only use the vpe_table_mask +to try and stay within the same ITS affinity group if at all possible. 
+ +Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP) +Reported-by: Kunkun Jiang +Signed-off-by: Marc Zyngier +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240213101206.2137483-4-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -3826,8 +3826,9 @@ static int its_vpe_set_affinity(struct i + bool force) + { + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); +- int from, cpu = cpumask_first(mask_val); ++ struct cpumask common, *table_mask; + unsigned long flags; ++ int from, cpu; + + /* + * Changing affinity is mega expensive, so let's be as lazy as +@@ -3843,19 +3844,22 @@ static int its_vpe_set_affinity(struct i + * taken on any vLPI handling path that evaluates vpe->col_idx. + */ + from = vpe_to_cpuid_lock(vpe, &flags); +- if (from == cpu) +- goto out; +- +- vpe->col_idx = cpu; ++ table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; + + /* +- * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD +- * is sharing its VPE table with the current one. ++ * If we are offered another CPU in the same GICv4.1 ITS ++ * affinity, pick this one. Otherwise, any CPU will do. + */ +- if (gic_data_rdist_cpu(cpu)->vpe_table_mask && +- cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) ++ if (table_mask && cpumask_and(&common, mask_val, table_mask)) ++ cpu = cpumask_test_cpu(from, &common) ? 
from : cpumask_first(&common); ++ else ++ cpu = cpumask_first(mask_val); ++ ++ if (from == cpu) + goto out; + ++ vpe->col_idx = cpu; ++ + its_send_vmovp(vpe); + its_vpe_db_proxy_move(vpe, from, cpu); + diff --git a/queue-6.7/irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch b/queue-6.7/irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch new file mode 100644 index 00000000000..819da925a8b --- /dev/null +++ b/queue-6.7/irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch @@ -0,0 +1,48 @@ +From 8b02da04ad978827e5ccd675acf170198f747a7a Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 13 Feb 2024 10:12:05 +0000 +Subject: irqchip/gic-v3-its: Restore quirk probing for ACPI-based systems + +From: Marc Zyngier + +commit 8b02da04ad978827e5ccd675acf170198f747a7a upstream. + +While refactoring the way the ITSs are probed, the handling of quirks +applicable to ACPI-based platforms was lost. As a result, systems such as +HIP07 lose their GICv4 functionality, and some others may even fail to +boot, unless they are configured to boot with DT. + +Move the enabling of quirks into its_probe_one(), making it common to all +firmware implementations.
+ +Fixes: 9585a495ac93 ("irqchip/gic-v3-its: Split allocation from initialisation of its_node") +Signed-off-by: Marc Zyngier +Signed-off-by: Thomas Gleixner +Reviewed-by: Lorenzo Pieralisi +Reviewed-by: Zenghui Yu +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240213101206.2137483-3-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-gic-v3-its.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -5091,6 +5091,8 @@ static int __init its_probe_one(struct i + u32 ctlr; + int err; + ++ its_enable_quirks(its); ++ + if (is_v4(its)) { + if (!(its->typer & GITS_TYPER_VMOVP)) { + err = its_compute_its_list_map(its); +@@ -5442,7 +5444,6 @@ static int __init its_of_probe(struct de + if (!its) + return -ENOMEM; + +- its_enable_quirks(its); + err = its_probe_one(its); + if (err) { + its_node_destroy(its); diff --git a/queue-6.7/irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch b/queue-6.7/irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch new file mode 100644 index 00000000000..5e999dc4022 --- /dev/null +++ b/queue-6.7/irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch @@ -0,0 +1,63 @@ +From b0344d6854d25a8b3b901c778b1728885dd99007 Mon Sep 17 00:00:00 2001 +From: Doug Berger +Date: Fri, 9 Feb 2024 17:24:49 -0800 +Subject: irqchip/irq-brcmstb-l2: Add write memory barrier before exit + +From: Doug Berger + +commit b0344d6854d25a8b3b901c778b1728885dd99007 upstream. + +It was observed on Broadcom devices that use GIC v3 architecture L1 +interrupt controllers as the parent of brcmstb-l2 interrupt controllers +that the deactivation of the parent interrupt could happen before the +brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the +interrupt only to find that no L2 interrupt was pending. The result was a +spurious interrupt invoking handle_bad_irq() with its associated +messaging. 
While this did not create a functional problem it is a waste of +cycles. + +The hazard exists because the memory mapped bus writes to the brcmstb-l2 +registers are buffered and the GIC v3 architecture uses a very efficient +system register write to deactivate the interrupt. + +Add a write memory barrier prior to invoking chained_irq_exit() to +introduce a dsb(st) on those systems to ensure the system register write +cannot be executed until the memory mapped writes are visible to the +system. + +[ florian: Added Fixes tag ] + +Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") +Signed-off-by: Doug Berger +Signed-off-by: Florian Fainelli +Signed-off-by: Thomas Gleixner +Acked-by: Florian Fainelli +Acked-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadcom.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-brcmstb-l2.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-brcmstb-l2.c ++++ b/drivers/irqchip/irq-brcmstb-l2.c +@@ -2,7 +2,7 @@ + /* + * Generic Broadcom Set Top Box Level 2 Interrupt controller driver + * +- * Copyright (C) 2014-2017 Broadcom ++ * Copyright (C) 2014-2024 Broadcom + */ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +@@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(s + generic_handle_domain_irq(b->domain, irq); + } while (status); + out: ++ /* Don't ack parent before all device writes are done */ ++ wmb(); ++ + chained_irq_exit(chip, desc); + } + diff --git a/queue-6.7/kvm-arm64-fix-circular-locking-dependency.patch b/queue-6.7/kvm-arm64-fix-circular-locking-dependency.patch new file mode 100644 index 00000000000..751d59cb753 --- /dev/null +++ b/queue-6.7/kvm-arm64-fix-circular-locking-dependency.patch @@ -0,0 +1,84 @@ +From 10c02aad111df02088d1a81792a709f6a7eca6cc Mon Sep 17 00:00:00 2001 +From: Sebastian Ene +Date: Wed, 24 Jan 2024 09:10:28 +0000 +Subject: 
KVM: arm64: Fix circular locking dependency + +From: Sebastian Ene + +commit 10c02aad111df02088d1a81792a709f6a7eca6cc upstream. + +The rule inside kvm enforces that the vcpu->mutex is taken *inside* +kvm->lock. The rule is violated by the pkvm_create_hyp_vm() which acquires +the kvm->lock while already holding the vcpu->mutex lock from +kvm_vcpu_ioctl(). Avoid the circular locking dependency altogether by +protecting the hyp vm handle with the config_lock, much like we already +do for other forms of VM-scoped data. + +Signed-off-by: Sebastian Ene +Cc: stable@vger.kernel.org +Reviewed-by: Oliver Upton +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20240124091027.1477174-2-sebastianene@google.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/pkvm.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +--- a/arch/arm64/kvm/pkvm.c ++++ b/arch/arm64/kvm/pkvm.c +@@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void) + hyp_mem_base); + } + ++static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) ++{ ++ if (host_kvm->arch.pkvm.handle) { ++ WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, ++ host_kvm->arch.pkvm.handle)); ++ } ++ ++ host_kvm->arch.pkvm.handle = 0; ++ free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); ++} ++ + /* + * Allocates and donates memory for hypervisor VM structs at EL2. 
+ * +@@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct k + return 0; + + destroy_vm: +- pkvm_destroy_hyp_vm(host_kvm); ++ __pkvm_destroy_hyp_vm(host_kvm); + return ret; + free_vm: + free_pages_exact(hyp_vm, hyp_vm_sz); +@@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_ + { + int ret = 0; + +- mutex_lock(&host_kvm->lock); ++ mutex_lock(&host_kvm->arch.config_lock); + if (!host_kvm->arch.pkvm.handle) + ret = __pkvm_create_hyp_vm(host_kvm); +- mutex_unlock(&host_kvm->lock); ++ mutex_unlock(&host_kvm->arch.config_lock); + + return ret; + } + + void pkvm_destroy_hyp_vm(struct kvm *host_kvm) + { +- if (host_kvm->arch.pkvm.handle) { +- WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, +- host_kvm->arch.pkvm.handle)); +- } +- +- host_kvm->arch.pkvm.handle = 0; +- free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); ++ mutex_lock(&host_kvm->arch.config_lock); ++ __pkvm_destroy_hyp_vm(host_kvm); ++ mutex_unlock(&host_kvm->arch.config_lock); + } + + int pkvm_init_host_vm(struct kvm *host_kvm) diff --git a/queue-6.7/kvm-s390-vsie-fix-race-during-shadow-creation.patch b/queue-6.7/kvm-s390-vsie-fix-race-during-shadow-creation.patch new file mode 100644 index 00000000000..7c813a884f8 --- /dev/null +++ b/queue-6.7/kvm-s390-vsie-fix-race-during-shadow-creation.patch @@ -0,0 +1,59 @@ +From fe752331d4b361d43cfd0b89534b4b2176057c32 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Wed, 20 Dec 2023 13:53:17 +0100 +Subject: KVM: s390: vsie: fix race during shadow creation + +From: Christian Borntraeger + +commit fe752331d4b361d43cfd0b89534b4b2176057c32 upstream. + +Right now it is possible to see gmap->private being zero in +kvm_s390_vsie_gmap_notifier resulting in a crash. This is due to the +fact that we add gmap->private == kvm after creation: + +static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + struct vsie_page *vsie_page) +{ +[...] 
+ gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); + if (IS_ERR(gmap)) + return PTR_ERR(gmap); + gmap->private = vcpu->kvm; + +Let children inherit the private field of the parent. + +Reported-by: Marc Hartmayer +Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") +Cc: +Cc: David Hildenbrand +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Signed-off-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kvm/vsie.c | 1 - + arch/s390/mm/gmap.c | 1 + + 2 files changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/kvm/vsie.c ++++ b/arch/s390/kvm/vsie.c +@@ -1220,7 +1220,6 @@ static int acquire_gmap_shadow(struct kv + gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); + if (IS_ERR(gmap)) + return PTR_ERR(gmap); +- gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; + WRITE_ONCE(vsie_page->gmap, gmap); + return 0; +--- a/arch/s390/mm/gmap.c ++++ b/arch/s390/mm/gmap.c +@@ -1691,6 +1691,7 @@ struct gmap *gmap_shadow(struct gmap *pa + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); ++ new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; diff --git a/queue-6.7/loongarch-fix-earlycon-parameter-if-kasan-enabled.patch b/queue-6.7/loongarch-fix-earlycon-parameter-if-kasan-enabled.patch new file mode 100644 index 00000000000..0e76da501f2 --- /dev/null +++ b/queue-6.7/loongarch-fix-earlycon-parameter-if-kasan-enabled.patch @@ -0,0 +1,34 @@ +From 639420e9f6cd9ca074732b17ac450d2518d5937f Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Tue, 6 Feb 2024 12:32:05 +0800 +Subject: LoongArch: Fix earlycon parameter if KASAN enabled + +From: Huacai Chen + +commit 639420e9f6cd9ca074732b17ac450d2518d5937f upstream. 
+ +The earlycon parameter is based on fixmap, and fixmap addresses are not +supposed to be shadowed by KASAN. So return the kasan_early_shadow_page +in kasan_mem_to_shadow() if the input address is above FIXADDR_START. +Otherwise earlycon cannot work after kasan_init(). + +Cc: stable@vger.kernel.org +Fixes: 5aa4ac64e6add3e ("LoongArch: Add KASAN (Kernel Address Sanitizer) support") +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/mm/kasan_init.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/loongarch/mm/kasan_init.c ++++ b/arch/loongarch/mm/kasan_init.c +@@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *ad + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + ++ if (maddr >= FIXADDR_START) ++ return (void *)(kasan_early_shadow_page); ++ + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: diff --git a/queue-6.7/mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch b/queue-6.7/mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch new file mode 100644 index 00000000000..188f915c1ce --- /dev/null +++ b/queue-6.7/mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch @@ -0,0 +1,37 @@ +From b9e4bc1046d20e0623a80660ef8627448056f817 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 2 Feb 2024 11:19:56 -0800 +Subject: mm/damon/sysfs-schemes: fix wrong DAMOS tried regions update timeout setup + +From: SeongJae Park + +commit b9e4bc1046d20e0623a80660ef8627448056f817 upstream. + +DAMON sysfs interface's update_schemes_tried_regions command has a timeout +of two apply intervals of the DAMOS scheme. Having zero value DAMOS +scheme apply interval means it will use the aggregation interval as the +value. However, the timeout setup logic is mistakenly using the sampling +interval instead of the aggregation interval for the case.
This could +cause earlier-than-expected timeout of the command. Fix it. + +Link: https://lkml.kernel.org/r/20240202191956.88791-1-sj@kernel.org +Fixes: 7d6fa31a2fd7 ("mm/damon/sysfs-schemes: add timeout for update_schemes_tried_regions") +Signed-off-by: SeongJae Park +Cc: # 6.7.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs-schemes.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/damon/sysfs-schemes.c ++++ b/mm/damon/sysfs-schemes.c +@@ -1928,7 +1928,7 @@ static void damos_tried_regions_init_upd + sysfs_regions->upd_timeout_jiffies = jiffies + + 2 * usecs_to_jiffies(scheme->apply_interval_us ? + scheme->apply_interval_us : +- ctx->attrs.sample_interval); ++ ctx->attrs.aggr_interval); + } + } + diff --git a/queue-6.7/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch b/queue-6.7/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch new file mode 100644 index 00000000000..5e11db04393 --- /dev/null +++ b/queue-6.7/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch @@ -0,0 +1,103 @@ +From e656c7a9e59607d1672d85ffa9a89031876ffe67 Mon Sep 17 00:00:00 2001 +From: Prakash Sangappa +Date: Tue, 23 Jan 2024 12:04:42 -0800 +Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE + +From: Prakash Sangappa + +commit e656c7a9e59607d1672d85ffa9a89031876ffe67 upstream. + +For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in +shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages +are not reserved. However when the shared memory is attached with the +shmat() call the hugetlb pages are getting reserved incorrectly for +SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. + +------------------------------- +Following test shows the issue. 
+ +$cat shmhtb.c + +int main() +{ + int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; + int shmid; + + shmid = shmget(SKEY, SHMSZ, shmflags); + if (shmid < 0) + { + printf("shmat: shmget() failed, %d\n", errno); + return 1; + } + printf("After shmget()\n"); + system("cat /proc/meminfo | grep -i hugepages_"); + + shmat(shmid, NULL, 0); + printf("\nAfter shmat()\n"); + system("cat /proc/meminfo | grep -i hugepages_"); + + shmctl(shmid, IPC_RMID, NULL); + return 0; +} + + #sysctl -w vm.nr_hugepages=20 + #./shmhtb + +After shmget() +HugePages_Total: 20 +HugePages_Free: 20 +HugePages_Rsvd: 0 +HugePages_Surp: 0 + +After shmat() +HugePages_Total: 20 +HugePages_Free: 20 +HugePages_Rsvd: 5 <-- +HugePages_Surp: 0 +-------------------------------- + +Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared +memory in the shmat() call. + +Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com +Signed-off-by: Prakash Sangappa +Acked-by: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct fi + loff_t len, vma_len; + int ret; + struct hstate *h = hstate_file(file); ++ vm_flags_t vm_flags; + + /* + * vma address alignment (but not the pgoff alignment) has +@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct fi + file_accessed(file); + + ret = -ENOMEM; ++ ++ vm_flags = vma->vm_flags; ++ /* ++ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip ++ * reserving here. Note: only for SHM hugetlbfs file, the inode ++ * flag S_PRIVATE is set. 
++ */ ++ if (inode->i_flags & S_PRIVATE) ++ vm_flags |= VM_NORESERVE; ++ + if (!hugetlb_reserve_pages(inode, + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma, +- vma->vm_flags)) ++ vm_flags)) + goto out; + + ret = 0; diff --git a/queue-6.7/mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch b/queue-6.7/mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch new file mode 100644 index 00000000000..f19c3318d4a --- /dev/null +++ b/queue-6.7/mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch @@ -0,0 +1,69 @@ +From 58aeb5623c2ebdadefe6352b14f8076a7073fea0 Mon Sep 17 00:00:00 2001 +From: Fred Ai +Date: Sat, 3 Feb 2024 02:29:08 -0800 +Subject: mmc: sdhci-pci-o2micro: Fix a warm reboot issue that disk can't be detected by BIOS + +From: Fred Ai + +commit 58aeb5623c2ebdadefe6352b14f8076a7073fea0 upstream. + +Driver shall switch clock source from DLL clock to +OPE clock when power off card to ensure that card +can be identified with OPE clock by BIOS. 
+ +Signed-off-by: Fred Ai +Fixes:4be33cf18703 ("mmc: sdhci-pci-o2micro: Improve card input timing at SDR104/HS200 mode") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240203102908.4683-1-fredaibayhubtech@126.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-pci-o2micro.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +--- a/drivers/mmc/host/sdhci-pci-o2micro.c ++++ b/drivers/mmc/host/sdhci-pci-o2micro.c +@@ -693,6 +693,35 @@ static int sdhci_pci_o2_init_sd_express( + return 0; + } + ++static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) ++{ ++ struct sdhci_pci_chip *chip; ++ struct sdhci_pci_slot *slot = sdhci_priv(host); ++ u32 scratch_32 = 0; ++ u8 scratch_8 = 0; ++ ++ chip = slot->chip; ++ ++ if (mode == MMC_POWER_OFF) { ++ /* UnLock WP */ ++ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); ++ scratch_8 &= 0x7f; ++ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); ++ ++ /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ ++ pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); ++ scratch_32 &= ~(O2_SD_SEL_DLL); ++ pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); ++ ++ /* Lock WP */ ++ pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); ++ scratch_8 |= 0x80; ++ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); ++ } ++ ++ sdhci_set_power(host, mode, vdd); ++} ++ + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) + { + struct sdhci_pci_chip *chip; +@@ -1051,6 +1080,7 @@ static const struct sdhci_ops sdhci_pci_ + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, ++ .set_power = sdhci_pci_o2_set_power, + }; + + const struct sdhci_pci_fixes sdhci_o2 = { diff --git a/queue-6.7/net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch 
b/queue-6.7/net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch new file mode 100644 index 00000000000..dd78ae99bbc --- /dev/null +++ b/queue-6.7/net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch @@ -0,0 +1,62 @@ +From bc4ce46b1e3d1da4309405cd4afc7c0fcddd0b90 Mon Sep 17 00:00:00 2001 +From: Sinthu Raja +Date: Tue, 6 Feb 2024 06:29:28 +0530 +Subject: net: ethernet: ti: cpsw: enable mac_managed_pm to fix mdio + +From: Sinthu Raja + +commit bc4ce46b1e3d1da4309405cd4afc7c0fcddd0b90 upstream. + +The below commit introduced a WARN when phy state is not in the states: +PHY_HALTED, PHY_READY and PHY_UP. +commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") + +When cpsw resumes, there have port in PHY_NOLINK state, so the below +warning comes out. Set mac_managed_pm be true to tell mdio that the phy +resume/suspend is managed by the mac, to fix the following warning: + +WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 +CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 +Hardware name: Generic AM33XX (Flattened Device Tree) + unwind_backtrace from show_stack+0x18/0x1c + show_stack from dump_stack_lvl+0x24/0x2c + dump_stack_lvl from __warn+0x84/0x15c + __warn from warn_slowpath_fmt+0x1a8/0x1c8 + warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 + mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 + dpm_run_callback from device_resume+0xb8/0x2b8 + device_resume from dpm_resume+0x144/0x314 + dpm_resume from dpm_resume_end+0x14/0x20 + dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 + suspend_devices_and_enter from pm_suspend+0x2e0/0x33c + pm_suspend from state_store+0x74/0xd0 + state_store from kernfs_fop_write_iter+0x104/0x1ec + kernfs_fop_write_iter from vfs_write+0x1b8/0x358 + vfs_write from ksys_write+0x78/0xf8 + ksys_write from ret_fast_syscall+0x0/0x54 +Exception stack(0xe094dfa8 to 0xe094dff0) +dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 
+dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 +dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 + +Cc: # v6.0+ +Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") +Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") +Signed-off-by: Sinthu Raja +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/cpsw.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/ti/cpsw.c ++++ b/drivers/net/ethernet/ti/cpsw.c +@@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_ + } + } + ++ phy->mac_managed_pm = true; ++ + slave->phy = phy; + + phy_attached_info(slave->phy); diff --git a/queue-6.7/net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch b/queue-6.7/net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch new file mode 100644 index 00000000000..28a24c07ada --- /dev/null +++ b/queue-6.7/net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch @@ -0,0 +1,63 @@ +From 9def04e759caa5a3d741891037ae99f81e2fff01 Mon Sep 17 00:00:00 2001 +From: Sinthu Raja +Date: Tue, 6 Feb 2024 06:29:27 +0530 +Subject: net: ethernet: ti: cpsw_new: enable mac_managed_pm to fix mdio + +From: Sinthu Raja + +commit 9def04e759caa5a3d741891037ae99f81e2fff01 upstream. + +The below commit introduced a WARN when phy state is not in the states: +PHY_HALTED, PHY_READY and PHY_UP. +commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") + +When cpsw_new resumes, there have port in PHY_NOLINK state, so the below +warning comes out. 
Set mac_managed_pm be true to tell mdio that the phy +resume/suspend is managed by the mac, to fix the following warning: + +WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 +CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 +Hardware name: Generic AM33XX (Flattened Device Tree) + unwind_backtrace from show_stack+0x18/0x1c + show_stack from dump_stack_lvl+0x24/0x2c + dump_stack_lvl from __warn+0x84/0x15c + __warn from warn_slowpath_fmt+0x1a8/0x1c8 + warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 + mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 + dpm_run_callback from device_resume+0xb8/0x2b8 + device_resume from dpm_resume+0x144/0x314 + dpm_resume from dpm_resume_end+0x14/0x20 + dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 + suspend_devices_and_enter from pm_suspend+0x2e0/0x33c + pm_suspend from state_store+0x74/0xd0 + state_store from kernfs_fop_write_iter+0x104/0x1ec + kernfs_fop_write_iter from vfs_write+0x1b8/0x358 + vfs_write from ksys_write+0x78/0xf8 + ksys_write from ret_fast_syscall+0x0/0x54 +Exception stack(0xe094dfa8 to 0xe094dff0) +dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 +dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 +dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 + +Cc: # v6.0+ +Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") +Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") +Signed-off-by: Sinthu Raja +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/cpsw_new.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/ti/cpsw_new.c ++++ b/drivers/net/ethernet/ti/cpsw_new.c +@@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_ + slave->slave_num); + return; + } ++ ++ phy->mac_managed_pm = true; ++ + slave->phy = phy; + + phy_attached_info(slave->phy); diff --git 
a/queue-6.7/net-stmmac-protect-updates-of-64-bit-statistics-counters.patch b/queue-6.7/net-stmmac-protect-updates-of-64-bit-statistics-counters.patch new file mode 100644 index 00000000000..632268e6794 --- /dev/null +++ b/queue-6.7/net-stmmac-protect-updates-of-64-bit-statistics-counters.patch @@ -0,0 +1,740 @@ +From 38cc3c6dcc09dc3a1800b5ec22aef643ca11eab8 Mon Sep 17 00:00:00 2001 +From: Petr Tesarik +Date: Sat, 3 Feb 2024 20:09:27 +0100 +Subject: net: stmmac: protect updates of 64-bit statistics counters + +From: Petr Tesarik + +commit 38cc3c6dcc09dc3a1800b5ec22aef643ca11eab8 upstream. + +As explained by a comment in <linux/u64_stats_sync.h>, write side of struct +u64_stats_sync must ensure mutual exclusion, or one seqcount update could +be lost on 32-bit platforms, thus blocking readers forever. Such lockups +have been observed in real world after stmmac_xmit() on one CPU raced with +stmmac_napi_poll_tx() on another CPU. + +To fix the issue without introducing a new lock, split the statistics into +three parts: + +1. fields updated only under the tx queue lock, +2. fields updated only during NAPI poll, +3. fields updated only from interrupt context, + +Updates to fields in the first two groups are already serialized through +other locks. It is sufficient to split the existing struct u64_stats_sync +so that each group has its own. + +Note that tx_set_ic_bit is updated from both contexts. Split this counter +so that each context gets its own, and calculate their sum to get the total +value in stmmac_get_ethtool_stats(). + +For the third group, multiple interrupts may be processed by different CPUs +at the same time, but interrupts on the same CPU will not nest. Move fields +from this group to a newly created per-cpu struct stmmac_pcpu_stats.
+ +Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") +Link: https://lore.kernel.org/netdev/Za173PhviYg-1qIn@torres.zugschlus.de/t/ +Cc: stable@vger.kernel.org +Signed-off-by: Petr Tesarik +Reviewed-by: Jisheng Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/common.h | 56 +++++--- + drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 15 +- + drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 15 +- + drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 15 +- + drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 15 +- + drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 129 ++++++++++++------ + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 133 +++++++++---------- + 7 files changed, 221 insertions(+), 157 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/common.h ++++ b/drivers/net/ethernet/stmicro/stmmac/common.h +@@ -59,28 +59,51 @@ + #undef FRAME_FILTER_DEBUG + /* #define FRAME_FILTER_DEBUG */ + ++struct stmmac_q_tx_stats { ++ u64_stats_t tx_bytes; ++ u64_stats_t tx_set_ic_bit; ++ u64_stats_t tx_tso_frames; ++ u64_stats_t tx_tso_nfrags; ++}; ++ ++struct stmmac_napi_tx_stats { ++ u64_stats_t tx_packets; ++ u64_stats_t tx_pkt_n; ++ u64_stats_t poll; ++ u64_stats_t tx_clean; ++ u64_stats_t tx_set_ic_bit; ++}; ++ + struct stmmac_txq_stats { +- u64 tx_bytes; +- u64 tx_packets; +- u64 tx_pkt_n; +- u64 tx_normal_irq_n; +- u64 napi_poll; +- u64 tx_clean; +- u64 tx_set_ic_bit; +- u64 tx_tso_frames; +- u64 tx_tso_nfrags; +- struct u64_stats_sync syncp; ++ /* Updates protected by tx queue lock. */ ++ struct u64_stats_sync q_syncp; ++ struct stmmac_q_tx_stats q; ++ ++ /* Updates protected by NAPI poll logic. 
*/ ++ struct u64_stats_sync napi_syncp; ++ struct stmmac_napi_tx_stats napi; + } ____cacheline_aligned_in_smp; + ++struct stmmac_napi_rx_stats { ++ u64_stats_t rx_bytes; ++ u64_stats_t rx_packets; ++ u64_stats_t rx_pkt_n; ++ u64_stats_t poll; ++}; ++ + struct stmmac_rxq_stats { +- u64 rx_bytes; +- u64 rx_packets; +- u64 rx_pkt_n; +- u64 rx_normal_irq_n; +- u64 napi_poll; +- struct u64_stats_sync syncp; ++ /* Updates protected by NAPI poll logic. */ ++ struct u64_stats_sync napi_syncp; ++ struct stmmac_napi_rx_stats napi; + } ____cacheline_aligned_in_smp; + ++/* Updates on each CPU protected by not allowing nested irqs. */ ++struct stmmac_pcpu_stats { ++ struct u64_stats_sync syncp; ++ u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES]; ++ u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES]; ++}; ++ + /* Extra statistic and debug information exposed by ethtool */ + struct stmmac_extra_stats { + /* Transmit errors */ +@@ -205,6 +228,7 @@ struct stmmac_extra_stats { + /* per queue statistics */ + struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; + struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; ++ struct stmmac_pcpu_stats __percpu *pcpu_stats; + unsigned long rx_dropped; + unsigned long rx_errors; + unsigned long tx_dropped; +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +@@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(str + struct stmmac_extra_stats *x, u32 chan, + u32 dir) + { +- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; +- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; ++ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); + int ret = 0; + u32 v; + +@@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(str + + if (v & EMAC_TX_INT) { + ret |= handle_tx; +- u64_stats_update_begin(&txq_stats->syncp); +- txq_stats->tx_normal_irq_n++; +- u64_stats_update_end(&txq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ 
u64_stats_inc(&stats->tx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + } + + if (v & EMAC_TX_DMA_STOP_INT) +@@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(str + + if (v & EMAC_RX_INT) { + ret |= handle_rx; +- u64_stats_update_begin(&rxq_stats->syncp); +- rxq_stats->rx_normal_irq_n++; +- u64_stats_update_end(&rxq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->rx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + } + + if (v & EMAC_RX_BUF_UA_INT) +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +@@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_p + const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; + u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan)); + u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); +- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; +- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; ++ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); + int ret = 0; + + if (dir == DMA_DIR_RX) +@@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_p + } + /* TX/RX NORMAL interrupts */ + if (likely(intr_status & DMA_CHAN_STATUS_RI)) { +- u64_stats_update_begin(&rxq_stats->syncp); +- rxq_stats->rx_normal_irq_n++; +- u64_stats_update_end(&rxq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->rx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & DMA_CHAN_STATUS_TI)) { +- u64_stats_update_begin(&txq_stats->syncp); +- txq_stats->tx_normal_irq_n++; +- u64_stats_update_end(&txq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->tx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_tx; + } + +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c ++++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +@@ -162,8 +162,7 @@ static void show_rx_process_state(unsign + int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, + struct stmmac_extra_stats *x, u32 chan, u32 dir) + { +- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; +- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; ++ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); + int ret = 0; + /* read the status register (CSR5) */ + u32 intr_status = readl(ioaddr + DMA_STATUS); +@@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_pr + u32 value = readl(ioaddr + DMA_INTR_ENA); + /* to schedule NAPI on real RIE event. */ + if (likely(value & DMA_INTR_ENA_RIE)) { +- u64_stats_update_begin(&rxq_stats->syncp); +- rxq_stats->rx_normal_irq_n++; +- u64_stats_update_end(&rxq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->rx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + } + if (likely(intr_status & DMA_STATUS_TI)) { +- u64_stats_update_begin(&txq_stats->syncp); +- txq_stats->tx_normal_irq_n++; +- u64_stats_update_end(&txq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->tx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_tx; + } + if (unlikely(intr_status & DMA_STATUS_ERI)) +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +@@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct + struct stmmac_extra_stats *x, u32 chan, + u32 dir) + { +- struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; +- struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; ++ struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); + u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); + u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + int ret = 0; 
+@@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct + /* TX/RX NORMAL interrupts */ + if (likely(intr_status & XGMAC_NIS)) { + if (likely(intr_status & XGMAC_RI)) { +- u64_stats_update_begin(&rxq_stats->syncp); +- rxq_stats->rx_normal_irq_n++; +- u64_stats_update_end(&rxq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->rx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { +- u64_stats_update_begin(&txq_stats->syncp); +- txq_stats->tx_normal_irq_n++; +- u64_stats_update_end(&txq_stats->syncp); ++ u64_stats_update_begin(&stats->syncp); ++ u64_stats_inc(&stats->tx_normal_irq_n[chan]); ++ u64_stats_update_end(&stats->syncp); + ret |= handle_tx; + } + } +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +@@ -539,44 +539,79 @@ stmmac_set_pauseparam(struct net_device + } + } + ++static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q) ++{ ++ u64 total; ++ int cpu; ++ ++ total = 0; ++ for_each_possible_cpu(cpu) { ++ struct stmmac_pcpu_stats *pcpu; ++ unsigned int start; ++ u64 irq_n; ++ ++ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu); ++ do { ++ start = u64_stats_fetch_begin(&pcpu->syncp); ++ irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]); ++ } while (u64_stats_fetch_retry(&pcpu->syncp, start)); ++ total += irq_n; ++ } ++ return total; ++} ++ ++static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q) ++{ ++ u64 total; ++ int cpu; ++ ++ total = 0; ++ for_each_possible_cpu(cpu) { ++ struct stmmac_pcpu_stats *pcpu; ++ unsigned int start; ++ u64 irq_n; ++ ++ pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu); ++ do { ++ start = u64_stats_fetch_begin(&pcpu->syncp); ++ irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]); ++ } while (u64_stats_fetch_retry(&pcpu->syncp, start)); ++ total += irq_n; ++ } ++ return total; ++} ++ + static void 
stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) + { + u32 tx_cnt = priv->plat->tx_queues_to_use; + u32 rx_cnt = priv->plat->rx_queues_to_use; + unsigned int start; +- int q, stat; +- char *p; ++ int q; + + for (q = 0; q < tx_cnt; q++) { + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; +- struct stmmac_txq_stats snapshot; ++ u64 pkt_n; + + do { +- start = u64_stats_fetch_begin(&txq_stats->syncp); +- snapshot = *txq_stats; +- } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); ++ start = u64_stats_fetch_begin(&txq_stats->napi_syncp); ++ pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n); ++ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); + +- p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n); +- for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { +- *data++ = (*(u64 *)p); +- p += sizeof(u64); +- } ++ *data++ = pkt_n; ++ *data++ = stmmac_get_tx_normal_irq_n(priv, q); + } + + for (q = 0; q < rx_cnt; q++) { + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; +- struct stmmac_rxq_stats snapshot; ++ u64 pkt_n; + + do { +- start = u64_stats_fetch_begin(&rxq_stats->syncp); +- snapshot = *rxq_stats; +- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); ++ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); ++ pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n); ++ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); + +- p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n); +- for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { +- *data++ = (*(u64 *)p); +- p += sizeof(u64); +- } ++ *data++ = pkt_n; ++ *data++ = stmmac_get_rx_normal_irq_n(priv, q); + } + } + +@@ -635,39 +670,49 @@ static void stmmac_get_ethtool_stats(str + pos = j; + for (i = 0; i < rx_queues_count; i++) { + struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i]; +- struct stmmac_rxq_stats snapshot; ++ struct stmmac_napi_rx_stats snapshot; ++ u64 n_irq; + + j = pos; + do { +- start = 
u64_stats_fetch_begin(&rxq_stats->syncp); +- snapshot = *rxq_stats; +- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); +- +- data[j++] += snapshot.rx_pkt_n; +- data[j++] += snapshot.rx_normal_irq_n; +- normal_irq_n += snapshot.rx_normal_irq_n; +- napi_poll += snapshot.napi_poll; ++ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); ++ snapshot = rxq_stats->napi; ++ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); ++ ++ data[j++] += u64_stats_read(&snapshot.rx_pkt_n); ++ n_irq = stmmac_get_rx_normal_irq_n(priv, i); ++ data[j++] += n_irq; ++ normal_irq_n += n_irq; ++ napi_poll += u64_stats_read(&snapshot.poll); + } + + pos = j; + for (i = 0; i < tx_queues_count; i++) { + struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i]; +- struct stmmac_txq_stats snapshot; ++ struct stmmac_napi_tx_stats napi_snapshot; ++ struct stmmac_q_tx_stats q_snapshot; ++ u64 n_irq; + + j = pos; + do { +- start = u64_stats_fetch_begin(&txq_stats->syncp); +- snapshot = *txq_stats; +- } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); +- +- data[j++] += snapshot.tx_pkt_n; +- data[j++] += snapshot.tx_normal_irq_n; +- normal_irq_n += snapshot.tx_normal_irq_n; +- data[j++] += snapshot.tx_clean; +- data[j++] += snapshot.tx_set_ic_bit; +- data[j++] += snapshot.tx_tso_frames; +- data[j++] += snapshot.tx_tso_nfrags; +- napi_poll += snapshot.napi_poll; ++ start = u64_stats_fetch_begin(&txq_stats->q_syncp); ++ q_snapshot = txq_stats->q; ++ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start)); ++ do { ++ start = u64_stats_fetch_begin(&txq_stats->napi_syncp); ++ napi_snapshot = txq_stats->napi; ++ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); ++ ++ data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n); ++ n_irq = stmmac_get_tx_normal_irq_n(priv, i); ++ data[j++] += n_irq; ++ normal_irq_n += n_irq; ++ data[j++] += u64_stats_read(&napi_snapshot.tx_clean); ++ data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) + ++ 
u64_stats_read(&napi_snapshot.tx_set_ic_bit); ++ data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames); ++ data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags); ++ napi_poll += u64_stats_read(&napi_snapshot.poll); + } + normal_irq_n += priv->xstats.rx_early_irq; + data[j++] = normal_irq_n; +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -2442,7 +2442,6 @@ static bool stmmac_xdp_xmit_zc(struct st + struct xdp_desc xdp_desc; + bool work_done = true; + u32 tx_set_ic_bit = 0; +- unsigned long flags; + + /* Avoids TX time-out as we are sharing with slow path */ + txq_trans_cond_update(nq); +@@ -2515,9 +2514,9 @@ static bool stmmac_xdp_xmit_zc(struct st + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + entry = tx_q->cur_tx; + } +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->tx_set_ic_bit += tx_set_ic_bit; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_update_begin(&txq_stats->napi_syncp); ++ u64_stats_add(&txq_stats->napi.tx_set_ic_bit, tx_set_ic_bit); ++ u64_stats_update_end(&txq_stats->napi_syncp); + + if (tx_desc) { + stmmac_flush_tx_descriptors(priv, queue); +@@ -2565,7 +2564,6 @@ static int stmmac_tx_clean(struct stmmac + unsigned int bytes_compl = 0, pkts_compl = 0; + unsigned int entry, xmits = 0, count = 0; + u32 tx_packets = 0, tx_errors = 0; +- unsigned long flags; + + __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); + +@@ -2721,11 +2719,11 @@ static int stmmac_tx_clean(struct stmmac + if (tx_q->dirty_tx != tx_q->cur_tx) + *pending_packets = true; + +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->tx_packets += tx_packets; +- txq_stats->tx_pkt_n += tx_packets; +- txq_stats->tx_clean++; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_update_begin(&txq_stats->napi_syncp); ++ u64_stats_add(&txq_stats->napi.tx_packets, tx_packets); ++ 
u64_stats_add(&txq_stats->napi.tx_pkt_n, tx_packets); ++ u64_stats_inc(&txq_stats->napi.tx_clean); ++ u64_stats_update_end(&txq_stats->napi_syncp); + + priv->xstats.tx_errors += tx_errors; + +@@ -4150,7 +4148,6 @@ static netdev_tx_t stmmac_tso_xmit(struc + struct stmmac_tx_queue *tx_q; + bool has_vlan, set_ic; + u8 proto_hdr_len, hdr; +- unsigned long flags; + u32 pay_len, mss; + dma_addr_t des; + int i; +@@ -4315,13 +4312,13 @@ static netdev_tx_t stmmac_tso_xmit(struc + netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); + } + +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->tx_bytes += skb->len; +- txq_stats->tx_tso_frames++; +- txq_stats->tx_tso_nfrags += nfrags; ++ u64_stats_update_begin(&txq_stats->q_syncp); ++ u64_stats_add(&txq_stats->q.tx_bytes, skb->len); ++ u64_stats_inc(&txq_stats->q.tx_tso_frames); ++ u64_stats_add(&txq_stats->q.tx_tso_nfrags, nfrags); + if (set_ic) +- txq_stats->tx_set_ic_bit++; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_inc(&txq_stats->q.tx_set_ic_bit); ++ u64_stats_update_end(&txq_stats->q_syncp); + + if (priv->sarc_type) + stmmac_set_desc_sarc(priv, first, priv->sarc_type); +@@ -4420,7 +4417,6 @@ static netdev_tx_t stmmac_xmit(struct sk + struct stmmac_tx_queue *tx_q; + bool has_vlan, set_ic; + int entry, first_tx; +- unsigned long flags; + dma_addr_t des; + + tx_q = &priv->dma_conf.tx_queue[queue]; +@@ -4590,11 +4586,11 @@ static netdev_tx_t stmmac_xmit(struct sk + netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); + } + +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->tx_bytes += skb->len; ++ u64_stats_update_begin(&txq_stats->q_syncp); ++ u64_stats_add(&txq_stats->q.tx_bytes, skb->len); + if (set_ic) +- txq_stats->tx_set_ic_bit++; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_inc(&txq_stats->q.tx_set_ic_bit); ++ u64_stats_update_end(&txq_stats->q_syncp); + + if (priv->sarc_type) + 
stmmac_set_desc_sarc(priv, first, priv->sarc_type); +@@ -4858,12 +4854,11 @@ static int stmmac_xdp_xmit_xdpf(struct s + set_ic = false; + + if (set_ic) { +- unsigned long flags; + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, tx_desc); +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->tx_set_ic_bit++; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_update_begin(&txq_stats->q_syncp); ++ u64_stats_inc(&txq_stats->q.tx_set_ic_bit); ++ u64_stats_update_end(&txq_stats->q_syncp); + } + + stmmac_enable_dma_transmission(priv, priv->ioaddr); +@@ -5013,7 +5008,6 @@ static void stmmac_dispatch_skb_zc(struc + unsigned int len = xdp->data_end - xdp->data; + enum pkt_hash_types hash_type; + int coe = priv->hw->rx_csum; +- unsigned long flags; + struct sk_buff *skb; + u32 hash; + +@@ -5038,10 +5032,10 @@ static void stmmac_dispatch_skb_zc(struc + skb_record_rx_queue(skb, queue); + napi_gro_receive(&ch->rxtx_napi, skb); + +- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); +- rxq_stats->rx_pkt_n++; +- rxq_stats->rx_bytes += len; +- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_inc(&rxq_stats->napi.rx_pkt_n); ++ u64_stats_add(&rxq_stats->napi.rx_bytes, len); ++ u64_stats_update_end(&rxq_stats->napi_syncp); + } + + static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) +@@ -5123,7 +5117,6 @@ static int stmmac_rx_zc(struct stmmac_pr + unsigned int desc_size; + struct bpf_prog *prog; + bool failure = false; +- unsigned long flags; + int xdp_status = 0; + int status = 0; + +@@ -5278,9 +5271,9 @@ read_again: + + stmmac_finalize_xdp_rx(priv, xdp_status); + +- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); +- rxq_stats->rx_pkt_n += count; +- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count); ++ 
u64_stats_update_end(&rxq_stats->napi_syncp); + + priv->xstats.rx_dropped += rx_dropped; + priv->xstats.rx_errors += rx_errors; +@@ -5318,7 +5311,6 @@ static int stmmac_rx(struct stmmac_priv + unsigned int desc_size; + struct sk_buff *skb = NULL; + struct stmmac_xdp_buff ctx; +- unsigned long flags; + int xdp_status = 0; + int buf_sz; + +@@ -5571,11 +5563,11 @@ drain_data: + + stmmac_rx_refill(priv, queue); + +- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); +- rxq_stats->rx_packets += rx_packets; +- rxq_stats->rx_bytes += rx_bytes; +- rxq_stats->rx_pkt_n += count; +- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_add(&rxq_stats->napi.rx_packets, rx_packets); ++ u64_stats_add(&rxq_stats->napi.rx_bytes, rx_bytes); ++ u64_stats_add(&rxq_stats->napi.rx_pkt_n, count); ++ u64_stats_update_end(&rxq_stats->napi_syncp); + + priv->xstats.rx_dropped += rx_dropped; + priv->xstats.rx_errors += rx_errors; +@@ -5590,13 +5582,12 @@ static int stmmac_napi_poll_rx(struct na + struct stmmac_priv *priv = ch->priv_data; + struct stmmac_rxq_stats *rxq_stats; + u32 chan = ch->index; +- unsigned long flags; + int work_done; + + rxq_stats = &priv->xstats.rxq_stats[chan]; +- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); +- rxq_stats->napi_poll++; +- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_inc(&rxq_stats->napi.poll); ++ u64_stats_update_end(&rxq_stats->napi_syncp); + + work_done = stmmac_rx(priv, budget, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) { +@@ -5618,13 +5609,12 @@ static int stmmac_napi_poll_tx(struct na + struct stmmac_txq_stats *txq_stats; + bool pending_packets = false; + u32 chan = ch->index; +- unsigned long flags; + int work_done; + + txq_stats = &priv->xstats.txq_stats[chan]; +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->napi_poll++; +- 
u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_update_begin(&txq_stats->napi_syncp); ++ u64_stats_inc(&txq_stats->napi.poll); ++ u64_stats_update_end(&txq_stats->napi_syncp); + + work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets); + work_done = min(work_done, budget); +@@ -5654,17 +5644,16 @@ static int stmmac_napi_poll_rxtx(struct + struct stmmac_rxq_stats *rxq_stats; + struct stmmac_txq_stats *txq_stats; + u32 chan = ch->index; +- unsigned long flags; + + rxq_stats = &priv->xstats.rxq_stats[chan]; +- flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); +- rxq_stats->napi_poll++; +- u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); ++ u64_stats_update_begin(&rxq_stats->napi_syncp); ++ u64_stats_inc(&rxq_stats->napi.poll); ++ u64_stats_update_end(&rxq_stats->napi_syncp); + + txq_stats = &priv->xstats.txq_stats[chan]; +- flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); +- txq_stats->napi_poll++; +- u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); ++ u64_stats_update_begin(&txq_stats->napi_syncp); ++ u64_stats_inc(&txq_stats->napi.poll); ++ u64_stats_update_end(&txq_stats->napi_syncp); + + tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets); + tx_done = min(tx_done, budget); +@@ -6990,10 +6979,13 @@ static void stmmac_get_stats64(struct ne + u64 tx_bytes; + + do { +- start = u64_stats_fetch_begin(&txq_stats->syncp); +- tx_packets = txq_stats->tx_packets; +- tx_bytes = txq_stats->tx_bytes; +- } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); ++ start = u64_stats_fetch_begin(&txq_stats->q_syncp); ++ tx_bytes = u64_stats_read(&txq_stats->q.tx_bytes); ++ } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start)); ++ do { ++ start = u64_stats_fetch_begin(&txq_stats->napi_syncp); ++ tx_packets = u64_stats_read(&txq_stats->napi.tx_packets); ++ } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); + + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; 
+@@ -7005,10 +6997,10 @@ static void stmmac_get_stats64(struct ne + u64 rx_bytes; + + do { +- start = u64_stats_fetch_begin(&rxq_stats->syncp); +- rx_packets = rxq_stats->rx_packets; +- rx_bytes = rxq_stats->rx_bytes; +- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); ++ start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); ++ rx_packets = u64_stats_read(&rxq_stats->napi.rx_packets); ++ rx_bytes = u64_stats_read(&rxq_stats->napi.rx_bytes); ++ } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +@@ -7402,9 +7394,16 @@ int stmmac_dvr_probe(struct device *devi + priv->dev = ndev; + + for (i = 0; i < MTL_MAX_RX_QUEUES; i++) +- u64_stats_init(&priv->xstats.rxq_stats[i].syncp); +- for (i = 0; i < MTL_MAX_TX_QUEUES; i++) +- u64_stats_init(&priv->xstats.txq_stats[i].syncp); ++ u64_stats_init(&priv->xstats.rxq_stats[i].napi_syncp); ++ for (i = 0; i < MTL_MAX_TX_QUEUES; i++) { ++ u64_stats_init(&priv->xstats.txq_stats[i].q_syncp); ++ u64_stats_init(&priv->xstats.txq_stats[i].napi_syncp); ++ } ++ ++ priv->xstats.pcpu_stats = ++ devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats); ++ if (!priv->xstats.pcpu_stats) ++ return -ENOMEM; + + stmmac_set_ethtool_ops(ndev); + priv->pause = pause; diff --git a/queue-6.7/nfp-enable-netdev_xdp_act_redirect-feature-flag.patch b/queue-6.7/nfp-enable-netdev_xdp_act_redirect-feature-flag.patch new file mode 100644 index 00000000000..28d9c5167d0 --- /dev/null +++ b/queue-6.7/nfp-enable-netdev_xdp_act_redirect-feature-flag.patch @@ -0,0 +1,33 @@ +From 0f4d6f011bca0df2051532b41b596366aa272019 Mon Sep 17 00:00:00 2001 +From: James Hershaw +Date: Fri, 2 Feb 2024 13:37:19 +0200 +Subject: nfp: enable NETDEV_XDP_ACT_REDIRECT feature flag + +From: James Hershaw + +commit 0f4d6f011bca0df2051532b41b596366aa272019 upstream. + +Enable previously excluded xdp feature flag for NFD3 devices. 
This +feature flag is required in order to bind nfp interfaces to an xdp +socket and the nfp driver does in fact support the feature. + +Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") +Cc: stable@vger.kernel.org # 6.3+ +Signed-off-by: James Hershaw +Signed-off-by: Louis Peens +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -2588,6 +2588,7 @@ static void nfp_net_netdev_init(struct n + case NFP_NFD_VER_NFD3: + netdev->netdev_ops = &nfp_nfd3_netdev_ops; + netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; ++ netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; + break; + case NFP_NFD_VER_NFDK: + netdev->netdev_ops = &nfp_nfdk_netdev_ops; diff --git a/queue-6.7/nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch b/queue-6.7/nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch new file mode 100644 index 00000000000..be06ec9752e --- /dev/null +++ b/queue-6.7/nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch @@ -0,0 +1,50 @@ +From 1a1c13303ff6d64e6f718dc8aa614e580ca8d9b4 Mon Sep 17 00:00:00 2001 +From: Daniel de Villiers +Date: Fri, 2 Feb 2024 13:37:18 +0200 +Subject: nfp: flower: prevent re-adding mac index for bonded port + +From: Daniel de Villiers + +commit 1a1c13303ff6d64e6f718dc8aa614e580ca8d9b4 upstream. + +When physical ports are reset (either through link failure or manually +toggled down and up again) that are slaved to a Linux bond with a tunnel +endpoint IP address on the bond device, not all tunnel packets arriving +on the bond port are decapped as expected. + +The bond dev assigns the same MAC address to itself and each of its +slaves. When toggling a slave device, the same MAC address is therefore +offloaded to the NFP multiple times with different indexes. 
+ +The issue only occurs when re-adding the shared mac. The +nfp_tunnel_add_shared_mac() function has a conditional check early on +that checks if a mac entry already exists and if that mac entry is +global: (entry && nfp_tunnel_is_mac_idx_global(entry->index)). In the +case of a bonded device (For example br-ex), the mac index is obtained, +and no new index is assigned. + +We therefore modify the conditional in nfp_tunnel_add_shared_mac() to +check if the port belongs to the LAG along with the existing checks to +prevent a new global mac index from being re-assigned to the slave port. + +Fixes: 20cce8865098 ("nfp: flower: enable MAC address sharing for offloadable devs") +CC: stable@vger.kernel.org # 5.1+ +Signed-off-by: Daniel de Villiers +Signed-off-by: Louis Peens +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +@@ -1084,7 +1084,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app + u16 nfp_mac_idx = 0; + + entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); +- if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { ++ if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { + if (entry->bridge_count || + !nfp_flower_is_supported_bridge(netdev)) { + nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, diff --git a/queue-6.7/nfp-use-correct-macro-for-lengthselect-in-bar-config.patch b/queue-6.7/nfp-use-correct-macro-for-lengthselect-in-bar-config.patch new file mode 100644 index 00000000000..aa447157268 --- /dev/null +++ b/queue-6.7/nfp-use-correct-macro-for-lengthselect-in-bar-config.patch @@ -0,0 +1,46 @@ +From b3d4f7f2288901ed2392695919b3c0e24c1b4084 Mon Sep 17 00:00:00 2001 +From: Daniel Basilio +Date: Fri, 2 Feb 2024 13:37:17 +0200 +Subject: nfp: use correct 
macro for LengthSelect in BAR config + +From: Daniel Basilio + +commit b3d4f7f2288901ed2392695919b3c0e24c1b4084 upstream. + +The 1st and 2nd expansion BAR configuration registers are configured, +when the driver starts up, in variables 'barcfg_msix_general' and +'barcfg_msix_xpb', respectively. The 'LengthSelect' field is ORed in +from bit 0, which is incorrect. The 'LengthSelect' field should +start from bit 27. + +This has largely gone un-noticed because +NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT happens to be 0. + +Fixes: 4cb584e0ee7d ("nfp: add CPP access core") +Cc: stable@vger.kernel.org # 4.11+ +Signed-off-by: Daniel Basilio +Signed-off-by: Louis Peens +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c ++++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +@@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pc + const u32 barcfg_msix_general = + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | +- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; ++ NFP_PCIE_BAR_PCIE2CPP_LengthSelect( ++ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); + const u32 barcfg_msix_xpb = + NFP_PCIE_BAR_PCIE2CPP_MapType( + NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | +- NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | ++ NFP_PCIE_BAR_PCIE2CPP_LengthSelect( ++ NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | + NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( + NFP_CPP_TARGET_ISLAND_XPB); + const u32 barcfg_explicit[4] = { diff --git a/queue-6.7/nouveau-gsp-use-correct-size-for-registry-rpc.patch b/queue-6.7/nouveau-gsp-use-correct-size-for-registry-rpc.patch new file mode 100644 index 00000000000..3d5b1d0af3e --- /dev/null +++ b/queue-6.7/nouveau-gsp-use-correct-size-for-registry-rpc.patch @@ -0,0 +1,41 @@ +From 61712c94782ce105253ee1939cda0c5c025b2c0c Mon Sep 17 00:00:00 
2001 +From: Dave Airlie +Date: Tue, 30 Jan 2024 13:26:43 +1000 +Subject: nouveau/gsp: use correct size for registry rpc. + +From: Dave Airlie + +commit 61712c94782ce105253ee1939cda0c5c025b2c0c upstream. + +Timur pointed this out before, and it just slipped my mind, +but this might help some things work better, around pcie power +management. + +Cc: stable@vger.kernel.org # v6.7 +Fixes: 8d55b0a940bb ("nouveau/gsp: add some basic registry entries.") +Signed-off-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20240130032643.2498315-1-airlied@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +@@ -1111,7 +1111,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gs + if (IS_ERR(rpc)) + return PTR_ERR(rpc); + +- rpc->size = sizeof(*rpc); + rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; + + str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); +@@ -1127,6 +1126,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gs + strings += name_len; + str_offset += name_len; + } ++ rpc->size = str_offset; + + return nvkm_gsp_rpc_wr(gsp, rpc, false); + } diff --git a/queue-6.7/of-property-fix-typo-in-io-channels.patch b/queue-6.7/of-property-fix-typo-in-io-channels.patch new file mode 100644 index 00000000000..ea04af2b22f --- /dev/null +++ b/queue-6.7/of-property-fix-typo-in-io-channels.patch @@ -0,0 +1,35 @@ +From 8f7e917907385e112a845d668ae2832f41e64bf5 Mon Sep 17 00:00:00 2001 +From: Nuno Sa +Date: Tue, 23 Jan 2024 16:14:22 +0100 +Subject: of: property: fix typo in io-channels + +From: Nuno Sa + +commit 8f7e917907385e112a845d668ae2832f41e64bf5 upstream. + +The property is io-channels and not io-channel. This was effectively +preventing the devlink creation.
+ +Fixes: 8e12257dead7 ("of: property: Add device link support for iommus, mboxes and io-channels") +Cc: stable@vger.kernel.org +Signed-off-by: Nuno Sa +Reviewed-by: Saravana Kannan +Acked-by: Jonathan Cameron +Link: https://lore.kernel.org/r/20240123-iio-backend-v7-1-1bff236b8693@analog.com +Signed-off-by: Rob Herring +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/property.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/of/property.c ++++ b/drivers/of/property.c +@@ -1213,7 +1213,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#c + DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") + DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") + DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") +-DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") ++DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") + DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) + DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") + DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") diff --git a/queue-6.7/pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch b/queue-6.7/pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch new file mode 100644 index 00000000000..99c63566b04 --- /dev/null +++ b/queue-6.7/pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch @@ -0,0 +1,52 @@ +From b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Fri, 26 Jan 2024 11:40:37 +0300 +Subject: PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dan Carpenter + +commit b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 upstream. + +The "msg_addr" variable is u64. However, the "aligned_offset" is an +unsigned int. This means that when the code does: + + msg_addr &= ~aligned_offset; + +it will unintentionally zero out the high 32 bits. 
Use ALIGN_DOWN() to do +the alignment instead. + +Fixes: 2217fffcd63f ("PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support") +Link: https://lore.kernel.org/r/af59c7ad-ab93-40f7-ad4a-7ac0b14d37f5@moroto.mountain +Signed-off-by: Dan Carpenter +Signed-off-by: Bjorn Helgaas +Reviewed-by: Niklas Cassel +Reviewed-by: Ilpo Järvinen +Reviewed-by: Manivannan Sadhasivam +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/pci/controller/dwc/pcie-designware-ep.c ++++ b/drivers/pci/controller/dwc/pcie-designware-ep.c +@@ -6,6 +6,7 @@ + * Author: Kishon Vijay Abraham I + */ + ++#include <linux/align.h> + #include <linux/bitfield.h> + #include <linux/of.h> + #include <linux/platform_device.h> +@@ -615,7 +616,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_ + } + + aligned_offset = msg_addr & (epc->mem->window.page_size - 1); +- msg_addr &= ~aligned_offset; ++ msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); + ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, + epc->mem->window.page_size); + if (ret) diff --git a/queue-6.7/pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch b/queue-6.7/pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch new file mode 100644 index 00000000000..03c81ad7918 --- /dev/null +++ b/queue-6.7/pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch @@ -0,0 +1,34 @@ +From 741ba0134fa7822fcf4e4a0a537a5c4cfd706b20 Mon Sep 17 00:00:00 2001 +From: Konrad Dybcio +Date: Wed, 27 Dec 2023 16:21:24 +0100 +Subject: pmdomain: core: Move the unused cleanup to a _sync initcall + +From: Konrad Dybcio + +commit 741ba0134fa7822fcf4e4a0a537a5c4cfd706b20 upstream. + +The unused clock cleanup uses the _sync initcall to give all users at +earlier initcalls time to probe. Do the same to avoid leaving some PDs +dangling at "on" (which actually happened on qcom!).
+ +Fixes: 2fe71dcdfd10 ("PM / domains: Add late_initcall to disable unused PM domains") +Signed-off-by: Konrad Dybcio +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20231227-topic-pmdomain_sync_cleanup-v1-1-5f36769d538b@linaro.org +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/power/domain.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/power/domain.c ++++ b/drivers/base/power/domain.c +@@ -1111,7 +1111,7 @@ static int __init genpd_power_off_unused + + return 0; + } +-late_initcall(genpd_power_off_unused); ++late_initcall_sync(genpd_power_off_unused); + + #ifdef CONFIG_PM_SLEEP + diff --git a/queue-6.7/pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch b/queue-6.7/pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch new file mode 100644 index 00000000000..32660e033b4 --- /dev/null +++ b/queue-6.7/pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch @@ -0,0 +1,42 @@ +From f0e4a1356466ec1858ae8e5c70bea2ce5e55008b Mon Sep 17 00:00:00 2001 +From: Geert Uytterhoeven +Date: Fri, 12 Jan 2024 17:33:55 +0100 +Subject: pmdomain: renesas: r8a77980-sysc: CR7 must be always on +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Geert Uytterhoeven + +commit f0e4a1356466ec1858ae8e5c70bea2ce5e55008b upstream. + +The power domain containing the Cortex-R7 CPU core on the R-Car V3H SoC +must always be in power-on state, unlike on other SoCs in the R-Car Gen3 +family. See Table 9.4 "Power domains" in the R-Car Series, 3rd +Generation Hardware User’s Manual Rev.1.00 and later. + +Fix this by marking the domain as a CPU domain without control +registers, so the driver will not touch it. 
+ +Fixes: 41d6d8bd8ae9 ("soc: renesas: rcar-sysc: add R8A77980 support") +Signed-off-by: Geert Uytterhoeven +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/fdad9a86132d53ecddf72b734dac406915c4edc0.1705076735.git.geert+renesas@glider.be +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pmdomain/renesas/r8a77980-sysc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/pmdomain/renesas/r8a77980-sysc.c ++++ b/drivers/pmdomain/renesas/r8a77980-sysc.c +@@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77 + PD_CPU_NOCR }, + { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, + PD_CPU_NOCR }, +- { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, ++ { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, ++ PD_CPU_NOCR }, + { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, + { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, + { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, diff --git a/queue-6.7/riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch b/queue-6.7/riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch new file mode 100644 index 00000000000..0b1afff2711 --- /dev/null +++ b/queue-6.7/riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch @@ -0,0 +1,34 @@ +From afb2a4fb84555ef9e61061f6ea63ed7087b295d5 Mon Sep 17 00:00:00 2001 +From: Jan Kiszka +Date: Fri, 12 Jan 2024 19:37:29 +0100 +Subject: riscv/efistub: Ensure GP-relative addressing is not used + +From: Jan Kiszka + +commit afb2a4fb84555ef9e61061f6ea63ed7087b295d5 upstream. + +The cflags for the RISC-V efistub were missing -mno-relax, thus were +under the risk that the compiler could use GP-relative addressing. That +happened for _edata with binutils-2.41 and kernel 6.1, causing the +relocation to fail due to an invalid kernel_size in handle_kernel_image. +It was not yet observed with newer versions, but that may just be luck. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kiszka +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/firmware/efi/libstub/Makefile ++++ b/drivers/firmware/efi/libstub/Makefile +@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRL + -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ + -DEFI_HAVE_STRCMP -fno-builtin -fpic \ + $(call cc-option,-mno-single-pic-base) +-cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE ++cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax + cflags-$(CONFIG_LOONGARCH) += -fpie + + cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt diff --git a/queue-6.7/s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch b/queue-6.7/s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch new file mode 100644 index 00000000000..860aa768152 --- /dev/null +++ b/queue-6.7/s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch @@ -0,0 +1,70 @@ +From 2fe8a236436fe40d8d26a1af8d150fc80f04ee1a Mon Sep 17 00:00:00 2001 +From: Alexandra Winter +Date: Tue, 6 Feb 2024 09:58:49 +0100 +Subject: s390/qeth: Fix potential loss of L3-IP@ in case of network issues + +From: Alexandra Winter + +commit 2fe8a236436fe40d8d26a1af8d150fc80f04ee1a upstream. + +Symptom: +In case of a bad cable connection (e.g. dirty optics) a fast sequence of +network DOWN-UP-DOWN-UP could happen. UP triggers recovery of the qeth +interface. In case of a second DOWN while recovery is still ongoing, it +can happen that the IP@ of a Layer3 qeth interface is lost and will not +be recovered by the second UP. + +Problem: +When registration of IP addresses with Layer 3 qeth devices fails, (e.g. +because of bad address format) the respective IP address is deleted from +its hash-table in the driver.
If registration fails because of a ENETDOWN +condition, the address should stay in the hashtable, so a subsequent +recovery can restore it. + +3caa4af834df ("qeth: keep ip-address after LAN_OFFLINE failure") +fixes this for registration failures during normal operation, but not +during recovery. + +Solution: +Keep L3-IP address in case of ENETDOWN in qeth_l3_recover_ip(). For +consistency with qeth_l3_add_ip() we also keep it in case of EADDRINUSE, +i.e. for some reason the card already/still has this address registered. + +Fixes: 4a71df50047f ("qeth: new qeth device driver") +Cc: stable@vger.kernel.org +Signed-off-by: Alexandra Winter +Link: https://lore.kernel.org/r/20240206085849.2902775-1-wintera@linux.ibm.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l3_main.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/s390/net/qeth_l3_main.c ++++ b/drivers/s390/net/qeth_l3_main.c +@@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(stru + if (!recover) { + hash_del(&addr->hnode); + kfree(addr); +- continue; ++ } else { ++ /* prepare for recovery */ ++ addr->disp_flag = QETH_DISP_ADDR_ADD; + } +- addr->disp_flag = QETH_DISP_ADDR_ADD; + } + + mutex_unlock(&card->ip_lock); +@@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qe + if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + rc = qeth_l3_register_addr_entry(card, addr); + +- if (!rc) { ++ if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { ++ /* keep it in the records */ + addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; + } else { ++ /* bad address */ + hash_del(&addr->hnode); + kfree(addr); + } diff --git a/queue-6.7/series b/queue-6.7/series index 22813f3823d..0a9f6cbf648 100644 --- a/queue-6.7/series +++ b/queue-6.7/series @@ -244,3 +244,61 @@ alsa-hda-conexant-add-quirk-for-sws-js201d.patch alsa-hda-realtek-add-ids-for-dell-dual-spk-platform.patch nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes.patch 
nilfs2-fix-hang-in-nilfs_lookup_dirty_data_buffers.patch +crypto-ccp-fix-null-pointer-dereference-in-__sev_platform_shutdown_locked.patch +crypto-algif_hash-remove-bogus-sgl-free-on-zero-length-error-path.patch +nfp-use-correct-macro-for-lengthselect-in-bar-config.patch +nfp-enable-netdev_xdp_act_redirect-feature-flag.patch +nfp-flower-prevent-re-adding-mac-index-for-bonded-port.patch +wifi-iwlwifi-fix-double-free-bug.patch +wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch +wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch +wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch +pci-dwc-fix-a-64bit-bug-in-dw_pcie_ep_raise_msix_irq.patch +exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irq-siglock.patch +irqchip-irq-brcmstb-l2-add-write-memory-barrier-before-exit.patch +irqchip-gic-v3-its-restore-quirk-probing-for-acpi-based-systems.patch +irqchip-gic-v3-its-fix-gicv4.1-vpe-affinity-update.patch +thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch +smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch +smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch +kvm-s390-vsie-fix-race-during-shadow-creation.patch +kvm-arm64-fix-circular-locking-dependency.patch +zonefs-improve-error-handling.patch +mmc-sdhci-pci-o2micro-fix-a-warm-reboot-issue-that-disk-can-t-be-detected-by-bios.patch +arm64-signal-don-t-assume-that-tif_sve-means-we-saved-sve-state.patch +arm64-subscribe-microsoft-azure-cobalt-100-to-arm-neoverse-n2-errata.patch +asoc-sof-ipc3-fix-message-bounds-on-ipc-ops.patch +asoc-tas2781-add-module-parameter-to-tascodec_init.patch +asoc-amd-yc-add-dmi-quirk-for-lenovo-ideapad-pro-5-16arp8.patch +tools-rv-fix-curr_reactor-uninitialized-variable.patch +tools-rv-fix-makefile-compiler-options-for-clang.patch +tools-rtla-remove-unused-sched_getattr-function.patch +tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch 
+tools-rtla-fix-clang-warning-about-mount_point-var-size.patch +tools-rtla-exit-with-exit_success-when-help-is-invoked.patch +tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch +tools-rtla-fix-makefile-compiler-options-for-clang.patch +fs-relax-mount_setattr-permission-checks.patch +net-ethernet-ti-cpsw-enable-mac_managed_pm-to-fix-mdio.patch +s390-qeth-fix-potential-loss-of-l3-ip-in-case-of-network-issues.patch +mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch +net-ethernet-ti-cpsw_new-enable-mac_managed_pm-to-fix-mdio.patch +pmdomain-renesas-r8a77980-sysc-cr7-must-be-always-on.patch +riscv-efistub-ensure-gp-relative-addressing-is-not-used.patch +net-stmmac-protect-updates-of-64-bit-statistics-counters.patch +hv_netvsc-register-vf-in-netvsc_probe-if-net_device_register-missed.patch +ceph-prevent-use-after-free-in-encode_cap_msg.patch +nouveau-gsp-use-correct-size-for-registry-rpc.patch +fs-hugetlb-fix-null-pointer-dereference-in-hugetlbs_fill_super.patch +mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch +loongarch-fix-earlycon-parameter-if-kasan-enabled.patch +blk-wbt-fix-detection-of-dirty-throttled-tasks.patch +docs-kernel_feat.py-fix-build-error-for-missing-files.patch +of-property-fix-typo-in-io-channels.patch +xen-events-close-evtchn-after-mapping-cleanup.patch +can-netlink-fix-tdco-calculation-using-the-old-data-bittiming.patch +can-j1939-prevent-deadlock-by-changing-j1939_socks_lock-to-rwlock.patch +can-j1939-fix-uaf-in-j1939_sk_match_filter-during-setsockopt-so_j1939_filter.patch +pmdomain-core-move-the-unused-cleanup-to-a-_sync-initcall.patch +fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch +fs-proc-do_task_stat-use-sig-stats_lock-to-gather-the-threads-children-stats.patch diff --git a/queue-6.7/smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch 
b/queue-6.7/smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch new file mode 100644 index 00000000000..58b5c3ed4c4 --- /dev/null +++ b/queue-6.7/smb-client-set-correct-id-uid-and-cruid-for-multiuser-automounts.patch @@ -0,0 +1,56 @@ +From 4508ec17357094e2075f334948393ddedbb75157 Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Sun, 11 Feb 2024 20:19:30 -0300 +Subject: smb: client: set correct id, uid and cruid for multiuser automounts + +From: Paulo Alcantara + +commit 4508ec17357094e2075f334948393ddedbb75157 upstream. + +When uid, gid and cruid are not specified, we need to dynamically +set them into the filesystem context used for automounting otherwise +they'll end up reusing the values from the parent mount. + +Fixes: 9fd29a5bae6e ("cifs: use fs_context for automounts") +Reported-by: Shane Nehring +Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2259257 +Cc: stable@vger.kernel.org # 6.2+ +Signed-off-by: Paulo Alcantara (Red Hat) +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/namespace.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/fs/smb/client/namespace.c ++++ b/fs/smb/client/namespace.c +@@ -168,6 +168,21 @@ static char *automount_fullpath(struct d + return s; + } + ++static void fs_context_set_ids(struct smb3_fs_context *ctx) ++{ ++ kuid_t uid = current_fsuid(); ++ kgid_t gid = current_fsgid(); ++ ++ if (ctx->multiuser) { ++ if (!ctx->uid_specified) ++ ctx->linux_uid = uid; ++ if (!ctx->gid_specified) ++ ctx->linux_gid = gid; ++ } ++ if (!ctx->cruid_specified) ++ ctx->cred_uid = uid; ++} ++ + /* + * Create a vfsmount that we can automount + */ +@@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automoun + tmp.leaf_fullpath = NULL; + tmp.UNC = tmp.prepath = NULL; + tmp.dfs_root_ses = NULL; ++ fs_context_set_ids(&tmp); + + rc = smb3_fs_context_dup(ctx, &tmp); + if (rc) { diff --git 
a/queue-6.7/smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch b/queue-6.7/smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch new file mode 100644 index 00000000000..39983db49c9 --- /dev/null +++ b/queue-6.7/smb-fix-regression-in-writes-when-non-standard-maximum-write-size-negotiated.patch @@ -0,0 +1,85 @@ +From 4860abb91f3d7fbaf8147d54782149bb1fc45892 Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Tue, 6 Feb 2024 16:34:22 -0600 +Subject: smb: Fix regression in writes when non-standard maximum write size negotiated + +From: Steve French + +commit 4860abb91f3d7fbaf8147d54782149bb1fc45892 upstream. + +The conversion to netfs in the 6.3 kernel caused a regression when +maximum write size is set by the server to an unexpected value which is +not a multiple of 4096 (similarly if the user overrides the maximum +write size by setting mount parm "wsize", but sets it to a value that +is not a multiple of 4096). When negotiated write size is not a +multiple of 4096 the netfs code can skip the end of the final +page when doing large sequential writes, causing data corruption. + +This section of code is being rewritten/removed due to a large +netfs change, but until that point (ie for the 6.3 kernel until now) +we can not support non-standard maximum write sizes. + +Add a warning if a user specifies a wsize on mount that is not +a multiple of 4096 (and round down), also add a change where we +round down the maximum write size if the server negotiates a value +that is not a multiple of 4096 (we also have to check to make sure that +we do not round it down to zero). + +Reported-by: R. 
Diez" +Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") +Suggested-by: Ronnie Sahlberg +Acked-by: Ronnie Sahlberg +Tested-by: Matthew Ruffell +Reviewed-by: Shyam Prasad N +Cc: stable@vger.kernel.org # v6.3+ +Cc: David Howells +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/connect.c | 14 ++++++++++++-- + fs/smb/client/fs_context.c | 11 +++++++++++ + 2 files changed, 23 insertions(+), 2 deletions(-) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -3425,8 +3425,18 @@ int cifs_mount_get_tcon(struct cifs_moun + * the user on mount + */ + if ((cifs_sb->ctx->wsize == 0) || +- (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) +- cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx); ++ (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { ++ cifs_sb->ctx->wsize = ++ round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); ++ /* ++ * in the very unlikely event that the server sent a max write size under PAGE_SIZE, ++ * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 ++ */ ++ if (cifs_sb->ctx->wsize == 0) { ++ cifs_sb->ctx->wsize = PAGE_SIZE; ++ cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); ++ } ++ } + if ((cifs_sb->ctx->rsize == 0) || + (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) + cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); +--- a/fs/smb/client/fs_context.c ++++ b/fs/smb/client/fs_context.c +@@ -1107,6 +1107,17 @@ static int smb3_fs_context_parse_param(s + case Opt_wsize: + ctx->wsize = result.uint_32; + ctx->got_wsize = true; ++ if (ctx->wsize % PAGE_SIZE != 0) { ++ ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); ++ if (ctx->wsize == 0) { ++ ctx->wsize = PAGE_SIZE; ++ cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); ++ } else { ++ cifs_dbg(VFS, ++ "wsize rounded down to %d to multiple of PAGE_SIZE 
%ld\n", ++ ctx->wsize, PAGE_SIZE); ++ } ++ } + break; + case Opt_acregmax: + ctx->acregmax = HZ * result.uint_32; diff --git a/queue-6.7/thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch b/queue-6.7/thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch new file mode 100644 index 00000000000..190d9cd2ee6 --- /dev/null +++ b/queue-6.7/thunderbolt-fix-setting-the-cns-bit-in-router_cs_5.patch @@ -0,0 +1,46 @@ +From ec4d82f855ce332de26fe080892483de98cc1a19 Mon Sep 17 00:00:00 2001 +From: Mohammad Rahimi +Date: Sat, 27 Jan 2024 11:26:28 +0800 +Subject: thunderbolt: Fix setting the CNS bit in ROUTER_CS_5 + +From: Mohammad Rahimi + +commit ec4d82f855ce332de26fe080892483de98cc1a19 upstream. + +The bit 23, CM TBT3 Not Supported (CNS), in ROUTER_CS_5 indicates +whether a USB4 Connection Manager is TBT3-Compatible and should be: + 0b for TBT3-Compatible + 1b for Not TBT3-Compatible + +Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4") +Cc: stable@vger.kernel.org +Signed-off-by: Mohammad Rahimi +Signed-off-by: Mika Westerberg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/thunderbolt/tb_regs.h | 2 +- + drivers/thunderbolt/usb4.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/thunderbolt/tb_regs.h ++++ b/drivers/thunderbolt/tb_regs.h +@@ -203,7 +203,7 @@ struct tb_regs_switch_header { + #define ROUTER_CS_5_WOP BIT(1) + #define ROUTER_CS_5_WOU BIT(2) + #define ROUTER_CS_5_WOD BIT(3) +-#define ROUTER_CS_5_C3S BIT(23) ++#define ROUTER_CS_5_CNS BIT(23) + #define ROUTER_CS_5_PTO BIT(24) + #define ROUTER_CS_5_UTO BIT(25) + #define ROUTER_CS_5_HCO BIT(26) +--- a/drivers/thunderbolt/usb4.c ++++ b/drivers/thunderbolt/usb4.c +@@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch * + } + + /* TBT3 supported by the CM */ +- val |= ROUTER_CS_5_C3S; ++ val &= ~ROUTER_CS_5_CNS; + + return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1); + } diff --git a/queue-6.7/tools-rtla-exit-with-exit_success-when-help-is-invoked.patch 
b/queue-6.7/tools-rtla-exit-with-exit_success-when-help-is-invoked.patch new file mode 100644 index 00000000000..9d76588958e --- /dev/null +++ b/queue-6.7/tools-rtla-exit-with-exit_success-when-help-is-invoked.patch @@ -0,0 +1,90 @@ +From b5f319360371087d52070d8f3fc7789e80ce69a6 Mon Sep 17 00:00:00 2001 +From: John Kacur +Date: Fri, 2 Feb 2024 19:16:07 -0500 +Subject: tools/rtla: Exit with EXIT_SUCCESS when help is invoked + +From: John Kacur + +commit b5f319360371087d52070d8f3fc7789e80ce69a6 upstream. + +Fix rtla so that the following commands exit with 0 when help is invoked + +rtla osnoise top -h +rtla osnoise hist -h +rtla timerlat top -h +rtla timerlat hist -h + +Link: https://lore.kernel.org/linux-trace-devel/20240203001607.69703-1-jkacur@redhat.com + +Cc: stable@vger.kernel.org +Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") +Signed-off-by: John Kacur +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/osnoise_hist.c | 6 +++++- + tools/tracing/rtla/src/osnoise_top.c | 6 +++++- + tools/tracing/rtla/src/timerlat_hist.c | 6 +++++- + tools/tracing/rtla/src/timerlat_top.c | 6 +++++- + 4 files changed, 20 insertions(+), 4 deletions(-) + +--- a/tools/tracing/rtla/src/osnoise_hist.c ++++ b/tools/tracing/rtla/src/osnoise_hist.c +@@ -480,7 +480,11 @@ static void osnoise_hist_usage(char *usa + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); +- exit(1); ++ ++ if (usage) ++ exit(EXIT_FAILURE); ++ ++ exit(EXIT_SUCCESS); + } + + /* +--- a/tools/tracing/rtla/src/osnoise_top.c ++++ b/tools/tracing/rtla/src/osnoise_top.c +@@ -331,7 +331,11 @@ static void osnoise_top_usage(struct osn + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); +- exit(1); ++ ++ if (usage) ++ exit(EXIT_FAILURE); ++ ++ exit(EXIT_SUCCESS); + } + + /* +--- a/tools/tracing/rtla/src/timerlat_hist.c ++++ b/tools/tracing/rtla/src/timerlat_hist.c +@@ -546,7 +546,11 @@ static void timerlat_hist_usage(char *us + + for 
(i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); +- exit(1); ++ ++ if (usage) ++ exit(EXIT_FAILURE); ++ ++ exit(EXIT_SUCCESS); + } + + /* +--- a/tools/tracing/rtla/src/timerlat_top.c ++++ b/tools/tracing/rtla/src/timerlat_top.c +@@ -375,7 +375,11 @@ static void timerlat_top_usage(char *usa + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); +- exit(1); ++ ++ if (usage) ++ exit(EXIT_FAILURE); ++ ++ exit(EXIT_SUCCESS); + } + + /* diff --git a/queue-6.7/tools-rtla-fix-clang-warning-about-mount_point-var-size.patch b/queue-6.7/tools-rtla-fix-clang-warning-about-mount_point-var-size.patch new file mode 100644 index 00000000000..2141edeafe9 --- /dev/null +++ b/queue-6.7/tools-rtla-fix-clang-warning-about-mount_point-var-size.patch @@ -0,0 +1,52 @@ +From 30369084ac6e27479a347899e74f523e6ca29b89 Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 Feb 2024 12:05:31 +0100 +Subject: tools/rtla: Fix clang warning about mount_point var size + +From: Daniel Bristot de Oliveira + +commit 30369084ac6e27479a347899e74f523e6ca29b89 upstream. + +clang is reporting this warning: + +$ make HOSTCC=clang CC=clang LLVM_IAS=1 +[...] +clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions + -fstack-protector-strong -fasynchronous-unwind-tables + -fstack-clash-protection -Wall -Werror=format-security + -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS + $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c + +src/utils.c:548:66: warning: 'fscanf' may overflow; destination buffer in argument 3 has size 1024, but the corresponding specifier may require size 1025 [-Wfortify-source] + 548 | while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { + | ^ + +Increase mount_point variable size to MAX_PATH+1 to avoid the overflow. 
+ +Link: https://lkml.kernel.org/r/1b46712e93a2f4153909514a36016959dcc4021c.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Cc: Donald Zickus +Fixes: a957cbc02531 ("rtla: Add -C cgroup support") +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/tracing/rtla/src/utils.c ++++ b/tools/tracing/rtla/src/utils.c +@@ -530,7 +530,7 @@ int set_cpu_dma_latency(int32_t latency) + */ + static const int find_mount(const char *fs, char *mp, int sizeof_mp) + { +- char mount_point[MAX_PATH]; ++ char mount_point[MAX_PATH+1]; + char type[100]; + int found = 0; + FILE *fp; diff --git a/queue-6.7/tools-rtla-fix-makefile-compiler-options-for-clang.patch b/queue-6.7/tools-rtla-fix-makefile-compiler-options-for-clang.patch new file mode 100644 index 00000000000..18697ec213f --- /dev/null +++ b/queue-6.7/tools-rtla-fix-makefile-compiler-options-for-clang.patch @@ -0,0 +1,72 @@ +From bc4cbc9d260ba8358ca63662919f4bb223cb603b Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 Feb 2024 12:05:29 +0100 +Subject: tools/rtla: Fix Makefile compiler options for clang + +From: Daniel Bristot de Oliveira + +commit bc4cbc9d260ba8358ca63662919f4bb223cb603b upstream. + +The following errors are showing up when compiling rtla with clang: + + $ make HOSTCC=clang CC=clang LLVM_IAS=1 + [...] 
+ + clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects + -fexceptions -fstack-protector-strong + -fasynchronous-unwind-tables -fstack-clash-protection -Wall + -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 + -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c + + clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] + warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] + 1 warning generated. + + clang -o rtla -ggdb src/osnoise.o src/osnoise_hist.o src/osnoise_top.o + src/rtla.o src/timerlat_aa.o src/timerlat.o src/timerlat_hist.o + src/timerlat_top.o src/timerlat_u.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) + + src/osnoise.o: file not recognized: file format not recognized + clang: error: linker command failed with exit code 1 (use -v to see invocation) + make: *** [Makefile:110: rtla] Error 1 + +Solve these issues by: + - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang + - informing the linker about -flto=auto + +Link: https://lore.kernel.org/linux-trace-kernel/567ac1b94effc228ce9a0225b9df7232a9b35b55.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Fixes: 1a7b22ab15eb ("tools/rtla: Build with EXTRA_{C,LD}FLAGS") +Suggested-by: Donald Zickus +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/Makefile | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/tools/tracing/rtla/Makefile ++++ b/tools/tracing/rtla/Makefile +@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -f + -fasynchronous-unwind-tables -fstack-clash-protection + WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized 
+ ++ifeq ($(CC),clang) ++ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) ++ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) ++endif ++ + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + + CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) +-LDFLAGS := -ggdb $(EXTRA_LDFLAGS) ++LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) + LIBS := $$($(PKG_CONFIG) --libs libtracefs) + + SRC := $(wildcard src/*.c) diff --git a/queue-6.7/tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch b/queue-6.7/tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch new file mode 100644 index 00000000000..741897991fe --- /dev/null +++ b/queue-6.7/tools-rtla-fix-uninitialized-bucket-data-bucket_size-warning.patch @@ -0,0 +1,107 @@ +From 64dc40f7523369912d7adb22c8cb655f71610505 Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 Feb 2024 12:05:30 +0100 +Subject: tools/rtla: Fix uninitialized bucket/data->bucket_size warning + +From: Daniel Bristot de Oliveira + +commit 64dc40f7523369912d7adb22c8cb655f71610505 upstream. + +When compiling rtla with clang, I am getting the following warnings: + +$ make HOSTCC=clang CC=clang LLVM_IAS=1 + +[..] 
+clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions + -fstack-protector-strong -fasynchronous-unwind-tables + -fstack-clash-protection -Wall -Werror=format-security + -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS + $(pkg-config --cflags libtracefs) + -c -o src/osnoise_hist.o src/osnoise_hist.c +src/osnoise_hist.c:138:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] + 138 | if (data->bucket_size) + | ^~~~~~~~~~~~~~~~~ +src/osnoise_hist.c:149:6: note: uninitialized use occurs here + 149 | if (bucket < entries) + | ^~~~~~ +src/osnoise_hist.c:138:2: note: remove the 'if' if its condition is always true + 138 | if (data->bucket_size) + | ^~~~~~~~~~~~~~~~~~~~~~ + 139 | bucket = duration / data->bucket_size; +src/osnoise_hist.c:132:12: note: initialize the variable 'bucket' to silence this warning + 132 | int bucket; + | ^ + | = 0 +1 warning generated. + +[...] + +clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions + -fstack-protector-strong -fasynchronous-unwind-tables + -fstack-clash-protection -Wall -Werror=format-security + -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS + $(pkg-config --cflags libtracefs) + -c -o src/timerlat_hist.o src/timerlat_hist.c +src/timerlat_hist.c:181:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] + 181 | if (data->bucket_size) + | ^~~~~~~~~~~~~~~~~ +src/timerlat_hist.c:204:6: note: uninitialized use occurs here + 204 | if (bucket < entries) + | ^~~~~~ +src/timerlat_hist.c:181:2: note: remove the 'if' if its condition is always true + 181 | if (data->bucket_size) + | ^~~~~~~~~~~~~~~~~~~~~~ + 182 | bucket = latency / data->bucket_size; +src/timerlat_hist.c:175:12: note: initialize the variable 'bucket' to silence this warning + 175 | int bucket; + | ^ + | = 0 +1 warning generated. 
+ +This is a legit warning, but data->bucket_size is always > 0 (see +timerlat_hist_parse_args()), so the if is not necessary. + +Remove the unneeded if (data->bucket_size) to avoid the warning. + +Link: https://lkml.kernel.org/r/6e1b1665cd99042ae705b3e0fc410858c4c42346.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Cc: Donald Zickus +Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") +Fixes: 829a6c0b5698 ("rtla/osnoise: Add the hist mode") +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/osnoise_hist.c | 3 +-- + tools/tracing/rtla/src/timerlat_hist.c | 3 +-- + 2 files changed, 2 insertions(+), 4 deletions(-) + +--- a/tools/tracing/rtla/src/osnoise_hist.c ++++ b/tools/tracing/rtla/src/osnoise_hist.c +@@ -135,8 +135,7 @@ static void osnoise_hist_update_multiple + if (params->output_divisor) + duration = duration / params->output_divisor; + +- if (data->bucket_size) +- bucket = duration / data->bucket_size; ++ bucket = duration / data->bucket_size; + + total_duration = duration * count; + +--- a/tools/tracing/rtla/src/timerlat_hist.c ++++ b/tools/tracing/rtla/src/timerlat_hist.c +@@ -178,8 +178,7 @@ timerlat_hist_update(struct osnoise_tool + if (params->output_divisor) + latency = latency / params->output_divisor; + +- if (data->bucket_size) +- bucket = latency / data->bucket_size; ++ bucket = latency / data->bucket_size; + + if (!context) { + hist = data->hist[cpu].irq; diff --git a/queue-6.7/tools-rtla-remove-unused-sched_getattr-function.patch b/queue-6.7/tools-rtla-remove-unused-sched_getattr-function.patch new file mode 100644 index 00000000000..4a1bc8716f2 --- /dev/null +++ b/queue-6.7/tools-rtla-remove-unused-sched_getattr-function.patch @@ -0,0 +1,52 @@ +From 084ce16df0f060efd371092a09a7ae74a536dc11 Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 
Feb 2024 12:05:32 +0100 +Subject: tools/rtla: Remove unused sched_getattr() function + +From: Daniel Bristot de Oliveira + +commit 084ce16df0f060efd371092a09a7ae74a536dc11 upstream. + +Clang is reporting: + +$ make HOSTCC=clang CC=clang LLVM_IAS=1 +[...] +clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c +src/utils.c:241:19: warning: unused function 'sched_getattr' [-Wunused-function] + 241 | static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + | ^~~~~~~~~~~~~ +1 warning generated. + +Which is correct, so remove the unused function. + +Link: https://lkml.kernel.org/r/eaed7ba122c4ae88ce71277c824ef41cbf789385.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Cc: Donald Zickus +Fixes: b1696371d865 ("rtla: Helper functions for rtla") +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/utils.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/tools/tracing/rtla/src/utils.c ++++ b/tools/tracing/rtla/src/utils.c +@@ -238,12 +238,6 @@ static inline int sched_setattr(pid_t pi + return syscall(__NR_sched_setattr, pid, attr, flags); + } + +-static inline int sched_getattr(pid_t pid, struct sched_attr *attr, +- unsigned int size, unsigned int flags) +-{ +- return syscall(__NR_sched_getattr, pid, attr, size, flags); +-} +- + int __set_sched_attr(int pid, struct sched_attr *attr) + { + int flags = 0; diff --git a/queue-6.7/tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch b/queue-6.7/tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch new file mode 100644 index 00000000000..47ded3dadc8 --- /dev/null +++ 
b/queue-6.7/tools-rtla-replace-setting-prio-with-nice-for-sched_other.patch @@ -0,0 +1,55 @@ +From 14f08c976ffe0d2117c6199c32663df1cbc45c65 Mon Sep 17 00:00:00 2001 +From: limingming3 +Date: Wed, 7 Feb 2024 14:51:42 +0800 +Subject: tools/rtla: Replace setting prio with nice for SCHED_OTHER + +From: limingming3 + +commit 14f08c976ffe0d2117c6199c32663df1cbc45c65 upstream. + +Since the sched_priority for SCHED_OTHER is always 0, it makes no +sense to set it. +Setting nice for SCHED_OTHER seems more meaningful. + +Link: https://lkml.kernel.org/r/20240207065142.1753909-1-limingming3@lixiang.com + +Cc: stable@vger.kernel.org +Fixes: b1696371d865 ("rtla: Helper functions for rtla") +Signed-off-by: limingming3 +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/tracing/rtla/src/utils.c | 6 +++--- + tools/tracing/rtla/src/utils.h | 2 ++ + 2 files changed, 5 insertions(+), 3 deletions(-) + +--- a/tools/tracing/rtla/src/utils.c ++++ b/tools/tracing/rtla/src/utils.c +@@ -473,13 +473,13 @@ int parse_prio(char *arg, struct sched_a + if (prio == INVALID_VAL) + return -1; + +- if (prio < sched_get_priority_min(SCHED_OTHER)) ++ if (prio < MIN_NICE) + return -1; +- if (prio > sched_get_priority_max(SCHED_OTHER)) ++ if (prio > MAX_NICE) + return -1; + + sched_param->sched_policy = SCHED_OTHER; +- sched_param->sched_priority = prio; ++ sched_param->sched_nice = prio; + break; + default: + return -1; +--- a/tools/tracing/rtla/src/utils.h ++++ b/tools/tracing/rtla/src/utils.h +@@ -9,6 +9,8 @@ + */ + #define BUFF_U64_STR_SIZE 24 + #define MAX_PATH 1024 ++#define MAX_NICE 20 ++#define MIN_NICE -19 + + #define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ diff --git a/queue-6.7/tools-rv-fix-curr_reactor-uninitialized-variable.patch b/queue-6.7/tools-rv-fix-curr_reactor-uninitialized-variable.patch new file mode 100644 index 00000000000..f7d26d3bcb9 --- /dev/null +++ 
b/queue-6.7/tools-rv-fix-curr_reactor-uninitialized-variable.patch @@ -0,0 +1,80 @@ +From 61ec586bc0815959d3314cf7ce242529c977b357 Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 Feb 2024 12:05:34 +0100 +Subject: tools/rv: Fix curr_reactor uninitialized variable + +From: Daniel Bristot de Oliveira + +commit 61ec586bc0815959d3314cf7ce242529c977b357 upstream. + +clang is reporting: + +$ make HOSTCC=clang CC=clang LLVM_IAS=1 + +clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions + -fstack-protector-strong -fasynchronous-unwind-tables + -fstack-clash-protection -Wall -Werror=format-security + -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS + $(pkg-config --cflags libtracefs) -I include + -c -o src/in_kernel.o src/in_kernel.c +[...] + +src/in_kernel.c:227:6: warning: variable 'curr_reactor' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] + 227 | if (!end) + | ^~~~ +src/in_kernel.c:242:9: note: uninitialized use occurs here + 242 | return curr_reactor; + | ^~~~~~~~~~~~ +src/in_kernel.c:227:2: note: remove the 'if' if its condition is always false + 227 | if (!end) + | ^~~~~~~~~ + 228 | goto out_free; + | ~~~~~~~~~~~~~ +src/in_kernel.c:221:6: warning: variable 'curr_reactor' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] + 221 | if (!start) + | ^~~~~~ +src/in_kernel.c:242:9: note: uninitialized use occurs here + 242 | return curr_reactor; + | ^~~~~~~~~~~~ +src/in_kernel.c:221:2: note: remove the 'if' if its condition is always false + 221 | if (!start) + | ^~~~~~~~~~~ + 222 | goto out_free; + | ~~~~~~~~~~~~~ +src/in_kernel.c:215:20: note: initialize the variable 'curr_reactor' to silence this warning + 215 | char *curr_reactor; + | ^ + | = NULL +2 warnings generated. + +Which is correct. Setting curr_reactor to NULL avoids the problem. 
+ +Link: https://lkml.kernel.org/r/3a35551149e5ee0cb0950035afcb8082c3b5d05b.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Cc: Donald Zickus +Fixes: 6d60f89691fc ("tools/rv: Add in-kernel monitor interface") +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/verification/rv/src/in_kernel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/verification/rv/src/in_kernel.c ++++ b/tools/verification/rv/src/in_kernel.c +@@ -210,9 +210,9 @@ static char *ikm_read_reactor(char *moni + static char *ikm_get_current_reactor(char *monitor_name) + { + char *reactors = ikm_read_reactor(monitor_name); ++ char *curr_reactor = NULL; + char *start; + char *end; +- char *curr_reactor; + + if (!reactors) + return NULL; diff --git a/queue-6.7/tools-rv-fix-makefile-compiler-options-for-clang.patch b/queue-6.7/tools-rv-fix-makefile-compiler-options-for-clang.patch new file mode 100644 index 00000000000..1946af742c5 --- /dev/null +++ b/queue-6.7/tools-rv-fix-makefile-compiler-options-for-clang.patch @@ -0,0 +1,67 @@ +From f9b2c87105c989a7b259c6da87673ada96dce2f8 Mon Sep 17 00:00:00 2001 +From: Daniel Bristot de Oliveira +Date: Tue, 6 Feb 2024 12:05:33 +0100 +Subject: tools/rv: Fix Makefile compiler options for clang + +From: Daniel Bristot de Oliveira + +commit f9b2c87105c989a7b259c6da87673ada96dce2f8 upstream. + +The following errors are showing up when compiling rv with clang: + + $ make HOSTCC=clang CC=clang LLVM_IAS=1 + [...] 
+ clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects + -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables + -fstack-clash-protection -Wall -Werror=format-security + -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS + -Wno-maybe-uninitialized $(pkg-config --cflags libtracefs) + -I include -c -o src/utils.o src/utils.c + clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] + warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] + 1 warning generated. + + clang -o rv -ggdb src/in_kernel.o src/rv.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) + src/in_kernel.o: file not recognized: file format not recognized + clang: error: linker command failed with exit code 1 (use -v to see invocation) + make: *** [Makefile:110: rv] Error 1 + +Solve these issues by: + - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang + - informing the linker about -flto=auto + +Link: https://lkml.kernel.org/r/ed94a8ddc2ca8c8ef663cfb7ae9dd196c4a66b33.1707217097.git.bristot@kernel.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Bill Wendling +Cc: Justin Stitt +Fixes: 4bc4b131d44c ("rv: Add rv tool") +Suggested-by: Donald Zickus +Signed-off-by: Daniel Bristot de Oliveira +Signed-off-by: Greg Kroah-Hartman +--- + tools/verification/rv/Makefile | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/tools/verification/rv/Makefile ++++ b/tools/verification/rv/Makefile +@@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -f + -fasynchronous-unwind-tables -fstack-clash-protection + WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + ++ifeq ($(CC),clang) ++ FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) ++ WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) ++endif ++ + 
TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + + CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include +-LDFLAGS := -ggdb $(EXTRA_LDFLAGS) ++LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) + LIBS := $$($(PKG_CONFIG) --libs libtracefs) + + SRC := $(wildcard src/*.c) diff --git a/queue-6.7/wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch b/queue-6.7/wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch new file mode 100644 index 00000000000..ffdd1f9b2b8 --- /dev/null +++ b/queue-6.7/wifi-cfg80211-fix-wiphy-delayed-work-queueing.patch @@ -0,0 +1,46 @@ +From b743287d7a0007493f5cada34ed2085d475050b4 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Thu, 25 Jan 2024 09:51:09 +0100 +Subject: wifi: cfg80211: fix wiphy delayed work queueing + +From: Johannes Berg + +commit b743287d7a0007493f5cada34ed2085d475050b4 upstream. + +When a wiphy work is queued with timer, and then again +without a delay, it's started immediately but *also* +started again after the timer expires. This can lead, +for example, to warnings in mac80211's offchannel code +as reported by Jouni. Running the same work twice isn't +expected, of course. Fix this by deleting the timer at +this point, when queuing immediately due to delay=0. 
+ +Cc: stable@vger.kernel.org +Reported-by: Jouni Malinen +Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") +Link: https://msgid.link/20240125095108.2feb0eaaa446.I4617f3210ed0e7f252290d5970dac6a876aa595b@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -5,7 +5,7 @@ + * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015-2017 Intel Deutschland GmbH +- * Copyright (C) 2018-2023 Intel Corporation ++ * Copyright (C) 2018-2024 Intel Corporation + */ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wip + unsigned long delay) + { + if (!delay) { ++ del_timer(&dwork->timer); + wiphy_work_queue(wiphy, &dwork->work); + return; + } diff --git a/queue-6.7/wifi-iwlwifi-fix-double-free-bug.patch b/queue-6.7/wifi-iwlwifi-fix-double-free-bug.patch new file mode 100644 index 00000000000..e55cc2ada42 --- /dev/null +++ b/queue-6.7/wifi-iwlwifi-fix-double-free-bug.patch @@ -0,0 +1,41 @@ +From 353d321f63f7dbfc9ef58498cc732c9fe886a596 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Tue, 23 Jan 2024 20:08:11 +0200 +Subject: wifi: iwlwifi: fix double-free bug + +From: Johannes Berg + +commit 353d321f63f7dbfc9ef58498cc732c9fe886a596 upstream. + +The storage for the TLV PC register data wasn't done like all +the other storage in the drv->fw area, which is cleared at the +end of deallocation. Therefore, the freeing must also be done +differently, explicitly NULL'ing it out after the free, since +otherwise there's a nasty double-free bug here if a file fails +to load after this has been parsed, and we get another free +later (e.g. because no other file exists.) Fix that by adding +the missing NULL assignment. 
+ +Cc: stable@vger.kernel.org +Fixes: 5e31b3df86ec ("wifi: iwlwifi: dbg: print pc register data once fw dump occurred") +Reported-by: Guy Kaplan +Signed-off-by: Johannes Berg +Reviewed-by: Gregory Greenman +Signed-off-by: Miri Korenblit +Link: https://msgid.link/20240123200528.675f3c24ec0d.I6ab4015cd78d82dd95471f840629972ef0331de3@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +@@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl + kfree(drv->fw.ucode_capa.cmd_versions); + kfree(drv->fw.phy_integration_ver); + kfree(drv->trans->dbg.pc_data); ++ drv->trans->dbg.pc_data = NULL; + + for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) + iwl_free_fw_img(drv, drv->fw.img + i); diff --git a/queue-6.7/wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch b/queue-6.7/wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch new file mode 100644 index 00000000000..b9ae3ab4dc9 --- /dev/null +++ b/queue-6.7/wifi-iwlwifi-mvm-fix-a-crash-when-we-run-out-of-stations.patch @@ -0,0 +1,59 @@ +From b7198383ef2debe748118996f627452281cf27d7 Mon Sep 17 00:00:00 2001 +From: Emmanuel Grumbach +Date: Tue, 6 Feb 2024 18:02:04 +0200 +Subject: wifi: iwlwifi: mvm: fix a crash when we run out of stations + +From: Emmanuel Grumbach + +commit b7198383ef2debe748118996f627452281cf27d7 upstream. + +A DoS tool that injects loads of authentication frames made our AP +crash. The iwl_mvm_is_dup() function couldn't find the per-queue +dup_data which was not allocated. + +The root cause for that is that we ran out of stations in the firmware +and we didn't really add the station to the firmware, yet we didn't +return an error to mac80211. +Mac80211 was thinking that we have the station and because of that, +sta_info::uploaded was set to 1. 
This allowed +ieee80211_find_sta_by_ifaddr() to return a valid station object, but +that ieee80211_sta didn't have any iwl_mvm_sta object initialized and +that caused the crash mentioned earlier when we got Rx on that station. + +Cc: stable@vger.kernel.org +Fixes: 57974a55d995 ("wifi: iwlwifi: mvm: refactor iwl_mvm_mac_sta_state_common()") +Signed-off-by: Emmanuel Grumbach +Signed-off-by: Miri Korenblit +Link: https://msgid.link/20240206175739.1f76c44b2486.I6a00955e2842f15f0a089db2f834adb9d10fbe35@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +++ + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 ++++ + 2 files changed, 7 insertions(+) + +--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +@@ -3673,6 +3673,9 @@ iwl_mvm_sta_state_notexist_to_none(struc + NL80211_TDLS_SETUP); + } + ++ if (ret) ++ return ret; ++ + for_each_sta_active_link(vif, sta, link_sta, i) + link_sta->agg.max_rc_amsdu_len = 1; + +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +@@ -505,6 +505,10 @@ static bool iwl_mvm_is_dup(struct ieee80 + return false; + + mvm_sta = iwl_mvm_sta_from_mac80211(sta); ++ ++ if (WARN_ON_ONCE(!mvm_sta->dup_data)) ++ return false; ++ + dup_data = &mvm_sta->dup_data[queue]; + + /* diff --git a/queue-6.7/wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch b/queue-6.7/wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch new file mode 100644 index 00000000000..18a870b74b4 --- /dev/null +++ b/queue-6.7/wifi-mac80211-reload-info-pointer-in-ieee80211_tx_dequeue.patch @@ -0,0 +1,51 @@ +From c98d8836b817d11fdff4ca7749cbbe04ff7f0c64 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Wed, 31 Jan 2024 16:49:10 +0100 +Subject: wifi: mac80211: reload info pointer in ieee80211_tx_dequeue() + +From: Johannes Berg + +commit 
c98d8836b817d11fdff4ca7749cbbe04ff7f0c64 upstream. + +This pointer can change here since the SKB can change, so we +actually later open-coded IEEE80211_SKB_CB() again. Reload +the pointer where needed, so the monitor-mode case using it +gets fixed, and then use info-> later as well. + +Cc: stable@vger.kernel.org +Fixes: 531682159092 ("mac80211: fix VLAN handling with TXQs") +Link: https://msgid.link/20240131164910.b54c28d583bc.I29450cec84ea6773cff5d9c16ff92b836c331471@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/mac80211/tx.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -5,7 +5,7 @@ + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH +- * Copyright (C) 2018-2022 Intel Corporation ++ * Copyright (C) 2018-2024 Intel Corporation + * + * Transmit and frame generation functions. + */ +@@ -3927,6 +3927,7 @@ begin: + goto begin; + + skb = __skb_dequeue(&tx.skbs); ++ info = IEEE80211_SKB_CB(skb); + + if (!skb_queue_empty(&tx.skbs)) { + spin_lock_bh(&fq->lock); +@@ -3971,7 +3972,7 @@ begin: + } + + encap_out: +- IEEE80211_SKB_CB(skb)->control.vif = vif; ++ info->control.vif = vif; + + if (tx.sta && + wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { diff --git a/queue-6.7/xen-events-close-evtchn-after-mapping-cleanup.patch b/queue-6.7/xen-events-close-evtchn-after-mapping-cleanup.patch new file mode 100644 index 00000000000..df024b71bf5 --- /dev/null +++ b/queue-6.7/xen-events-close-evtchn-after-mapping-cleanup.patch @@ -0,0 +1,166 @@ +From fa765c4b4aed2d64266b694520ecb025c862c5a9 Mon Sep 17 00:00:00 2001 +From: Maximilian Heyne +Date: Wed, 24 Jan 2024 16:31:28 +0000 +Subject: xen/events: close evtchn after mapping cleanup + +From: Maximilian Heyne + +commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream. 
+ +shutdown_pirq and startup_pirq are not taking the +irq_mapping_update_lock because they can't due to lock inversion. Both +are called with the irq_desc->lock being taken. The lock order, +however, is first irq_mapping_update_lock and then irq_desc->lock. + +This opens multiple races: +- shutdown_pirq can be interrupted by a function that allocates an event + channel: + + CPU0 CPU1 + shutdown_pirq { + xen_evtchn_close(e) + __startup_pirq { + EVTCHNOP_bind_pirq + -> returns just freed evtchn e + set_evtchn_to_irq(e, irq) + } + xen_irq_info_cleanup() { + set_evtchn_to_irq(e, -1) + } + } + + Assume here that event channel e refers to the same event channel + number. + After this race the evtchn_to_irq mapping for e is invalid (-1). + +- __startup_pirq races with __unbind_from_irq in a similar way. Because + __startup_pirq doesn't take irq_mapping_update_lock it can grab the + evtchn that __unbind_from_irq is currently freeing and cleaning up. In + this case even though the event channel is allocated, its mapping can + be unset in evtchn_to_irq. + +The fix is to first cleanup the mappings and then close the event +channel. In this way, when an event channel gets allocated its +potential previous evtchn_to_irq mappings are guaranteed to be unset already. +This is also the reverse order of the allocation where first the event +channel is allocated and then the mappings are setup. + +On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal +[un]bind interfaces"), we hit a BUG like the following during probing of NVMe +devices. The issue is that during nvme_setup_io_queues, pci_free_irq +is called for every device which results in a call to shutdown_pirq. +With many nvme devices it's therefore likely to hit this race during +boot because there will be multiple calls to shutdown_pirq and +startup_pirq running potentially in parallel. 
+ + ------------[ cut here ]------------ + blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled + kernel BUG at drivers/xen/events/events_base.c:499! + invalid opcode: 0000 [#1] SMP PTI + CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 + Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 + Workqueue: nvme-reset-wq nvme_reset_work + RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 + Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 + RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 + RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff + RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 + R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed + R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 + FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + ? show_trace_log_lvl+0x1c1/0x2d9 + ? show_trace_log_lvl+0x1c1/0x2d9 + ? set_affinity_irq+0xdc/0x1c0 + ? __die_body.cold+0x8/0xd + ? die+0x2b/0x50 + ? do_trap+0x90/0x110 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? do_error_trap+0x65/0x80 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? exc_invalid_op+0x4e/0x70 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? asm_exc_invalid_op+0x12/0x20 + ? bind_evtchn_to_cpu+0xdf/0xf0 + ? bind_evtchn_to_cpu+0xc5/0xf0 + set_affinity_irq+0xdc/0x1c0 + irq_do_set_affinity+0x1d7/0x1f0 + irq_setup_affinity+0xd6/0x1a0 + irq_startup+0x8a/0xf0 + __setup_irq+0x639/0x6d0 + ? 
nvme_suspend+0x150/0x150 + request_threaded_irq+0x10c/0x180 + ? nvme_suspend+0x150/0x150 + pci_request_irq+0xa8/0xf0 + ? __blk_mq_free_request+0x74/0xa0 + queue_request_irq+0x6f/0x80 + nvme_create_queue+0x1af/0x200 + nvme_create_io_queues+0xbd/0xf0 + nvme_setup_io_queues+0x246/0x320 + ? nvme_irq_check+0x30/0x30 + nvme_reset_work+0x1c8/0x400 + process_one_work+0x1b0/0x350 + worker_thread+0x49/0x310 + ? process_one_work+0x350/0x350 + kthread+0x11b/0x140 + ? __kthread_bind_mask+0x60/0x60 + ret_from_fork+0x22/0x30 + Modules linked in: + ---[ end trace a11715de1eee1873 ]--- + +Fixes: d46a78b05c0e ("xen: implement pirq type event channels") +Cc: stable@vger.kernel.org +Co-debugged-by: Andrew Panyakin +Signed-off-by: Maximilian Heyne +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/events/events_base.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_dat + return; + + do_mask(info, EVT_MASK_REASON_EXPLICIT); +- xen_evtchn_close(evtchn); + xen_irq_info_cleanup(info); ++ xen_evtchn_close(evtchn); + } + + static void enable_pirq(struct irq_data *data) +@@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); + static void __unbind_from_irq(struct irq_info *info, unsigned int irq) + { + evtchn_port_t evtchn; ++ bool close_evtchn = false; + + if (!info) { + xen_irq_free_desc(irq); +@@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq + struct xenbus_device *dev; + + if (!info->is_static) +- xen_evtchn_close(evtchn); ++ close_evtchn = true; + + switch (info->type) { + case IRQT_VIRQ: +@@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq + } + + xen_irq_info_cleanup(info); ++ ++ if (close_evtchn) ++ xen_evtchn_close(evtchn); + } + + xen_free_irq(info); diff --git 
a/queue-6.7/zonefs-improve-error-handling.patch b/queue-6.7/zonefs-improve-error-handling.patch new file mode 100644 index 00000000000..959c864615b --- /dev/null +++ b/queue-6.7/zonefs-improve-error-handling.patch @@ -0,0 +1,252 @@ +From 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 8 Feb 2024 17:26:59 +0900 +Subject: zonefs: Improve error handling + +From: Damien Le Moal + +commit 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 upstream. + +Write error handling is racy and can sometimes lead to the error recovery +path wrongly changing the inode size of a sequential zone file to an +incorrect value which results in garbage data being readable at the end +of a file. There are 2 problems: + +1) zonefs_file_dio_write() updates a zone file write pointer offset + after issuing a direct IO with iomap_dio_rw(). This update is done + only if the IO succeeds for synchronous direct writes. However, for + asynchronous direct writes, the update is done without waiting for + the IO completion so that the next asynchronous IO can be + immediately issued. However, if an asynchronous IO completes with a + failure right before the i_truncate_mutex lock protecting the update, + the update may change the value of the inode write pointer offset + that was corrected by the error path (zonefs_io_error() function). + +2) zonefs_io_error() is called when a read or write error occurs. This + function executes a report zone operation using the callback function + zonefs_io_error_cb(), which does all the error recovery handling + based on the current zone condition, write pointer position and + according to the mount options being used. However, depending on the + zoned device being used, a report zone callback may be executed in a + context that is different from the context of __zonefs_io_error(). As + a result, zonefs_io_error_cb() may be executed without the inode + truncate mutex lock held, which can lead to invalid error processing. 
+ +Fix both problems as follows: +- Problem 1: Perform the inode write pointer offset update before a + direct write is issued with iomap_dio_rw(). This is safe to do as + partial direct writes are not supported (IOMAP_DIO_PARTIAL is not + set) and any failed IO will trigger the execution of zonefs_io_error() + which will correct the inode write pointer offset to reflect the + current state of the one on the device. +- Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() + and call this function directly from __zonefs_io_error() after + obtaining the zone information using blkdev_report_zones() with a + simple callback function that copies to a local stack variable the + struct blk_zone obtained from the device. This ensures that error + handling is performed holding the inode truncate mutex. + This change also simplifies error handling for conventional zone files + by bypassing the execution of report zones entirely. This is safe to + do because the condition of conventional zones cannot be read-only or + offline and conventional zone files are always fully mapped with a + constant file size. + +Reported-by: Shin'ichiro Kawasaki +Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Tested-by: Shin'ichiro Kawasaki +Reviewed-by: Johannes Thumshirn +Reviewed-by: Himanshu Madhani +Signed-off-by: Greg Kroah-Hartman +--- + fs/zonefs/file.c | 42 +++++++++++++++++++++------------ + fs/zonefs/super.c | 68 ++++++++++++++++++++++++++++++------------------------ + 2 files changed, 66 insertions(+), 44 deletions(-) + +--- a/fs/zonefs/file.c ++++ b/fs/zonefs/file.c +@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io( + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (error) { +- zonefs_io_error(inode, true); ++ /* ++ * For Sync IOs, error recovery is called from ++ * zonefs_file_dio_write(). 
++ */ ++ if (!is_sync_kiocb(iocb)) ++ zonefs_io_error(inode, true); + return error; + } + +@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(str + ret = -EINVAL; + goto inode_unlock; + } ++ /* ++ * Advance the zone write pointer offset. This assumes that the ++ * IO will succeed, which is OK to do because we do not allow ++ * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO ++ * fails, the error path will correct the write pointer offset. ++ */ ++ z->z_wpoffset += count; ++ zonefs_inode_account_active(inode); + mutex_unlock(&zi->i_truncate_mutex); + } + +@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(str + if (ret == -ENOTBLK) + ret = -EBUSY; + +- if (zonefs_zone_is_seq(z) && +- (ret > 0 || ret == -EIOCBQUEUED)) { +- if (ret > 0) +- count = ret; +- +- /* +- * Update the zone write pointer offset assuming the write +- * operation succeeded. If it did not, the error recovery path +- * will correct it. Also do active seq file accounting. +- */ +- mutex_lock(&zi->i_truncate_mutex); +- z->z_wpoffset += count; +- zonefs_inode_account_active(inode); +- mutex_unlock(&zi->i_truncate_mutex); ++ /* ++ * For a failed IO or partial completion, trigger error recovery ++ * to update the zone write pointer offset to a correct value. ++ * For asynchronous IOs, zonefs_file_write_dio_end_io() may already ++ * have executed error recovery if the IO already completed when we ++ * reach here. However, we cannot know that and execute error recovery ++ * again (that will not change anything). 
++ */ ++ if (zonefs_zone_is_seq(z)) { ++ if (ret > 0 && ret != count) ++ ret = -EIO; ++ if (ret < 0 && ret != -EIOCBQUEUED) ++ zonefs_io_error(inode, true); + } + + inode_unlock: +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(str + z->z_mode = inode->i_mode; + } + +-struct zonefs_ioerr_data { +- struct inode *inode; +- bool write; +-}; +- + static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + void *data) + { +- struct zonefs_ioerr_data *err = data; +- struct inode *inode = err->inode; ++ struct blk_zone *z = data; ++ ++ *z = *zone; ++ return 0; ++} ++ ++static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, ++ bool write) ++{ + struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); +@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk + data_size = zonefs_check_zone_condition(sb, z, zone); + isize = i_size_read(inode); + if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && +- !err->write && isize == data_size) +- return 0; ++ !write && isize == data_size) ++ return; + + /* + * At this point, we detected either a bad zone or an inconsistency +@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. 
+ */ +- if (zonefs_zone_is_seq(z) && isize != data_size) ++ if (isize != data_size) + zonefs_warn(sb, + "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); +@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk + zonefs_i_size_write(inode, data_size); + z->z_wpoffset = data_size; + zonefs_inode_account_active(inode); +- +- return 0; + } + + /* +@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *ino + { + struct zonefs_zone *z = zonefs_inode_zone(inode); + struct super_block *sb = inode->i_sb; +- struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; +- unsigned int nr_zones = 1; +- struct zonefs_ioerr_data err = { +- .inode = inode, +- .write = write, +- }; ++ struct blk_zone zone; + int ret; + + /* +- * The only files that have more than one zone are conventional zone +- * files with aggregated conventional zones, for which the inode zone +- * size is always larger than the device zone size. +- */ +- if (z->z_size > bdev_zone_sectors(sb->s_bdev)) +- nr_zones = z->z_size >> +- (sbi->s_zone_sectors_shift + SECTOR_SHIFT); ++ * Conventional zone have no write pointer and cannot become read-only ++ * or offline. So simply fake a report for a single or aggregated zone ++ * and let zonefs_handle_io_error() correct the zone inode information ++ * according to the mount options. ++ */ ++ if (!zonefs_zone_is_seq(z)) { ++ zone.start = z->z_sector; ++ zone.len = z->z_size >> SECTOR_SHIFT; ++ zone.wp = zone.start + zone.len; ++ zone.type = BLK_ZONE_TYPE_CONVENTIONAL; ++ zone.cond = BLK_ZONE_COND_NOT_WP; ++ zone.capacity = zone.len; ++ goto handle_io_error; ++ } + + /* + * Memory allocations in blkdev_report_zones() can trigger a memory +@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *ino + * the GFP_NOIO context avoids both problems. 
+ */ + noio_flag = memalloc_noio_save(); +- ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, +- zonefs_io_error_cb, &err); +- if (ret != nr_zones) ++ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, ++ zonefs_io_error_cb, &zone); ++ memalloc_noio_restore(noio_flag); ++ ++ if (ret != 1) { + zonefs_err(sb, "Get inode %lu zone information failed %d\n", + inode->i_ino, ret); +- memalloc_noio_restore(noio_flag); ++ zonefs_warn(sb, "remounting filesystem read-only\n"); ++ sb->s_flags |= SB_RDONLY; ++ return; ++ } ++ ++handle_io_error: ++ zonefs_handle_io_error(inode, &zone, write); + } + + static struct kmem_cache *zonefs_inode_cachep; -- 2.47.3