From 552d041e8f2023848d3bc0b69fdcee64d74d52c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 27 Oct 2025 12:35:59 +0100 Subject: [PATCH] 6.1-stable patches added patches: arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch arm64-cputype-add-neoverse-v3ae-definitions.patch arm64-errata-apply-workarounds-for-neoverse-v3ae.patch arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch f2fs-add-a-f2fs_get_block_locked-helper.patch f2fs-factor-a-f2fs_map_blocks_cached-helper.patch f2fs-fix-wrong-block-mapping-for-multi-devices.patch f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch fuse-allocate-ff-release_args-only-if-release-is-needed.patch fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch iio-imu-inv_icm42600-use-instead-of-memset.patch ixgbevf-add-support-for-intel-r-e610-device.patch ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch nfsd-fix-last-write-offset-handling-in-layoutcommit.patch nfsd-minor-cleanup-in-layoutcommit-processing.patch nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch pci-add-pci_vdevice_sub-helper-macro.patch pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch pci-j721e-fix-programming-sequence-of-strap-settings.patch 
pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch pm-runtime-add-new-devm-functions.patch revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch vfs-don-t-leak-disconnected-dentries-on-umount.patch x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch xfs-always-warn-about-deprecated-mount-options.patch xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch --- ...check-in-topology_parse_cpu_capacity.patch | 53 +++ ...putype-add-neoverse-v3ae-definitions.patch | 49 +++ ...-apply-workarounds-for-neoverse-v3ae.patch | 63 +++ ...-flag-the-zero-page-as-pg_mte_tagged.patch | 90 ++++ ...-locking-dependency-with-devcd-mutex.patch | 389 ++++++++++++++++++ ...-drm_sched_job_add_resv_dependencies.patch | 101 +++++ ...read-in-parse_apply_sb_mount_options.patch | 74 ++++ ...s-add-a-f2fs_get_block_locked-helper.patch | 99 +++++ ...ctor-a-f2fs_map_blocks_cached-helper.patch | 120 ++++++ ...rong-block-mapping-for-multi-devices.patch | 59 +++ ...e-create-argument-to-f2fs_map_blocks.patch | 288 +++++++++++++ ...lease_args-only-if-release-is-needed.patch | 241 +++++++++++ ...ronous-file-put-from-fuseblk-workers.patch | 94 +++++ ...ring-if-already-pm_runtime-suspended.patch | 66 +++ ...v_icm42600-simplify-pm_runtime-setup.patch | 86 ++++ ...u-inv_icm42600-use-instead-of-memset.patch | 70 ++++ ...-add-support-for-intel-r-e610-device.patch | 171 ++++++++ ...ing-link-speed-data-for-e610-devices.patch | 306 ++++++++++++++ ...ty-by-negotiating-supported-features.patch | 327 +++++++++++++++ ...-on-fsctl_query_interface_info-ioctl.patch | 196 +++++++++ 
...lag-dropping-behavior-in-ksm_madvise.patch | 124 ++++++ ...commit-for-the-flexfiles-layout-type.patch | 50 +++ ...rite-offset-handling-in-layoutcommit.patch | 114 +++++ ...r-cleanup-in-layoutcommit-processing.patch | 50 +++ ...oding-and-decoding-of-nfsd4_deviceid.patch | 156 +++++++ ...u-when-reorder-sequence-wraps-around.patch | 44 ++ ...pci-add-pci_vdevice_sub-helper-macro.patch | 51 +++ ...-ti-syscon-acspcie-proxy-ctrl-exists.patch | 94 +++++ ...ogramming-sequence-of-strap-settings.patch | 90 ++++ ...s-when-running-in-pcie-endpoint-mode.patch | 75 ++++ ...fix-pll-lock-and-o_cmn_ready-polling.patch | 265 ++++++++++++ ...-wait-time-for-startup-state-machine.patch | 58 +++ ...dphy-store-hs_clk_rate-and-return-it.patch | 59 +++ .../pm-runtime-add-new-devm-functions.patch | 109 +++++ ...tlb-failure-on-64k-page-size-systems.patch | 52 +++ ...to-unregister-the-unused-subchannels.patch | 92 +++++ queue-6.1/series | 41 ++ ...leak-disconnected-dentries-on-umount.patch | 58 +++ ...tivating-previously-unavailable-rmid.patch | 136 ++++++ ...-warn-about-deprecated-mount-options.patch | 93 +++++ ...between-i386-and-other-architectures.patch | 173 ++++++++ ...crc-variable-in-xlog_recover_process.patch | 68 +++ 42 files changed, 4994 insertions(+) create mode 100644 queue-6.1/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch create mode 100644 queue-6.1/arm64-cputype-add-neoverse-v3ae-definitions.patch create mode 100644 queue-6.1/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch create mode 100644 queue-6.1/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch create mode 100644 queue-6.1/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch create mode 100644 queue-6.1/drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch create mode 100644 queue-6.1/ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch create mode 100644 queue-6.1/f2fs-add-a-f2fs_get_block_locked-helper.patch 
create mode 100644 queue-6.1/f2fs-factor-a-f2fs_map_blocks_cached-helper.patch create mode 100644 queue-6.1/f2fs-fix-wrong-block-mapping-for-multi-devices.patch create mode 100644 queue-6.1/f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch create mode 100644 queue-6.1/fuse-allocate-ff-release_args-only-if-release-is-needed.patch create mode 100644 queue-6.1/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch create mode 100644 queue-6.1/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch create mode 100644 queue-6.1/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch create mode 100644 queue-6.1/iio-imu-inv_icm42600-use-instead-of-memset.patch create mode 100644 queue-6.1/ixgbevf-add-support-for-intel-r-e610-device.patch create mode 100644 queue-6.1/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch create mode 100644 queue-6.1/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch create mode 100644 queue-6.1/ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch create mode 100644 queue-6.1/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch create mode 100644 queue-6.1/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch create mode 100644 queue-6.1/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch create mode 100644 queue-6.1/nfsd-minor-cleanup-in-layoutcommit-processing.patch create mode 100644 queue-6.1/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch create mode 100644 queue-6.1/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch create mode 100644 queue-6.1/pci-add-pci_vdevice_sub-helper-macro.patch create mode 100644 queue-6.1/pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch create mode 100644 queue-6.1/pci-j721e-fix-programming-sequence-of-strap-settings.patch create mode 100644 queue-6.1/pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch create mode 100644 
queue-6.1/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch create mode 100644 queue-6.1/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch create mode 100644 queue-6.1/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch create mode 100644 queue-6.1/pm-runtime-add-new-devm-functions.patch create mode 100644 queue-6.1/revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch create mode 100644 queue-6.1/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch create mode 100644 queue-6.1/vfs-don-t-leak-disconnected-dentries-on-umount.patch create mode 100644 queue-6.1/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch create mode 100644 queue-6.1/xfs-always-warn-about-deprecated-mount-options.patch create mode 100644 queue-6.1/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch create mode 100644 queue-6.1/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch diff --git a/queue-6.1/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch b/queue-6.1/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch new file mode 100644 index 0000000000..70298eac02 --- /dev/null +++ b/queue-6.1/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch @@ -0,0 +1,53 @@ +From stable+bounces-189867-greg=kroah.com@vger.kernel.org Sun Oct 26 17:04:18 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 12:04:08 -0400 +Subject: arch_topology: Fix incorrect error check in topology_parse_cpu_capacity() +To: stable@vger.kernel.org +Cc: Kaushlendra Kumar , stable , Sudeep Holla , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20251026160408.99204-1-sashal@kernel.org> + +From: Kaushlendra Kumar + +[ Upstream commit 2eead19334516c8e9927c11b448fbe512b1f18a1 ] + +Fix incorrect use of PTR_ERR_OR_ZERO() in topology_parse_cpu_capacity() +which causes the code to proceed with NULL clock 
pointers. The current +logic uses !PTR_ERR_OR_ZERO(cpu_clk) which evaluates to true for both +valid pointers and NULL, leading to potential NULL pointer dereference +in clk_get_rate(). + +Per include/linux/err.h documentation, PTR_ERR_OR_ZERO(ptr) returns: +"The error code within @ptr if it is an error pointer; 0 otherwise." + +This means PTR_ERR_OR_ZERO() returns 0 for both valid pointers AND NULL +pointers. Therefore !PTR_ERR_OR_ZERO(cpu_clk) evaluates to true (proceed) +when cpu_clk is either valid or NULL, causing clk_get_rate(NULL) to be +called when of_clk_get() returns NULL. + +Replace with !IS_ERR_OR_NULL(cpu_clk) which only proceeds for valid +pointers, preventing potential NULL pointer dereference in clk_get_rate(). + +Cc: stable +Signed-off-by: Kaushlendra Kumar +Reviewed-by: Sudeep Holla +Fixes: b8fe128dad8f ("arch_topology: Adjust initial CPU capacities with current freq") +Link: https://patch.msgid.link/20250923174308.1771906-1-kaushlendra.kumar@intel.com +Signed-off-by: Greg Kroah-Hartman +[ Adjust context ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/arch_topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/arch_topology.c ++++ b/drivers/base/arch_topology.c +@@ -327,7 +327,7 @@ bool __init topology_parse_cpu_capacity( + * frequency (by keeping the initial freq_factor value). 
+ */ + cpu_clk = of_clk_get(cpu_node, 0); +- if (!PTR_ERR_OR_ZERO(cpu_clk)) { ++ if (!IS_ERR_OR_NULL(cpu_clk)) { + per_cpu(freq_factor, cpu) = + clk_get_rate(cpu_clk) / 1000; + clk_put(cpu_clk); diff --git a/queue-6.1/arm64-cputype-add-neoverse-v3ae-definitions.patch b/queue-6.1/arm64-cputype-add-neoverse-v3ae-definitions.patch new file mode 100644 index 0000000000..ca8ee38ac6 --- /dev/null +++ b/queue-6.1/arm64-cputype-add-neoverse-v3ae-definitions.patch @@ -0,0 +1,49 @@ +From 3bbf004c4808e2c3241e5c1ad6cc102f38a03c39 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Fri, 19 Sep 2025 15:58:28 +0100 +Subject: arm64: cputype: Add Neoverse-V3AE definitions + +From: Mark Rutland + +commit 3bbf004c4808e2c3241e5c1ad6cc102f38a03c39 upstream. + +Add cputype definitions for Neoverse-V3AE. These will be used for errata +detection in subsequent patches. + +These values can be found in the Neoverse-V3AE TRM: + + https://developer.arm.com/documentation/SDEN-2615521/9-0/ + +... in section A.6.1 ("MIDR_EL1, Main ID Register"). 
+ +Signed-off-by: Mark Rutland +Cc: James Morse +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Ryan Roberts +Signed-off-by: Will Deacon +[ Ryan: Trivial backport ] +Signed-off-by: Ryan Roberts +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/cputype.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/arm64/include/asm/cputype.h ++++ b/arch/arm64/include/asm/cputype.h +@@ -93,6 +93,7 @@ + #define ARM_CPU_PART_NEOVERSE_V2 0xD4F + #define ARM_CPU_PART_CORTEX_A720 0xD81 + #define ARM_CPU_PART_CORTEX_X4 0xD82 ++#define ARM_CPU_PART_NEOVERSE_V3AE 0xD83 + #define ARM_CPU_PART_NEOVERSE_V3 0xD84 + #define ARM_CPU_PART_CORTEX_X925 0xD85 + #define ARM_CPU_PART_CORTEX_A725 0xD87 +@@ -173,6 +174,7 @@ + #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) + #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) + #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) ++#define MIDR_NEOVERSE_V3AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3AE) + #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) + #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) + #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) diff --git a/queue-6.1/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch b/queue-6.1/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch new file mode 100644 index 0000000000..423a27f9ab --- /dev/null +++ b/queue-6.1/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch @@ -0,0 +1,63 @@ +From 0c33aa1804d101c11ba1992504f17a42233f0e11 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Fri, 19 Sep 2025 15:58:29 +0100 +Subject: arm64: errata: Apply workarounds for Neoverse-V3AE + +From: Mark Rutland + +commit 0c33aa1804d101c11ba1992504f17a42233f0e11 upstream. 
+ +Neoverse-V3AE is also affected by erratum #3312417, as described in its +Software Developer Errata Notice (SDEN) document: + + Neoverse V3AE (MP172) SDEN v9.0, erratum 3312417 + https://developer.arm.com/documentation/SDEN-2615521/9-0/ + +Enable the workaround for Neoverse-V3AE, and document this. + +Signed-off-by: Mark Rutland +Cc: James Morse +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Ryan Roberts +Signed-off-by: Will Deacon +[ Ryan: Trivial backport ] +Signed-off-by: Ryan Roberts +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arm64/silicon-errata.rst | 2 ++ + arch/arm64/Kconfig | 1 + + arch/arm64/kernel/cpu_errata.c | 1 + + 3 files changed, 4 insertions(+) + +--- a/Documentation/arm64/silicon-errata.rst ++++ b/Documentation/arm64/silicon-errata.rst +@@ -181,6 +181,8 @@ stable kernels. + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | + +----------------+-----------------+-----------------+-----------------------------+ ++| ARM | Neoverse-V3AE | #3312417 | ARM64_ERRATUM_3194386 | +++----------------+-----------------+-----------------+-----------------------------+ + | ARM | MMU-500 | #841119,826419 | N/A | + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | MMU-600 | #1076982,1209401| N/A | +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -1027,6 +1027,7 @@ config ARM64_ERRATUM_3194386 + * ARM Neoverse-V1 erratum 3324341 + * ARM Neoverse V2 erratum 3324336 + * ARM Neoverse-V3 erratum 3312417 ++ * ARM Neoverse-V3AE erratum 3312417 + + On affected cores "MSR SSBS, #0" instructions may not affect + subsequent speculative instructions, which may permit unexepected +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -457,6 +457,7 @@ static const struct midr_range erratum_s + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + 
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), ++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE), + {} + }; + #endif diff --git a/queue-6.1/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch b/queue-6.1/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch new file mode 100644 index 0000000000..df578213a5 --- /dev/null +++ b/queue-6.1/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch @@ -0,0 +1,90 @@ +From stable+bounces-188173-greg=kroah.com@vger.kernel.org Mon Oct 20 18:12:46 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:12:38 -0400 +Subject: arm64: mte: Do not flag the zero page as PG_mte_tagged +To: stable@vger.kernel.org +Cc: Catalin Marinas , Gergely Kovacs , Will Deacon , David Hildenbrand , Lance Yang , Sasha Levin +Message-ID: <20251020161238.1833261-1-sashal@kernel.org> + +From: Catalin Marinas + +[ Upstream commit f620d66af3165838bfa845dcf9f5f9b4089bf508 ] + +Commit 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the +zero page") attempted to fix ptrace() reading of tags from the zero page +by marking it as PG_mte_tagged during cpu_enable_mte(). The same commit +also changed the ptrace() tag access permission check to the VM_MTE vma +flag while turning the page flag test into a WARN_ON_ONCE(). + +Attempting to set the PG_mte_tagged flag early with +CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled may either hang (after commit +d77e59a8fccd "arm64: mte: Lock a page for MTE tag initialisation") or +have the flags cleared later during page_alloc_init_late(). In addition, +pages_identical() -> memcmp_pages() will reject any comparison with the +zero page as it is marked as tagged. + +Partially revert the above commit to avoid setting PG_mte_tagged on the +zero page. Update the __access_remote_tags() warning on untagged pages +to ignore the zero page since it is known to have the tags initialised. + +Note that all user mapping of the zero page are marked as pte_special(). 
+The arm64 set_pte_at() will not call mte_sync_tags() on such pages, so +PG_mte_tagged will remain cleared. + +Signed-off-by: Catalin Marinas +Fixes: 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the zero page") +Reported-by: Gergely Kovacs +Cc: stable@vger.kernel.org # 5.10.x +Cc: Will Deacon +Cc: David Hildenbrand +Cc: Lance Yang +Acked-by: Lance Yang +Reviewed-by: David Hildenbrand +Tested-by: Lance Yang +Signed-off-by: Will Deacon +[ removed folio-based hugetlb MTE checks ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpufeature.c | 10 +++++++--- + arch/arm64/kernel/mte.c | 2 +- + 2 files changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -2068,17 +2068,21 @@ static void bti_enable(const struct arm6 + #ifdef CONFIG_ARM64_MTE + static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) + { ++ static bool cleared_zero_page = false; ++ + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + + mte_cpu_setup(); + + /* + * Clear the tags in the zero page. This needs to be done via the +- * linear map which has the Tagged attribute. ++ * linear map which has the Tagged attribute. Since this page is ++ * always mapped as pte_special(), set_pte_at() will not attempt to ++ * clear the tags or set PG_mte_tagged. 
+ */ +- if (!page_mte_tagged(ZERO_PAGE(0))) { ++ if (!cleared_zero_page) { ++ cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); +- set_page_mte_tagged(ZERO_PAGE(0)); + } + + kasan_init_hw_tags_cpu(); +--- a/arch/arm64/kernel/mte.c ++++ b/arch/arm64/kernel/mte.c +@@ -456,7 +456,7 @@ static int __access_remote_tags(struct m + put_page(page); + break; + } +- WARN_ON_ONCE(!page_mte_tagged(page)); ++ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); + + /* limit access to the end of the page */ + offset = offset_in_page(addr); diff --git a/queue-6.1/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch b/queue-6.1/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch new file mode 100644 index 0000000000..d835de2da4 --- /dev/null +++ b/queue-6.1/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch @@ -0,0 +1,389 @@ +From stable+bounces-189902-greg=kroah.com@vger.kernel.org Mon Oct 27 02:07:50 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 21:07:25 -0400 +Subject: devcoredump: Fix circular locking dependency with devcd->mutex. +To: stable@vger.kernel.org +Cc: Maarten Lankhorst , Mukesh Ojha , Greg Kroah-Hartman , Johannes Berg , "Rafael J. Wysocki" , Danilo Krummrich , linux-kernel@vger.kernel.org, Matthew Brost , Mukesh Ojha , Sasha Levin +Message-ID: <20251027010725.307225-1-sashal@kernel.org> + +From: Maarten Lankhorst + +[ Upstream commit a91c8096590bd7801a26454789f2992094fe36da ] + +The original code causes a circular locking dependency found by lockdep. 
+ +====================================================== +WARNING: possible circular locking dependency detected +6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U +------------------------------------------------------ +xe_fault_inject/5091 is trying to acquire lock: +ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660 + +but task is already holding lock: + +ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 +which lock already depends on the new lock. +the existing dependency chain (in reverse order) is: +-> #2 (&devcd->mutex){+.+.}-{3:3}: + mutex_lock_nested+0x4e/0xc0 + devcd_data_write+0x27/0x90 + sysfs_kf_bin_write+0x80/0xf0 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +-> #1 (kn->active#236){++++}-{0:0}: + kernfs_drain+0x1e2/0x200 + __kernfs_remove+0xae/0x400 + kernfs_remove_by_name_ns+0x5d/0xc0 + remove_files+0x54/0x70 + sysfs_remove_group+0x3d/0xa0 + sysfs_remove_groups+0x2e/0x60 + device_remove_attrs+0xc7/0x100 + device_del+0x15d/0x3b0 + devcd_del+0x19/0x30 + process_one_work+0x22b/0x6f0 + worker_thread+0x1e8/0x3d0 + kthread+0x11c/0x250 + ret_from_fork+0x26c/0x2e0 + ret_from_fork_asm+0x1a/0x30 +-> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}: + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + __flush_work+0x27a/0x660 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + 
__x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +other info that might help us debug this: +Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex + Possible unsafe locking scenario: + CPU0 CPU1 + ---- ---- + lock(&devcd->mutex); + lock(kn->active#236); + lock(&devcd->mutex); + lock((work_completion)(&(&devcd->del_wk)->work)); + *** DEADLOCK *** +5 locks held by xe_fault_inject/5091: + #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0 + #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220 + #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280 + #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 + #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660 +stack backtrace: +CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)} +Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER +Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021 +Call Trace: + + dump_stack_lvl+0x91/0xf0 + dump_stack+0x10/0x20 + print_circular_bug+0x285/0x360 + check_noncircular+0x135/0x150 + ? register_lock_class+0x48/0x4a0 + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + ? __flush_work+0x25d/0x660 + ? mark_held_locks+0x46/0x90 + ? __flush_work+0x25d/0x660 + __flush_work+0x27a/0x660 + ? __flush_work+0x25d/0x660 + ? trace_hardirqs_on+0x1e/0xd0 + ? __pfx_wq_barrier_func+0x10/0x10 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + ? 
bus_find_device+0xa8/0xe0 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + ? __f_unlock_pos+0x15/0x20 + ? __x64_sys_getdents64+0x9b/0x130 + ? __pfx_filldir64+0x10/0x10 + ? do_syscall_64+0x1a2/0xb60 + ? clear_bhb_loop+0x30/0x80 + ? clear_bhb_loop+0x30/0x80 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x76e292edd574 +Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 +RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574 +RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b +RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063 +R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000 + +xe 0000:03:00.0: [drm] Xe device coredump has been deleted. + +Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work") +Cc: Mukesh Ojha +Cc: Greg Kroah-Hartman +Cc: Johannes Berg +Cc: Rafael J. 
Wysocki +Cc: Danilo Krummrich +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org # v6.1+ +Signed-off-by: Maarten Lankhorst +Cc: Matthew Brost +Acked-by: Mukesh Ojha +Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se +Signed-off-by: Greg Kroah-Hartman +[ replaced disable_delayed_work_sync() with cancel_delayed_work_sync() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/devcoredump.c | 138 +++++++++++++++++++++++++++------------------ + 1 file changed, 84 insertions(+), 54 deletions(-) + +--- a/drivers/base/devcoredump.c ++++ b/drivers/base/devcoredump.c +@@ -26,50 +26,46 @@ struct devcd_entry { + void *data; + size_t datalen; + /* +- * Here, mutex is required to serialize the calls to del_wk work between +- * user/kernel space which happens when devcd is added with device_add() +- * and that sends uevent to user space. User space reads the uevents, +- * and calls to devcd_data_write() which try to modify the work which is +- * not even initialized/queued from devcoredump. ++ * There are 2 races for which mutex is required. + * ++ * The first race is between device creation and userspace writing to ++ * schedule immediately destruction. + * ++ * This race is handled by arming the timer before device creation, but ++ * when device creation fails the timer still exists. + * +- * cpu0(X) cpu1(Y) ++ * To solve this, hold the mutex during device_add(), and set ++ * init_completed on success before releasing the mutex. + * +- * dev_coredump() uevent sent to user space +- * device_add() ======================> user space process Y reads the +- * uevents writes to devcd fd +- * which results into writes to ++ * That way the timer will never fire until device_add() is called, ++ * it will do nothing if init_completed is not set. The timer is also ++ * cancelled in that case. 
+ * +- * devcd_data_write() +- * mod_delayed_work() +- * try_to_grab_pending() +- * del_timer() +- * debug_assert_init() +- * INIT_DELAYED_WORK() +- * schedule_delayed_work() +- * +- * +- * Also, mutex alone would not be enough to avoid scheduling of +- * del_wk work after it get flush from a call to devcd_free() +- * mentioned as below. +- * +- * disabled_store() +- * devcd_free() +- * mutex_lock() devcd_data_write() +- * flush_delayed_work() +- * mutex_unlock() +- * mutex_lock() +- * mod_delayed_work() +- * mutex_unlock() +- * So, delete_work flag is required. ++ * The second race involves multiple parallel invocations of devcd_free(), ++ * add a deleted flag so only 1 can call the destructor. + */ + struct mutex mutex; +- bool delete_work; ++ bool init_completed, deleted; + struct module *owner; + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen); + void (*free)(void *data); ++ /* ++ * If nothing interferes and device_add() was returns success, ++ * del_wk will destroy the device after the timer fires. ++ * ++ * Multiple userspace processes can interfere in the working of the timer: ++ * - Writing to the coredump will reschedule the timer to run immediately, ++ * if still armed. ++ * ++ * This is handled by using "if (cancel_delayed_work()) { ++ * schedule_delayed_work() }", to prevent re-arming after having ++ * been previously fired. ++ * - Writing to /sys/class/devcoredump/disabled will destroy the ++ * coredump synchronously. ++ * This is handled by using disable_delayed_work_sync(), and then ++ * checking if deleted flag is set with &devcd->mutex held. 
++ */ + struct delayed_work del_wk; + struct device *failing_dev; + }; +@@ -98,14 +94,27 @@ static void devcd_dev_release(struct dev + kfree(devcd); + } + ++static void __devcd_del(struct devcd_entry *devcd) ++{ ++ devcd->deleted = true; ++ device_del(&devcd->devcd_dev); ++ put_device(&devcd->devcd_dev); ++} ++ + static void devcd_del(struct work_struct *wk) + { + struct devcd_entry *devcd; ++ bool init_completed; + + devcd = container_of(wk, struct devcd_entry, del_wk.work); + +- device_del(&devcd->devcd_dev); +- put_device(&devcd->devcd_dev); ++ /* devcd->mutex serializes against dev_coredumpm_timeout */ ++ mutex_lock(&devcd->mutex); ++ init_completed = devcd->init_completed; ++ mutex_unlock(&devcd->mutex); ++ ++ if (init_completed) ++ __devcd_del(devcd); + } + + static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, +@@ -125,12 +134,12 @@ static ssize_t devcd_data_write(struct f + struct device *dev = kobj_to_dev(kobj); + struct devcd_entry *devcd = dev_to_devcd(dev); + +- mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) { +- devcd->delete_work = true; +- mod_delayed_work(system_wq, &devcd->del_wk, 0); +- } +- mutex_unlock(&devcd->mutex); ++ /* ++ * Although it's tempting to use mod_delayed work here, ++ * that will cause a reschedule if the timer already fired. ++ */ ++ if (cancel_delayed_work(&devcd->del_wk)) ++ schedule_delayed_work(&devcd->del_wk, 0); + + return count; + } +@@ -158,11 +167,21 @@ static int devcd_free(struct device *dev + { + struct devcd_entry *devcd = dev_to_devcd(dev); + ++ /* ++ * To prevent a race with devcd_data_write(), cancel work and ++ * complete manually instead. ++ * ++ * We cannot rely on the return value of ++ * cancel_delayed_work_sync() here, because it might be in the ++ * middle of a cancel_delayed_work + schedule_delayed_work pair. ++ * ++ * devcd->mutex here guards against multiple parallel invocations ++ * of devcd_free(). 
++ */ ++ cancel_delayed_work_sync(&devcd->del_wk); + mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) +- devcd->delete_work = true; +- +- flush_delayed_work(&devcd->del_wk); ++ if (!devcd->deleted) ++ __devcd_del(devcd); + mutex_unlock(&devcd->mutex); + return 0; + } +@@ -186,12 +205,10 @@ static ssize_t disabled_show(struct clas + * put_device() <- last reference + * error = fn(dev, data) devcd_dev_release() + * devcd_free(dev, data) kfree(devcd) +- * mutex_lock(&devcd->mutex); + * + * +- * In the above diagram, It looks like disabled_store() would be racing with parallely +- * running devcd_del() and result in memory abort while acquiring devcd->mutex which +- * is called after kfree of devcd memory after dropping its last reference with ++ * In the above diagram, it looks like disabled_store() would be racing with parallelly ++ * running devcd_del() and result in memory abort after dropping its last reference with + * put_device(). However, this will not happens as fn(dev, data) runs + * with its own reference to device via klist_node so it is not its last reference. + * so, above situation would not occur. 
+@@ -353,7 +370,7 @@ void dev_coredumpm(struct device *dev, s + devcd->read = read; + devcd->free = free; + devcd->failing_dev = get_device(dev); +- devcd->delete_work = false; ++ devcd->deleted = false; + + mutex_init(&devcd->mutex); + device_initialize(&devcd->devcd_dev); +@@ -362,8 +379,14 @@ void dev_coredumpm(struct device *dev, s + atomic_inc_return(&devcd_count)); + devcd->devcd_dev.class = &devcd_class; + +- mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); ++ ++ /* devcd->mutex prevents devcd_del() completing until init finishes */ ++ mutex_lock(&devcd->mutex); ++ devcd->init_completed = false; ++ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); ++ schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); ++ + if (device_add(&devcd->devcd_dev)) + goto put_device; + +@@ -380,13 +403,20 @@ void dev_coredumpm(struct device *dev, s + + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); +- INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); +- schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); ++ ++ /* ++ * Safe to run devcd_del() now that we are done with devcd_dev. ++ * Alternatively we could have taken a ref on devcd_dev before ++ * dropping the lock. 
++ */ ++ devcd->init_completed = true; + mutex_unlock(&devcd->mutex); + return; + put_device: +- put_device(&devcd->devcd_dev); + mutex_unlock(&devcd->mutex); ++ cancel_delayed_work_sync(&devcd->del_wk); ++ put_device(&devcd->devcd_dev); ++ + put_module: + module_put(owner); + free: diff --git a/queue-6.1/drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch b/queue-6.1/drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch new file mode 100644 index 0000000000..dec26dc69a --- /dev/null +++ b/queue-6.1/drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch @@ -0,0 +1,101 @@ +From stable+bounces-188336-greg=kroah.com@vger.kernel.org Tue Oct 21 15:13:55 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:12:50 -0400 +Subject: drm/sched: Fix potential double free in drm_sched_job_add_resv_dependencies +To: stable@vger.kernel.org +Cc: "Tvrtko Ursulin" , "Dan Carpenter" , "Christian König" , "Rob Clark" , "Daniel Vetter" , "Matthew Brost" , "Danilo Krummrich" , "Philipp Stanner" , "Christian König" , dri-devel@lists.freedesktop.org, "Sasha Levin" +Message-ID: <20251021131250.2072371-1-sashal@kernel.org> + +From: Tvrtko Ursulin + +[ Upstream commit 5801e65206b065b0b2af032f7f1eef222aa2fd83 ] + +When adding dependencies with drm_sched_job_add_dependency(), that +function consumes the fence reference both on success and failure, so in +the latter case the dma_fence_put() on the error path (xarray failed to +expand) is a double free. + +Interestingly this bug appears to have been present ever since +commit ebd5f74255b9 ("drm/sched: Add dependency tracking"), since the code +back then looked like this: + +drm_sched_job_add_implicit_dependencies(): +... 
+ for (i = 0; i < fence_count; i++) { + ret = drm_sched_job_add_dependency(job, fences[i]); + if (ret) + break; + } + + for (; i < fence_count; i++) + dma_fence_put(fences[i]); + +Which means for the failing 'i' the dma_fence_put was already a double +free. Possibly there were no users at that time, or the test cases were +insufficient to hit it. + +The bug was then only noticed and fixed after +commit 9c2ba265352a ("drm/scheduler: use new iterator in drm_sched_job_add_implicit_dependencies v2") +landed, with its fixup of +commit 4eaf02d6076c ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies"). + +At that point it was a slightly different flavour of a double free, which +commit 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") +noticed and attempted to fix. + +But it only moved the double free from happening inside the +drm_sched_job_add_dependency(), when releasing the reference not yet +obtained, to the caller, when releasing the reference already released by +the former in the failure case. + +As such it is not easy to identify the right target for the fixes tag so +lets keep it simple and just continue the chain. + +While fixing we also improve the comment and explain the reason for taking +the reference and not dropping it. 
+ +Signed-off-by: Tvrtko Ursulin +Fixes: 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/dri-devel/aNFbXq8OeYl3QSdm@stanley.mountain/ +Cc: Christian König +Cc: Rob Clark +Cc: Daniel Vetter +Cc: Matthew Brost +Cc: Danilo Krummrich +Cc: Philipp Stanner +Cc: Christian König +Cc: dri-devel@lists.freedesktop.org +Cc: stable@vger.kernel.org # v5.16+ +Signed-off-by: Philipp Stanner +Link: https://lore.kernel.org/r/20251015084015.6273-1-tvrtko.ursulin@igalia.com +[ applied to drm_sched_job_add_implicit_dependencies instead of drm_sched_job_add_resv_dependencies ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/scheduler/sched_main.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -719,13 +719,14 @@ int drm_sched_job_add_implicit_dependenc + + dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write), + fence) { +- /* Make sure to grab an additional ref on the added fence */ +- dma_fence_get(fence); +- ret = drm_sched_job_add_dependency(job, fence); +- if (ret) { +- dma_fence_put(fence); ++ /* ++ * As drm_sched_job_add_dependency always consumes the fence ++ * reference (even when it fails), and dma_resv_for_each_fence ++ * is not obtaining one, we need to grab one before calling. 
++ */ ++ ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); ++ if (ret) + return ret; +- } + } + return 0; + } diff --git a/queue-6.1/ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch b/queue-6.1/ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch new file mode 100644 index 0000000000..e02c2a33db --- /dev/null +++ b/queue-6.1/ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch @@ -0,0 +1,74 @@ +From stable+bounces-188384-greg=kroah.com@vger.kernel.org Tue Oct 21 19:04:09 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 13:04:01 -0400 +Subject: ext4: avoid potential buffer over-read in parse_apply_sb_mount_options() +To: stable@vger.kernel.org +Cc: Theodore Ts'o , Jan Kara , "Darrick J. Wong" , Sasha Levin +Message-ID: <20251021170401.2401806-1-sashal@kernel.org> + +From: Theodore Ts'o + +[ Upstream commit 8ecb790ea8c3fc69e77bace57f14cf0d7c177bd8 ] + +Unlike other strings in the ext4 superblock, we rely on tune2fs to +make sure s_mount_opts is NUL terminated. Harden +parse_apply_sb_mount_options() by treating s_mount_opts as a potential +__nonstring. + +Cc: stable@vger.kernel.org +Fixes: 8b67f04ab9de ("ext4: Add mount options in superblock") +Reviewed-by: Jan Kara +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Theodore Ts'o +Message-ID: <20250916-tune2fs-v2-1-d594dc7486f0@mit.edu> +Signed-off-by: Theodore Ts'o +[ added sizeof() third argument to strscpy_pad() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2415,7 +2415,7 @@ static int parse_apply_sb_mount_options( + struct ext4_fs_context *m_ctx) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +- char *s_mount_opts = NULL; ++ char s_mount_opts[65]; + struct ext4_fs_context *s_ctx = NULL; + struct fs_context *fc = NULL; + int ret = -ENOMEM; +@@ -2423,15 +2423,11 @@ static int parse_apply_sb_mount_options( + if (!sbi->s_es->s_mount_opts[0]) + return 0; + +- s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, +- sizeof(sbi->s_es->s_mount_opts), +- GFP_KERNEL); +- if (!s_mount_opts) +- return ret; ++ strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts, sizeof(s_mount_opts)); + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) +- goto out_free; ++ return -ENOMEM; + + s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); + if (!s_ctx) +@@ -2463,11 +2459,8 @@ parse_failed: + ret = 0; + + out_free: +- if (fc) { +- ext4_fc_free(fc); +- kfree(fc); +- } +- kfree(s_mount_opts); ++ ext4_fc_free(fc); ++ kfree(fc); + return ret; + } + diff --git a/queue-6.1/f2fs-add-a-f2fs_get_block_locked-helper.patch b/queue-6.1/f2fs-add-a-f2fs_get_block_locked-helper.patch new file mode 100644 index 0000000000..de17eaf149 --- /dev/null +++ b/queue-6.1/f2fs-add-a-f2fs_get_block_locked-helper.patch @@ -0,0 +1,99 @@ +From stable+bounces-188251-greg=kroah.com@vger.kernel.org Mon Oct 20 22:51:37 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 16:51:25 -0400 +Subject: f2fs: add a f2fs_get_block_locked helper +To: stable@vger.kernel.org +Cc: Christoph Hellwig , Chao Yu , Jaegeuk Kim , Sasha Levin +Message-ID: <20251020205128.1912678-1-sashal@kernel.org> + 
+From: Christoph Hellwig + +[ Upstream commit cf342d3beda000b4c60990755ca7800de5038785 ] + +This allows to keep the f2fs_do_map_lock based locking scheme +private to data.c. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 9d5c4f5c7a2c ("f2fs: fix wrong block mapping for multi-devices") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 16 ++++++++++++++-- + fs/f2fs/f2fs.h | 3 +-- + fs/f2fs/file.c | 4 +--- + 3 files changed, 16 insertions(+), 7 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1192,7 +1192,7 @@ int f2fs_reserve_block(struct dnode_of_d + return err; + } + +-int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) ++static int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) + { + struct extent_info ei = {0, }; + struct inode *inode = dn->inode; +@@ -1432,7 +1432,7 @@ static int __allocate_data_block(struct + return 0; + } + +-void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) ++static void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) + { + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (lock) +@@ -1447,6 +1447,18 @@ void f2fs_do_map_lock(struct f2fs_sb_inf + } + } + ++int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index) ++{ ++ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); ++ int err; ++ ++ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); ++ err = f2fs_get_block(dn, index); ++ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); ++ ++ return err; ++} ++ + /* + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -3783,7 +3783,7 @@ void f2fs_set_data_blkaddr(struct dnode_ + void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); + int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); + int f2fs_reserve_new_block(struct 
dnode_of_data *dn); +-int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); ++int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index); + int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); + struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +@@ -3794,7 +3794,6 @@ struct page *f2fs_get_lock_data_page(str + struct page *f2fs_get_new_data_page(struct inode *inode, + struct page *ipage, pgoff_t index, bool new_i_size); + int f2fs_do_write_data_page(struct f2fs_io_info *fio); +-void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); + int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, int flag); + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -113,10 +113,8 @@ static vm_fault_t f2fs_vm_page_mkwrite(s + + if (need_alloc) { + /* block allocation */ +- f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); +- err = f2fs_get_block(&dn, page->index); +- f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); ++ err = f2fs_get_block_locked(&dn, page->index); + } + + #ifdef CONFIG_F2FS_FS_COMPRESSION diff --git a/queue-6.1/f2fs-factor-a-f2fs_map_blocks_cached-helper.patch b/queue-6.1/f2fs-factor-a-f2fs_map_blocks_cached-helper.patch new file mode 100644 index 0000000000..e160a6c24e --- /dev/null +++ b/queue-6.1/f2fs-factor-a-f2fs_map_blocks_cached-helper.patch @@ -0,0 +1,120 @@ +From stable+bounces-188253-greg=kroah.com@vger.kernel.org Mon Oct 20 22:51:39 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 16:51:27 -0400 +Subject: f2fs: factor a f2fs_map_blocks_cached helper +To: stable@vger.kernel.org +Cc: Christoph Hellwig , Chao Yu , Jaegeuk Kim , Sasha Levin +Message-ID: <20251020205128.1912678-3-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit 0094e98bd1477a6b7d97c25b47b19a7317c35279 ] + 
+Add a helper to deal with everything needed to return a f2fs_map_blocks +structure based on a lookup in the extent cache. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 9d5c4f5c7a2c ("f2fs: fix wrong block mapping for multi-devices") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 65 +++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 38 insertions(+), 27 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1459,6 +1459,41 @@ int f2fs_get_block_locked(struct dnode_o + return err; + } + ++static bool f2fs_map_blocks_cached(struct inode *inode, ++ struct f2fs_map_blocks *map, int flag) ++{ ++ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); ++ unsigned int maxblocks = map->m_len; ++ pgoff_t pgoff = (pgoff_t)map->m_lblk; ++ struct extent_info ei = {}; ++ ++ if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei)) ++ return false; ++ ++ map->m_pblk = ei.blk + pgoff - ei.fofs; ++ map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff); ++ map->m_flags = F2FS_MAP_MAPPED; ++ if (map->m_next_extent) ++ *map->m_next_extent = pgoff + map->m_len; ++ ++ /* for hardware encryption, but to avoid potential issue in future */ ++ if (flag == F2FS_GET_BLOCK_DIO) ++ f2fs_wait_on_block_writeback_range(inode, ++ map->m_pblk, map->m_len); ++ ++ if (f2fs_allow_multi_device_dio(sbi, flag)) { ++ int bidx = f2fs_target_device_index(sbi, map->m_pblk); ++ struct f2fs_dev_info *dev = &sbi->devs[bidx]; ++ ++ map->m_bdev = dev->bdev; ++ map->m_pblk -= dev->start_blk; ++ map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); ++ } else { ++ map->m_bdev = inode->i_sb->s_bdev; ++ } ++ return true; ++} ++ + /* + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such +@@ -1474,7 +1509,6 @@ int f2fs_map_blocks(struct inode *inode, + int err = 0, ofs = 1; + unsigned int ofs_in_node, 
last_ofs_in_node; + blkcnt_t prealloc; +- struct extent_info ei = {0, }; + block_t blkaddr; + unsigned int start_pgofs; + int bidx = 0; +@@ -1482,6 +1516,9 @@ int f2fs_map_blocks(struct inode *inode, + if (!maxblocks) + return 0; + ++ if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) ++ goto out; ++ + map->m_bdev = inode->i_sb->s_bdev; + map->m_multidev_dio = + f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag); +@@ -1493,32 +1530,6 @@ int f2fs_map_blocks(struct inode *inode, + pgofs = (pgoff_t)map->m_lblk; + end = pgofs + maxblocks; + +- if (map->m_may_create || +- !f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) +- goto next_dnode; +- +- /* Found the map in read extent cache */ +- map->m_pblk = ei.blk + pgofs - ei.fofs; +- map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); +- map->m_flags = F2FS_MAP_MAPPED; +- if (map->m_next_extent) +- *map->m_next_extent = pgofs + map->m_len; +- +- /* for hardware encryption, but to avoid potential issue in future */ +- if (flag == F2FS_GET_BLOCK_DIO) +- f2fs_wait_on_block_writeback_range(inode, +- map->m_pblk, map->m_len); +- +- if (map->m_multidev_dio) { +- bidx = f2fs_target_device_index(sbi, map->m_pblk); +- +- map->m_bdev = FDEV(bidx).bdev; +- map->m_pblk -= FDEV(bidx).start_blk; +- map->m_len = min(map->m_len, +- FDEV(bidx).end_blk + 1 - map->m_pblk); +- } +- goto out; +- + next_dnode: + if (map->m_may_create) + f2fs_do_map_lock(sbi, flag, true); diff --git a/queue-6.1/f2fs-fix-wrong-block-mapping-for-multi-devices.patch b/queue-6.1/f2fs-fix-wrong-block-mapping-for-multi-devices.patch new file mode 100644 index 0000000000..05fd2d9cb7 --- /dev/null +++ b/queue-6.1/f2fs-fix-wrong-block-mapping-for-multi-devices.patch @@ -0,0 +1,59 @@ +From stable+bounces-188254-greg=kroah.com@vger.kernel.org Mon Oct 20 22:51:43 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 16:51:28 -0400 +Subject: f2fs: fix wrong block mapping for multi-devices +To: stable@vger.kernel.org +Cc: Jaegeuk Kim , Chao Yu , 
Sasha Levin +Message-ID: <20251020205128.1912678-4-sashal@kernel.org> + +From: Jaegeuk Kim + +[ Upstream commit 9d5c4f5c7a2c7677e1b3942772122b032c265aae ] + +Assuming the disk layout as below, + +disk0: 0 --- 0x00035abfff +disk1: 0x00035ac000 --- 0x00037abfff +disk2: 0x00037ac000 --- 0x00037ebfff + +and we want to read data from offset=13568 having len=128 across the block +devices, we can illustrate the block addresses like below. + +0 .. 0x00037ac000 ------------------- 0x00037ebfff, 0x00037ec000 ------- + | ^ ^ ^ + | fofs 0 13568 13568+128 + | ------------------------------------------------------ + | LBA 0x37e8aa9 0x37ebfa9 0x37ec029 + --- map 0x3caa9 0x3ffa9 + +In this example, we should give the relative map of the target block device +ranging from 0x3caa9 to 0x3ffa9 where the length should be calculated by +0x37ebfff + 1 - 0x37ebfa9. + +In the below equation, however, map->m_pblk was supposed to be the original +address instead of the one from the target block address. + + - map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); + +Cc: stable@vger.kernel.org +Fixes: 71f2c8206202 ("f2fs: multidevice: support direct IO") +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1486,8 +1486,8 @@ static bool f2fs_map_blocks_cached(struc + struct f2fs_dev_info *dev = &sbi->devs[bidx]; + + map->m_bdev = dev->bdev; +- map->m_pblk -= dev->start_blk; + map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); ++ map->m_pblk -= dev->start_blk; + } else { + map->m_bdev = inode->i_sb->s_bdev; + } diff --git a/queue-6.1/f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch b/queue-6.1/f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch new file mode 100644 index 0000000000..7422585053 --- /dev/null +++ b/queue-6.1/f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch 
@@ -0,0 +1,288 @@ +From stable+bounces-188252-greg=kroah.com@vger.kernel.org Mon Oct 20 22:51:38 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 16:51:26 -0400 +Subject: f2fs: remove the create argument to f2fs_map_blocks +To: stable@vger.kernel.org +Cc: Christoph Hellwig , Chao Yu , Jaegeuk Kim , Sasha Levin +Message-ID: <20251020205128.1912678-2-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit cd8fc5226bef3a1fda13a0e61794a039ca46744a ] + +The create argument is always identicaly to map->m_may_create, so use +that consistently. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 9d5c4f5c7a2c ("f2fs: fix wrong block mapping for multi-devices") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 65 ++++++++++++++++++-------------------------- + fs/f2fs/f2fs.h | 3 -- + fs/f2fs/file.c | 12 ++++---- + include/trace/events/f2fs.h | 11 ++----- + 4 files changed, 39 insertions(+), 52 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -1464,8 +1464,7 @@ int f2fs_get_block_locked(struct dnode_o + * maps continuous logical blocks to physical blocks, and return such + * info via f2fs_map_blocks structure. 
+ */ +-int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, +- int create, int flag) ++int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) + { + unsigned int maxblocks = map->m_len; + struct dnode_of_data dn; +@@ -1494,38 +1493,31 @@ int f2fs_map_blocks(struct inode *inode, + pgofs = (pgoff_t)map->m_lblk; + end = pgofs + maxblocks; + +- if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) { +- if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && +- map->m_may_create) +- goto next_dnode; +- +- map->m_pblk = ei.blk + pgofs - ei.fofs; +- map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); +- map->m_flags = F2FS_MAP_MAPPED; +- if (map->m_next_extent) +- *map->m_next_extent = pgofs + map->m_len; ++ if (map->m_may_create || ++ !f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) ++ goto next_dnode; ++ ++ /* Found the map in read extent cache */ ++ map->m_pblk = ei.blk + pgofs - ei.fofs; ++ map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); ++ map->m_flags = F2FS_MAP_MAPPED; ++ if (map->m_next_extent) ++ *map->m_next_extent = pgofs + map->m_len; + +- /* for hardware encryption, but to avoid potential issue in future */ +- if (flag == F2FS_GET_BLOCK_DIO) +- f2fs_wait_on_block_writeback_range(inode, ++ /* for hardware encryption, but to avoid potential issue in future */ ++ if (flag == F2FS_GET_BLOCK_DIO) ++ f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + +- if (map->m_multidev_dio) { +- block_t blk_addr = map->m_pblk; +- +- bidx = f2fs_target_device_index(sbi, map->m_pblk); ++ if (map->m_multidev_dio) { ++ bidx = f2fs_target_device_index(sbi, map->m_pblk); + +- map->m_bdev = FDEV(bidx).bdev; +- map->m_pblk -= FDEV(bidx).start_blk; +- map->m_len = min(map->m_len, ++ map->m_bdev = FDEV(bidx).bdev; ++ map->m_pblk -= FDEV(bidx).start_blk; ++ map->m_len = min(map->m_len, + FDEV(bidx).end_blk + 1 - map->m_pblk); +- +- if (map->m_may_create) +- f2fs_update_device_state(sbi, 
inode->i_ino, +- blk_addr, map->m_len); +- } +- goto out; + } ++ goto out; + + next_dnode: + if (map->m_may_create) +@@ -1589,7 +1581,7 @@ next_block: + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { +- if (create) { ++ if (map->m_may_create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto sync_out; +@@ -1764,7 +1756,7 @@ unlock_out: + f2fs_balance_fs(sbi, dn.node_changed); + } + out: +- trace_f2fs_map_blocks(inode, map, create, flag, err); ++ trace_f2fs_map_blocks(inode, map, flag, err); + return err; + } + +@@ -1786,7 +1778,7 @@ bool f2fs_overwrite_io(struct inode *ino + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; +- err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; +@@ -1960,7 +1952,7 @@ next: + map.m_len = cluster_size - count_in_cluster; + } + +- ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); ++ ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP); + if (ret) + goto out; + +@@ -2093,7 +2085,7 @@ static int f2fs_read_single_page(struct + map->m_lblk = block_in_file; + map->m_len = last_block - block_in_file; + +- ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); ++ ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT); + if (ret) + goto out; + got_it: +@@ -3850,7 +3842,7 @@ static sector_t f2fs_bmap(struct address + map.m_next_pgofs = NULL; + map.m_seg_type = NO_CHECK_TYPE; + +- if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP)) ++ if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP)) + blknr = map.m_pblk; + } + out: +@@ -3958,7 +3950,7 @@ retry: + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; + +- ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); ++ ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP); + if (ret) + goto out; + +@@ -4187,8 +4179,7 @@ static int f2fs_iomap_begin(struct inode 
+ if (flags & IOMAP_WRITE) + map.m_may_create = true; + +- err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE, +- F2FS_GET_BLOCK_DIO); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); + if (err) + return err; + +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -3794,8 +3794,7 @@ struct page *f2fs_get_lock_data_page(str + struct page *f2fs_get_new_data_page(struct inode *inode, + struct page *ipage, pgoff_t index, bool new_i_size); + int f2fs_do_write_data_page(struct f2fs_io_info *fio); +-int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, +- int create, int flag); ++int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag); + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); + int f2fs_encrypt_one_page(struct f2fs_io_info *fio); +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -1800,7 +1800,7 @@ next_alloc: + f2fs_unlock_op(sbi); + + map.m_seg_type = CURSEG_COLD_DATA_PINNED; +- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); + file_dont_truncate(inode); + + f2fs_up_write(&sbi->pin_sem); +@@ -1813,7 +1813,7 @@ next_alloc: + + map.m_len = expanded; + } else { +- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); + expanded = map.m_len; + } + out_err: +@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct + */ + while (map.m_lblk < pg_end) { + map.m_len = pg_end - map.m_lblk; +- err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); + if (err) + goto out; + +@@ -2757,7 +2757,7 @@ static int f2fs_defragment_range(struct + + do_map: + map.m_len = pg_end - map.m_lblk; +- err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); + if (err) + goto clear_out; + +@@ -3352,7 +3352,7 @@ 
int f2fs_precache_extents(struct inode * + map.m_len = end - map.m_lblk; + + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); +- err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); ++ err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (err) + return err; +@@ -4635,7 +4635,7 @@ static int f2fs_preallocate_blocks(struc + flag = F2FS_GET_BLOCK_PRE_AIO; + } + +- ret = f2fs_map_blocks(inode, &map, 1, flag); ++ ret = f2fs_map_blocks(inode, &map, flag); + /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */ + if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) + return ret; +--- a/include/trace/events/f2fs.h ++++ b/include/trace/events/f2fs.h +@@ -564,10 +564,10 @@ TRACE_EVENT(f2fs_file_write_iter, + ); + + TRACE_EVENT(f2fs_map_blocks, +- TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, +- int create, int flag, int ret), ++ TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int flag, ++ int ret), + +- TP_ARGS(inode, map, create, flag, ret), ++ TP_ARGS(inode, map, flag, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) +@@ -579,7 +579,6 @@ TRACE_EVENT(f2fs_map_blocks, + __field(int, m_seg_type) + __field(bool, m_may_create) + __field(bool, m_multidev_dio) +- __field(int, create) + __field(int, flag) + __field(int, ret) + ), +@@ -594,7 +593,6 @@ TRACE_EVENT(f2fs_map_blocks, + __entry->m_seg_type = map->m_seg_type; + __entry->m_may_create = map->m_may_create; + __entry->m_multidev_dio = map->m_multidev_dio; +- __entry->create = create; + __entry->flag = flag; + __entry->ret = ret; + ), +@@ -602,7 +600,7 @@ TRACE_EVENT(f2fs_map_blocks, + TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " + "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, " + "seg_type = %d, may_create = %d, multidevice = %d, " +- "create = %d, flag = %d, err = %d", ++ "flag = %d, err = %d", + show_dev_ino(__entry), + (unsigned long long)__entry->m_lblk, + (unsigned long 
long)__entry->m_pblk, +@@ -611,7 +609,6 @@ TRACE_EVENT(f2fs_map_blocks, + __entry->m_seg_type, + __entry->m_may_create, + __entry->m_multidev_dio, +- __entry->create, + __entry->flag, + __entry->ret) + ); diff --git a/queue-6.1/fuse-allocate-ff-release_args-only-if-release-is-needed.patch b/queue-6.1/fuse-allocate-ff-release_args-only-if-release-is-needed.patch new file mode 100644 index 0000000000..12d4f11b2f --- /dev/null +++ b/queue-6.1/fuse-allocate-ff-release_args-only-if-release-is-needed.patch @@ -0,0 +1,241 @@ +From stable+bounces-188166-greg=kroah.com@vger.kernel.org Mon Oct 20 18:05:08 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:02:31 -0400 +Subject: fuse: allocate ff->release_args only if release is needed +To: stable@vger.kernel.org +Cc: Amir Goldstein , Miklos Szeredi , Sasha Levin +Message-ID: <20251020160232.1828501-1-sashal@kernel.org> + +From: Amir Goldstein + +[ Upstream commit e26ee4efbc79610b20e7abe9d96c87f33dacc1ff ] + +This removed the need to pass isdir argument to fuse_put_file(). 
+ +Signed-off-by: Amir Goldstein +Signed-off-by: Miklos Szeredi +Stable-dep-of: 26e5c67deb2e ("fuse: fix livelock in synchronous file put from fuseblk workers") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dir.c | 2 - + fs/fuse/file.c | 69 +++++++++++++++++++++++++++++++------------------------ + fs/fuse/fuse_i.h | 2 - + 3 files changed, 41 insertions(+), 32 deletions(-) + +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -584,7 +584,7 @@ static int fuse_create_open(struct inode + goto out_err; + + err = -ENOMEM; +- ff = fuse_file_alloc(fm); ++ ff = fuse_file_alloc(fm, true); + if (!ff) + goto out_put_forget_req; + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -54,7 +54,7 @@ struct fuse_release_args { + struct inode *inode; + }; + +-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm) ++struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release) + { + struct fuse_file *ff; + +@@ -63,11 +63,13 @@ struct fuse_file *fuse_file_alloc(struct + return NULL; + + ff->fm = fm; +- ff->release_args = kzalloc(sizeof(*ff->release_args), +- GFP_KERNEL_ACCOUNT); +- if (!ff->release_args) { +- kfree(ff); +- return NULL; ++ if (release) { ++ ff->release_args = kzalloc(sizeof(*ff->release_args), ++ GFP_KERNEL_ACCOUNT); ++ if (!ff->release_args) { ++ kfree(ff); ++ return NULL; ++ } + } + + INIT_LIST_HEAD(&ff->write_entry); +@@ -103,14 +105,14 @@ static void fuse_release_end(struct fuse + kfree(ra); + } + +-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) ++static void fuse_file_put(struct fuse_file *ff, bool sync) + { + if (refcount_dec_and_test(&ff->count)) { +- struct fuse_args *args = &ff->release_args->args; ++ struct fuse_release_args *ra = ff->release_args; ++ struct fuse_args *args = (ra ? &ra->args : NULL); + +- if (isdir ? 
ff->fm->fc->no_opendir : ff->fm->fc->no_open) { +- /* Do nothing when client does not implement 'open' */ +- fuse_release_end(ff->fm, args, 0); ++ if (!args) { ++ /* Do nothing when server does not implement 'open' */ + } else if (sync) { + fuse_simple_request(ff->fm, args); + fuse_release_end(ff->fm, args, 0); +@@ -130,15 +132,16 @@ struct fuse_file *fuse_file_open(struct + struct fuse_conn *fc = fm->fc; + struct fuse_file *ff; + int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; ++ bool open = isdir ? !fc->no_opendir : !fc->no_open; + +- ff = fuse_file_alloc(fm); ++ ff = fuse_file_alloc(fm, open); + if (!ff) + return ERR_PTR(-ENOMEM); + + ff->fh = 0; + /* Default for no-open */ + ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0); +- if (isdir ? !fc->no_opendir : !fc->no_open) { ++ if (open) { + struct fuse_open_out outarg; + int err; + +@@ -146,11 +149,13 @@ struct fuse_file *fuse_file_open(struct + if (!err) { + ff->fh = outarg.fh; + ff->open_flags = outarg.open_flags; +- + } else if (err != -ENOSYS) { + fuse_file_free(ff); + return ERR_PTR(err); + } else { ++ /* No release needed */ ++ kfree(ff->release_args); ++ ff->release_args = NULL; + if (isdir) + fc->no_opendir = 1; + else +@@ -272,7 +277,7 @@ out_inode_unlock: + } + + static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, +- unsigned int flags, int opcode) ++ unsigned int flags, int opcode, bool sync) + { + struct fuse_conn *fc = ff->fm->fc; + struct fuse_release_args *ra = ff->release_args; +@@ -290,6 +295,9 @@ static void fuse_prepare_release(struct + + wake_up_interruptible_all(&ff->poll_wait); + ++ if (!ra) ++ return; ++ + ra->inarg.fh = ff->fh; + ra->inarg.flags = flags; + ra->args.in_numargs = 1; +@@ -299,6 +307,13 @@ static void fuse_prepare_release(struct + ra->args.nodeid = ff->nodeid; + ra->args.force = true; + ra->args.nocreds = true; ++ ++ /* ++ * Hold inode until release is finished. 
++ * From fuse_sync_release() the refcount is 1 and everything's ++ * synchronous, so we are fine with not doing igrab() here. ++ */ ++ ra->inode = sync ? NULL : igrab(&fi->inode); + } + + void fuse_file_release(struct inode *inode, struct fuse_file *ff, +@@ -308,14 +323,12 @@ void fuse_file_release(struct inode *ino + struct fuse_release_args *ra = ff->release_args; + int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; + +- fuse_prepare_release(fi, ff, open_flags, opcode); ++ fuse_prepare_release(fi, ff, open_flags, opcode, false); + +- if (ff->flock) { ++ if (ra && ff->flock) { + ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id); + } +- /* Hold inode until release is finished */ +- ra->inode = igrab(inode); + + /* + * Normally this will send the RELEASE request, however if +@@ -326,7 +339,7 @@ void fuse_file_release(struct inode *ino + * synchronous RELEASE is allowed (and desirable) in this case + * because the server can be trusted not to screw up. 
+ */ +- fuse_file_put(ff, ff->fm->fc->destroy, isdir); ++ fuse_file_put(ff, ff->fm->fc->destroy); + } + + void fuse_release_common(struct file *file, bool isdir) +@@ -361,12 +374,8 @@ void fuse_sync_release(struct fuse_inode + unsigned int flags) + { + WARN_ON(refcount_read(&ff->count) > 1); +- fuse_prepare_release(fi, ff, flags, FUSE_RELEASE); +- /* +- * iput(NULL) is a no-op and since the refcount is 1 and everything's +- * synchronous, we are fine with not doing igrab() here" +- */ +- fuse_file_put(ff, true, false); ++ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true); ++ fuse_file_put(ff, true); + } + EXPORT_SYMBOL_GPL(fuse_sync_release); + +@@ -923,7 +932,7 @@ static void fuse_readpages_end(struct fu + put_page(page); + } + if (ia->ff) +- fuse_file_put(ia->ff, false, false); ++ fuse_file_put(ia->ff, false); + + fuse_io_free(ia); + } +@@ -1670,7 +1679,7 @@ static void fuse_writepage_free(struct f + __free_page(ap->pages[i]); + + if (wpa->ia.ff) +- fuse_file_put(wpa->ia.ff, false, false); ++ fuse_file_put(wpa->ia.ff, false); + + kfree(ap->pages); + kfree(wpa); +@@ -1918,7 +1927,7 @@ int fuse_write_inode(struct inode *inode + ff = __fuse_write_file_get(fi); + err = fuse_flush_times(inode, ff); + if (ff) +- fuse_file_put(ff, false, false); ++ fuse_file_put(ff, false); + + return err; + } +@@ -2316,7 +2325,7 @@ static int fuse_writepages(struct addres + fuse_writepages_send(&data); + } + if (data.ff) +- fuse_file_put(data.ff, false, false); ++ fuse_file_put(data.ff, false); + + kfree(data.orig_pages); + out: +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -1022,7 +1022,7 @@ void fuse_read_args_fill(struct fuse_io_ + */ + int fuse_open_common(struct inode *inode, struct file *file, bool isdir); + +-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm); ++struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release); + void fuse_file_free(struct fuse_file *ff); + void fuse_finish_open(struct inode *inode, struct file *file); + diff --git 
a/queue-6.1/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch b/queue-6.1/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch new file mode 100644 index 0000000000..1f237ced84 --- /dev/null +++ b/queue-6.1/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch @@ -0,0 +1,94 @@ +From stable+bounces-188167-greg=kroah.com@vger.kernel.org Mon Oct 20 18:05:11 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:02:32 -0400 +Subject: fuse: fix livelock in synchronous file put from fuseblk workers +To: stable@vger.kernel.org +Cc: "Darrick J. Wong" , Miklos Szeredi , Sasha Levin +Message-ID: <20251020160232.1828501-2-sashal@kernel.org> + +From: "Darrick J. Wong" + +[ Upstream commit 26e5c67deb2e1f42a951f022fdf5b9f7eb747b01 ] + +I observed a hang when running generic/323 against a fuseblk server. +This test opens a file, initiates a lot of AIO writes to that file +descriptor, and closes the file descriptor before the writes complete. +Unsurprisingly, the AIO exerciser threads are mostly stuck waiting for +responses from the fuseblk server: + +# cat /proc/372265/task/372313/stack +[<0>] request_wait_answer+0x1fe/0x2a0 [fuse] +[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse] +[<0>] fuse_do_getattr+0xfc/0x1f0 [fuse] +[<0>] fuse_file_read_iter+0xbe/0x1c0 [fuse] +[<0>] aio_read+0x130/0x1e0 +[<0>] io_submit_one+0x542/0x860 +[<0>] __x64_sys_io_submit+0x98/0x1a0 +[<0>] do_syscall_64+0x37/0xf0 +[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +But the /weird/ part is that the fuseblk server threads are waiting for +responses from itself: + +# cat /proc/372210/task/372232/stack +[<0>] request_wait_answer+0x1fe/0x2a0 [fuse] +[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse] +[<0>] fuse_file_put+0x9a/0xd0 [fuse] +[<0>] fuse_release+0x36/0x50 [fuse] +[<0>] __fput+0xec/0x2b0 +[<0>] task_work_run+0x55/0x90 +[<0>] syscall_exit_to_user_mode+0xe9/0x100 +[<0>] do_syscall_64+0x43/0xf0 +[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +The fuseblk server 
is fuse2fs so there's nothing all that exciting in +the server itself. So why is the fuse server calling fuse_file_put? +The commit message for the fstest sheds some light on that: + +"By closing the file descriptor before calling io_destroy, you pretty +much guarantee that the last put on the ioctx will be done in interrupt +context (during I/O completion). + +Aha. AIO fgets a new struct file from the fd when it queues the ioctx. +The completion of the FUSE_WRITE command from userspace causes the fuse +server to call the AIO completion function. The completion puts the +struct file, queuing a delayed fput to the fuse server task. When the +fuse server task returns to userspace, it has to run the delayed fput, +which in the case of a fuseblk server, it does synchronously. + +Sending the FUSE_RELEASE command sychronously from fuse server threads +is a bad idea because a client program can initiate enough simultaneous +AIOs such that all the fuse server threads end up in delayed_fput, and +now there aren't any threads left to handle the queued fuse commands. + +Fix this by only using asynchronous fputs when closing files, and leave +a comment explaining why. + +Cc: stable@vger.kernel.org # v2.6.38 +Fixes: 5a18ec176c934c ("fuse: fix hang of single threaded fuseblk filesystem") +Signed-off-by: Darrick J. Wong +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -338,8 +338,14 @@ void fuse_file_release(struct inode *ino + * Make the release synchronous if this is a fuseblk mount, + * synchronous RELEASE is allowed (and desirable) in this case + * because the server can be trusted not to screw up. ++ * ++ * Always use the asynchronous file put because the current thread ++ * might be the fuse server. This can happen if a process starts some ++ * aio and closes the fd before the aio completes. 
Since aio takes its ++ * own ref to the file, the IO completion has to drop the ref, which is ++ * how the fuse server can end up closing its clients' files. + */ +- fuse_file_put(ff, ff->fm->fc->destroy); ++ fuse_file_put(ff, false); + } + + void fuse_release_common(struct file *file, bool isdir) diff --git a/queue-6.1/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch b/queue-6.1/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch new file mode 100644 index 0000000000..87266cbada --- /dev/null +++ b/queue-6.1/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch @@ -0,0 +1,66 @@ +From stable+bounces-188112-greg=kroah.com@vger.kernel.org Mon Oct 20 15:18:31 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:09:40 -0400 +Subject: iio: imu: inv_icm42600: Avoid configuring if already pm_runtime suspended +To: stable@vger.kernel.org +Cc: Sean Nyekjaer , Stable@vger.kernel.org, Jonathan Cameron , Sasha Levin +Message-ID: <20251020130940.1767272-2-sashal@kernel.org> + +From: Sean Nyekjaer + +[ Upstream commit 466f7a2fef2a4e426f809f79845a1ec1aeb558f4 ] + +Do as in suspend, skip resume configuration steps if the device is already +pm_runtime suspended. This avoids reconfiguring a device that is already +in the correct low-power state and ensures that pm_runtime handles the +power state transitions properly. 
+ +Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") +Signed-off-by: Sean Nyekjaer +Link: https://patch.msgid.link/20250901-icm42pmreg-v3-3-ef1336246960@geanix.com +Cc: +Signed-off-by: Jonathan Cameron +[ adjusted context for suspend/resume functions lacking APEX/wakeup support ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +@@ -670,17 +670,15 @@ EXPORT_SYMBOL_GPL(inv_icm42600_core_prob + static int __maybe_unused inv_icm42600_suspend(struct device *dev) + { + struct inv_icm42600_state *st = dev_get_drvdata(dev); +- int ret; ++ int ret = 0; + + mutex_lock(&st->lock); + + st->suspended.gyro = st->conf.gyro.mode; + st->suspended.accel = st->conf.accel.mode; + st->suspended.temp = st->conf.temp_en; +- if (pm_runtime_suspended(dev)) { +- ret = 0; ++ if (pm_runtime_suspended(dev)) + goto out_unlock; +- } + + /* disable FIFO data streaming */ + if (st->fifo.on) { +@@ -712,10 +710,13 @@ static int __maybe_unused inv_icm42600_r + struct inv_icm42600_state *st = dev_get_drvdata(dev); + struct inv_icm42600_timestamp *gyro_ts = iio_priv(st->indio_gyro); + struct inv_icm42600_timestamp *accel_ts = iio_priv(st->indio_accel); +- int ret; ++ int ret = 0; + + mutex_lock(&st->lock); + ++ if (pm_runtime_suspended(dev)) ++ goto out_unlock; ++ + ret = inv_icm42600_enable_regulator_vddio(st); + if (ret) + goto out_unlock; diff --git a/queue-6.1/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch b/queue-6.1/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch new file mode 100644 index 0000000000..184a30eb46 --- /dev/null +++ b/queue-6.1/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch @@ -0,0 +1,86 @@ +From stable+bounces-188099-greg=kroah.com@vger.kernel.org Mon Oct 20 15:11:16 2025 +From: Sasha 
Levin +Date: Mon, 20 Oct 2025 09:03:48 -0400 +Subject: iio: imu: inv_icm42600: Simplify pm_runtime setup +To: stable@vger.kernel.org +Cc: Sean Nyekjaer , Stable@vger.kernel.org, Jonathan Cameron , Sasha Levin +Message-ID: <20251020130348.1764406-2-sashal@kernel.org> + +From: Sean Nyekjaer + +[ Upstream commit 0792c1984a45ccd7a296d6b8cb78088bc99a212e ] + +Rework the power management in inv_icm42600_core_probe() to use +devm_pm_runtime_set_active_enabled(), which simplifies the runtime PM +setup by handling activation and enabling in one step. +Remove the separate inv_icm42600_disable_pm callback, as it's no longer +needed with the devm-managed approach. +Using devm_pm_runtime_enable() also fixes the missing disable of +autosuspend. +Update inv_icm42600_disable_vddio_reg() to only disable the regulator if +the device is not suspended i.e. powered-down, preventing unbalanced +disables. +Also remove redundant error msg on regulator_disable(), the regulator +framework already emits an error message when regulator_disable() fails. + +This simplifies the PM setup and avoids manipulating the usage counter +unnecessarily. 
+ +Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") +Signed-off-by: Sean Nyekjaer +Link: https://patch.msgid.link/20250901-icm42pmreg-v3-1-ef1336246960@geanix.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 24 ++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +@@ -550,20 +550,12 @@ static void inv_icm42600_disable_vdd_reg + static void inv_icm42600_disable_vddio_reg(void *_data) + { + struct inv_icm42600_state *st = _data; +- const struct device *dev = regmap_get_device(st->map); +- int ret; +- +- ret = regulator_disable(st->vddio_supply); +- if (ret) +- dev_err(dev, "failed to disable vddio error %d\n", ret); +-} ++ struct device *dev = regmap_get_device(st->map); + +-static void inv_icm42600_disable_pm(void *_data) +-{ +- struct device *dev = _data; ++ if (pm_runtime_status_suspended(dev)) ++ return; + +- pm_runtime_put_sync(dev); +- pm_runtime_disable(dev); ++ regulator_disable(st->vddio_supply); + } + + int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, +@@ -660,16 +652,14 @@ int inv_icm42600_core_probe(struct regma + return ret; + + /* setup runtime power management */ +- ret = pm_runtime_set_active(dev); ++ ret = devm_pm_runtime_set_active_enabled(dev); + if (ret) + return ret; +- pm_runtime_get_noresume(dev); +- pm_runtime_enable(dev); ++ + pm_runtime_set_autosuspend_delay(dev, INV_ICM42600_SUSPEND_DELAY_MS); + pm_runtime_use_autosuspend(dev); +- pm_runtime_put(dev); + +- return devm_add_action_or_reset(dev, inv_icm42600_disable_pm, dev); ++ return ret; + } + EXPORT_SYMBOL_GPL(inv_icm42600_core_probe); + diff --git a/queue-6.1/iio-imu-inv_icm42600-use-instead-of-memset.patch b/queue-6.1/iio-imu-inv_icm42600-use-instead-of-memset.patch new file mode 100644 
index 0000000000..096b7c3c11 --- /dev/null +++ b/queue-6.1/iio-imu-inv_icm42600-use-instead-of-memset.patch @@ -0,0 +1,70 @@ +From stable+bounces-188111-greg=kroah.com@vger.kernel.org Mon Oct 20 15:10:13 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:09:39 -0400 +Subject: iio: imu: inv_icm42600: use = { } instead of memset() +To: stable@vger.kernel.org +Cc: "David Lechner" , "Nuno Sá" , "Andy Shevchenko" , "Jonathan Cameron" , "Sasha Levin" +Message-ID: <20251020130940.1767272-1-sashal@kernel.org> + +From: David Lechner + +[ Upstream commit 352112e2d9aab6a156c2803ae14eb89a9fd93b7d ] + +Use { } instead of memset() to zero-initialize stack memory to simplify +the code. + +Signed-off-by: David Lechner +Reviewed-by: Nuno Sá +Reviewed-by: Andy Shevchenko +Link: https://patch.msgid.link/20250611-iio-zero-init-stack-with-instead-of-memset-v1-16-ebb2d0a24302@baylibre.com +Signed-off-by: Jonathan Cameron +Stable-dep-of: 466f7a2fef2a ("iio: imu: inv_icm42600: Avoid configuring if already pm_runtime suspended") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c | 5 ++--- + drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c | 5 ++--- + 2 files changed, 4 insertions(+), 6 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +@@ -748,7 +748,8 @@ int inv_icm42600_accel_parse_fifo(struct + const int8_t *temp; + unsigned int odr; + int64_t ts_val; +- struct inv_icm42600_accel_buffer buffer; ++ /* buffer is copied to userspace, zeroing it to avoid any data leak */ ++ struct inv_icm42600_accel_buffer buffer = { }; + + /* parse all fifo packets */ + for (i = 0, no = 0; i < st->fifo.count; i += size, ++no) { +@@ -767,8 +768,6 @@ int inv_icm42600_accel_parse_fifo(struct + inv_icm42600_timestamp_apply_odr(ts, st->fifo.period, + st->fifo.nb.total, no); + +- /* buffer is copied to userspace, zeroing it to avoid any data leak */ +- 
memset(&buffer, 0, sizeof(buffer)); + memcpy(&buffer.accel, accel, sizeof(buffer.accel)); + /* convert 8 bits FIFO temperature in high resolution format */ + buffer.temp = temp ? (*temp * 64) : 0; +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +@@ -760,7 +760,8 @@ int inv_icm42600_gyro_parse_fifo(struct + const int8_t *temp; + unsigned int odr; + int64_t ts_val; +- struct inv_icm42600_gyro_buffer buffer; ++ /* buffer is copied to userspace, zeroing it to avoid any data leak */ ++ struct inv_icm42600_gyro_buffer buffer = { }; + + /* parse all fifo packets */ + for (i = 0, no = 0; i < st->fifo.count; i += size, ++no) { +@@ -779,8 +780,6 @@ int inv_icm42600_gyro_parse_fifo(struct + inv_icm42600_timestamp_apply_odr(ts, st->fifo.period, + st->fifo.nb.total, no); + +- /* buffer is copied to userspace, zeroing it to avoid any data leak */ +- memset(&buffer, 0, sizeof(buffer)); + memcpy(&buffer.gyro, gyro, sizeof(buffer.gyro)); + /* convert 8 bits FIFO temperature in high resolution format */ + buffer.temp = temp ? (*temp * 64) : 0; diff --git a/queue-6.1/ixgbevf-add-support-for-intel-r-e610-device.patch b/queue-6.1/ixgbevf-add-support-for-intel-r-e610-device.patch new file mode 100644 index 0000000000..a9a912a8f9 --- /dev/null +++ b/queue-6.1/ixgbevf-add-support-for-intel-r-e610-device.patch @@ -0,0 +1,171 @@ +From stable+bounces-188237-greg=kroah.com@vger.kernel.org Mon Oct 20 21:53:56 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 15:53:46 -0400 +Subject: ixgbevf: Add support for Intel(R) E610 device +To: stable@vger.kernel.org +Cc: Piotr Kwapulinski , Przemek Kitszel , Simon Horman , Rafal Romanowski , Tony Nguyen , Sasha Levin +Message-ID: <20251020195348.1882212-2-sashal@kernel.org> + +From: Piotr Kwapulinski + +[ Upstream commit 4c44b450c69b676955c2790dcf467c1f969d80f1 ] + +Add support for Intel(R) E610 Series of network devices. 
The E610 +is based on X550 but adds firmware managed link, enhanced security +capabilities and support for updated server manageability + +Reviewed-by: Przemek Kitszel +Signed-off-by: Piotr Kwapulinski +Reviewed-by: Simon Horman +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/defines.h | 5 ++++- + drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 6 +++++- + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ++++++++++-- + drivers/net/ethernet/intel/ixgbevf/vf.c | 12 +++++++++++- + drivers/net/ethernet/intel/ixgbevf/vf.h | 4 +++- + 5 files changed, 33 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/defines.h ++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + #ifndef _IXGBEVF_DEFINES_H_ + #define _IXGBEVF_DEFINES_H_ +@@ -16,6 +16,9 @@ + #define IXGBE_DEV_ID_X550_VF_HV 0x1564 + #define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 + ++#define IXGBE_DEV_ID_E610_VF 0x57AD ++#define IXGBE_SUBDEV_ID_E610_VF_HV 0x00FF ++ + #define IXGBE_VF_IRQ_CLEAR_MASK 7 + #define IXGBE_VF_MAX_TX_QUEUES 8 + #define IXGBE_VF_MAX_RX_QUEUES 8 +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ + + #ifndef _IXGBEVF_H_ + #define _IXGBEVF_H_ +@@ -418,6 +418,8 @@ enum ixgbevf_boards { + board_X550EM_x_vf, + board_X550EM_x_vf_hv, + board_x550em_a_vf, ++ board_e610_vf, ++ board_e610_vf_hv, + }; + + enum ixgbevf_xcast_modes { +@@ -434,11 +436,13 @@ extern const struct ixgbevf_info ixgbevf + extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; + extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy; + extern const struct ixgbevf_info ixgbevf_x550em_a_vf_info; ++extern const struct ixgbevf_info ixgbevf_e610_vf_info; + + extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info; ++extern const struct ixgbevf_info ixgbevf_e610_vf_hv_info; + extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops; + + /* needed by ethtool.c */ +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -1,5 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + /****************************************************************************** + Copyright (c)2006 - 2007 Myricom, Inc. 
for some LRO specific code +@@ -39,7 +39,7 @@ static const char ixgbevf_driver_string[ + "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; + + static char ixgbevf_copyright[] = +- "Copyright (c) 2009 - 2018 Intel Corporation."; ++ "Copyright (c) 2009 - 2024 Intel Corporation."; + + static const struct ixgbevf_info *ixgbevf_info_tbl[] = { + [board_82599_vf] = &ixgbevf_82599_vf_info, +@@ -51,6 +51,8 @@ static const struct ixgbevf_info *ixgbev + [board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info, + [board_X550EM_x_vf_hv] = &ixgbevf_X550EM_x_vf_hv_info, + [board_x550em_a_vf] = &ixgbevf_x550em_a_vf_info, ++ [board_e610_vf] = &ixgbevf_e610_vf_info, ++ [board_e610_vf_hv] = &ixgbevf_e610_vf_hv_info, + }; + + /* ixgbevf_pci_tbl - PCI Device ID Table +@@ -71,6 +73,9 @@ static const struct pci_device_id ixgbev + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf }, ++ {PCI_VDEVICE_SUB(INTEL, IXGBE_DEV_ID_E610_VF, PCI_ANY_ID, ++ IXGBE_SUBDEV_ID_E610_VF_HV), board_e610_vf_hv}, ++ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_VF), board_e610_vf}, + /* required last entry */ + {0, } + }; +@@ -4686,6 +4691,9 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mac_X540_vf: + dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n"); + break; ++ case ixgbe_mac_e610_vf: ++ dev_info(&pdev->dev, "Intel(R) E610 Virtual Function\n"); ++ break; + case ixgbe_mac_82599_vf: + default: + dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n"); +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -1,5 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ + + #include "vf.h" + #include "ixgbevf.h" +@@ -1076,3 +1076,13 @@ const struct ixgbevf_info ixgbevf_x550em + .mac = ixgbe_mac_x550em_a_vf, + .mac_ops = &ixgbevf_mac_ops, + }; ++ ++const struct ixgbevf_info ixgbevf_e610_vf_info = { ++ .mac = ixgbe_mac_e610_vf, ++ .mac_ops = &ixgbevf_mac_ops, ++}; ++ ++const struct ixgbevf_info ixgbevf_e610_vf_hv_info = { ++ .mac = ixgbe_mac_e610_vf, ++ .mac_ops = &ixgbevf_hv_mac_ops, ++}; +--- a/drivers/net/ethernet/intel/ixgbevf/vf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + #ifndef __IXGBE_VF_H__ + #define __IXGBE_VF_H__ +@@ -54,6 +54,8 @@ enum ixgbe_mac_type { + ixgbe_mac_X550_vf, + ixgbe_mac_X550EM_x_vf, + ixgbe_mac_x550em_a_vf, ++ ixgbe_mac_e610, ++ ixgbe_mac_e610_vf, + ixgbe_num_macs + }; + diff --git a/queue-6.1/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch b/queue-6.1/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch new file mode 100644 index 0000000000..037990ea68 --- /dev/null +++ b/queue-6.1/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch @@ -0,0 +1,306 @@ +From stable+bounces-188238-greg=kroah.com@vger.kernel.org Mon Oct 20 21:53:56 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 15:53:47 -0400 +Subject: ixgbevf: fix getting link speed data for E610 devices +To: stable@vger.kernel.org +Cc: Jedrzej Jagielski , Andrzej Wilczynski , Przemek Kitszel , Aleksandr Loktionov , Rafal Romanowski , Jacob Keller , Jakub Kicinski , Sasha Levin +Message-ID: <20251020195348.1882212-3-sashal@kernel.org> + +From: Jedrzej Jagielski + +[ Upstream commit 53f0eb62b4d23d40686f2dd51776b8220f2887bb ] + +E610 adapters no longer use the VFLINKS register to read PF's link +speed and linkup state. As a result VF driver cannot get actual link +state and it incorrectly reports 10G which is the default option. 
+It leads to a situation where even 1G adapters print 10G as actual +link speed. The same happens when PF driver set speed different than 10G. + +Add new mailbox operation to let the VF driver request a PF driver +to provide actual link data. Update the mailbox api to v1.6. + +Incorporate both ways of getting link status within the legacy +ixgbe_check_mac_link_vf() function. + +Fixes: 4c44b450c69b ("ixgbevf: Add support for Intel(R) E610 device") +Co-developed-by: Andrzej Wilczynski +Signed-off-by: Andrzej Wilczynski +Reviewed-by: Przemek Kitszel +Reviewed-by: Aleksandr Loktionov +Cc: stable@vger.kernel.org +Signed-off-by: Jedrzej Jagielski +Tested-by: Rafal Romanowski +Signed-off-by: Jacob Keller +Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-2-ef32a425b92a@intel.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/defines.h | 1 + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 + drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + drivers/net/ethernet/intel/ixgbevf/vf.c | 137 +++++++++++++++++----- + 4 files changed, 116 insertions(+), 32 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/defines.h ++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h +@@ -28,6 +28,7 @@ + + /* Link speed */ + typedef u32 ixgbe_link_speed; ++#define IXGBE_LINK_SPEED_UNKNOWN 0 + #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 + #define IXGBE_LINK_SPEED_10GB_FULL 0x0080 + #define IXGBE_LINK_SPEED_100_FULL 0x0008 +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -2272,6 +2272,7 @@ static void ixgbevf_negotiate_api(struct + { + struct ixgbe_hw *hw = &adapter->hw; + static const int api[] = { ++ ixgbe_mbox_api_16, + ixgbe_mbox_api_15, + ixgbe_mbox_api_14, + ixgbe_mbox_api_13, +@@ -2291,7 +2292,8 @@ static void 
ixgbevf_negotiate_api(struct + idx++; + } + +- if (hw->api_version >= ixgbe_mbox_api_15) { ++ /* Following is not supported by API 1.6, it is specific for 1.5 */ ++ if (hw->api_version == ixgbe_mbox_api_15) { + hw->mbx.ops.init_params(hw); + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + sizeof(struct ixgbe_mbx_operations)); +@@ -2648,6 +2650,7 @@ static void ixgbevf_set_num_queues(struc + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 2 : 1; +@@ -4641,6 +4644,7 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; +--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h ++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h +@@ -66,6 +66,7 @@ enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ + ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ++ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ + }; +@@ -102,6 +103,9 @@ enum ixgbe_pfvf_api_rev { + + #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + ++/* mailbox API, version 1.6 VF requests */ ++#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ ++ + /* length of permanent address message returned from PF */ + #define IXGBE_VF_PERMADDR_MSG_LEN 4 + /* word in permanent address message with the current multicast type */ +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe + * is not supported for this device type. 
+ */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_13: +@@ -382,6 +383,7 @@ int ixgbevf_get_rss_key_locked(struct ix + * or if the operation is not supported for this device type. + */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_13: +@@ -552,6 +554,7 @@ static s32 ixgbevf_update_xcast_mode(str + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + break; + default: + return -EOPNOTSUPP; +@@ -625,6 +628,48 @@ static s32 ixgbevf_hv_get_link_state_vf( + } + + /** ++ * ixgbevf_get_pf_link_state - Get PF's link status ++ * @hw: pointer to the HW structure ++ * @speed: link speed ++ * @link_up: indicate if link is up/down ++ * ++ * Ask PF to provide link_up state and speed of the link. ++ * ++ * Return: IXGBE_ERR_MBX in the case of mailbox error, ++ * -EOPNOTSUPP if the op is not supported or 0 on success. ++ */ ++static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed, ++ bool *link_up) ++{ ++ u32 msgbuf[3] = {}; ++ int err; ++ ++ switch (hw->api_version) { ++ case ixgbe_mbox_api_16: ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; ++ ++ err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, ++ ARRAY_SIZE(msgbuf)); ++ if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { ++ err = IXGBE_ERR_MBX; ++ *speed = IXGBE_LINK_SPEED_UNKNOWN; ++ /* No need to set @link_up to false as it will be done by ++ * ixgbe_check_mac_link_vf(). 
++ */ ++ } else { ++ *speed = msgbuf[1]; ++ *link_up = msgbuf[2]; ++ } ++ ++ return err; ++} ++ ++/** + * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address + * @hw: pointer to the HW structure + * @vlan: 12 bit VLAN ID +@@ -659,6 +704,58 @@ mbx_err: + } + + /** ++ * ixgbe_read_vflinks - Read VFLINKS register ++ * @hw: pointer to the HW structure ++ * @speed: link speed ++ * @link_up: indicate if link is up/down ++ * ++ * Get linkup status and link speed from the VFLINKS register. ++ */ ++static void ixgbe_read_vflinks(struct ixgbe_hw *hw, ixgbe_link_speed *speed, ++ bool *link_up) ++{ ++ u32 vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); ++ ++ /* if link status is down no point in checking to see if PF is up */ ++ if (!(vflinks & IXGBE_LINKS_UP)) { ++ *link_up = false; ++ return; ++ } ++ ++ /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs ++ * before the link status is correct ++ */ ++ if (hw->mac.type == ixgbe_mac_82599_vf) { ++ for (int i = 0; i < 5; i++) { ++ udelay(100); ++ vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); ++ ++ if (!(vflinks & IXGBE_LINKS_UP)) { ++ *link_up = false; ++ return; ++ } ++ } ++ } ++ ++ /* We reached this point so there's link */ ++ *link_up = true; ++ ++ switch (vflinks & IXGBE_LINKS_SPEED_82599) { ++ case IXGBE_LINKS_SPEED_10G_82599: ++ *speed = IXGBE_LINK_SPEED_10GB_FULL; ++ break; ++ case IXGBE_LINKS_SPEED_1G_82599: ++ *speed = IXGBE_LINK_SPEED_1GB_FULL; ++ break; ++ case IXGBE_LINKS_SPEED_100_82599: ++ *speed = IXGBE_LINK_SPEED_100_FULL; ++ break; ++ default: ++ *speed = IXGBE_LINK_SPEED_UNKNOWN; ++ } ++} ++ ++/** + * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub. 
+ * @hw: unused + * @vlan: unused +@@ -705,7 +802,6 @@ static s32 ixgbevf_check_mac_link_vf(str + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; +- u32 links_reg; + u32 in_msg = 0; + + /* If we were hit with a reset drop the link */ +@@ -715,36 +811,14 @@ static s32 ixgbevf_check_mac_link_vf(str + if (!mac->get_link_status) + goto out; + +- /* if link status is down no point in checking to see if pf is up */ +- links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); +- if (!(links_reg & IXGBE_LINKS_UP)) +- goto out; +- +- /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs +- * before the link status is correct +- */ +- if (mac->type == ixgbe_mac_82599_vf) { +- int i; +- +- for (i = 0; i < 5; i++) { +- udelay(100); +- links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); +- +- if (!(links_reg & IXGBE_LINKS_UP)) +- goto out; +- } +- } +- +- switch (links_reg & IXGBE_LINKS_SPEED_82599) { +- case IXGBE_LINKS_SPEED_10G_82599: +- *speed = IXGBE_LINK_SPEED_10GB_FULL; +- break; +- case IXGBE_LINKS_SPEED_1G_82599: +- *speed = IXGBE_LINK_SPEED_1GB_FULL; +- break; +- case IXGBE_LINKS_SPEED_100_82599: +- *speed = IXGBE_LINK_SPEED_100_FULL; +- break; ++ if (hw->mac.type == ixgbe_mac_e610_vf) { ++ ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); ++ if (ret_val) ++ goto out; ++ } else { ++ ixgbe_read_vflinks(hw, speed, link_up); ++ if (*link_up == false) ++ goto out; + } + + /* if the read failed it could just be a mailbox collision, best wait +@@ -951,6 +1025,7 @@ int ixgbevf_get_queues(struct ixgbe_hw * + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + break; + default: + return 0; diff --git a/queue-6.1/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch b/queue-6.1/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch new file mode 100644 index 0000000000..f4c0479199 --- /dev/null +++ 
b/queue-6.1/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch @@ -0,0 +1,327 @@ +From stable+bounces-188239-greg=kroah.com@vger.kernel.org Mon Oct 20 21:53:59 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 15:53:48 -0400 +Subject: ixgbevf: fix mailbox API compatibility by negotiating supported features +To: stable@vger.kernel.org +Cc: Jedrzej Jagielski , Jacob Keller , Przemek Kitszel , Aleksandr Loktionov , Rafal Romanowski , Jakub Kicinski , Sasha Levin +Message-ID: <20251020195348.1882212-4-sashal@kernel.org> + +From: Jedrzej Jagielski + +[ Upstream commit a7075f501bd33c93570af759b6f4302ef0175168 ] + +There was backward compatibility in the terms of mailbox API. Various +drivers from various OSes supporting 10G adapters from Intel portfolio +could easily negotiate mailbox API. + +This convention has been broken since introducing API 1.4. +Commit 0062e7cc955e ("ixgbevf: add VF IPsec offload code") added support +for IPSec which is specific only for the kernel ixgbe driver. None of the +rest of the Intel 10G PF/VF drivers supports it. And actually lack of +support was not included in the IPSec implementation - there were no such +code paths. No possibility to negotiate support for the feature was +introduced along with introduction of the feature itself. + +Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication +between PF and VF") increasing API version to 1.5 did the same - it +introduced code supported specifically by the PF ESX driver. It altered API +version for the VF driver in the same time not touching the version +defined for the PF ixgbe driver. It led to additional discrepancies, +as the code provided within API 1.6 cannot be supported for Linux ixgbe +driver as it causes crashes. + +The issue was noticed some time ago and mitigated by Jake within the commit +d0725312adf5 ("ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5"). 
+As a result we have regression for IPsec support and after increasing API +to version 1.6 ixgbevf driver stopped to support ESX MBX. + +To fix this mess add new mailbox op asking PF driver about supported +features. Basing on a response determine whether to set support for IPSec +and ESX-specific enhanced mailbox. + +New mailbox op, for compatibility purposes, must be added within new API +revision, as API version of OOT PF & VF drivers is already increased to +1.6 and doesn't incorporate features negotiate op. + +Features negotiation mechanism gives possibility to be extended with new +features when needed in the future. + +Reported-by: Jacob Keller +Closes: https://lore.kernel.org/intel-wired-lan/20241101-jk-ixgbevf-mailbox-v1-5-fixes-v1-0-f556dc9a66ed@intel.com/ +Fixes: 0062e7cc955e ("ixgbevf: add VF IPsec offload code") +Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") +Reviewed-by: Jacob Keller +Reviewed-by: Przemek Kitszel +Reviewed-by: Aleksandr Loktionov +Cc: stable@vger.kernel.org +Signed-off-by: Jedrzej Jagielski +Tested-by: Rafal Romanowski +Signed-off-by: Jacob Keller +Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-4-ef32a425b92a@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/ipsec.c | 10 ++++ + drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 +++ + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 32 ++++++++++++++- + drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + + drivers/net/ethernet/intel/ixgbevf/vf.c | 45 +++++++++++++++++++++- + drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 6 files changed, 96 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c +@@ -269,6 +269,9 @@ static int ixgbevf_ipsec_add_sa(struct x + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + ++ if (!(adapter->pf_features & 
IXGBEVF_PF_SUP_IPSEC)) ++ return -EOPNOTSUPP; ++ + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { + netdev_err(dev, "Unsupported protocol 0x%04x for IPsec offload\n", + xs->id.proto); +@@ -394,6 +397,9 @@ static void ixgbevf_ipsec_del_sa(struct + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + ++ if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) ++ return; ++ + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { + sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; + +@@ -622,6 +628,10 @@ void ixgbevf_init_ipsec_offload(struct i + size_t size; + + switch (adapter->hw.api_version) { ++ case ixgbe_mbox_api_17: ++ if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) ++ return; ++ break; + case ixgbe_mbox_api_14: + break; + default: +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +@@ -366,6 +366,13 @@ struct ixgbevf_adapter { + /* Interrupt Throttle Rate */ + u32 eitr_param; + ++ u32 pf_features; ++#define IXGBEVF_PF_SUP_IPSEC BIT(0) ++#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) ++ ++#define IXGBEVF_SUPPORTED_FEATURES (IXGBEVF_PF_SUP_IPSEC | \ ++ IXGBEVF_PF_SUP_ESX_MBX) ++ + struct ixgbevf_hw_stats stats; + + unsigned long state; +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -2268,10 +2268,35 @@ static void ixgbevf_init_last_counter_st + adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; + } + ++/** ++ * ixgbevf_set_features - Set features supported by PF ++ * @adapter: pointer to the adapter struct ++ * ++ * Negotiate with PF supported features and then set pf_features accordingly. 
++ */ ++static void ixgbevf_set_features(struct ixgbevf_adapter *adapter) ++{ ++ u32 *pf_features = &adapter->pf_features; ++ struct ixgbe_hw *hw = &adapter->hw; ++ int err; ++ ++ err = hw->mac.ops.negotiate_features(hw, pf_features); ++ if (err && err != -EOPNOTSUPP) ++ netdev_dbg(adapter->netdev, ++ "PF feature negotiation failed.\n"); ++ ++ /* Address also pre API 1.7 cases */ ++ if (hw->api_version == ixgbe_mbox_api_14) ++ *pf_features |= IXGBEVF_PF_SUP_IPSEC; ++ else if (hw->api_version == ixgbe_mbox_api_15) ++ *pf_features |= IXGBEVF_PF_SUP_ESX_MBX; ++} ++ + static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) + { + struct ixgbe_hw *hw = &adapter->hw; + static const int api[] = { ++ ixgbe_mbox_api_17, + ixgbe_mbox_api_16, + ixgbe_mbox_api_15, + ixgbe_mbox_api_14, +@@ -2292,8 +2317,9 @@ static void ixgbevf_negotiate_api(struct + idx++; + } + +- /* Following is not supported by API 1.6, it is specific for 1.5 */ +- if (hw->api_version == ixgbe_mbox_api_15) { ++ ixgbevf_set_features(adapter); ++ ++ if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) { + hw->mbx.ops.init_params(hw); + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + sizeof(struct ixgbe_mbx_operations)); +@@ -2651,6 +2677,7 @@ static void ixgbevf_set_num_queues(struc + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 
2 : 1; +@@ -4645,6 +4672,7 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; +--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h ++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h +@@ -67,6 +67,7 @@ enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ ++ ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ + }; +@@ -106,6 +107,9 @@ enum ixgbe_pfvf_api_rev { + /* mailbox API, version 1.6 VF requests */ + #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + ++/* mailbox API, version 1.7 VF requests */ ++#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF*/ ++ + /* length of permanent address message returned from PF */ + #define IXGBE_VF_PERMADDR_MSG_LEN 4 + /* word in permanent address message with the current multicast type */ +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe + * is not supported for this device type. + */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_17: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: +@@ -383,6 +384,7 @@ int ixgbevf_get_rss_key_locked(struct ix + * or if the operation is not supported for this device type. 
+ */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_17: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: +@@ -555,6 +557,7 @@ static s32 ixgbevf_update_xcast_mode(str + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; +@@ -646,6 +649,7 @@ static int ixgbevf_get_pf_link_state(str + + switch (hw->api_version) { + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; +@@ -670,6 +674,42 @@ static int ixgbevf_get_pf_link_state(str + } + + /** ++ * ixgbevf_negotiate_features_vf - negotiate supported features with PF driver ++ * @hw: pointer to the HW structure ++ * @pf_features: bitmask of features supported by PF ++ * ++ * Return: IXGBE_ERR_MBX in the case of mailbox error, ++ * -EOPNOTSUPP if the op is not supported or 0 on success. ++ */ ++static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features) ++{ ++ u32 msgbuf[2] = {}; ++ int err; ++ ++ switch (hw->api_version) { ++ case ixgbe_mbox_api_17: ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ msgbuf[0] = IXGBE_VF_FEATURES_NEGOTIATE; ++ msgbuf[1] = IXGBEVF_SUPPORTED_FEATURES; ++ ++ err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, ++ ARRAY_SIZE(msgbuf)); ++ ++ if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { ++ err = IXGBE_ERR_MBX; ++ *pf_features = 0x0; ++ } else { ++ *pf_features = msgbuf[1]; ++ } ++ ++ return err; ++} ++ ++/** + * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address + * @hw: pointer to the HW structure + * @vlan: 12 bit VLAN ID +@@ -799,6 +839,7 @@ static s32 ixgbevf_check_mac_link_vf(str + bool *link_up, + bool autoneg_wait_to_complete) + { ++ struct ixgbevf_adapter *adapter = hw->back; + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; +@@ -825,7 +866,7 @@ static s32 ixgbevf_check_mac_link_vf(str + * until we are called again and don't report 
an error + */ + if (mbx->ops.read(hw, &in_msg, 1)) { +- if (hw->api_version >= ixgbe_mbox_api_15) ++ if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) + mac->get_link_status = false; + goto out; + } +@@ -1026,6 +1067,7 @@ int ixgbevf_get_queues(struct ixgbe_hw * + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return 0; +@@ -1080,6 +1122,7 @@ static const struct ixgbe_mac_operations + .setup_link = ixgbevf_setup_mac_link_vf, + .check_link = ixgbevf_check_mac_link_vf, + .negotiate_api_version = ixgbevf_negotiate_api_version_vf, ++ .negotiate_features = ixgbevf_negotiate_features_vf, + .set_rar = ixgbevf_set_rar_vf, + .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, + .update_xcast_mode = ixgbevf_update_xcast_mode, +--- a/drivers/net/ethernet/intel/ixgbevf/vf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h +@@ -26,6 +26,7 @@ struct ixgbe_mac_operations { + s32 (*stop_adapter)(struct ixgbe_hw *); + s32 (*get_bus_info)(struct ixgbe_hw *); + s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); ++ int (*negotiate_features)(struct ixgbe_hw *hw, u32 *pf_features); + + /* Link */ + s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); diff --git a/queue-6.1/ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch b/queue-6.1/ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch new file mode 100644 index 0000000000..30fe8058a4 --- /dev/null +++ b/queue-6.1/ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch @@ -0,0 +1,196 @@ +From stable+bounces-188308-greg=kroah.com@vger.kernel.org Tue Oct 21 09:40:30 2025 +From: Namjae Jeon +Date: Tue, 21 Oct 2025 16:40:04 +0900 +Subject: ksmbd: browse interfaces list on FSCTL_QUERY_INTERFACE_INFO IOCTL +To: gregkh@linuxfoundation.org, sashal@kernel.org +Cc: hauke@hauke-m.de, smfrench@gmail.com, stable@vger.kernel.org, Namjae Jeon , Steve French +Message-ID: 
<20251021074004.6656-2-linkinjeon@kernel.org> + +From: Namjae Jeon + +[ Upstream commit b2d99376c5d61eb60ffdb6c503e4b6c8f9712ddd ] + +ksmbd.mount will give each interfaces list and bind_interfaces_only flags +to ksmbd server. Previously, the interfaces list was sent only +when bind_interfaces_only was enabled. +ksmbd server browse only interfaces list given from ksmbd.conf on +FSCTL_QUERY_INTERFACE_INFO IOCTL. + +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/ksmbd_netlink.h | 3 + + fs/smb/server/server.h | 1 + fs/smb/server/smb2pdu.c | 4 ++ + fs/smb/server/transport_ipc.c | 1 + fs/smb/server/transport_tcp.c | 67 +++++++++++++++++++----------------------- + fs/smb/server/transport_tcp.h | 1 + 6 files changed, 40 insertions(+), 37 deletions(-) + +--- a/fs/smb/server/ksmbd_netlink.h ++++ b/fs/smb/server/ksmbd_netlink.h +@@ -107,8 +107,9 @@ struct ksmbd_startup_request { + __u32 smb2_max_credits; /* MAX credits */ + __u32 smbd_max_io_size; /* smbd read write size */ + __u32 max_connections; /* Number of maximum simultaneous connections */ ++ __s8 bind_interfaces_only; + __u32 max_ip_connections; /* Number of maximum connection per ip address */ +- __u32 reserved[125]; /* Reserved room */ ++ __s8 reserved[499]; /* Reserved room */ + __u32 ifc_list_sz; /* interfaces list size */ + __s8 ____payload[]; + } __packed; +--- a/fs/smb/server/server.h ++++ b/fs/smb/server/server.h +@@ -45,6 +45,7 @@ struct ksmbd_server_config { + unsigned int max_ip_connections; + + char *conf[SERVER_CONF_WORK_GROUP + 1]; ++ bool bind_interfaces_only; + }; + + extern struct ksmbd_server_config server_conf; +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -37,6 +37,7 @@ + #include "mgmt/user_session.h" + #include "mgmt/ksmbd_ida.h" + #include "ndr.h" ++#include "transport_tcp.h" + + static void __wbuf(struct ksmbd_work *work, void **req, void **rsp) + { +@@ -7423,6 +7424,9 @@ static int 
fsctl_query_iface_info_ioctl( + if (netdev->type == ARPHRD_LOOPBACK) + continue; + ++ if (!ksmbd_find_netdev_name_iface_list(netdev->name)) ++ continue; ++ + flags = dev_get_flags(netdev); + if (!(flags & IFF_RUNNING)) + continue; +--- a/fs/smb/server/transport_ipc.c ++++ b/fs/smb/server/transport_ipc.c +@@ -324,6 +324,7 @@ static int ipc_server_config_on_startup( + ret = ksmbd_set_netbios_name(req->netbios_name); + ret |= ksmbd_set_server_string(req->server_string); + ret |= ksmbd_set_work_group(req->work_group); ++ server_conf.bind_interfaces_only = req->bind_interfaces_only; + ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req), + req->ifc_list_sz); + out: +--- a/fs/smb/server/transport_tcp.c ++++ b/fs/smb/server/transport_tcp.c +@@ -544,30 +544,37 @@ out_clear: + return ret; + } + ++struct interface *ksmbd_find_netdev_name_iface_list(char *netdev_name) ++{ ++ struct interface *iface; ++ ++ list_for_each_entry(iface, &iface_list, entry) ++ if (!strcmp(iface->name, netdev_name)) ++ return iface; ++ return NULL; ++} ++ + static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event, + void *ptr) + { + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct interface *iface; +- int ret, found = 0; ++ int ret; + + switch (event) { + case NETDEV_UP: + if (netif_is_bridge_port(netdev)) + return NOTIFY_OK; + +- list_for_each_entry(iface, &iface_list, entry) { +- if (!strcmp(iface->name, netdev->name)) { +- found = 1; +- if (iface->state != IFACE_STATE_DOWN) +- break; +- ret = create_socket(iface); +- if (ret) +- return NOTIFY_OK; +- break; +- } ++ iface = ksmbd_find_netdev_name_iface_list(netdev->name); ++ if (iface && iface->state == IFACE_STATE_DOWN) { ++ ksmbd_debug(CONN, "netdev-up event: netdev(%s) is going up\n", ++ iface->name); ++ ret = create_socket(iface); ++ if (ret) ++ return NOTIFY_OK; + } +- if (!found && bind_additional_ifaces) { ++ if (!iface && bind_additional_ifaces) { + iface = 
alloc_iface(kstrdup(netdev->name, GFP_KERNEL)); + if (!iface) + return NOTIFY_OK; +@@ -577,19 +584,19 @@ static int ksmbd_netdev_event(struct not + } + break; + case NETDEV_DOWN: +- list_for_each_entry(iface, &iface_list, entry) { +- if (!strcmp(iface->name, netdev->name) && +- iface->state == IFACE_STATE_CONFIGURED) { +- tcp_stop_kthread(iface->ksmbd_kthread); +- iface->ksmbd_kthread = NULL; +- mutex_lock(&iface->sock_release_lock); +- tcp_destroy_socket(iface->ksmbd_socket); +- iface->ksmbd_socket = NULL; +- mutex_unlock(&iface->sock_release_lock); ++ iface = ksmbd_find_netdev_name_iface_list(netdev->name); ++ if (iface && iface->state == IFACE_STATE_CONFIGURED) { ++ ksmbd_debug(CONN, "netdev-down event: netdev(%s) is going down\n", ++ iface->name); ++ tcp_stop_kthread(iface->ksmbd_kthread); ++ iface->ksmbd_kthread = NULL; ++ mutex_lock(&iface->sock_release_lock); ++ tcp_destroy_socket(iface->ksmbd_socket); ++ iface->ksmbd_socket = NULL; ++ mutex_unlock(&iface->sock_release_lock); + +- iface->state = IFACE_STATE_DOWN; +- break; +- } ++ iface->state = IFACE_STATE_DOWN; ++ break; + } + break; + } +@@ -658,18 +665,6 @@ int ksmbd_tcp_set_interfaces(char *ifc_l + int sz = 0; + + if (!ifc_list_sz) { +- struct net_device *netdev; +- +- rtnl_lock(); +- for_each_netdev(&init_net, netdev) { +- if (netif_is_bridge_port(netdev)) +- continue; +- if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) { +- rtnl_unlock(); +- return -ENOMEM; +- } +- } +- rtnl_unlock(); + bind_additional_ifaces = 1; + return 0; + } +--- a/fs/smb/server/transport_tcp.h ++++ b/fs/smb/server/transport_tcp.h +@@ -7,6 +7,7 @@ + #define __KSMBD_TRANSPORT_TCP_H__ + + int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz); ++struct interface *ksmbd_find_netdev_name_iface_list(char *netdev_name); + int ksmbd_tcp_init(void); + void ksmbd_tcp_destroy(void); + diff --git a/queue-6.1/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch b/queue-6.1/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch 
new file mode 100644 index 0000000000..93a7e6779c --- /dev/null +++ b/queue-6.1/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch @@ -0,0 +1,124 @@ +From f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 Mon Sep 17 00:00:00 2001 +From: Jakub Acs +Date: Wed, 1 Oct 2025 09:03:52 +0000 +Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise + +From: Jakub Acs + +commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 upstream. + +syzkaller discovered the following crash: (kernel BUG) + +[ 44.607039] ------------[ cut here ]------------ +[ 44.607422] kernel BUG at mm/userfaultfd.c:2067! +[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) +[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 + + + +[ 44.617726] Call Trace: +[ 44.617926] +[ 44.619284] userfaultfd_release+0xef/0x1b0 +[ 44.620976] __fput+0x3f9/0xb60 +[ 44.621240] fput_close_sync+0x110/0x210 +[ 44.622222] __x64_sys_close+0x8f/0x120 +[ 44.622530] do_syscall_64+0x5b/0x2f0 +[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 44.623244] RIP: 0033:0x7f365bb3f227 + +Kernel panics because it detects UFFD inconsistency during +userfaultfd_release_all(). Specifically, a VMA which has a valid pointer +to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. + +The inconsistency is caused in ksm_madvise(): when user calls madvise() +with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode, +it accidentally clears all flags stored in the upper 32 bits of +vma->vm_flags. + +Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and +int are 32-bit wide. This setup causes the following mishap during the &= +~VM_MERGEABLE assignment. + +VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. 
+After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then +promoted to unsigned long before the & operation. This promotion fills +upper 32 bits with leading 0s, as we're doing unsigned conversion (and +even for a signed conversion, this wouldn't help as the leading bit is 0). +& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff +instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears +the upper 32-bits of its value. + +Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the +BIT() macro. + +Note: other VM_* flags are not affected: This only happens to the +VM_MERGEABLE flag, as the other VM_* flags are all constants of type int +and after ~ operation, they end up with leading 1 and are thus converted +to unsigned long with leading 1s. + +Note 2: +After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is +no longer a kernel BUG, but a WARNING at the same place: + +[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 + +but the root-cause (flag-drop) remains the same. 
+ +[akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] + Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ +Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de +Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") +Signed-off-by: Jakub Acs +Signed-off-by: Miguel Ojeda +Acked-by: David Hildenbrand +Acked-by: SeongJae Park +Tested-by: Alice Ryhl +Tested-by: Miguel Ojeda +Cc: Xu Xin +Cc: Chengming Zhou +Cc: Peter Xu +Cc: Axel Rasmussen +Cc: +Signed-off-by: Andrew Morton +[acsjakub@amazon.de: adapt rust bindgen to older versions] +Signed-off-by: Jakub Acs +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 2 +- + rust/bindings/bindings_helper.h | 2 ++ + rust/bindings/lib.rs | 1 + + 3 files changed, 4 insertions(+), 1 deletion(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -316,7 +316,7 @@ extern unsigned int kobjsize(const void + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ + #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ +-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ ++#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ + + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS + #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +--- a/rust/bindings/bindings_helper.h ++++ b/rust/bindings/bindings_helper.h +@@ -7,8 +7,10 @@ + */ + + #include ++#include + + /* `bindgen` gets confused at certain things. 
*/ + const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; + const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; + const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; ++const vm_flags_t BINDINGS_VM_MERGEABLE = VM_MERGEABLE; +--- a/rust/bindings/lib.rs ++++ b/rust/bindings/lib.rs +@@ -51,3 +51,4 @@ pub use bindings_raw::*; + + pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL; + pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO; ++pub const VM_MERGEABLE: vm_flags_t = BINDINGS_VM_MERGEABLE; diff --git a/queue-6.1/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch b/queue-6.1/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch new file mode 100644 index 0000000000..b4da7a25b7 --- /dev/null +++ b/queue-6.1/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch @@ -0,0 +1,50 @@ +From stable+bounces-188272-greg=kroah.com@vger.kernel.org Tue Oct 21 02:25:03 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 20:24:54 -0400 +Subject: NFSD: Define a proc_layoutcommit for the FlexFiles layout type +To: stable@vger.kernel.org +Cc: Chuck Lever , Robert Morris , Thomas Haynes , Sasha Levin +Message-ID: <20251021002454.1948865-1-sashal@kernel.org> + +From: Chuck Lever + +[ Upstream commit 4b47a8601b71ad98833b447d465592d847b4dc77 ] + +Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT +operation on a FlexFiles layout. 
+ +Reported-by: Robert Morris +Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t +Cc: Thomas Haynes +Cc: stable@vger.kernel.org +Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server") +Signed-off-by: Chuck Lever +[ removed struct svc_rqst parameter from nfsd4_ff_proc_layoutcommit ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/flexfilelayout.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/nfsd/flexfilelayout.c ++++ b/fs/nfsd/flexfilelayout.c +@@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super + return 0; + } + ++static __be32 ++nfsd4_ff_proc_layoutcommit(struct inode *inode, ++ struct nfsd4_layoutcommit *lcp) ++{ ++ return nfs_ok; ++} ++ + const struct nfsd4_layout_ops ff_layout_ops = { + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, +@@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ + .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, + .proc_layoutget = nfsd4_ff_proc_layoutget, + .encode_layoutget = nfsd4_ff_encode_layoutget, ++ .proc_layoutcommit = nfsd4_ff_proc_layoutcommit, + }; diff --git a/queue-6.1/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch b/queue-6.1/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch new file mode 100644 index 0000000000..784318400d --- /dev/null +++ b/queue-6.1/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch @@ -0,0 +1,114 @@ +From stable+bounces-188077-greg=kroah.com@vger.kernel.org Mon Oct 20 14:55:15 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:55:03 -0400 +Subject: NFSD: Fix last write offset handling in layoutcommit +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Konstantin Evtushenko , Christoph Hellwig , Jeff Layton , Chuck Lever , Sasha Levin +Message-ID: <20251020125503.1760951-3-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit d68886bae76a4b9b3484d23e5b7df086f940fa38 ] + +The data type of loca_last_write_offset is newoffset4 
and is switched +on a boolean value, no_newoffset, that indicates if a previous write +occurred or not. If no_newoffset is FALSE, an offset is not given. +This means that client does not try to update the file size. Thus, +server should not try to calculate new file size and check if it fits +into the segment range. See RFC 8881, section 12.5.4.2. + +Sometimes the current incorrect logic may cause clients to hang when +trying to sync an inode. If layoutcommit fails, the client marks the +inode as dirty again. + +Fixes: 9cf514ccfacb ("nfsd: implement pNFS operations") +Cc: stable@vger.kernel.org +Co-developed-by: Konstantin Evtushenko +Signed-off-by: Konstantin Evtushenko +Signed-off-by: Sergey Bashirov +Reviewed-by: Christoph Hellwig +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +[ removed rqstp parameter from proc_layoutcommit ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 5 ++--- + fs/nfsd/nfs4proc.c | 30 +++++++++++++++--------------- + 2 files changed, 17 insertions(+), 18 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -117,7 +117,6 @@ static __be32 + nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, + struct iomap *iomaps, int nr_iomaps) + { +- loff_t new_size = lcp->lc_last_wr + 1; + struct iattr iattr = { .ia_valid = 0 }; + int error; + +@@ -127,9 +126,9 @@ nfsd4_block_commit_blocks(struct inode * + iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; + +- if (new_size > i_size_read(inode)) { ++ if (lcp->lc_size_chg) { + iattr.ia_valid |= ATTR_SIZE; +- iattr.ia_size = new_size; ++ iattr.ia_size = lcp->lc_newsize; + } + + error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2261,7 +2261,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + const struct nfsd4_layout_seg *seg = &lcp->lc_seg; + struct svc_fh 
*current_fh = &cstate->current_fh; + const struct nfsd4_layout_ops *ops; +- loff_t new_size = lcp->lc_last_wr + 1; + struct inode *inode; + struct nfs4_layout_stateid *ls; + __be32 nfserr; +@@ -2276,13 +2275,21 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + goto out; + inode = d_inode(current_fh->fh_dentry); + +- nfserr = nfserr_inval; +- if (new_size <= seg->offset) +- goto out; +- if (new_size > seg->offset + seg->length) +- goto out; +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) +- goto out; ++ lcp->lc_size_chg = false; ++ if (lcp->lc_newoffset) { ++ loff_t new_size = lcp->lc_last_wr + 1; ++ ++ nfserr = nfserr_inval; ++ if (new_size <= seg->offset) ++ goto out; ++ if (new_size > seg->offset + seg->length) ++ goto out; ++ ++ if (new_size > i_size_read(inode)) { ++ lcp->lc_size_chg = true; ++ lcp->lc_newsize = new_size; ++ } ++ } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, +@@ -2298,13 +2305,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + +- if (new_size > i_size_read(inode)) { +- lcp->lc_size_chg = 1; +- lcp->lc_newsize = new_size; +- } else { +- lcp->lc_size_chg = 0; +- } +- + nfserr = ops->proc_layoutcommit(inode, lcp); + nfs4_put_stid(&ls->ls_stid); + out: diff --git a/queue-6.1/nfsd-minor-cleanup-in-layoutcommit-processing.patch b/queue-6.1/nfsd-minor-cleanup-in-layoutcommit-processing.patch new file mode 100644 index 0000000000..dc876db0fd --- /dev/null +++ b/queue-6.1/nfsd-minor-cleanup-in-layoutcommit-processing.patch @@ -0,0 +1,50 @@ +From stable+bounces-188076-greg=kroah.com@vger.kernel.org Mon Oct 20 14:55:13 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:55:02 -0400 +Subject: NFSD: Minor cleanup in layoutcommit processing +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125503.1760951-2-sashal@kernel.org> + +From: Sergey 
Bashirov + +[ Upstream commit 274365a51d88658fb51cca637ba579034e90a799 ] + +Remove dprintk in nfsd4_layoutcommit. These are not needed +in day to day usage, and the information is also available +in Wireshark when capturing NFS traffic. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4proc.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2277,18 +2277,12 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + inode = d_inode(current_fh->fh_dentry); + + nfserr = nfserr_inval; +- if (new_size <= seg->offset) { +- dprintk("pnfsd: last write before layout segment\n"); ++ if (new_size <= seg->offset) + goto out; +- } +- if (new_size > seg->offset + seg->length) { +- dprintk("pnfsd: last write beyond layout segment\n"); ++ if (new_size > seg->offset + seg->length) + goto out; +- } +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) { +- dprintk("pnfsd: layoutcommit beyond EOF\n"); ++ if (!lcp->lc_newoffset && new_size > i_size_read(inode)) + goto out; +- } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, diff --git a/queue-6.1/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch b/queue-6.1/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch new file mode 100644 index 0000000000..37fc6bce48 --- /dev/null +++ b/queue-6.1/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch @@ -0,0 +1,156 @@ +From stable+bounces-188075-greg=kroah.com@vger.kernel.org Mon Oct 20 14:55:11 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:55:01 -0400 +Subject: NFSD: Rework encoding and decoding of nfsd4_deviceid +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Chuck Lever , Sasha Levin +Message-ID: 
<20251020125503.1760951-1-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 832738e4b325b742940761e10487403f9aad13e8 ] + +Compilers may optimize the layout of C structures, so we should not rely +on sizeof struct and memcpy to encode and decode XDR structures. The byte +order of the fields should also be taken into account. + +This patch adds the correct functions to handle the deviceid4 structure +and removes the pad field, which is currently not used by NFSD, from the +runtime state. The server's byte order is preserved because the deviceid4 +blob on the wire is only used as a cookie by the client. + +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayoutxdr.c | 7 ++----- + fs/nfsd/flexfilelayoutxdr.c | 3 +-- + fs/nfsd/nfs4layouts.c | 1 - + fs/nfsd/nfs4xdr.c | 14 +------------- + fs/nfsd/xdr4.h | 36 +++++++++++++++++++++++++++++++++++- + 5 files changed, 39 insertions(+), 22 deletions(-) + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_ + *p++ = cpu_to_be32(len); + *p++ = cpu_to_be32(1); /* we always return a single extent */ + +- p = xdr_encode_opaque_fixed(p, &b->vol_id, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &b->vol_id); + p = xdr_encode_hyper(p, b->foff); + p = xdr_encode_hyper(p, b->len); + p = xdr_encode_hyper(p, b->soff); +@@ -145,9 +144,7 @@ nfsd4_block_decode_layoutupdate(__be32 * + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; + +- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid)); +- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid)); +- ++ p = svcxdr_decode_deviceid4(p, &bex.vol_id); + p = xdr_decode_hyper(p, &bex.foff); + if (bex.foff & (block_size - 1)) { + dprintk("%s: unaligned offset 0x%llx\n", +--- a/fs/nfsd/flexfilelayoutxdr.c 
++++ b/fs/nfsd/flexfilelayoutxdr.c +@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_str + *p++ = cpu_to_be32(1); /* single mirror */ + *p++ = cpu_to_be32(1); /* single data server */ + +- p = xdr_encode_opaque_fixed(p, &fl->deviceid, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &fl->deviceid); + + *p++ = cpu_to_be32(1); /* efficiency */ + +--- a/fs/nfsd/nfs4layouts.c ++++ b/fs/nfsd/nfs4layouts.c +@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid + + id->fsid_idx = fhp->fh_export->ex_devid_map->idx; + id->generation = device_generation; +- id->pad = 0; + return 0; + } + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -566,18 +566,6 @@ nfsd4_decode_state_owner4(struct nfsd4_c + } + + #ifdef CONFIG_NFSD_PNFS +-static __be32 +-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, +- struct nfsd4_deviceid *devid) +-{ +- __be32 *p; +- +- p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); +- if (!p) +- return nfserr_bad_xdr; +- memcpy(devid, p, sizeof(*devid)); +- return nfs_ok; +-} + + static __be32 + nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, +@@ -1733,7 +1721,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_ + __be32 status; + + memset(gdev, 0, sizeof(*gdev)); +- status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); ++ status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -459,9 +459,43 @@ struct nfsd4_reclaim_complete { + struct nfsd4_deviceid { + u64 fsid_idx; + u32 generation; +- u32 pad; + }; + ++static inline __be32 * ++svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ *q = (__force __be64)devid->fsid_idx; ++ p += 2; ++ *p++ = (__force __be32)devid->generation; ++ *p++ = xdr_zero; ++ return p; ++} ++ ++static inline __be32 * ++svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid 
*devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ devid->fsid_idx = (__force u64)(*q); ++ p += 2; ++ devid->generation = (__force u32)(*p++); ++ p++; /* NFSD does not use the remaining octets */ ++ return p; ++} ++ ++static inline __be32 ++nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid) ++{ ++ __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); ++ ++ if (unlikely(!p)) ++ return nfserr_bad_xdr; ++ svcxdr_decode_deviceid4(p, devid); ++ return nfs_ok; ++} ++ + struct nfsd4_layout_seg { + u32 iomode; + u64 offset; diff --git a/queue-6.1/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch b/queue-6.1/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch new file mode 100644 index 0000000000..d82f679467 --- /dev/null +++ b/queue-6.1/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch @@ -0,0 +1,44 @@ +From stable+bounces-188142-greg=kroah.com@vger.kernel.org Mon Oct 20 17:39:22 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:39:11 -0400 +Subject: padata: Reset next CPU when reorder sequence wraps around +To: stable@vger.kernel.org +Cc: Xiao Liang , Herbert Xu , Sasha Levin +Message-ID: <20251020153911.1821042-1-sashal@kernel.org> + +From: Xiao Liang + +[ Upstream commit 501302d5cee0d8e8ec2c4a5919c37e0df9abc99b ] + +When seq_nr wraps around, the next reorder job with seq 0 is hashed to +the first CPU in padata_do_serial(). Correspondingly, need reset pd->cpu +to the first one when pd->processed wraps around. Otherwise, if the +number of used CPUs is not a power of 2, padata_find_next() will be +checking a wrong list, hence deadlock. 
+ +Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder") +Cc: +Signed-off-by: Xiao Liang +Signed-off-by: Herbert Xu +[ applied fix in padata_find_next() instead of padata_reorder() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/padata.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -282,7 +282,11 @@ static struct padata_priv *padata_find_n + if (remove_object) { + list_del_init(&padata->list); + ++pd->processed; +- pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); ++ /* When sequence wraps around, reset to the first CPU. */ ++ if (unlikely(pd->processed == 0)) ++ pd->cpu = cpumask_first(pd->cpumask.pcpu); ++ else ++ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); + } + + spin_unlock(&reorder->lock); diff --git a/queue-6.1/pci-add-pci_vdevice_sub-helper-macro.patch b/queue-6.1/pci-add-pci_vdevice_sub-helper-macro.patch new file mode 100644 index 0000000000..8acb277ee9 --- /dev/null +++ b/queue-6.1/pci-add-pci_vdevice_sub-helper-macro.patch @@ -0,0 +1,51 @@ +From stable+bounces-188236-greg=kroah.com@vger.kernel.org Mon Oct 20 21:54:06 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 15:53:45 -0400 +Subject: PCI: Add PCI_VDEVICE_SUB helper macro +To: stable@vger.kernel.org +Cc: Piotr Kwapulinski , Przemek Kitszel , Bjorn Helgaas , Rafal Romanowski , Tony Nguyen , Sasha Levin +Message-ID: <20251020195348.1882212-1-sashal@kernel.org> + +From: Piotr Kwapulinski + +[ Upstream commit 208fff3f567e2a3c3e7e4788845e90245c3891b4 ] + +PCI_VDEVICE_SUB generates the pci_device_id struct layout for +the specific PCI device/subdevice. Private data may follow the +output. 
+ +Reviewed-by: Przemek Kitszel +Signed-off-by: Piotr Kwapulinski +Acked-by: Bjorn Helgaas +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/pci.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -1027,6 +1027,20 @@ static inline struct pci_driver *to_pci_ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 + + /** ++ * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form ++ * @vend: the vendor name ++ * @dev: the 16 bit PCI Device ID ++ * @subvend: the 16 bit PCI Subvendor ID ++ * @subdev: the 16 bit PCI Subdevice ID ++ * ++ * Generate the pci_device_id struct layout for the specific PCI ++ * device/subdevice. Private data may follow the output. ++ */ ++#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \ ++ .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ ++ .subvendor = (subvend), .subdevice = (subdev), 0, 0 ++ ++/** + * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form + * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) + * @dev: the device name (without PCI_DEVICE_ID__ prefix) diff --git a/queue-6.1/pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch b/queue-6.1/pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch new file mode 100644 index 0000000000..bddce1f344 --- /dev/null +++ b/queue-6.1/pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch @@ -0,0 +1,94 @@ +From stable+bounces-188174-greg=kroah.com@vger.kernel.org Mon Oct 20 18:13:44 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:13:33 -0400 +Subject: PCI: j721e: Enable ACSPCIE Refclk if "ti,syscon-acspcie-proxy-ctrl" exists +To: stable@vger.kernel.org +Cc: "Siddharth Vadapalli" , "Krzysztof Wilczyński" 
, "Manivannan Sadhasivam" , "Sasha Levin" +Message-ID: <20251020161334.1833628-1-sashal@kernel.org> + +From: Siddharth Vadapalli + +[ Upstream commit 82c4be4168e26a5593aaa1002b5678128a638824 ] + +The ACSPCIE module is capable of driving the reference clock required by +the PCIe Endpoint device. It is an alternative to on-board and external +reference clock generators. Enabling the output from the ACSPCIE module's +PAD IO Buffers requires clearing the "PAD IO disable" bits of the +ACSPCIE_PROXY_CTRL register in the CTRL_MMR register space. + +Add support to enable the ACSPCIE reference clock output using the optional +device-tree property "ti,syscon-acspcie-proxy-ctrl". + +Link: https://lore.kernel.org/linux-pci/20240829105316.1483684-3-s-vadapalli@ti.com +Signed-off-by: Siddharth Vadapalli +Signed-off-by: Krzysztof Wilczyński +Reviewed-by: Manivannan Sadhasivam +Stable-dep-of: f842d3313ba1 ("PCI: j721e: Fix programming sequence of "strap" settings") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/cadence/pci-j721e.c | 39 ++++++++++++++++++++++++++++- + 1 file changed, 38 insertions(+), 1 deletion(-) + +--- a/drivers/pci/controller/cadence/pci-j721e.c ++++ b/drivers/pci/controller/cadence/pci-j721e.c +@@ -46,6 +46,7 @@ enum link_status { + #define LANE_COUNT_MASK BIT(8) + #define LANE_COUNT(n) ((n) << 8) + ++#define ACSPCIE_PAD_DISABLE_MASK GENMASK(1, 0) + #define GENERATION_SEL_MASK GENMASK(1, 0) + + #define MAX_LANES 2 +@@ -218,6 +219,36 @@ static int j721e_pcie_set_lane_count(str + return ret; + } + ++static int j721e_enable_acspcie_refclk(struct j721e_pcie *pcie, ++ struct regmap *syscon) ++{ ++ struct device *dev = pcie->cdns_pcie->dev; ++ struct device_node *node = dev->of_node; ++ u32 mask = ACSPCIE_PAD_DISABLE_MASK; ++ struct of_phandle_args args; ++ u32 val; ++ int ret; ++ ++ ret = of_parse_phandle_with_fixed_args(node, ++ "ti,syscon-acspcie-proxy-ctrl", ++ 1, 0, &args); ++ if (ret) { ++ dev_err(dev, ++ 
"ti,syscon-acspcie-proxy-ctrl has invalid arguments\n"); ++ return ret; ++ } ++ ++ /* Clear PAD IO disable bits to enable refclk output */ ++ val = ~(args.args[0]); ++ ret = regmap_update_bits(syscon, 0, mask, val); ++ if (ret) { ++ dev_err(dev, "failed to enable ACSPCIE refclk: %d\n", ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie) + { + struct device *dev = pcie->cdns_pcie->dev; +@@ -257,7 +288,13 @@ static int j721e_pcie_ctrl_init(struct j + return ret; + } + +- return 0; ++ /* Enable ACSPCIE refclk output if the optional property exists */ ++ syscon = syscon_regmap_lookup_by_phandle_optional(node, ++ "ti,syscon-acspcie-proxy-ctrl"); ++ if (!syscon) ++ return 0; ++ ++ return j721e_enable_acspcie_refclk(pcie, syscon); + } + + static int cdns_ti_pcie_config_read(struct pci_bus *bus, unsigned int devfn, diff --git a/queue-6.1/pci-j721e-fix-programming-sequence-of-strap-settings.patch b/queue-6.1/pci-j721e-fix-programming-sequence-of-strap-settings.patch new file mode 100644 index 0000000000..7e5f5c137d --- /dev/null +++ b/queue-6.1/pci-j721e-fix-programming-sequence-of-strap-settings.patch @@ -0,0 +1,90 @@ +From stable+bounces-188175-greg=kroah.com@vger.kernel.org Mon Oct 20 18:13:44 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:13:34 -0400 +Subject: PCI: j721e: Fix programming sequence of "strap" settings +To: stable@vger.kernel.org +Cc: Siddharth Vadapalli , Manivannan Sadhasivam , Sasha Levin +Message-ID: <20251020161334.1833628-2-sashal@kernel.org> + +From: Siddharth Vadapalli + +[ Upstream commit f842d3313ba179d4005096357289c7ad09cec575 ] + +The Cadence PCIe Controller integrated in the TI K3 SoCs supports both +Root-Complex and Endpoint modes of operation. The Glue Layer allows +"strapping" the Mode of operation of the Controller, the Link Speed +and the Link Width. 
This is enabled by programming the "PCIEn_CTRL" +register (n corresponds to the PCIe instance) within the CTRL_MMR +memory-mapped register space. The "reset-values" of the registers are +also different depending on the mode of operation. + +Since the PCIe Controller latches onto the "reset-values" immediately +after being powered on, if the Glue Layer configuration is not done while +the PCIe Controller is off, it will result in the PCIe Controller latching +onto the wrong "reset-values". In practice, this will show up as a wrong +representation of the PCIe Controller's capability structures in the PCIe +Configuration Space. Some such capabilities which are supported by the PCIe +Controller in the Root-Complex mode but are incorrectly latched onto as +being unsupported are: +- Link Bandwidth Notification +- Alternate Routing ID (ARI) Forwarding Support +- Next capability offset within Advanced Error Reporting (AER) capability + +Fix this by powering off the PCIe Controller before programming the "strap" +settings and powering it on after that. The runtime PM APIs namely +pm_runtime_put_sync() and pm_runtime_get_sync() will decrement and +increment the usage counter respectively, causing GENPD to power off and +power on the PCIe Controller. 
+ +Fixes: f3e25911a430 ("PCI: j721e: Add TI J721E PCIe driver") +Signed-off-by: Siddharth Vadapalli +Signed-off-by: Manivannan Sadhasivam +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250908120828.1471776-1-s-vadapalli@ti.com +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/cadence/pci-j721e.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +--- a/drivers/pci/controller/cadence/pci-j721e.c ++++ b/drivers/pci/controller/cadence/pci-j721e.c +@@ -270,6 +270,25 @@ static int j721e_pcie_ctrl_init(struct j + if (!ret) + offset = args.args[0]; + ++ /* ++ * The PCIe Controller's registers have different "reset-values" ++ * depending on the "strap" settings programmed into the PCIEn_CTRL ++ * register within the CTRL_MMR memory-mapped register space. ++ * The registers latch onto a "reset-value" based on the "strap" ++ * settings sampled after the PCIe Controller is powered on. ++ * To ensure that the "reset-values" are sampled accurately, power ++ * off the PCIe Controller before programming the "strap" settings ++ * and power it on after that. The runtime PM APIs namely ++ * pm_runtime_put_sync() and pm_runtime_get_sync() will decrement and ++ * increment the usage counter respectively, causing GENPD to power off ++ * and power on the PCIe Controller. 
++ */ ++ ret = pm_runtime_put_sync(dev); ++ if (ret < 0) { ++ dev_err(dev, "Failed to power off PCIe Controller\n"); ++ return ret; ++ } ++ + ret = j721e_pcie_set_mode(pcie, syscon, offset); + if (ret < 0) { + dev_err(dev, "Failed to set pci mode\n"); +@@ -288,6 +307,12 @@ static int j721e_pcie_ctrl_init(struct j + return ret; + } + ++ ret = pm_runtime_get_sync(dev); ++ if (ret < 0) { ++ dev_err(dev, "Failed to power on PCIe Controller\n"); ++ return ret; ++ } ++ + /* Enable ACSPCIE refclk output if the optional property exists */ + syscon = syscon_regmap_lookup_by_phandle_optional(node, + "ti,syscon-acspcie-proxy-ctrl"); diff --git a/queue-6.1/pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch b/queue-6.1/pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch new file mode 100644 index 0000000000..c0da270285 --- /dev/null +++ b/queue-6.1/pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch @@ -0,0 +1,75 @@ +From stable+bounces-188409-greg=kroah.com@vger.kernel.org Tue Oct 21 20:41:12 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 14:40:59 -0400 +Subject: PCI: tegra194: Reset BARs when running in PCIe endpoint mode +To: stable@vger.kernel.org +Cc: Niklas Cassel , Manivannan Sadhasivam , Bjorn Helgaas , Sasha Levin +Message-ID: <20251021184059.2524869-1-sashal@kernel.org> + +From: Niklas Cassel + +[ Upstream commit 42f9c66a6d0cc45758dab77233c5460e1cf003df ] + +Tegra already defines all BARs except BAR0 as BAR_RESERVED. This is +sufficient for pci-epf-test to not allocate backing memory and to not call +set_bar() for those BARs. However, marking a BAR as BAR_RESERVED does not +mean that the BAR gets disabled. + +The host side driver, pci_endpoint_test, simply does an ioremap for all +enabled BARs and will run tests against all enabled BARs, so it will run +tests against the BARs marked as BAR_RESERVED. + +After running the BAR tests (which will write to all enabled BARs), the +inbound address translation is broken. 
This is because the tegra controller +exposes the ATU Port Logic Structure in BAR4, so when BAR4 is written, the +inbound address translation settings get overwritten. + +To avoid this, implement the dw_pcie_ep_ops .init() callback and start off +by disabling all BARs (pci-epf-test will later enable/configure BARs that +are not defined as BAR_RESERVED). + +This matches the behavior of other PCIe endpoint drivers: dra7xx, imx6, +layerscape-ep, artpec6, dw-rockchip, qcom-ep, rcar-gen4, and uniphier-ep. + +With this, the PCI endpoint kselftest test case CONSECUTIVE_BAR_TEST (which +was specifically made to detect address translation issues) passes. + +Fixes: c57247f940e8 ("PCI: tegra: Add support for PCIe endpoint mode in Tegra194") +Signed-off-by: Niklas Cassel +Signed-off-by: Manivannan Sadhasivam +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250922140822.519796-7-cassel@kernel.org +[ changed .init field to .ep_init in pcie_ep_ops struct ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/controller/dwc/pcie-tegra194.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/pci/controller/dwc/pcie-tegra194.c ++++ b/drivers/pci/controller/dwc/pcie-tegra194.c +@@ -1949,6 +1949,15 @@ static irqreturn_t tegra_pcie_ep_pex_rst + return IRQ_HANDLED; + } + ++static void tegra_pcie_ep_init(struct dw_pcie_ep *ep) ++{ ++ struct dw_pcie *pci = to_dw_pcie_from_ep(ep); ++ enum pci_barno bar; ++ ++ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) ++ dw_pcie_ep_reset_bar(pci, bar); ++}; ++ + static int tegra_pcie_ep_raise_legacy_irq(struct tegra_pcie_dw *pcie, u16 irq) + { + /* Tegra194 supports only INTA */ +@@ -2022,6 +2031,7 @@ tegra_pcie_ep_get_features(struct dw_pci + } + + static const struct dw_pcie_ep_ops pcie_ep_ops = { ++ .ep_init = tegra_pcie_ep_init, + .raise_irq = tegra_pcie_ep_raise_irq, + .get_features = tegra_pcie_ep_get_features, + }; diff --git 
a/queue-6.1/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch b/queue-6.1/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch new file mode 100644 index 0000000000..e84772f075 --- /dev/null +++ b/queue-6.1/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch @@ -0,0 +1,265 @@ +From stable+bounces-188093-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:53 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:59:59 -0400 +Subject: phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251020125959.1763029-2-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 284fb19a3ffb1083c3ad9c00d29749d09dddb99c ] + +PLL lockup and O_CMN_READY assertion can only happen after common state +machine gets enabled by programming DPHY_CMN_SSM register, but driver was +polling them before the common state machine was enabled which is +incorrect. This is as per the DPHY initialization sequence as mentioned in +J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram". +It shows O_CMN_READY polling at the end after common configuration pin +setup where the common configuration pin setup step enables state machine +as referenced in "Table 12-1533. Common Configuration-Related Setup +mentions state machine" + +To fix this : +- Add new function callbacks for polling on PLL lock and O_CMN_READY + assertion. +- As state machine and clocks get enabled in power_on callback only, move + the clock related programming part from configure callback to power_on +callback and poll for the PLL lockup and O_CMN_READY assertion after state +machine gets enabled. +- The configure callback only saves the PLL configuration received from the + client driver which will be applied later on in power_on callback. 
+- Add checks to ensure configure is called before power_on and state + machine is in disabled state before power_on callback is called. +- Disable state machine in power_off so that client driver can re-configure + the PLL by following up a power_off, configure, power_on sequence. + +[1]: https://www.ti.com/lit/zip/spruil1 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 124 +++++++++++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 32 deletions(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -101,6 +101,8 @@ struct cdns_dphy_ops { + void (*set_pll_cfg)(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg); + unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); ++ int (*wait_for_pll_lock)(struct cdns_dphy *dphy); ++ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy); + }; + + struct cdns_dphy { +@@ -110,6 +112,8 @@ struct cdns_dphy { + struct clk *pll_ref_clk; + const struct cdns_dphy_ops *ops; + struct phy *phy; ++ bool is_configured; ++ bool is_powered; + }; + + /* Order of bands is important since the index is the band number. */ +@@ -196,6 +200,16 @@ static unsigned long cdns_dphy_get_wakeu + return dphy->ops->get_wakeup_time_ns(dphy); + } + ++static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0; ++} ++ ++static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_cmn_ready ? 
dphy->ops->wait_for_cmn_ready(dphy) : 0; ++} ++ + static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) + { + /* Default wakeup time is 800 ns (in a simulated environment). */ +@@ -237,7 +251,6 @@ static unsigned long cdns_dphy_j721e_get + static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) + { +- u32 status; + + /* + * set the PWM and PLL Byteclk divider settings to recommended values +@@ -254,13 +267,6 @@ static void cdns_dphy_j721e_set_pll_cfg( + + writel(DPHY_TX_J721E_WIZ_LANE_RSTB, + dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, +- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, +- (status & DPHY_TX_WIZ_O_CMN_READY), 0, +- POLL_TIMEOUT_US); + } + + static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div) +@@ -268,6 +274,23 @@ static void cdns_dphy_j721e_set_psm_div( + writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ); + } + ++static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, ++ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US); ++} ++ ++static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, ++ status & DPHY_TX_WIZ_O_CMN_READY, 0, ++ POLL_TIMEOUT_US); ++} ++ + /* + * This is the reference implementation of DPHY hooks. 
Specific integration of + * this IP may have to re-implement some of them depending on how they decided +@@ -283,6 +306,8 @@ static const struct cdns_dphy_ops j721e_ + .get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns, + .set_pll_cfg = cdns_dphy_j721e_set_pll_cfg, + .set_psm_div = cdns_dphy_j721e_set_psm_div, ++ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock, ++ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready, + }; + + static int cdns_dphy_config_from_opts(struct phy *phy, +@@ -340,21 +365,36 @@ static int cdns_dphy_validate(struct phy + static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); +- struct cdns_dphy_cfg cfg = { 0 }; +- int ret, band_ctrl; +- unsigned int reg; ++ int ret; + +- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg); +- if (ret) +- return ret; ++ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg); ++ if (!ret) ++ dphy->is_configured = true; ++ ++ return ret; ++} ++ ++static int cdns_dphy_power_on(struct phy *phy) ++{ ++ struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ int ret; ++ u32 reg; ++ ++ if (!dphy->is_configured || dphy->is_powered) ++ return -EINVAL; ++ ++ clk_prepare_enable(dphy->psm_clk); ++ clk_prepare_enable(dphy->pll_ref_clk); + + /* + * Configure the internal PSM clk divider so that the DPHY has a + * 1MHz clk (or something close). + */ + ret = cdns_dphy_setup_psm(dphy); +- if (ret) +- return ret; ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret); ++ goto err_power_on; ++ } + + /* + * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes +@@ -369,40 +409,60 @@ static int cdns_dphy_configure(struct ph + * Configure the DPHY PLL that will be used to generate the TX byte + * clk. 
+ */ +- cdns_dphy_set_pll_cfg(dphy, &cfg); ++ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg); + +- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate); +- if (band_ctrl < 0) +- return band_ctrl; ++ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate); ++ if (ret < 0) { ++ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret); ++ goto err_power_on; ++ } + +- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) | +- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl); ++ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) | ++ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret); + writel(reg, dphy->regs + DPHY_BAND_CFG); + +- return 0; +-} +- +-static int cdns_dphy_power_on(struct phy *phy) +-{ +- struct cdns_dphy *dphy = phy_get_drvdata(phy); +- +- clk_prepare_enable(dphy->psm_clk); +- clk_prepare_enable(dphy->pll_ref_clk); +- + /* Start TX state machine. */ + writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + ++ ret = cdns_dphy_wait_for_pll_lock(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret); ++ goto err_power_on; ++ } ++ ++ ret = cdns_dphy_wait_for_cmn_ready(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n", ++ ret); ++ goto err_power_on; ++ } ++ ++ dphy->is_powered = true; ++ + return 0; ++ ++err_power_on: ++ clk_disable_unprepare(dphy->pll_ref_clk); ++ clk_disable_unprepare(dphy->psm_clk); ++ ++ return ret; + } + + static int cdns_dphy_power_off(struct phy *phy) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ u32 reg; + + clk_disable_unprepare(dphy->pll_ref_clk); + clk_disable_unprepare(dphy->psm_clk); + ++ /* Stop TX state machine. 
*/ ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM); ++ ++ dphy->is_powered = false; ++ + return 0; + } + diff --git a/queue-6.1/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch b/queue-6.1/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch new file mode 100644 index 0000000000..aff1f28b26 --- /dev/null +++ b/queue-6.1/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch @@ -0,0 +1,58 @@ +From stable+bounces-188387-greg=kroah.com@vger.kernel.org Tue Oct 21 19:04:25 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 13:04:14 -0400 +Subject: phy: cadence: cdns-dphy: Update calibration wait time for startup state machine +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251021170414.2402792-3-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 2c27aaee934a1b5229152fe33a14f1fdf50da143 ] + +Do read-modify-write so that we re-use the characterized reset value as +specified in TRM [1] to program calibration wait time which defines number +of cycles to wait for after startup state machine is in bandgap enable +state. + +This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's +AM62L and J721E SoC since earlier calibration wait time was getting +overwritten to zero value thus failing the PLL to lockup and causing +timeout. 
+ +[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2): +Link: https://www.ti.com/lit/pdf/spruj83 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -31,6 +31,7 @@ + + #define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) + #define DPHY_CMN_SSM_EN BIT(0) ++#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1) + #define DPHY_CMN_TX_MODE_EN BIT(9) + + #define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) +@@ -422,7 +423,8 @@ static int cdns_dphy_power_on(struct phy + writel(reg, dphy->regs + DPHY_BAND_CFG); + + /* Start TX state machine. 
*/ +- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + + ret = cdns_dphy_wait_for_pll_lock(dphy); diff --git a/queue-6.1/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch b/queue-6.1/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch new file mode 100644 index 0000000000..7e647effe3 --- /dev/null +++ b/queue-6.1/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch @@ -0,0 +1,59 @@ +From stable+bounces-188092-greg=kroah.com@vger.kernel.org Mon Oct 20 15:03:09 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:59:58 -0400 +Subject: phy: cdns-dphy: Store hs_clk_rate and return it +To: stable@vger.kernel.org +Cc: Tomi Valkeinen , Aradhya Bhatia , Parth Pancholi , Jayesh Choudhary , Vinod Koul , Devarsh Thakkar , Sasha Levin +Message-ID: <20251020125959.1763029-1-sashal@kernel.org> + +From: Tomi Valkeinen + +[ Upstream commit 689a54acb56858c85de8c7285db82b8ae6dbf683 ] + +The DPHY driver does not return the actual hs_clk_rate, so the DSI +driver has no idea what clock was actually achieved. Set the realized +hs_clk_rate to the opts struct, so that the DSI driver gets it back. 
+ +Reviewed-by: Aradhya Bhatia +Tested-by: Parth Pancholi +Tested-by: Jayesh Choudhary +Acked-by: Vinod Koul +Reviewed-by: Devarsh Thakkar +Signed-off-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250723-cdns-dphy-hs-clk-rate-fix-v1-1-d4539d44cbe7@ideasonboard.com +Signed-off-by: Vinod Koul +Stable-dep-of: 284fb19a3ffb ("phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -80,6 +80,7 @@ struct cdns_dphy_cfg { + u8 pll_ipdiv; + u8 pll_opdiv; + u16 pll_fbdiv; ++ u32 hs_clk_rate; + unsigned int nlanes; + }; + +@@ -155,6 +156,9 @@ static int cdns_dsi_get_dphy_pll_cfg(str + cfg->pll_ipdiv, + pll_ref_hz); + ++ cfg->hs_clk_rate = div_u64((u64)pll_ref_hz * cfg->pll_fbdiv, ++ 2 * cfg->pll_opdiv * cfg->pll_ipdiv); ++ + return 0; + } + +@@ -298,6 +302,7 @@ static int cdns_dphy_config_from_opts(st + if (ret) + return ret; + ++ opts->hs_clk_rate = cfg->hs_clk_rate; + opts->wakeup = cdns_dphy_get_wakeup_time_ns(dphy) / 1000; + + return 0; diff --git a/queue-6.1/pm-runtime-add-new-devm-functions.patch b/queue-6.1/pm-runtime-add-new-devm-functions.patch new file mode 100644 index 0000000000..dd94db9798 --- /dev/null +++ b/queue-6.1/pm-runtime-add-new-devm-functions.patch @@ -0,0 +1,109 @@ +From stable+bounces-188098-greg=kroah.com@vger.kernel.org Mon Oct 20 15:03:56 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:03:47 -0400 +Subject: PM: runtime: Add new devm functions +To: stable@vger.kernel.org +Cc: "Bence Csókás" , "Rafael J. Wysocki" , "Sasha Levin" +Message-ID: <20251020130348.1764406-1-sashal@kernel.org> + +From: Bence Csókás + +[ Upstream commit 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 ] + +Add `devm_pm_runtime_set_active_enabled()` and +`devm_pm_runtime_get_noresume()` for simplifying +common cases in drivers. 
+ +Signed-off-by: Bence Csókás +Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu +Signed-off-by: Rafael J. Wysocki +Stable-dep-of: 0792c1984a45 ("iio: imu: inv_icm42600: Simplify pm_runtime setup") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/power/runtime.c | 44 +++++++++++++++++++++++++++++++++++++++++++ + include/linux/pm_runtime.h | 4 +++ + 2 files changed, 48 insertions(+) + +--- a/drivers/base/power/runtime.c ++++ b/drivers/base/power/runtime.c +@@ -1512,6 +1512,32 @@ out: + } + EXPORT_SYMBOL_GPL(pm_runtime_enable); + ++static void pm_runtime_set_suspended_action(void *data) ++{ ++ pm_runtime_set_suspended(data); ++} ++ ++/** ++ * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. ++ * ++ * @dev: Device to handle. ++ */ ++int devm_pm_runtime_set_active_enabled(struct device *dev) ++{ ++ int err; ++ ++ err = pm_runtime_set_active(dev); ++ if (err) ++ return err; ++ ++ err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); ++ if (err) ++ return err; ++ ++ return devm_pm_runtime_enable(dev); ++} ++EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); ++ + static void pm_runtime_disable_action(void *data) + { + pm_runtime_dont_use_autosuspend(data); +@@ -1534,6 +1560,24 @@ int devm_pm_runtime_enable(struct device + } + EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); + ++static void pm_runtime_put_noidle_action(void *data) ++{ ++ pm_runtime_put_noidle(data); ++} ++ ++/** ++ * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. ++ * ++ * @dev: Device to handle. ++ */ ++int devm_pm_runtime_get_noresume(struct device *dev) ++{ ++ pm_runtime_get_noresume(dev); ++ ++ return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); ++} ++EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); ++ + /** + * pm_runtime_forbid - Block runtime PM of a device. + * @dev: Device to handle. 
+--- a/include/linux/pm_runtime.h ++++ b/include/linux/pm_runtime.h +@@ -95,7 +95,9 @@ extern void pm_runtime_new_link(struct d + extern void pm_runtime_drop_link(struct device_link *link); + extern void pm_runtime_release_supplier(struct device_link *link); + ++int devm_pm_runtime_set_active_enabled(struct device *dev); + extern int devm_pm_runtime_enable(struct device *dev); ++int devm_pm_runtime_get_noresume(struct device *dev); + + /** + * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. +@@ -292,7 +294,9 @@ static inline void __pm_runtime_disable( + static inline void pm_runtime_allow(struct device *dev) {} + static inline void pm_runtime_forbid(struct device *dev) {} + ++static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } + static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } ++static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } + + static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} + static inline void pm_runtime_get_noresume(struct device *dev) {} diff --git a/queue-6.1/revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch b/queue-6.1/revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch new file mode 100644 index 0000000000..d360fed162 --- /dev/null +++ b/queue-6.1/revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch @@ -0,0 +1,52 @@ +From leon.hwang@linux.dev Mon Oct 27 12:22:10 2025 +From: Leon Hwang +Date: Thu, 23 Oct 2025 09:47:32 +0800 +Subject: Revert "selftests: mm: fix map_hugetlb failure on 64K page size systems" +To: stable@vger.kernel.org, greg@kroah.com +Cc: akpm@linux-foundation.org, david@redhat.com, lorenzo.stoakes@oracle.com, lance.yang@linux.dev, shuah@kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, Leon Hwang +Message-ID: <20251023014732.73721-1-leon.hwang@linux.dev> + +From: Leon Hwang + +This reverts commit 
a584c7734a4dd050451fcdd65c66317e15660e81 which is +commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 upstream. + +This fixes the following build error: + +map_hugetlb.c: In function 'main': +map_hugetlb.c:79:25: warning: implicit declaration of function 'default_huge_page_size' [-Wimplicit-function-declaration] +79 | hugepage_size = default_huge_page_size(); + +Signed-off-by: Leon Hwang +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/vm/map_hugetlb.c | 7 ------- + 1 file changed, 7 deletions(-) + +--- a/tools/testing/selftests/vm/map_hugetlb.c ++++ b/tools/testing/selftests/vm/map_hugetlb.c +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include "vm_util.h" + + #define LENGTH (256UL*1024*1024) + #define PROTECTION (PROT_READ | PROT_WRITE) +@@ -71,16 +70,10 @@ int main(int argc, char **argv) + { + void *addr; + int ret; +- size_t hugepage_size; + size_t length = LENGTH; + int flags = FLAGS; + int shift = 0; + +- hugepage_size = default_huge_page_size(); +- /* munmap with fail if the length is not page aligned */ +- if (hugepage_size > length) +- length = hugepage_size; +- + if (argc > 1) + length = atol(argv[1]) << 20; + if (argc > 2) { diff --git a/queue-6.1/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch b/queue-6.1/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch new file mode 100644 index 0000000000..3ecbdd0c26 --- /dev/null +++ b/queue-6.1/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch @@ -0,0 +1,92 @@ +From 9daa5a8795865f9a3c93d8d1066785b07ded6073 Mon Sep 17 00:00:00 2001 +From: Vineeth Vijayan +Date: Wed, 1 Oct 2025 15:38:17 +0200 +Subject: s390/cio: Update purge function to unregister the unused subchannels + +From: Vineeth Vijayan + +commit 9daa5a8795865f9a3c93d8d1066785b07ded6073 upstream. 
+ +Starting with 'commit 2297791c92d0 ("s390/cio: dont unregister +subchannel from child-drivers")', cio no longer unregisters +subchannels when the attached device is invalid or unavailable. + +As an unintended side-effect, the cio_ignore purge function no longer +removes subchannels for devices on the cio_ignore list if no CCW device +is attached. This situation occurs when a CCW device is non-operational +or unavailable + +To ensure the same outcome of the purge function as when the +current cio_ignore list had been active during boot, update the purge +function to remove I/O subchannels without working CCW devices if the +associated device number is found on the cio_ignore list. + +Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") +Suggested-by: Peter Oberparleiter +Reviewed-by: Peter Oberparleiter +Signed-off-by: Vineeth Vijayan +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/cio/device.c | 39 +++++++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +--- a/drivers/s390/cio/device.c ++++ b/drivers/s390/cio/device.c +@@ -1309,23 +1309,34 @@ void ccw_device_schedule_recovery(void) + spin_unlock_irqrestore(&recovery_lock, flags); + } + +-static int purge_fn(struct device *dev, void *data) ++static int purge_fn(struct subchannel *sch, void *data) + { +- struct ccw_device *cdev = to_ccwdev(dev); +- struct ccw_dev_id *id = &cdev->private->dev_id; +- struct subchannel *sch = to_subchannel(cdev->dev.parent); +- +- spin_lock_irq(cdev->ccwlock); +- if (is_blacklisted(id->ssid, id->devno) && +- (cdev->private->state == DEV_STATE_OFFLINE) && +- (atomic_cmpxchg(&cdev->private->onoff, 0, 1) == 0)) { +- CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", id->ssid, +- id->devno); ++ struct ccw_device *cdev; ++ ++ spin_lock_irq(sch->lock); ++ if (sch->st != SUBCHANNEL_TYPE_IO || !sch->schib.pmcw.dnv) ++ goto unlock; ++ ++ if (!is_blacklisted(sch->schid.ssid, sch->schib.pmcw.dev)) 
++ goto unlock; ++ ++ cdev = sch_get_cdev(sch); ++ if (cdev) { ++ if (cdev->private->state != DEV_STATE_OFFLINE) ++ goto unlock; ++ ++ if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) ++ goto unlock; + ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); +- css_sched_sch_todo(sch, SCH_TODO_UNREG); + atomic_set(&cdev->private->onoff, 0); + } +- spin_unlock_irq(cdev->ccwlock); ++ ++ css_sched_sch_todo(sch, SCH_TODO_UNREG); ++ CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x%s\n", sch->schid.ssid, ++ sch->schib.pmcw.dev, cdev ? "" : " (no cdev)"); ++ ++unlock: ++ spin_unlock_irq(sch->lock); + /* Abort loop in case of pending signal. */ + if (signal_pending(current)) + return -EINTR; +@@ -1341,7 +1352,7 @@ static int purge_fn(struct device *dev, + int ccw_purge_blacklisted(void) + { + CIO_MSG_EVENT(2, "ccw: purging blacklisted devices\n"); +- bus_for_each_dev(&ccw_bus_type, NULL, NULL, purge_fn); ++ for_each_subchannel_staged(purge_fn, NULL, NULL); + return 0; + } + diff --git a/queue-6.1/series b/queue-6.1/series index 590d983a1f..081c5544e1 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -118,3 +118,44 @@ most-usb-fix-use-after-free-in-hdm_disconnect.patch most-usb-hdm_probe-fix-calling-put_device-before-device-initialization.patch serial-8250_dw-handle-reset-control-deassert-error.patch serial-8250_exar-add-support-for-advantech-2-port-card-with-device-id-0x0018.patch +xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch +xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch +phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch +phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch +pm-runtime-add-new-devm-functions.patch +iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch +iio-imu-inv_icm42600-use-instead-of-memset.patch +iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch +padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch +fuse-allocate-ff-release_args-only-if-release-is-needed.patch 
+fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch +arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch +pci-j721e-enable-acspcie-refclk-if-ti-syscon-acspcie-proxy-ctrl-exists.patch +pci-j721e-fix-programming-sequence-of-strap-settings.patch +nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch +nfsd-minor-cleanup-in-layoutcommit-processing.patch +nfsd-fix-last-write-offset-handling-in-layoutcommit.patch +vfs-don-t-leak-disconnected-dentries-on-umount.patch +nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch +ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options.patch +drm-sched-fix-potential-double-free-in-drm_sched_job_add_resv_dependencies.patch +pci-tegra194-reset-bars-when-running-in-pcie-endpoint-mode.patch +f2fs-add-a-f2fs_get_block_locked-helper.patch +f2fs-remove-the-create-argument-to-f2fs_map_blocks.patch +f2fs-factor-a-f2fs_map_blocks_cached-helper.patch +f2fs-fix-wrong-block-mapping-for-multi-devices.patch +pci-add-pci_vdevice_sub-helper-macro.patch +ixgbevf-add-support-for-intel-r-e610-device.patch +ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch +ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch +phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch +arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch +xfs-always-warn-about-deprecated-mount-options.patch +devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch +x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch +ksmbd-browse-interfaces-list-on-fsctl_query_interface_info-ioctl.patch +s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch +mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch +revert-selftests-mm-fix-map_hugetlb-failure-on-64k-page-size-systems.patch +arm64-cputype-add-neoverse-v3ae-definitions.patch +arm64-errata-apply-workarounds-for-neoverse-v3ae.patch 
diff --git a/queue-6.1/vfs-don-t-leak-disconnected-dentries-on-umount.patch b/queue-6.1/vfs-don-t-leak-disconnected-dentries-on-umount.patch new file mode 100644 index 0000000000..e6fe4e39dd --- /dev/null +++ b/queue-6.1/vfs-don-t-leak-disconnected-dentries-on-umount.patch @@ -0,0 +1,58 @@ +From stable+bounces-188273-greg=kroah.com@vger.kernel.org Tue Oct 21 02:25:06 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 20:24:58 -0400 +Subject: vfs: Don't leak disconnected dentries on umount +To: stable@vger.kernel.org +Cc: Jan Kara , syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com, Christian Brauner , Sasha Levin +Message-ID: <20251021002458.1948943-1-sashal@kernel.org> + +From: Jan Kara + +[ Upstream commit 56094ad3eaa21e6621396cc33811d8f72847a834 ] + +When user calls open_by_handle_at() on some inode that is not cached, we +will create disconnected dentry for it. If such dentry is a directory, +exportfs_decode_fh_raw() will then try to connect this dentry to the +dentry tree through reconnect_path(). It may happen for various reasons +(such as corrupted fs or race with rename) that the call to +lookup_one_unlocked() in reconnect_one() will fail to find the dentry we +are trying to reconnect and instead create a new dentry under the +parent. Now this dentry will not be marked as disconnected although the +parent still may well be disconnected (at least in case this +inconsistency happened because the fs is corrupted and .. doesn't point +to the real parent directory). This creates inconsistency in +disconnected flags but AFAICS it was mostly harmless. At least until +commit f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") +which removed adding of most disconnected dentries to sb->s_anon list. +Thus after this commit cleanup of disconnected dentries implicitely +relies on the fact that dput() will immediately reclaim such dentries. 
+However when some leaf dentry isn't marked as disconnected, as in the +scenario described above, the reclaim doesn't happen and the dentries +are "leaked". Memory reclaim can eventually reclaim them but otherwise +they stay in memory and if umount comes first, we hit infamous "Busy +inodes after unmount" bug. Make sure all dentries created under a +disconnected parent are marked as disconnected as well. + +Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com +Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Christian Brauner +[ relocated DCACHE_DISCONNECTED propagation from d_alloc_parallel() to d_alloc() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/dcache.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1862,6 +1862,8 @@ struct dentry *d_alloc(struct dentry * p + __dget_dlock(parent); + dentry->d_parent = parent; + list_add(&dentry->d_child, &parent->d_subdirs); ++ if (parent->d_flags & DCACHE_DISCONNECTED) ++ dentry->d_flags |= DCACHE_DISCONNECTED; + spin_unlock(&parent->d_lock); + + return dentry; diff --git a/queue-6.1/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch b/queue-6.1/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch new file mode 100644 index 0000000000..1c9ec78439 --- /dev/null +++ b/queue-6.1/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch @@ -0,0 +1,136 @@ +From stable+bounces-189245-greg=kroah.com@vger.kernel.org Fri Oct 24 20:17:13 2025 +From: Babu Moger +Date: Fri, 24 Oct 2025 13:13:11 -0500 +Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID +To: +Message-ID: <20251024181311.146536-1-babu.moger@amd.com> + +From: Babu Moger + +[ Upstream commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 
] + +Users can create as many monitoring groups as the number of RMIDs supported +by the hardware. However, on AMD systems, only a limited number of RMIDs +are guaranteed to be actively tracked by the hardware. RMIDs that exceed +this limit are placed in an "Unavailable" state. + +When a bandwidth counter is read for such an RMID, the hardware sets +MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked +again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable +remains set on first read after tracking re-starts and is clear on all +subsequent reads as long as the RMID is tracked. + +resctrl miscounts the bandwidth events after an RMID transitions from the +"Unavailable" state back to being tracked. This happens because when the +hardware starts counting again after resetting the counter to zero, resctrl +in turn compares the new count against the counter value stored from the +previous time the RMID was tracked. + +This results in resctrl computing an event value that is either undercounting +(when new counter is more than stored counter) or a mistaken overflow (when +new counter is less than stored counter). + +Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to +zero whenever the RMID is in the "Unavailable" state to ensure accurate +counting after the RMID resets to zero when it starts to be tracked again. + +Example scenario that results in mistaken overflow +================================================== +1. The resctrl filesystem is mounted, and a task is assigned to a + monitoring group. + + $mount -t resctrl resctrl /sys/fs/resctrl + $mkdir /sys/fs/resctrl/mon_groups/test1/ + $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 21323 <- Total bytes on domain 0 + "Unavailable" <- Total bytes on domain 1 + + Task is running on domain 0. Counter on domain 1 is "Unavailable". + +2. 
The task runs on domain 0 for a while and then moves to domain 1. The + counter starts incrementing on domain 1. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 7345357 <- Total bytes on domain 0 + 4545 <- Total bytes on domain 1 + +3. At some point, the RMID in domain 0 transitions to the "Unavailable" + state because the task is no longer executing in that domain. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + "Unavailable" <- Total bytes on domain 0 + 434341 <- Total bytes on domain 1 + +4. Since the task continues to migrate between domains, it may eventually + return to domain 0. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 17592178699059 <- Overflow on domain 0 + 3232332 <- Total bytes on domain 1 + +In this case, the RMID on domain 0 transitions from "Unavailable" state to +active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when +the counter is read and begins tracking the RMID counting from 0. + +Subsequent reads succeed but return a value smaller than the previously +saved MSR value (7345357). Consequently, the resctrl's overflow logic is +triggered, it compares the previous value (7345357) with the new, smaller +value and incorrectly interprets this as a counter overflow, adding a large +delta. + +In reality, this is a false positive: the counter did not overflow but was +simply reset when the RMID transitioned from "Unavailable" back to active +state. + +Here is the text from APM [1] available from [2]. + +"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the +first QM_CTR read when it begins tracking an RMID that it was not +previously tracking. The U bit will be zero for all subsequent reads from +that RMID while it is still tracked by the hardware. Therefore, a QM_CTR +read with the U bit set when that RMID is in use by a processor can be +considered 0 when calculating the difference with a subsequent read." 
+ +[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming + Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory + Bandwidth (MBM). + + [ bp: Split commit message into smaller paragraph chunks for better + consumption. ] + +Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") +Signed-off-by: Babu Moger +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Reinette Chatre +Tested-by: Reinette Chatre +Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] +(cherry picked from commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92) +[babu.moger@amd.com: Fix conflict for v6.1 stable] +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -224,11 +224,15 @@ int resctrl_arch_rmid_read(struct rdt_re + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; + ++ am = get_arch_mbm_state(hw_dom, rmid, eventid); ++ + ret = __rmid_read(rmid, eventid, &msr_val); +- if (ret) ++ if (ret) { ++ if (am && ret == -EINVAL) ++ am->prev_msr = 0; + return ret; ++ } + +- am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + am->chunks += mbm_overflow_count(am->prev_msr, msr_val, + hw_res->mbm_width); diff --git a/queue-6.1/xfs-always-warn-about-deprecated-mount-options.patch b/queue-6.1/xfs-always-warn-about-deprecated-mount-options.patch new file mode 100644 index 0000000000..ad00421437 --- /dev/null +++ b/queue-6.1/xfs-always-warn-about-deprecated-mount-options.patch @@ -0,0 +1,93 @@ +From stable+bounces-189893-greg=kroah.com@vger.kernel.org Mon Oct 27 00:01:52 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 19:01:43 -0400 +Subject: xfs: always warn about deprecated mount options +To: stable@vger.kernel.org +Cc: "Darrick J. 
Wong" , Christoph Hellwig , Carlos Maiolino , Carlos Maiolino , Sasha Levin +Message-ID: <20251026230143.275411-1-sashal@kernel.org> + +From: "Darrick J. Wong" + +[ Upstream commit 630785bfbe12c3ee3ebccd8b530a98d632b7e39d ] + +The deprecation of the 'attr2' mount option in 6.18 wasn't entirely +successful because nobody noticed that the kernel never printed a +warning about attr2 being set in fstab if the only xfs filesystem is the +root fs; the initramfs mounts the root fs with no mount options; and the +init scripts only conveyed the fstab options by remounting the root fs. + +Fix this by making it complain all the time. + +Cc: stable@vger.kernel.org # v5.13 +Fixes: 92cf7d36384b99 ("xfs: Skip repetitive warnings about mount options") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Carlos Maiolino +Signed-off-by: Carlos Maiolino +[ Update existing xfs_fs_warn_deprecated() callers ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_super.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -1201,16 +1201,25 @@ suffix_kstrtoint( + static inline void + xfs_fs_warn_deprecated( + struct fs_context *fc, +- struct fs_parameter *param, +- uint64_t flag, +- bool value) ++ struct fs_parameter *param) + { +- /* Don't print the warning if reconfiguring and current mount point +- * already had the flag set ++ /* ++ * Always warn about someone passing in a deprecated mount option. ++ * Previously we wouldn't print the warning if we were reconfiguring ++ * and current mount point already had the flag set, but that was not ++ * the right thing to do. ++ * ++ * Many distributions mount the root filesystem with no options in the ++ * initramfs and rely on mount -a to remount the root fs with the ++ * options in fstab. 
However, the old behavior meant that there would ++ * never be a warning about deprecated mount options for the root fs in ++ * /etc/fstab. On a single-fs system, that means no warning at all. ++ * ++ * Compounding this problem are distribution scripts that copy ++ * /proc/mounts to fstab, which means that we can't remove mount ++ * options unless we're 100% sure they have only ever been advertised ++ * in /proc/mounts in response to explicitly provided mount options. + */ +- if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && +- !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) +- return; + xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); + } + +@@ -1349,19 +1358,19 @@ xfs_fs_parse_param( + #endif + /* Following mount options will be removed in September 2025 */ + case Opt_ikeep: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_IKEEP; + return 0; + case Opt_noikeep: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features &= ~XFS_FEAT_IKEEP; + return 0; + case Opt_attr2: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_ATTR2; + return 0; + case Opt_noattr2: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_NOATTR2; + return 0; + default: diff --git a/queue-6.1/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch b/queue-6.1/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch new file mode 100644 index 0000000000..0c70d17377 --- /dev/null +++ b/queue-6.1/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch @@ -0,0 +1,173 @@ +From stable+bounces-188057-greg=kroah.com@vger.kernel.org Mon Oct 20 14:50:09 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:49:20 -0400 
+Subject: xfs: fix log CRC mismatches between i386 and other architectures +To: stable@vger.kernel.org +Cc: Christoph Hellwig , Carlos Maiolino , Sasha Levin +Message-ID: <20251020124920.1757717-2-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit e747883c7d7306acb4d683038d881528fbfbe749 ] + +When mounting file systems with a log that was dirtied on i386 on +other architectures or vice versa, log recovery is unhappy: + +[ 11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc. + +This is because the CRCs generated by i386 and other architectures +always diff. The reason for that is that sizeof(struct xlog_rec_header) +returns different values for i386 vs the rest (324 vs 328), because the +struct is not sizeof(uint64_t) aligned, and i386 has odd struct size +alignment rules. + +This issue goes back to commit 13cdc853c519 ("Add log versioning, and new +super block field for the log stripe") in the xfs-import tree, which +adds log v2 support and the h_size field that causes the unaligned size. +At that time it only mattered for the crude debug only log header +checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log") +it became a real issue for v5 file system, because now there is a proper +CRC, and regular builds actually expect it match. + +Fix this by allowing checksums with and without the padding. 
+ +Fixes: 0e446be44806 ("xfs: add CRC checks to the log") +Cc: # v3.8 +Signed-off-by: Christoph Hellwig +Signed-off-by: Carlos Maiolino +[ Adjust context and file names ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_log_format.h | 30 +++++++++++++++++++++++++++++- + fs/xfs/xfs_log.c | 8 ++++---- + fs/xfs/xfs_log_priv.h | 4 ++-- + fs/xfs/xfs_log_recover.c | 19 +++++++++++++++++-- + fs/xfs/xfs_ondisk.h | 2 ++ + 5 files changed, 54 insertions(+), 9 deletions(-) + +--- a/fs/xfs/libxfs/xfs_log_format.h ++++ b/fs/xfs/libxfs/xfs_log_format.h +@@ -171,12 +171,40 @@ typedef struct xlog_rec_header { + __be32 h_prev_block; /* block number to previous LR : 4 */ + __be32 h_num_logops; /* number of log operations in this LR : 4 */ + __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; +- /* new fields */ ++ ++ /* fields added by the Linux port: */ + __be32 h_fmt; /* format of log record : 4 */ + uuid_t h_fs_uuid; /* uuid of FS : 16 */ ++ ++ /* fields added for log v2: */ + __be32 h_size; /* iclog size : 4 */ ++ ++ /* ++ * When h_size added for log v2 support, it caused structure to have ++ * a different size on i386 vs all other architectures because the ++ * sum of the size ofthe member is not aligned by that of the largest ++ * __be64-sized member, and i386 has really odd struct alignment rules. ++ * ++ * Due to the way the log headers are placed out on-disk that alone is ++ * not a problem becaue the xlog_rec_header always sits alone in a ++ * BBSIZEs area, and the rest of that area is padded with zeroes. ++ * But xlog_cksum used to calculate the checksum based on the structure ++ * size, and thus gives different checksums for i386 vs the rest. ++ * We now do two checksum validation passes for both sizes to allow ++ * moving v5 file systems with unclean logs between i386 and other ++ * (little-endian) architectures. 
++ */ ++ __u32 h_pad0; + } xlog_rec_header_t; + ++#ifdef __i386__ ++#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size) ++#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header) ++#else ++#define XLOG_REC_SIZE sizeof(struct xlog_rec_header) ++#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size) ++#endif /* __i386__ */ ++ + typedef struct xlog_rec_ext_header { + __be32 xh_cycle; /* write cycle of log : 4 */ + __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1804,13 +1804,13 @@ xlog_cksum( + struct xlog *log, + struct xlog_rec_header *rhead, + char *dp, +- int size) ++ unsigned int hdrsize, ++ unsigned int size) + { + uint32_t crc; + + /* first generate the crc for the record header ... */ +- crc = xfs_start_cksum_update((char *)rhead, +- sizeof(struct xlog_rec_header), ++ crc = xfs_start_cksum_update((char *)rhead, hdrsize, + offsetof(struct xlog_rec_header, h_crc)); + + /* ... then for additional cycle data for v2 logs ... */ +@@ -2074,7 +2074,7 @@ xlog_sync( + + /* calculcate the checksum */ + iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, +- iclog->ic_datap, size); ++ iclog->ic_datap, XLOG_REC_SIZE, size); + /* + * Intentionally corrupt the log record CRC based on the error injection + * frequency, if defined. 
This facilitates testing log recovery in the
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -498,8 +498,8 @@ xlog_recover_finish(
+ extern void
+ xlog_recover_cancel(struct xlog *);
+
+-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+- char *dp, int size);
++__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
++ char *dp, unsigned int hdrsize, unsigned int size);
+
+ extern struct kmem_cache *xfs_log_ticket_cache;
+ struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2854,9 +2854,24 @@ xlog_recover_process(
+ int pass,
+ struct list_head *buffer_list)
+ {
+- __le32 expected_crc = rhead->h_crc, crc;
++ __le32 expected_crc = rhead->h_crc, crc, other_crc;
+
+- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
++ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
++ be32_to_cpu(rhead->h_len));
++
++ /*
++ * Look at the end of the struct xlog_rec_header definition in
++ * xfs_log_format.h for the gory details.
++ */
++ if (expected_crc && crc != expected_crc) {
++ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
++ be32_to_cpu(rhead->h_len));
++ if (other_crc == expected_crc) {
++ xfs_notice_once(log->l_mp,
++ "Fixing up incorrect CRC due to padding.");
++ crc = other_crc;
++ }
++ }
+
+ /*
+ * Nothing else to do if this is a CRC verification pass.
Just return
+--- a/fs/xfs/xfs_ondisk.h
++++ b/fs/xfs/xfs_ondisk.h
+@@ -142,6 +142,8 @@ xfs_check_ondisk_structs(void)
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
+
+ XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
diff --git a/queue-6.1/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch b/queue-6.1/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
new file mode 100644
index 0000000000..bf3e7e82cf
--- /dev/null
+++ b/queue-6.1/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
@@ -0,0 +1,68 @@
+From stable+bounces-188056-greg=kroah.com@vger.kernel.org Mon Oct 20 14:49:38 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 08:49:19 -0400
+Subject: xfs: rename the old_crc variable in xlog_recover_process
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin
+Message-ID: <20251020124920.1757717-1-sashal@kernel.org>
+
+From: Christoph Hellwig
+
+[ Upstream commit 0b737f4ac1d3ec093347241df74bbf5f54a7e16c ]
+
+old_crc is a very misleading name. Rename it to expected_crc as that
+describes the usage much better.
+
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Darrick J.
Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: e747883c7d73 ("xfs: fix log CRC mismatches between i386 and other architectures") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2854,20 +2854,19 @@ xlog_recover_process( + int pass, + struct list_head *buffer_list) + { +- __le32 old_crc = rhead->h_crc; +- __le32 crc; ++ __le32 expected_crc = rhead->h_crc, crc; + + crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); + + /* + * Nothing else to do if this is a CRC verification pass. Just return + * if this a record with a non-zero crc. Unfortunately, mkfs always +- * sets old_crc to 0 so we must consider this valid even on v5 supers. +- * Otherwise, return EFSBADCRC on failure so the callers up the stack +- * know precisely what failed. ++ * sets expected_crc to 0 so we must consider this valid even on v5 ++ * supers. Otherwise, return EFSBADCRC on failure so the callers up the ++ * stack know precisely what failed. + */ + if (pass == XLOG_RECOVER_CRCPASS) { +- if (old_crc && crc != old_crc) ++ if (expected_crc && crc != expected_crc) + return -EFSBADCRC; + return 0; + } +@@ -2878,11 +2877,11 @@ xlog_recover_process( + * zero CRC check prevents warnings from being emitted when upgrading + * the kernel from one that does not add CRCs by default. + */ +- if (crc != old_crc) { +- if (old_crc || xfs_has_crc(log->l_mp)) { ++ if (crc != expected_crc) { ++ if (expected_crc || xfs_has_crc(log->l_mp)) { + xfs_alert(log->l_mp, + "log record CRC mismatch: found 0x%x, expected 0x%x.", +- le32_to_cpu(old_crc), ++ le32_to_cpu(expected_crc), + le32_to_cpu(crc)); + xfs_hex_dump(dp, 32); + } -- 2.47.3