From a45f41068722abd964bdd028da80b0e604f4c263 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Oct 2025 19:47:04 +0200 Subject: [PATCH] 6.17-stable patches added patches: arm64-cputype-add-neoverse-v3ae-definitions.patch arm64-debug-always-unmask-interrupts-in-el0_softstp.patch arm64-errata-apply-workarounds-for-neoverse-v3ae.patch cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch drm-xe-move-rebar-to-be-done-earlier.patch drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch drm-xe-unify-the-initialization-of-vram-regions.patch drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch nfsd-fix-last-write-offset-handling-in-layoutcommit.patch nfsd-implement-large-extent-array-support-in-pnfs.patch nfsd-minor-cleanup-in-layoutcommit-processing.patch nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch x86-resctrl-refactor-resctrl_arch_rmid_read.patch xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch --- ...putype-add-neoverse-v3ae-definitions.patch | 48 ++ ...ays-unmask-interrupts-in-el0_softstp.patch | 69 ++ ...-apply-workarounds-for-neoverse-v3ae.patch | 62 ++ ...ge-to-use-region_res_match_cxl_range.patch | 59 ++ ...-bos-in-same-vm-in-array-of-vm-binds.patch | 151 ++++ ...drm-xe-move-rebar-to-be-done-earlier.patch | 157 ++++ ...xe_vram_region-to-a-dedicated-header.patch | 355 +++++++++ ...y-the-initialization-of-vram-regions.patch | 669 ++++++++++++++++ ...r-vram-mapping-and-drop-manual-unmap.patch | 54 ++ ...le-and-device-vram-region-structures.patch | 724 ++++++++++++++++++ ...of-bounds-read-in-hfsplus_strcasecmp.patch | 223 ++++++ ...commit-for-the-flexfiles-layout-type.patch | 49 ++ ...rite-offset-handling-in-layoutcommit.patch | 113 +++ ...t-large-extent-array-support-in-pnfs.patch | 335 ++++++++ ...r-cleanup-in-layoutcommit-processing.patch | 50 ++ ...oding-and-decoding-of-nfsd4_deviceid.patch | 156 ++++ ...fix-pll-lock-and-o_cmn_ready-polling.patch | 265 +++++++ ...-wait-time-for-startup-state-machine.patch | 58 ++ ...dphy-store-hs_clk_rate-and-return-it.patch | 59 ++ queue-6.17/series | 23 + ...tivating-previously-unavailable-rmid.patch | 149 ++++ ...ctrl-refactor-resctrl_arch_rmid_read.patch | 89 +++ ...between-i386-and-other-architectures.patch | 172 +++++ ...crc-variable-in-xlog_recover_process.patch | 68 ++ 24 files changed, 4157 insertions(+) create mode 100644 queue-6.17/arm64-cputype-add-neoverse-v3ae-definitions.patch create mode 100644 queue-6.17/arm64-debug-always-unmask-interrupts-in-el0_softstp.patch create mode 100644 queue-6.17/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch create mode 100644 queue-6.17/cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch create mode 100644 queue-6.17/drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch create mode 100644 queue-6.17/drm-xe-move-rebar-to-be-done-earlier.patch 
create mode 100644 queue-6.17/drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch create mode 100644 queue-6.17/drm-xe-unify-the-initialization-of-vram-regions.patch create mode 100644 queue-6.17/drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch create mode 100644 queue-6.17/drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch create mode 100644 queue-6.17/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch create mode 100644 queue-6.17/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch create mode 100644 queue-6.17/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch create mode 100644 queue-6.17/nfsd-implement-large-extent-array-support-in-pnfs.patch create mode 100644 queue-6.17/nfsd-minor-cleanup-in-layoutcommit-processing.patch create mode 100644 queue-6.17/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch create mode 100644 queue-6.17/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch create mode 100644 queue-6.17/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch create mode 100644 queue-6.17/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch create mode 100644 queue-6.17/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch create mode 100644 queue-6.17/x86-resctrl-refactor-resctrl_arch_rmid_read.patch create mode 100644 queue-6.17/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch create mode 100644 queue-6.17/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch diff --git a/queue-6.17/arm64-cputype-add-neoverse-v3ae-definitions.patch b/queue-6.17/arm64-cputype-add-neoverse-v3ae-definitions.patch new file mode 100644 index 0000000000..8170424278 --- /dev/null +++ b/queue-6.17/arm64-cputype-add-neoverse-v3ae-definitions.patch @@ -0,0 +1,48 @@ +From 3bbf004c4808e2c3241e5c1ad6cc102f38a03c39 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Fri, 19 Sep 2025 15:58:28 +0100 +Subject: arm64: cputype: Add Neoverse-V3AE definitions + +From: Mark Rutland + +commit 3bbf004c4808e2c3241e5c1ad6cc102f38a03c39 upstream. + +Add cputype definitions for Neoverse-V3AE. These will be used for errata +detection in subsequent patches. + +These values can be found in the Neoverse-V3AE TRM: + + https://developer.arm.com/documentation/SDEN-2615521/9-0/ + +... in section A.6.1 ("MIDR_EL1, Main ID Register"). 
+ +Signed-off-by: Mark Rutland +Cc: James Morse +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Ryan Roberts +Signed-off-by: Will Deacon +Signed-off-by: Ryan Roberts +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/cputype.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/arm64/include/asm/cputype.h ++++ b/arch/arm64/include/asm/cputype.h +@@ -93,6 +93,7 @@ + #define ARM_CPU_PART_NEOVERSE_V2 0xD4F + #define ARM_CPU_PART_CORTEX_A720 0xD81 + #define ARM_CPU_PART_CORTEX_X4 0xD82 ++#define ARM_CPU_PART_NEOVERSE_V3AE 0xD83 + #define ARM_CPU_PART_NEOVERSE_V3 0xD84 + #define ARM_CPU_PART_CORTEX_X925 0xD85 + #define ARM_CPU_PART_CORTEX_A725 0xD87 +@@ -182,6 +183,7 @@ + #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) + #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) + #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) ++#define MIDR_NEOVERSE_V3AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3AE) + #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) + #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) + #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) diff --git a/queue-6.17/arm64-debug-always-unmask-interrupts-in-el0_softstp.patch b/queue-6.17/arm64-debug-always-unmask-interrupts-in-el0_softstp.patch new file mode 100644 index 0000000000..c1ea0cb23d --- /dev/null +++ b/queue-6.17/arm64-debug-always-unmask-interrupts-in-el0_softstp.patch @@ -0,0 +1,69 @@ +From ea0d55ae4b3207c33691a73da3443b1fd379f1d2 Mon Sep 17 00:00:00 2001 +From: Ada Couprie Diaz +Date: Tue, 14 Oct 2025 10:25:36 +0100 +Subject: arm64: debug: always unmask interrupts in el0_softstp() + +From: Ada Couprie Diaz + +commit ea0d55ae4b3207c33691a73da3443b1fd379f1d2 upstream. + +We intend that EL0 exception handlers unmask all DAIF exceptions +before calling exit_to_user_mode(). + +When completing single-step of a suspended breakpoint, we do not call +local_daif_restore(DAIF_PROCCTX) before calling exit_to_user_mode(), +leaving all DAIF exceptions masked. + +When pseudo-NMIs are not in use this is benign. + +When pseudo-NMIs are in use, this is unsound. At this point interrupts +are masked by both DAIF.IF and PMR_EL1, and subsequent irq flag +manipulation may not work correctly. For example, a subsequent +local_irq_enable() within exit_to_user_mode_loop() will only unmask +interrupts via PMR_EL1 (leaving those masked via DAIF.IF), and +anything depending on interrupts being unmasked (e.g. delivery of +signals) will not work correctly. + +This was detected by CONFIG_ARM64_DEBUG_PRIORITY_MASKING. + +Move the call to `try_step_suspended_breakpoints()` outside of the check +so that interrupts can be unmasked even if we don't call the step handler. 
+ +Fixes: 0ac7584c08ce ("arm64: debug: split single stepping exception entry") +Cc: # 6.17 +Signed-off-by: Ada Couprie Diaz +Acked-by: Mark Rutland +[catalin.marinas@arm.com: added Mark's rewritten commit log and some whitespace] +Signed-off-by: Catalin Marinas +[ada.coupriediaz@arm.com: Fix conflict for v6.17 stable] +Signed-off-by: Ada Couprie Diaz +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/entry-common.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kernel/entry-common.c ++++ b/arch/arm64/kernel/entry-common.c +@@ -832,6 +832,8 @@ static void noinstr el0_breakpt(struct p + + static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) + { ++ bool step_done; ++ + if (!is_ttbr0_addr(regs->pc)) + arm64_apply_bp_hardening(); + +@@ -842,10 +844,10 @@ static void noinstr el0_softstp(struct p + * If we are stepping a suspended breakpoint there's nothing more to do: + * the single-step is complete. + */ +- if (!try_step_suspended_breakpoints(regs)) { +- local_daif_restore(DAIF_PROCCTX); ++ step_done = try_step_suspended_breakpoints(regs); ++ local_daif_restore(DAIF_PROCCTX); ++ if (!step_done) + do_el0_softstep(esr, regs); +- } + exit_to_user_mode(regs); + } + diff --git a/queue-6.17/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch b/queue-6.17/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch new file mode 100644 index 0000000000..1a3dd59a00 --- /dev/null +++ b/queue-6.17/arm64-errata-apply-workarounds-for-neoverse-v3ae.patch @@ -0,0 +1,62 @@ +From 0c33aa1804d101c11ba1992504f17a42233f0e11 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Fri, 19 Sep 2025 15:58:29 +0100 +Subject: arm64: errata: Apply workarounds for Neoverse-V3AE + +From: Mark Rutland + +commit 0c33aa1804d101c11ba1992504f17a42233f0e11 upstream. + +Neoverse-V3AE is also affected by erratum #3312417, as described in its +Software Developer Errata Notice (SDEN) document: + + Neoverse V3AE (MP172) SDEN v9.0, erratum 3312417 + https://developer.arm.com/documentation/SDEN-2615521/9-0/ + +Enable the workaround for Neoverse-V3AE, and document this. + +Signed-off-by: Mark Rutland +Cc: James Morse +Cc: Will Deacon +Cc: Catalin Marinas +Signed-off-by: Ryan Roberts +Signed-off-by: Will Deacon +Signed-off-by: Ryan Roberts +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arch/arm64/silicon-errata.rst | 2 ++ + arch/arm64/Kconfig | 1 + + arch/arm64/kernel/cpu_errata.c | 1 + + 3 files changed, 4 insertions(+) + +--- a/Documentation/arch/arm64/silicon-errata.rst ++++ b/Documentation/arch/arm64/silicon-errata.rst +@@ -200,6 +200,8 @@ stable kernels. 
+ +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | + +----------------+-----------------+-----------------+-----------------------------+ ++| ARM | Neoverse-V3AE | #3312417 | ARM64_ERRATUM_3194386 | +++----------------+-----------------+-----------------+-----------------------------+ + | ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA| + | | | #562869,1047329 | | + +----------------+-----------------+-----------------+-----------------------------+ +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -1138,6 +1138,7 @@ config ARM64_ERRATUM_3194386 + * ARM Neoverse-V1 erratum 3324341 + * ARM Neoverse V2 erratum 3324336 + * ARM Neoverse-V3 erratum 3312417 ++ * ARM Neoverse-V3AE erratum 3312417 + + On affected cores "MSR SSBS, #0" instructions may not affect + subsequent speculative instructions, which may permit unexepected +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -545,6 +545,7 @@ static const struct midr_range erratum_s + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), ++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE), + {} + }; + #endif diff --git a/queue-6.17/cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch b/queue-6.17/cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch new file mode 100644 index 0000000000..be0b6a962b --- /dev/null +++ b/queue-6.17/cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch @@ -0,0 +1,59 @@ +From stable+bounces-188359-greg=kroah.com@vger.kernel.org Tue Oct 21 16:52:21 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 10:52:11 -0400 +Subject: cxl: Fix match_region_by_range() to use region_res_match_cxl_range() +To: stable@vger.kernel.org +Cc: Dave Jiang , Gregory Price , Alison Schofield , Dan Williams , Sasha Levin +Message-ID: <20251021145211.2230999-1-sashal@kernel.org> + +From: Dave Jiang + +[ Upstream commit f4d027921c811ff7fc16e4d03c6bbbf4347cf37a ] + +match_region_by_range() is not using the helper function that also takes +extended linear cache size into account when comparing regions. This +causes a x2 region to show up as 2 partial incomplete regions rather +than a single CXL region with extended linear cache support. Replace +the open coded compare logic with the proper helper function for +comparison. User visible impact is that when 'cxl list' is issued, +no activa CXL region(s) are shown. There may be multiple idle regions +present. No actual active CXL region is present in the kernel. 
+ +[dj: Fix stable address] + +Fixes: 0ec9849b6333 ("acpi/hmat / cxl: Add extended linear cache support for CXL") +Cc: stable@vger.kernel.org +Reviewed-by: Gregory Price +Reviewed-by: Alison Schofield +Reviewed-by: Dan Williams +Signed-off-by: Dave Jiang +[ constify struct range ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/region.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -831,7 +831,7 @@ static int match_free_decoder(struct dev + } + + static bool region_res_match_cxl_range(const struct cxl_region_params *p, +- struct range *range) ++ const struct range *range) + { + if (!p->res) + return false; +@@ -3287,10 +3287,7 @@ static int match_region_by_range(struct + p = &cxlr->params; + + guard(rwsem_read)(&cxl_rwsem.region); +- if (p->res && p->res->start == r->start && p->res->end == r->end) +- return 1; +- +- return 0; ++ return region_res_match_cxl_range(p, r); + } + + static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, diff --git a/queue-6.17/drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch b/queue-6.17/drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch new file mode 100644 index 0000000000..736fd6c4d0 --- /dev/null +++ b/queue-6.17/drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch @@ -0,0 +1,151 @@ +From stable+bounces-188353-greg=kroah.com@vger.kernel.org Tue Oct 21 16:13:07 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 10:11:33 -0400 +Subject: drm/xe: Don't allow evicting of BOs in same VM in array of VM binds +To: stable@vger.kernel.org +Cc: "Matthew Brost" , "Paulo Zanoni" , "Thomas Hellström" , "Lucas De Marchi" , "Sasha Levin" +Message-ID: <20251021141133.2151101-1-sashal@kernel.org> + +From: Matthew Brost + +[ Upstream commit 7ac74613e5f2ef3450f44fd2127198662c2563a9 ] + +An array of VM binds can potentially evict other buffer objects (BOs) +within the same VM under certain conditions, which may lead to NULL +pointer dereferences later in the bind pipeline. To prevent this, clear +the allow_res_evict flag in the xe_bo_validate call. 
+ +v2: + - Invert polarity of no_res_evict (Thomas) + - Add comment in code explaining issue (Thomas) + +Cc: stable@vger.kernel.org +Reported-by: Paulo Zanoni +Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6268 +Fixes: 774b5fa509a9 ("drm/xe: Avoid evicting object of the same vm in none fault mode") +Fixes: 77f2ef3f16f5 ("drm/xe: Lock all gpuva ops during VM bind IOCTL") +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Signed-off-by: Matthew Brost +Tested-by: Paulo Zanoni +Reviewed-by: Thomas Hellström +Link: https://lore.kernel.org/r/20251009110618.3481870-1-matthew.brost@intel.com +(cherry picked from commit 8b9ba8d6d95fe75fed6b0480bb03da4b321bea08) +Signed-off-by: Lucas De Marchi +[ removed exec parameter from xe_bo_validate() calls ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_vm.c | 32 +++++++++++++++++++++++--------- + drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ + 2 files changed, 25 insertions(+), 9 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_vm.c ++++ b/drivers/gpu/drm/xe/xe_vm.c +@@ -2894,7 +2894,7 @@ static void vm_bind_ioctl_ops_unwind(str + } + + static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, +- bool validate) ++ bool res_evict, bool validate) + { + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); +@@ -2905,7 +2905,8 @@ static int vma_lock_and_validate(struct + err = drm_exec_lock_obj(exec, &bo->ttm.base); + if (!err && validate) + err = xe_bo_validate(bo, vm, +- !xe_vm_in_preempt_fence_mode(vm)); ++ !xe_vm_in_preempt_fence_mode(vm) && ++ res_evict); + } + + return err; +@@ -2978,14 +2979,23 @@ static int prefetch_ranges(struct xe_vm + } + + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, +- struct xe_vma_op *op) ++ struct xe_vma_ops *vops, struct xe_vma_op *op) + { + int err = 0; ++ bool res_evict; ++ ++ /* ++ * We only allow evicting a BO within the VM if it is not part of an ++ * array of binds, as an array of binds can evict another BO within the ++ * bind. 
++ */ ++ res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (!op->map.invalidate_on_bind) + err = vma_lock_and_validate(exec, op->map.vma, ++ res_evict, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); + break; +@@ -2996,11 +3006,13 @@ static int op_lock_and_prep(struct drm_e + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.remap.unmap->va), +- false); ++ res_evict, false); + if (!err && op->remap.prev) +- err = vma_lock_and_validate(exec, op->remap.prev, true); ++ err = vma_lock_and_validate(exec, op->remap.prev, ++ res_evict, true); + if (!err && op->remap.next) +- err = vma_lock_and_validate(exec, op->remap.next, true); ++ err = vma_lock_and_validate(exec, op->remap.next, ++ res_evict, true); + break; + case DRM_GPUVA_OP_UNMAP: + err = check_ufence(gpuva_to_vma(op->base.unmap.va)); +@@ -3009,7 +3021,7 @@ static int op_lock_and_prep(struct drm_e + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.unmap.va), +- false); ++ res_evict, false); + break; + case DRM_GPUVA_OP_PREFETCH: + { +@@ -3025,7 +3037,7 @@ static int op_lock_and_prep(struct drm_e + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.prefetch.va), +- false); ++ res_evict, false); + if (!err && !xe_vma_has_no_bo(vma)) + err = xe_bo_migrate(xe_vma_bo(vma), + region_to_mem_type[region]); +@@ -3069,7 +3081,7 @@ static int vm_bind_ioctl_ops_lock_and_pr + return err; + + list_for_each_entry(op, &vops->list, link) { +- err = op_lock_and_prep(exec, vm, op); ++ err = op_lock_and_prep(exec, vm, vops, op); + if (err) + return err; + } +@@ -3698,6 +3710,8 @@ int xe_vm_bind_ioctl(struct drm_device * + } + + xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); ++ if (args->num_binds > 1) ++ vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; +--- a/drivers/gpu/drm/xe/xe_vm_types.h ++++ b/drivers/gpu/drm/xe/xe_vm_types.h +@@ -467,6 +467,8 @@ struct xe_vma_ops { + struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; + /** @flag: signify the properties within xe_vma_ops*/ + #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) ++#define XE_VMA_OPS_FLAG_MADVISE BIT(1) ++#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) + u32 flags; + #ifdef TEST_VM_OPS_ERROR + /** @inject_error: inject error to test error handling */ diff --git a/queue-6.17/drm-xe-move-rebar-to-be-done-earlier.patch b/queue-6.17/drm-xe-move-rebar-to-be-done-earlier.patch new file mode 100644 index 0000000000..d35d5fafda --- /dev/null +++ b/queue-6.17/drm-xe-move-rebar-to-be-done-earlier.patch @@ -0,0 +1,157 @@ +From stable+bounces-188344-greg=kroah.com@vger.kernel.org Tue Oct 21 15:37:41 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:34:27 -0400 +Subject: drm/xe: Move rebar to be done earlier +To: stable@vger.kernel.org +Cc: "Lucas De Marchi" , "Ilpo Järvinen" , "Sasha Levin" +Message-ID: <20251021133427.2079917-5-sashal@kernel.org> + +From: Lucas De Marchi + +[ Upstream commit d30203739be798d3de5c84db3060e96f00c54e82 ] + +There may be cases in which the BAR0 also needs to move to accommodate +the bigger BAR2. However if it's not released, the BAR2 resize fails. +During the vram probe it can't be released as it's already in use by +xe_mmio for early register access. + +Add a new function in xe_vram and let xe_pci call it directly before +even early device probe. 
This allows the BAR2 to resize in cases BAR0 +also needs to move, assuming there aren't other reasons to hold that +move: + + [] xe 0000:03:00.0: vgaarb: deactivate vga console + [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB + [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing + [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing + [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing + [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing + [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned + [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned + [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned + [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned + [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] + [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] + [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] + [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] + [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] + [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] + [] pcieport 0000:02:01.0: PCI bridge to [bus 03] + [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] + [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] + [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M + [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ... + +For BMG there are additional fix needed in the PCI side, but this +helps getting it to a working resize. + +All the rebar logic is more pci-specific than xe-specific and can be +done very early in the probe sequence. In future it would be good to +move it out of xe_vram.c, but this refactor is left for later. + +Cc: Ilpo Järvinen +Cc: stable@vger.kernel.org # 6.12+ +Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.intel.com +Reviewed-by: Ilpo Järvinen +Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.com +Signed-off-by: Lucas De Marchi +(cherry picked from commit 45e33f220fd625492c11e15733d8e9b4f9db82a4) +Signed-off-by: Lucas De Marchi +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_pci.c | 2 ++ + drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++-------- + drivers/gpu/drm/xe/xe_vram.h | 1 + + 3 files changed, 29 insertions(+), 8 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_pci.c ++++ b/drivers/gpu/drm/xe/xe_pci.c +@@ -805,6 +805,8 @@ static int xe_pci_probe(struct pci_dev * + if (err) + return err; + ++ xe_vram_resize_bar(xe); ++ + err = xe_device_probe_early(xe); + /* + * In Boot Survivability mode, no drm card is exposed and driver +--- a/drivers/gpu/drm/xe/xe_vram.c ++++ b/drivers/gpu/drm/xe/xe_vram.c +@@ -26,15 +26,35 @@ + + #define BAR_SIZE_SHIFT 20 + +-static void +-_resize_bar(struct xe_device *xe, int resno, resource_size_t size) ++/* ++ * Release all the BARs that could influence/block LMEMBAR resizing, i.e. 
++ * assigned IORESOURCE_MEM_64 BARs ++ */ ++static void release_bars(struct pci_dev *pdev) ++{ ++ struct resource *res; ++ int i; ++ ++ pci_dev_for_each_resource(pdev, res, i) { ++ /* Resource already un-assigned, do not reset it */ ++ if (!res->parent) ++ continue; ++ ++ /* No need to release unrelated BARs */ ++ if (!(res->flags & IORESOURCE_MEM_64)) ++ continue; ++ ++ pci_release_resource(pdev, i); ++ } ++} ++ ++static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) + { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + +- if (pci_resource_len(pdev, resno)) +- pci_release_resource(pdev, resno); ++ release_bars(pdev); + + ret = pci_resize_resource(pdev, resno, bar_size); + if (ret) { +@@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int re + * if force_vram_bar_size is set, attempt to set to the requested size + * else set to maximum possible size + */ +-static void resize_vram_bar(struct xe_device *xe) ++void xe_vram_resize_bar(struct xe_device *xe) + { + int force_vram_bar_size = xe_modparam.force_vram_bar_size; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); +@@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_de + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); + +- _resize_bar(xe, LMEM_BAR, rebar_size); ++ resize_bar(xe, LMEM_BAR, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); +@@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struc + return -ENXIO; + } + +- resize_vram_bar(xe); +- + lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); + lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!lmem_bar->io_size) +--- a/drivers/gpu/drm/xe/xe_vram.h ++++ b/drivers/gpu/drm/xe/xe_vram.h +@@ -11,6 +11,7 @@ + struct xe_device; + struct xe_vram_region; + ++void xe_vram_resize_bar(struct xe_device *xe); + int xe_vram_probe(struct xe_device *xe); + + struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); diff --git a/queue-6.17/drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch b/queue-6.17/drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch new file mode 100644 index 0000000000..bd3e54e571 --- /dev/null +++ b/queue-6.17/drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch @@ -0,0 +1,355 @@ +From stable+bounces-188342-greg=kroah.com@vger.kernel.org Tue Oct 21 15:36:37 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:34:25 -0400 +Subject: drm/xe: Move struct xe_vram_region to a dedicated header +To: stable@vger.kernel.org +Cc: "Piotr Piórkowski" , "Jani Nikula" , "Satyanarayana K V P" , "Matthew Brost" , "Lucas De Marchi" , "Sasha Levin" +Message-ID: <20251021133427.2079917-3-sashal@kernel.org> + +From: Piotr Piórkowski + +[ Upstream commit 7a20b4f558f4291161f71a5b7384262db9ccd6b0 ] + +Let's move the xe_vram_region structure to a new header dedicated to VRAM +to improve modularity and avoid unnecessary dependencies when only +VRAM-related structures are needed. 
+ +v2: Fix build if CONFIG_DRM_XE_DEVMEM_MIRROR is enabled +v3: Fix build if CONFIG_DRM_XE_DISPLAY is enabled +v4: Move helper to get tile dpagemap to xe_svm.c + +Signed-off-by: Piotr Piórkowski +Suggested-by: Jani Nikula +Reviewed-by: Satyanarayana K V P # rev3 +Acked-by: Matthew Brost +Link: https://lore.kernel.org/r/20250714184818.89201-4-piotr.piorkowski@intel.com +Signed-off-by: Lucas De Marchi +Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/display/xe_fb_pin.c | 1 + drivers/gpu/drm/xe/display/xe_plane_initial.c | 1 + drivers/gpu/drm/xe/xe_bo.c | 1 + drivers/gpu/drm/xe/xe_bo_types.h | 1 + drivers/gpu/drm/xe/xe_device.c | 1 + drivers/gpu/drm/xe/xe_device_types.h | 60 --------------------- + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 1 + drivers/gpu/drm/xe/xe_svm.c | 8 ++ + drivers/gpu/drm/xe/xe_tile.c | 1 + drivers/gpu/drm/xe/xe_tile.h | 12 ---- + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 1 + drivers/gpu/drm/xe/xe_vram.c | 1 + drivers/gpu/drm/xe/xe_vram_types.h | 74 ++++++++++++++++++++++++++ + 13 files changed, 91 insertions(+), 72 deletions(-) + create mode 100644 drivers/gpu/drm/xe/xe_vram_types.h + +--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c ++++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c +@@ -16,6 +16,7 @@ + #include "xe_device.h" + #include "xe_ggtt.h" + #include "xe_pm.h" ++#include "xe_vram_types.h" + + static void + write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, +--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c ++++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c +@@ -21,6 +21,7 @@ + #include "intel_plane.h" + #include "intel_plane_initial.h" + #include "xe_bo.h" ++#include "xe_vram_types.h" + #include "xe_wa.h" + + #include +--- a/drivers/gpu/drm/xe/xe_bo.c ++++ b/drivers/gpu/drm/xe/xe_bo.c +@@ -36,6 +36,7 @@ + #include "xe_trace_bo.h" + #include "xe_ttm_stolen_mgr.h" + #include "xe_vm.h" ++#include "xe_vram_types.h" + + const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", +--- a/drivers/gpu/drm/xe/xe_bo_types.h ++++ b/drivers/gpu/drm/xe/xe_bo_types.h +@@ -9,6 +9,7 @@ + #include + + #include ++#include + #include + #include + #include +--- a/drivers/gpu/drm/xe/xe_device.c ++++ b/drivers/gpu/drm/xe/xe_device.c +@@ -64,6 +64,7 @@ + #include "xe_ttm_sys_mgr.h" + #include "xe_vm.h" + #include "xe_vram.h" ++#include "xe_vram_types.h" + #include "xe_vsec.h" + #include "xe_wait_user_fence.h" + #include "xe_wa.h" +--- a/drivers/gpu/drm/xe/xe_device_types.h ++++ b/drivers/gpu/drm/xe/xe_device_types.h +@@ -10,7 +10,6 @@ + + #include + #include +-#include + #include + + #include "xe_devcoredump_types.h" +@@ -26,7 +25,6 @@ + #include "xe_sriov_vf_types.h" + #include "xe_step_types.h" + #include "xe_survivability_mode_types.h" +-#include "xe_ttm_vram_mgr_types.h" + + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + #define TEST_VM_OPS_ERROR +@@ -39,6 +37,7 @@ struct xe_ggtt; + struct xe_i2c; + struct xe_pat_ops; + struct xe_pxp; ++struct xe_vram_region; + + #define XE_BO_INVALID_OFFSET LONG_MAX + +@@ -72,63 +71,6 @@ struct xe_pxp; + struct xe_tile * : (tile__)->xe) + + /** +- * struct xe_vram_region - memory region structure +- * This is used to describe a memory region in xe +- * device, such as HBM memory or CXL extension memory. 
+- */ +-struct xe_vram_region { +- /** @tile: Back pointer to tile */ +- struct xe_tile *tile; +- /** @io_start: IO start address of this VRAM instance */ +- resource_size_t io_start; +- /** +- * @io_size: IO size of this VRAM instance +- * +- * This represents how much of this VRAM we can access +- * via the CPU through the VRAM BAR. This can be smaller +- * than @usable_size, in which case only part of VRAM is CPU +- * accessible (typically the first 256M). This +- * configuration is known as small-bar. +- */ +- resource_size_t io_size; +- /** @dpa_base: This memory regions's DPA (device physical address) base */ +- resource_size_t dpa_base; +- /** +- * @usable_size: usable size of VRAM +- * +- * Usable size of VRAM excluding reserved portions +- * (e.g stolen mem) +- */ +- resource_size_t usable_size; +- /** +- * @actual_physical_size: Actual VRAM size +- * +- * Actual VRAM size including reserved portions +- * (e.g stolen mem) +- */ +- resource_size_t actual_physical_size; +- /** @mapping: pointer to VRAM mappable space */ +- void __iomem *mapping; +- /** @ttm: VRAM TTM manager */ +- struct xe_ttm_vram_mgr ttm; +-#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +- /** @pagemap: Used to remap device memory as ZONE_DEVICE */ +- struct dev_pagemap pagemap; +- /** +- * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory +- * pages of this tile. +- */ +- struct drm_pagemap dpagemap; +- /** +- * @hpa_base: base host physical address +- * +- * This is generated when remap device memory as ZONE_DEVICE +- */ +- resource_size_t hpa_base; +-#endif +-}; +- +-/** + * struct xe_mmio - register mmio structure + * + * Represents an MMIO region that the CPU may use to access registers. A +--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c ++++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +@@ -33,6 +33,7 @@ + #include "xe_migrate.h" + #include "xe_sriov.h" + #include "xe_ttm_vram_mgr.h" ++#include "xe_vram_types.h" + #include "xe_wopcm.h" + + #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) +--- a/drivers/gpu/drm/xe/xe_svm.c ++++ b/drivers/gpu/drm/xe/xe_svm.c +@@ -17,6 +17,7 @@ + #include "xe_ttm_vram_mgr.h" + #include "xe_vm.h" + #include "xe_vm_types.h" ++#include "xe_vram_types.h" + + static bool xe_svm_range_in_vram(struct xe_svm_range *range) + { +@@ -989,6 +990,11 @@ int xe_svm_range_get_pages(struct xe_vm + + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + ++static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) ++{ ++ return &tile->mem.vram->dpagemap; ++} ++ + /** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. 
+@@ -1006,7 +1012,7 @@ int xe_svm_alloc_vram(struct xe_tile *ti + xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + +- dpagemap = xe_tile_local_pagemap(tile); ++ dpagemap = tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, +--- a/drivers/gpu/drm/xe/xe_tile.c ++++ b/drivers/gpu/drm/xe/xe_tile.c +@@ -20,6 +20,7 @@ + #include "xe_ttm_vram_mgr.h" + #include "xe_wa.h" + #include "xe_vram.h" ++#include "xe_vram_types.h" + + /** + * DOC: Multi-tile Design +--- a/drivers/gpu/drm/xe/xe_tile.h ++++ b/drivers/gpu/drm/xe/xe_tile.h +@@ -18,18 +18,6 @@ int xe_tile_alloc_vram(struct xe_tile *t + + void xe_tile_migrate_wait(struct xe_tile *tile); + +-#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +-static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +-{ +- return &tile->mem.vram->dpagemap; +-} +-#else +-static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +-{ +- return NULL; +-} +-#endif +- + static inline bool xe_tile_is_root(struct xe_tile *tile) + { + return tile->id == 0; +--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c ++++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +@@ -15,6 +15,7 @@ + #include "xe_gt.h" + #include "xe_res_cursor.h" + #include "xe_ttm_vram_mgr.h" ++#include "xe_vram_types.h" + + static inline struct drm_buddy_block * + xe_ttm_vram_mgr_first_block(struct list_head *list) +--- a/drivers/gpu/drm/xe/xe_vram.c ++++ b/drivers/gpu/drm/xe/xe_vram.c +@@ -21,6 +21,7 @@ + #include "xe_module.h" + #include "xe_sriov.h" + #include "xe_vram.h" ++#include "xe_vram_types.h" + + #define BAR_SIZE_SHIFT 20 + +--- /dev/null ++++ b/drivers/gpu/drm/xe/xe_vram_types.h +@@ -0,0 +1,74 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright © 2025 Intel Corporation ++ */ ++ ++#ifndef _XE_VRAM_TYPES_H_ ++#define _XE_VRAM_TYPES_H_ ++ ++#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ++#include ++#endif ++ ++#include "xe_ttm_vram_mgr_types.h" ++ ++struct xe_tile; ++ ++/** ++ * struct xe_vram_region - memory region structure ++ * This is used to describe a memory region in xe ++ * device, such as HBM memory or CXL extension memory. ++ */ ++struct xe_vram_region { ++ /** @tile: Back pointer to tile */ ++ struct xe_tile *tile; ++ /** @io_start: IO start address of this VRAM instance */ ++ resource_size_t io_start; ++ /** ++ * @io_size: IO size of this VRAM instance ++ * ++ * This represents how much of this VRAM we can access ++ * via the CPU through the VRAM BAR. This can be smaller ++ * than @usable_size, in which case only part of VRAM is CPU ++ * accessible (typically the first 256M). This ++ * configuration is known as small-bar. 
++ */ ++ resource_size_t io_size; ++ /** @dpa_base: This memory regions's DPA (device physical address) base */ ++ resource_size_t dpa_base; ++ /** ++ * @usable_size: usable size of VRAM ++ * ++ * Usable size of VRAM excluding reserved portions ++ * (e.g stolen mem) ++ */ ++ resource_size_t usable_size; ++ /** ++ * @actual_physical_size: Actual VRAM size ++ * ++ * Actual VRAM size including reserved portions ++ * (e.g stolen mem) ++ */ ++ resource_size_t actual_physical_size; ++ /** @mapping: pointer to VRAM mappable space */ ++ void __iomem *mapping; ++ /** @ttm: VRAM TTM manager */ ++ struct xe_ttm_vram_mgr ttm; ++#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ++ /** @pagemap: Used to remap device memory as ZONE_DEVICE */ ++ struct dev_pagemap pagemap; ++ /** ++ * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory ++ * pages of this tile. ++ */ ++ struct drm_pagemap dpagemap; ++ /** ++ * @hpa_base: base host physical address ++ * ++ * This is generated when remap device memory as ZONE_DEVICE ++ */ ++ resource_size_t hpa_base; ++#endif ++}; ++ ++#endif diff --git a/queue-6.17/drm-xe-unify-the-initialization-of-vram-regions.patch b/queue-6.17/drm-xe-unify-the-initialization-of-vram-regions.patch new file mode 100644 index 0000000000..74279072d6 --- /dev/null +++ b/queue-6.17/drm-xe-unify-the-initialization-of-vram-regions.patch @@ -0,0 +1,669 @@ +From stable+bounces-188343-greg=kroah.com@vger.kernel.org Tue Oct 21 15:37:38 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:34:26 -0400 +Subject: drm/xe: Unify the initialization of VRAM regions +To: stable@vger.kernel.org +Cc: "Piotr Piórkowski" , "Stuart Summers" , "Matthew Auld" , "Jani Nikula" , "Matthew Brost" , "Lucas De Marchi" , "Sasha Levin" +Message-ID: <20251021133427.2079917-4-sashal@kernel.org> + +From: Piotr Piórkowski + +[ Upstream commit 4b0a5f5ce7849aab7a67ba9f113ed75626f6de36 ] + +Currently in the drivers we have defined VRAM regions per device and per +tile. Initialization of these regions is done in two completely different +ways. To simplify the logic of the code and make it easier to add new +regions in the future, let's unify the way we initialize VRAM regions. + +v2: +- fix doc comments in struct xe_vram_region +- remove unnecessary includes (Jani) +v3: +- move code from xe_vram_init_regions_managers to xe_tile_init_noalloc + (Matthew) +- replace ioremap_wc to devm_ioremap_wc for mapping VRAM BAR + (Matthew) +- Replace the tile id parameter with vram region in the xe_pf_begin + function. 
+v4: +- remove tile back pointer from struct xe_vram_region +- add new back pointers: xe and migarte to xe_vram_region + +Signed-off-by: Piotr Piórkowski +Cc: Stuart Summers +Cc: Matthew Auld +Cc: Jani Nikula +Reviewed-by: Matthew Auld # rev3 +Acked-by: Matthew Brost +Link: https://lore.kernel.org/r/20250714184818.89201-6-piotr.piorkowski@intel.com +Signed-off-by: Lucas De Marchi +Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_bo.h | 4 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 13 +-- + drivers/gpu/drm/xe/xe_query.c | 3 + drivers/gpu/drm/xe/xe_svm.c | 43 ++++----- + drivers/gpu/drm/xe/xe_tile.c | 37 ++------ + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 16 ++- + drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 3 + drivers/gpu/drm/xe/xe_vram.c | 151 +++++++++++++++++++++-------------- + drivers/gpu/drm/xe/xe_vram.h | 2 + drivers/gpu/drm/xe/xe_vram_types.h | 17 +++ + 10 files changed, 164 insertions(+), 125 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_bo.h ++++ b/drivers/gpu/drm/xe/xe_bo.h +@@ -12,6 +12,7 @@ + #include "xe_macros.h" + #include "xe_vm_types.h" + #include "xe_vm.h" ++#include "xe_vram_types.h" + + #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ + +@@ -23,8 +24,9 @@ + #define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) + /* -- */ + #define XE_BO_FLAG_STOLEN BIT(4) ++#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id)) + #define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \ +- XE_BO_FLAG_VRAM0 << (tile)->id : \ ++ XE_BO_FLAG_VRAM((tile)->mem.vram) : \ + XE_BO_FLAG_SYSTEM) + #define XE_BO_FLAG_GGTT BIT(5) + #define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) +--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c ++++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c +@@ -23,6 +23,7 @@ + #include "xe_svm.h" + #include "xe_trace_bo.h" + #include "xe_vm.h" ++#include "xe_vram_types.h" + + struct pagefault { + u64 page_addr; +@@ -74,7 +75,7 @@ static bool vma_is_valid(struct xe_tile + } + + static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, +- bool atomic, unsigned int id) ++ bool atomic, struct xe_vram_region *vram) + { + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); +@@ -84,14 +85,16 @@ static int xe_pf_begin(struct drm_exec * + if (err) + return err; + +- if (atomic && IS_DGFX(vm->xe)) { ++ if (atomic && vram) { ++ xe_assert(vm->xe, IS_DGFX(vm->xe)); ++ + if (xe_vma_is_userptr(vma)) { + err = -EACCES; + return err; + } + + /* Migrate to VRAM, move should invalidate the VMA first */ +- err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); ++ err = xe_bo_migrate(bo, vram->placement); + if (err) + return err; + } else if (bo) { +@@ -138,7 +141,7 @@ retry_userptr: + /* Lock VM and BOs dma-resv */ + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { +- err = xe_pf_begin(&exec, vma, atomic, tile->id); ++ err = xe_pf_begin(&exec, vma, atomic, tile->mem.vram); + drm_exec_retry_on_contention(&exec); + if (xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; +@@ -573,7 +576,7 @@ static int handle_acc(struct xe_gt *gt, + /* Lock VM and BOs dma-resv */ + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { +- ret = xe_pf_begin(&exec, vma, true, tile->id); ++ ret = xe_pf_begin(&exec, vma, true, tile->mem.vram); + drm_exec_retry_on_contention(&exec); + if (ret) + break; +--- a/drivers/gpu/drm/xe/xe_query.c ++++ b/drivers/gpu/drm/xe/xe_query.c +@@ -27,6 +27,7 @@ + #include "xe_oa.h" + #include "xe_pxp.h" + 
#include "xe_ttm_vram_mgr.h" ++#include "xe_vram_types.h" + #include "xe_wa.h" + + static const u16 xe_to_user_engine_class[] = { +@@ -407,7 +408,7 @@ static int query_gt_list(struct xe_devic + gt_list->gt_list[iter].near_mem_regions = 0x1; + else + gt_list->gt_list[iter].near_mem_regions = +- BIT(gt_to_tile(gt)->id) << 1; ++ BIT(gt_to_tile(gt)->mem.vram->id) << 1; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; + +--- a/drivers/gpu/drm/xe/xe_svm.c ++++ b/drivers/gpu/drm/xe/xe_svm.c +@@ -311,12 +311,11 @@ static u64 xe_vram_region_page_to_dpa(st + struct page *page) + { + u64 dpa; +- struct xe_tile *tile = vr->tile; + u64 pfn = page_to_pfn(page); + u64 offset; + +- xe_tile_assert(tile, is_device_private_page(page)); +- xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base); ++ xe_assert(vr->xe, is_device_private_page(page)); ++ xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base); + + offset = (pfn << PAGE_SHIFT) - vr->hpa_base; + dpa = vr->dpa_base + offset; +@@ -333,7 +332,7 @@ static int xe_svm_copy(struct page **pag + unsigned long npages, const enum xe_svm_copy_dir dir) + { + struct xe_vram_region *vr = NULL; +- struct xe_tile *tile; ++ struct xe_device *xe; + struct dma_fence *fence = NULL; + unsigned long i; + #define XE_VRAM_ADDR_INVALID ~0x0ull +@@ -366,7 +365,7 @@ static int xe_svm_copy(struct page **pag + + if (!vr && spage) { + vr = page_to_vr(spage); +- tile = vr->tile; ++ xe = vr->xe; + } + XE_WARN_ON(spage && page_to_vr(spage) != vr); + +@@ -398,18 +397,18 @@ static int xe_svm_copy(struct page **pag + + if (vram_addr != XE_VRAM_ADDR_INVALID) { + if (sram) { +- vm_dbg(&tile->xe->drm, ++ vm_dbg(&xe->drm, + "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", + vram_addr, (u64)dma_addr[pos], i - pos + incr); +- __fence = xe_migrate_from_vram(tile->migrate, ++ __fence = xe_migrate_from_vram(vr->migrate, + i - pos + incr, + vram_addr, + dma_addr + pos); + } else { +- vm_dbg(&tile->xe->drm, ++ vm_dbg(&xe->drm, + "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", + (u64)dma_addr[pos], vram_addr, i - pos + incr); +- __fence = xe_migrate_to_vram(tile->migrate, ++ __fence = xe_migrate_to_vram(vr->migrate, + i - pos + incr, + dma_addr + pos, + vram_addr); +@@ -434,17 +433,17 @@ static int xe_svm_copy(struct page **pag + /* Extra mismatched device page, copy it */ + if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { + if (sram) { +- vm_dbg(&tile->xe->drm, ++ vm_dbg(&xe->drm, + "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", + vram_addr, (u64)dma_addr[pos], 1); +- __fence = xe_migrate_from_vram(tile->migrate, 1, ++ __fence = xe_migrate_from_vram(vr->migrate, 1, + vram_addr, + dma_addr + pos); + } else { +- vm_dbg(&tile->xe->drm, ++ vm_dbg(&xe->drm, + "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", + (u64)dma_addr[pos], vram_addr, 1); +- __fence = xe_migrate_to_vram(tile->migrate, 1, ++ __fence = xe_migrate_to_vram(vr->migrate, 1, + dma_addr + pos, + vram_addr); + } +@@ -502,9 +501,9 @@ static u64 block_offset_to_pfn(struct xe + return PHYS_PFN(offset + vr->hpa_base); + } + +-static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) ++static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) + { +- return &tile->mem.vram->ttm.mm; ++ return &vram->ttm.mm; + } + + static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, +@@ -518,8 +517,7 @@ static int xe_svm_populate_devmem_pfn(st + + list_for_each_entry(block, blocks, link) { + struct xe_vram_region *vr = 
block->private; +- struct xe_tile *tile = vr->tile; +- struct drm_buddy *buddy = tile_to_buddy(tile); ++ struct drm_buddy *buddy = vram_to_buddy(vr); + u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); + int i; + +@@ -685,8 +683,7 @@ static int xe_drm_pagemap_populate_mm(st + unsigned long timeslice_ms) + { + struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); +- struct xe_tile *tile = vr->tile; +- struct xe_device *xe = tile_to_xe(tile); ++ struct xe_device *xe = vr->xe; + struct device *dev = xe->drm.dev; + struct drm_buddy_block *block; + struct list_head *blocks; +@@ -700,9 +697,9 @@ static int xe_drm_pagemap_populate_mm(st + xe_pm_runtime_get(xe); + + retry: +- bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, ++ bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start, + ttm_bo_type_device, +- XE_BO_FLAG_VRAM_IF_DGFX(tile) | ++ (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | + XE_BO_FLAG_CPU_ADDR_MIRROR); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); +@@ -712,9 +709,7 @@ static int xe_drm_pagemap_populate_mm(st + } + + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, +- &dpagemap_devmem_ops, +- &tile->mem.vram->dpagemap, +- end - start); ++ &dpagemap_devmem_ops, dpagemap, end - start); + + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; + list_for_each_entry(block, blocks, link) +--- a/drivers/gpu/drm/xe/xe_tile.c ++++ b/drivers/gpu/drm/xe/xe_tile.c +@@ -7,6 +7,7 @@ + + #include + ++#include "xe_bo.h" + #include "xe_device.h" + #include "xe_ggtt.h" + #include "xe_gt.h" +@@ -114,11 +115,9 @@ int xe_tile_alloc_vram(struct xe_tile *t + if (!IS_DGFX(xe)) + return 0; + +- vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); +- if (!vram) +- return -ENOMEM; +- +- vram->tile = tile; ++ vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id); ++ if (IS_ERR(vram)) ++ return PTR_ERR(vram); + tile->mem.vram = vram; + + return 0; +@@ -156,21 +155,6 @@ int xe_tile_init_early(struct xe_tile *t + } + ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ + +-static int tile_ttm_mgr_init(struct xe_tile *tile) +-{ +- struct xe_device *xe = tile_to_xe(tile); +- int err; +- +- if (tile->mem.vram) { +- err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram->ttm); +- if (err) +- return err; +- xe->info.mem_region_mask |= BIT(tile->id) << 1; +- } +- +- return 0; +-} +- + /** + * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. + * @tile: The tile to initialize. 
+@@ -188,17 +172,20 @@ static int tile_ttm_mgr_init(struct xe_t + int xe_tile_init_noalloc(struct xe_tile *tile) + { + struct xe_device *xe = tile_to_xe(tile); +- int err; +- +- err = tile_ttm_mgr_init(tile); +- if (err) +- return err; + + xe_wa_apply_tile_workarounds(tile); + + if (xe->info.has_usm && IS_DGFX(xe)) + xe_devm_add(tile, tile->mem.vram); + ++ if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { ++ int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); ++ ++ if (err) ++ return err; ++ xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; ++ } ++ + return xe_tile_sysfs_init(tile); + } + +--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c ++++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +@@ -338,12 +338,18 @@ int __xe_ttm_vram_mgr_init(struct xe_dev + return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); + } + +-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) ++/** ++ * xe_ttm_vram_mgr_init - initialize TTM VRAM region ++ * @xe: pointer to Xe device ++ * @vram: pointer to xe_vram_region that contains the memory region attributes ++ * ++ * Initialize the Xe TTM for given @vram region using the given parameters. ++ * ++ * Returns 0 for success, negative error code otherwise. ++ */ ++int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram) + { +- struct xe_device *xe = tile_to_xe(tile); +- struct xe_vram_region *vram = tile->mem.vram; +- +- return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, ++ return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement, + xe_vram_region_usable_size(vram), + xe_vram_region_io_size(vram), + PAGE_SIZE); +--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h ++++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +@@ -11,11 +11,12 @@ + enum dma_data_direction; + struct xe_device; + struct xe_tile; ++struct xe_vram_region; + + int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, + u32 mem_type, u64 size, u64 io_size, + u64 default_page_size); +-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); ++int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram); + int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, +--- a/drivers/gpu/drm/xe/xe_vram.c ++++ b/drivers/gpu/drm/xe/xe_vram.c +@@ -20,6 +20,7 @@ + #include "xe_mmio.h" + #include "xe_module.h" + #include "xe_sriov.h" ++#include "xe_ttm_vram_mgr.h" + #include "xe_vram.h" + #include "xe_vram_types.h" + +@@ -138,7 +139,7 @@ static bool resource_is_valid(struct pci + return true; + } + +-static int determine_lmem_bar_size(struct xe_device *xe) ++static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar) + { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + +@@ -149,17 +150,16 @@ static int determine_lmem_bar_size(struc + + resize_vram_bar(xe); + +- xe->mem.vram->io_start = pci_resource_start(pdev, LMEM_BAR); +- xe->mem.vram->io_size = pci_resource_len(pdev, LMEM_BAR); +- if (!xe->mem.vram->io_size) ++ lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); ++ lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); ++ if (!lmem_bar->io_size) + return -EIO; + + /* XXX: Need to change when xe link code is ready */ +- xe->mem.vram->dpa_base = 0; ++ lmem_bar->dpa_base = 0; + + /* set up a map to the total memory area. 
*/ +- xe->mem.vram->mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram->io_start, +- xe->mem.vram->io_size); ++ lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size); + + return 0; + } +@@ -287,6 +287,67 @@ static void vram_fini(void *arg) + tile->mem.vram->mapping = NULL; + } + ++struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) ++{ ++ struct xe_vram_region *vram; ++ struct drm_device *drm = &xe->drm; ++ ++ xe_assert(xe, id < xe->info.tile_count); ++ ++ vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL); ++ if (!vram) ++ return NULL; ++ ++ vram->xe = xe; ++ vram->id = id; ++ vram->placement = placement; ++#if defined(CONFIG_DRM_XE_PAGEMAP) ++ vram->migrate = xe->tiles[id].migrate; ++#endif ++ return vram; ++} ++ ++static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram) ++{ ++ struct drm_device *drm = &xe->drm; ++ ++ if (vram->io_size < vram->usable_size) ++ drm_info(drm, "Small BAR device\n"); ++ ++ drm_info(drm, ++ "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", ++ vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size); ++ drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", ++ vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size, ++ &vram->io_start, vram->io_start + (u64)vram->io_size); ++} ++ ++static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram, ++ struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size, ++ u64 region_size, resource_size_t remain_io_size) ++{ ++ /* Check if VRAM region is already initialized */ ++ if (vram->mapping) ++ return 0; ++ ++ vram->actual_physical_size = region_size; ++ vram->io_start = lmem_bar->io_start + offset; ++ vram->io_size = min_t(u64, usable_size, remain_io_size); ++ ++ if (!vram->io_size) { ++ drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); ++ return -ENODEV; ++ } ++ ++ vram->dpa_base = lmem_bar->dpa_base + offset; ++ vram->mapping = lmem_bar->mapping + offset; ++ vram->usable_size = usable_size; ++ ++ print_vram_region_info(xe, vram); ++ ++ return 0; ++} ++ + /** + * xe_vram_probe() - Probe VRAM configuration + * @xe: the &xe_device +@@ -298,82 +359,52 @@ static void vram_fini(void *arg) + int xe_vram_probe(struct xe_device *xe) + { + struct xe_tile *tile; +- resource_size_t io_size; ++ struct xe_vram_region lmem_bar; ++ resource_size_t remain_io_size; + u64 available_size = 0; + u64 total_size = 0; +- u64 tile_offset; +- u64 tile_size; +- u64 vram_size; + int err; + u8 id; + + if (!IS_DGFX(xe)) + return 0; + +- /* Get the size of the root tile's vram for later accessibility comparison */ +- tile = xe_device_get_root_tile(xe); +- err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); ++ err = determine_lmem_bar_size(xe, &lmem_bar); + if (err) + return err; ++ drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size); + +- err = determine_lmem_bar_size(xe); +- if (err) +- return err; +- +- drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram->io_start, +- &xe->mem.vram->io_size); +- +- io_size = xe->mem.vram->io_size; ++ remain_io_size = lmem_bar.io_size; + +- /* tile specific ranges */ + for_each_tile(tile, xe, id) { +- err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); ++ u64 region_size; ++ u64 usable_size; ++ u64 tile_offset; ++ ++ err = tile_vram_size(tile, &usable_size, ®ion_size, &tile_offset); + if (err) + return err; + +- tile->mem.vram->actual_physical_size = tile_size; +- tile->mem.vram->io_start = xe->mem.vram->io_start + tile_offset; +- tile->mem.vram->io_size = min_t(u64, vram_size, io_size); +- +- if (!tile->mem.vram->io_size) { +- drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); +- return -ENODEV; +- } ++ total_size += region_size; ++ available_size += usable_size; + +- tile->mem.vram->dpa_base = xe->mem.vram->dpa_base + tile_offset; +- tile->mem.vram->usable_size = vram_size; +- tile->mem.vram->mapping = xe->mem.vram->mapping + tile_offset; +- +- if (tile->mem.vram->io_size < tile->mem.vram->usable_size) +- drm_info(&xe->drm, "Small BAR device\n"); +- drm_info(&xe->drm, +- "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", +- id, tile->id, &tile->mem.vram->actual_physical_size, +- &tile->mem.vram->usable_size, &tile->mem.vram->io_size); +- drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", +- id, tile->id, &tile->mem.vram->dpa_base, +- tile->mem.vram->dpa_base + (u64)tile->mem.vram->actual_physical_size, +- &tile->mem.vram->io_start, +- tile->mem.vram->io_start + (u64)tile->mem.vram->io_size); +- +- /* calculate total size using tile size to get the correct HW sizing */ +- total_size += tile_size; +- available_size += vram_size; ++ err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size, ++ region_size, remain_io_size); ++ if (err) ++ return err; + +- if (total_size > xe->mem.vram->io_size) { ++ if (total_size > lmem_bar.io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", +- &total_size, &xe->mem.vram->io_size); ++ &total_size, &lmem_bar.io_size); + } + +- io_size -= min_t(u64, tile_size, io_size); ++ remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size); + } + +- xe->mem.vram->actual_physical_size = total_size; +- +- drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram->io_start, +- &xe->mem.vram->actual_physical_size); +- drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram->io_start, +- &available_size); ++ err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size, ++ lmem_bar.io_size); ++ if (err) ++ return err; + + return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); + } +--- a/drivers/gpu/drm/xe/xe_vram.h ++++ b/drivers/gpu/drm/xe/xe_vram.h +@@ -13,6 +13,8 @@ struct xe_vram_region; + + int xe_vram_probe(struct xe_device *xe); + ++struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); ++ + resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); + resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); + resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); +--- a/drivers/gpu/drm/xe/xe_vram_types.h ++++ b/drivers/gpu/drm/xe/xe_vram_types.h +@@ -12,7 +12,8 @@ + + #include "xe_ttm_vram_mgr_types.h" + +-struct xe_tile; ++struct xe_device; ++struct xe_migrate; + + /** + * struct xe_vram_region - memory region structure +@@ -20,8 +21,14 @@ struct xe_tile; + * device, such as HBM memory or CXL extension memory. + */ + struct xe_vram_region { +- /** @tile: Back pointer to tile */ +- struct xe_tile *tile; ++ /** @xe: Back pointer to xe device */ ++ struct xe_device *xe; ++ /** ++ * @id: VRAM region instance id ++ * ++ * The value should be unique for VRAM region. 
++ */ ++ u8 id; + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** +@@ -54,7 +61,11 @@ struct xe_vram_region { + void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; ++ /** @placement: TTM placement dedicated for this region */ ++ u32 placement; + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ++ /** @migrate: Back pointer to migrate */ ++ struct xe_migrate *migrate; + /** @pagemap: Used to remap device memory as ZONE_DEVICE */ + struct dev_pagemap pagemap; + /** diff --git a/queue-6.17/drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch b/queue-6.17/drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch new file mode 100644 index 0000000000..f6476741bd --- /dev/null +++ b/queue-6.17/drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch @@ -0,0 +1,54 @@ +From stable+bounces-188340-greg=kroah.com@vger.kernel.org Tue Oct 21 15:35:39 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:34:23 -0400 +Subject: drm/xe: Use devm_ioremap_wc for VRAM mapping and drop manual unmap +To: stable@vger.kernel.org +Cc: "Piotr Piórkowski" , "Matthew Auld" , "Matthew Brost" , "Lucas De Marchi" , "Sasha Levin" +Message-ID: <20251021133427.2079917-1-sashal@kernel.org> + +From: Piotr Piórkowski + +[ Upstream commit 922ae875230be91c7f05f2aa90d176b6693e2601 ] + +Let's replace the manual call to ioremap_wc function with devm_ioremap_wc +function, ensuring that VRAM mappings are automatically released when +the driver is detached. +Since devm_ioremap_wc registers the mapping with the device's managed +resources, the explicit iounmap call in vram_fini is no longer needed, +so let's remove it. + +Signed-off-by: Piotr Piórkowski +Suggested-by: Matthew Auld +Reviewed-by: Matthew Auld +Acked-by: Matthew Brost +Link: https://lore.kernel.org/r/20250714184818.89201-2-piotr.piorkowski@intel.com +Signed-off-by: Lucas De Marchi +Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_vram.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_vram.c ++++ b/drivers/gpu/drm/xe/xe_vram.c +@@ -156,7 +156,8 @@ static int determine_lmem_bar_size(struc + xe->mem.vram.dpa_base = 0; + + /* set up a map to the total memory area. 
*/ +- xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); ++ xe->mem.vram.mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram.io_start, ++ xe->mem.vram.io_size); + + return 0; + } +@@ -278,9 +279,6 @@ static void vram_fini(void *arg) + struct xe_tile *tile; + int id; + +- if (xe->mem.vram.mapping) +- iounmap(xe->mem.vram.mapping); +- + xe->mem.vram.mapping = NULL; + + for_each_tile(tile, xe, id) diff --git a/queue-6.17/drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch b/queue-6.17/drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch new file mode 100644 index 0000000000..4f52f60ded --- /dev/null +++ b/queue-6.17/drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch @@ -0,0 +1,724 @@ +From stable+bounces-188341-greg=kroah.com@vger.kernel.org Tue Oct 21 15:36:28 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 09:34:24 -0400 +Subject: drm/xe: Use dynamic allocation for tile and device VRAM region structures +To: stable@vger.kernel.org +Cc: "Piotr Piórkowski" , "Stuart Summers" , "Matthew Auld" , "Satyanarayana K V P" , "Matthew Brost" , "Lucas De Marchi" , "Sasha Levin" +Message-ID: <20251021133427.2079917-2-sashal@kernel.org> + +From: Piotr Piórkowski + +[ Upstream commit f92cfd72d9a650f90260c54accd840c6500c4c3a ] + +In future platforms, we will need to represent the device and tile +VRAM regions in a more dynamic way, so let's abandon the static +allocation of these structures and start use a dynamic allocation. + +v2: + - Add a helpers for accessing fields of the xe_vram_region structure +v3: +- Add missing EXPORT_SYMBOL_IF_KUNIT for + xe_vram_region_actual_physical_size + +Signed-off-by: Piotr Piórkowski +Cc: Stuart Summers +Cc: Matthew Auld +Cc: Satyanarayana K V P +Reviewed-by: Satyanarayana K V P +Acked-by: Matthew Brost +Link: https://lore.kernel.org/r/20250714184818.89201-3-piotr.piorkowski@intel.com +Signed-off-by: Lucas De Marchi +Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/display/xe_fb_pin.c | 4 + drivers/gpu/drm/xe/display/xe_plane_initial.c | 2 + drivers/gpu/drm/xe/xe_assert.h | 4 + drivers/gpu/drm/xe/xe_device.c | 19 ++++ + drivers/gpu/drm/xe/xe_device_types.h | 6 - + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 + drivers/gpu/drm/xe/xe_migrate.c | 25 +++-- + drivers/gpu/drm/xe/xe_pci.c | 6 + + drivers/gpu/drm/xe/xe_query.c | 2 + drivers/gpu/drm/xe/xe_svm.c | 24 +---- + drivers/gpu/drm/xe/xe_tile.c | 34 ++++++- + drivers/gpu/drm/xe/xe_tile.h | 4 + drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 10 +- + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 7 - + drivers/gpu/drm/xe/xe_vram.c | 121 +++++++++++++++++++------- + drivers/gpu/drm/xe/xe_vram.h | 9 + + 16 files changed, 202 insertions(+), 77 deletions(-) + +--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c ++++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c +@@ -289,7 +289,7 @@ static struct i915_vma *__xe_pin_fb_vma( + if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && + intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && + !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { +- struct xe_tile *tile = xe_device_get_root_tile(xe); ++ struct xe_vram_region *vram = xe_device_get_root_tile(xe)->mem.vram; + + /* + * If we need to able to access the clear-color value stored in +@@ -297,7 +297,7 @@ static struct i915_vma *__xe_pin_fb_vma( + * accessible. This is important on small-bar systems where + * only some subset of VRAM is CPU accessible. 
+ */ +- if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { ++ if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) { + ret = -EINVAL; + goto err; + } +--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c ++++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c +@@ -103,7 +103,7 @@ initial_plane_bo(struct xe_device *xe, + * We don't currently expect this to ever be placed in the + * stolen portion. + */ +- if (phys_base >= tile0->mem.vram.usable_size) { ++ if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) { + drm_err(&xe->drm, + "Initial plane programming using invalid range, phys_base=%pa\n", + &phys_base); +--- a/drivers/gpu/drm/xe/xe_assert.h ++++ b/drivers/gpu/drm/xe/xe_assert.h +@@ -12,6 +12,7 @@ + + #include "xe_gt_types.h" + #include "xe_step.h" ++#include "xe_vram.h" + + /** + * DOC: Xe Asserts +@@ -145,7 +146,8 @@ + const struct xe_tile *__tile = (tile); \ + char __buf[10] __maybe_unused; \ + xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ +- __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ ++ __tile->id, ({ string_get_size( \ ++ xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \ + STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ + }) + +--- a/drivers/gpu/drm/xe/xe_device.c ++++ b/drivers/gpu/drm/xe/xe_device.c +@@ -688,6 +688,21 @@ static void sriov_update_device_info(str + } + } + ++static int xe_device_vram_alloc(struct xe_device *xe) ++{ ++ struct xe_vram_region *vram; ++ ++ if (!IS_DGFX(xe)) ++ return 0; ++ ++ vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); ++ if (!vram) ++ return -ENOMEM; ++ ++ xe->mem.vram = vram; ++ return 0; ++} ++ + /** + * xe_device_probe_early: Device early probe + * @xe: xe device instance +@@ -735,6 +750,10 @@ int xe_device_probe_early(struct xe_devi + + xe->wedged.mode = xe_modparam.wedged_mode; + ++ err = xe_device_vram_alloc(xe); ++ if (err) ++ return err; ++ + return 0; + } + ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ +--- a/drivers/gpu/drm/xe/xe_device_types.h ++++ b/drivers/gpu/drm/xe/xe_device_types.h +@@ -77,6 +77,8 @@ struct xe_pxp; + * device, such as HBM memory or CXL extension memory. + */ + struct xe_vram_region { ++ /** @tile: Back pointer to tile */ ++ struct xe_tile *tile; + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** +@@ -216,7 +218,7 @@ struct xe_tile { + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ +- struct xe_vram_region vram; ++ struct xe_vram_region *vram; + + /** @mem.ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; +@@ -412,7 +414,7 @@ struct xe_device { + /** @mem: memory info for device */ + struct { + /** @mem.vram: VRAM info for device */ +- struct xe_vram_region vram; ++ struct xe_vram_region *vram; + /** @mem.sys_mgr: system TTM manager */ + struct ttm_resource_manager sys_mgr; + /** @mem.sys_mgr: system memory shrinker. 
*/ +--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c ++++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +@@ -1604,7 +1604,7 @@ static u64 pf_query_free_lmem(struct xe_ + { + struct xe_tile *tile = gt->tile; + +- return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); ++ return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager); + } + + static u64 pf_query_max_lmem(struct xe_gt *gt) +--- a/drivers/gpu/drm/xe/xe_migrate.c ++++ b/drivers/gpu/drm/xe/xe_migrate.c +@@ -34,6 +34,7 @@ + #include "xe_sync.h" + #include "xe_trace_bo.h" + #include "xe_vm.h" ++#include "xe_vram.h" + + /** + * struct xe_migrate - migrate context. +@@ -130,34 +131,36 @@ static u64 xe_migrate_vram_ofs(struct xe + u64 identity_offset = IDENTITY_OFFSET; + + if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) +- identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); ++ identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size ++ (xe->mem.vram), SZ_1G); + +- addr -= xe->mem.vram.dpa_base; ++ addr -= xe_vram_region_dpa_base(xe->mem.vram); + return addr + (identity_offset << xe_pt_shift(2)); + } + + static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, + u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) + { ++ struct xe_vram_region *vram = xe->mem.vram; ++ resource_size_t dpa_base = xe_vram_region_dpa_base(vram); + u64 pos, ofs, flags; + u64 entry; + /* XXX: Unclear if this should be usable_size? */ +- u64 vram_limit = xe->mem.vram.actual_physical_size + +- xe->mem.vram.dpa_base; ++ u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base; + u32 level = 2; + + ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; + flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, + true, 0); + +- xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); ++ xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M)); + + /* + * Use 1GB pages when possible, last chunk always use 2M + * pages as mixing reserved memory (stolen, WOCPM) with a single + * mapping is not allowed on certain platforms. 
+ */ +- for (pos = xe->mem.vram.dpa_base; pos < vram_limit; ++ for (pos = dpa_base; pos < vram_limit; + pos += SZ_1G, ofs += 8) { + if (pos + SZ_1G >= vram_limit) { + entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, +@@ -307,11 +310,11 @@ static int xe_migrate_prepare_vm(struct + /* Identity map the entire vram at 256GiB offset */ + if (IS_DGFX(xe)) { + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; ++ resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram); + + xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, + pat_index, pt30_ofs); +- xe_assert(xe, xe->mem.vram.actual_physical_size <= +- (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); ++ xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); + + /* + * Identity map the entire vram for compressed pat_index for xe2+ +@@ -320,11 +323,11 @@ static int xe_migrate_prepare_vm(struct + if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { + u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; + u64 vram_offset = IDENTITY_OFFSET + +- DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); ++ DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G); + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; + +- xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - +- IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); ++ xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - ++ IDENTITY_OFFSET / 2) * SZ_1G); + xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, + comp_pat_index, pt31_ofs); + } +--- a/drivers/gpu/drm/xe/xe_pci.c ++++ b/drivers/gpu/drm/xe/xe_pci.c +@@ -687,6 +687,8 @@ static int xe_info_init(struct xe_device + * All of these together determine the overall GT count. + */ + for_each_tile(tile, xe, id) { ++ int err; ++ + gt = tile->primary_gt; + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; +@@ -694,6 +696,10 @@ static int xe_info_init(struct xe_device + gt->info.engine_mask = graphics_desc->hw_engine_mask; + xe->info.gt_count++; + ++ err = xe_tile_alloc_vram(tile); ++ if (err) ++ return err; ++ + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.engine_mask |= media_desc->hw_engine_mask; + +--- a/drivers/gpu/drm/xe/xe_query.c ++++ b/drivers/gpu/drm/xe/xe_query.c +@@ -334,7 +334,7 @@ static int query_config(struct xe_device + config->num_params = num_params; + config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = + xe->info.devid | (xe->info.revid << 16); +- if (xe_device_get_root_tile(xe)->mem.vram.usable_size) ++ if (xe->mem.vram) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; + if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) +--- a/drivers/gpu/drm/xe/xe_svm.c ++++ b/drivers/gpu/drm/xe/xe_svm.c +@@ -306,16 +306,11 @@ static struct xe_vram_region *page_to_vr + return container_of(page_pgmap(page), struct xe_vram_region, pagemap); + } + +-static struct xe_tile *vr_to_tile(struct xe_vram_region *vr) +-{ +- return container_of(vr, struct xe_tile, mem.vram); +-} +- + static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, + struct page *page) + { + u64 dpa; +- struct xe_tile *tile = vr_to_tile(vr); ++ struct xe_tile *tile = vr->tile; + u64 pfn = page_to_pfn(page); + u64 offset; + +@@ -370,7 +365,7 @@ static int xe_svm_copy(struct page **pag + + if (!vr && spage) { + vr = page_to_vr(spage); +- tile = vr_to_tile(vr); ++ tile = vr->tile; + } + XE_WARN_ON(spage && page_to_vr(spage) != vr); + +@@ -508,7 +503,7 @@ static u64 block_offset_to_pfn(struct xe + + static 
struct drm_buddy *tile_to_buddy(struct xe_tile *tile) + { +- return &tile->mem.vram.ttm.mm; ++ return &tile->mem.vram->ttm.mm; + } + + static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, +@@ -522,7 +517,7 @@ static int xe_svm_populate_devmem_pfn(st + + list_for_each_entry(block, blocks, link) { + struct xe_vram_region *vr = block->private; +- struct xe_tile *tile = vr_to_tile(vr); ++ struct xe_tile *tile = vr->tile; + struct drm_buddy *buddy = tile_to_buddy(tile); + u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); + int i; +@@ -683,20 +678,15 @@ u64 xe_svm_find_vma_start(struct xe_vm * + } + + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +-static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) +-{ +- return &tile->mem.vram; +-} +- + static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) + { +- struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); ++ struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); ++ struct xe_tile *tile = vr->tile; + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; +- struct xe_vram_region *vr = tile_to_vr(tile); + struct drm_buddy_block *block; + struct list_head *blocks; + struct xe_bo *bo; +@@ -722,7 +712,7 @@ static int xe_drm_pagemap_populate_mm(st + + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, +- &tile->mem.vram.dpagemap, ++ &tile->mem.vram->dpagemap, + end - start); + + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; +--- a/drivers/gpu/drm/xe/xe_tile.c ++++ b/drivers/gpu/drm/xe/xe_tile.c +@@ -19,6 +19,7 @@ + #include "xe_tile_sysfs.h" + #include "xe_ttm_vram_mgr.h" + #include "xe_wa.h" ++#include "xe_vram.h" + + /** + * DOC: Multi-tile Design +@@ -96,6 +97,33 @@ static int xe_tile_alloc(struct xe_tile + } + + /** ++ * xe_tile_alloc_vram - Perform per-tile VRAM structs allocation ++ * @tile: Tile to perform allocations for ++ * ++ * Allocates VRAM per-tile data structures using DRM-managed allocations. ++ * Does not touch the hardware. ++ * ++ * Returns -ENOMEM if allocations fail, otherwise 0. 
++ */ ++int xe_tile_alloc_vram(struct xe_tile *tile) ++{ ++ struct xe_device *xe = tile_to_xe(tile); ++ struct xe_vram_region *vram; ++ ++ if (!IS_DGFX(xe)) ++ return 0; ++ ++ vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); ++ if (!vram) ++ return -ENOMEM; ++ ++ vram->tile = tile; ++ tile->mem.vram = vram; ++ ++ return 0; ++} ++ ++/** + * xe_tile_init_early - Initialize the tile and primary GT + * @tile: Tile to initialize + * @xe: Parent Xe device +@@ -132,8 +160,8 @@ static int tile_ttm_mgr_init(struct xe_t + struct xe_device *xe = tile_to_xe(tile); + int err; + +- if (tile->mem.vram.usable_size) { +- err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); ++ if (tile->mem.vram) { ++ err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram->ttm); + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->id) << 1; +@@ -168,7 +196,7 @@ int xe_tile_init_noalloc(struct xe_tile + xe_wa_apply_tile_workarounds(tile); + + if (xe->info.has_usm && IS_DGFX(xe)) +- xe_devm_add(tile, &tile->mem.vram); ++ xe_devm_add(tile, tile->mem.vram); + + return xe_tile_sysfs_init(tile); + } +--- a/drivers/gpu/drm/xe/xe_tile.h ++++ b/drivers/gpu/drm/xe/xe_tile.h +@@ -14,12 +14,14 @@ int xe_tile_init_early(struct xe_tile *t + int xe_tile_init_noalloc(struct xe_tile *tile); + int xe_tile_init(struct xe_tile *tile); + ++int xe_tile_alloc_vram(struct xe_tile *tile); ++ + void xe_tile_migrate_wait(struct xe_tile *tile); + + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) + { +- return &tile->mem.vram.dpagemap; ++ return &tile->mem.vram->dpagemap; + } + #else + static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c ++++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +@@ -25,6 +25,7 @@ + #include "xe_ttm_stolen_mgr.h" + #include "xe_ttm_vram_mgr.h" + #include "xe_wa.h" ++#include "xe_vram.h" + + struct xe_ttm_stolen_mgr { + struct xe_ttm_vram_mgr base; +@@ -82,15 +83,16 @@ static u32 get_wopcm_size(struct xe_devi + + static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) + { +- struct xe_tile *tile = xe_device_get_root_tile(xe); ++ struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; ++ resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u64 stolen_size, wopcm_size; + u64 tile_offset; + u64 tile_size; + +- tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; +- tile_size = tile->mem.vram.actual_physical_size; ++ tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram); ++ tile_size = xe_vram_region_actual_physical_size(tile_vram); + + /* Use DSM base address instead for stolen memory */ + mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; +@@ -107,7 +109,7 @@ static s64 detect_bar2_dgfx(struct xe_de + + /* Verify usage fits in the actual resource available */ + if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) +- mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; ++ mgr->io_base = tile_io_start + mgr->stolen_base; + + /* + * There may be few KB of platform dependent reserved memory at the end +--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c ++++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +@@ -340,10 +340,11 @@ int __xe_ttm_vram_mgr_init(struct xe_dev + int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) + { + struct xe_device 
*xe = tile_to_xe(tile); +- struct xe_vram_region *vram = &tile->mem.vram; ++ struct xe_vram_region *vram = tile->mem.vram; + + return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, +- vram->usable_size, vram->io_size, ++ xe_vram_region_usable_size(vram), ++ xe_vram_region_io_size(vram), + PAGE_SIZE); + } + +@@ -392,7 +393,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_ + */ + xe_res_first(res, offset, length, &cursor); + for_each_sgtable_sg((*sgt), sg, i) { +- phys_addr_t phys = cursor.start + tile->mem.vram.io_start; ++ phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram); + size_t size = min_t(u64, cursor.size, SZ_2G); + dma_addr_t addr; + +--- a/drivers/gpu/drm/xe/xe_vram.c ++++ b/drivers/gpu/drm/xe/xe_vram.c +@@ -3,6 +3,7 @@ + * Copyright © 2021-2024 Intel Corporation + */ + ++#include + #include + + #include +@@ -147,17 +148,17 @@ static int determine_lmem_bar_size(struc + + resize_vram_bar(xe); + +- xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); +- xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); +- if (!xe->mem.vram.io_size) ++ xe->mem.vram->io_start = pci_resource_start(pdev, LMEM_BAR); ++ xe->mem.vram->io_size = pci_resource_len(pdev, LMEM_BAR); ++ if (!xe->mem.vram->io_size) + return -EIO; + + /* XXX: Need to change when xe link code is ready */ +- xe->mem.vram.dpa_base = 0; ++ xe->mem.vram->dpa_base = 0; + + /* set up a map to the total memory area. */ +- xe->mem.vram.mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram.io_start, +- xe->mem.vram.io_size); ++ xe->mem.vram->mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram->io_start, ++ xe->mem.vram->io_size); + + return 0; + } +@@ -279,10 +280,10 @@ static void vram_fini(void *arg) + struct xe_tile *tile; + int id; + +- xe->mem.vram.mapping = NULL; ++ xe->mem.vram->mapping = NULL; + + for_each_tile(tile, xe, id) +- tile->mem.vram.mapping = NULL; ++ tile->mem.vram->mapping = NULL; + } + + /** +@@ -318,10 +319,10 @@ int xe_vram_probe(struct xe_device *xe) + if (err) + return err; + +- drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, +- &xe->mem.vram.io_size); ++ drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram->io_start, ++ &xe->mem.vram->io_size); + +- io_size = xe->mem.vram.io_size; ++ io_size = xe->mem.vram->io_size; + + /* tile specific ranges */ + for_each_tile(tile, xe, id) { +@@ -329,45 +330,105 @@ int xe_vram_probe(struct xe_device *xe) + if (err) + return err; + +- tile->mem.vram.actual_physical_size = tile_size; +- tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; +- tile->mem.vram.io_size = min_t(u64, vram_size, io_size); ++ tile->mem.vram->actual_physical_size = tile_size; ++ tile->mem.vram->io_start = xe->mem.vram->io_start + tile_offset; ++ tile->mem.vram->io_size = min_t(u64, vram_size, io_size); + +- if (!tile->mem.vram.io_size) { ++ if (!tile->mem.vram->io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } + +- tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; +- tile->mem.vram.usable_size = vram_size; +- tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; ++ tile->mem.vram->dpa_base = xe->mem.vram->dpa_base + tile_offset; ++ tile->mem.vram->usable_size = vram_size; ++ tile->mem.vram->mapping = xe->mem.vram->mapping + tile_offset; + +- if (tile->mem.vram.io_size < tile->mem.vram.usable_size) ++ if (tile->mem.vram->io_size < tile->mem.vram->usable_size) + drm_info(&xe->drm, "Small BAR device\n"); +- drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, +- tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); +- drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, +- &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, +- &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); ++ drm_info(&xe->drm, ++ "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", ++ id, tile->id, &tile->mem.vram->actual_physical_size, ++ &tile->mem.vram->usable_size, &tile->mem.vram->io_size); ++ drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", ++ id, tile->id, &tile->mem.vram->dpa_base, ++ tile->mem.vram->dpa_base + (u64)tile->mem.vram->actual_physical_size, ++ &tile->mem.vram->io_start, ++ tile->mem.vram->io_start + (u64)tile->mem.vram->io_size); + + /* calculate total size using tile size to get the correct HW sizing */ + total_size += tile_size; + available_size += vram_size; + +- if (total_size > xe->mem.vram.io_size) { ++ if (total_size > xe->mem.vram->io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", +- &total_size, &xe->mem.vram.io_size); ++ &total_size, &xe->mem.vram->io_size); + } + + io_size -= min_t(u64, tile_size, io_size); + } + +- xe->mem.vram.actual_physical_size = total_size; ++ xe->mem.vram->actual_physical_size = total_size; + +- drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, +- &xe->mem.vram.actual_physical_size); +- drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, ++ drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram->io_start, ++ &xe->mem.vram->actual_physical_size); ++ drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram->io_start, + &available_size); + + return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); + } ++ ++/** ++ * xe_vram_region_io_start - Get the IO start of a VRAM region ++ * @vram: the VRAM region ++ * ++ * Return: the IO start of the VRAM region, or 0 if not valid ++ */ ++resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram) ++{ ++ return vram ? vram->io_start : 0; ++} ++ ++/** ++ * xe_vram_region_io_size - Get the IO size of a VRAM region ++ * @vram: the VRAM region ++ * ++ * Return: the IO size of the VRAM region, or 0 if not valid ++ */ ++resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram) ++{ ++ return vram ? vram->io_size : 0; ++} ++ ++/** ++ * xe_vram_region_dpa_base - Get the DPA base of a VRAM region ++ * @vram: the VRAM region ++ * ++ * Return: the DPA base of the VRAM region, or 0 if not valid ++ */ ++resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram) ++{ ++ return vram ? 
vram->dpa_base : 0; ++} ++ ++/** ++ * xe_vram_region_usable_size - Get the usable size of a VRAM region ++ * @vram: the VRAM region ++ * ++ * Return: the usable size of the VRAM region, or 0 if not valid ++ */ ++resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram) ++{ ++ return vram ? vram->usable_size : 0; ++} ++ ++/** ++ * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region ++ * @vram: the VRAM region ++ * ++ * Return: the actual physical size of the VRAM region, or 0 if not valid ++ */ ++resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram) ++{ ++ return vram ? vram->actual_physical_size : 0; ++} ++EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size); +--- a/drivers/gpu/drm/xe/xe_vram.h ++++ b/drivers/gpu/drm/xe/xe_vram.h +@@ -6,8 +6,17 @@ + #ifndef _XE_VRAM_H_ + #define _XE_VRAM_H_ + ++#include ++ + struct xe_device; ++struct xe_vram_region; + + int xe_vram_probe(struct xe_device *xe); + ++resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); ++resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); ++resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); ++resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram); ++resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram); ++ + #endif diff --git a/queue-6.17/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch b/queue-6.17/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch new file mode 100644 index 0000000000..0660fa2a1e --- /dev/null +++ b/queue-6.17/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch @@ -0,0 +1,223 @@ +From 42520df65bf67189541a425f7d36b0b3e7bd7844 Mon Sep 17 00:00:00 2001 +From: Viacheslav Dubeyko +Date: Fri, 19 Sep 2025 12:12:44 -0700 +Subject: hfsplus: fix slab-out-of-bounds read in hfsplus_strcasecmp() + +From: Viacheslav Dubeyko + +commit 42520df65bf67189541a425f7d36b0b3e7bd7844 upstream. + +The hfsplus_strcasecmp() logic can trigger the issue: + +[ 117.317703][ T9855] ================================================================== +[ 117.318353][ T9855] BUG: KASAN: slab-out-of-bounds in hfsplus_strcasecmp+0x1bc/0x490 +[ 117.318991][ T9855] Read of size 2 at addr ffff88802160f40c by task repro/9855 +[ 117.319577][ T9855] +[ 117.319773][ T9855] CPU: 0 UID: 0 PID: 9855 Comm: repro Not tainted 6.17.0-rc6 #33 PREEMPT(full) +[ 117.319780][ T9855] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +[ 117.319783][ T9855] Call Trace: +[ 117.319785][ T9855] +[ 117.319788][ T9855] dump_stack_lvl+0x1c1/0x2a0 +[ 117.319795][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319803][ T9855] ? __pfx_dump_stack_lvl+0x10/0x10 +[ 117.319808][ T9855] ? rcu_is_watching+0x15/0xb0 +[ 117.319816][ T9855] ? lock_release+0x4b/0x3e0 +[ 117.319821][ T9855] ? __kasan_check_byte+0x12/0x40 +[ 117.319828][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319835][ T9855] ? __virt_addr_valid+0x4a5/0x5c0 +[ 117.319842][ T9855] print_report+0x17e/0x7e0 +[ 117.319848][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319855][ T9855] ? __virt_addr_valid+0x4a5/0x5c0 +[ 117.319862][ T9855] ? __phys_addr+0xd3/0x180 +[ 117.319869][ T9855] ? hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319876][ T9855] kasan_report+0x147/0x180 +[ 117.319882][ T9855] ? hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319891][ T9855] hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319900][ T9855] ? 
__pfx_hfsplus_cat_case_cmp_key+0x10/0x10 +[ 117.319906][ T9855] hfs_find_rec_by_key+0xa9/0x1e0 +[ 117.319913][ T9855] __hfsplus_brec_find+0x18e/0x470 +[ 117.319920][ T9855] ? __pfx_hfsplus_bnode_find+0x10/0x10 +[ 117.319926][ T9855] ? __pfx_hfs_find_rec_by_key+0x10/0x10 +[ 117.319933][ T9855] ? __pfx___hfsplus_brec_find+0x10/0x10 +[ 117.319942][ T9855] hfsplus_brec_find+0x28f/0x510 +[ 117.319949][ T9855] ? __pfx_hfs_find_rec_by_key+0x10/0x10 +[ 117.319956][ T9855] ? __pfx_hfsplus_brec_find+0x10/0x10 +[ 117.319963][ T9855] ? __kmalloc_noprof+0x2a9/0x510 +[ 117.319969][ T9855] ? hfsplus_find_init+0x8c/0x1d0 +[ 117.319976][ T9855] hfsplus_brec_read+0x2b/0x120 +[ 117.319983][ T9855] hfsplus_lookup+0x2aa/0x890 +[ 117.319990][ T9855] ? __pfx_hfsplus_lookup+0x10/0x10 +[ 117.320003][ T9855] ? d_alloc_parallel+0x2f0/0x15e0 +[ 117.320008][ T9855] ? __lock_acquire+0xaec/0xd80 +[ 117.320013][ T9855] ? __pfx_d_alloc_parallel+0x10/0x10 +[ 117.320019][ T9855] ? __raw_spin_lock_init+0x45/0x100 +[ 117.320026][ T9855] ? __init_waitqueue_head+0xa9/0x150 +[ 117.320034][ T9855] __lookup_slow+0x297/0x3d0 +[ 117.320039][ T9855] ? __pfx___lookup_slow+0x10/0x10 +[ 117.320045][ T9855] ? down_read+0x1ad/0x2e0 +[ 117.320055][ T9855] lookup_slow+0x53/0x70 +[ 117.320065][ T9855] walk_component+0x2f0/0x430 +[ 117.320073][ T9855] path_lookupat+0x169/0x440 +[ 117.320081][ T9855] filename_lookup+0x212/0x590 +[ 117.320089][ T9855] ? __pfx_filename_lookup+0x10/0x10 +[ 117.320098][ T9855] ? strncpy_from_user+0x150/0x290 +[ 117.320105][ T9855] ? getname_flags+0x1e5/0x540 +[ 117.320112][ T9855] user_path_at+0x3a/0x60 +[ 117.320117][ T9855] __x64_sys_umount+0xee/0x160 +[ 117.320123][ T9855] ? __pfx___x64_sys_umount+0x10/0x10 +[ 117.320129][ T9855] ? do_syscall_64+0xb7/0x3a0 +[ 117.320135][ T9855] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320141][ T9855] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320145][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.320150][ T9855] ? 
exc_page_fault+0x9f/0xf0 +[ 117.320154][ T9855] entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320158][ T9855] RIP: 0033:0x7f7dd7908b07 +[ 117.320163][ T9855] Code: 23 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 08 +[ 117.320167][ T9855] RSP: 002b:00007ffd5ebd9698 EFLAGS: 00000202 ORIG_RAX: 00000000000000a6 +[ 117.320172][ T9855] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7dd7908b07 +[ 117.320176][ T9855] RDX: 0000000000000009 RSI: 0000000000000009 RDI: 00007ffd5ebd9740 +[ 117.320179][ T9855] RBP: 00007ffd5ebda780 R08: 0000000000000005 R09: 00007ffd5ebd9530 +[ 117.320181][ T9855] R10: 00007f7dd799bfc0 R11: 0000000000000202 R12: 000055e2008b32d0 +[ 117.320184][ T9855] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 +[ 117.320189][ T9855] +[ 117.320190][ T9855] +[ 117.351311][ T9855] Allocated by task 9855: +[ 117.351683][ T9855] kasan_save_track+0x3e/0x80 +[ 117.352093][ T9855] __kasan_kmalloc+0x8d/0xa0 +[ 117.352490][ T9855] __kmalloc_noprof+0x288/0x510 +[ 117.352914][ T9855] hfsplus_find_init+0x8c/0x1d0 +[ 117.353342][ T9855] hfsplus_lookup+0x19c/0x890 +[ 117.353747][ T9855] __lookup_slow+0x297/0x3d0 +[ 117.354148][ T9855] lookup_slow+0x53/0x70 +[ 117.354514][ T9855] walk_component+0x2f0/0x430 +[ 117.354921][ T9855] path_lookupat+0x169/0x440 +[ 117.355325][ T9855] filename_lookup+0x212/0x590 +[ 117.355740][ T9855] user_path_at+0x3a/0x60 +[ 117.356115][ T9855] __x64_sys_umount+0xee/0x160 +[ 117.356529][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.356920][ T9855] entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.357429][ T9855] +[ 117.357636][ T9855] The buggy address belongs to the object at ffff88802160f000 +[ 117.357636][ T9855] which belongs to the cache kmalloc-2k of size 2048 +[ 117.358827][ T9855] The buggy address is located 0 bytes to the right of +[ 117.358827][ T9855] allocated 1036-byte region [ffff88802160f000, ffff88802160f40c) +[ 117.360061][ T9855] +[ 117.360266][ T9855] The buggy address belongs to the physical page: +[ 117.360813][ T9855] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x21608 +[ 117.361562][ T9855] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +[ 117.362285][ T9855] flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) +[ 117.362929][ T9855] page_type: f5(slab) +[ 117.363282][ T9855] raw: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002 +[ 117.364015][ T9855] raw: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000 +[ 117.364750][ T9855] head: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002 +[ 117.365491][ T9855] head: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000 +[ 117.366232][ T9855] head: 00fff00000000003 ffffea0000858201 00000000ffffffff 00000000ffffffff +[ 117.366968][ T9855] head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008 +[ 117.367711][ T9855] page dumped because: kasan: bad access detected +[ 117.368259][ T9855] page_owner tracks the page as allocated +[ 117.368745][ T9855] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN1 +[ 117.370541][ T9855] post_alloc_hook+0x240/0x2a0 +[ 117.370954][ T9855] get_page_from_freelist+0x2101/0x21e0 +[ 117.371435][ T9855] __alloc_frozen_pages_noprof+0x274/0x380 +[ 117.371935][ T9855] alloc_pages_mpol+0x241/0x4b0 +[ 117.372360][ T9855] allocate_slab+0x8d/0x380 +[ 117.372752][ T9855] ___slab_alloc+0xbe3/0x1400 +[ 117.373159][ T9855] 
__kmalloc_cache_noprof+0x296/0x3d0 +[ 117.373621][ T9855] nexthop_net_init+0x75/0x100 +[ 117.374038][ T9855] ops_init+0x35c/0x5c0 +[ 117.374400][ T9855] setup_net+0x10c/0x320 +[ 117.374768][ T9855] copy_net_ns+0x31b/0x4d0 +[ 117.375156][ T9855] create_new_namespaces+0x3f3/0x720 +[ 117.375613][ T9855] unshare_nsproxy_namespaces+0x11c/0x170 +[ 117.376094][ T9855] ksys_unshare+0x4ca/0x8d0 +[ 117.376477][ T9855] __x64_sys_unshare+0x38/0x50 +[ 117.376879][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.377265][ T9855] page last free pid 9110 tgid 9110 stack trace: +[ 117.377795][ T9855] __free_frozen_pages+0xbeb/0xd50 +[ 117.378229][ T9855] __put_partials+0x152/0x1a0 +[ 117.378625][ T9855] put_cpu_partial+0x17c/0x250 +[ 117.379026][ T9855] __slab_free+0x2d4/0x3c0 +[ 117.379404][ T9855] qlist_free_all+0x97/0x140 +[ 117.379790][ T9855] kasan_quarantine_reduce+0x148/0x160 +[ 117.380250][ T9855] __kasan_slab_alloc+0x22/0x80 +[ 117.380662][ T9855] __kmalloc_noprof+0x232/0x510 +[ 117.381074][ T9855] tomoyo_supervisor+0xc0a/0x1360 +[ 117.381498][ T9855] tomoyo_env_perm+0x149/0x1e0 +[ 117.381903][ T9855] tomoyo_find_next_domain+0x15ad/0x1b90 +[ 117.382378][ T9855] tomoyo_bprm_check_security+0x11c/0x180 +[ 117.382859][ T9855] security_bprm_check+0x89/0x280 +[ 117.383289][ T9855] bprm_execve+0x8f1/0x14a0 +[ 117.383673][ T9855] do_execveat_common+0x528/0x6b0 +[ 117.384103][ T9855] __x64_sys_execve+0x94/0xb0 +[ 117.384500][ T9855] +[ 117.384706][ T9855] Memory state around the buggy address: +[ 117.385179][ T9855] ffff88802160f300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 117.385854][ T9855] ffff88802160f380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 117.386534][ T9855] >ffff88802160f400: 00 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.387204][ T9855] ^ +[ 117.387566][ T9855] ffff88802160f480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.388243][ T9855] ffff88802160f500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.388918][ T9855] ================================================================== + +The issue takes place if the length field of struct hfsplus_unistr +is bigger than HFSPLUS_MAX_STRLEN. The patch simply checks +the length of comparing strings. And if the strings' length +is bigger than HFSPLUS_MAX_STRLEN, then it is corrected +to this value. + +v2 +The string length correction has been added for hfsplus_strcmp(). 
+ +Reported-by: Jiaming Zhang +Signed-off-by: Viacheslav Dubeyko +cc: John Paul Adrian Glaubitz +cc: Yangtao Li +cc: linux-fsdevel@vger.kernel.org +cc: syzkaller@googlegroups.com +Link: https://lore.kernel.org/r/20250919191243.1370388-1-slava@dubeyko.com +Signed-off-by: Viacheslav Dubeyko +Signed-off-by: Greg Kroah-Hartman +--- + fs/hfsplus/unicode.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +--- a/fs/hfsplus/unicode.c ++++ b/fs/hfsplus/unicode.c +@@ -40,6 +40,18 @@ int hfsplus_strcasecmp(const struct hfsp + p1 = s1->unicode; + p2 = s2->unicode; + ++ if (len1 > HFSPLUS_MAX_STRLEN) { ++ len1 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s1->length), len1); ++ } ++ ++ if (len2 > HFSPLUS_MAX_STRLEN) { ++ len2 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s2->length), len2); ++ } ++ + while (1) { + c1 = c2 = 0; + +@@ -74,6 +86,18 @@ int hfsplus_strcmp(const struct hfsplus_ + p1 = s1->unicode; + p2 = s2->unicode; + ++ if (len1 > HFSPLUS_MAX_STRLEN) { ++ len1 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s1->length), len1); ++ } ++ ++ if (len2 > HFSPLUS_MAX_STRLEN) { ++ len2 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s2->length), len2); ++ } ++ + for (len = min(len1, len2); len > 0; len--) { + c1 = be16_to_cpu(*p1); + c2 = be16_to_cpu(*p2); diff --git a/queue-6.17/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch b/queue-6.17/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch new file mode 100644 index 0000000000..e790217174 --- /dev/null +++ b/queue-6.17/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch @@ -0,0 +1,49 @@ +From stable+bounces-188242-greg=kroah.com@vger.kernel.org Mon Oct 20 21:59:03 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 15:58:46 -0400 +Subject: NFSD: Define a proc_layoutcommit for the FlexFiles layout type +To: stable@vger.kernel.org +Cc: Chuck Lever , Robert Morris , Thomas Haynes , Sasha Levin +Message-ID: <20251020195846.1896208-3-sashal@kernel.org> + +From: Chuck Lever + +[ Upstream commit 4b47a8601b71ad98833b447d465592d847b4dc77 ] + +Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT +operation on a FlexFiles layout. 
+ +Reported-by: Robert Morris +Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t +Cc: Thomas Haynes +Cc: stable@vger.kernel.org +Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server") +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/flexfilelayout.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/nfsd/flexfilelayout.c ++++ b/fs/nfsd/flexfilelayout.c +@@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super + return 0; + } + ++static __be32 ++nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, ++ struct nfsd4_layoutcommit *lcp) ++{ ++ return nfs_ok; ++} ++ + const struct nfsd4_layout_ops ff_layout_ops = { + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, +@@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ + .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, + .proc_layoutget = nfsd4_ff_proc_layoutget, + .encode_layoutget = nfsd4_ff_encode_layoutget, ++ .proc_layoutcommit = nfsd4_ff_proc_layoutcommit, + }; diff --git a/queue-6.17/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch b/queue-6.17/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch new file mode 100644 index 0000000000..9ff671e616 --- /dev/null +++ b/queue-6.17/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch @@ -0,0 +1,113 @@ +From stable+bounces-188064-greg=kroah.com@vger.kernel.org Mon Oct 20 14:52:53 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:52:25 -0400 +Subject: NFSD: Fix last write offset handling in layoutcommit +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Konstantin Evtushenko , Christoph Hellwig , Jeff Layton , Chuck Lever , Sasha Levin +Message-ID: <20251020125226.1759978-4-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit d68886bae76a4b9b3484d23e5b7df086f940fa38 ] + +The data type of loca_last_write_offset is newoffset4 and is switched +on a boolean value, no_newoffset, that indicates if a previous write +occurred or not. If no_newoffset is FALSE, an offset is not given. +This means that client does not try to update the file size. Thus, +server should not try to calculate new file size and check if it fits +into the segment range. See RFC 8881, section 12.5.4.2. + +Sometimes the current incorrect logic may cause clients to hang when +trying to sync an inode. If layoutcommit fails, the client marks the +inode as dirty again. 
+ +Fixes: 9cf514ccfacb ("nfsd: implement pNFS operations") +Cc: stable@vger.kernel.org +Co-developed-by: Konstantin Evtushenko +Signed-off-by: Konstantin Evtushenko +Signed-off-by: Sergey Bashirov +Reviewed-by: Christoph Hellwig +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 5 ++--- + fs/nfsd/nfs4proc.c | 30 +++++++++++++++--------------- + 2 files changed, 17 insertions(+), 18 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -118,7 +118,6 @@ nfsd4_block_commit_blocks(struct inode * + struct iomap *iomaps, int nr_iomaps) + { + struct timespec64 mtime = inode_get_mtime(inode); +- loff_t new_size = lcp->lc_last_wr + 1; + struct iattr iattr = { .ia_valid = 0 }; + int error; + +@@ -128,9 +127,9 @@ nfsd4_block_commit_blocks(struct inode * + iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; + +- if (new_size > i_size_read(inode)) { ++ if (lcp->lc_size_chg) { + iattr.ia_valid |= ATTR_SIZE; +- iattr.ia_size = new_size; ++ iattr.ia_size = lcp->lc_newsize; + } + + error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2504,7 +2504,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + const struct nfsd4_layout_seg *seg = &lcp->lc_seg; + struct svc_fh *current_fh = &cstate->current_fh; + const struct nfsd4_layout_ops *ops; +- loff_t new_size = lcp->lc_last_wr + 1; + struct inode *inode; + struct nfs4_layout_stateid *ls; + __be32 nfserr; +@@ -2520,13 +2519,21 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + goto out; + inode = d_inode(current_fh->fh_dentry); + +- nfserr = nfserr_inval; +- if (new_size <= seg->offset) +- goto out; +- if (new_size > seg->offset + seg->length) +- goto out; +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) +- goto out; ++ lcp->lc_size_chg = false; ++ if (lcp->lc_newoffset) { ++ loff_t new_size = lcp->lc_last_wr + 1; ++ ++ nfserr = nfserr_inval; ++ if (new_size <= seg->offset) ++ goto out; ++ if (new_size > seg->offset + seg->length) ++ goto out; ++ ++ if (new_size > i_size_read(inode)) { ++ lcp->lc_size_chg = true; ++ lcp->lc_newsize = new_size; ++ } ++ } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, +@@ -2542,13 +2549,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + +- if (new_size > i_size_read(inode)) { +- lcp->lc_size_chg = true; +- lcp->lc_newsize = new_size; +- } else { +- lcp->lc_size_chg = false; +- } +- + nfserr = ops->proc_layoutcommit(inode, rqstp, lcp); + nfs4_put_stid(&ls->ls_stid); + out: diff --git a/queue-6.17/nfsd-implement-large-extent-array-support-in-pnfs.patch b/queue-6.17/nfsd-implement-large-extent-array-support-in-pnfs.patch new file mode 100644 index 0000000000..9a685853ac --- /dev/null +++ b/queue-6.17/nfsd-implement-large-extent-array-support-in-pnfs.patch @@ -0,0 +1,335 @@ +From stable+bounces-188063-greg=kroah.com@vger.kernel.org Mon Oct 20 14:52:45 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:52:24 -0400 +Subject: NFSD: Implement large extent array support in pNFS +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Konstantin Evtushenko , Jeff Layton , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125226.1759978-3-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 
f963cf2b91a30b5614c514f3ad53ca124cb65280 ] + +When pNFS client in the block or scsi layout mode sends layoutcommit +to MDS, a variable length array of modified extents is supplied within +the request. This patch allows the server to accept such extent arrays +if they do not fit within single memory page. + +The issue can be reproduced when writing to a 1GB file using FIO with +O_DIRECT, 4K block and large I/O depth without preallocation of the +file. In this case, the server returns NFSERR_BADXDR to the client. + +Co-developed-by: Konstantin Evtushenko +Signed-off-by: Konstantin Evtushenko +Signed-off-by: Sergey Bashirov +Reviewed-by: Jeff Layton +Reviewed-by: Christoph Hellwig +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 20 ++++++----- + fs/nfsd/blocklayoutxdr.c | 83 +++++++++++++++++++++++++++++++---------------- + fs/nfsd/blocklayoutxdr.h | 4 +- + fs/nfsd/nfs4proc.c | 2 - + fs/nfsd/nfs4xdr.c | 11 ++---- + fs/nfsd/pnfs.h | 1 + fs/nfsd/xdr4.h | 3 - + 7 files changed, 78 insertions(+), 46 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct su + } + + static __be32 +-nfsd4_block_proc_layoutcommit(struct inode *inode, ++nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) + { + struct iomap *iomaps; + int nr_iomaps; + __be32 nfserr; + +- nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, &nr_iomaps, +- i_blocksize(inode)); ++ rqstp->rq_arg = lcp->lc_up_layout; ++ svcxdr_init_decode(rqstp); ++ ++ nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream, ++ &iomaps, &nr_iomaps, i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; + +@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct sup + return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); + } + static __be32 +-nfsd4_scsi_proc_layoutcommit(struct inode *inode, ++nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) + { + struct iomap *iomaps; + int nr_iomaps; + __be32 nfserr; + +- nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, &nr_iomaps, +- i_blocksize(inode)); ++ rqstp->rq_arg = lcp->lc_up_layout; ++ svcxdr_init_decode(rqstp); ++ ++ nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream, ++ &iomaps, &nr_iomaps, i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct + + /** + * nfsd4_block_decode_layoutupdate - decode the block layout extent array +- * @p: pointer to the xdr data +- * @len: number of bytes to decode ++ * @xdr: subbuf set to the encoded array + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length +@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid +- * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_bad_xdr: The encoded array in @xdr is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ + __be32 +-nfsd4_block_decode_layoutupdate(__be32 *p, 
u32 len, struct iomap **iomapp, ++nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp, + int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; +- u32 nr_iomaps, i; ++ u32 nr_iomaps, expected, len, i; ++ __be32 nfserr; + +- if (len < sizeof(u32)) +- return nfserr_bad_xdr; +- len -= sizeof(u32); +- if (len % PNFS_BLOCK_EXTENT_SIZE) ++ if (xdr_stream_decode_u32(xdr, &nr_iomaps)) + return nfserr_bad_xdr; + +- nr_iomaps = be32_to_cpup(p++); +- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) ++ len = sizeof(__be32) + xdr_stream_remaining(xdr); ++ expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE; ++ if (len != expected) + return nfserr_bad_xdr; + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); +@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 * + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; + +- p = svcxdr_decode_deviceid4(p, &bex.vol_id); +- p = xdr_decode_hyper(p, &bex.foff); ++ if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.foff)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (bex.foff & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.len)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- p = xdr_decode_hyper(p, &bex.len); + if (bex.len & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.soff)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- p = xdr_decode_hyper(p, &bex.soff); + if (bex.soff & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u32(xdr, &bex.es)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- bex.es = be32_to_cpup(p++); + if (bex.es != PNFS_BLOCK_READWRITE_DATA) { ++ nfserr = nfserr_inval; + goto fail; + } + +@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 * + return nfs_ok; + fail: + kfree(iomaps); +- return nfserr_inval; ++ return nfserr; + } + + /** + * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array +- * @p: pointer to the xdr data +- * @len: number of bytes to decode ++ * @xdr: subbuf set to the encoded array + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length +@@ -200,21 +220,22 @@ fail: + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid +- * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_bad_xdr: The encoded array in @xdr is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ + __be32 +-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, ++nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp, + int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; +- u32 nr_iomaps, expected, i; ++ u32 nr_iomaps, expected, len, i; ++ __be32 nfserr; + +- if (len < sizeof(u32)) ++ if (xdr_stream_decode_u32(xdr, &nr_iomaps)) + return nfserr_bad_xdr; + +- nr_iomaps = be32_to_cpup(p++); ++ len = sizeof(__be32) + xdr_stream_remaining(xdr); + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; + if (len != expected) + return nfserr_bad_xdr; +@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + for (i = 0; i < nr_iomaps; i++) { + u64 val; + +- p = xdr_decode_hyper(p, &val); ++ if (xdr_stream_decode_u64(xdr, 
&val)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (val & (block_size - 1)) { ++ nfserr = nfserr_inval; + goto fail; + } + iomaps[i].offset = val; + +- p = xdr_decode_hyper(p, &val); ++ if (xdr_stream_decode_u64(xdr, &val)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (val & (block_size - 1)) { ++ nfserr = nfserr_inval; + goto fail; + } + iomaps[i].length = val; +@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + return nfs_ok; + fail: + kfree(iomaps); +- return nfserr_inval; ++ return nfserr; + } +--- a/fs/nfsd/blocklayoutxdr.h ++++ b/fs/nfsd/blocklayoutxdr.h +@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo( + const struct nfsd4_getdeviceinfo *gdp); + __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, + const struct nfsd4_layoutget *lgp); +-__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, ++__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); +-__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, ++__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); + + #endif /* _NFSD_BLOCKLAYOUTXDR_H */ +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2549,7 +2549,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + lcp->lc_size_chg = false; + } + +- nfserr = ops->proc_layoutcommit(inode, lcp); ++ nfserr = ops->proc_layoutcommit(inode, rqstp, lcp); + nfs4_put_stid(&ls->ls_stid); + out: + return nfserr; +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -593,6 +593,8 @@ static __be32 + nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutcommit *lcp) + { ++ u32 len; ++ + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0) + return nfserr_bad_xdr; + if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES) +@@ -600,13 +602,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_ + if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX) + return nfserr_bad_xdr; + +- if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0) ++ if (xdr_stream_decode_u32(argp->xdr, &len) < 0) ++ return nfserr_bad_xdr; ++ if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len)) + return nfserr_bad_xdr; +- if (lcp->lc_up_len > 0) { +- lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len); +- if (!lcp->lc_up_layout) +- return nfserr_bad_xdr; +- } + + return nfs_ok; + } +--- a/fs/nfsd/pnfs.h ++++ b/fs/nfsd/pnfs.h +@@ -35,6 +35,7 @@ struct nfsd4_layout_ops { + const struct nfsd4_layoutget *lgp); + + __be32 (*proc_layoutcommit)(struct inode *inode, ++ struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp); + + void (*fence_client)(struct nfs4_layout_stateid *ls, +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -664,8 +664,7 @@ struct nfsd4_layoutcommit { + u64 lc_last_wr; /* request */ + struct timespec64 lc_mtime; /* request */ + u32 lc_layout_type; /* request */ +- u32 lc_up_len; /* layout length */ +- void *lc_up_layout; /* decoded by callback */ ++ struct xdr_buf lc_up_layout; /* decoded by callback */ + bool lc_size_chg; /* response */ + u64 lc_newsize; /* response */ + }; diff --git a/queue-6.17/nfsd-minor-cleanup-in-layoutcommit-processing.patch b/queue-6.17/nfsd-minor-cleanup-in-layoutcommit-processing.patch new file mode 100644 index 0000000000..3dba14d181 --- /dev/null +++ b/queue-6.17/nfsd-minor-cleanup-in-layoutcommit-processing.patch @@ -0,0 +1,50 @@ +From stable+bounces-188062-greg=kroah.com@vger.kernel.org Mon Oct 20 14:52:45 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:52:23 
-0400 +Subject: NFSD: Minor cleanup in layoutcommit processing +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125226.1759978-2-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 274365a51d88658fb51cca637ba579034e90a799 ] + +Remove dprintk in nfsd4_layoutcommit. These are not needed +in day to day usage, and the information is also available +in Wireshark when capturing NFS traffic. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4proc.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2521,18 +2521,12 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + inode = d_inode(current_fh->fh_dentry); + + nfserr = nfserr_inval; +- if (new_size <= seg->offset) { +- dprintk("pnfsd: last write before layout segment\n"); ++ if (new_size <= seg->offset) + goto out; +- } +- if (new_size > seg->offset + seg->length) { +- dprintk("pnfsd: last write beyond layout segment\n"); ++ if (new_size > seg->offset + seg->length) + goto out; +- } +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) { +- dprintk("pnfsd: layoutcommit beyond EOF\n"); ++ if (!lcp->lc_newoffset && new_size > i_size_read(inode)) + goto out; +- } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, diff --git a/queue-6.17/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch b/queue-6.17/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch new file mode 100644 index 0000000000..3c5413ae77 --- /dev/null +++ b/queue-6.17/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch @@ -0,0 +1,156 @@ +From stable+bounces-188061-greg=kroah.com@vger.kernel.org Mon Oct 20 14:52:38 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:52:22 -0400 +Subject: NFSD: Rework encoding and decoding of nfsd4_deviceid +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Chuck Lever , Sasha Levin +Message-ID: <20251020125226.1759978-1-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 832738e4b325b742940761e10487403f9aad13e8 ] + +Compilers may optimize the layout of C structures, so we should not rely +on sizeof struct and memcpy to encode and decode XDR structures. The byte +order of the fields should also be taken into account. + +This patch adds the correct functions to handle the deviceid4 structure +and removes the pad field, which is currently not used by NFSD, from the +runtime state. The server's byte order is preserved because the deviceid4 +blob on the wire is only used as a cookie by the client. 
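To illustrate the field-by-field approach described above, here is a small stand-alone user-space sketch; the struct and function names are invented for illustration and this is not the kernel helper code (the real helpers appear in the diff below):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for the runtime state: 12 bytes of members,
 * but sizeof() is typically 16 because of tail padding. */
struct demo_deviceid {
        uint64_t fsid_idx;
        uint32_t generation;
};

/* Encode exactly 16 wire octets, one field at a time, so neither the
 * struct's padding nor its layout leaks into the wire format.  Host
 * byte order is preserved, since the blob is only an opaque cookie. */
static void demo_encode(unsigned char wire[16], const struct demo_deviceid *id)
{
        memcpy(wire + 0, &id->fsid_idx, 8);
        memcpy(wire + 8, &id->generation, 4);
        memset(wire + 12, 0, 4);        /* pad octets, ignored on decode */
}

static void demo_decode(struct demo_deviceid *id, const unsigned char wire[16])
{
        memcpy(&id->fsid_idx, wire + 0, 8);
        memcpy(&id->generation, wire + 8, 4);
        /* the remaining four octets are not used */
}

int main(void)
{
        struct demo_deviceid in = { .fsid_idx = 42, .generation = 7 }, out;
        unsigned char wire[16];

        demo_encode(wire, &in);
        demo_decode(&out, wire);
        printf("sizeof=%zu, round-trip ok=%d\n", sizeof(in),
               in.fsid_idx == out.fsid_idx && in.generation == out.generation);
        return 0;
}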
+ +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayoutxdr.c | 7 ++----- + fs/nfsd/flexfilelayoutxdr.c | 3 +-- + fs/nfsd/nfs4layouts.c | 1 - + fs/nfsd/nfs4xdr.c | 14 +------------- + fs/nfsd/xdr4.h | 36 +++++++++++++++++++++++++++++++++++- + 5 files changed, 39 insertions(+), 22 deletions(-) + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_ + *p++ = cpu_to_be32(len); + *p++ = cpu_to_be32(1); /* we always return a single extent */ + +- p = xdr_encode_opaque_fixed(p, &b->vol_id, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &b->vol_id); + p = xdr_encode_hyper(p, b->foff); + p = xdr_encode_hyper(p, b->len); + p = xdr_encode_hyper(p, b->soff); +@@ -156,9 +155,7 @@ nfsd4_block_decode_layoutupdate(__be32 * + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; + +- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid)); +- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid)); +- ++ p = svcxdr_decode_deviceid4(p, &bex.vol_id); + p = xdr_decode_hyper(p, &bex.foff); + if (bex.foff & (block_size - 1)) { + goto fail; +--- a/fs/nfsd/flexfilelayoutxdr.c ++++ b/fs/nfsd/flexfilelayoutxdr.c +@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_str + *p++ = cpu_to_be32(1); /* single mirror */ + *p++ = cpu_to_be32(1); /* single data server */ + +- p = xdr_encode_opaque_fixed(p, &fl->deviceid, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &fl->deviceid); + + *p++ = cpu_to_be32(1); /* efficiency */ + +--- a/fs/nfsd/nfs4layouts.c ++++ b/fs/nfsd/nfs4layouts.c +@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid + + id->fsid_idx = fhp->fh_export->ex_devid_map->idx; + id->generation = device_generation; +- id->pad = 0; + return 0; + } + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -588,18 +588,6 @@ nfsd4_decode_state_owner4(struct nfsd4_c + } + + #ifdef CONFIG_NFSD_PNFS +-static __be32 +-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, +- struct nfsd4_deviceid *devid) +-{ +- __be32 *p; +- +- p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); +- if (!p) +- return nfserr_bad_xdr; +- memcpy(devid, p, sizeof(*devid)); +- return nfs_ok; +-} + + static __be32 + nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, +@@ -1784,7 +1772,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_ + __be32 status; + + memset(gdev, 0, sizeof(*gdev)); +- status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); ++ status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -595,9 +595,43 @@ struct nfsd4_reclaim_complete { + struct nfsd4_deviceid { + u64 fsid_idx; + u32 generation; +- u32 pad; + }; + ++static inline __be32 * ++svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ *q = (__force __be64)devid->fsid_idx; ++ p += 2; ++ *p++ = (__force __be32)devid->generation; ++ *p++ = xdr_zero; ++ return p; ++} ++ ++static inline __be32 * ++svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid *devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ devid->fsid_idx = (__force u64)(*q); ++ p += 2; ++ devid->generation = (__force u32)(*p++); ++ p++; /* NFSD does not use the remaining octets */ ++ return p; ++} ++ 
++static inline __be32 ++nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid) ++{ ++ __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); ++ ++ if (unlikely(!p)) ++ return nfserr_bad_xdr; ++ svcxdr_decode_deviceid4(p, devid); ++ return nfs_ok; ++} ++ + struct nfsd4_layout_seg { + u32 iomode; + u64 offset; diff --git a/queue-6.17/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch b/queue-6.17/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch new file mode 100644 index 0000000000..029e2e138c --- /dev/null +++ b/queue-6.17/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch @@ -0,0 +1,265 @@ +From stable+bounces-188087-greg=kroah.com@vger.kernel.org Mon Oct 20 15:01:17 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:58:35 -0400 +Subject: phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251020125835.1762427-2-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 284fb19a3ffb1083c3ad9c00d29749d09dddb99c ] + +PLL lockup and O_CMN_READY assertion can only happen after common state +machine gets enabled by programming DPHY_CMN_SSM register, but driver was +polling them before the common state machine was enabled which is +incorrect. This is as per the DPHY initialization sequence as mentioned in +J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram". +It shows O_CMN_READY polling at the end after common configuration pin +setup where the common configuration pin setup step enables state machine +as referenced in "Table 12-1533. Common Configuration-Related Setup +mentions state machine" + +To fix this : +- Add new function callbacks for polling on PLL lock and O_CMN_READY + assertion. +- As state machine and clocks get enabled in power_on callback only, move + the clock related programming part from configure callback to power_on +callback and poll for the PLL lockup and O_CMN_READY assertion after state +machine gets enabled. +- The configure callback only saves the PLL configuration received from the + client driver which will be applied later on in power_on callback. +- Add checks to ensure configure is called before power_on and state + machine is in disabled state before power_on callback is called. +- Disable state machine in power_off so that client driver can re-configure + the PLL by following up a power_off, configure, power_on sequence. 
+ +[1]: https://www.ti.com/lit/zip/spruil1 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 124 +++++++++++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 32 deletions(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -100,6 +100,8 @@ struct cdns_dphy_ops { + void (*set_pll_cfg)(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg); + unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); ++ int (*wait_for_pll_lock)(struct cdns_dphy *dphy); ++ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy); + }; + + struct cdns_dphy { +@@ -109,6 +111,8 @@ struct cdns_dphy { + struct clk *pll_ref_clk; + const struct cdns_dphy_ops *ops; + struct phy *phy; ++ bool is_configured; ++ bool is_powered; + }; + + /* Order of bands is important since the index is the band number. */ +@@ -195,6 +199,16 @@ static unsigned long cdns_dphy_get_wakeu + return dphy->ops->get_wakeup_time_ns(dphy); + } + ++static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0; ++} ++ ++static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0; ++} ++ + static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) + { + /* Default wakeup time is 800 ns (in a simulated environment). */ +@@ -236,7 +250,6 @@ static unsigned long cdns_dphy_j721e_get + static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) + { +- u32 status; + + /* + * set the PWM and PLL Byteclk divider settings to recommended values +@@ -253,13 +266,6 @@ static void cdns_dphy_j721e_set_pll_cfg( + + writel(DPHY_TX_J721E_WIZ_LANE_RSTB, + dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, +- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, +- (status & DPHY_TX_WIZ_O_CMN_READY), 0, +- POLL_TIMEOUT_US); + } + + static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div) +@@ -267,6 +273,23 @@ static void cdns_dphy_j721e_set_psm_div( + writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ); + } + ++static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, ++ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US); ++} ++ ++static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, ++ status & DPHY_TX_WIZ_O_CMN_READY, 0, ++ POLL_TIMEOUT_US); ++} ++ + /* + * This is the reference implementation of DPHY hooks. 
Specific integration of + * this IP may have to re-implement some of them depending on how they decided +@@ -282,6 +305,8 @@ static const struct cdns_dphy_ops j721e_ + .get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns, + .set_pll_cfg = cdns_dphy_j721e_set_pll_cfg, + .set_psm_div = cdns_dphy_j721e_set_psm_div, ++ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock, ++ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready, + }; + + static int cdns_dphy_config_from_opts(struct phy *phy, +@@ -339,21 +364,36 @@ static int cdns_dphy_validate(struct phy + static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); +- struct cdns_dphy_cfg cfg = { 0 }; +- int ret, band_ctrl; +- unsigned int reg; ++ int ret; + +- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg); +- if (ret) +- return ret; ++ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg); ++ if (!ret) ++ dphy->is_configured = true; ++ ++ return ret; ++} ++ ++static int cdns_dphy_power_on(struct phy *phy) ++{ ++ struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ int ret; ++ u32 reg; ++ ++ if (!dphy->is_configured || dphy->is_powered) ++ return -EINVAL; ++ ++ clk_prepare_enable(dphy->psm_clk); ++ clk_prepare_enable(dphy->pll_ref_clk); + + /* + * Configure the internal PSM clk divider so that the DPHY has a + * 1MHz clk (or something close). + */ + ret = cdns_dphy_setup_psm(dphy); +- if (ret) +- return ret; ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret); ++ goto err_power_on; ++ } + + /* + * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes +@@ -368,40 +408,60 @@ static int cdns_dphy_configure(struct ph + * Configure the DPHY PLL that will be used to generate the TX byte + * clk. + */ +- cdns_dphy_set_pll_cfg(dphy, &cfg); ++ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg); + +- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate); +- if (band_ctrl < 0) +- return band_ctrl; ++ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate); ++ if (ret < 0) { ++ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret); ++ goto err_power_on; ++ } + +- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) | +- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl); ++ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) | ++ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret); + writel(reg, dphy->regs + DPHY_BAND_CFG); + +- return 0; +-} +- +-static int cdns_dphy_power_on(struct phy *phy) +-{ +- struct cdns_dphy *dphy = phy_get_drvdata(phy); +- +- clk_prepare_enable(dphy->psm_clk); +- clk_prepare_enable(dphy->pll_ref_clk); +- + /* Start TX state machine. */ + writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + ++ ret = cdns_dphy_wait_for_pll_lock(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret); ++ goto err_power_on; ++ } ++ ++ ret = cdns_dphy_wait_for_cmn_ready(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n", ++ ret); ++ goto err_power_on; ++ } ++ ++ dphy->is_powered = true; ++ + return 0; ++ ++err_power_on: ++ clk_disable_unprepare(dphy->pll_ref_clk); ++ clk_disable_unprepare(dphy->psm_clk); ++ ++ return ret; + } + + static int cdns_dphy_power_off(struct phy *phy) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ u32 reg; + + clk_disable_unprepare(dphy->pll_ref_clk); + clk_disable_unprepare(dphy->psm_clk); + ++ /* Stop TX state machine. 
*/ ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM); ++ ++ dphy->is_powered = false; ++ + return 0; + } + diff --git a/queue-6.17/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch b/queue-6.17/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch new file mode 100644 index 0000000000..ead4ebf98d --- /dev/null +++ b/queue-6.17/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch @@ -0,0 +1,58 @@ +From stable+bounces-188371-greg=kroah.com@vger.kernel.org Tue Oct 21 18:02:19 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 12:02:07 -0400 +Subject: phy: cadence: cdns-dphy: Update calibration wait time for startup state machine +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251021160207.2330991-3-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 2c27aaee934a1b5229152fe33a14f1fdf50da143 ] + +Do read-modify-write so that we re-use the characterized reset value as +specified in TRM [1] to program calibration wait time which defines number +of cycles to wait for after startup state machine is in bandgap enable +state. + +This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's +AM62L and J721E SoC since earlier calibration wait time was getting +overwritten to zero value thus failing the PLL to lockup and causing +timeout. + +[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2): +Link: https://www.ti.com/lit/pdf/spruj83 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -30,6 +30,7 @@ + + #define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) + #define DPHY_CMN_SSM_EN BIT(0) ++#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1) + #define DPHY_CMN_TX_MODE_EN BIT(9) + + #define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) +@@ -421,7 +422,8 @@ static int cdns_dphy_power_on(struct phy + writel(reg, dphy->regs + DPHY_BAND_CFG); + + /* Start TX state machine. 
*/ +- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + + ret = cdns_dphy_wait_for_pll_lock(dphy); diff --git a/queue-6.17/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch b/queue-6.17/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch new file mode 100644 index 0000000000..6c9df31697 --- /dev/null +++ b/queue-6.17/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch @@ -0,0 +1,59 @@ +From stable+bounces-188086-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:12 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:58:34 -0400 +Subject: phy: cdns-dphy: Store hs_clk_rate and return it +To: stable@vger.kernel.org +Cc: Tomi Valkeinen , Aradhya Bhatia , Parth Pancholi , Jayesh Choudhary , Vinod Koul , Devarsh Thakkar , Sasha Levin +Message-ID: <20251020125835.1762427-1-sashal@kernel.org> + +From: Tomi Valkeinen + +[ Upstream commit 689a54acb56858c85de8c7285db82b8ae6dbf683 ] + +The DPHY driver does not return the actual hs_clk_rate, so the DSI +driver has no idea what clock was actually achieved. Set the realized +hs_clk_rate to the opts struct, so that the DSI driver gets it back. + +Reviewed-by: Aradhya Bhatia +Tested-by: Parth Pancholi +Tested-by: Jayesh Choudhary +Acked-by: Vinod Koul +Reviewed-by: Devarsh Thakkar +Signed-off-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250723-cdns-dphy-hs-clk-rate-fix-v1-1-d4539d44cbe7@ideasonboard.com +Signed-off-by: Vinod Koul +Stable-dep-of: 284fb19a3ffb ("phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -79,6 +79,7 @@ struct cdns_dphy_cfg { + u8 pll_ipdiv; + u8 pll_opdiv; + u16 pll_fbdiv; ++ u32 hs_clk_rate; + unsigned int nlanes; + }; + +@@ -154,6 +155,9 @@ static int cdns_dsi_get_dphy_pll_cfg(str + cfg->pll_ipdiv, + pll_ref_hz); + ++ cfg->hs_clk_rate = div_u64((u64)pll_ref_hz * cfg->pll_fbdiv, ++ 2 * cfg->pll_opdiv * cfg->pll_ipdiv); ++ + return 0; + } + +@@ -297,6 +301,7 @@ static int cdns_dphy_config_from_opts(st + if (ret) + return ret; + ++ opts->hs_clk_rate = cfg->hs_clk_rate; + opts->wakeup = cdns_dphy_get_wakeup_time_ns(dphy) / 1000; + + return 0; diff --git a/queue-6.17/series b/queue-6.17/series index 86282c9372..8299eb83fd 100644 --- a/queue-6.17/series +++ b/queue-6.17/series @@ -134,3 +134,26 @@ drm-xe-evict-drop-bogus-assert.patch selftests-arg_parsing-ensure-data-is-flushed-to-disk.patch nvme-tcp-handle-tls-partially-sent-records-in-write_.patch rust-cpufreq-fix-formatting.patch +hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch +arm64-debug-always-unmask-interrupts-in-el0_softstp.patch +arm64-cputype-add-neoverse-v3ae-definitions.patch +arm64-errata-apply-workarounds-for-neoverse-v3ae.patch +xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch +xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch +nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch +nfsd-minor-cleanup-in-layoutcommit-processing.patch +nfsd-implement-large-extent-array-support-in-pnfs.patch +nfsd-fix-last-write-offset-handling-in-layoutcommit.patch +phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch +phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch 
+nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch +x86-resctrl-refactor-resctrl_arch_rmid_read.patch +x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch +cxl-fix-match_region_by_range-to-use-region_res_match_cxl_range.patch +phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch +drm-xe-use-devm_ioremap_wc-for-vram-mapping-and-drop-manual-unmap.patch +drm-xe-use-dynamic-allocation-for-tile-and-device-vram-region-structures.patch +drm-xe-move-struct-xe_vram_region-to-a-dedicated-header.patch +drm-xe-unify-the-initialization-of-vram-regions.patch +drm-xe-move-rebar-to-be-done-earlier.patch +drm-xe-don-t-allow-evicting-of-bos-in-same-vm-in-array-of-vm-binds.patch diff --git a/queue-6.17/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch b/queue-6.17/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch new file mode 100644 index 0000000000..10842ef3a2 --- /dev/null +++ b/queue-6.17/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch @@ -0,0 +1,149 @@ +From stable+bounces-188202-greg=kroah.com@vger.kernel.org Mon Oct 20 18:54:47 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:53:09 -0400 +Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID +To: stable@vger.kernel.org +Cc: Babu Moger , "Borislav Petkov (AMD)" , Reinette Chatre , Sasha Levin +Message-ID: <20251020165309.1843541-2-sashal@kernel.org> + +From: Babu Moger + +[ Upstream commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 ] + +Users can create as many monitoring groups as the number of RMIDs supported +by the hardware. However, on AMD systems, only a limited number of RMIDs +are guaranteed to be actively tracked by the hardware. RMIDs that exceed +this limit are placed in an "Unavailable" state. + +When a bandwidth counter is read for such an RMID, the hardware sets +MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked +again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable +remains set on first read after tracking re-starts and is clear on all +subsequent reads as long as the RMID is tracked. + +resctrl miscounts the bandwidth events after an RMID transitions from the +"Unavailable" state back to being tracked. This happens because when the +hardware starts counting again after resetting the counter to zero, resctrl +in turn compares the new count against the counter value stored from the +previous time the RMID was tracked. + +This results in resctrl computing an event value that is either undercounting +(when new counter is more than stored counter) or a mistaken overflow (when +new counter is less than stored counter). + +Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to +zero whenever the RMID is in the "Unavailable" state to ensure accurate +counting after the RMID resets to zero when it starts to be tracked again. + +Example scenario that results in mistaken overflow +================================================== +1. The resctrl filesystem is mounted, and a task is assigned to a + monitoring group. 
+ + $mount -t resctrl resctrl /sys/fs/resctrl + $mkdir /sys/fs/resctrl/mon_groups/test1/ + $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 21323 <- Total bytes on domain 0 + "Unavailable" <- Total bytes on domain 1 + + Task is running on domain 0. Counter on domain 1 is "Unavailable". + +2. The task runs on domain 0 for a while and then moves to domain 1. The + counter starts incrementing on domain 1. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 7345357 <- Total bytes on domain 0 + 4545 <- Total bytes on domain 1 + +3. At some point, the RMID in domain 0 transitions to the "Unavailable" + state because the task is no longer executing in that domain. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + "Unavailable" <- Total bytes on domain 0 + 434341 <- Total bytes on domain 1 + +4. Since the task continues to migrate between domains, it may eventually + return to domain 0. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 17592178699059 <- Overflow on domain 0 + 3232332 <- Total bytes on domain 1 + +In this case, the RMID on domain 0 transitions from "Unavailable" state to +active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when +the counter is read and begins tracking the RMID counting from 0. + +Subsequent reads succeed but return a value smaller than the previously +saved MSR value (7345357). Consequently, the resctrl's overflow logic is +triggered, it compares the previous value (7345357) with the new, smaller +value and incorrectly interprets this as a counter overflow, adding a large +delta. + +In reality, this is a false positive: the counter did not overflow but was +simply reset when the RMID transitioned from "Unavailable" back to active +state. + +Here is the text from APM [1] available from [2]. + +"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the +first QM_CTR read when it begins tracking an RMID that it was not +previously tracking. The U bit will be zero for all subsequent reads from +that RMID while it is still tracked by the hardware. Therefore, a QM_CTR +read with the U bit set when that RMID is in use by a processor can be +considered 0 when calculating the difference with a subsequent read." + +[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming + Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory + Bandwidth (MBM). + + [ bp: Split commit message into smaller paragraph chunks for better + consumption. 
] + +Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") +Signed-off-by: Babu Moger +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Reinette Chatre +Tested-by: Reinette Chatre +Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -249,7 +249,9 @@ int resctrl_arch_rmid_read(struct rdt_re + u32 unused, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *ignored) + { ++ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); ++ struct arch_mbm_state *am; + u64 msr_val; + u32 prmid; + int ret; +@@ -258,12 +260,16 @@ int resctrl_arch_rmid_read(struct rdt_re + + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); +- if (ret) +- return ret; + +- *val = get_corrected_val(r, d, rmid, eventid, msr_val); ++ if (!ret) { ++ *val = get_corrected_val(r, d, rmid, eventid, msr_val); ++ } else if (ret == -EINVAL) { ++ am = get_arch_mbm_state(hw_dom, rmid, eventid); ++ if (am) ++ am->prev_msr = 0; ++ } + +- return 0; ++ return ret; + } + + /* diff --git a/queue-6.17/x86-resctrl-refactor-resctrl_arch_rmid_read.patch b/queue-6.17/x86-resctrl-refactor-resctrl_arch_rmid_read.patch new file mode 100644 index 0000000000..d291c951ea --- /dev/null +++ b/queue-6.17/x86-resctrl-refactor-resctrl_arch_rmid_read.patch @@ -0,0 +1,89 @@ +From stable+bounces-188201-greg=kroah.com@vger.kernel.org Mon Oct 20 18:54:45 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 12:53:08 -0400 +Subject: x86/resctrl: Refactor resctrl_arch_rmid_read() +To: stable@vger.kernel.org +Cc: Babu Moger , "Borislav Petkov (AMD)" , Reinette Chatre , Sasha Levin +Message-ID: <20251020165309.1843541-1-sashal@kernel.org> + +From: Babu Moger + +[ Upstream commit 7c9ac605e202c4668e441fc8146a993577131ca1 ] + +resctrl_arch_rmid_read() adjusts the value obtained from MSR_IA32_QM_CTR to +account for the overflow for MBM events and apply counter scaling for all the +events. This logic is common to both reading an RMID and reading a hardware +counter directly. + +Refactor the hardware value adjustment logic into get_corrected_val() to +prepare for support of reading a hardware counter. 
+ +Signed-off-by: Babu Moger +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Reinette Chatre +Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com +Stable-dep-of: 15292f1b4c55 ("x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/monitor.c | 38 ++++++++++++++++++++-------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -224,24 +224,13 @@ static u64 mbm_overflow_count(u64 prev_m + return chunks >> shift; + } + +-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +- u32 unused, u32 rmid, enum resctrl_event_id eventid, +- u64 *val, void *ignored) ++static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, ++ u32 rmid, enum resctrl_event_id eventid, u64 msr_val) + { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); +- int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; +- u64 msr_val, chunks; +- u32 prmid; +- int ret; +- +- resctrl_arch_rmid_read_context_check(); +- +- prmid = logical_rmid_to_physical_rmid(cpu, rmid); +- ret = __rmid_read_phys(prmid, eventid, &msr_val); +- if (ret) +- return ret; ++ u64 chunks; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { +@@ -253,7 +242,26 @@ int resctrl_arch_rmid_read(struct rdt_re + chunks = msr_val; + } + +- *val = chunks * hw_res->mon_scale; ++ return chunks * hw_res->mon_scale; ++} ++ ++int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, ++ u32 unused, u32 rmid, enum resctrl_event_id eventid, ++ u64 *val, void *ignored) ++{ ++ int cpu = cpumask_any(&d->hdr.cpu_mask); ++ u64 msr_val; ++ u32 prmid; ++ int ret; ++ ++ resctrl_arch_rmid_read_context_check(); ++ ++ prmid = logical_rmid_to_physical_rmid(cpu, rmid); ++ ret = __rmid_read_phys(prmid, eventid, &msr_val); ++ if (ret) ++ return ret; ++ ++ *val = get_corrected_val(r, d, rmid, eventid, msr_val); + + return 0; + } diff --git a/queue-6.17/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch b/queue-6.17/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch new file mode 100644 index 0000000000..aaef5a101e --- /dev/null +++ b/queue-6.17/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch @@ -0,0 +1,172 @@ +From stable+bounces-188050-greg=kroah.com@vger.kernel.org Mon Oct 20 14:46:26 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:43:25 -0400 +Subject: xfs: fix log CRC mismatches between i386 and other architectures +To: stable@vger.kernel.org +Cc: Christoph Hellwig , Carlos Maiolino , Sasha Levin +Message-ID: <20251020124325.1755939-2-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit e747883c7d7306acb4d683038d881528fbfbe749 ] + +When mounting file systems with a log that was dirtied on i386 on +other architectures or vice versa, log recovery is unhappy: + +[ 11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc. + +This is because the CRCs generated by i386 and other architectures +always diff. The reason for that is that sizeof(struct xlog_rec_header) +returns different values for i386 vs the rest (324 vs 328), because the +struct is not sizeof(uint64_t) aligned, and i386 has odd struct size +alignment rules. 
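To make the size difference above concrete, here is a small stand-alone sketch; demo_hdr is a made-up, simplified stand-in for the real header, not kernel code:

#include <stdio.h>
#include <stdint.h>

/*
 * The members occupy 12 bytes on every ABI.  On x86_64 (and most other
 * architectures) the 64-bit member gives the struct 8-byte alignment,
 * so sizeof() rounds up to 16 with 4 bytes of tail padding.  The i386
 * ABI only requires 4-byte alignment for 64-bit integers, so there is
 * no tail padding and sizeof() stays 12 -- the same effect that makes
 * struct xlog_rec_header 324 bytes on i386 and 328 bytes elsewhere.
 */
struct demo_hdr {
        uint64_t lsn;
        uint32_t size;
};

int main(void)
{
        printf("sizeof(struct demo_hdr) = %zu\n", sizeof(struct demo_hdr));
        return 0;
}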
+ +This issue goes back to commit 13cdc853c519 ("Add log versioning, and new +super block field for the log stripe") in the xfs-import tree, which +adds log v2 support and the h_size field that causes the unaligned size. +At that time it only mattered for the crude debug only log header +checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log") +it became a real issue for v5 file system, because now there is a proper +CRC, and regular builds actually expect it match. + +Fix this by allowing checksums with and without the padding. + +Fixes: 0e446be44806 ("xfs: add CRC checks to the log") +Cc: # v3.8 +Signed-off-by: Christoph Hellwig +Signed-off-by: Carlos Maiolino +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_log_format.h | 30 +++++++++++++++++++++++++++++- + fs/xfs/libxfs/xfs_ondisk.h | 2 ++ + fs/xfs/xfs_log.c | 8 ++++---- + fs/xfs/xfs_log_priv.h | 4 ++-- + fs/xfs/xfs_log_recover.c | 19 +++++++++++++++++-- + 5 files changed, 54 insertions(+), 9 deletions(-) + +--- a/fs/xfs/libxfs/xfs_log_format.h ++++ b/fs/xfs/libxfs/xfs_log_format.h +@@ -174,12 +174,40 @@ typedef struct xlog_rec_header { + __be32 h_prev_block; /* block number to previous LR : 4 */ + __be32 h_num_logops; /* number of log operations in this LR : 4 */ + __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; +- /* new fields */ ++ ++ /* fields added by the Linux port: */ + __be32 h_fmt; /* format of log record : 4 */ + uuid_t h_fs_uuid; /* uuid of FS : 16 */ ++ ++ /* fields added for log v2: */ + __be32 h_size; /* iclog size : 4 */ ++ ++ /* ++ * When h_size added for log v2 support, it caused structure to have ++ * a different size on i386 vs all other architectures because the ++ * sum of the size ofthe member is not aligned by that of the largest ++ * __be64-sized member, and i386 has really odd struct alignment rules. ++ * ++ * Due to the way the log headers are placed out on-disk that alone is ++ * not a problem becaue the xlog_rec_header always sits alone in a ++ * BBSIZEs area, and the rest of that area is padded with zeroes. ++ * But xlog_cksum used to calculate the checksum based on the structure ++ * size, and thus gives different checksums for i386 vs the rest. ++ * We now do two checksum validation passes for both sizes to allow ++ * moving v5 file systems with unclean logs between i386 and other ++ * (little-endian) architectures. 
++ */ ++ __u32 h_pad0; + } xlog_rec_header_t; + ++#ifdef __i386__ ++#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size) ++#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header) ++#else ++#define XLOG_REC_SIZE sizeof(struct xlog_rec_header) ++#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size) ++#endif /* __i386__ */ ++ + typedef struct xlog_rec_ext_header { + __be32 xh_cycle; /* write cycle of log : 4 */ + __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ +--- a/fs/xfs/libxfs/xfs_ondisk.h ++++ b/fs/xfs/libxfs/xfs_ondisk.h +@@ -174,6 +174,8 @@ xfs_check_ondisk_structs(void) + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); ++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328); ++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1568,13 +1568,13 @@ xlog_cksum( + struct xlog *log, + struct xlog_rec_header *rhead, + char *dp, +- int size) ++ unsigned int hdrsize, ++ unsigned int size) + { + uint32_t crc; + + /* first generate the crc for the record header ... */ +- crc = xfs_start_cksum_update((char *)rhead, +- sizeof(struct xlog_rec_header), ++ crc = xfs_start_cksum_update((char *)rhead, hdrsize, + offsetof(struct xlog_rec_header, h_crc)); + + /* ... then for additional cycle data for v2 logs ... */ +@@ -1818,7 +1818,7 @@ xlog_sync( + + /* calculcate the checksum */ + iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, +- iclog->ic_datap, size); ++ iclog->ic_datap, XLOG_REC_SIZE, size); + /* + * Intentionally corrupt the log record CRC based on the error injection + * frequency, if defined. This facilitates testing log recovery in the +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -499,8 +499,8 @@ xlog_recover_finish( + extern void + xlog_recover_cancel(struct xlog *); + +-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, +- char *dp, int size); ++__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, ++ char *dp, unsigned int hdrsize, unsigned int size); + + extern struct kmem_cache *xfs_log_ticket_cache; + struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2894,9 +2894,24 @@ xlog_recover_process( + int pass, + struct list_head *buffer_list) + { +- __le32 expected_crc = rhead->h_crc, crc; ++ __le32 expected_crc = rhead->h_crc, crc, other_crc; + +- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); ++ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE, ++ be32_to_cpu(rhead->h_len)); ++ ++ /* ++ * Look at the end of the struct xlog_rec_header definition in ++ * xfs_log_format.h for the glory details. ++ */ ++ if (expected_crc && crc != expected_crc) { ++ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER, ++ be32_to_cpu(rhead->h_len)); ++ if (other_crc == expected_crc) { ++ xfs_notice_once(log->l_mp, ++ "Fixing up incorrect CRC due to padding."); ++ crc = other_crc; ++ } ++ } + + /* + * Nothing else to do if this is a CRC verification pass. 
Just return diff --git a/queue-6.17/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch b/queue-6.17/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch new file mode 100644 index 0000000000..4a5c01a203 --- /dev/null +++ b/queue-6.17/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch @@ -0,0 +1,68 @@ +From stable+bounces-188049-greg=kroah.com@vger.kernel.org Mon Oct 20 14:43:34 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:43:24 -0400 +Subject: xfs: rename the old_crc variable in xlog_recover_process +To: stable@vger.kernel.org +Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20251020124325.1755939-1-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit 0b737f4ac1d3ec093347241df74bbf5f54a7e16c ] + +old_crc is a very misleading name. Rename it to expected_crc as that +described the usage much better. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: e747883c7d73 ("xfs: fix log CRC mismatches between i386 and other architectures") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2894,20 +2894,19 @@ xlog_recover_process( + int pass, + struct list_head *buffer_list) + { +- __le32 old_crc = rhead->h_crc; +- __le32 crc; ++ __le32 expected_crc = rhead->h_crc, crc; + + crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); + + /* + * Nothing else to do if this is a CRC verification pass. Just return + * if this a record with a non-zero crc. Unfortunately, mkfs always +- * sets old_crc to 0 so we must consider this valid even on v5 supers. +- * Otherwise, return EFSBADCRC on failure so the callers up the stack +- * know precisely what failed. ++ * sets expected_crc to 0 so we must consider this valid even on v5 ++ * supers. Otherwise, return EFSBADCRC on failure so the callers up the ++ * stack know precisely what failed. + */ + if (pass == XLOG_RECOVER_CRCPASS) { +- if (old_crc && crc != old_crc) ++ if (expected_crc && crc != expected_crc) + return -EFSBADCRC; + return 0; + } +@@ -2918,11 +2917,11 @@ xlog_recover_process( + * zero CRC check prevents warnings from being emitted when upgrading + * the kernel from one that does not add CRCs by default. + */ +- if (crc != old_crc) { +- if (old_crc || xfs_has_crc(log->l_mp)) { ++ if (crc != expected_crc) { ++ if (expected_crc || xfs_has_crc(log->l_mp)) { + xfs_alert(log->l_mp, + "log record CRC mismatch: found 0x%x, expected 0x%x.", +- le32_to_cpu(old_crc), ++ le32_to_cpu(expected_crc), + le32_to_cpu(crc)); + xfs_hex_dump(dp, 32); + } -- 2.47.3