From: Greg Kroah-Hartman Date: Tue, 27 May 2025 15:35:10 +0000 (+0200) Subject: 6.12-stable patches X-Git-Tag: v6.12.31~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bf2ca7b64a96c66c6c171056d346bf838c270978;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: bpf-abort-verification-if-env-cur_state-loop_entry-null.patch drm-amd-display-exit-idle-optimizations-before-accessing-phy.patch drm-amdkfd-correct-f8_mode-for-gfx950.patch drm-gem-internally-test-import_attach-for-imported-objects.patch i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch serial-sh-sci-save-and-restore-more-registers.patch watchdog-aspeed-fix-64-bit-division.patch x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch --- diff --git a/queue-6.12/bpf-abort-verification-if-env-cur_state-loop_entry-null.patch b/queue-6.12/bpf-abort-verification-if-env-cur_state-loop_entry-null.patch new file mode 100644 index 0000000000..cab3babbb8 --- /dev/null +++ b/queue-6.12/bpf-abort-verification-if-env-cur_state-loop_entry-null.patch @@ -0,0 +1,39 @@ +From f3c2d243a36ef23be07bc2bce7c6a5cb6e07d9e3 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Mon, 24 Feb 2025 16:38:38 -0800 +Subject: bpf: abort verification if env->cur_state->loop_entry != NULL + +From: Eduard Zingerman + +commit f3c2d243a36ef23be07bc2bce7c6a5cb6e07d9e3 upstream. + +In addition to warning abort verification with -EFAULT. +If env->cur_state->loop_entry != NULL something is irrecoverably +buggy. + +Fixes: bbbc02b7445e ("bpf: copy_verifier_state() should copy 'loop_entry' field") +Suggested-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/r/20250225003838.135319-1-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -18721,8 +18721,10 @@ process_bpf_exit: + return err; + break; + } else { +- if (WARN_ON_ONCE(env->cur_state->loop_entry)) +- env->cur_state->loop_entry = NULL; ++ if (WARN_ON_ONCE(env->cur_state->loop_entry)) { ++ verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n"); ++ return -EFAULT; ++ } + do_print_state = true; + continue; + } diff --git a/queue-6.12/drm-amd-display-exit-idle-optimizations-before-accessing-phy.patch b/queue-6.12/drm-amd-display-exit-idle-optimizations-before-accessing-phy.patch new file mode 100644 index 0000000000..20535a5de4 --- /dev/null +++ b/queue-6.12/drm-amd-display-exit-idle-optimizations-before-accessing-phy.patch @@ -0,0 +1,54 @@ +From c488967488d7eff7b9c527d5469c424c15377502 Mon Sep 17 00:00:00 2001 +From: Ovidiu Bunea +Date: Mon, 3 Feb 2025 15:43:32 -0500 +Subject: drm/amd/display: Exit idle optimizations before accessing PHY + +From: Ovidiu Bunea + +commit c488967488d7eff7b9c527d5469c424c15377502 upstream. + +[why & how] +By default, DCN HW is in idle optimized state which does not allow access +to PHY registers. If BIOS powers up the DCN, it is fine because they will +power up everything. Only exit idle optimized state when not taking control +from VBIOS. + +Fixes: be704e5ef4bd ("Revert "drm/amd/display: Exit idle optimizations before attempt to access PHY"") +Reviewed-by: Charlene Liu +Signed-off-by: Ovidiu Bunea +Signed-off-by: Roman Li +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +@@ -1888,6 +1888,7 @@ void dce110_enable_accelerated_mode(stru + bool can_apply_edp_fast_boot = false; + bool can_apply_seamless_boot = false; + bool keep_edp_vdd_on = false; ++ struct dc_bios *dcb = dc->ctx->dc_bios; + DC_LOGGER_INIT(); + + +@@ -1964,6 +1965,8 @@ void dce110_enable_accelerated_mode(stru + hws->funcs.edp_backlight_control(edp_link_with_sink, false); + } + /*resume from S3, no vbios posting, no need to power down again*/ ++ if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) ++ clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + + power_down_all_hw_blocks(dc); + +@@ -1976,6 +1979,8 @@ void dce110_enable_accelerated_mode(stru + disable_vga_and_power_gate_all_controllers(dc); + if (edp_link_with_sink && !keep_edp_vdd_on) + dc->hwss.edp_power_control(edp_link_with_sink, false); ++ if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) ++ clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + } + bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1); + } diff --git a/queue-6.12/drm-amdkfd-correct-f8_mode-for-gfx950.patch b/queue-6.12/drm-amdkfd-correct-f8_mode-for-gfx950.patch new file mode 100644 index 0000000000..063ce834ed --- /dev/null +++ b/queue-6.12/drm-amdkfd-correct-f8_mode-for-gfx950.patch @@ -0,0 +1,32 @@ +From 0c7e053448945e5a4379dc4396c762d7422b11ca Mon Sep 17 00:00:00 2001 +From: Amber Lin +Date: Wed, 12 Mar 2025 21:14:43 -0400 +Subject: drm/amdkfd: Correct F8_MODE for gfx950 + +From: Amber Lin + +commit 0c7e053448945e5a4379dc4396c762d7422b11ca upstream. + +Correct F8_MODE setting for gfx950 that was removed + +Fixes: 61972cd93af7 ("drm/amdkfd: Set per-process flags only once for gfx9/10/11/12") +Signed-off-by: Amber Lin +Reviewed-by: Harish Kasiviswanathan +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +@@ -69,8 +69,7 @@ static bool set_cache_memory_policy_v9(s + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || +- KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || +- KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) ++ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) + qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); + + qpd->sh_mem_ape1_limit = 0; diff --git a/queue-6.12/drm-gem-internally-test-import_attach-for-imported-objects.patch b/queue-6.12/drm-gem-internally-test-import_attach-for-imported-objects.patch new file mode 100644 index 0000000000..962196a04d --- /dev/null +++ b/queue-6.12/drm-gem-internally-test-import_attach-for-imported-objects.patch @@ -0,0 +1,61 @@ +From 8260731ccad0451207b45844bb66eb161a209218 Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Wed, 16 Apr 2025 08:57:45 +0200 +Subject: drm/gem: Internally test import_attach for imported objects +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Zimmermann + +commit 8260731ccad0451207b45844bb66eb161a209218 upstream. + +Test struct drm_gem_object.import_attach to detect imported objects. + +During object clenanup, the dma_buf field might be NULL. Testing it in +an object's free callback then incorrectly does a cleanup as for native +objects. Happens for calls to drm_mode_destroy_dumb_ioctl() that +clears the dma_buf field in drm_gem_object_exported_dma_buf_free(). + +v3: +- only test for import_attach (Boris) +v2: +- use import_attach.dmabuf instead of dma_buf (Christian) + +Signed-off-by: Thomas Zimmermann +Fixes: b57aa47d39e9 ("drm/gem: Test for imported GEM buffers with helper") +Reported-by: Andy Yan +Closes: https://lore.kernel.org/dri-devel/38d09d34.4354.196379aa560.Coremail.andyshrk@163.com/ +Tested-by: Andy Yan +Cc: Thomas Zimmermann +Cc: Anusha Srivatsa +Cc: Christian König +Cc: Maarten Lankhorst +Cc: Maxime Ripard +Cc: David Airlie +Cc: Simona Vetter +Cc: Sumit Semwal +Cc: "Christian König" +Cc: dri-devel@lists.freedesktop.org +Cc: linux-media@vger.kernel.org +Cc: linaro-mm-sig@lists.linaro.org +Reviewed-by: Boris Brezillon +Reviewed-by: Simona Vetter +Link: https://lore.kernel.org/r/20250416065820.26076-1-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + include/drm/drm_gem.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/include/drm/drm_gem.h ++++ b/include/drm/drm_gem.h +@@ -580,8 +580,7 @@ static inline bool drm_gem_object_is_sha + */ + static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) + { +- /* The dma-buf's priv field points to the original GEM object. */ +- return obj->dma_buf && (obj->dma_buf->priv != obj); ++ return !!obj->import_attach; + } + + #ifdef CONFIG_LOCKDEP diff --git a/queue-6.12/i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch b/queue-6.12/i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch new file mode 100644 index 0000000000..da2161fbb7 --- /dev/null +++ b/queue-6.12/i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch @@ -0,0 +1,46 @@ +From e8d2d287e26d9bd9114cf258a123a6b70812442e Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Wed, 19 Mar 2025 09:08:01 -0700 +Subject: i3c: master: svc: Fix implicit fallthrough in svc_i3c_master_ibi_work() + +From: Nathan Chancellor + +commit e8d2d287e26d9bd9114cf258a123a6b70812442e upstream. + +Clang warns (or errors with CONFIG_WERROR=y): + + drivers/i3c/master/svc-i3c-master.c:596:2: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] + 596 | default: + | ^ + drivers/i3c/master/svc-i3c-master.c:596:2: note: insert 'break;' to avoid fall-through + 596 | default: + | ^ + | break; + 1 error generated. + +Clang is a little more pedantic than GCC, which does not warn when +falling through to a case that is just break or return. Clang's version +is more in line with the kernel's own stance in deprecated.rst, which +states that all switch/case blocks must end in either break, +fallthrough, continue, goto, or return. Add the missing break to silence +the warning. + +Fixes: 0430bf9bc1ac ("i3c: master: svc: Fix missing STOP for master request") +Signed-off-by: Nathan Chancellor +Link: https://lore.kernel.org/r/20250319-i3c-fix-clang-fallthrough-v1-1-d8e02be1ef5c@kernel.org +Signed-off-by: Alexandre Belloni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/i3c/master/svc-i3c-master.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/i3c/master/svc-i3c-master.c ++++ b/drivers/i3c/master/svc-i3c-master.c +@@ -512,6 +512,7 @@ static void svc_i3c_master_ibi_work(stru + break; + case SVC_I3C_MSTATUS_IBITYPE_MASTER_REQUEST: + svc_i3c_master_emit_stop(master); ++ break; + default: + break; + } diff --git a/queue-6.12/pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch b/queue-6.12/pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch new file mode 100644 index 0000000000..ea20c8efd7 --- /dev/null +++ b/queue-6.12/pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch @@ -0,0 +1,34 @@ +From 5a062c3c3b82004766bc3ece82b594d337076152 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Wed, 19 Mar 2025 10:05:47 +0300 +Subject: pinctrl: tegra: Fix off by one in tegra_pinctrl_get_group() + +From: Dan Carpenter + +commit 5a062c3c3b82004766bc3ece82b594d337076152 upstream. + +This should be >= pmx->soc->ngroups instead of > to avoid an out of +bounds access. The pmx->soc->groups[] array is allocated in +tegra_pinctrl_probe(). + +Fixes: c12bfa0fee65 ("pinctrl-tegra: Restore SFSEL bit when freeing pins") +Signed-off-by: Dan Carpenter +Reviewed-by: Kunwu Chan +Link: https://lore.kernel.org/82b40d9d-b437-42a9-9eb3-2328aa6877ac@stanley.mountain +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pinctrl/tegra/pinctrl-tegra.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pinctrl/tegra/pinctrl-tegra.c ++++ b/drivers/pinctrl/tegra/pinctrl-tegra.c +@@ -305,7 +305,7 @@ static const struct tegra_pingroup *tegr + { + struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); + +- if (group_index < 0 || group_index > pmx->soc->ngroups) ++ if (group_index < 0 || group_index >= pmx->soc->ngroups) + return NULL; + + return &pmx->soc->groups[group_index]; diff --git a/queue-6.12/serial-sh-sci-save-and-restore-more-registers.patch b/queue-6.12/serial-sh-sci-save-and-restore-more-registers.patch new file mode 100644 index 0000000000..60491fd9ca --- /dev/null +++ b/queue-6.12/serial-sh-sci-save-and-restore-more-registers.patch @@ -0,0 +1,108 @@ +From 81100b9a7b0515132996d62a7a676a77676cb6e3 Mon Sep 17 00:00:00 2001 +From: Geert Uytterhoeven +Date: Tue, 4 Mar 2025 20:06:11 +0100 +Subject: serial: sh-sci: Save and restore more registers + +From: Geert Uytterhoeven + +commit 81100b9a7b0515132996d62a7a676a77676cb6e3 upstream. + +On (H)SCIF with a Baud Rate Generator for External Clock (BRG), there +are multiple ways to configure the requested serial speed. If firmware +uses a different method than Linux, and if any debug info is printed +after the Bit Rate Register (SCBRR) is restored, but before termios is +reconfigured (which configures the alternative method), the system may +lock-up during resume. + +Fix this by saving and restoring the contents of the BRG Frequency +Division (SCDL) and Clock Select (SCCKS) registers as well. + +Also save and restore the HSCIF's Sampling Rate Register (HSSRR), which +configures the sampling point, and the SCIFA/SCIFB's Serial Port Control +and Data Registers (SCPCR/SCPDR), which configure the optional control +flow signals. + +After this, all registers that are not saved/restored are either: + - read-only, + - write-only, + - status registers containing flags with clear-after-set semantics, + - FIFO Data Count Trigger registers, which do not matter much for + the serial console. + +Fixes: 22a6984c5b5df8ea ("serial: sh-sci: Update the suspend/resume support") +Signed-off-by: Geert Uytterhoeven +Tested-by: Claudiu Beznea +Reviewed-by: Claudiu Beznea +Link: https://lore.kernel.org/r/11c2eab45d48211e75d8b8202cce60400880fe55.1741114989.git.geert+renesas@glider.be +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/sh-sci.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -105,10 +105,15 @@ struct plat_sci_reg { + }; + + struct sci_suspend_regs { ++ u16 scdl; ++ u16 sccks; + u16 scsmr; + u16 scscr; + u16 scfcr; + u16 scsptr; ++ u16 hssrr; ++ u16 scpcr; ++ u16 scpdr; + u8 scbrr; + u8 semr; + }; +@@ -3550,6 +3555,10 @@ static void sci_console_save(struct sci_ + struct sci_suspend_regs *regs = &s->suspend_regs; + struct uart_port *port = &s->port; + ++ if (sci_getreg(port, SCDL)->size) ++ regs->scdl = sci_serial_in(port, SCDL); ++ if (sci_getreg(port, SCCKS)->size) ++ regs->sccks = sci_serial_in(port, SCCKS); + if (sci_getreg(port, SCSMR)->size) + regs->scsmr = sci_serial_in(port, SCSMR); + if (sci_getreg(port, SCSCR)->size) +@@ -3560,6 +3569,12 @@ static void sci_console_save(struct sci_ + regs->scsptr = sci_serial_in(port, SCSPTR); + if (sci_getreg(port, SCBRR)->size) + regs->scbrr = sci_serial_in(port, SCBRR); ++ if (sci_getreg(port, HSSRR)->size) ++ regs->hssrr = sci_serial_in(port, HSSRR); ++ if (sci_getreg(port, SCPCR)->size) ++ regs->scpcr = sci_serial_in(port, SCPCR); ++ if (sci_getreg(port, SCPDR)->size) ++ regs->scpdr = sci_serial_in(port, SCPDR); + if (sci_getreg(port, SEMR)->size) + regs->semr = sci_serial_in(port, SEMR); + } +@@ -3569,6 +3584,10 @@ static void sci_console_restore(struct s + struct sci_suspend_regs *regs = &s->suspend_regs; + struct uart_port *port = &s->port; + ++ if (sci_getreg(port, SCDL)->size) ++ sci_serial_out(port, SCDL, regs->scdl); ++ if (sci_getreg(port, SCCKS)->size) ++ sci_serial_out(port, SCCKS, regs->sccks); + if (sci_getreg(port, SCSMR)->size) + sci_serial_out(port, SCSMR, regs->scsmr); + if (sci_getreg(port, SCSCR)->size) +@@ -3579,6 +3598,12 @@ static void sci_console_restore(struct s + sci_serial_out(port, SCSPTR, regs->scsptr); + if (sci_getreg(port, SCBRR)->size) + sci_serial_out(port, SCBRR, regs->scbrr); ++ if (sci_getreg(port, HSSRR)->size) ++ sci_serial_out(port, HSSRR, regs->hssrr); ++ if (sci_getreg(port, SCPCR)->size) ++ sci_serial_out(port, SCPCR, regs->scpcr); ++ if (sci_getreg(port, SCPDR)->size) ++ sci_serial_out(port, SCPDR, regs->scpdr); + if (sci_getreg(port, SEMR)->size) + sci_serial_out(port, SEMR, regs->semr); + } diff --git a/queue-6.12/series b/queue-6.12/series index 554e5eee5d..eb08b83302 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -616,3 +616,12 @@ gcc-15-make-unterminated-string-initialization-just-a-warning.patch gcc-15-disable-wunterminated-string-initialization-entirely-for-now-a-commit-9d7a0577c9db35c4cc52db90bc415ea248446472-upstream.patch fix-mis-uses-of-cc-option-for-warning-disablement.patch kbuild-properly-disable-wunterminated-string-initialization-for-clang.patch +drm-amd-display-exit-idle-optimizations-before-accessing-phy.patch +bpf-abort-verification-if-env-cur_state-loop_entry-null.patch +serial-sh-sci-save-and-restore-more-registers.patch +drm-amdkfd-correct-f8_mode-for-gfx950.patch +watchdog-aspeed-fix-64-bit-division.patch +pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch +i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch +x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch +drm-gem-internally-test-import_attach-for-imported-objects.patch diff --git a/queue-6.12/watchdog-aspeed-fix-64-bit-division.patch b/queue-6.12/watchdog-aspeed-fix-64-bit-division.patch new file mode 100644 index 0000000000..44b6511b8c --- /dev/null +++ b/queue-6.12/watchdog-aspeed-fix-64-bit-division.patch @@ -0,0 +1,38 @@ +From 48a136639ec233614a61653e19f559977d5da2b5 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Fri, 14 Mar 2025 17:02:44 +0100 +Subject: watchdog: aspeed: fix 64-bit division + +From: Arnd Bergmann + +commit 48a136639ec233614a61653e19f559977d5da2b5 upstream. + +On 32-bit architectures, the new calculation causes a build failure: + +ld.lld-21: error: undefined symbol: __aeabi_uldivmod + +Since neither value is ever larger than a register, cast both +sides into a uintptr_t. + +Fixes: 5c03f9f4d362 ("watchdog: aspeed: Update bootstatus handling") +Signed-off-by: Arnd Bergmann +Reviewed-by: Guenter Roeck +Link: https://lore.kernel.org/r/20250314160248.502324-1-arnd@kernel.org +Signed-off-by: Guenter Roeck +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Greg Kroah-Hartman +--- + drivers/watchdog/aspeed_wdt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/watchdog/aspeed_wdt.c ++++ b/drivers/watchdog/aspeed_wdt.c +@@ -254,7 +254,7 @@ static void aspeed_wdt_update_bootstatus + + if (!of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2400-wdt")) { + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- idx = ((intptr_t)wdt->base & 0x00000fff) / resource_size(res); ++ idx = ((intptr_t)wdt->base & 0x00000fff) / (uintptr_t)resource_size(res); + } + + scu_base = syscon_regmap_lookup_by_compatible(scu.compatible); diff --git a/queue-6.12/x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch b/queue-6.12/x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch new file mode 100644 index 0000000000..991d1db9fc --- /dev/null +++ b/queue-6.12/x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch @@ -0,0 +1,106 @@ +From 7170130e4c72ce0caa0cb42a1627c635cc262821 Mon Sep 17 00:00:00 2001 +From: Balbir Singh +Date: Tue, 1 Apr 2025 11:07:52 +1100 +Subject: x86/mm/init: Handle the special case of device private pages in add_pages(), to not increase max_pfn and trigger dma_addressing_limited() bounce buffers + bounce buffers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Balbir Singh + +commit 7170130e4c72ce0caa0cb42a1627c635cc262821 upstream. + +As Bert Karwatzki reported, the following recent commit causes a +performance regression on AMD iGPU and dGPU systems: + + 7ffb791423c7 ("x86/kaslr: Reduce KASLR entropy on most x86 systems") + +It exposed a bug with nokaslr and zone device interaction. + +The root cause of the bug is that, the GPU driver registers a zone +device private memory region. When KASLR is disabled or the above commit +is applied, the direct_map_physmem_end is set to much higher than 10 TiB +typically to the 64TiB address. When zone device private memory is added +to the system via add_pages(), it bumps up the max_pfn to the same +value. This causes dma_addressing_limited() to return true, since the +device cannot address memory all the way up to max_pfn. + +This caused a regression for games played on the iGPU, as it resulted in +the DMA32 zone being used for GPU allocations. + +Fix this by not bumping up max_pfn on x86 systems, when pgmap is passed +into add_pages(). The presence of pgmap is used to determine if device +private memory is being added via add_pages(). + +More details: + +devm_request_mem_region() and request_free_mem_region() request for +device private memory. iomem_resource is passed as the base resource +with start and end parameters. iomem_resource's end depends on several +factors, including the platform and virtualization. On x86 for example +on bare metal, this value is set to boot_cpu_data.x86_phys_bits. +boot_cpu_data.x86_phys_bits can change depending on support for MKTME. +By default it is set to the same as log2(direct_map_physmem_end) which +is 46 to 52 bits depending on the number of levels in the page table. +The allocation routines used iomem_resource's end and +direct_map_physmem_end to figure out where to allocate the region. + +[ arch/powerpc is also impacted by this problem, but this patch does not fix + the issue for PowerPC. ] + +Testing: + + 1. Tested on a virtual machine with test_hmm for zone device inseration + + 2. A previous version of this patch was tested by Bert, please see: + https://lore.kernel.org/lkml/d87680bab997fdc9fb4e638983132af235d9a03a.camel@web.de/ + +[ mingo: Clarified the comments and the changelog. ] + +Reported-by: Bert Karwatzki +Tested-by: Bert Karwatzki +Fixes: 7ffb791423c7 ("x86/kaslr: Reduce KASLR entropy on most x86 systems") +Signed-off-by: Balbir Singh +Signed-off-by: Ingo Molnar +Cc: Brian Gerst +Cc: Juergen Gross +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Andrew Morton +Cc: Christoph Hellwig +Cc: Pierre-Eric Pelloux-Prayer +Cc: Alex Deucher +Cc: Christian König +Cc: David Airlie +Cc: Simona Vetter +Link: https://lore.kernel.org/r/20250401000752.249348-1-balbirs@nvidia.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/init_64.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -967,9 +967,18 @@ int add_pages(int nid, unsigned long sta + ret = __add_pages(nid, start_pfn, nr_pages, params); + WARN_ON_ONCE(ret); + +- /* update max_pfn, max_low_pfn and high_memory */ +- update_end_of_memory_vars(start_pfn << PAGE_SHIFT, +- nr_pages << PAGE_SHIFT); ++ /* ++ * Special case: add_pages() is called by memremap_pages() for adding device ++ * private pages. Do not bump up max_pfn in the device private path, ++ * because max_pfn changes affect dma_addressing_limited(). ++ * ++ * dma_addressing_limited() returning true when max_pfn is the device's ++ * addressable memory can force device drivers to use bounce buffers ++ * and impact their performance negatively: ++ */ ++ if (!params->pgmap) ++ /* update max_pfn, max_low_pfn and high_memory */ ++ update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); + + return ret; + }