From: Sasha Levin Date: Fri, 21 Feb 2025 16:51:10 +0000 (-0500) Subject: Fixes for 6.12 X-Git-Tag: v6.6.80~27^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a0da2d723c53a9e979fb69293bb290f2e3519365;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.12 Signed-off-by: Sasha Levin --- diff --git a/queue-6.12/accel-ivpu-add-coredump-support.patch b/queue-6.12/accel-ivpu-add-coredump-support.patch new file mode 100644 index 0000000000..e2857eef66 --- /dev/null +++ b/queue-6.12/accel-ivpu-add-coredump-support.patch @@ -0,0 +1,231 @@ +From d7d42e419d6397fbb94bfaeca7bddbd6fbfb8c67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Sep 2024 21:52:58 +0200 +Subject: accel/ivpu: Add coredump support + +From: Karol Wachowski + +[ Upstream commit bade0340526827d03d9c293450c0422beba77f04 ] + +Use coredump (if available) to collect FW logs in case of a FW crash. +This makes dmesg more readable and allows to collect more log data. + +Signed-off-by: Karol Wachowski +Reviewed-by: Jacek Lawrynowicz +Reviewed-by: Jeffrey Hugo +Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-8-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Jacek Lawrynowicz +Stable-dep-of: 41a2d8286c90 ("accel/ivpu: Fix error handling in recovery/reset") +Signed-off-by: Sasha Levin +--- + drivers/accel/ivpu/Kconfig | 1 + + drivers/accel/ivpu/Makefile | 1 + + drivers/accel/ivpu/ivpu_coredump.c | 39 ++++++++++++++++++++++++++++++ + drivers/accel/ivpu/ivpu_coredump.h | 25 +++++++++++++++++++ + drivers/accel/ivpu/ivpu_drv.c | 5 ++-- + drivers/accel/ivpu/ivpu_fw_log.h | 8 ------ + drivers/accel/ivpu/ivpu_pm.c | 9 ++++--- + 7 files changed, 74 insertions(+), 14 deletions(-) + create mode 100644 drivers/accel/ivpu/ivpu_coredump.c + create mode 100644 drivers/accel/ivpu/ivpu_coredump.h + +diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig +index 682c532452863..e4d418b44626e 100644 +--- a/drivers/accel/ivpu/Kconfig ++++ b/drivers/accel/ivpu/Kconfig +@@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU + select FW_LOADER + select DRM_GEM_SHMEM_HELPER + select GENERIC_ALLOCATOR ++ select WANT_DEV_COREDUMP + help + Choose this option if you have a system with an 14th generation + Intel CPU (Meteor Lake) or newer. 
Intel NPU (formerly called Intel VPU) +diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile +index ebd682a42eb12..232ea6d28c6e2 100644 +--- a/drivers/accel/ivpu/Makefile ++++ b/drivers/accel/ivpu/Makefile +@@ -19,5 +19,6 @@ intel_vpu-y := \ + ivpu_sysfs.o + + intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o ++intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o + + obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o +diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c +new file mode 100644 +index 0000000000000..16ad0c30818cc +--- /dev/null ++++ b/drivers/accel/ivpu/ivpu_coredump.c +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2020-2024 Intel Corporation ++ */ ++ ++#include ++#include ++ ++#include "ivpu_coredump.h" ++#include "ivpu_fw.h" ++#include "ivpu_gem.h" ++#include "vpu_boot_api.h" ++ ++#define CRASH_DUMP_HEADER "Intel NPU crash dump" ++#define CRASH_DUMP_HEADERS_SIZE SZ_4K ++ ++void ivpu_dev_coredump(struct ivpu_device *vdev) ++{ ++ struct drm_print_iterator pi = {}; ++ struct drm_printer p; ++ size_t coredump_size; ++ char *coredump; ++ ++ coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE + ++ ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb); ++ coredump = vmalloc(coredump_size); ++ if (!coredump) ++ return; ++ ++ pi.data = coredump; ++ pi.remain = coredump_size; ++ p = drm_coredump_printer(&pi); ++ ++ drm_printf(&p, "%s\n", CRASH_DUMP_HEADER); ++ drm_printf(&p, "FW version: %s\n", vdev->fw->version); ++ ivpu_fw_log_print(vdev, false, &p); ++ ++ dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL); ++} +diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h +new file mode 100644 +index 0000000000000..8efb09d024411 +--- /dev/null ++++ b/drivers/accel/ivpu/ivpu_coredump.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2020-2024 Intel Corporation ++ */ ++ ++#ifndef __IVPU_COREDUMP_H__ ++#define __IVPU_COREDUMP_H__ ++ ++#include ++ ++#include "ivpu_drv.h" ++#include "ivpu_fw_log.h" ++ ++#ifdef CONFIG_DEV_COREDUMP ++void ivpu_dev_coredump(struct ivpu_device *vdev); ++#else ++static inline void ivpu_dev_coredump(struct ivpu_device *vdev) ++{ ++ struct drm_printer p = drm_info_printer(vdev->drm.dev); ++ ++ ivpu_fw_log_print(vdev, false, &p); ++} ++#endif ++ ++#endif /* __IVPU_COREDUMP_H__ */ +diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c +index c91400ecf9265..38b4158f52784 100644 +--- a/drivers/accel/ivpu/ivpu_drv.c ++++ b/drivers/accel/ivpu/ivpu_drv.c +@@ -14,7 +14,7 @@ + #include + #include + +-#include "vpu_boot_api.h" ++#include "ivpu_coredump.h" + #include "ivpu_debugfs.h" + #include "ivpu_drv.h" + #include "ivpu_fw.h" +@@ -29,6 +29,7 @@ + #include "ivpu_ms.h" + #include "ivpu_pm.h" + #include "ivpu_sysfs.h" ++#include "vpu_boot_api.h" + + #ifndef DRIVER_VERSION_STR + #define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." 
\ +@@ -382,7 +383,7 @@ int ivpu_boot(struct ivpu_device *vdev) + ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + ivpu_hw_diagnose_failure(vdev); + ivpu_mmu_evtq_dump(vdev); +- ivpu_fw_log_dump(vdev); ++ ivpu_dev_coredump(vdev); + return ret; + } + +diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h +index 0b2573f6f3151..4b390a99699d6 100644 +--- a/drivers/accel/ivpu/ivpu_fw_log.h ++++ b/drivers/accel/ivpu/ivpu_fw_log.h +@@ -8,8 +8,6 @@ + + #include + +-#include +- + #include "ivpu_drv.h" + + #define IVPU_FW_LOG_DEFAULT 0 +@@ -28,11 +26,5 @@ extern unsigned int ivpu_log_level; + void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); + void ivpu_fw_log_clear(struct ivpu_device *vdev); + +-static inline void ivpu_fw_log_dump(struct ivpu_device *vdev) +-{ +- struct drm_printer p = drm_info_printer(vdev->drm.dev); +- +- ivpu_fw_log_print(vdev, false, &p); +-} + + #endif /* __IVPU_FW_LOG_H__ */ +diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c +index ef9a4ba18cb8a..0110f5ee7d069 100644 +--- a/drivers/accel/ivpu/ivpu_pm.c ++++ b/drivers/accel/ivpu/ivpu_pm.c +@@ -9,17 +9,18 @@ + #include + #include + +-#include "vpu_boot_api.h" ++#include "ivpu_coredump.h" + #include "ivpu_drv.h" +-#include "ivpu_hw.h" + #include "ivpu_fw.h" + #include "ivpu_fw_log.h" ++#include "ivpu_hw.h" + #include "ivpu_ipc.h" + #include "ivpu_job.h" + #include "ivpu_jsm_msg.h" + #include "ivpu_mmu.h" + #include "ivpu_ms.h" + #include "ivpu_pm.h" ++#include "vpu_boot_api.h" + + static bool ivpu_disable_recovery; + module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); +@@ -123,7 +124,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work) + if (ret) + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + +- ivpu_fw_log_dump(vdev); ++ ivpu_dev_coredump(vdev); + + atomic_inc(&vdev->pm->reset_counter); + atomic_set(&vdev->pm->reset_pending, 1); +@@ -262,7 +263,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) + if (!is_idle || ret_d0i3) { + ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); + atomic_inc(&vdev->pm->reset_counter); +- ivpu_fw_log_dump(vdev); ++ ivpu_dev_coredump(vdev); + ivpu_pm_prepare_cold_boot(vdev); + } else { + ivpu_pm_prepare_warm_boot(vdev); +-- +2.39.5 + diff --git a/queue-6.12/accel-ivpu-add-fw-state-dump-on-tdr.patch b/queue-6.12/accel-ivpu-add-fw-state-dump-on-tdr.patch new file mode 100644 index 0000000000..3f8f53a3f2 --- /dev/null +++ b/queue-6.12/accel-ivpu-add-fw-state-dump-on-tdr.patch @@ -0,0 +1,163 @@ +From 6819fe7aae75d7acf2b00d39bacd3bf062a5411c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Sep 2024 21:52:59 +0200 +Subject: accel/ivpu: Add FW state dump on TDR + +From: Tomasz Rusinowicz + +[ Upstream commit 5e162f872d7af8f041b143536617ab2563ea7de5 ] + +Send JSM state dump message at the beginning of TDR handler. This allows +FW to collect debug info in the FW log before the state of the NPU is +lost allowing to analyze the cause of a TDR. + +Wait a predefined timeout (10 ms) so the FW has a chance to write debug +logs. We cannot wait for JSM response at this point because IRQs are +already disabled before TDR handler is invoked. 
+ +Signed-off-by: Tomasz Rusinowicz +Reviewed-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-9-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Jacek Lawrynowicz +Stable-dep-of: 41a2d8286c90 ("accel/ivpu: Fix error handling in recovery/reset") +Signed-off-by: Sasha Levin +--- + drivers/accel/ivpu/ivpu_drv.h | 1 + + drivers/accel/ivpu/ivpu_hw.c | 3 +++ + drivers/accel/ivpu/ivpu_ipc.c | 26 ++++++++++++++++++++++++++ + drivers/accel/ivpu/ivpu_ipc.h | 2 ++ + drivers/accel/ivpu/ivpu_jsm_msg.c | 8 ++++++++ + drivers/accel/ivpu/ivpu_jsm_msg.h | 2 ++ + drivers/accel/ivpu/ivpu_pm.c | 1 + + 7 files changed, 43 insertions(+) + +diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h +index 63f13b697eed7..2b30cc2e9272e 100644 +--- a/drivers/accel/ivpu/ivpu_drv.h ++++ b/drivers/accel/ivpu/ivpu_drv.h +@@ -152,6 +152,7 @@ struct ivpu_device { + int tdr; + int autosuspend; + int d0i3_entry_msg; ++ int state_dump_msg; + } timeout; + }; + +diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c +index e69c0613513f1..08b3cef58fd2d 100644 +--- a/drivers/accel/ivpu/ivpu_hw.c ++++ b/drivers/accel/ivpu/ivpu_hw.c +@@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev) + vdev->timeout.tdr = 2000000; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 500; ++ vdev->timeout.state_dump_msg = 10; + } else if (ivpu_is_simics(vdev)) { + vdev->timeout.boot = 50; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 10000; + vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 100; ++ vdev->timeout.state_dump_msg = 10; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; +@@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev) + else + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 5; ++ vdev->timeout.state_dump_msg = 10; + } + } + +diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c +index 29b723039a345..13c8a12162e89 100644 +--- a/drivers/accel/ivpu/ivpu_ipc.c ++++ b/drivers/accel/ivpu/ivpu_ipc.c +@@ -353,6 +353,32 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + return ret; + } + ++int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, ++ u32 channel, unsigned long timeout_ms) ++{ ++ struct ivpu_ipc_consumer cons; ++ int ret; ++ ++ ret = ivpu_rpm_get(vdev); ++ if (ret < 0) ++ return ret; ++ ++ ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ++ ++ ret = ivpu_ipc_send(vdev, &cons, req); ++ if (ret) { ++ ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); ++ goto consumer_del; ++ } ++ ++ msleep(timeout_ms); ++ ++consumer_del: ++ ivpu_ipc_consumer_del(vdev, &cons); ++ ivpu_rpm_put(vdev); ++ return ret; ++} ++ + static bool + ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h +index fb4de7fb8210e..b4dfb504679ba 100644 +--- a/drivers/accel/ivpu/ivpu_ipc.h ++++ b/drivers/accel/ivpu/ivpu_ipc.h +@@ -107,5 +107,7 @@ int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg + int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms); ++int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, ++ u32 channel, unsigned long timeout_ms); + + 
#endif /* __IVPU_IPC_H__ */ +diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c +index 88105963c1b28..f7618b605f021 100644 +--- a/drivers/accel/ivpu/ivpu_jsm_msg.c ++++ b/drivers/accel/ivpu/ivpu_jsm_msg.c +@@ -555,3 +555,11 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev) + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + } ++ ++int ivpu_jsm_state_dump(struct ivpu_device *vdev) ++{ ++ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; ++ ++ return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, ++ vdev->timeout.state_dump_msg); ++} +diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h +index e4e42c0ff6e65..9e84d3526a146 100644 +--- a/drivers/accel/ivpu/ivpu_jsm_msg.h ++++ b/drivers/accel/ivpu/ivpu_jsm_msg.h +@@ -43,4 +43,6 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas + u64 buffer_size, u32 *sample_size, u64 *info_size); + int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); + int ivpu_jsm_dct_disable(struct ivpu_device *vdev); ++int ivpu_jsm_state_dump(struct ivpu_device *vdev); ++ + #endif +diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c +index 0110f5ee7d069..848d7468d48ce 100644 +--- a/drivers/accel/ivpu/ivpu_pm.c ++++ b/drivers/accel/ivpu/ivpu_pm.c +@@ -124,6 +124,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work) + if (ret) + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + ++ ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + + atomic_inc(&vdev->pm->reset_counter); +-- +2.39.5 + diff --git a/queue-6.12/accel-ivpu-fix-error-handling-in-recovery-reset.patch b/queue-6.12/accel-ivpu-fix-error-handling-in-recovery-reset.patch new file mode 100644 index 0000000000..5466bea630 --- /dev/null +++ b/queue-6.12/accel-ivpu-fix-error-handling-in-recovery-reset.patch @@ -0,0 +1,157 @@ +From 3df6a25a8ee97b102cc23500e91c360abdfed8d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Jan 2025 13:40:09 +0100 +Subject: accel/ivpu: Fix error handling in recovery/reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jacek Lawrynowicz + +[ Upstream commit 41a2d8286c905614f29007f1bc8e652d54654b82 ] + +Disable runtime PM for the duration of reset/recovery so it is possible +to set the correct runtime PM state depending on the outcome of the +`ivpu_resume()`. Don’t suspend or reset the HW if the NPU is suspended +when the reset/recovery is requested. Also, move common reset/recovery +code to separate functions for better code readability. 
+ +Fixes: 27d19268cf39 ("accel/ivpu: Improve recovery and reset support") +Cc: stable@vger.kernel.org # v6.8+ +Reviewed-by: Maciej Falkowski +Reviewed-by: Jeffrey Hugo +Signed-off-by: Jacek Lawrynowicz +Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-4-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Sasha Levin +--- + drivers/accel/ivpu/ivpu_pm.c | 79 ++++++++++++++++++++---------------- + 1 file changed, 43 insertions(+), 36 deletions(-) + +diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c +index 848d7468d48ce..fbb61a2c3b19c 100644 +--- a/drivers/accel/ivpu/ivpu_pm.c ++++ b/drivers/accel/ivpu/ivpu_pm.c +@@ -111,41 +111,57 @@ static int ivpu_resume(struct ivpu_device *vdev) + return ret; + } + +-static void ivpu_pm_recovery_work(struct work_struct *work) ++static void ivpu_pm_reset_begin(struct ivpu_device *vdev) + { +- struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); +- struct ivpu_device *vdev = pm->vdev; +- char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; +- int ret; +- +- ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); +- +- ret = pm_runtime_resume_and_get(vdev->drm.dev); +- if (ret) +- ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); +- +- ivpu_jsm_state_dump(vdev); +- ivpu_dev_coredump(vdev); ++ pm_runtime_disable(vdev->drm.dev); + + atomic_inc(&vdev->pm->reset_counter); + atomic_set(&vdev->pm->reset_pending, 1); + down_write(&vdev->pm->reset_lock); ++} ++ ++static void ivpu_pm_reset_complete(struct ivpu_device *vdev) ++{ ++ int ret; + +- ivpu_suspend(vdev); + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); + + ret = ivpu_resume(vdev); +- if (ret) ++ if (ret) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); ++ pm_runtime_set_suspended(vdev->drm.dev); ++ } else { ++ pm_runtime_set_active(vdev->drm.dev); ++ } + + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); + +- kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); + pm_runtime_mark_last_busy(vdev->drm.dev); +- pm_runtime_put_autosuspend(vdev->drm.dev); ++ pm_runtime_enable(vdev->drm.dev); ++} ++ ++static void ivpu_pm_recovery_work(struct work_struct *work) ++{ ++ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); ++ struct ivpu_device *vdev = pm->vdev; ++ char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; ++ ++ ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); ++ ++ ivpu_pm_reset_begin(vdev); ++ ++ if (!pm_runtime_status_suspended(vdev->drm.dev)) { ++ ivpu_jsm_state_dump(vdev); ++ ivpu_dev_coredump(vdev); ++ ivpu_suspend(vdev); ++ } ++ ++ ivpu_pm_reset_complete(vdev); ++ ++ kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); + } + + void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) +@@ -316,16 +332,13 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + ivpu_dbg(vdev, PM, "Pre-reset..\n"); +- atomic_inc(&vdev->pm->reset_counter); +- atomic_set(&vdev->pm->reset_pending, 1); + +- pm_runtime_get_sync(vdev->drm.dev); +- down_write(&vdev->pm->reset_lock); +- ivpu_prepare_for_reset(vdev); +- ivpu_hw_reset(vdev); +- ivpu_pm_prepare_cold_boot(vdev); +- ivpu_jobs_abort_all(vdev); +- ivpu_ms_cleanup_all(vdev); ++ ivpu_pm_reset_begin(vdev); ++ ++ if (!pm_runtime_status_suspended(vdev->drm.dev)) { ++ ivpu_prepare_for_reset(vdev); ++ ivpu_hw_reset(vdev); ++ } + + 
ivpu_dbg(vdev, PM, "Pre-reset done.\n"); + } +@@ -333,18 +346,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) + void ivpu_pm_reset_done_cb(struct pci_dev *pdev) + { + struct ivpu_device *vdev = pci_get_drvdata(pdev); +- int ret; + + ivpu_dbg(vdev, PM, "Post-reset..\n"); +- ret = ivpu_resume(vdev); +- if (ret) +- ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); +- up_write(&vdev->pm->reset_lock); +- atomic_set(&vdev->pm->reset_pending, 0); +- ivpu_dbg(vdev, PM, "Post-reset done.\n"); + +- pm_runtime_mark_last_busy(vdev->drm.dev); +- pm_runtime_put_autosuspend(vdev->drm.dev); ++ ivpu_pm_reset_complete(vdev); ++ ++ ivpu_dbg(vdev, PM, "Post-reset done.\n"); + } + + void ivpu_pm_init(struct ivpu_device *vdev) +-- +2.39.5 + diff --git a/queue-6.12/accel-ivpu-limit-fw-version-string-length.patch b/queue-6.12/accel-ivpu-limit-fw-version-string-length.patch new file mode 100644 index 0000000000..ace47969ef --- /dev/null +++ b/queue-6.12/accel-ivpu-limit-fw-version-string-length.patch @@ -0,0 +1,78 @@ +From d3f9b007c136816ed5de08f54f76427855ffa43c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Sep 2024 21:52:57 +0200 +Subject: accel/ivpu: Limit FW version string length + +From: Jacek Lawrynowicz + +[ Upstream commit 990b1e3d150104249115a0ad81ea77c53b28f0f8 ] + +Limit FW version string, when parsing FW binary, to 256 bytes and +always add NULL-terminate it. + +Reviewed-by: Karol Wachowski +Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-7-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Jacek Lawrynowicz +Stable-dep-of: 41a2d8286c90 ("accel/ivpu: Fix error handling in recovery/reset") +Signed-off-by: Sasha Levin +--- + drivers/accel/ivpu/ivpu_fw.c | 7 ++++--- + drivers/accel/ivpu/ivpu_fw.h | 6 +++++- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c +index ede6165e09d90..b2b6d89f06537 100644 +--- a/drivers/accel/ivpu/ivpu_fw.c ++++ b/drivers/accel/ivpu/ivpu_fw.c +@@ -25,7 +25,6 @@ + #define FW_SHAVE_NN_MAX_SIZE SZ_2M + #define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) + #define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) +-#define FW_VERSION_HEADER_SIZE SZ_4K + #define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) + + #define WATCHDOG_MSS_REDIRECT 32 +@@ -191,8 +190,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) + ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", + fw_hdr->header_version, fw_hdr->image_format); + +- ivpu_info(vdev, "Firmware: %s, version: %s", fw->name, +- (const char *)fw_hdr + VPU_FW_HEADER_SIZE); ++ if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE)) ++ ivpu_warn(vdev, "Missing firmware version\n"); ++ ++ ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version); + + if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3)) + return -EINVAL; +diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h +index 40d9d17be3f52..5e8eb608b70f1 100644 +--- a/drivers/accel/ivpu/ivpu_fw.h ++++ b/drivers/accel/ivpu/ivpu_fw.h +@@ -1,11 +1,14 @@ + /* SPDX-License-Identifier: GPL-2.0-only */ + /* +- * Copyright (C) 2020-2023 Intel Corporation ++ * Copyright (C) 2020-2024 Intel Corporation + */ + + #ifndef __IVPU_FW_H__ + #define __IVPU_FW_H__ + ++#define FW_VERSION_HEADER_SIZE SZ_4K ++#define FW_VERSION_STR_SIZE SZ_256 ++ + struct ivpu_device; + struct ivpu_bo; + struct vpu_boot_params; +@@ -13,6 +16,7 @@ struct vpu_boot_params; + struct 
ivpu_fw_info { + const struct firmware *file; + const char *name; ++ char version[FW_VERSION_STR_SIZE]; + struct ivpu_bo *mem; + struct ivpu_bo *mem_shave_nn; + struct ivpu_bo *mem_log_crit; +-- +2.39.5 + diff --git a/queue-6.12/alsa-hda-cirrus-correct-the-full-scale-volume-set-lo.patch b/queue-6.12/alsa-hda-cirrus-correct-the-full-scale-volume-set-lo.patch new file mode 100644 index 0000000000..0a8908549c --- /dev/null +++ b/queue-6.12/alsa-hda-cirrus-correct-the-full-scale-volume-set-lo.patch @@ -0,0 +1,121 @@ +From dff8ed2c94e70859af063b9b4b696f73658c9541 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 21:07:28 +0000 +Subject: ALSA: hda/cirrus: Correct the full scale volume set logic + +From: Vitaly Rodionov + +[ Upstream commit 08b613b9e2ba431db3bd15cb68ca72472a50ef5c ] + +This patch corrects the full-scale volume setting logic. On certain +platforms, the full-scale volume bit is required. The current logic +mistakenly sets this bit and incorrectly clears reserved bit 0, causing +the headphone output to be muted. + +Fixes: 342b6b610ae2 ("ALSA: hda/cs8409: Fix Full Scale Volume setting for all variants") +Signed-off-by: Vitaly Rodionov +Link: https://patch.msgid.link/20250214210736.30814-1-vitalyr@opensource.cirrus.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_cs8409-tables.c | 6 +++--- + sound/pci/hda/patch_cs8409.c | 20 +++++++++++--------- + sound/pci/hda/patch_cs8409.h | 5 +++-- + 3 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c +index 759f48038273d..621f947e38174 100644 +--- a/sound/pci/hda/patch_cs8409-tables.c ++++ b/sound/pci/hda/patch_cs8409-tables.c +@@ -121,7 +121,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = { + { CS42L42_MIXER_CHA_VOL, 0x3F }, + { CS42L42_MIXER_CHB_VOL, 0x3F }, + { CS42L42_MIXER_ADC_VOL, 0x3f }, +- { CS42L42_HP_CTL, 0x03 }, ++ { CS42L42_HP_CTL, 0x0D }, + { CS42L42_MIC_DET_CTL1, 0xB6 }, + { CS42L42_TIPSENSE_CTL, 0xC2 }, + { CS42L42_HS_CLAMP_DISABLE, 0x01 }, +@@ -315,7 +315,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = { + { CS42L42_ASP_TX_SZ_EN, 0x01 }, + { CS42L42_PWR_CTL1, 0x0A }, + { CS42L42_PWR_CTL2, 0x84 }, +- { CS42L42_HP_CTL, 0x03 }, ++ { CS42L42_HP_CTL, 0x0D }, + { CS42L42_MIXER_CHA_VOL, 0x3F }, + { CS42L42_MIXER_CHB_VOL, 0x3F }, + { CS42L42_MIXER_ADC_VOL, 0x3f }, +@@ -371,7 +371,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = { + { CS42L42_ASP_TX_SZ_EN, 0x00 }, + { CS42L42_PWR_CTL1, 0x0E }, + { CS42L42_PWR_CTL2, 0x84 }, +- { CS42L42_HP_CTL, 0x01 }, ++ { CS42L42_HP_CTL, 0x0D }, + { CS42L42_MIXER_CHA_VOL, 0x3F }, + { CS42L42_MIXER_CHB_VOL, 0x3F }, + { CS42L42_MIXER_ADC_VOL, 0x3f }, +diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c +index 614327218634c..b760332a4e357 100644 +--- a/sound/pci/hda/patch_cs8409.c ++++ b/sound/pci/hda/patch_cs8409.c +@@ -876,7 +876,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42) + { CS42L42_DET_INT_STATUS2, 0x00 }, + { CS42L42_TSRS_PLUG_STATUS, 0x00 }, + }; +- int fsv_old, fsv_new; ++ unsigned int fsv; + + /* Bring CS42L42 out of Reset */ + spec->gpio_data = snd_hda_codec_read(codec, CS8409_PIN_AFG, 0, AC_VERB_GET_GPIO_DATA, 0); +@@ -893,13 +893,15 @@ static void cs42l42_resume(struct sub_codec *cs42l42) + /* Clear interrupts, by reading interrupt status registers */ + cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs)); + +- fsv_old = cs8409_i2c_read(cs42l42, 
CS42L42_HP_CTL); +- if (cs42l42->full_scale_vol == CS42L42_FULL_SCALE_VOL_0DB) +- fsv_new = fsv_old & ~CS42L42_FULL_SCALE_VOL_MASK; +- else +- fsv_new = fsv_old & CS42L42_FULL_SCALE_VOL_MASK; +- if (fsv_new != fsv_old) +- cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv_new); ++ fsv = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL); ++ if (cs42l42->full_scale_vol) { ++ // Set the full scale volume bit ++ fsv |= CS42L42_FULL_SCALE_VOL_MASK; ++ cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); ++ } ++ // Unmute analog channels A and B ++ fsv = (fsv & ~CS42L42_ANA_MUTE_AB); ++ cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); + + /* we have to explicitly allow unsol event handling even during the + * resume phase so that the jack event is processed properly +@@ -920,7 +922,7 @@ static void cs42l42_suspend(struct sub_codec *cs42l42) + { CS42L42_MIXER_CHA_VOL, 0x3F }, + { CS42L42_MIXER_ADC_VOL, 0x3F }, + { CS42L42_MIXER_CHB_VOL, 0x3F }, +- { CS42L42_HP_CTL, 0x0F }, ++ { CS42L42_HP_CTL, 0x0D }, + { CS42L42_ASP_RX_DAI0_EN, 0x00 }, + { CS42L42_ASP_CLK_CFG, 0x00 }, + { CS42L42_PWR_CTL1, 0xFE }, +diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h +index 5e48115caf096..14645d25e70fd 100644 +--- a/sound/pci/hda/patch_cs8409.h ++++ b/sound/pci/hda/patch_cs8409.h +@@ -230,9 +230,10 @@ enum cs8409_coefficient_index_registers { + #define CS42L42_PDN_TIMEOUT_US (250000) + #define CS42L42_PDN_SLEEP_US (2000) + #define CS42L42_INIT_TIMEOUT_MS (45) ++#define CS42L42_ANA_MUTE_AB (0x0C) + #define CS42L42_FULL_SCALE_VOL_MASK (2) +-#define CS42L42_FULL_SCALE_VOL_0DB (1) +-#define CS42L42_FULL_SCALE_VOL_MINUS6DB (0) ++#define CS42L42_FULL_SCALE_VOL_0DB (0) ++#define CS42L42_FULL_SCALE_VOL_MINUS6DB (1) + + /* Dell BULLSEYE / WARLOCK / CYBORG Specific Definitions */ + +-- +2.39.5 + diff --git a/queue-6.12/alsa-hda-realtek-fixup-alc225-depop-procedure.patch b/queue-6.12/alsa-hda-realtek-fixup-alc225-depop-procedure.patch new file mode 100644 index 0000000000..2a27d2dac9 --- /dev/null +++ b/queue-6.12/alsa-hda-realtek-fixup-alc225-depop-procedure.patch @@ -0,0 +1,36 @@ +From cd910ed10760a257a262e2c82aed731ac18dbc13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Feb 2025 14:40:46 +0800 +Subject: ALSA: hda/realtek: Fixup ALC225 depop procedure + +From: Kailang Yang + +[ Upstream commit 174448badb4409491bfba2e6b46f7aa078741c5e ] + +Headset MIC will no function when power_save=0. 
+ +Fixes: 1fd50509fe14 ("ALSA: hda/realtek: Update ALC225 depop procedure") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219743 +Signed-off-by: Kailang Yang +Link: https://lore.kernel.org/0474a095ab0044d0939ec4bf4362423d@realtek.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index f3f849b96402d..9bf99fe6cd34d 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -3790,6 +3790,7 @@ static void alc225_init(struct hda_codec *codec) + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + + msleep(75); ++ alc_update_coef_idx(codec, 0x4a, 3 << 10, 0); + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ + } + } +-- +2.39.5 + diff --git a/queue-6.12/alsa-seq-drop-ump-events-when-no-ump-conversion-is-s.patch b/queue-6.12/alsa-seq-drop-ump-events-when-no-ump-conversion-is-s.patch new file mode 100644 index 0000000000..0cf2056d78 --- /dev/null +++ b/queue-6.12/alsa-seq-drop-ump-events-when-no-ump-conversion-is-s.patch @@ -0,0 +1,57 @@ +From b16ba07438afc260efad45061280ffe678d22c33 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 18:00:30 +0100 +Subject: ALSA: seq: Drop UMP events when no UMP-conversion is set + +From: Takashi Iwai + +[ Upstream commit e77aa4b2eaa7fb31b2a7a50214ecb946b2a8b0f6 ] + +When a destination client is a user client in the legacy MIDI mode and +it sets the no-UMP-conversion flag, currently the all UMP events are +still passed as-is. But this may confuse the user-space, because the +event packet size is different from the legacy mode. + +Since we cannot handle UMP events in user clients unless it's running +in the UMP client mode, we should filter out those events instead of +accepting blindly. This patch addresses it by slightly adjusting the +conditions for UMP event handling at the event delivery time. 
+ +Fixes: 329ffe11a014 ("ALSA: seq: Allow suppressing UMP conversions") +Link: https://lore.kernel.org/b77a2cd6-7b59-4eb0-a8db-22d507d3af5f@gmail.com +Link: https://patch.msgid.link/20250217170034.21930-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_clientmgr.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c +index 77b6ac9b5c11b..9955c4d54e42a 100644 +--- a/sound/core/seq/seq_clientmgr.c ++++ b/sound/core/seq/seq_clientmgr.c +@@ -678,12 +678,18 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client, + dest_port->time_real); + + #if IS_ENABLED(CONFIG_SND_SEQ_UMP) +- if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { +- if (snd_seq_ev_is_ump(event)) { ++ if (snd_seq_ev_is_ump(event)) { ++ if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { + result = snd_seq_deliver_from_ump(client, dest, dest_port, + event, atomic, hop); + goto __skip; +- } else if (snd_seq_client_is_ump(dest)) { ++ } else if (dest->type == USER_CLIENT && ++ !snd_seq_client_is_ump(dest)) { ++ result = 0; // drop the event ++ goto __skip; ++ } ++ } else if (snd_seq_client_is_ump(dest)) { ++ if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { + result = snd_seq_deliver_to_ump(client, dest, dest_port, + event, atomic, hop); + goto __skip; +-- +2.39.5 + diff --git a/queue-6.12/arm64-dts-mediatek-mt8183-disable-dsi-display-output.patch b/queue-6.12/arm64-dts-mediatek-mt8183-disable-dsi-display-output.patch new file mode 100644 index 0000000000..c11d7e62ef --- /dev/null +++ b/queue-6.12/arm64-dts-mediatek-mt8183-disable-dsi-display-output.patch @@ -0,0 +1,61 @@ +From 80aa919bbf6e38aaff9766b6f4302a5254910123 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Oct 2024 15:56:28 +0800 +Subject: arm64: dts: mediatek: mt8183: Disable DSI display output by default + +From: Chen-Yu Tsai + +[ Upstream commit 26f6e91fa29a58fdc76b47f94f8f6027944a490c ] + +Most SoC dtsi files have the display output interfaces disabled by +default, and only enabled on boards that utilize them. The MT8183 +has it backwards: the display outputs are left enabled by default, +and only disabled at the board level. + +Reverse the situation for the DSI output so that it follows the +normal scheme. For ease of backporting the DPI output is handled +in a separate patch. 
+ +Fixes: 88ec840270e6 ("arm64: dts: mt8183: Add dsi node") +Fixes: 19b6403f1e2a ("arm64: dts: mt8183: add mt8183 pumpkin board") +Cc: stable@vger.kernel.org +Signed-off-by: Chen-Yu Tsai +Reviewed-by: Fei Shao +Link: https://lore.kernel.org/r/20241025075630.3917458-2-wenst@chromium.org +Signed-off-by: AngeloGioacchino Del Regno +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts | 4 ---- + arch/arm64/boot/dts/mediatek/mt8183.dtsi | 1 + + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts +index 61a6f66914b86..dbdee604edab4 100644 +--- a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts ++++ b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts +@@ -522,10 +522,6 @@ + status = "okay"; + }; + +-&dsi0 { +- status = "disabled"; +-}; +- + &dpi0 { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&dpi_func_pins>; +diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi +index 5cb6bd3c5acbb..92c41463d10e3 100644 +--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi ++++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi +@@ -1835,6 +1835,7 @@ + resets = <&mmsys MT8183_MMSYS_SW0_RST_B_DISP_DSI0>; + phys = <&mipi_tx0>; + phy-names = "dphy"; ++ status = "disabled"; + }; + + dpi0: dpi@14015000 { +-- +2.39.5 + diff --git a/queue-6.12/arm64-dts-mediatek-mt8183-pumpkin-add-hdmi-support.patch b/queue-6.12/arm64-dts-mediatek-mt8183-pumpkin-add-hdmi-support.patch new file mode 100644 index 0000000000..2f6241fb95 --- /dev/null +++ b/queue-6.12/arm64-dts-mediatek-mt8183-pumpkin-add-hdmi-support.patch @@ -0,0 +1,184 @@ +From c10a01dbff3ace5dda8e321dc55cc40a04c76bed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2024 17:41:49 +0800 +Subject: arm64: dts: mediatek: mt8183-pumpkin: add HDMI support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Fabien Parent + +[ Upstream commit 72f3e3d68cfda508ec4b6c8927c50814229cd04e ] + +The MT8183 Pumpkin board has a micro-HDMI connector. HDMI support is +provided by an IT66121 DPI <-> HDMI bridge. + +Enable the DPI and add the node for the IT66121 bridge. + +Signed-off-by: Fabien Parent +Co-developed-by: Pin-yen Lin +Signed-off-by: Pin-yen Lin +Reviewed-by: Nícolas F. R. A. 
Prado +Link: https://lore.kernel.org/r/20240919094212.1902073-1-treapking@chromium.org +Signed-off-by: AngeloGioacchino Del Regno +Stable-dep-of: 26f6e91fa29a ("arm64: dts: mediatek: mt8183: Disable DSI display output by default") +Signed-off-by: Sasha Levin +--- + .../boot/dts/mediatek/mt8183-pumpkin.dts | 123 ++++++++++++++++++ + 1 file changed, 123 insertions(+) + +diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts +index 1aa668c3ccf92..61a6f66914b86 100644 +--- a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts ++++ b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts +@@ -63,6 +63,18 @@ + pulldown-ohm = <0>; + io-channels = <&auxadc 0>; + }; ++ ++ connector { ++ compatible = "hdmi-connector"; ++ label = "hdmi"; ++ type = "d"; ++ ++ port { ++ hdmi_connector_in: endpoint { ++ remote-endpoint = <&hdmi_connector_out>; ++ }; ++ }; ++ }; + }; + + &auxadc { +@@ -120,6 +132,43 @@ + pinctrl-0 = <&i2c6_pins>; + status = "okay"; + clock-frequency = <100000>; ++ #address-cells = <1>; ++ #size-cells = <0>; ++ ++ it66121hdmitx: hdmitx@4c { ++ compatible = "ite,it66121"; ++ reg = <0x4c>; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&ite_pins>; ++ reset-gpios = <&pio 160 GPIO_ACTIVE_LOW>; ++ interrupt-parent = <&pio>; ++ interrupts = <4 IRQ_TYPE_LEVEL_LOW>; ++ vcn33-supply = <&mt6358_vcn33_reg>; ++ vcn18-supply = <&mt6358_vcn18_reg>; ++ vrf12-supply = <&mt6358_vrf12_reg>; ++ ++ ports { ++ #address-cells = <1>; ++ #size-cells = <0>; ++ ++ port@0 { ++ reg = <0>; ++ ++ it66121_in: endpoint { ++ bus-width = <12>; ++ remote-endpoint = <&dpi_out>; ++ }; ++ }; ++ ++ port@1 { ++ reg = <1>; ++ ++ hdmi_connector_out: endpoint { ++ remote-endpoint = <&hdmi_connector_in>; ++ }; ++ }; ++ }; ++ }; + }; + + &keyboard { +@@ -362,6 +411,67 @@ + input-enable; + }; + }; ++ ++ ite_pins: ite-pins { ++ pins-irq { ++ pinmux = ; ++ input-enable; ++ bias-pull-up; ++ }; ++ ++ pins-rst { ++ pinmux = ; ++ output-high; ++ }; ++ }; ++ ++ dpi_func_pins: dpi-func-pins { ++ pins-dpi { ++ pinmux = , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ ; ++ }; ++ }; ++ ++ dpi_idle_pins: dpi-idle-pins { ++ pins-idle { ++ pinmux = , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ , ++ ; ++ }; ++ }; + }; + + &pmic { +@@ -415,3 +525,16 @@ + &dsi0 { + status = "disabled"; + }; ++ ++&dpi0 { ++ pinctrl-names = "default", "sleep"; ++ pinctrl-0 = <&dpi_func_pins>; ++ pinctrl-1 = <&dpi_idle_pins>; ++ status = "okay"; ++ ++ port { ++ dpi_out: endpoint { ++ remote-endpoint = <&it66121_in>; ++ }; ++ }; ++}; +-- +2.39.5 + diff --git a/queue-6.12/arp-switch-to-dev_getbyhwaddr-in-arp_req_set_public.patch b/queue-6.12/arp-switch-to-dev_getbyhwaddr-in-arp_req_set_public.patch new file mode 100644 index 0000000000..e831065ee0 --- /dev/null +++ b/queue-6.12/arp-switch-to-dev_getbyhwaddr-in-arp_req_set_public.patch @@ -0,0 +1,47 @@ +From ee1c6116998682b776d67639fff5c7745270adb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 05:49:31 -0800 +Subject: arp: switch to dev_getbyhwaddr() in arp_req_set_public() + +From: Breno Leitao + +[ Upstream commit 4eae0ee0f1e6256d0b0b9dd6e72f1d9cf8f72e08 ] + +The arp_req_set_public() function is called with the rtnl lock held, +which provides enough synchronization protection. This makes the RCU +variant of dev_getbyhwaddr() unnecessary. Switch to using the simpler +dev_getbyhwaddr() function since we already have the required rtnl +locking. 
+ +This change helps maintain consistency in the networking code by using +the appropriate helper function for the existing locking context. +Since we're not holding the RCU read lock in arp_req_set_public() +existing code could trigger false positive locking warnings. + +Fixes: 941666c2e3e0 ("net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()") +Suggested-by: Kuniyuki Iwashima +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: Breno Leitao +Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-2-d3d6892db9e1@debian.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/arp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c +index 59ffaa89d7b05..8fb48f42581ce 100644 +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -1077,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, + __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; + + if (!dev && (r->arp_flags & ATF_COM)) { +- dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, ++ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, + r->arp_ha.sa_data); + if (!dev) + return -ENODEV; +-- +2.39.5 + diff --git a/queue-6.12/asoc-imx-audmix-remove-cpu_mclk-which-is-from-cpu-da.patch b/queue-6.12/asoc-imx-audmix-remove-cpu_mclk-which-is-from-cpu-da.patch new file mode 100644 index 0000000000..1415f45459 --- /dev/null +++ b/queue-6.12/asoc-imx-audmix-remove-cpu_mclk-which-is-from-cpu-da.patch @@ -0,0 +1,93 @@ +From 8277c05230fc3eae8e88cd7cbd1c114ffd06b808 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 15:05:18 +0800 +Subject: ASoC: imx-audmix: remove cpu_mclk which is from cpu dai device + +From: Shengjiu Wang + +[ Upstream commit 571b69f2f9b1ec7cf7d0e9b79e52115a87a869c4 ] + +When defer probe happens, there may be below error: + +platform 59820000.sai: Resources present before probing + +The cpu_mclk clock is from the cpu dai device, if it is not released, +then the cpu dai device probe will fail for the second time. + +The cpu_mclk is used to get rate for rate constraint, rate constraint +may be specific for each platform, which is not necessary for machine +driver, so remove it. 
+ +Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver") +Signed-off-by: Shengjiu Wang +Link: https://patch.msgid.link/20250213070518.547375-1-shengjiu.wang@nxp.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/fsl/imx-audmix.c | 31 ------------------------------- + 1 file changed, 31 deletions(-) + +diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c +index 8e7b75cf64db4..ff3671226306b 100644 +--- a/sound/soc/fsl/imx-audmix.c ++++ b/sound/soc/fsl/imx-audmix.c +@@ -23,7 +23,6 @@ struct imx_audmix { + struct snd_soc_card card; + struct platform_device *audmix_pdev; + struct platform_device *out_pdev; +- struct clk *cpu_mclk; + int num_dai; + struct snd_soc_dai_link *dai; + int num_dai_conf; +@@ -32,34 +31,11 @@ struct imx_audmix { + struct snd_soc_dapm_route *dapm_routes; + }; + +-static const u32 imx_audmix_rates[] = { +- 8000, 12000, 16000, 24000, 32000, 48000, 64000, 96000, +-}; +- +-static const struct snd_pcm_hw_constraint_list imx_audmix_rate_constraints = { +- .count = ARRAY_SIZE(imx_audmix_rates), +- .list = imx_audmix_rates, +-}; +- + static int imx_audmix_fe_startup(struct snd_pcm_substream *substream) + { +- struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); +- struct imx_audmix *priv = snd_soc_card_get_drvdata(rtd->card); + struct snd_pcm_runtime *runtime = substream->runtime; +- struct device *dev = rtd->card->dev; +- unsigned long clk_rate = clk_get_rate(priv->cpu_mclk); + int ret; + +- if (clk_rate % 24576000 == 0) { +- ret = snd_pcm_hw_constraint_list(runtime, 0, +- SNDRV_PCM_HW_PARAM_RATE, +- &imx_audmix_rate_constraints); +- if (ret < 0) +- return ret; +- } else { +- dev_warn(dev, "mclk may be not supported %lu\n", clk_rate); +- } +- + ret = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, + 1, 8); + if (ret < 0) +@@ -325,13 +301,6 @@ static int imx_audmix_probe(struct platform_device *pdev) + } + put_device(&cpu_pdev->dev); + +- priv->cpu_mclk = devm_clk_get(&cpu_pdev->dev, "mclk1"); +- if (IS_ERR(priv->cpu_mclk)) { +- ret = PTR_ERR(priv->cpu_mclk); +- dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret); +- return ret; +- } +- + priv->audmix_pdev = audmix_pdev; + priv->out_pdev = cpu_pdev; + +-- +2.39.5 + diff --git a/queue-6.12/asoc-renesas-rz-ssi-add-a-check-for-negative-sample_.patch b/queue-6.12/asoc-renesas-rz-ssi-add-a-check-for-negative-sample_.patch new file mode 100644 index 0000000000..cea6704a21 --- /dev/null +++ b/queue-6.12/asoc-renesas-rz-ssi-add-a-check-for-negative-sample_.patch @@ -0,0 +1,45 @@ +From 2cf3f236b29a71cdd25f8b2b1d78fe0304dc7d79 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Jan 2025 12:28:46 +0300 +Subject: ASoC: renesas: rz-ssi: Add a check for negative sample_space + +From: Dan Carpenter + +[ Upstream commit 82a0a3e6f8c02b3236b55e784a083fa4ee07c321 ] + +My static checker rule complains about this code. The concern is that +if "sample_space" is negative then the "sample_space >= runtime->channels" +condition will not work as intended because it will be type promoted to a +high unsigned int value. + +strm->fifo_sample_size is SSI_FIFO_DEPTH (32). The SSIFSR_TDC_MASK is +0x3f. Without any further context it does seem like a reasonable warning +and it can't hurt to add a check for negatives. 
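As a side illustration of the promotion hazard described above (a standalone editorial sketch, not part of the quoted patch; the variable names only mirror the driver, and runtime->channels being unsigned is taken from the message itself), comparing a signed int against an unsigned int converts the signed operand to unsigned, so a negative count slips past the ">=" test:

#include <stdio.h>

int main(void)
{
	int sample_space = -3;		/* TDC read reports more entries than the FIFO depth */
	unsigned int channels = 2;	/* stands in for runtime->channels */

	/* -3 is converted to a huge unsigned value, so the loop condition holds */
	if (sample_space >= channels)
		printf("frame loop would run despite a negative sample_space\n");

	/* the guard added by the patch catches it before that comparison */
	if (sample_space < 0)
		printf("return -EINVAL instead\n");

	return 0;
}

The driver-side fix keeps sample_space signed and simply returns -EINVAL when it goes negative, which defuses the comparison before it is reached.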
+ +Cc: stable@vger.kernel.org +Fixes: 03e786bd4341 ("ASoC: sh: Add RZ/G2L SSIF-2 driver") +Signed-off-by: Dan Carpenter +Reviewed-by: Geert Uytterhoeven +Link: https://patch.msgid.link/e07c3dc5-d885-4b04-a742-71f42243f4fd@stanley.mountain +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sh/rz-ssi.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c +index 1b74dc1137958..4f483bfa584f5 100644 +--- a/sound/soc/sh/rz-ssi.c ++++ b/sound/soc/sh/rz-ssi.c +@@ -528,6 +528,8 @@ static int rz_ssi_pio_send(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) + sample_space = strm->fifo_sample_size; + ssifsr = rz_ssi_reg_readl(ssi, SSIFSR); + sample_space -= (ssifsr >> SSIFSR_TDC_SHIFT) & SSIFSR_TDC_MASK; ++ if (sample_space < 0) ++ return -EINVAL; + + /* Only add full frames at a time */ + while (frames_left && (sample_space >= runtime->channels)) { +-- +2.39.5 + diff --git a/queue-6.12/asoc-renesas-rz-ssi-terminate-all-the-dma-transactio.patch b/queue-6.12/asoc-renesas-rz-ssi-terminate-all-the-dma-transactio.patch new file mode 100644 index 0000000000..9fff762aef --- /dev/null +++ b/queue-6.12/asoc-renesas-rz-ssi-terminate-all-the-dma-transactio.patch @@ -0,0 +1,88 @@ +From 29a934ba17354e10bb925baa97ff0d3a6d09acd0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 19:09:33 +0200 +Subject: ASoC: renesas: rz-ssi: Terminate all the DMA transactions + +From: Claudiu Beznea + +[ Upstream commit 541011dc2d7c4c82523706f726f422a5e23cc86f ] + +The stop trigger invokes rz_ssi_stop() and rz_ssi_stream_quit(). +- The purpose of rz_ssi_stop() is to disable TX/RX, terminate DMA + transactions, and set the controller to idle. +- The purpose of rz_ssi_stream_quit() is to reset the substream-specific + software data by setting strm->running and strm->substream appropriately. + +The function rz_ssi_is_stream_running() checks if both strm->substream and +strm->running are valid and returns true if so. Its implementation is as +follows: + +static inline bool rz_ssi_is_stream_running(struct rz_ssi_stream *strm) +{ + return strm->substream && strm->running; +} + +When the controller is configured in full-duplex mode (with both playback +and capture active), the rz_ssi_stop() function does not modify the +controller settings when called for the first substream in the full-duplex +setup. Instead, it simply sets strm->running = 0 and returns if the +companion substream is still running. The following code illustrates this: + +static int rz_ssi_stop(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) +{ + strm->running = 0; + + if (rz_ssi_is_stream_running(&ssi->playback) || + rz_ssi_is_stream_running(&ssi->capture)) + return 0; + + // ... +} + +The controller settings, along with the DMA termination (for the last +stopped substream), are only applied when the last substream in the +full-duplex setup is stopped. + +While applying the controller settings only when the last substream stops +is not problematic, terminating the DMA operations for only one substream +causes failures when starting and stopping full-duplex operations multiple +times in a loop. + +To address this issue, call dmaengine_terminate_async() for both substreams +involved in the full-duplex setup when the last substream in the setup is +stopped. 
+ +Fixes: 4f8cd05a4305 ("ASoC: sh: rz-ssi: Add full duplex support") +Cc: stable@vger.kernel.org +Reviewed-by: Biju Das +Signed-off-by: Claudiu Beznea +Reviewed-by: Geert Uytterhoeven +Link: https://patch.msgid.link/20241210170953.2936724-5-claudiu.beznea.uj@bp.renesas.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sh/rz-ssi.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c +index 32db2cead8a4e..1b74dc1137958 100644 +--- a/sound/soc/sh/rz-ssi.c ++++ b/sound/soc/sh/rz-ssi.c +@@ -416,8 +416,12 @@ static int rz_ssi_stop(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) + rz_ssi_reg_mask_setl(ssi, SSICR, SSICR_TEN | SSICR_REN, 0); + + /* Cancel all remaining DMA transactions */ +- if (rz_ssi_is_dma_enabled(ssi)) +- dmaengine_terminate_async(strm->dma_ch); ++ if (rz_ssi_is_dma_enabled(ssi)) { ++ if (ssi->playback.dma_ch) ++ dmaengine_terminate_async(ssi->playback.dma_ch); ++ if (ssi->capture.dma_ch) ++ dmaengine_terminate_async(ssi->capture.dma_ch); ++ } + + rz_ssi_set_idle(ssi); + +-- +2.39.5 + diff --git a/queue-6.12/asoc-rockchip-i2s-tdm-fix-shift-config-for-snd_soc_d.patch b/queue-6.12/asoc-rockchip-i2s-tdm-fix-shift-config-for-snd_soc_d.patch new file mode 100644 index 0000000000..fd74ff2ec0 --- /dev/null +++ b/queue-6.12/asoc-rockchip-i2s-tdm-fix-shift-config-for-snd_soc_d.patch @@ -0,0 +1,46 @@ +From 249152bfa2d2ee3d74091b2b8a69ad366e16bc2f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Feb 2025 16:13:10 +0000 +Subject: ASoC: rockchip: i2s-tdm: fix shift config for SND_SOC_DAIFMT_DSP_[AB] + +From: John Keeping + +[ Upstream commit 6b24e67b4056ba83b1e95e005b7e50fdb1cc6cf4 ] + +Commit 2f45a4e289779 ("ASoC: rockchip: i2s_tdm: Fixup config for +SND_SOC_DAIFMT_DSP_A/B") applied a partial change to fix the +configuration for DSP A and DSP B formats. + +The shift control also needs updating to set the correct offset for +frame data compared to LRCK. Set the correct values. 
+ +Fixes: 081068fd64140 ("ASoC: rockchip: add support for i2s-tdm controller") +Signed-off-by: John Keeping +Link: https://patch.msgid.link/20250204161311.2117240-1-jkeeping@inmusicbrands.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/rockchip/rockchip_i2s_tdm.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c +index acd75e48851fc..7feefeb6b876d 100644 +--- a/sound/soc/rockchip/rockchip_i2s_tdm.c ++++ b/sound/soc/rockchip/rockchip_i2s_tdm.c +@@ -451,11 +451,11 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai, + break; + case SND_SOC_DAIFMT_DSP_A: + val = I2S_TXCR_TFS_TDM_PCM; +- tdm_val = TDM_SHIFT_CTRL(0); ++ tdm_val = TDM_SHIFT_CTRL(2); + break; + case SND_SOC_DAIFMT_DSP_B: + val = I2S_TXCR_TFS_TDM_PCM; +- tdm_val = TDM_SHIFT_CTRL(2); ++ tdm_val = TDM_SHIFT_CTRL(4); + break; + default: + ret = -EINVAL; +-- +2.39.5 + diff --git a/queue-6.12/asoc-sof-ipc4-topology-harden-loops-for-looking-up-a.patch b/queue-6.12/asoc-sof-ipc4-topology-harden-loops-for-looking-up-a.patch new file mode 100644 index 0000000000..4a702bfbdf --- /dev/null +++ b/queue-6.12/asoc-sof-ipc4-topology-harden-loops-for-looking-up-a.patch @@ -0,0 +1,74 @@ +From 2417522b10d99b745f935356d075ae474e1afd19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Feb 2025 10:46:42 +0200 +Subject: ASoC: SOF: ipc4-topology: Harden loops for looking up ALH copiers + +From: Peter Ujfalusi + +[ Upstream commit 6fd60136d256b3b948333ebdb3835f41a95ab7ef ] + +Other, non DAI copier widgets could have the same stream name (sname) as +the ALH copier and in that case the copier->data is NULL, no alh_data is +attached, which could lead to NULL pointer dereference. +We could check for this NULL pointer in sof_ipc4_prepare_copier_module() +and avoid the crash, but a similar loop in sof_ipc4_widget_setup_comp_dai() +will miscalculate the ALH device count, causing broken audio. + +The correct fix is to harden the matching logic by making sure that the +1. widget is a DAI widget - so dai = w->private is valid +2. 
the dai (and thus the copier) is ALH copier + +Fixes: a150345aa758 ("ASoC: SOF: ipc4-topology: add SoundWire/ALH aggregation support") +Reported-by: Seppo Ingalsuo +Link: https://github.com/thesofproject/sof/pull/9652 +Signed-off-by: Peter Ujfalusi +Reviewed-by: Liam Girdwood +Reviewed-by: Ranjani Sridharan +Reviewed-by: Bard Liao +Link: https://patch.msgid.link/20250206084642.14988-1-peter.ujfalusi@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/ipc4-topology.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c +index 240fee2166d12..f82db7f2a6b7e 100644 +--- a/sound/soc/sof/ipc4-topology.c ++++ b/sound/soc/sof/ipc4-topology.c +@@ -671,10 +671,16 @@ static int sof_ipc4_widget_setup_comp_dai(struct snd_sof_widget *swidget) + } + + list_for_each_entry(w, &sdev->widget_list, list) { +- if (w->widget->sname && ++ struct snd_sof_dai *alh_dai; ++ ++ if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || + strcmp(w->widget->sname, swidget->widget->sname)) + continue; + ++ alh_dai = w->private; ++ if (alh_dai->type != SOF_DAI_INTEL_ALH) ++ continue; ++ + blob->alh_cfg.device_count++; + } + +@@ -1973,11 +1979,13 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget, + list_for_each_entry(w, &sdev->widget_list, list) { + u32 node_type; + +- if (w->widget->sname && ++ if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || + strcmp(w->widget->sname, swidget->widget->sname)) + continue; + + dai = w->private; ++ if (dai->type != SOF_DAI_INTEL_ALH) ++ continue; + alh_copier = (struct sof_ipc4_copier *)dai->private; + alh_data = &alh_copier->data; + node_type = SOF_IPC4_GET_NODE_TYPE(alh_data->gtw_cfg.node_id); +-- +2.39.5 + diff --git a/queue-6.12/bluetooth-qca-fix-poor-rf-performance-for-wcn6855.patch b/queue-6.12/bluetooth-qca-fix-poor-rf-performance-for-wcn6855.patch new file mode 100644 index 0000000000..6e96831be4 --- /dev/null +++ b/queue-6.12/bluetooth-qca-fix-poor-rf-performance-for-wcn6855.patch @@ -0,0 +1,48 @@ +From b69b4270350be081c756d1636bab635e8e97280a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Jan 2025 22:43:23 +0800 +Subject: Bluetooth: qca: Fix poor RF performance for WCN6855 + +From: Zijun Hu + +[ Upstream commit a2fad248947d702ed3dcb52b8377c1a3ae201e44 ] + +For WCN6855, board ID specific NVM needs to be downloaded once board ID +is available, but the default NVM is always downloaded currently. + +The wrong NVM causes poor RF performance, and effects user experience +for several types of laptop with WCN6855 on the market. + +Fix by downloading board ID specific NVM if board ID is available. 
+ +Fixes: 095327fede00 ("Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855") +Cc: stable@vger.kernel.org # 6.4 +Signed-off-by: Zijun Hu +Tested-by: Johan Hovold +Reviewed-by: Johan Hovold +Tested-by: Steev Klimaszewski #Thinkpad X13s +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btqca.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c +index 5cb1fd1a0c7b5..04d02c746ec0f 100644 +--- a/drivers/bluetooth/btqca.c ++++ b/drivers/bluetooth/btqca.c +@@ -905,8 +905,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + "qca/msnv%02x.bin", rom_ver); + break; + case QCA_WCN6855: +- snprintf(config.fwname, sizeof(config.fwname), +- "qca/hpnv%02x.bin", rom_ver); ++ qca_read_fw_board_id(hdev, &boardid); ++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname), ++ "hpnv", soc_type, ver, rom_ver, boardid); + break; + case QCA_WCN7850: + qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname), +-- +2.39.5 + diff --git a/queue-6.12/bluetooth-qca-update-firmware-name-to-support-board-.patch b/queue-6.12/bluetooth-qca-update-firmware-name-to-support-board-.patch new file mode 100644 index 0000000000..c56573639b --- /dev/null +++ b/queue-6.12/bluetooth-qca-update-firmware-name-to-support-board-.patch @@ -0,0 +1,202 @@ +From ae5db987ded08376ff9822efcffdfc092ac796f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Jan 2025 17:26:49 +0800 +Subject: Bluetooth: qca: Update firmware-name to support board specific nvm + +From: Cheng Jiang + +[ Upstream commit a4c5a468c6329bde7dfd46bacff2cbf5f8a8152e ] + +Different connectivity boards may be attached to the same platform. For +example, QCA6698-based boards can support either a two-antenna or +three-antenna solution, both of which work on the sa8775p-ride platform. +Due to differences in connectivity boards and variations in RF +performance from different foundries, different NVM configurations are +used based on the board ID. + +Therefore, in the firmware-name property, if the NVM file has an +extension, the NVM file will be used. Otherwise, the system will first +try the .bNN (board ID) file, and if that fails, it will fall back to +the .bin file. 
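To make that lookup order concrete, here is a hedged standalone sketch (editorial only, not the driver code; the helper name, buffer size and the example board ID 0x21 are assumptions) of the resolution described in the paragraph above:

#include <stdio.h>
#include <string.h>

/* rough stand-in for the "has an extension" test */
static int has_extension(const char *name)
{
	const char *dot = strrchr(name, '.');

	return dot && dot != name && dot[1] != '\0' && !strchr(dot, '/');
}

int main(void)
{
	const char *fw = "QCA6698/hpnv21";	/* value of the firmware-name property */
	unsigned int bid = 0x21;		/* example board ID read from the controller */
	char fwname[64];

	if (has_extension(fw)) {
		/* explicit extension: request exactly this file, no fallback */
		snprintf(fwname, sizeof(fwname), "qca/%s", fw);
		printf("request: %s\n", fwname);
	} else {
		/* first choice: the board-specific NVM */
		snprintf(fwname, sizeof(fwname), "qca/%s.b%02x", fw, bid);
		printf("request: %s\n", fwname);
		/* if that file is missing, fall back to the default NVM */
		snprintf(fwname, sizeof(fwname), "qca/%s.bin", fw);
		printf("fallback: %s\n", fwname);
	}

	return 0;
}

With firmware-name = "QCA6698/hpnv21.bin" the first branch is taken and only that exact file is requested, with no board-specific retry.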
+ +Possible configurations: +firmware-name = "QCA6698/hpnv21"; +firmware-name = "QCA6698/hpnv21.bin"; + +Signed-off-by: Cheng Jiang +Signed-off-by: Luiz Augusto von Dentz +Stable-dep-of: a2fad248947d ("Bluetooth: qca: Fix poor RF performance for WCN6855") +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btqca.c | 113 ++++++++++++++++++++++++++++---------- + 1 file changed, 85 insertions(+), 28 deletions(-) + +diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c +index dfbbac92242a8..5cb1fd1a0c7b5 100644 +--- a/drivers/bluetooth/btqca.c ++++ b/drivers/bluetooth/btqca.c +@@ -272,6 +272,39 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) + } + EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); + ++static bool qca_filename_has_extension(const char *filename) ++{ ++ const char *suffix = strrchr(filename, '.'); ++ ++ /* File extensions require a dot, but not as the first or last character */ ++ if (!suffix || suffix == filename || *(suffix + 1) == '\0') ++ return 0; ++ ++ /* Avoid matching directories with names that look like files with extensions */ ++ return !strchr(suffix, '/'); ++} ++ ++static bool qca_get_alt_nvm_file(char *filename, size_t max_size) ++{ ++ char fwname[64]; ++ const char *suffix; ++ ++ /* nvm file name has an extension, replace with .bin */ ++ if (qca_filename_has_extension(filename)) { ++ suffix = strrchr(filename, '.'); ++ strscpy(fwname, filename, suffix - filename + 1); ++ snprintf(fwname + (suffix - filename), ++ sizeof(fwname) - (suffix - filename), ".bin"); ++ /* If nvm file is already the default one, return false to skip the retry. */ ++ if (strcmp(fwname, filename) == 0) ++ return false; ++ ++ snprintf(filename, max_size, "%s", fwname); ++ return true; ++ } ++ return false; ++} ++ + static int qca_tlv_check_data(struct hci_dev *hdev, + struct qca_fw_config *config, + u8 *fw_data, size_t fw_size, +@@ -564,6 +597,19 @@ static int qca_download_firmware(struct hci_dev *hdev, + config->fwname, ret); + return ret; + } ++ } ++ /* If the board-specific file is missing, try loading the default ++ * one, unless that was attempted already. 
++ */ ++ else if (config->type == TLV_TYPE_NVM && ++ qca_get_alt_nvm_file(config->fwname, sizeof(config->fwname))) { ++ bt_dev_info(hdev, "QCA Downloading %s", config->fwname); ++ ret = request_firmware(&fw, config->fwname, &hdev->dev); ++ if (ret) { ++ bt_dev_err(hdev, "QCA Failed to request file: %s (%d)", ++ config->fwname, ret); ++ return ret; ++ } + } else { + bt_dev_err(hdev, "QCA Failed to request file: %s (%d)", + config->fwname, ret); +@@ -700,34 +746,38 @@ static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *co + return 0; + } + +-static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size, ++static void qca_get_nvm_name_by_board(char *fwname, size_t max_size, ++ const char *stem, enum qca_btsoc_type soc_type, + struct qca_btsoc_version ver, u8 rom_ver, u16 bid) + { + const char *variant; ++ const char *prefix; + +- /* hsp gf chip */ +- if ((le32_to_cpu(ver.soc_id) & QCA_HSP_GF_SOC_MASK) == QCA_HSP_GF_SOC_ID) +- variant = "g"; +- else +- variant = ""; ++ /* Set the default value to variant and prefix */ ++ variant = ""; ++ prefix = "b"; + +- if (bid == 0x0) +- snprintf(fwname, max_size, "qca/hpnv%02x%s.bin", rom_ver, variant); +- else +- snprintf(fwname, max_size, "qca/hpnv%02x%s.%x", rom_ver, variant, bid); +-} ++ if (soc_type == QCA_QCA2066) ++ prefix = ""; + +-static inline void qca_get_nvm_name_generic(struct qca_fw_config *cfg, +- const char *stem, u8 rom_ver, u16 bid) +-{ +- if (bid == 0x0) +- snprintf(cfg->fwname, sizeof(cfg->fwname), "qca/%snv%02x.bin", stem, rom_ver); +- else if (bid & 0xff00) +- snprintf(cfg->fwname, sizeof(cfg->fwname), +- "qca/%snv%02x.b%x", stem, rom_ver, bid); +- else +- snprintf(cfg->fwname, sizeof(cfg->fwname), +- "qca/%snv%02x.b%02x", stem, rom_ver, bid); ++ if (soc_type == QCA_WCN6855 || soc_type == QCA_QCA2066) { ++ /* If the chip is manufactured by GlobalFoundries */ ++ if ((le32_to_cpu(ver.soc_id) & QCA_HSP_GF_SOC_MASK) == QCA_HSP_GF_SOC_ID) ++ variant = "g"; ++ } ++ ++ if (rom_ver != 0) { ++ if (bid == 0x0 || bid == 0xffff) ++ snprintf(fwname, max_size, "qca/%s%02x%s.bin", stem, rom_ver, variant); ++ else ++ snprintf(fwname, max_size, "qca/%s%02x%s.%s%02x", stem, rom_ver, ++ variant, prefix, bid); ++ } else { ++ if (bid == 0x0 || bid == 0xffff) ++ snprintf(fwname, max_size, "qca/%s%s.bin", stem, variant); ++ else ++ snprintf(fwname, max_size, "qca/%s%s.%s%02x", stem, variant, prefix, bid); ++ } + } + + int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, +@@ -816,8 +866,14 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + /* Download NVM configuration */ + config.type = TLV_TYPE_NVM; + if (firmware_name) { +- snprintf(config.fwname, sizeof(config.fwname), +- "qca/%s", firmware_name); ++ /* The firmware name has an extension, use it directly */ ++ if (qca_filename_has_extension(firmware_name)) { ++ snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); ++ } else { ++ qca_read_fw_board_id(hdev, &boardid); ++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname), ++ firmware_name, soc_type, ver, 0, boardid); ++ } + } else { + switch (soc_type) { + case QCA_WCN3990: +@@ -836,8 +892,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + "qca/apnv%02x.bin", rom_ver); + break; + case QCA_QCA2066: +- qca_generate_hsp_nvm_name(config.fwname, +- sizeof(config.fwname), ver, rom_ver, boardid); ++ qca_get_nvm_name_by_board(config.fwname, ++ sizeof(config.fwname), "hpnv", soc_type, ver, ++ rom_ver, boardid); + break; + case QCA_QCA6390: + snprintf(config.fwname, 
sizeof(config.fwname), +@@ -852,9 +909,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, + "qca/hpnv%02x.bin", rom_ver); + break; + case QCA_WCN7850: +- qca_get_nvm_name_generic(&config, "hmt", rom_ver, boardid); ++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname), ++ "hmtnv", soc_type, ver, rom_ver, boardid); + break; +- + default: + snprintf(config.fwname, sizeof(config.fwname), + "qca/nvm_%08x.bin", soc_ver); +-- +2.39.5 + diff --git a/queue-6.12/btrfs-do-not-assume-the-full-page-range-is-not-dirty.patch b/queue-6.12/btrfs-do-not-assume-the-full-page-range-is-not-dirty.patch new file mode 100644 index 0000000000..3b31f2112d --- /dev/null +++ b/queue-6.12/btrfs-do-not-assume-the-full-page-range-is-not-dirty.patch @@ -0,0 +1,63 @@ +From 7a2dc671c2105c12e3a4beeefbc6ec54658a07ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Sep 2024 08:52:36 +0930 +Subject: btrfs: do not assume the full page range is not dirty in + extent_writepage_io() + +From: Qu Wenruo + +[ Upstream commit 928b4de66ed3b0d9a6f201ce41ab2eed6ea2e7ef ] + +The function extent_writepage_io() will submit the dirty sectors inside +the page for the write. + +But recently to co-operate with the incoming subpage compression +enhancement, a new bitmap is introduced to +btrfs_bio_ctrl::submit_bitmap, to only avoid a subset of the dirty +range. + +This is because we can have the following cases with 64K page size: + + 0 16K 32K 48K 64K + | |/////////| |/| + 52K + +For range [16K, 32K), we queue the dirty range for compression, which is +ran in a delayed workqueue. +Then for range [48K, 52K), we go through the regular submission path. + +In that case, our btrfs_bio_ctrl::submit_bitmap will exclude the range +[16K, 32K). + +The dirty flags for the range [16K, 32K) is only cleared when the +compression is done, by the extent_clear_unlock_delalloc() call inside +submit_one_async_extent(). + +This patch fix the false alert by removing the +btrfs_folio_assert_not_dirty() check, since it's no longer correct for +subpage compression cases. 
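As a minimal sketch of the bitmap arithmetic described above (a userspace
stand-in with made-up names, assuming 4K sectors in a 64K folio), the submit
bitmap is simply the dirty bitmap with the compression-owned sectors masked
out:

    #include <stdio.h>

    #define SECTOR_SHIFT 12                 /* 4K sectors */

    /* Convert a byte range inside the folio into a bitmap of sectors. */
    static unsigned int range_to_bits(unsigned int start, unsigned int len)
    {
            unsigned int first = start >> SECTOR_SHIFT;
            unsigned int nbits = len >> SECTOR_SHIFT;

            return ((1u << nbits) - 1) << first;
    }

    int main(void)
    {
            /* Dirty: [16K, 32K) and [48K, 52K); compression owns [16K, 32K). */
            unsigned int dirty    = range_to_bits(16 << 10, 16 << 10) |
                                    range_to_bits(48 << 10,  4 << 10);
            unsigned int compress = range_to_bits(16 << 10, 16 << 10);
            unsigned int submit   = dirty & ~compress;

            /* Only [48K, 52K) is left for the regular submission path. */
            printf("submit bitmap: 0x%04x\n", submit);
            return 0;
    }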
+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index fe08c983d5bb4..9ff72a5a13eb3 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1394,8 +1394,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, + goto out; + submitted_io = true; + } +- +- btrfs_folio_assert_not_dirty(fs_info, folio, start, len); + out: + /* + * If we didn't submitted any sector (>= i_size), folio dirty get +-- +2.39.5 + diff --git a/queue-6.12/btrfs-fix-double-accounting-race-when-btrfs_run_dela.patch b/queue-6.12/btrfs-fix-double-accounting-race-when-btrfs_run_dela.patch new file mode 100644 index 0000000000..f642ba0971 --- /dev/null +++ b/queue-6.12/btrfs-fix-double-accounting-race-when-btrfs_run_dela.patch @@ -0,0 +1,315 @@ +From 0d2fdc313f51f0e8cd4cbeaae79cac3782138972 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 16:43:55 +1030 +Subject: btrfs: fix double accounting race when btrfs_run_delalloc_range() + failed + +From: Qu Wenruo + +[ Upstream commit 72dad8e377afa50435940adfb697e070d3556670 ] + +[BUG] +When running btrfs with block size (4K) smaller than page size (64K, +aarch64), there is a very high chance to crash the kernel at +generic/750, with the following messages: +(before the call traces, there are 3 extra debug messages added) + + BTRFS warning (device dm-3): read-write for sector size 4096 with page size 65536 is experimental + BTRFS info (device dm-3): checking UUID tree + hrtimer: interrupt took 5451385 ns + BTRFS error (device dm-3): cow_file_range failed, root=4957 inode=257 start=1605632 len=69632: -28 + BTRFS error (device dm-3): run_delalloc_nocow failed, root=4957 inode=257 start=1605632 len=69632: -28 + BTRFS error (device dm-3): failed to run delalloc range, root=4957 ino=257 folio=1572864 submit_bitmap=8-15 start=1605632 len=69632: -28 + ------------[ cut here ]------------ + WARNING: CPU: 2 PID: 3020984 at ordered-data.c:360 can_finish_ordered_extent+0x370/0x3b8 [btrfs] + CPU: 2 UID: 0 PID: 3020984 Comm: kworker/u24:1 Tainted: G OE 6.13.0-rc1-custom+ #89 + Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE + Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 + Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs] + pc : can_finish_ordered_extent+0x370/0x3b8 [btrfs] + lr : can_finish_ordered_extent+0x1ec/0x3b8 [btrfs] + Call trace: + can_finish_ordered_extent+0x370/0x3b8 [btrfs] (P) + can_finish_ordered_extent+0x1ec/0x3b8 [btrfs] (L) + btrfs_mark_ordered_io_finished+0x130/0x2b8 [btrfs] + extent_writepage+0x10c/0x3b8 [btrfs] + extent_write_cache_pages+0x21c/0x4e8 [btrfs] + btrfs_writepages+0x94/0x160 [btrfs] + do_writepages+0x74/0x190 + filemap_fdatawrite_wbc+0x74/0xa0 + start_delalloc_inodes+0x17c/0x3b0 [btrfs] + btrfs_start_delalloc_roots+0x17c/0x288 [btrfs] + shrink_delalloc+0x11c/0x280 [btrfs] + flush_space+0x288/0x328 [btrfs] + btrfs_async_reclaim_data_space+0x180/0x228 [btrfs] + process_one_work+0x228/0x680 + worker_thread+0x1bc/0x360 + kthread+0x100/0x118 + ret_from_fork+0x10/0x20 + ---[ end trace 0000000000000000 ]--- + BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE offset=1605632 OE len=16384 to_dec=16384 left=0 + BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE 
offset=1622016 OE len=12288 to_dec=12288 left=0 + Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 + BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE offset=1634304 OE len=8192 to_dec=4096 left=0 + CPU: 1 UID: 0 PID: 3286940 Comm: kworker/u24:3 Tainted: G W OE 6.13.0-rc1-custom+ #89 + Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 + Workqueue: btrfs_work_helper [btrfs] (btrfs-endio-write) + pstate: 404000c5 (nZcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : process_one_work+0x110/0x680 + lr : worker_thread+0x1bc/0x360 + Call trace: + process_one_work+0x110/0x680 (P) + worker_thread+0x1bc/0x360 (L) + worker_thread+0x1bc/0x360 + kthread+0x100/0x118 + ret_from_fork+0x10/0x20 + Code: f84086a1 f9000fe1 53041c21 b9003361 (f9400661) + ---[ end trace 0000000000000000 ]--- + Kernel panic - not syncing: Oops: Fatal exception + SMP: stopping secondary CPUs + SMP: failed to stop secondary CPUs 2-3 + Dumping ftrace buffer: + (ftrace buffer empty) + Kernel Offset: 0x275bb9540000 from 0xffff800080000000 + PHYS_OFFSET: 0xffff8fbba0000000 + CPU features: 0x100,00000070,00801250,8201720b + +[CAUSE] +The above warning is triggered immediately after the delalloc range +failure, this happens in the following sequence: + +- Range [1568K, 1636K) is dirty + + 1536K 1568K 1600K 1636K 1664K + | |/////////|////////| | + + Where 1536K, 1600K and 1664K are page boundaries (64K page size) + +- Enter extent_writepage() for page 1536K + +- Enter run_delalloc_nocow() with locked page 1536K and range + [1568K, 1636K) + This is due to the inode having preallocated extents. + +- Enter cow_file_range() with locked page 1536K and range + [1568K, 1636K) + +- btrfs_reserve_extent() only reserved two extents + The main loop of cow_file_range() only reserved two data extents, + + Now we have: + + 1536K 1568K 1600K 1636K 1664K + | |<-->|<--->|/|///////| | + 1584K 1596K + Range [1568K, 1596K) has an ordered extent reserved. + +- btrfs_reserve_extent() failed inside cow_file_range() for file offset + 1596K + This is already a bug in our space reservation code, but for now let's + focus on the error handling path. + + Now cow_file_range() returned -ENOSPC. + +- btrfs_run_delalloc_range() do error cleanup <<< ROOT CAUSE + Call btrfs_cleanup_ordered_extents() with locked folio 1536K and range + [1568K, 1636K) + + Function btrfs_cleanup_ordered_extents() normally needs to skip the + ranges inside the folio, as it will normally be cleaned up by + extent_writepage(). + + Such split error handling is already problematic in the first place. + + What's worse is the folio range skipping itself, which is not taking + subpage cases into consideration at all, it will only skip the range + if the page start >= the range start. + In our case, the page start < the range start, since for subpage cases + we can have delalloc ranges inside the folio but not covering the + folio. + + So it doesn't skip the page range at all. + This means all the ordered extents, both [1568K, 1584K) and + [1584K, 1596K) will be marked as IOERR. + + And these two ordered extents have no more pending ios, they are marked + finished, and *QUEUED* to be deleted from the io tree. + +- extent_writepage() do error cleanup + Call btrfs_mark_ordered_io_finished() for the range [1536K, 1600K). + + Although ranges [1568K, 1584K) and [1584K, 1596K) are finished, the + deletion from io tree is async, it may or may not happen at this + time. 
+ + If the ranges have not yet been removed, we will do double cleaning on + those ranges, triggering the above ordered extent warnings. + +In theory there are other bugs, like the cleanup in extent_writepage() +can cause double accounting on ranges that are submitted asynchronously +(compression for example). + +But that's much harder to trigger because normally we do not mix regular +and compression delalloc ranges. + +[FIX] +The folio range split is already buggy and not subpage compatible, it +was introduced a long time ago where subpage support was not even considered. + +So instead of splitting the ordered extents cleanup into the folio range +and out of folio range, do all the cleanup inside writepage_delalloc(). + +- Pass @NULL as locked_folio for btrfs_cleanup_ordered_extents() in + btrfs_run_delalloc_range() + +- Skip the btrfs_cleanup_ordered_extents() if writepage_delalloc() + failed + + So all ordered extents are only cleaned up by + btrfs_run_delalloc_range(). + +- Handle the ranges that already have ordered extents allocated + If part of the folio already has ordered extent allocated, and + btrfs_run_delalloc_range() failed, we also need to cleanup that range. + +Now we have a concentrated error handling for ordered extents during +btrfs_run_delalloc_range(). + +Fixes: d1051d6ebf8e ("btrfs: Fix error handling in btrfs_cleanup_ordered_extents") +CC: stable@vger.kernel.org # 5.15+ +Reviewed-by: Boris Burkov +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 59 +++++++++++++++++++++++++++++++++++--------- + fs/btrfs/inode.c | 3 +-- + 2 files changed, 49 insertions(+), 13 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index e8f882f949051..15cbb2a865e5e 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1145,14 +1145,19 @@ static bool find_next_delalloc_bitmap(struct folio *folio, + } + + /* +- * helper for extent_writepage(), doing all of the delayed allocation setup. ++ * Do all of the delayed allocation setup. + * +- * This returns 1 if btrfs_run_delalloc_range function did all the work required +- * to write the page (copy into inline extent). In this case the IO has +- * been started and the page is already unlocked. ++ * Return >0 if all the dirty blocks are submitted async (compression) or inlined. ++ * The @folio should no longer be touched (treat it as already unlocked). + * +- * This returns 0 if all went well (page still locked) +- * This returns < 0 if there were errors (page still locked) ++ * Return 0 if there is still dirty block that needs to be submitted through ++ * extent_writepage_io(). ++ * bio_ctrl->submit_bitmap will indicate which blocks of the folio should be ++ * submitted, and @folio is still kept locked. ++ * ++ * Return <0 if there is any error hit. ++ * Any allocated ordered extent range covering this folio will be marked ++ * finished (IOERR), and @folio is still kept locked. + */ + static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + struct folio *folio, +@@ -1170,6 +1175,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + * last delalloc end. + */ + u64 last_delalloc_end = 0; ++ /* ++ * The range end (exclusive) of the last successfully finished delalloc ++ * range. 
++ * Any range covered by ordered extent must either be manually marked ++ * finished (error handling), or has IO submitted (and finish the ++ * ordered extent normally). ++ * ++ * This records the end of ordered extent cleanup if we hit an error. ++ */ ++ u64 last_finished_delalloc_end = page_start; + u64 delalloc_start = page_start; + u64 delalloc_end = page_end; + u64 delalloc_to_write = 0; +@@ -1238,11 +1253,19 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + found_len = last_delalloc_end + 1 - found_start; + + if (ret >= 0) { ++ /* ++ * Some delalloc range may be created by previous folios. ++ * Thus we still need to clean up this range during error ++ * handling. ++ */ ++ last_finished_delalloc_end = found_start; + /* No errors hit so far, run the current delalloc range. */ + ret = btrfs_run_delalloc_range(inode, folio, + found_start, + found_start + found_len - 1, + wbc); ++ if (ret >= 0) ++ last_finished_delalloc_end = found_start + found_len; + } else { + /* + * We've hit an error during previous delalloc range, +@@ -1277,8 +1300,22 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + + delalloc_start = found_start + found_len; + } +- if (ret < 0) ++ /* ++ * It's possible we had some ordered extents created before we hit ++ * an error, cleanup non-async successfully created delalloc ranges. ++ */ ++ if (unlikely(ret < 0)) { ++ unsigned int bitmap_size = min( ++ (last_finished_delalloc_end - page_start) >> ++ fs_info->sectorsize_bits, ++ fs_info->sectors_per_page); ++ ++ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size) ++ btrfs_mark_ordered_io_finished(inode, folio, ++ page_start + (bit << fs_info->sectorsize_bits), ++ fs_info->sectorsize, false); + return ret; ++ } + out: + if (last_delalloc_end) + delalloc_end = last_delalloc_end; +@@ -1512,13 +1549,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + + bio_ctrl->wbc->nr_to_write--; + +-done: +- if (ret) { ++ if (ret) + btrfs_mark_ordered_io_finished(inode, folio, + page_start, PAGE_SIZE, !ret); +- mapping_set_error(folio->mapping, ret); +- } + ++done: ++ if (ret < 0) ++ mapping_set_error(folio->mapping, ret); + /* + * Only unlock ranges that are submitted. As there can be some async + * submitted ranges inside the folio. +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index f7e7d864f4144..5b842276573e8 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2419,8 +2419,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol + + out: + if (ret < 0) +- btrfs_cleanup_ordered_extents(inode, locked_folio, start, +- end - start + 1); ++ btrfs_cleanup_ordered_extents(inode, NULL, start, end - start + 1); + return ret; + } + +-- +2.39.5 + diff --git a/queue-6.12/btrfs-fix-double-accounting-race-when-extent_writepa.patch b/queue-6.12/btrfs-fix-double-accounting-race-when-extent_writepa.patch new file mode 100644 index 0000000000..ee336762a5 --- /dev/null +++ b/queue-6.12/btrfs-fix-double-accounting-race-when-extent_writepa.patch @@ -0,0 +1,156 @@ +From fbb7b9e573f8c10531b4f5234e07eb9eaf113b07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 16:43:56 +1030 +Subject: btrfs: fix double accounting race when extent_writepage_io() failed + +From: Qu Wenruo + +[ Upstream commit 8bf334beb3496da3c3fbf3daf3856f7eec70dacc ] + +[BUG] +If submit_one_sector() failed inside extent_writepage_io() for sector +size < page size cases (e.g. 
4K sector size and 64K page size), then +we can hit double ordered extent accounting error. + +This should be very rare, as submit_one_sector() only fails when we +failed to grab the extent map, and such extent map should exist inside +the memory and has been pinned. + +[CAUSE] +For example we have the following folio layout: + + 0 4K 32K 48K 60K 64K + |//| |//////| |///| + +Where |///| is the dirty range we need to writeback. The 3 different +dirty ranges are submitted for regular COW. + +Now we hit the following sequence: + +- submit_one_sector() returned 0 for [0, 4K) + +- submit_one_sector() returned 0 for [32K, 48K) + +- submit_one_sector() returned error for [60K, 64K) + +- btrfs_mark_ordered_io_finished() called for the whole folio + This will mark the following ranges as finished: + * [0, 4K) + * [32K, 48K) + Both ranges have their IO already submitted, this cleanup will + lead to double accounting. + + * [60K, 64K) + That's the correct cleanup. + +The only good news is, this error is only theoretical, as the target +extent map is always pinned, thus we should directly grab it from +memory, other than reading it from the disk. + +[FIX] +Instead of calling btrfs_mark_ordered_io_finished() for the whole folio +range, which can touch ranges we should not touch, instead +move the error handling inside extent_writepage_io(). + +So that we can cleanup exact sectors that ought to be submitted but failed. + +This provides much more accurate cleanup, avoiding the double accounting. + +CC: stable@vger.kernel.org # 5.15+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 15cbb2a865e5e..660a5b9c08e9e 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1431,6 +1431,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, + struct btrfs_fs_info *fs_info = inode->root->fs_info; + unsigned long range_bitmap = 0; + bool submitted_io = false; ++ bool error = false; + const u64 folio_start = folio_pos(folio); + u64 cur; + int bit; +@@ -1473,11 +1474,26 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, + break; + } + ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size); +- if (ret < 0) +- goto out; ++ if (unlikely(ret < 0)) { ++ /* ++ * bio_ctrl may contain a bio crossing several folios. ++ * Submit it immediately so that the bio has a chance ++ * to finish normally, other than marked as error. ++ */ ++ submit_one_bio(bio_ctrl); ++ /* ++ * Failed to grab the extent map which should be very rare. ++ * Since there is no bio submitted to finish the ordered ++ * extent, we have to manually finish this sector. ++ */ ++ btrfs_mark_ordered_io_finished(inode, folio, cur, ++ fs_info->sectorsize, false); ++ error = true; ++ continue; ++ } + submitted_io = true; + } +-out: ++ + /* + * If we didn't submitted any sector (>= i_size), folio dirty get + * cleared but PAGECACHE_TAG_DIRTY is not cleared (only cleared +@@ -1485,8 +1501,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, + * + * Here we set writeback and clear for the range. If the full folio + * is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag. ++ * ++ * If we hit any error, the corresponding sector will still be dirty ++ * thus no need to clear PAGECACHE_TAG_DIRTY. 
+ */ +- if (!submitted_io) { ++ if (!submitted_io && !error) { + btrfs_folio_set_writeback(fs_info, folio, start, len); + btrfs_folio_clear_writeback(fs_info, folio, start, len); + } +@@ -1506,7 +1525,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + { + struct btrfs_inode *inode = BTRFS_I(folio->mapping->host); + struct btrfs_fs_info *fs_info = inode->root->fs_info; +- const u64 page_start = folio_pos(folio); + int ret; + size_t pg_offset; + loff_t i_size = i_size_read(&inode->vfs_inode); +@@ -1549,10 +1567,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + + bio_ctrl->wbc->nr_to_write--; + +- if (ret) +- btrfs_mark_ordered_io_finished(inode, folio, +- page_start, PAGE_SIZE, !ret); +- + done: + if (ret < 0) + mapping_set_error(folio->mapping, ret); +@@ -2312,11 +2326,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f + if (ret == 1) + goto next_page; + +- if (ret) { +- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio, +- cur, cur_len, !ret); ++ if (ret) + mapping_set_error(mapping, ret); +- } + btrfs_folio_end_lock(fs_info, folio, cur, cur_len); + if (ret < 0) + found_error = true; +-- +2.39.5 + diff --git a/queue-6.12/btrfs-mark-all-dirty-sectors-as-locked-inside-writep.patch b/queue-6.12/btrfs-mark-all-dirty-sectors-as-locked-inside-writep.patch new file mode 100644 index 0000000000..15c9fd21e7 --- /dev/null +++ b/queue-6.12/btrfs-mark-all-dirty-sectors-as-locked-inside-writep.patch @@ -0,0 +1,99 @@ +From 182a65d6168ea39c695e59c044242a703263b0ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2024 08:12:40 +0930 +Subject: btrfs: mark all dirty sectors as locked inside writepage_delalloc() + +From: Qu Wenruo + +[ Upstream commit c96d0e3921419bd3e5d8a1f355970c8ae3047ef4 ] + +Currently we only mark sectors as locked if there is a *NEW* delalloc +range for it. + +But NEW delalloc range is not the same as dirty sectors we want to +submit, e.g: + + 0 32K 64K 96K 128K + | |////////||///////| |////| + 120K + +For above 64K page size case, writepage_delalloc() for page 0 will find +and lock the delalloc range [32K, 96K), which is beyond the page +boundary. + +Then when writepage_delalloc() is called for the page 64K, since [64K, +96K) is already locked, only [120K, 128K) will be locked. + +This means, although range [64K, 96K) is dirty and will be submitted +later by extent_writepage_io(), it will not be marked as locked. + +This is fine for now, as we call btrfs_folio_end_writer_lock_bitmap() to +free every non-compressed sector, and compression is only allowed for +full page range. + +But this is not safe for future sector perfect compression support, as +this can lead to double folio unlock: + + Thread A | Thread B +---------------------------------------+-------------------------------- + | submit_one_async_extent() + | |- extent_clear_unlock_delalloc() +extent_writepage() | |- btrfs_folio_end_writer_lock() +|- btrfs_folio_end_writer_lock_bitmap()| |- btrfs_subpage_end_and_test_writer() + | | | |- atomic_sub_and_test() + | | | /* Now the atomic value is 0 */ + |- if (atomic_read() == 0) | | + |- folio_unlock() | |- folio_unlock() + +The root cause is the above range [64K, 96K) is dirtied and should also +be locked but it isn't. + +So to make everything more consistent and prepare for the incoming +sector perfect compression, mark all dirty sectors as locked. 
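A minimal userspace sketch of the per-sector walk this change relies on:
iterate the set bits of the submit bitmap and convert each bit back to a byte
offset to lock. The values below are illustrative only, assuming 4K sectors:

    #include <stdio.h>

    int main(void)
    {
            unsigned long submit_bitmap = 0x10f0;   /* sectors 4-7 and 12 are dirty */
            unsigned int sectorsize_bits = 12;      /* 4K sectors */
            unsigned long long folio_start = 1536ULL << 10;

            /* Equivalent of for_each_set_bit(): lock every dirty sector. */
            for (unsigned int bit = 0; bit < 16; bit++) {
                    if (!(submit_bitmap & (1UL << bit)))
                            continue;
                    printf("lock sector at %llu, len 4096\n",
                           folio_start + ((unsigned long long)bit << sectorsize_bits));
            }
            return 0;
    }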
+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index ba34b92d48c2f..8222ae6f29af5 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1174,6 +1174,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + u64 delalloc_end = page_end; + u64 delalloc_to_write = 0; + int ret = 0; ++ int bit; + + /* Save the dirty bitmap as our submission bitmap will be a subset of it. */ + if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) { +@@ -1183,6 +1184,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + bio_ctrl->submit_bitmap = 1; + } + ++ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { ++ u64 start = page_start + (bit << fs_info->sectorsize_bits); ++ ++ btrfs_folio_set_writer_lock(fs_info, folio, start, fs_info->sectorsize); ++ } ++ + /* Lock all (subpage) delalloc ranges inside the folio first. */ + while (delalloc_start < page_end) { + delalloc_end = page_end; +@@ -1193,9 +1200,6 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + } + set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start, + min(delalloc_end, page_end) + 1 - delalloc_start); +- btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start, +- min(delalloc_end, page_end) + 1 - +- delalloc_start); + last_delalloc_end = delalloc_end; + delalloc_start = delalloc_end + 1; + } +-- +2.39.5 + diff --git a/queue-6.12/btrfs-move-the-delalloc-range-bitmap-search-into-ext.patch b/queue-6.12/btrfs-move-the-delalloc-range-bitmap-search-into-ext.patch new file mode 100644 index 0000000000..c785a9e7df --- /dev/null +++ b/queue-6.12/btrfs-move-the-delalloc-range-bitmap-search-into-ext.patch @@ -0,0 +1,198 @@ +From b4e7cfb5375e542aaee4c73cd072d7507872f39d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2024 08:03:00 +0930 +Subject: btrfs: move the delalloc range bitmap search into extent_io.c + +From: Qu Wenruo + +[ Upstream commit 2bca8eb0774d271b1077b72f1be135073e0a898f ] + +Currently for subpage (sector size < page size) cases, we reuse subpage +locked bitmap to find out all delalloc ranges we have locked, and run +all those found ranges. + +However such reuse is not perfect, e.g.: + + 0 32K 64K 96K 128K + | |////////||///////| |////| + 120K + +For above range, writepage_delalloc() for page 0 will handle the range +[32K, 96k), note delalloc range can be beyond the page boundary. + +But writepage_delalloc() for page 64K will only handle range [120K, +128K), as the previous run on page 0 has already handled range [64K, +96K). +Meanwhile for the writeback we should expect range [64K, 96K) to also be +locked, this leads to the mismatch from locked bitmap and delalloc +range. + +This is not causing problems yet, but it's still an inconsistent +behavior. + +So instead of relying on the subpage locked bitmap, move the delalloc +range search using local @delalloc_bitmap, so that we can remove the +existing btrfs_folio_find_writer_locked(). 
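As a rough sketch of the bit-scanning pattern the new helper is built on (the
first set bit marks the start of a delalloc run, the following zero bit ends
it), here is a userspace stand-in; the function name and types are assumptions,
not the kernel helper:

    #include <stdbool.h>
    #include <stdio.h>

    /* Find the next run of set bits in @bitmap starting at @from.
     * Returns true and fills @run_start/@run_len when a run exists.
     */
    static bool next_set_run(unsigned long bitmap, unsigned int nbits,
                             unsigned int from, unsigned int *run_start,
                             unsigned int *run_len)
    {
            unsigned int first_set = from, first_zero;

            while (first_set < nbits && !(bitmap & (1UL << first_set)))
                    first_set++;
            if (first_set >= nbits)
                    return false;

            first_zero = first_set;
            while (first_zero < nbits && (bitmap & (1UL << first_zero)))
                    first_zero++;

            *run_start = first_set;
            *run_len = first_zero - first_set;
            return true;
    }

    int main(void)
    {
            unsigned long delalloc_bitmap = 0x10f0; /* runs at bits 4-7 and 12 */
            unsigned int start, len, from = 0;

            while (next_set_run(delalloc_bitmap, 16, from, &start, &len)) {
                    printf("delalloc run: sectors %u-%u\n", start, start + len - 1);
                    from = start + len;
            }
            return 0;
    }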
+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 44 ++++++++++++++++++++++++++++++++++++++++- + fs/btrfs/subpage.c | 47 -------------------------------------------- + fs/btrfs/subpage.h | 4 ---- + 3 files changed, 43 insertions(+), 52 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 9ff72a5a13eb3..ba34b92d48c2f 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1105,6 +1105,45 @@ int btrfs_read_folio(struct file *file, struct folio *folio) + return ret; + } + ++static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bitmap, ++ u64 start, u32 len) ++{ ++ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); ++ const u64 folio_start = folio_pos(folio); ++ unsigned int start_bit; ++ unsigned int nbits; ++ ++ ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE); ++ start_bit = (start - folio_start) >> fs_info->sectorsize_bits; ++ nbits = len >> fs_info->sectorsize_bits; ++ ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits)); ++ bitmap_set(delalloc_bitmap, start_bit, nbits); ++} ++ ++static bool find_next_delalloc_bitmap(struct folio *folio, ++ unsigned long *delalloc_bitmap, u64 start, ++ u64 *found_start, u32 *found_len) ++{ ++ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); ++ const u64 folio_start = folio_pos(folio); ++ const unsigned int bitmap_size = fs_info->sectors_per_page; ++ unsigned int start_bit; ++ unsigned int first_zero; ++ unsigned int first_set; ++ ++ ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE); ++ ++ start_bit = (start - folio_start) >> fs_info->sectorsize_bits; ++ first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit); ++ if (first_set >= bitmap_size) ++ return false; ++ ++ *found_start = folio_start + (first_set << fs_info->sectorsize_bits); ++ first_zero = find_next_zero_bit(delalloc_bitmap, bitmap_size, first_set); ++ *found_len = (first_zero - first_set) << fs_info->sectorsize_bits; ++ return true; ++} ++ + /* + * helper for extent_writepage(), doing all of the delayed allocation setup. + * +@@ -1124,6 +1163,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping); + const u64 page_start = folio_pos(folio); + const u64 page_end = page_start + folio_size(folio) - 1; ++ unsigned long delalloc_bitmap = 0; + /* + * Save the last found delalloc end. 
As the delalloc end can go beyond + * page boundary, thus we cannot rely on subpage bitmap to locate the +@@ -1151,6 +1191,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + delalloc_start = delalloc_end + 1; + continue; + } ++ set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start, ++ min(delalloc_end, page_end) + 1 - delalloc_start); + btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start, + min(delalloc_end, page_end) + 1 - + delalloc_start); +@@ -1178,7 +1220,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + found_len = last_delalloc_end + 1 - found_start; + found = true; + } else { +- found = btrfs_subpage_find_writer_locked(fs_info, folio, ++ found = find_next_delalloc_bitmap(folio, &delalloc_bitmap, + delalloc_start, &found_start, &found_len); + } + if (!found) +diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c +index ec7328a6bfd75..c4950e04f481a 100644 +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -801,53 +801,6 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, + spin_unlock_irqrestore(&subpage->lock, flags); + } + +-/* +- * Find any subpage writer locked range inside @folio, starting at file offset +- * @search_start. The caller should ensure the folio is locked. +- * +- * Return true and update @found_start_ret and @found_len_ret to the first +- * writer locked range. +- * Return false if there is no writer locked range. +- */ +-bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 search_start, +- u64 *found_start_ret, u32 *found_len_ret) +-{ +- struct btrfs_subpage *subpage = folio_get_private(folio); +- const u32 sectors_per_page = fs_info->sectors_per_page; +- const unsigned int len = PAGE_SIZE - offset_in_page(search_start); +- const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, +- locked, search_start, len); +- const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked; +- const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page; +- unsigned long flags; +- int first_zero; +- int first_set; +- bool found = false; +- +- ASSERT(folio_test_locked(folio)); +- spin_lock_irqsave(&subpage->lock, flags); +- first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit); +- if (first_set >= locked_bitmap_end) +- goto out; +- +- found = true; +- +- *found_start_ret = folio_pos(folio) + +- ((first_set - locked_bitmap_start) << fs_info->sectorsize_bits); +- /* +- * Since @first_set is ensured to be smaller than locked_bitmap_end +- * here, @found_start_ret should be inside the folio. 
+- */ +- ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE); +- +- first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set); +- *found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits; +-out: +- spin_unlock_irqrestore(&subpage->lock, flags); +- return found; +-} +- + #define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \ + { \ + const int sectors_per_page = fs_info->sectors_per_page; \ +diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h +index cdb554e0d215e..197ec6c0b07b2 100644 +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -108,10 +108,6 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len); + void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, + struct folio *folio, unsigned long bitmap); +-bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 search_start, +- u64 *found_start_ret, u32 *found_len_ret); +- + /* + * Template for subpage related operations. + * +-- +2.39.5 + diff --git a/queue-6.12/btrfs-remove-unused-btrfs_folio_start_writer_lock.patch b/queue-6.12/btrfs-remove-unused-btrfs_folio_start_writer_lock.patch new file mode 100644 index 0000000000..0c6310a5cc --- /dev/null +++ b/queue-6.12/btrfs-remove-unused-btrfs_folio_start_writer_lock.patch @@ -0,0 +1,107 @@ +From 1baf5db2f160c880cee80b46c9b89addf514b65a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 09:37:04 +1030 +Subject: btrfs: remove unused btrfs_folio_start_writer_lock() + +From: Qu Wenruo + +[ Upstream commit 8511074c42b6255e03eceb09396338572572f1c7 ] + +This function is not really suitable to lock a folio, as it lacks the +proper mapping checks, thus the locked folio may not even belong to +btrfs. + +And due to the above reason, the last user inside lock_delalloc_folios() +is already removed, and we can remove this function. 
+ +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/subpage.c | 47 ---------------------------------------------- + fs/btrfs/subpage.h | 2 -- + 2 files changed, 49 deletions(-) + +diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c +index c4950e04f481a..99341e98bbcc7 100644 +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -295,26 +295,6 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) + orig_start + orig_len) - *start; + } + +-static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) +-{ +- struct btrfs_subpage *subpage = folio_get_private(folio); +- const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); +- const int nbits = (len >> fs_info->sectorsize_bits); +- unsigned long flags; +- int ret; +- +- btrfs_subpage_assert(fs_info, folio, start, len); +- +- spin_lock_irqsave(&subpage->lock, flags); +- ASSERT(atomic_read(&subpage->readers) == 0); +- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); +- bitmap_set(subpage->bitmaps, start_bit, nbits); +- ret = atomic_add_return(nbits, &subpage->writers); +- ASSERT(ret == nbits); +- spin_unlock_irqrestore(&subpage->lock, flags); +-} +- + static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +@@ -351,33 +331,6 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf + return last; + } + +-/* +- * Lock a folio for delalloc page writeback. +- * +- * Return -EAGAIN if the page is not properly initialized. +- * Return 0 with the page locked, and writer counter updated. +- * +- * Even with 0 returned, the page still need extra check to make sure +- * it's really the correct page, as the caller is using +- * filemap_get_folios_contig(), which can race with page invalidating. 
+- */ +-int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) +-{ +- if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { +- folio_lock(folio); +- return 0; +- } +- folio_lock(folio); +- if (!folio_test_private(folio) || !folio_get_private(folio)) { +- folio_unlock(folio); +- return -EAGAIN; +- } +- btrfs_subpage_clamp_range(folio, &start, &len); +- btrfs_subpage_start_writer(fs_info, folio, start, len); +- return 0; +-} +- + /* + * Handle different locked folios: + * +diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h +index 197ec6c0b07b2..6289d6f65b87d 100644 +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -100,8 +100,6 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, + void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len); + +-int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len); + void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len); + void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, +-- +2.39.5 + diff --git a/queue-6.12/btrfs-rename-btrfs_folio_-set-start-end-_writer_lock.patch b/queue-6.12/btrfs-rename-btrfs_folio_-set-start-end-_writer_lock.patch new file mode 100644 index 0000000000..c2cfd6855a --- /dev/null +++ b/queue-6.12/btrfs-rename-btrfs_folio_-set-start-end-_writer_lock.patch @@ -0,0 +1,279 @@ +From 2969eb0d73a2fce2e61c2246121287c1b0aeb78d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 16:21:07 +1030 +Subject: btrfs: rename btrfs_folio_(set|start|end)_writer_lock() + +From: Qu Wenruo + +[ Upstream commit 0f7120266584490616f031873e7148495d77dd68 ] + +Since there is no user of reader locks, rename the writer locks into a +more generic name, by removing the "_writer" part from the name. + +And also rename btrfs_subpage::writer into btrfs_subpage::locked. + +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/compression.c | 2 +- + fs/btrfs/extent_io.c | 14 ++++++------- + fs/btrfs/subpage.c | 46 +++++++++++++++++++++--------------------- + fs/btrfs/subpage.h | 20 ++++++++++-------- + 4 files changed, 43 insertions(+), 39 deletions(-) + +diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c +index 64eaf74fbebc8..40332ab62f101 100644 +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -545,7 +545,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, + * subpage::readers and to unlock the page. 
+ */ + if (fs_info->sectorsize < PAGE_SIZE) +- btrfs_folio_set_writer_lock(fs_info, folio, cur, add_size); ++ btrfs_folio_set_lock(fs_info, folio, cur, add_size); + folio_put(folio); + cur += add_size; + } +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 5d6b3b812593d..5a1bde8cc8b64 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -190,7 +190,7 @@ static void process_one_folio(struct btrfs_fs_info *fs_info, + btrfs_folio_clamp_clear_writeback(fs_info, folio, start, len); + + if (folio != locked_folio && (page_ops & PAGE_UNLOCK)) +- btrfs_folio_end_writer_lock(fs_info, folio, start, len); ++ btrfs_folio_end_lock(fs_info, folio, start, len); + } + + static void __process_folios_contig(struct address_space *mapping, +@@ -276,7 +276,7 @@ static noinline int lock_delalloc_folios(struct inode *inode, + range_start = max_t(u64, folio_pos(folio), start); + range_len = min_t(u64, folio_pos(folio) + folio_size(folio), + end + 1) - range_start; +- btrfs_folio_set_writer_lock(fs_info, folio, range_start, range_len); ++ btrfs_folio_set_lock(fs_info, folio, range_start, range_len); + + processed_end = range_start + range_len - 1; + } +@@ -438,7 +438,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le + if (!btrfs_is_subpage(fs_info, folio->mapping)) + folio_unlock(folio); + else +- btrfs_folio_end_writer_lock(fs_info, folio, start, len); ++ btrfs_folio_end_lock(fs_info, folio, start, len); + } + + /* +@@ -495,7 +495,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio) + return; + + ASSERT(folio_test_private(folio)); +- btrfs_folio_set_writer_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE); ++ btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE); + } + + /* +@@ -1187,7 +1187,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, + for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { + u64 start = page_start + (bit << fs_info->sectorsize_bits); + +- btrfs_folio_set_writer_lock(fs_info, folio, start, fs_info->sectorsize); ++ btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize); + } + + /* Lock all (subpage) delalloc ranges inside the folio first. */ +@@ -1523,7 +1523,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + * Only unlock ranges that are submitted. As there can be some async + * submitted ranges inside the folio. 
+ */ +- btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap); ++ btrfs_folio_end_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap); + ASSERT(ret <= 0); + return ret; + } +@@ -2280,7 +2280,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f + cur, cur_len, !ret); + mapping_set_error(mapping, ret); + } +- btrfs_folio_end_writer_lock(fs_info, folio, cur, cur_len); ++ btrfs_folio_end_lock(fs_info, folio, cur, cur_len); + if (ret < 0) + found_error = true; + next_page: +diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c +index 0587a7d7b5e81..88a01d51ab11f 100644 +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -143,7 +143,7 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, + if (type == BTRFS_SUBPAGE_METADATA) + atomic_set(&ret->eb_refs, 0); + else +- atomic_set(&ret->writers, 0); ++ atomic_set(&ret->nr_locked, 0); + return ret; + } + +@@ -237,8 +237,8 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) + orig_start + orig_len) - *start; + } + +-static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) ++static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, u64 start, u32 len) + { + struct btrfs_subpage *subpage = folio_get_private(folio); + const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); +@@ -256,9 +256,9 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf + * extent_clear_unlock_delalloc() for compression path. + * + * This @locked_page is locked by plain lock_page(), thus its +- * subpage::writers is 0. Handle them in a special way. ++ * subpage::locked is 0. Handle them in a special way. + */ +- if (atomic_read(&subpage->writers) == 0) { ++ if (atomic_read(&subpage->nr_locked) == 0) { + spin_unlock_irqrestore(&subpage->lock, flags); + return true; + } +@@ -267,8 +267,8 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf + clear_bit(bit, subpage->bitmaps); + cleared++; + } +- ASSERT(atomic_read(&subpage->writers) >= cleared); +- last = atomic_sub_and_test(cleared, &subpage->writers); ++ ASSERT(atomic_read(&subpage->nr_locked) >= cleared); ++ last = atomic_sub_and_test(cleared, &subpage->nr_locked); + spin_unlock_irqrestore(&subpage->lock, flags); + return last; + } +@@ -289,8 +289,8 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf + * bitmap, reduce the writer lock number, and unlock the page if that's + * the last locked range. + */ +-void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) ++void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, u64 start, u32 len) + { + struct btrfs_subpage *subpage = folio_get_private(folio); + +@@ -303,24 +303,24 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, + + /* + * For subpage case, there are two types of locked page. With or +- * without writers number. ++ * without locked number. + * +- * Since we own the page lock, no one else could touch subpage::writers ++ * Since we own the page lock, no one else could touch subpage::locked + * and we are safe to do several atomic operations without spinlock. + */ +- if (atomic_read(&subpage->writers) == 0) { +- /* No writers, locked by plain lock_page(). 
*/ ++ if (atomic_read(&subpage->nr_locked) == 0) { ++ /* No subpage lock, locked by plain lock_page(). */ + folio_unlock(folio); + return; + } + + btrfs_subpage_clamp_range(folio, &start, &len); +- if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len)) ++ if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len)) + folio_unlock(folio); + } + +-void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, +- struct folio *folio, unsigned long bitmap) ++void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, unsigned long bitmap) + { + struct btrfs_subpage *subpage = folio_get_private(folio); + const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked; +@@ -334,8 +334,8 @@ void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, + return; + } + +- if (atomic_read(&subpage->writers) == 0) { +- /* No writers, locked by plain lock_page(). */ ++ if (atomic_read(&subpage->nr_locked) == 0) { ++ /* No subpage lock, locked by plain lock_page(). */ + folio_unlock(folio); + return; + } +@@ -345,8 +345,8 @@ void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, + if (test_and_clear_bit(bit + start_bit, subpage->bitmaps)) + cleared++; + } +- ASSERT(atomic_read(&subpage->writers) >= cleared); +- last = atomic_sub_and_test(cleared, &subpage->writers); ++ ASSERT(atomic_read(&subpage->nr_locked) >= cleared); ++ last = atomic_sub_and_test(cleared, &subpage->nr_locked); + spin_unlock_irqrestore(&subpage->lock, flags); + if (last) + folio_unlock(folio); +@@ -671,8 +671,8 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, + * This populates the involved subpage ranges so that subpage helpers can + * properly unlock them. + */ +-void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) ++void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, u64 start, u32 len) + { + struct btrfs_subpage *subpage; + unsigned long flags; +@@ -691,7 +691,7 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, + /* Target range should not yet be locked. */ + ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); + bitmap_set(subpage->bitmaps, start_bit, nbits); +- ret = atomic_add_return(nbits, &subpage->writers); ++ ret = atomic_add_return(nbits, &subpage->nr_locked); + ASSERT(ret <= fs_info->sectors_per_page); + spin_unlock_irqrestore(&subpage->lock, flags); + } +diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h +index 8488ea057b30b..44fff1f4eac48 100644 +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -54,8 +54,12 @@ struct btrfs_subpage { + */ + atomic_t eb_refs; + +- /* Structures only used by data */ +- atomic_t writers; ++ /* ++ * Structures only used by data, ++ * ++ * How many sectors inside the page is locked. 
++ */ ++ atomic_t nr_locked; + }; + unsigned long bitmaps[]; + }; +@@ -87,12 +91,12 @@ void btrfs_free_subpage(struct btrfs_subpage *subpage); + void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); + void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); + +-void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len); +-void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len); +-void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, +- struct folio *folio, unsigned long bitmap); ++void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, u64 start, u32 len); ++void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, u64 start, u32 len); ++void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, unsigned long bitmap); + /* + * Template for subpage related operations. + * +-- +2.39.5 + diff --git a/queue-6.12/btrfs-unify-to-use-writer-locks-for-subpage-locking.patch b/queue-6.12/btrfs-unify-to-use-writer-locks-for-subpage-locking.patch new file mode 100644 index 0000000000..b1b93d5b7a --- /dev/null +++ b/queue-6.12/btrfs-unify-to-use-writer-locks-for-subpage-locking.patch @@ -0,0 +1,207 @@ +From 53439df281508c3c819e6fd8f7f14fb84d93b2f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 16:21:06 +1030 +Subject: btrfs: unify to use writer locks for subpage locking + +From: Qu Wenruo + +[ Upstream commit 336e69f3025fb70db9d0dfb7f36ac79887bf5341 ] + +Since commit d7172f52e993 ("btrfs: use per-buffer locking for +extent_buffer reading"), metadata read no longer relies on the subpage +reader locking. + +This means we do not need to maintain a different metadata/data split +for locking, so we can convert the existing reader lock users by: + +- add_ra_bio_pages() + Convert to btrfs_folio_set_writer_lock() + +- end_folio_read() + Convert to btrfs_folio_end_writer_lock() + +- begin_folio_read() + Convert to btrfs_folio_set_writer_lock() + +- folio_range_has_eb() + Remove the subpage->readers checks, since it is always 0. + +- Remove btrfs_subpage_start_reader() and btrfs_subpage_end_reader() + +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/compression.c | 3 +- + fs/btrfs/extent_io.c | 10 ++----- + fs/btrfs/subpage.c | 62 ++---------------------------------------- + fs/btrfs/subpage.h | 13 --------- + 4 files changed, 5 insertions(+), 83 deletions(-) + +diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c +index 90aef2627ca27..64eaf74fbebc8 100644 +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -545,8 +545,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, + * subpage::readers and to unlock the page. 
+ */ + if (fs_info->sectorsize < PAGE_SIZE) +- btrfs_subpage_start_reader(fs_info, folio, cur, +- add_size); ++ btrfs_folio_set_writer_lock(fs_info, folio, cur, add_size); + folio_put(folio); + cur += add_size; + } +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 8222ae6f29af5..5d6b3b812593d 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -438,7 +438,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le + if (!btrfs_is_subpage(fs_info, folio->mapping)) + folio_unlock(folio); + else +- btrfs_subpage_end_reader(fs_info, folio, start, len); ++ btrfs_folio_end_writer_lock(fs_info, folio, start, len); + } + + /* +@@ -495,7 +495,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio) + return; + + ASSERT(folio_test_private(folio)); +- btrfs_subpage_start_reader(fs_info, folio, folio_pos(folio), PAGE_SIZE); ++ btrfs_folio_set_writer_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE); + } + + /* +@@ -2507,12 +2507,6 @@ static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *foli + subpage = folio_get_private(folio); + if (atomic_read(&subpage->eb_refs)) + return true; +- /* +- * Even there is no eb refs here, we may still have +- * end_folio_read() call relying on page::private. +- */ +- if (atomic_read(&subpage->readers)) +- return true; + } + return false; + } +diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c +index 99341e98bbcc7..0587a7d7b5e81 100644 +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -140,12 +140,10 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, + return ERR_PTR(-ENOMEM); + + spin_lock_init(&ret->lock); +- if (type == BTRFS_SUBPAGE_METADATA) { ++ if (type == BTRFS_SUBPAGE_METADATA) + atomic_set(&ret->eb_refs, 0); +- } else { +- atomic_set(&ret->readers, 0); ++ else + atomic_set(&ret->writers, 0); +- } + return ret; + } + +@@ -221,62 +219,6 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, + __start_bit; \ + }) + +-void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) +-{ +- struct btrfs_subpage *subpage = folio_get_private(folio); +- const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); +- const int nbits = len >> fs_info->sectorsize_bits; +- unsigned long flags; +- +- +- btrfs_subpage_assert(fs_info, folio, start, len); +- +- spin_lock_irqsave(&subpage->lock, flags); +- /* +- * Even though it's just for reading the page, no one should have +- * locked the subpage range. +- */ +- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); +- bitmap_set(subpage->bitmaps, start_bit, nbits); +- atomic_add(nbits, &subpage->readers); +- spin_unlock_irqrestore(&subpage->lock, flags); +-} +- +-void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len) +-{ +- struct btrfs_subpage *subpage = folio_get_private(folio); +- const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); +- const int nbits = len >> fs_info->sectorsize_bits; +- unsigned long flags; +- bool is_data; +- bool last; +- +- btrfs_subpage_assert(fs_info, folio, start, len); +- is_data = is_data_inode(BTRFS_I(folio->mapping->host)); +- +- spin_lock_irqsave(&subpage->lock, flags); +- +- /* The range should have already been locked. 
*/ +- ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); +- ASSERT(atomic_read(&subpage->readers) >= nbits); +- +- bitmap_clear(subpage->bitmaps, start_bit, nbits); +- last = atomic_sub_and_test(nbits, &subpage->readers); +- +- /* +- * For data we need to unlock the page if the last read has finished. +- * +- * And please don't replace @last with atomic_sub_and_test() call +- * inside if () condition. +- * As we want the atomic_sub_and_test() to be always executed. +- */ +- if (is_data && last) +- folio_unlock(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); +-} +- + static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) + { + u64 orig_start = *start; +diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h +index 6289d6f65b87d..8488ea057b30b 100644 +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -45,14 +45,6 @@ enum { + struct btrfs_subpage { + /* Common members for both data and metadata pages */ + spinlock_t lock; +- /* +- * Both data and metadata needs to track how many readers are for the +- * page. +- * Data relies on @readers to unlock the page when last reader finished. +- * While metadata doesn't need page unlock, it needs to prevent +- * page::private get cleared before the last end_page_read(). +- */ +- atomic_t readers; + union { + /* + * Structures only used by metadata +@@ -95,11 +87,6 @@ void btrfs_free_subpage(struct btrfs_subpage *subpage); + void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); + void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); + +-void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len); +-void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, +- struct folio *folio, u64 start, u32 len); +- + void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len); + void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, +-- +2.39.5 + diff --git a/queue-6.12/btrfs-use-btrfs_inode-in-extent_writepage.patch b/queue-6.12/btrfs-use-btrfs_inode-in-extent_writepage.patch new file mode 100644 index 0000000000..6a791ab8d6 --- /dev/null +++ b/queue-6.12/btrfs-use-btrfs_inode-in-extent_writepage.patch @@ -0,0 +1,73 @@ +From a2d0dcb6b3f8fd092ab215b7c1d63675e7db8c5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Jan 2025 11:24:17 +0100 +Subject: btrfs: use btrfs_inode in extent_writepage() + +From: David Sterba + +[ Upstream commit 011a9a1f244656cc3cbde47edba2b250f794d440 ] + +As extent_writepage() is internal helper we should use our inode type, +so change it from struct inode. 
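A minimal sketch of the pattern described above, for illustration only: the function name below is hypothetical, while BTRFS_I(), ->vfs_inode, ->root->fs_info and i_size_read() are the real kernel interfaces the conversion relies on. The idea is to take the btrfs_inode once at the top of the helper and reach back to the embedded VFS inode only where a plain struct inode is genuinely needed.

        /*
         * Illustrative sketch, not part of the patch: convert to the
         * btrfs inode once, then use the embedded VFS inode only for
         * generic VFS helpers such as i_size_read().
         */
        static void inode_conversion_sketch(struct folio *folio)
        {
                struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
                struct btrfs_fs_info *fs_info = inode->root->fs_info;
                loff_t i_size = i_size_read(&inode->vfs_inode);

                /* internal helpers can now take the btrfs_inode directly */
                (void)fs_info;
                (void)i_size;
        }
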
+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: David Sterba +Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed") +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index 5a1bde8cc8b64..e8f882f949051 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1467,15 +1467,15 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, + */ + static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl) + { +- struct inode *inode = folio->mapping->host; +- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); ++ struct btrfs_inode *inode = BTRFS_I(folio->mapping->host); ++ struct btrfs_fs_info *fs_info = inode->root->fs_info; + const u64 page_start = folio_pos(folio); + int ret; + size_t pg_offset; +- loff_t i_size = i_size_read(inode); ++ loff_t i_size = i_size_read(&inode->vfs_inode); + unsigned long end_index = i_size >> PAGE_SHIFT; + +- trace_extent_writepage(folio, inode, bio_ctrl->wbc); ++ trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc); + + WARN_ON(!folio_test_locked(folio)); + +@@ -1499,13 +1499,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + if (ret < 0) + goto done; + +- ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl); ++ ret = writepage_delalloc(inode, folio, bio_ctrl); + if (ret == 1) + return 0; + if (ret) + goto done; + +- ret = extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio), ++ ret = extent_writepage_io(inode, folio, folio_pos(folio), + PAGE_SIZE, bio_ctrl, i_size); + if (ret == 1) + return 0; +@@ -1514,7 +1514,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl + + done: + if (ret) { +- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio, ++ btrfs_mark_ordered_io_finished(inode, folio, + page_start, PAGE_SIZE, !ret); + mapping_set_error(folio->mapping, ret); + } +-- +2.39.5 + diff --git a/queue-6.12/drm-amd-display-correct-register-address-in-dcn35.patch b/queue-6.12/drm-amd-display-correct-register-address-in-dcn35.patch new file mode 100644 index 0000000000..b6fd97b3b8 --- /dev/null +++ b/queue-6.12/drm-amd-display-correct-register-address-in-dcn35.patch @@ -0,0 +1,40 @@ +From ef6a8ac94a2f2f745da7b9396a41106023e7ac21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Jan 2025 17:43:29 +0800 +Subject: drm/amd/display: Correct register address in dcn35 + +From: loanchen + +[ Upstream commit f88192d2335b5a911fcfa09338cc00624571ec5e ] + +[Why] +the offset address of mmCLK5_spll_field_8 was incorrect for dcn35 +which causes SSC not to be enabled. 
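For context, the clock manager decides whether SSC is active by reading CLK5_spll_field_8 and testing its spll_ssc_en bit (see dcn35_is_spll_ssc_enabled() later in this series), so a wrong mmCLK5_spll_field_8 offset makes that read hit an unrelated register and SSC never appears enabled. A minimal sketch of that check, with read_reg() as a hypothetical stand-in for the driver's REG_READ() macro:

        /* Sketch only: read_reg() is a stand-in for REG_READ(); the mask
         * mirrors CLK5_spll_field_8__spll_ssc_en_MASK used in this series. */
        #include <stdbool.h>
        #include <stdint.h>

        #define SPLL_SSC_EN_MASK 0x00002000u

        static bool spll_ssc_enabled(uint32_t (*read_reg)(uint32_t offset),
                                     uint32_t spll_field_8_offset)
        {
                /* With a wrong offset this reads some other register and
                 * the ssc_en test below returns a bogus result. */
                return (read_reg(spll_field_8_offset) & SPLL_SSC_EN_MASK) != 0;
        }
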
+ +Reviewed-by: Charlene Liu +Signed-off-by: Lo-An Chen +Signed-off-by: Zaeem Mohamed +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +index d8a4cdbb5495d..7d0d8852ce8d2 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +@@ -89,7 +89,7 @@ + #define mmCLK1_CLK4_ALLOW_DS 0x16EA8 + #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +-#define mmCLK5_spll_field_8 0x1B04B ++#define mmCLK5_spll_field_8 0x1B24B + #define mmDENTIST_DISPCLK_CNTL 0x0124 + #define regDENTIST_DISPCLK_CNTL 0x0064 + #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 +-- +2.39.5 + diff --git a/queue-6.12/drm-amd-display-refactoring-if-and-endif-statements-.patch b/queue-6.12/drm-amd-display-refactoring-if-and-endif-statements-.patch new file mode 100644 index 0000000000..d9ae49c5c1 --- /dev/null +++ b/queue-6.12/drm-amd-display-refactoring-if-and-endif-statements-.patch @@ -0,0 +1,106 @@ +From 9597fe632b964495a84056dad775ca5a537b0f59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Aug 2024 17:04:04 +0530 +Subject: drm/amd/display: Refactoring if and endif statements to enable + DC_LOGGER + +From: Lohita Mudimela + +[ Upstream commit b04200432c4730c9bb730a66be46551c83d60263 ] + +[Why] +For Header related changes for core + +[How] +Refactoring if and endif statements to enable DC_LOGGER + +Reviewed-by: Mounika Adhuri +Reviewed-by: Alvin Lee +Signed-off-by: Lohita Mudimela +Signed-off-by: Tom Chung +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Stable-dep-of: f88192d2335b ("drm/amd/display: Correct register address in dcn35") +Signed-off-by: Sasha Levin +--- + .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 5 +++-- + .../gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 6 +++--- + .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 + + .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 3 ++- + 4 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +index e93df3d6222e6..bc123f1884da3 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +@@ -50,12 +50,13 @@ + #include "link.h" + + #include "logger_types.h" ++ ++ ++#include "yellow_carp_offset.h" + #undef DC_LOGGER + #define DC_LOGGER \ + clk_mgr->base.base.ctx->logger + +-#include "yellow_carp_offset.h" +- + #define regCLK1_CLK_PLL_REQ 0x0237 + #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +index 29eff386505ab..91d872d6d392b 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +@@ -53,9 +53,6 @@ + + + #include "logger_types.h" +-#undef DC_LOGGER +-#define DC_LOGGER \ +- clk_mgr->base.base.ctx->logger + + + #define MAX_INSTANCE 7 +@@ -77,6 +74,9 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, + { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } }, + { 
{ 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } }; + ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ clk_mgr->base.base.ctx->logger + #define regCLK1_CLK_PLL_REQ 0x0237 + #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +index 3bd0d46c17010..bbdc39ae57b9d 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +@@ -55,6 +55,7 @@ + #define DC_LOGGER \ + clk_mgr->base.base.ctx->logger + ++ + #define regCLK1_CLK_PLL_REQ 0x0237 + #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +index d78c8ec4de79e..885e749cdc6e9 100644 +--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c ++++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +@@ -51,9 +51,10 @@ + #include "dc_dmub_srv.h" + #include "gpio_service_interface.h" + ++#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ ++ + #define DC_LOGGER \ + link->ctx->logger +-#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ + + #ifndef MAX + #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +-- +2.39.5 + diff --git a/queue-6.12/drm-amd-display-update-dcn351-used-clock-offset.patch b/queue-6.12/drm-amd-display-update-dcn351-used-clock-offset.patch new file mode 100644 index 0000000000..7c377fcbc5 --- /dev/null +++ b/queue-6.12/drm-amd-display-update-dcn351-used-clock-offset.patch @@ -0,0 +1,505 @@ +From fc47d0d37103236c508286606a8c1c9077c305b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Nov 2024 17:18:50 -0500 +Subject: drm/amd/display: update dcn351 used clock offset + +From: Charlene Liu + +[ Upstream commit a1fc2837f4960e84e9375e12292584ad2ae472da ] + +[why] +hw register offset delta + +Reviewed-by: Martin Leung +Signed-off-by: Charlene Liu +Signed-off-by: Aurabindo Pillai +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Stable-dep-of: f88192d2335b ("drm/amd/display: Correct register address in dcn35") +Signed-off-by: Sasha Levin +--- + .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 2 +- + .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 5 +- + .../display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c | 140 ++++++++++++++++++ + .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 132 +++++++++++++---- + .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h | 4 + + .../amd/display/dc/inc/hw/clk_mgr_internal.h | 59 ++++++++ + 6 files changed, 308 insertions(+), 34 deletions(-) + create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +index ab1132bc896a3..d9955c5d2e5ed 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +@@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) + ############################################################################### + # DCN35 + ############################################################################### +-CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o ++CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o + + AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35)) + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +index 
0e243f4344d05..4c3e58c730b11 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +@@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p + BREAK_TO_DEBUGGER(); + return NULL; + } ++ if (ctx->dce_version == DCN_VERSION_3_51) ++ dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); ++ else ++ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + +- dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + return &clk_mgr->base.base; + } + break; +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c +new file mode 100644 +index 0000000000000..6a6ae618650b6 +--- /dev/null ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c +@@ -0,0 +1,140 @@ ++/* ++ * Copyright 2024 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: AMD ++ * ++ */ ++ ++#include "core_types.h" ++#include "dcn35_clk_mgr.h" ++ ++#define DCN_BASE__INST0_SEG1 0x000000C0 ++#define mmCLK1_CLK_PLL_REQ 0x16E37 ++ ++#define mmCLK1_CLK0_DFS_CNTL 0x16E69 ++#define mmCLK1_CLK1_DFS_CNTL 0x16E6C ++#define mmCLK1_CLK2_DFS_CNTL 0x16E6F ++#define mmCLK1_CLK3_DFS_CNTL 0x16E72 ++#define mmCLK1_CLK4_DFS_CNTL 0x16E75 ++#define mmCLK1_CLK5_DFS_CNTL 0x16E78 ++ ++#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC ++#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD ++#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE ++#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF ++#define mmCLK1_CLK4_CURRENT_CNT 0x16F00 ++#define mmCLK1_CLK5_CURRENT_CNT 0x16F01 ++ ++#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A ++#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 ++#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C ++#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 ++#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE ++#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 ++ ++#define mmCLK1_CLK0_DS_CNTL 0x16E83 ++#define mmCLK1_CLK1_DS_CNTL 0x16E8C ++#define mmCLK1_CLK2_DS_CNTL 0x16E95 ++#define mmCLK1_CLK3_DS_CNTL 0x16E9E ++#define mmCLK1_CLK4_DS_CNTL 0x16EA7 ++#define mmCLK1_CLK5_DS_CNTL 0x16EB0 ++ ++#define mmCLK1_CLK0_ALLOW_DS 0x16E84 ++#define mmCLK1_CLK1_ALLOW_DS 0x16E8D ++#define mmCLK1_CLK2_ALLOW_DS 0x16E96 ++#define mmCLK1_CLK3_ALLOW_DS 0x16E9F ++#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 ++#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 ++ ++#define mmCLK5_spll_field_8 0x1B04B ++#define mmDENTIST_DISPCLK_CNTL 0x0124 ++#define regDENTIST_DISPCLK_CNTL 0x0064 ++#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 ++ ++#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 ++#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc ++#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 ++#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL ++#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L ++#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L ++ ++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L ++ ++// DENTIST_DISPCLK_CNTL ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L ++ ++#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L ++ ++#define REG(reg) \ ++ (clk_mgr->regs->reg) ++ ++#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg ++ ++#define BASE(seg) BASE_INNER(seg) ++ ++#define SR(reg_name)\ ++ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ ++ reg ## reg_name ++ ++#define CLK_SR_DCN35(reg_name)\ ++ .reg_name = mm ## reg_name ++ ++static const struct clk_mgr_registers clk_mgr_regs_dcn351 = { ++ CLK_REG_LIST_DCN35() ++}; ++ ++static const struct clk_mgr_shift clk_mgr_shift_dcn351 = { ++ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) ++}; ++ ++static const struct clk_mgr_mask clk_mgr_mask_dcn351 = { ++ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) ++}; ++ ++#define TO_CLK_MGR_DCN35(clk_mgr)\ ++ container_of(clk_mgr, struct clk_mgr_dcn35, base) ++ ++ ++void dcn351_clk_mgr_construct( ++ struct dc_context *ctx, ++ struct 
clk_mgr_dcn35 *clk_mgr, ++ struct pp_smu_funcs *pp_smu, ++ struct dccg *dccg) ++{ ++ /*register offset changed*/ ++ clk_mgr->base.regs = &clk_mgr_regs_dcn351; ++ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351; ++ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351; ++ ++ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); ++ ++} ++ ++ +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +index bbdc39ae57b9d..d8a4cdbb5495d 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +@@ -36,15 +36,11 @@ + #include "dcn20/dcn20_clk_mgr.h" + + +- +- + #include "reg_helper.h" + #include "core_types.h" + #include "dcn35_smu.h" + #include "dm_helpers.h" + +-/* TODO: remove this include once we ported over remaining clk mgr functions*/ +-#include "dcn30/dcn30_clk_mgr.h" + #include "dcn31/dcn31_clk_mgr.h" + + #include "dc_dmub_srv.h" +@@ -55,35 +51,102 @@ + #define DC_LOGGER \ + clk_mgr->base.base.ctx->logger + ++#define DCN_BASE__INST0_SEG1 0x000000C0 ++#define mmCLK1_CLK_PLL_REQ 0x16E37 ++ ++#define mmCLK1_CLK0_DFS_CNTL 0x16E69 ++#define mmCLK1_CLK1_DFS_CNTL 0x16E6C ++#define mmCLK1_CLK2_DFS_CNTL 0x16E6F ++#define mmCLK1_CLK3_DFS_CNTL 0x16E72 ++#define mmCLK1_CLK4_DFS_CNTL 0x16E75 ++#define mmCLK1_CLK5_DFS_CNTL 0x16E78 ++ ++#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB ++#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC ++#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD ++#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE ++#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF ++#define mmCLK1_CLK5_CURRENT_CNT 0x16F00 ++ ++#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A ++#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 ++#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C ++#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 ++#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE ++#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 ++ ++#define mmCLK1_CLK0_DS_CNTL 0x16E83 ++#define mmCLK1_CLK1_DS_CNTL 0x16E8C ++#define mmCLK1_CLK2_DS_CNTL 0x16E95 ++#define mmCLK1_CLK3_DS_CNTL 0x16E9E ++#define mmCLK1_CLK4_DS_CNTL 0x16EA7 ++#define mmCLK1_CLK5_DS_CNTL 0x16EB0 ++ ++#define mmCLK1_CLK0_ALLOW_DS 0x16E84 ++#define mmCLK1_CLK1_ALLOW_DS 0x16E8D ++#define mmCLK1_CLK2_ALLOW_DS 0x16E96 ++#define mmCLK1_CLK3_ALLOW_DS 0x16E9F ++#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 ++#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 ++ ++#define mmCLK5_spll_field_8 0x1B04B ++#define mmDENTIST_DISPCLK_CNTL 0x0124 ++#define regDENTIST_DISPCLK_CNTL 0x0064 ++#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 ++ ++#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 ++#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc ++#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 ++#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL ++#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L ++#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L ++ ++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L ++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L ++// DENTIST_DISPCLK_CNTL ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL ++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L ++#define 
DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L ++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L ++ ++#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L ++ ++#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 ++#undef FN ++#define FN(reg_name, field_name) \ ++ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name + +-#define regCLK1_CLK_PLL_REQ 0x0237 +-#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 ++#define REG(reg) \ ++ (clk_mgr->regs->reg) + +-#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +-#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +-#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +-#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +-#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +-#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L ++#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +-#define regCLK1_CLK2_BYPASS_CNTL 0x029c +-#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 ++#define BASE(seg) BASE_INNER(seg) + +-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 +-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 +-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L ++#define SR(reg_name)\ ++ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ ++ reg ## reg_name + +-#define regCLK5_0_CLK5_spll_field_8 0x464b +-#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0 ++#define CLK_SR_DCN35(reg_name)\ ++ .reg_name = mm ## reg_name + +-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd +-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L ++static const struct clk_mgr_registers clk_mgr_regs_dcn35 = { ++ CLK_REG_LIST_DCN35() ++}; + +-#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 ++static const struct clk_mgr_shift clk_mgr_shift_dcn35 = { ++ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) ++}; + +-#define REG(reg_name) \ +- (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) ++static const struct clk_mgr_mask clk_mgr_mask_dcn35 = { ++ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) ++}; + + #define TO_CLK_MGR_DCN35(clk_mgr)\ + container_of(clk_mgr, struct clk_mgr_dcn35, base) +@@ -444,7 +507,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; +- struct dc_context *ctx = clk_mgr->base.ctx; + + /* + * Register value of fbmult is in 8.16 format, we are converting to 314.32 +@@ -504,12 +566,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs + static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) + { + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); +- struct dc_context *ctx = clk_mgr->base.ctx; ++ + uint32_t ssc_enable; + +- REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable); ++ ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + +- return ssc_enable == 1; ++ return ssc_enable != 0; + } + + static void init_clk_states(struct clk_mgr *clk_mgr) +@@ -634,10 +696,10 @@ static struct dcn35_ss_info_table ss_info_table = { + + static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) + { +- struct dc_context *ctx = clk_mgr->base.ctx; +- uint32_t clock_source; ++ uint32_t clock_source = 0; ++ ++ clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK; + +- 
REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); + // If it's DFS mode, clock_source is 0. + if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { + clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; +@@ -1107,6 +1169,12 @@ void dcn35_clk_mgr_construct( + clk_mgr->base.dprefclk_ss_divider = 1000; + clk_mgr->base.ss_on_dprefclk = false; + clk_mgr->base.dfs_ref_freq_khz = 48000; ++ if (ctx->dce_version == DCN_VERSION_3_5) { ++ clk_mgr->base.regs = &clk_mgr_regs_dcn35; ++ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35; ++ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35; ++ } ++ + + clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( + clk_mgr->base.base.ctx, +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h +index 1203dc605b12c..a12a9bf90806e 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h +@@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx, + + void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); + ++void dcn351_clk_mgr_construct(struct dc_context *ctx, ++ struct clk_mgr_dcn35 *clk_mgr, ++ struct pp_smu_funcs *pp_smu, ++ struct dccg *dccg); + #endif //__DCN35_CLK_MGR_H__ +diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +index c2dd061892f4d..7a1ca1e98059b 100644 +--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h ++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +@@ -166,6 +166,41 @@ enum dentist_divider_range { + CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \ + CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT) + ++#define CLK_REG_LIST_DCN35() \ ++ CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \ ++ CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \ ++ CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\ ++ CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \ ++ CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ ++ CLK_SR_DCN35(CLK5_spll_field_8), \ ++ SR(DENTIST_DISPCLK_CNTL), \ ++ + #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ + CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\ + CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\ +@@ -236,6 +271,7 @@ struct clk_mgr_registers { + uint32_t CLK1_CLK2_DFS_CNTL; + uint32_t CLK1_CLK3_DFS_CNTL; + uint32_t CLK1_CLK4_DFS_CNTL; ++ uint32_t CLK1_CLK5_DFS_CNTL; + uint32_t 
CLK2_CLK2_DFS_CNTL; + + uint32_t CLK1_CLK0_CURRENT_CNT; +@@ -243,11 +279,34 @@ struct clk_mgr_registers { + uint32_t CLK1_CLK2_CURRENT_CNT; + uint32_t CLK1_CLK3_CURRENT_CNT; + uint32_t CLK1_CLK4_CURRENT_CNT; ++ uint32_t CLK1_CLK5_CURRENT_CNT; + + uint32_t CLK0_CLK0_DFS_CNTL; + uint32_t CLK0_CLK1_DFS_CNTL; + uint32_t CLK0_CLK3_DFS_CNTL; + uint32_t CLK0_CLK4_DFS_CNTL; ++ uint32_t CLK1_CLK0_BYPASS_CNTL; ++ uint32_t CLK1_CLK1_BYPASS_CNTL; ++ uint32_t CLK1_CLK2_BYPASS_CNTL; ++ uint32_t CLK1_CLK3_BYPASS_CNTL; ++ uint32_t CLK1_CLK4_BYPASS_CNTL; ++ uint32_t CLK1_CLK5_BYPASS_CNTL; ++ ++ uint32_t CLK1_CLK0_DS_CNTL; ++ uint32_t CLK1_CLK1_DS_CNTL; ++ uint32_t CLK1_CLK2_DS_CNTL; ++ uint32_t CLK1_CLK3_DS_CNTL; ++ uint32_t CLK1_CLK4_DS_CNTL; ++ uint32_t CLK1_CLK5_DS_CNTL; ++ ++ uint32_t CLK1_CLK0_ALLOW_DS; ++ uint32_t CLK1_CLK1_ALLOW_DS; ++ uint32_t CLK1_CLK2_ALLOW_DS; ++ uint32_t CLK1_CLK3_ALLOW_DS; ++ uint32_t CLK1_CLK4_ALLOW_DS; ++ uint32_t CLK1_CLK5_ALLOW_DS; ++ uint32_t CLK5_spll_field_8; ++ + }; + + struct clk_mgr_shift { +-- +2.39.5 + diff --git a/queue-6.12/drm-amdkfd-ensure-consistent-barrier-state-saved-in-.patch b/queue-6.12/drm-amdkfd-ensure-consistent-barrier-state-saved-in-.patch new file mode 100644 index 0000000000..2ae56b0ec7 --- /dev/null +++ b/queue-6.12/drm-amdkfd-ensure-consistent-barrier-state-saved-in-.patch @@ -0,0 +1,61 @@ +From 0f6d92774a1d6ba6d6e367505b6819a5526449fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Jan 2025 19:16:49 +0000 +Subject: drm/amdkfd: Ensure consistent barrier state saved in gfx12 trap + handler + +From: Lancelot SIX + +[ Upstream commit d584198a6fe4c51f4aa88ad72f258f8961a0f11c ] + +It is possible for some waves in a workgroup to finish their save +sequence before the group leader has had time to capture the workgroup +barrier state. When this happens, having those waves exit do impact the +barrier state. As a consequence, the state captured by the group leader +is invalid, and is eventually incorrectly restored. + +This patch proposes to have all waves in a workgroup wait for each other +at the end of their save sequence (just before calling s_endpgm_saved). 
+ +Signed-off-by: Lancelot SIX +Reviewed-by: Jay Cornwall +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org # 6.12.x +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 3 ++- + drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 4 ++++ + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +index 02f7ba8c93cd4..7062f12b5b751 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +@@ -4117,7 +4117,8 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { + 0x0000ffff, 0x8bfe7e7e, + 0x8bea6a6a, 0xb97af804, + 0xbe804ec2, 0xbf94fffe, +- 0xbe804a6c, 0xbfb10000, ++ 0xbe804a6c, 0xbe804ec2, ++ 0xbf94fffe, 0xbfb10000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +index 1740e98c6719d..7b9d36e5fa437 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +@@ -1049,6 +1049,10 @@ L_SKIP_BARRIER_RESTORE: + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + + L_END_PGM: ++ // Make sure that no wave of the workgroup can exit the trap handler ++ // before the workgroup barrier state is saved. ++ s_barrier_signal -2 ++ s_barrier_wait -2 + s_endpgm_saved + end + +-- +2.39.5 + diff --git a/queue-6.12/drm-amdkfd-move-gfx12-trap-handler-to-separate-file.patch b/queue-6.12/drm-amdkfd-move-gfx12-trap-handler-to-separate-file.patch new file mode 100644 index 0000000000..db6e8df522 --- /dev/null +++ b/queue-6.12/drm-amdkfd-move-gfx12-trap-handler-to-separate-file.patch @@ -0,0 +1,1536 @@ +From acb27f2df8e7e5a4a457649a899147e845bdd533 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Oct 2024 16:12:35 -0500 +Subject: drm/amdkfd: Move gfx12 trap handler to separate file + +From: Jay Cornwall + +[ Upstream commit 62498e797aeb2bfa92a823ee1a8253f96d1cbe3f ] + +gfx12 derivatives will have substantially different trap handler +implementations from gfx10/gfx11. Add a separate source file for +gfx12+ and remove unneeded conditional code. + +No functional change. 
+ +v2: Revert copyright date to 2018, minor comment fixes + +Signed-off-by: Jay Cornwall +Reviewed-by: Lancelot Six +Cc: Jonathan Kim +Acked-by: Alex Deucher +Signed-off-by: Alex Deucher +Stable-dep-of: d584198a6fe4 ("drm/amdkfd: Ensure consistent barrier state saved in gfx12 trap handler") +Signed-off-by: Sasha Levin +--- + .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 202 +-- + .../amd/amdkfd/cwsr_trap_handler_gfx12.asm | 1126 +++++++++++++++++ + 2 files changed, 1127 insertions(+), 201 deletions(-) + create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +index 44772eec9ef4d..96fbb16ceb216 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +@@ -34,41 +34,24 @@ + * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 + * sp3 gfx11.sp3 -hex gfx11.hex + * +- * gfx12: +- * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3 +- * sp3 gfx12.sp3 -hex gfx12.hex + */ + + #define CHIP_NAVI10 26 + #define CHIP_SIENNA_CICHLID 30 + #define CHIP_PLUM_BONITO 36 +-#define CHIP_GFX12 37 + + #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) + #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) + #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) + #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) +-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12) ++#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO) + #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger + #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised + +-#if ASIC_FAMILY < CHIP_GFX12 + #define S_COHERENCE glc:1 + #define V_COHERENCE slc:1 glc:1 + #define S_WAITCNT_0 s_waitcnt 0 +-#else +-#define S_COHERENCE scope:SCOPE_SYS +-#define V_COHERENCE scope:SCOPE_SYS +-#define S_WAITCNT_0 s_wait_idle +- +-#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO +-#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI +-#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC +-#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC +-#define HW_REG_MODE HW_REG_WAVE_MODE +-#endif + +-#if ASIC_FAMILY < CHIP_GFX12 + var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 + var SQ_WAVE_STATUS_HALT_MASK = 0x2000 + var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 +@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E + var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK + var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 + var S_SAVE_PC_HI_HT_MASK = 0x01000000 +-#else +-var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 +-var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 +-var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 +-var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 +-var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 +-var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 +-var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 +-var SQ_WAVE_STATUS_WAVE64_SIZE = 1 +-var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 +-var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV +-var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK +-var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK +-var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 +-#endif + + var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 + var 
SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 + var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 + #endif + +-#if ASIC_FAMILY < CHIP_GFX12 + var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 + var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF + var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT + var S_TRAPSTS_HWREG = HW_REG_TRAPSTS + var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK + var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT +-#else +-var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF +-var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 +-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 +-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 +-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 +-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 +-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 +-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 +-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 +-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 +-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 +-var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 +-var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 +-var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 +- +-var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV +-var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK +-var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT +-var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ +- SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ +- SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ +- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ +- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ +- SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK +-var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT +-var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +-var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT +-var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT +-var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT +-var BARRIER_STATE_SIGNAL_OFFSET = 16 +-var BARRIER_STATE_VALID_OFFSET = 0 +-#endif + + // bits [31:24] unused by SPI debug data + var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 +@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER: + + L_HALTED: + // Host trap may occur while wave is halted. +-#if ASIC_FAMILY < CHIP_GFX12 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK +-#else +- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK +-#endif + s_cbranch_scc1 L_FETCH_2ND_TRAP + + L_CHECK_SAVE: +@@ -336,7 +266,6 @@ L_NOT_HALTED: + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. 
+-#if ASIC_FAMILY < CHIP_GFX12 + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_CHECK_TRAP_ID + +@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH: + s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT + s_and_b32 ttmp2, ttmp2, ttmp3 + s_cbranch_scc1 L_FETCH_2ND_TRAP +-#else +- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) +- s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK +- s_cbranch_scc0 L_NOT_ADDR_WATCH +- s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK +- +-L_NOT_ADDR_WATCH: +- s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) +- s_and_b32 ttmp2, ttmp3, ttmp2 +- s_cbranch_scc1 L_FETCH_2ND_TRAP +-#endif + + L_CHECK_TRAP_ID: + // Check trap_id != 0 +@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID: + #if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. +-#if ASIC_FAMILY < CHIP_GFX12 + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK +-#else +- // WAVE_TRAP_CTRL is already in ttmp3. +- s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK +-#endif + s_cbranch_scc1 L_FETCH_2ND_TRAP + #endif + +@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP: + s_cbranch_scc1 L_TRAP_CASE + + // Host trap will not cause trap re-entry. +-#if ASIC_FAMILY < CHIP_GFX12 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK +-#else +- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) +- s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK +-#endif + s_cbranch_scc1 L_EXIT_TRAP + s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK + +@@ -457,16 +365,7 @@ L_EXIT_TRAP: + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + +-#if ASIC_FAMILY < CHIP_GFX12 + s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status +-#else +- // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. +- // Only restore fields which the trap handler changes. +- s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT +- s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ +- SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status +-#endif +- + s_rfe_b64 [ttmp0, ttmp1] + + L_SAVE: +@@ -478,14 +377,6 @@ L_SAVE: + s_endpgm + L_HAVE_VGPRS: + #endif +-#if ASIC_FAMILY >= CHIP_GFX12 +- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) +- s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT +- s_cbranch_scc0 L_HAVE_VGPRS +- s_endpgm +-L_HAVE_VGPRS: +-#endif +- + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_mov_b32 s_save_tmp, 0 + s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit +@@ -671,19 +562,6 @@ L_SAVE_HWREG: + s_mov_b32 m0, 0x0 //Next lane of v2 to write to + #endif + +-#if ASIC_FAMILY >= CHIP_GFX12 +- // Ensure no further changes to barrier or LDS state. +- // STATE_PRIV.BARRIER_COMPLETE may change up to this point. +- s_barrier_signal -2 +- s_barrier_wait -2 +- +- // Re-read final state of BARRIER_COMPLETE field for save. 
+- s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG) +- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK +- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK +- s_or_b32 s_save_status, s_save_status, s_save_tmp +-#endif +- + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK +@@ -707,21 +585,6 @@ L_SAVE_HWREG: + s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + +-#if ASIC_FAMILY >= CHIP_GFX12 +- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) +- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) +- +- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) +- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) +- +- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) +- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) +- +- s_get_barrier_state s_save_tmp, -1 +- s_wait_kmcnt (0) +- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) +-#endif +- + #if NO_SQC_STORE + // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. + s_mov_b32 exec_lo, 0xFFFF +@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL: + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + +-#if ASIC_FAMILY < CHIP_GFX12 + s_barrier //LDS is used? wait for other waves in the same TG +-#endif + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + s_cbranch_scc0 L_SAVE_LDS_DONE + +@@ -1081,11 +942,6 @@ L_RESTORE: + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + +-#if ASIC_FAMILY >= CHIP_GFX12 +- // Save s_restore_spi_init_hi for later use. +- s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi +-#endif +- + //determine it is wave32 or wave64 + get_wave_size2(s_restore_size) + +@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR: + // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception. + // Clear DEBUG_EN before and restore MODE after the barrier. + s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0 +-#if ASIC_FAMILY < CHIP_GFX12 + s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG +-#endif + + /* restore HW registers */ + L_RESTORE_HWREG: +@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG: + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + +-#if ASIC_FAMILY >= CHIP_GFX12 +- // Restore s_restore_spi_init_hi before the saved value gets clobbered. 
+- s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save +-#endif +- + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) +@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG: + + s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch + +-#if ASIC_FAMILY >= CHIP_GFX12 +- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) +- S_WAITCNT_0 +- s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp +- +- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) +- S_WAITCNT_0 +- s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp +- +- // Only the first wave needs to restore the workgroup barrier. +- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK +- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE +- +- // Skip over WAVE_STATUS, since there is no state to restore from it +- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 +- +- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) +- S_WAITCNT_0 +- +- s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET +- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE +- +- // extract the saved signal count from s_restore_tmp +- s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET +- +- // We need to call s_barrier_signal repeatedly to restore the signal +- // count of the work group barrier. The member count is already +- // initialized with the number of waves in the work group. +-L_BARRIER_RESTORE_LOOP: +- s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp +- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE +- s_barrier_signal -1 +- s_add_i32 s_restore_tmp, s_restore_tmp, -1 +- s_branch L_BARRIER_RESTORE_LOOP +- +-L_SKIP_BARRIER_RESTORE: +-#endif +- + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi +@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV: + + s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu + +-#if ASIC_FAMILY >= CHIP_GFX12 +- // Make barrier and LDS state visible to all waves in the group. +- // STATE_PRIV.BARRIER_COMPLETE may change after this point. +- s_barrier_signal -2 +- s_barrier_wait -2 +-#endif +- + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + + L_END_PGM: +@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes + end + + function get_wave_size2(s_reg) +-#if ASIC_FAMILY < CHIP_GFX12 + s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) +-#else +- s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) +-#endif + s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE + end + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +new file mode 100644 +index 0000000000000..1740e98c6719d +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +@@ -0,0 +1,1126 @@ ++/* ++ * Copyright 2018 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++/* To compile this assembly code: ++ * ++ * gfx12: ++ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3 ++ * sp3 gfx12.sp3 -hex gfx12.hex ++ */ ++ ++#define CHIP_GFX12 37 ++ ++#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised ++ ++var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 ++var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 ++var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00 ++var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000 ++var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000 ++var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15 ++var SQ_WAVE_STATUS_WAVE64_SHIFT = 29 ++var SQ_WAVE_STATUS_WAVE64_SIZE = 1 ++var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24 ++var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK ++var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000 ++ ++var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 ++var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 ++var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 ++var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 ++var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 ++var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 ++var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9 ++ ++var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF ++var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10 ++var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5 ++var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 ++var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 ++var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 ++var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 ++var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 ++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 ++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 ++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 ++var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800 ++var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80 ++var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200 ++ ++var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ ++ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\ ++ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\ ++ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ ++ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ ++ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK ++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT ++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT ++var 
SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT ++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT ++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT ++var BARRIER_STATE_SIGNAL_OFFSET = 16 ++var BARRIER_STATE_VALID_OFFSET = 0 ++ ++var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 ++var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 ++ ++// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] ++// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE ++var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 ++var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC ++var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 ++var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 ++ ++var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 ++var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 ++ ++var s_sgpr_save_num = 108 ++ ++var s_save_spi_init_lo = exec_lo ++var s_save_spi_init_hi = exec_hi ++var s_save_pc_lo = ttmp0 ++var s_save_pc_hi = ttmp1 ++var s_save_exec_lo = ttmp2 ++var s_save_exec_hi = ttmp3 ++var s_save_state_priv = ttmp12 ++var s_save_excp_flag_priv = ttmp15 ++var s_save_xnack_mask = s_save_excp_flag_priv ++var s_wave_size = ttmp7 ++var s_save_buf_rsrc0 = ttmp8 ++var s_save_buf_rsrc1 = ttmp9 ++var s_save_buf_rsrc2 = ttmp10 ++var s_save_buf_rsrc3 = ttmp11 ++var s_save_mem_offset = ttmp4 ++var s_save_alloc_size = s_save_excp_flag_priv ++var s_save_tmp = ttmp14 ++var s_save_m0 = ttmp5 ++var s_save_ttmps_lo = s_save_tmp ++var s_save_ttmps_hi = s_save_excp_flag_priv ++ ++var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE ++var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC ++ ++var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 ++var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 ++var S_WAVE_SIZE = 25 ++ ++var s_restore_spi_init_lo = exec_lo ++var s_restore_spi_init_hi = exec_hi ++var s_restore_mem_offset = ttmp12 ++var s_restore_alloc_size = ttmp3 ++var s_restore_tmp = ttmp2 ++var s_restore_mem_offset_save = s_restore_tmp ++var s_restore_m0 = s_restore_alloc_size ++var s_restore_mode = ttmp7 ++var s_restore_flat_scratch = s_restore_tmp ++var s_restore_pc_lo = ttmp0 ++var s_restore_pc_hi = ttmp1 ++var s_restore_exec_lo = ttmp4 ++var s_restore_exec_hi = ttmp5 ++var s_restore_state_priv = ttmp14 ++var s_restore_excp_flag_priv = ttmp15 ++var s_restore_xnack_mask = ttmp13 ++var s_restore_buf_rsrc0 = ttmp8 ++var s_restore_buf_rsrc1 = ttmp9 ++var s_restore_buf_rsrc2 = ttmp10 ++var s_restore_buf_rsrc3 = ttmp11 ++var s_restore_size = ttmp6 ++var s_restore_ttmps_lo = s_restore_tmp ++var s_restore_ttmps_hi = s_restore_alloc_size ++var s_restore_spi_init_hi_save = s_restore_exec_hi ++ ++shader main ++ asic(DEFAULT) ++ type(CS) ++ wave_size(32) ++ ++ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save ++ ++L_JUMP_TO_RESTORE: ++ s_branch L_RESTORE ++ ++L_SKIP_RESTORE: ++ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC ++ ++ // Clear SPI_PRIO: do not save with elevated priority. ++ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. ++ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK ++ ++ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ++ ++ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK ++ s_cbranch_scc0 L_NOT_HALTED ++ ++L_HALTED: ++ // Host trap may occur while wave is halted. 
++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK ++ s_cbranch_scc1 L_FETCH_2ND_TRAP ++ ++L_CHECK_SAVE: ++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK ++ s_cbranch_scc1 L_SAVE ++ ++ // Wave is halted but neither host trap nor SAVECTX is raised. ++ // Caused by instruction fetch memory violation. ++ // Spin wait until context saved to prevent interrupt storm. ++ s_sleep 0x10 ++ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ++ s_branch L_CHECK_SAVE ++ ++L_NOT_HALTED: ++ // Let second-level handle non-SAVECTX exception or trap. ++ // Any concurrent SAVECTX will be handled upon re-entry once halted. ++ ++ // Check non-maskable exceptions. memory_violation, illegal_instruction ++ // and xnack_error exceptions always cause the wave to enter the trap ++ // handler. ++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK ++ s_cbranch_scc1 L_FETCH_2ND_TRAP ++ ++ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. ++ // Maskable exceptions only cause the wave to enter the trap handler if ++ // their respective bit in mode.excp_en is set. ++ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) ++ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK ++ s_cbranch_scc0 L_NOT_ADDR_WATCH ++ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK ++ ++L_NOT_ADDR_WATCH: ++ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL) ++ s_and_b32 ttmp2, ttmp3, ttmp2 ++ s_cbranch_scc1 L_FETCH_2ND_TRAP ++ ++L_CHECK_TRAP_ID: ++ // Check trap_id != 0 ++ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK ++ s_cbranch_scc1 L_FETCH_2ND_TRAP ++ ++#if SINGLE_STEP_MISSED_WORKAROUND ++ // Prioritize single step exception over context save. ++ // Second-level trap will halt wave and RFE, re-entering for SAVECTX. ++ // WAVE_TRAP_CTRL is already in ttmp3. ++ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK ++ s_cbranch_scc1 L_FETCH_2ND_TRAP ++#endif ++ ++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK ++ s_cbranch_scc1 L_SAVE ++ ++L_FETCH_2ND_TRAP: ++ // Read second-level TBA/TMA from first-level TMA and jump if available. ++ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) ++ // ttmp12 holds SQ_WAVE_STATUS ++ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) ++ s_wait_idle ++ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 ++ ++ s_bitcmp1_b32 ttmp15, 0xF ++ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA ++ s_or_b32 ttmp15, ttmp15, 0xFFFF0000 ++L_NO_SIGN_EXTEND_TMA: ++ ++ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag ++ s_wait_idle ++ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT ++ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK ++ s_or_b32 ttmp11, ttmp11, ttmp2 ++ ++ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA ++ s_wait_idle ++ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA ++ s_wait_idle ++ ++ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] ++ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set ++ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler ++ ++L_NO_NEXT_TRAP: ++ // If not caused by trap then halt wave to prevent re-entry. ++ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK ++ s_cbranch_scc1 L_TRAP_CASE ++ ++ // Host trap will not cause trap re-entry. 
++ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ++ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK ++ s_cbranch_scc1 L_EXIT_TRAP ++ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK ++ ++ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set. ++ // Rewind the PC to prevent this from occurring. ++ s_sub_u32 ttmp0, ttmp0, 0x8 ++ s_subb_u32 ttmp1, ttmp1, 0x0 ++ ++ s_branch L_EXIT_TRAP ++ ++L_TRAP_CASE: ++ // Advance past trap instruction to prevent re-entry. ++ s_add_u32 ttmp0, ttmp0, 0x4 ++ s_addc_u32 ttmp1, ttmp1, 0x0 ++ ++L_EXIT_TRAP: ++ s_and_b32 ttmp1, ttmp1, 0xFFFF ++ ++ // Restore SQ_WAVE_STATUS. ++ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 ++ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 ++ ++ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. ++ // Only restore fields which the trap handler changes. ++ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT ++ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ ++ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv ++ ++ s_rfe_b64 [ttmp0, ttmp1] ++ ++L_SAVE: ++ // If VGPRs have been deallocated then terminate the wavefront. ++ // It has no remaining program to run and cannot save without VGPRs. ++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) ++ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT ++ s_cbranch_scc0 L_HAVE_VGPRS ++ s_endpgm ++L_HAVE_VGPRS: ++ ++ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] ++ s_mov_b32 s_save_tmp, 0 ++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit ++ ++ /* inform SPI the readiness and wait for SPI's go signal */ ++ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI ++ s_mov_b32 s_save_exec_hi, exec_hi ++ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive ++ ++ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) ++ s_wait_idle ++ ++ // Save first_wave flag so we can clear high bits of save address. ++ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK ++ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) ++ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp ++ ++ // Trap temporaries must be saved via VGPR but all VGPRs are in use. ++ // There is no ttmp space to hold the resource constant for VGPR save. ++ // Save v0 by itself since it requires only two SGPRs. 
++ s_mov_b32 s_save_ttmps_lo, exec_lo ++ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF ++ s_mov_b32 exec_lo, 0xFFFFFFFF ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS ++ v_mov_b32 v0, 0x0 ++ s_mov_b32 exec_lo, s_save_ttmps_lo ++ s_mov_b32 exec_hi, s_save_ttmps_hi ++ ++ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic ++ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 ++ get_wave_size2(s_save_ttmps_hi) ++ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) ++ get_svgpr_size_bytes(s_save_ttmps_hi) ++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi ++ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF ++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() ++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo ++ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 ++ ++ v_writelane_b32 v0, ttmp4, 0x4 ++ v_writelane_b32 v0, ttmp5, 0x5 ++ v_writelane_b32 v0, ttmp6, 0x6 ++ v_writelane_b32 v0, ttmp7, 0x7 ++ v_writelane_b32 v0, ttmp8, 0x8 ++ v_writelane_b32 v0, ttmp9, 0x9 ++ v_writelane_b32 v0, ttmp10, 0xA ++ v_writelane_b32 v0, ttmp11, 0xB ++ v_writelane_b32 v0, ttmp13, 0xD ++ v_writelane_b32 v0, exec_lo, 0xE ++ v_writelane_b32 v0, exec_hi, 0xF ++ ++ s_mov_b32 exec_lo, 0x3FFF ++ s_mov_b32 exec_hi, 0x0 ++ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS ++ v_readlane_b32 ttmp14, v0, 0xE ++ v_readlane_b32 ttmp15, v0, 0xF ++ s_mov_b32 exec_lo, ttmp14 ++ s_mov_b32 exec_hi, ttmp15 ++ ++ /* setup Resource Contants */ ++ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo ++ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi ++ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE ++ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited ++ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC ++ ++ s_mov_b32 s_save_m0, m0 ++ ++ /* global mem offset */ ++ s_mov_b32 s_save_mem_offset, 0x0 ++ get_wave_size2(s_wave_size) ++ ++ /* save first 4 VGPRs, needed for SGPR save */ ++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on ++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI ++ s_mov_b32 exec_hi, 0x00000000 ++ s_branch L_SAVE_4VGPR_WAVE32 ++L_ENABLE_SAVE_4VGPR_EXEC_HI: ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++ s_branch L_SAVE_4VGPR_WAVE64 ++L_SAVE_4VGPR_WAVE32: ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR Allocated in 4-GPR granularity ++ ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 ++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 ++ s_branch L_SAVE_HWREG ++ ++L_SAVE_4VGPR_WAVE64: ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR Allocated in 4-GPR granularity ++ ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 ++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 ++ ++ /* save HW registers */ ++ ++L_SAVE_HWREG: ++ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) ++ 
get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) ++ get_svgpr_size_bytes(s_save_tmp) ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() ++ ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource ++ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource ++ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store ++ s_mov_b32 m0, 0x0 //Next lane of v2 to write to ++ ++ // Ensure no further changes to barrier or LDS state. ++ // STATE_PRIV.BARRIER_COMPLETE may change up to this point. ++ s_barrier_signal -2 ++ s_barrier_wait -2 ++ ++ // Re-read final state of BARRIER_COMPLETE field for save. ++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV) ++ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK ++ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK ++ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp ++ ++ write_hwreg_to_v2(s_save_m0) ++ write_hwreg_to_v2(s_save_pc_lo) ++ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK ++ write_hwreg_to_v2(s_save_tmp) ++ write_hwreg_to_v2(s_save_exec_lo) ++ write_hwreg_to_v2(s_save_exec_hi) ++ write_hwreg_to_v2(s_save_state_priv) ++ ++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ++ write_hwreg_to_v2(s_save_tmp) ++ ++ write_hwreg_to_v2(s_save_xnack_mask) ++ ++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE) ++ write_hwreg_to_v2(s_save_m0) ++ ++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) ++ write_hwreg_to_v2(s_save_m0) ++ ++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) ++ write_hwreg_to_v2(s_save_m0) ++ ++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) ++ write_hwreg_to_v2(s_save_m0) ++ ++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) ++ write_hwreg_to_v2(s_save_m0) ++ ++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) ++ write_hwreg_to_v2(s_save_tmp) ++ ++ s_get_barrier_state s_save_tmp, -1 ++ s_wait_kmcnt (0) ++ write_hwreg_to_v2(s_save_tmp) ++ ++ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. ++ s_mov_b32 exec_lo, 0xFFFF ++ s_mov_b32 exec_hi, 0x0 ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ ++ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. ++ s_mov_b32 exec_lo, 0xFFFFFFFF ++ ++ /* save SGPRs */ ++ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... 
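++	// Summary of the store mechanism used below: s_movrels_b64 copies the
++	// SGPRs 16 at a time into s0..s15, write_16sgpr_to_v2 spills them into
++	// lanes of v2 (ttmp13 tracks the next lane), and v2 is flushed with a
++	// single buffer_store_dword once 32 lanes are filled.  The loop covers
++	// the first 96 SGPRs; the remaining 12 of the 108 saved SGPRs are
++	// written out after it.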
++ ++ // SGPR SR memory offset : size(VGPR)+size(SVGPR) ++ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) ++ get_svgpr_size_bytes(s_save_tmp) ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into ++ ++ s_mov_b32 m0, 0x0 //SGPR initial index value =0 ++ s_nop 0x0 //Manually inserted wait states ++L_SAVE_SGPR_LOOP: ++ // SGPR is allocated in 16 SGPR granularity ++ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] ++ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] ++ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] ++ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] ++ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] ++ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] ++ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] ++ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] ++ ++ write_16sgpr_to_v2(s0) ++ ++ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled? ++ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE ++ ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 ++ s_mov_b32 ttmp13, 0x0 ++ v_mov_b32 v2, 0x0 ++L_SAVE_SGPR_SKIP_TCP_STORE: ++ ++ s_add_u32 m0, m0, 16 //next sgpr index ++ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete? ++ ++ //save the rest 12 SGPR ++ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] ++ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] ++ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] ++ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] ++ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] ++ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] ++ write_12sgpr_to_v2(s0) ++ ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ ++ /* save LDS */ ++ ++L_SAVE_LDS: ++ // Change EXEC to all threads... ++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on ++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI ++ s_mov_b32 exec_hi, 0x00000000 ++ s_branch L_SAVE_LDS_NORMAL ++L_ENABLE_SAVE_LDS_EXEC_HI: ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++L_SAVE_LDS_NORMAL: ++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) ++ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? ++ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? 
jump to L_SAVE_DONE ++ ++ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK ++ s_cbranch_scc0 L_SAVE_LDS_DONE ++ ++ // first wave do LDS save; ++ ++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY ++ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes ++ ++ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) ++ // ++ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size) ++ get_svgpr_size_bytes(s_save_tmp) ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes() ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() ++ ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ //load 0~63*4(byte address) to vgpr v0 ++ v_mbcnt_lo_u32_b32 v0, -1, 0 ++ v_mbcnt_hi_u32_b32 v0, -1, v0 ++ v_mul_u32_u24 v0, 4, v0 ++ ++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_mov_b32 m0, 0x0 ++ s_cbranch_scc1 L_SAVE_LDS_W64 ++ ++L_SAVE_LDS_W32: ++ s_mov_b32 s3, 128 ++ s_nop 0 ++ s_nop 0 ++ s_nop 0 ++L_SAVE_LDS_LOOP_W32: ++ ds_read_b32 v1, v0 ++ s_wait_idle ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ ++ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 ++ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes ++ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? ++ ++ s_branch L_SAVE_LDS_DONE ++ ++L_SAVE_LDS_W64: ++ s_mov_b32 s3, 256 ++ s_nop 0 ++ s_nop 0 ++ s_nop 0 ++L_SAVE_LDS_LOOP_W64: ++ ds_read_b32 v1, v0 ++ s_wait_idle ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ ++ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 ++ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes ++ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? 
++ ++L_SAVE_LDS_DONE: ++ /* save VGPRs - set the Rest VGPRs */ ++L_SAVE_VGPR: ++ // VGPR SR memory offset: 0 ++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on ++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI ++ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs ++ s_mov_b32 exec_hi, 0x00000000 ++ s_branch L_SAVE_VGPR_NORMAL ++L_ENABLE_SAVE_VGPR_EXEC_HI: ++ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++L_SAVE_VGPR_NORMAL: ++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) ++ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 ++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) ++ //determine it is wave32 or wave64 ++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_SAVE_VGPR_WAVE64 ++ ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR Allocated in 4-GPR granularity ++ ++ // VGPR store using dw burst ++ s_mov_b32 m0, 0x4 //VGPR initial index value =4 ++ s_cmp_lt_u32 m0, s_save_alloc_size ++ s_cbranch_scc0 L_SAVE_VGPR_END ++ ++L_SAVE_VGPR_W32_LOOP: ++ v_movrels_b32 v0, v0 //v0 = v[0+m0] ++ v_movrels_b32 v1, v1 //v1 = v[1+m0] ++ v_movrels_b32 v2, v2 //v2 = v[2+m0] ++ v_movrels_b32 v3, v3 //v3 = v[3+m0] ++ ++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128 ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2 ++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3 ++ ++ s_add_u32 m0, m0, 4 //next vgpr index ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes ++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete? ++ ++ s_branch L_SAVE_VGPR_END ++ ++L_SAVE_VGPR_WAVE64: ++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR store using dw burst ++ s_mov_b32 m0, 0x4 //VGPR initial index value =4 ++ s_cmp_lt_u32 m0, s_save_alloc_size ++ s_cbranch_scc0 L_SAVE_SHARED_VGPR ++ ++L_SAVE_VGPR_W64_LOOP: ++ v_movrels_b32 v0, v0 //v0 = v[0+m0] ++ v_movrels_b32 v1, v1 //v1 = v[1+m0] ++ v_movrels_b32 v2, v2 //v2 = v[2+m0] ++ v_movrels_b32 v3, v3 //v3 = v[3+m0] ++ ++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256 ++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2 ++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3 ++ ++ s_add_u32 m0, m0, 4 //next vgpr index ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes ++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? ++ ++L_SAVE_SHARED_VGPR: ++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) ++ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? 
++ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS ++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) ++ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. ++ //save shared_vgpr will start from the index of m0 ++ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 ++ s_mov_b32 exec_lo, 0xFFFFFFFF ++ s_mov_b32 exec_hi, 0x00000000 ++ ++L_SAVE_SHARED_VGPR_WAVE64_LOOP: ++ v_movrels_b32 v0, v0 //v0 = v[0+m0] ++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS ++ s_add_u32 m0, m0, 1 //next vgpr index ++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 ++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? ++ ++L_SAVE_VGPR_END: ++ s_branch L_END_PGM ++ ++L_RESTORE: ++ /* Setup Resource Contants */ ++ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo ++ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi ++ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE ++ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) ++ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC ++ ++ // Save s_restore_spi_init_hi for later use. ++ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi ++ ++ //determine it is wave32 or wave64 ++ get_wave_size2(s_restore_size) ++ ++ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK ++ s_cbranch_scc0 L_RESTORE_VGPR ++ ++ /* restore LDS */ ++L_RESTORE_LDS: ++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on ++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI ++ s_mov_b32 exec_hi, 0x00000000 ++ s_branch L_RESTORE_LDS_NORMAL ++L_ENABLE_RESTORE_LDS_EXEC_HI: ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++L_RESTORE_LDS_NORMAL: ++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) ++ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? ++ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR ++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY ++ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes ++ ++ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG) ++ // ++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) ++ get_svgpr_size_bytes(s_restore_tmp) ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() ++ ++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_mov_b32 m0, 0x0 ++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 ++ ++L_RESTORE_LDS_LOOP_W32: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset ++ s_wait_idle ++ ds_store_addtid_b32 v0 ++ s_add_u32 m0, m0, 128 // 128 DW ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW ++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 
1 : 0 ++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? ++ s_branch L_RESTORE_VGPR ++ ++L_RESTORE_LDS_LOOP_W64: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset ++ s_wait_idle ++ ds_store_addtid_b32 v0 ++ s_add_u32 m0, m0, 256 // 256 DW ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW ++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? ++ ++ /* restore VGPRs */ ++L_RESTORE_VGPR: ++ // VGPR SR memory offset : 0 ++ s_mov_b32 s_restore_mem_offset, 0x0 ++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on ++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI ++ s_mov_b32 exec_hi, 0x00000000 ++ s_branch L_RESTORE_VGPR_NORMAL ++L_ENABLE_RESTORE_VGPR_EXEC_HI: ++ s_mov_b32 exec_hi, 0xFFFFFFFF ++L_RESTORE_VGPR_NORMAL: ++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) ++ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 ++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) ++ //determine it is wave32 or wave64 ++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE ++ s_and_b32 m0, m0, 1 ++ s_cmp_eq_u32 m0, 1 ++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 ++ ++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR load using dw burst ++ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 ++ s_mov_b32 m0, 4 //VGPR initial index value = 4 ++ s_cmp_lt_u32 m0, s_restore_alloc_size ++ s_cbranch_scc0 L_RESTORE_SGPR ++ ++L_RESTORE_VGPR_WAVE32_LOOP: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS ++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128 ++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2 ++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3 ++ s_wait_idle ++ v_movreld_b32 v0, v0 //v[0+m0] = v0 ++ v_movreld_b32 v1, v1 ++ v_movreld_b32 v2, v2 ++ v_movreld_b32 v3, v3 ++ s_add_u32 m0, m0, 4 //next vgpr index ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes ++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? 
++ ++ /* VGPR restore on v0 */ ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS ++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128 ++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2 ++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3 ++ s_wait_idle ++ ++ s_branch L_RESTORE_SGPR ++ ++L_RESTORE_VGPR_WAVE64: ++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // VGPR load using dw burst ++ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 ++ s_mov_b32 m0, 4 //VGPR initial index value = 4 ++ s_cmp_lt_u32 m0, s_restore_alloc_size ++ s_cbranch_scc0 L_RESTORE_SHARED_VGPR ++ ++L_RESTORE_VGPR_WAVE64_LOOP: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS ++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256 ++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2 ++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3 ++ s_wait_idle ++ v_movreld_b32 v0, v0 //v[0+m0] = v0 ++ v_movreld_b32 v1, v1 ++ v_movreld_b32 v2, v2 ++ v_movreld_b32 v3, v3 ++ s_add_u32 m0, m0, 4 //next vgpr index ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes ++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? ++ ++L_RESTORE_SHARED_VGPR: ++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size ++ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? ++ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? ++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) ++ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. ++ //restore shared_vgpr will start from the index of m0 ++ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 ++ s_mov_b32 exec_lo, 0xFFFFFFFF ++ s_mov_b32 exec_hi, 0x00000000 ++L_RESTORE_SHARED_VGPR_WAVE64_LOOP: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS ++ s_wait_idle ++ v_movreld_b32 v0, v0 //v[0+m0] = v0 ++ s_add_u32 m0, m0, 1 //next vgpr index ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 ++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 ++ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? ++ ++ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! 
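++	// Note: v0..v3 have served as staging registers for all of the buffer
++	// loads above, so their own saved contents are restored last, from the
++	// start offset kept in s_restore_mem_offset_save.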
++ ++ /* VGPR restore on v0 */ ++L_RESTORE_V0: ++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS ++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256 ++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2 ++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3 ++ s_wait_idle ++ ++ /* restore SGPRs */ ++ //will be 2+8+16*6 ++ // SGPR SR memory offset : size(VGPR)+size(SVGPR) ++L_RESTORE_SGPR: ++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) ++ get_svgpr_size_bytes(s_restore_tmp) ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() ++ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved ++ ++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ s_mov_b32 m0, s_sgpr_save_num ++ ++ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] ++ s_nop 0 // hazard SALU M0=> S_MOVREL ++ ++ s_movreld_b64 s0, s0 //s[0+m0] = s0 ++ s_movreld_b64 s2, s2 ++ ++ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] ++ s_nop 0 // hazard SALU M0=> S_MOVREL ++ ++ s_movreld_b64 s0, s0 //s[0+m0] = s0 ++ s_movreld_b64 s2, s2 ++ s_movreld_b64 s4, s4 ++ s_movreld_b64 s6, s6 ++ ++ L_RESTORE_SGPR_LOOP: ++ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] ++ s_nop 0 // hazard SALU M0=> S_MOVREL ++ ++ s_movreld_b64 s0, s0 //s[0+m0] = s0 ++ s_movreld_b64 s2, s2 ++ s_movreld_b64 s4, s4 ++ s_movreld_b64 s6, s6 ++ s_movreld_b64 s8, s8 ++ s_movreld_b64 s10, s10 ++ s_movreld_b64 s12, s12 ++ s_movreld_b64 s14, s14 ++ ++ s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0 ++ s_cbranch_scc0 L_RESTORE_SGPR_LOOP ++ ++ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception. ++ // Clear DEBUG_EN before and restore MODE after the barrier. ++ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0 ++ ++ /* restore HW registers */ ++L_RESTORE_HWREG: ++ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) ++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) ++ get_svgpr_size_bytes(s_restore_tmp) ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() ++ ++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes ++ ++ // Restore s_restore_spi_init_hi before the saved value gets clobbered. 
++ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save ++ ++ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) ++ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch ++ ++ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch ++ ++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp ++ ++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp ++ ++ // Only the first wave needs to restore the workgroup barrier. ++ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK ++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE ++ ++ // Skip over WAVE_STATUS, since there is no state to restore from it ++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4 ++ ++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset) ++ s_wait_idle ++ ++ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET ++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE ++ ++ // extract the saved signal count from s_restore_tmp ++ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET ++ ++ // We need to call s_barrier_signal repeatedly to restore the signal ++ // count of the work group barrier. The member count is already ++ // initialized with the number of waves in the work group. ++L_BARRIER_RESTORE_LOOP: ++ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp ++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE ++ s_barrier_signal -1 ++ s_add_i32 s_restore_tmp, s_restore_tmp, -1 ++ s_branch L_BARRIER_RESTORE_LOOP ++ ++L_SKIP_BARRIER_RESTORE: ++ ++ s_mov_b32 m0, s_restore_m0 ++ s_mov_b32 exec_lo, s_restore_exec_lo ++ s_mov_b32 exec_hi, s_restore_exec_hi ++ ++ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. ++ // Only restore the other fields to avoid clobbering them. 
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv ++ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT ++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv ++ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT ++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv ++ ++ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode ++ ++ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic ++ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 ++ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) ++ get_svgpr_size_bytes(s_restore_ttmps_hi) ++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi ++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() ++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 ++ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 ++ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF ++ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS ++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS ++ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS ++ s_wait_idle ++ ++ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS ++ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 ++ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 ++ ++ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu ++ ++ // Make barrier and LDS state visible to all waves in the group. ++ // STATE_PRIV.BARRIER_COMPLETE may change after this point. ++ s_barrier_signal -2 ++ s_barrier_wait -2 ++ ++ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution ++ ++L_END_PGM: ++ s_endpgm_saved ++end ++ ++function write_hwreg_to_v2(s) ++ // Copy into VGPR for later TCP store. ++ v_writelane_b32 v2, s, m0 ++ s_add_u32 m0, m0, 0x1 ++end ++ ++ ++function write_16sgpr_to_v2(s) ++ // Copy into VGPR for later TCP store. ++ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ ++ v_writelane_b32 v2, s[sgpr_idx], ttmp13 ++ s_add_u32 ttmp13, ttmp13, 0x1 ++ end ++end ++ ++function write_12sgpr_to_v2(s) ++ // Copy into VGPR for later TCP store. 
++ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++ ++ v_writelane_b32 v2, s[sgpr_idx], ttmp13 ++ s_add_u32 ttmp13, ttmp13, 0x1 ++ end ++end ++ ++function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) ++ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS ++ s_add_u32 s_mem_offset, s_mem_offset, 4 ++end ++ ++function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) ++ s_sub_u32 s_mem_offset, s_mem_offset, 4*16 ++ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS ++end ++ ++function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset) ++ s_sub_u32 s_mem_offset, s_mem_offset, 4*8 ++ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS ++end ++ ++function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) ++ s_sub_u32 s_mem_offset, s_mem_offset, 4*4 ++ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS ++end ++ ++function get_vgpr_size_bytes(s_vgpr_size_byte, s_size) ++ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) ++ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 ++ s_bitcmp1_b32 s_size, S_WAVE_SIZE ++ s_cbranch_scc1 L_ENABLE_SHIFT_W64 ++ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value) ++ s_branch L_SHIFT_DONE ++L_ENABLE_SHIFT_W64: ++ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) ++L_SHIFT_DONE: ++end ++ ++function get_svgpr_size_bytes(s_svgpr_size_byte) ++ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) ++ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) ++end ++ ++function get_sgpr_size_bytes ++ return 512 ++end ++ ++function get_hwreg_size_bytes ++ return 128 ++end ++ ++function get_wave_size2(s_reg) ++ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) ++ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE ++end +-- +2.39.5 + diff --git a/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch b/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch new file mode 100644 index 0000000000..86d830e980 --- /dev/null +++ b/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch @@ -0,0 +1,102 @@ +From b1b5b7c39395e00cae08544fdbef2eaa18a1a229 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 13:03:48 -0700 +Subject: drm/xe/oa: Add input fence dependencies + +From: Ashutosh Dixit + +[ Upstream commit 2fb4350a283af03a5ee34ba765783a941f942b82 ] + +Add input fence dependencies which will make OA configuration wait till +these dependencies are met (till input fences signal). 
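+
+As an editorial illustration (not part of the applied change), the flow this
+patch adds boils down to the sketch below.  Every name is taken from the hunk
+that follows; the wrapper name is invented, and job creation plus most error
+handling are omitted:
+
+	static struct dma_fence *submit_with_deps(struct xe_oa_stream *stream,
+						  enum xe_oa_submit_deps deps,
+						  struct xe_sched_job *job)
+	{
+		struct dma_fence *fence;
+		int err = 0;
+
+		/* Only the OA config batch waits for the user-supplied fences */
+		if (deps == XE_OA_SUBMIT_ADD_DEPS) {
+			for (int i = 0; i < stream->num_syncs && !err; i++)
+				err = xe_sync_entry_add_deps(&stream->syncs[i], job);
+			if (err) {
+				xe_sched_job_put(job);
+				return ERR_PTR(err);
+			}
+		}
+
+		xe_sched_job_arm(job);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+		return fence;
+	}
+
+The ctx-image and LRI paths keep XE_OA_SUBMIT_NO_DEPS, so only the OA
+configuration batch observes the new dependencies.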
+ +v2: Change add_deps arg to xe_oa_submit_bb from bool to enum (Matt Brost) + +Reviewed-by: Jonathan Cavitt +Signed-off-by: Ashutosh Dixit +Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-4-ashutosh.dixit@intel.com +Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_oa.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c +index 20d279ed3c382..1bfc4b58b5c17 100644 +--- a/drivers/gpu/drm/xe/xe_oa.c ++++ b/drivers/gpu/drm/xe/xe_oa.c +@@ -42,6 +42,11 @@ + #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) + #define XE_OA_UNIT_INVALID U32_MAX + ++enum xe_oa_submit_deps { ++ XE_OA_SUBMIT_NO_DEPS, ++ XE_OA_SUBMIT_ADD_DEPS, ++}; ++ + struct xe_oa_reg { + struct xe_reg addr; + u32 value; +@@ -572,7 +577,8 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) + return ret; + } + +-static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) ++static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, ++ struct xe_bb *bb) + { + struct xe_sched_job *job; + struct dma_fence *fence; +@@ -585,11 +591,22 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_ + goto exit; + } + ++ if (deps == XE_OA_SUBMIT_ADD_DEPS) { ++ for (int i = 0; i < stream->num_syncs && !err; i++) ++ err = xe_sync_entry_add_deps(&stream->syncs[i], job); ++ if (err) { ++ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); ++ goto err_put_job; ++ } ++ } ++ + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + return fence; ++err_put_job: ++ xe_sched_job_put(job); + exit: + return ERR_PTR(err); + } +@@ -667,7 +684,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr + + xe_oa_store_flex(stream, lrc, bb, flex, count); + +- fence = xe_oa_submit_bb(stream, bb); ++ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; +@@ -696,7 +713,7 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re + + write_cs_mi_lri(bb, reg_lri, 1); + +- fence = xe_oa_submit_bb(stream, bb); ++ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; +@@ -944,7 +961,7 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config + goto exit; + } + +- fence = xe_oa_submit_bb(stream, oa_bo->bb); ++ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto exit; +-- +2.39.5 + diff --git a/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch b/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch new file mode 100644 index 0000000000..47d9993906 --- /dev/null +++ b/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch @@ -0,0 +1,150 @@ +From a583f642663c5c653ede6bd0ca3837165826a183 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 13:03:46 -0700 +Subject: drm/xe/oa: Separate batch submission from waiting for completion + +From: Ashutosh Dixit + +[ Upstream commit dddcb19ad4d4bbe943a72a1fb3266c6e8aa8d541 ] + +When we introduce xe_syncs, we don't wait for internal OA programming +batches to complete. 
That is, xe_syncs are signaled asynchronously. In +anticipation for this, separate out batch submission from waiting for +completion of those batches. + +v2: Change return type of xe_oa_submit_bb to "struct dma_fence *" (Matt B) +v3: Retain init "int err = 0;" in xe_oa_submit_bb (Jose) + +Reviewed-by: Jonathan Cavitt +Signed-off-by: Ashutosh Dixit +Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-2-ashutosh.dixit@intel.com +Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_oa.c | 57 +++++++++++++++++++++++++++++--------- + 1 file changed, 44 insertions(+), 13 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c +index 6fc00d63b2857..3328529774cb7 100644 +--- a/drivers/gpu/drm/xe/xe_oa.c ++++ b/drivers/gpu/drm/xe/xe_oa.c +@@ -567,11 +567,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) + return ret; + } + +-static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) ++static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) + { + struct xe_sched_job *job; + struct dma_fence *fence; +- long timeout; + int err = 0; + + /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ +@@ -585,14 +584,9 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + +- timeout = dma_fence_wait_timeout(fence, false, HZ); +- dma_fence_put(fence); +- if (timeout < 0) +- err = timeout; +- else if (!timeout) +- err = -ETIME; ++ return fence; + exit: +- return err; ++ return ERR_PTR(err); + } + + static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) +@@ -656,6 +650,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, + static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, + const struct flex *flex, u32 count) + { ++ struct dma_fence *fence; + struct xe_bb *bb; + int err; + +@@ -667,7 +662,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr + + xe_oa_store_flex(stream, lrc, bb, flex, count); + +- err = xe_oa_submit_bb(stream, bb); ++ fence = xe_oa_submit_bb(stream, bb); ++ if (IS_ERR(fence)) { ++ err = PTR_ERR(fence); ++ goto free_bb; ++ } ++ xe_bb_free(bb, fence); ++ dma_fence_put(fence); ++ ++ return 0; ++free_bb: + xe_bb_free(bb, NULL); + exit: + return err; +@@ -675,6 +679,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr + + static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) + { ++ struct dma_fence *fence; + struct xe_bb *bb; + int err; + +@@ -686,7 +691,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re + + write_cs_mi_lri(bb, reg_lri, 1); + +- err = xe_oa_submit_bb(stream, bb); ++ fence = xe_oa_submit_bb(stream, bb); ++ if (IS_ERR(fence)) { ++ err = PTR_ERR(fence); ++ goto free_bb; ++ } ++ xe_bb_free(bb, fence); ++ dma_fence_put(fence); ++ ++ return 0; ++free_bb: + xe_bb_free(bb, NULL); + exit: + return err; +@@ -914,15 +928,32 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config + { + #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + struct xe_oa_config_bo *oa_bo; +- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; ++ int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; ++ struct dma_fence *fence; ++ long timeout; + ++ /* 
Emit OA configuration batch */ + oa_bo = xe_oa_alloc_config_buffer(stream, config); + if (IS_ERR(oa_bo)) { + err = PTR_ERR(oa_bo); + goto exit; + } + +- err = xe_oa_submit_bb(stream, oa_bo->bb); ++ fence = xe_oa_submit_bb(stream, oa_bo->bb); ++ if (IS_ERR(fence)) { ++ err = PTR_ERR(fence); ++ goto exit; ++ } ++ ++ /* Wait till all previous batches have executed */ ++ timeout = dma_fence_wait_timeout(fence, false, 5 * HZ); ++ dma_fence_put(fence); ++ if (timeout < 0) ++ err = timeout; ++ else if (!timeout) ++ err = -ETIME; ++ if (err) ++ drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err); + + /* Additional empirical delay needed for NOA programming after registers are written */ + usleep_range(us, 2 * us); +-- +2.39.5 + diff --git a/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch b/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch new file mode 100644 index 0000000000..dc3ccca252 --- /dev/null +++ b/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch @@ -0,0 +1,253 @@ +From 80f51c2a8c5b4c9783479bd34a983ef410cef4b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 13:03:47 -0700 +Subject: drm/xe/oa/uapi: Define and parse OA sync properties +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ashutosh Dixit + +[ Upstream commit c8507a25cebd179db935dd266a33c51bef1b1e80 ] + +Now that we have laid the groundwork, introduce OA sync properties in the +uapi and parse the input xe_sync array as is done elsewhere in the +driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace. + +v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B) + Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose) + +Acked-by: José Roberto de Souza +Reviewed-by: Jonathan Cavitt +Signed-off-by: Ashutosh Dixit +Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com +Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_oa.c | 83 +++++++++++++++++++++++++++++++- + drivers/gpu/drm/xe/xe_oa_types.h | 6 +++ + drivers/gpu/drm/xe/xe_query.c | 2 +- + include/uapi/drm/xe_drm.h | 17 +++++++ + 4 files changed, 106 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c +index 3328529774cb7..20d279ed3c382 100644 +--- a/drivers/gpu/drm/xe/xe_oa.c ++++ b/drivers/gpu/drm/xe/xe_oa.c +@@ -36,6 +36,7 @@ + #include "xe_pm.h" + #include "xe_sched_job.h" + #include "xe_sriov.h" ++#include "xe_sync.h" + + #define DEFAULT_POLL_FREQUENCY_HZ 200 + #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) +@@ -70,6 +71,7 @@ struct flex { + }; + + struct xe_oa_open_param { ++ struct xe_file *xef; + u32 oa_unit_id; + bool sample; + u32 metric_set; +@@ -81,6 +83,9 @@ struct xe_oa_open_param { + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; + bool no_preempt; ++ struct drm_xe_sync __user *syncs_user; ++ int num_syncs; ++ struct xe_sync_entry *syncs; + }; + + struct xe_oa_config_bo { +@@ -1393,6 +1398,9 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, + stream->period_exponent = param->period_exponent; + stream->no_preempt = param->no_preempt; + ++ stream->num_syncs = param->num_syncs; ++ stream->syncs = param->syncs; ++ + /* + * For Xe2+, when overrun mode is enabled, there are no partial reports at the end + * of buffer, making the OA buffer effectively a non-power-of-2 size 
circular +@@ -1743,6 +1751,20 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + return 0; + } + ++static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, ++ struct xe_oa_open_param *param) ++{ ++ param->num_syncs = value; ++ return 0; ++} ++ ++static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, ++ struct xe_oa_open_param *param) ++{ ++ param->syncs_user = u64_to_user_ptr(value); ++ return 0; ++} ++ + typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); + static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { +@@ -1755,6 +1777,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, ++ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, ++ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + }; + + static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, +@@ -1814,6 +1838,49 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number + return 0; + } + ++static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) ++{ ++ int ret, num_syncs, num_ufence = 0; ++ ++ if (param->num_syncs && !param->syncs_user) { ++ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); ++ ret = -EINVAL; ++ goto exit; ++ } ++ ++ if (param->num_syncs) { ++ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); ++ if (!param->syncs) { ++ ret = -ENOMEM; ++ goto exit; ++ } ++ } ++ ++ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ++ ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], ++ ¶m->syncs_user[num_syncs], 0); ++ if (ret) ++ goto err_syncs; ++ ++ if (xe_sync_is_ufence(¶m->syncs[num_syncs])) ++ num_ufence++; ++ } ++ ++ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { ++ ret = -EINVAL; ++ goto err_syncs; ++ } ++ ++ return 0; ++ ++err_syncs: ++ while (num_syncs--) ++ xe_sync_entry_cleanup(¶m->syncs[num_syncs]); ++ kfree(param->syncs); ++exit: ++ return ret; ++} ++ + /** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device +@@ -1839,6 +1906,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f + return -ENODEV; + } + ++ param.xef = xef; + ret = xe_oa_user_extensions(oa, data, 0, ¶m); + if (ret) + return ret; +@@ -1907,11 +1975,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f + drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); + } + ++ ret = xe_oa_parse_syncs(oa, ¶m); ++ if (ret) ++ goto err_exec_q; ++ + mutex_lock(¶m.hwe->gt->oa.gt_lock); + ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); + mutex_unlock(¶m.hwe->gt->oa.gt_lock); ++ if (ret < 0) ++ goto err_sync_cleanup; ++ ++ return ret; ++ ++err_sync_cleanup: ++ while (param.num_syncs--) ++ xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); ++ kfree(param.syncs); + err_exec_q: +- if (ret < 0 && param.exec_q) ++ if (param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; + } +diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h +index 8862eca73fbe3..99f4b2d4bdcf6 100644 +--- a/drivers/gpu/drm/xe/xe_oa_types.h ++++ b/drivers/gpu/drm/xe/xe_oa_types.h +@@ -238,5 +238,11 @@ struct xe_oa_stream { + + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ + u32 
no_preempt; ++ ++ /** @num_syncs: size of @syncs array */ ++ u32 num_syncs; ++ ++ /** @syncs: syncs to wait on and to signal */ ++ struct xe_sync_entry *syncs; + }; + #endif +diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c +index 1c96375bd7df7..6fec5d1a1eb44 100644 +--- a/drivers/gpu/drm/xe/xe_query.c ++++ b/drivers/gpu/drm/xe/xe_query.c +@@ -679,7 +679,7 @@ static int query_oa_units(struct xe_device *xe, + du->oa_unit_id = u->oa_unit_id; + du->oa_unit_type = u->type; + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); +- du->capabilities = DRM_XE_OA_CAPS_BASE; ++ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; + + j = 0; + for_each_hw_engine(hwe, gt, hwe_id) { +diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h +index c4182e95a6195..4a8a4a63e99ca 100644 +--- a/include/uapi/drm/xe_drm.h ++++ b/include/uapi/drm/xe_drm.h +@@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit { + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; + #define DRM_XE_OA_CAPS_BASE (1 << 0) ++#define DRM_XE_OA_CAPS_SYNCS (1 << 1) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; +@@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id { + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, ++ ++ /** ++ * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array ++ * specified in @DRM_XE_OA_PROPERTY_SYNCS ++ */ ++ DRM_XE_OA_PROPERTY_NUM_SYNCS, ++ ++ /** ++ * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array ++ * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA ++ * configuration will wait till input fences signal. Output fences ++ * will signal after the new OA configuration takes effect. For ++ * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar ++ * to the VM bind case. ++ */ ++ DRM_XE_OA_PROPERTY_SYNCS, + }; + + /** +-- +2.39.5 + diff --git a/queue-6.12/flow_dissector-fix-handling-of-mixed-port-and-port-r.patch b/queue-6.12/flow_dissector-fix-handling-of-mixed-port-and-port-r.patch new file mode 100644 index 0000000000..f3a6b6ceae --- /dev/null +++ b/queue-6.12/flow_dissector-fix-handling-of-mixed-port-and-port-r.patch @@ -0,0 +1,94 @@ +From 0df17c0aef2adc5db0c7a7225278a40d0aa6162d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 20:32:07 -0800 +Subject: flow_dissector: Fix handling of mixed port and port-range keys + +From: Cong Wang + +[ Upstream commit 3e5796862c692ea608d96f0a1437f9290f44953a ] + +This patch fixes a bug in TC flower filter where rules combining a +specific destination port with a source port range weren't working +correctly. + +The specific case was when users tried to configure rules like: + +tc filter add dev ens38 ingress protocol ip flower ip_proto udp \ +dst_port 5000 src_port 2000-3000 action drop + +The root cause was in the flow dissector code. While both +FLOW_DISSECTOR_KEY_PORTS and FLOW_DISSECTOR_KEY_PORTS_RANGE flags +were being set correctly in the classifier, the __skb_flow_dissect_ports() +function was only populating one of them: whichever came first in +the enum check. This meant that when the code needed both a specific +port and a port range, one of them would be left as 0, causing the +filter to not match packets as expected. + +Fix it by removing the either/or logic and instead checking and +populating both key types independently when they're in use. 
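+
+As an editorial sketch of what the corrected helper boils down to (names are
+taken from the diff below; the wrapper signature is simplified and the port
+value is assumed to have been parsed from the skb already):
+
+	static void populate_port_keys(struct flow_dissector *flow_dissector,
+				       void *target_container, __be32 ports)
+	{
+		struct flow_dissector_key_ports_range *key_ports_range = NULL;
+		struct flow_dissector_key_ports *key_ports = NULL;
+
+		if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+			key_ports = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_PORTS,
+							      target_container);
+
+		if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE))
+			key_ports_range = skb_flow_dissector_target(flow_dissector,
+								    FLOW_DISSECTOR_KEY_PORTS_RANGE,
+								    target_container);
+
+		/* Fill every key the classifier asked for, not just the first */
+		if (key_ports)
+			key_ports->ports = ports;
+		if (key_ports_range)
+			key_ports_range->tp.ports = ports;
+	}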
+ +Fixes: 8ffb055beae5 ("cls_flower: Fix the behavior using port ranges with hw-offload") +Reported-by: Qiang Zhang +Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/ +Cc: Yoshiki Komachi +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Reviewed-by: Ido Schimmel +Link: https://patch.msgid.link/20250218043210.732959-2-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/flow_dissector.c | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c +index 5db41bf2ed93e..c33af3ef0b790 100644 +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb, + void *target_container, const void *data, + int nhoff, u8 ip_proto, int hlen) + { +- enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; +- struct flow_dissector_key_ports *key_ports; ++ struct flow_dissector_key_ports_range *key_ports_range = NULL; ++ struct flow_dissector_key_ports *key_ports = NULL; ++ __be32 ports; + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) +- dissector_ports = FLOW_DISSECTOR_KEY_PORTS; +- else if (dissector_uses_key(flow_dissector, +- FLOW_DISSECTOR_KEY_PORTS_RANGE)) +- dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; ++ key_ports = skb_flow_dissector_target(flow_dissector, ++ FLOW_DISSECTOR_KEY_PORTS, ++ target_container); + +- if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) ++ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) ++ key_ports_range = skb_flow_dissector_target(flow_dissector, ++ FLOW_DISSECTOR_KEY_PORTS_RANGE, ++ target_container); ++ ++ if (!key_ports && !key_ports_range) + return; + +- key_ports = skb_flow_dissector_target(flow_dissector, +- dissector_ports, +- target_container); +- key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, +- data, hlen); ++ ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); ++ ++ if (key_ports) ++ key_ports->ports = ports; ++ ++ if (key_ports_range) ++ key_ports_range->tp.ports = ports; + } + + static void +-- +2.39.5 + diff --git a/queue-6.12/flow_dissector-fix-port-range-key-handling-in-bpf-co.patch b/queue-6.12/flow_dissector-fix-port-range-key-handling-in-bpf-co.patch new file mode 100644 index 0000000000..21807268c8 --- /dev/null +++ b/queue-6.12/flow_dissector-fix-port-range-key-handling-in-bpf-co.patch @@ -0,0 +1,76 @@ +From 3c431cc15c881050fbbe8ca4299ef8cb26873eff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 20:32:09 -0800 +Subject: flow_dissector: Fix port range key handling in BPF conversion + +From: Cong Wang + +[ Upstream commit 69ab34f705fbfabcace64b5d53bb7a4450fac875 ] + +Fix how port range keys are handled in __skb_flow_bpf_to_target() by: +- Separating PORTS and PORTS_RANGE key handling +- Using correct key_ports_range structure for range keys +- Properly initializing both key types independently + +This ensures port range information is correctly stored in its dedicated +structure rather than incorrectly using the regular ports key structure. 
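+
+For illustration only (the names come from the hunk below, and the snippet is
+a fragment of __skb_flow_bpf_to_target rather than a standalone function),
+the corrected conversion fills each key type from the BPF flow keys
+independently:
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+		key_ports->src = flow_keys->sport;
+		key_ports->dst = flow_keys->dport;
+	}
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) {
+		key_ports_range = skb_flow_dissector_target(flow_dissector,
+							    FLOW_DISSECTOR_KEY_PORTS_RANGE,
+							    target_container);
+		key_ports_range->tp.src = flow_keys->sport;
+		key_ports_range->tp.dst = flow_keys->dport;
+	}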
+ +Fixes: 59fb9b62fb6c ("flow_dissector: Fix to use new variables for port ranges in bpf hook") +Reported-by: Qiang Zhang +Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/ +Cc: Yoshiki Komachi +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Link: https://patch.msgid.link/20250218043210.732959-4-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/flow_dissector.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c +index c33af3ef0b790..9cd8de6bebb54 100644 +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -931,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, + struct flow_dissector *flow_dissector, + void *target_container) + { ++ struct flow_dissector_key_ports_range *key_ports_range = NULL; + struct flow_dissector_key_ports *key_ports = NULL; + struct flow_dissector_key_control *key_control; + struct flow_dissector_key_basic *key_basic; +@@ -975,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + +- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) ++ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); +- else if (dissector_uses_key(flow_dissector, +- FLOW_DISSECTOR_KEY_PORTS_RANGE)) +- key_ports = skb_flow_dissector_target(flow_dissector, +- FLOW_DISSECTOR_KEY_PORTS_RANGE, +- target_container); +- +- if (key_ports) { + key_ports->src = flow_keys->sport; + key_ports->dst = flow_keys->dport; + } ++ if (dissector_uses_key(flow_dissector, ++ FLOW_DISSECTOR_KEY_PORTS_RANGE)) { ++ key_ports_range = skb_flow_dissector_target(flow_dissector, ++ FLOW_DISSECTOR_KEY_PORTS_RANGE, ++ target_container); ++ key_ports_range->tp.src = flow_keys->sport; ++ key_ports_range->tp.dst = flow_keys->dport; ++ } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { +-- +2.39.5 + diff --git a/queue-6.12/geneve-fix-use-after-free-in-geneve_find_dev.patch b/queue-6.12/geneve-fix-use-after-free-in-geneve_find_dev.patch new file mode 100644 index 0000000000..afd3e94559 --- /dev/null +++ b/queue-6.12/geneve-fix-use-after-free-in-geneve_find_dev.patch @@ -0,0 +1,200 @@ +From ce30622d4cdcea318e0392b02ca462ebfb08615c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 13:33:54 +0900 +Subject: geneve: Fix use-after-free in geneve_find_dev(). + +From: Kuniyuki Iwashima + +[ Upstream commit 9593172d93b9f91c362baec4643003dc29802929 ] + +syzkaller reported a use-after-free in geneve_find_dev() [0] +without repro. + +geneve_configure() links struct geneve_dev.next to +net_generic(net, geneve_net_id)->geneve_list. + +The net here could differ from dev_net(dev) if IFLA_NET_NS_PID, +IFLA_NET_NS_FD, or IFLA_TARGET_NETNSID is set. + +When dev_net(dev) is dismantled, geneve_exit_batch_rtnl() finally +calls unregister_netdevice_queue() for each dev in the netns, +and later the dev is freed. + +However, its geneve_dev.next is still linked to the backend UDP +socket netns. + +Then, use-after-free will occur when another geneve dev is created +in the netns. + +Let's call geneve_dellink() instead in geneve_destroy_tunnels(). 
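+
+Conceptually, the difference is that geneve_dellink() removes the
+per-netns list linkage before queueing the unregister, so no stale
+geneve_dev.next is left behind (simplified sketch, not the literal
+driver code):
+
+    static void geneve_dellink(struct net_device *dev, struct list_head *head)
+    {
+            struct geneve_dev *geneve = netdev_priv(dev);
+
+            list_del(&geneve->next);        /* unlink from gn->geneve_list */
+            unregister_netdevice_queue(dev, head);
+    }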
+ +[0]: +BUG: KASAN: slab-use-after-free in geneve_find_dev drivers/net/geneve.c:1295 [inline] +BUG: KASAN: slab-use-after-free in geneve_configure+0x234/0x858 drivers/net/geneve.c:1343 +Read of size 2 at addr ffff000054d6ee24 by task syz.1.4029/13441 + +CPU: 1 UID: 0 PID: 13441 Comm: syz.1.4029 Not tainted 6.13.0-g0ad9617c78ac #24 dc35ca22c79fb82e8e7bc5c9c9adafea898b1e3d +Hardware name: linux,dummy-virt (DT) +Call trace: + show_stack+0x38/0x50 arch/arm64/kernel/stacktrace.c:466 (C) + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0xbc/0x108 lib/dump_stack.c:120 + print_address_description mm/kasan/report.c:378 [inline] + print_report+0x16c/0x6f0 mm/kasan/report.c:489 + kasan_report+0xc0/0x120 mm/kasan/report.c:602 + __asan_report_load2_noabort+0x20/0x30 mm/kasan/report_generic.c:379 + geneve_find_dev drivers/net/geneve.c:1295 [inline] + geneve_configure+0x234/0x858 drivers/net/geneve.c:1343 + geneve_newlink+0xb8/0x128 drivers/net/geneve.c:1634 + rtnl_newlink_create+0x23c/0x868 net/core/rtnetlink.c:3795 + __rtnl_newlink net/core/rtnetlink.c:3906 [inline] + rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021 + rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911 + netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543 + rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938 + netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] + netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348 + netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892 + sock_sendmsg_nosec net/socket.c:713 [inline] + __sock_sendmsg net/socket.c:728 [inline] + ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568 + ___sys_sendmsg+0x178/0x1d8 net/socket.c:2622 + __sys_sendmsg net/socket.c:2654 [inline] + __do_sys_sendmsg net/socket.c:2659 [inline] + __se_sys_sendmsg net/socket.c:2657 [inline] + __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657 + __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] + invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49 + el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132 + do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151 + el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744 + el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762 + el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600 + +Allocated by task 13247: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x30/0x68 mm/kasan/common.c:68 + kasan_save_alloc_info+0x44/0x58 mm/kasan/generic.c:568 + poison_kmalloc_redzone mm/kasan/common.c:377 [inline] + __kasan_kmalloc+0x84/0xa0 mm/kasan/common.c:394 + kasan_kmalloc include/linux/kasan.h:260 [inline] + __do_kmalloc_node mm/slub.c:4298 [inline] + __kmalloc_node_noprof+0x2a0/0x560 mm/slub.c:4304 + __kvmalloc_node_noprof+0x9c/0x230 mm/util.c:645 + alloc_netdev_mqs+0xb8/0x11a0 net/core/dev.c:11470 + rtnl_create_link+0x2b8/0xb50 net/core/rtnetlink.c:3604 + rtnl_newlink_create+0x19c/0x868 net/core/rtnetlink.c:3780 + __rtnl_newlink net/core/rtnetlink.c:3906 [inline] + rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021 + rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911 + netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543 + rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938 + netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] + netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348 + netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892 + sock_sendmsg_nosec net/socket.c:713 [inline] + __sock_sendmsg net/socket.c:728 [inline] + ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568 + 
___sys_sendmsg+0x178/0x1d8 net/socket.c:2622 + __sys_sendmsg net/socket.c:2654 [inline] + __do_sys_sendmsg net/socket.c:2659 [inline] + __se_sys_sendmsg net/socket.c:2657 [inline] + __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657 + __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] + invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49 + el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132 + do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151 + el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744 + el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762 + el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600 + +Freed by task 45: + kasan_save_stack mm/kasan/common.c:47 [inline] + kasan_save_track+0x30/0x68 mm/kasan/common.c:68 + kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:582 + poison_slab_object mm/kasan/common.c:247 [inline] + __kasan_slab_free+0x48/0x68 mm/kasan/common.c:264 + kasan_slab_free include/linux/kasan.h:233 [inline] + slab_free_hook mm/slub.c:2353 [inline] + slab_free mm/slub.c:4613 [inline] + kfree+0x140/0x420 mm/slub.c:4761 + kvfree+0x4c/0x68 mm/util.c:688 + netdev_release+0x94/0xc8 net/core/net-sysfs.c:2065 + device_release+0x98/0x1c0 + kobject_cleanup lib/kobject.c:689 [inline] + kobject_release lib/kobject.c:720 [inline] + kref_put include/linux/kref.h:65 [inline] + kobject_put+0x2b0/0x438 lib/kobject.c:737 + netdev_run_todo+0xe5c/0xfc8 net/core/dev.c:11185 + rtnl_unlock+0x20/0x38 net/core/rtnetlink.c:151 + cleanup_net+0x4fc/0x8c0 net/core/net_namespace.c:648 + process_one_work+0x700/0x1398 kernel/workqueue.c:3236 + process_scheduled_works kernel/workqueue.c:3317 [inline] + worker_thread+0x8c4/0xe10 kernel/workqueue.c:3398 + kthread+0x4bc/0x608 kernel/kthread.c:464 + ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:862 + +The buggy address belongs to the object at ffff000054d6e000 + which belongs to the cache kmalloc-cg-4k of size 4096 +The buggy address is located 3620 bytes inside of + freed 4096-byte region [ffff000054d6e000, ffff000054d6f000) + +The buggy address belongs to the physical page: +page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x94d68 +head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +memcg:ffff000016276181 +flags: 0x3fffe0000000040(head|node=0|zone=0|lastcpupid=0x1ffff) +page_type: f5(slab) +raw: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000 +raw: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181 +head: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000 +head: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181 +head: 03fffe0000000003 fffffdffc1535a01 ffffffffffffffff 0000000000000000 +head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff000054d6ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff000054d6ed80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff000054d6ee00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff000054d6ee80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff000054d6ef00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + +Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") +Reported-by: syzkaller +Signed-off-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250213043354.91368-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/geneve.c | 11 +++-------- + 1 
file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c +index ba15a0a4ce629..9c53b0bbb4c57 100644 +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1907,16 +1907,11 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) + /* gather any geneve devices that were moved into this ns */ + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &geneve_link_ops) +- unregister_netdevice_queue(dev, head); ++ geneve_dellink(dev, head); + + /* now gather any other geneve devices that were created in this ns */ +- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { +- /* If geneve->dev is in the same netns, it was already added +- * to the list by the previous loop. +- */ +- if (!net_eq(dev_net(geneve->dev), net)) +- unregister_netdevice_queue(geneve->dev, head); +- } ++ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) ++ geneve_dellink(geneve->dev, head); + } + + static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, +-- +2.39.5 + diff --git a/queue-6.12/geneve-suppress-list-corruption-splat-in-geneve_dest.patch b/queue-6.12/geneve-suppress-list-corruption-splat-in-geneve_dest.patch new file mode 100644 index 0000000000..caf3b0db61 --- /dev/null +++ b/queue-6.12/geneve-suppress-list-corruption-splat-in-geneve_dest.patch @@ -0,0 +1,50 @@ +From 1fee1a7ad94e9709869d4a9b495868ed9a3e4da5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 12:37:05 -0800 +Subject: geneve: Suppress list corruption splat in geneve_destroy_tunnels(). + +From: Kuniyuki Iwashima + +[ Upstream commit 62fab6eef61f245dc8797e3a6a5b890ef40e8628 ] + +As explained in the previous patch, iterating for_each_netdev() and +gn->geneve_list during ->exit_batch_rtnl() could trigger ->dellink() +twice for the same device. + +If CONFIG_DEBUG_LIST is enabled, we will see a list_del() corruption +splat in the 2nd call of geneve_dellink(). + +Let's remove for_each_netdev() in geneve_destroy_tunnels() and delegate +that part to default_device_exit_batch(). 
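+
+For context, the corruption is detectable because list_del() poisons the
+entry (include/linux/list.h):
+
+    entry->next = LIST_POISON1;
+    entry->prev = LIST_POISON2;
+
+so a second geneve_dellink() on the same device trips the
+CONFIG_DEBUG_LIST check (lib/list_debug.c) when it tries to unlink the
+already-poisoned entry again.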
+ +Fixes: 9593172d93b9 ("geneve: Fix use-after-free in geneve_find_dev().") +Signed-off-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250217203705.40342-3-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/geneve.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c +index 9c53b0bbb4c57..963fb9261f017 100644 +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1902,14 +1902,7 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) + { + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *geneve, *next; +- struct net_device *dev, *aux; + +- /* gather any geneve devices that were moved into this ns */ +- for_each_netdev_safe(net, dev, aux) +- if (dev->rtnl_link_ops == &geneve_link_ops) +- geneve_dellink(dev, head); +- +- /* now gather any other geneve devices that were created in this ns */ + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) + geneve_dellink(geneve->dev, head); + } +-- +2.39.5 + diff --git a/queue-6.12/gtp-suppress-list-corruption-splat-in-gtp_net_exit_b.patch b/queue-6.12/gtp-suppress-list-corruption-splat-in-gtp_net_exit_b.patch new file mode 100644 index 0000000000..34c4d103e5 --- /dev/null +++ b/queue-6.12/gtp-suppress-list-corruption-splat-in-gtp_net_exit_b.patch @@ -0,0 +1,121 @@ +From 985923af8902311fb074374c035a07310c2adb0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 12:37:04 -0800 +Subject: gtp: Suppress list corruption splat in gtp_net_exit_batch_rtnl(). + +From: Kuniyuki Iwashima + +[ Upstream commit 4ccacf86491d33d2486b62d4d44864d7101b299d ] + +Brad Spengler reported the list_del() corruption splat in +gtp_net_exit_batch_rtnl(). [0] + +Commit eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns +dismantle.") added the for_each_netdev() loop in gtp_net_exit_batch_rtnl() +to destroy devices in each netns as done in geneve and ip tunnels. + +However, this could trigger ->dellink() twice for the same device during +->exit_batch_rtnl(). + +Say we have two netns A & B and gtp device B that resides in netns B but +whose UDP socket is in netns A. + + 1. cleanup_net() processes netns A and then B. + + 2. gtp_net_exit_batch_rtnl() finds the device B while iterating + netns A's gn->gtp_dev_list and calls ->dellink(). + + [ device B is not yet unlinked from netns B + as unregister_netdevice_many() has not been called. ] + + 3. gtp_net_exit_batch_rtnl() finds the device B while iterating + netns B's for_each_netdev() and calls ->dellink(). + +gtp_dellink() cleans up the device's hash table, unlinks the dev from +gn->gtp_dev_list, and calls unregister_netdevice_queue(). + +Basically, calling gtp_dellink() multiple times is fine unless +CONFIG_DEBUG_LIST is enabled. + +Let's remove for_each_netdev() in gtp_net_exit_batch_rtnl() and +delegate the destruction to default_device_exit_batch() as done +in bareudp. + +[0]: +list_del corruption, ffff8880aaa62c00->next (autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object]) is LIST_POISON1 (ffffffffffffff02) (prev is 0xffffffffffffff04) +kernel BUG at lib/list_debug.c:58! 
+Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN +CPU: 1 UID: 0 PID: 1804 Comm: kworker/u8:7 Tainted: G T 6.12.13-grsec-full-20250211091339 #1 +Tainted: [T]=RANDSTRUCT +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 +Workqueue: netns cleanup_net +RIP: 0010:[] __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58 +Code: c2 76 91 31 c0 e8 9f b1 f7 fc 0f 0b 4d 89 f0 48 c7 c1 02 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 e0 c2 76 91 31 c0 e8 7f b1 f7 fc <0f> 0b 4d 89 e8 48 c7 c1 04 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 60 +RSP: 0018:fffffe8040b4fbd0 EFLAGS: 00010283 +RAX: 00000000000000cc RBX: dffffc0000000000 RCX: ffffffff818c4054 +RDX: ffffffff84947381 RSI: ffffffff818d1512 RDI: 0000000000000000 +RBP: ffff8880aaa62c00 R08: 0000000000000001 R09: fffffbd008169f32 +R10: fffffe8040b4f997 R11: 0000000000000001 R12: a1988d84f24943e4 +R13: ffffffffffffff02 R14: ffffffffffffff04 R15: ffff8880aaa62c08 +RBX: kasan shadow of 0x0 +RCX: __wake_up_klogd.part.0+0x74/0xe0 kernel/printk/printk.c:4554 +RDX: __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58 +RSI: vprintk+0x72/0x100 kernel/printk/printk_safe.c:71 +RBP: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object] +RSP: process kstack fffffe8040b4fbd0+0x7bd0/0x8000 [kworker/u8:7+netns 1804 ] +R09: kasan shadow of process kstack fffffe8040b4f990+0x7990/0x8000 [kworker/u8:7+netns 1804 ] +R10: process kstack fffffe8040b4f997+0x7997/0x8000 [kworker/u8:7+netns 1804 ] +R15: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc08/0x1000 [slab object] +FS: 0000000000000000(0000) GS:ffff888116000000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000748f5372c000 CR3: 0000000015408000 CR4: 00000000003406f0 shadow CR4: 00000000003406f0 +Stack: + 0000000000000000 ffffffff8a0c35e7 ffffffff8a0c3603 ffff8880aaa62c00 + ffff8880aaa62c00 0000000000000004 ffff88811145311c 0000000000000005 + 0000000000000001 ffff8880aaa62000 fffffe8040b4fd40 ffffffff8a0c360d +Call Trace: + + [] __list_del_entry_valid include/linux/list.h:131 [inline] fffffe8040b4fc28 + [] __list_del_entry include/linux/list.h:248 [inline] fffffe8040b4fc28 + [] list_del include/linux/list.h:262 [inline] fffffe8040b4fc28 + [] gtp_dellink+0x16d/0x360 drivers/net/gtp.c:1557 fffffe8040b4fc28 + [] gtp_net_exit_batch_rtnl+0x124/0x2c0 drivers/net/gtp.c:2495 fffffe8040b4fc88 + [] cleanup_net+0x5a4/0xbe0 net/core/net_namespace.c:635 fffffe8040b4fcd0 + [] process_one_work+0xbd7/0x2160 kernel/workqueue.c:3326 fffffe8040b4fd88 + [] process_scheduled_works kernel/workqueue.c:3407 [inline] fffffe8040b4fec0 + [] worker_thread+0x6b5/0xfa0 kernel/workqueue.c:3488 fffffe8040b4fec0 + [] kthread+0x360/0x4c0 kernel/kthread.c:397 fffffe8040b4ff78 + [] ret_from_fork+0x74/0xe0 arch/x86/kernel/process.c:172 fffffe8040b4ffb8 + [] ret_from_fork_asm+0x29/0xc0 arch/x86/entry/entry_64.S:399 fffffe8040b4ffe8 + +Modules linked in: + +Fixes: eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns dismantle.") +Reported-by: Brad Spengler +Signed-off-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250217203705.40342-2-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/gtp.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c +index 47406ce990161..33b78b4007fe7 100644 +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -2487,11 +2487,6 @@ static void __net_exit 
gtp_net_exit_batch_rtnl(struct list_head *net_list, + list_for_each_entry(net, net_list, exit_list) { + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp, *gtp_next; +- struct net_device *dev; +- +- for_each_netdev(net, dev) +- if (dev->rtnl_link_ops == >p_link_ops) +- gtp_dellink(dev, dev_to_kill); + + list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) + gtp_dellink(gtp->dev, dev_to_kill); +-- +2.39.5 + diff --git a/queue-6.12/ibmvnic-add-stat-for-tx-direct-vs-tx-batched.patch b/queue-6.12/ibmvnic-add-stat-for-tx-direct-vs-tx-batched.patch new file mode 100644 index 0000000000..0e61882fbf --- /dev/null +++ b/queue-6.12/ibmvnic-add-stat-for-tx-direct-vs-tx-batched.patch @@ -0,0 +1,127 @@ +From 95b149d261df316bf69a7fb813763dd32c2f16b1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Oct 2024 11:35:31 -0500 +Subject: ibmvnic: Add stat for tx direct vs tx batched + +From: Nick Child + +[ Upstream commit 2ee73c54a615b74d2e7ee6f20844fd3ba63fc485 ] + +Allow tracking of packets sent with send_subcrq direct vs +indirect. `ethtool -S ` will now provide a counter +of the number of uses of each xmit method. This metric will +be useful in performance debugging. + +Signed-off-by: Nick Child +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241001163531.1803152-1-nnac123@linux.ibm.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: bdf5d13aa05e ("ibmvnic: Don't reference skb after sending to VIOS") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ibm/ibmvnic.c | 23 ++++++++++++++++------- + drivers/net/ethernet/ibm/ibmvnic.h | 3 ++- + 2 files changed, 18 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index 97425c06e1ed7..cca2ed6ad2899 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -2310,7 +2310,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; +- adapter->tx_stats_buffers[queue_num].packets--; ++ adapter->tx_stats_buffers[queue_num].batched_packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); +@@ -2402,7 +2402,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; + unsigned int tx_dropped = 0; +- unsigned int tx_packets = 0; ++ unsigned int tx_dpackets = 0; ++ unsigned int tx_bpackets = 0; + unsigned int tx_bytes = 0; + dma_addr_t data_dma_addr; + struct netdev_queue *txq; +@@ -2575,6 +2576,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + if (lpar_rc != H_SUCCESS) + goto tx_err; + ++ tx_dpackets++; + goto early_exit; + } + +@@ -2603,6 +2605,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + goto tx_err; + } + ++ tx_bpackets++; ++ + early_exit: + if (atomic_add_return(num_entries, &tx_scrq->used) + >= adapter->req_tx_entries_per_subcrq) { +@@ -2610,7 +2614,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + netif_stop_subqueue(netdev, queue_num); + } + +- tx_packets++; + tx_bytes += skb->len; + txq_trans_cond_update(txq); + ret = NETDEV_TX_OK; +@@ -2640,10 +2643,11 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + rcu_read_unlock(); + netdev->stats.tx_dropped += 
tx_dropped; + netdev->stats.tx_bytes += tx_bytes; +- netdev->stats.tx_packets += tx_packets; ++ netdev->stats.tx_packets += tx_bpackets + tx_dpackets; + adapter->tx_send_failed += tx_send_failed; + adapter->tx_map_failed += tx_map_failed; +- adapter->tx_stats_buffers[queue_num].packets += tx_packets; ++ adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets; ++ adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets; + adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; + adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped; + +@@ -3808,7 +3812,10 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + + for (i = 0; i < adapter->req_tx_queues; i++) { +- snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); ++ snprintf(data, ETH_GSTRING_LEN, "tx%d_batched_packets", i); ++ data += ETH_GSTRING_LEN; ++ ++ snprintf(data, ETH_GSTRING_LEN, "tx%d_direct_packets", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); +@@ -3873,7 +3880,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, + (adapter, ibmvnic_stats[i].offset)); + + for (j = 0; j < adapter->req_tx_queues; j++) { +- data[i] = adapter->tx_stats_buffers[j].packets; ++ data[i] = adapter->tx_stats_buffers[j].batched_packets; ++ i++; ++ data[i] = adapter->tx_stats_buffers[j].direct_packets; + i++; + data[i] = adapter->tx_stats_buffers[j].bytes; + i++; +diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h +index 94ac36b1408be..a189038d88df0 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.h ++++ b/drivers/net/ethernet/ibm/ibmvnic.h +@@ -213,7 +213,8 @@ struct ibmvnic_statistics { + + #define NUM_TX_STATS 3 + struct ibmvnic_tx_queue_stats { +- u64 packets; ++ u64 batched_packets; ++ u64 direct_packets; + u64 bytes; + u64 dropped_packets; + }; +-- +2.39.5 + diff --git a/queue-6.12/ibmvnic-don-t-reference-skb-after-sending-to-vios.patch b/queue-6.12/ibmvnic-don-t-reference-skb-after-sending-to-vios.patch new file mode 100644 index 0000000000..969c1ab8ce --- /dev/null +++ b/queue-6.12/ibmvnic-don-t-reference-skb-after-sending-to-vios.patch @@ -0,0 +1,85 @@ +From d02c2c70cf0820591c4ef609ac2949b052bccc7c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 09:52:33 -0600 +Subject: ibmvnic: Don't reference skb after sending to VIOS + +From: Nick Child + +[ Upstream commit bdf5d13aa05ec314d4385b31ac974d6c7e0997c9 ] + +Previously, after successfully flushing the xmit buffer to VIOS, +the tx_bytes stat was incremented by the length of the skb. + +It is invalid to access the skb memory after sending the buffer to +the VIOS because, at any point after sending, the VIOS can trigger +an interrupt to free this memory. 
A race between reading skb->len +and freeing the skb is possible (especially during LPM) and will +result in use-after-free: + ================================================================== + BUG: KASAN: slab-use-after-free in ibmvnic_xmit+0x75c/0x1808 [ibmvnic] + Read of size 4 at addr c00000024eb48a70 by task hxecom/14495 + <...> + Call Trace: + [c000000118f66cf0] [c0000000018cba6c] dump_stack_lvl+0x84/0xe8 (unreliable) + [c000000118f66d20] [c0000000006f0080] print_report+0x1a8/0x7f0 + [c000000118f66df0] [c0000000006f08f0] kasan_report+0x128/0x1f8 + [c000000118f66f00] [c0000000006f2868] __asan_load4+0xac/0xe0 + [c000000118f66f20] [c0080000046eac84] ibmvnic_xmit+0x75c/0x1808 [ibmvnic] + [c000000118f67340] [c0000000014be168] dev_hard_start_xmit+0x150/0x358 + <...> + Freed by task 0: + kasan_save_stack+0x34/0x68 + kasan_save_track+0x2c/0x50 + kasan_save_free_info+0x64/0x108 + __kasan_mempool_poison_object+0x148/0x2d4 + napi_skb_cache_put+0x5c/0x194 + net_tx_action+0x154/0x5b8 + handle_softirqs+0x20c/0x60c + do_softirq_own_stack+0x6c/0x88 + <...> + The buggy address belongs to the object at c00000024eb48a00 which + belongs to the cache skbuff_head_cache of size 224 +================================================================== + +Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") +Signed-off-by: Nick Child +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250214155233.235559-1-nnac123@linux.ibm.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ibm/ibmvnic.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index cca2ed6ad2899..61db00b2b33e4 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -2408,6 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + dma_addr_t data_dma_addr; + struct netdev_queue *txq; + unsigned long lpar_rc; ++ unsigned int skblen; + union sub_crq tx_crq; + unsigned int offset; + bool use_scrq_send_direct = false; +@@ -2522,6 +2523,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + tx_buff->skb = skb; + tx_buff->index = bufidx; + tx_buff->pool_index = queue_num; ++ skblen = skb->len; + + memset(&tx_crq, 0, sizeof(tx_crq)); + tx_crq.v1.first = IBMVNIC_CRQ_CMD; +@@ -2614,7 +2616,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) + netif_stop_subqueue(netdev, queue_num); + } + +- tx_bytes += skb->len; ++ tx_bytes += skblen; + txq_trans_cond_update(txq); + ret = NETDEV_TX_OK; + goto out; +-- +2.39.5 + diff --git a/queue-6.12/input-serio-define-serio_pause_rx-guard-to-pause-and.patch b/queue-6.12/input-serio-define-serio_pause_rx-guard-to-pause-and.patch new file mode 100644 index 0000000000..f516215426 --- /dev/null +++ b/queue-6.12/input-serio-define-serio_pause_rx-guard-to-pause-and.patch @@ -0,0 +1,52 @@ +From 2f136558ce088ef2add684278ff88677a63f5829 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Sep 2024 21:17:06 -0700 +Subject: Input: serio - define serio_pause_rx guard to pause and resume serio + ports + +From: Dmitry Torokhov + +[ Upstream commit 0e45a09a1da0872786885c505467aab8fb29b5b4 ] + +serio_pause_rx() and serio_continue_rx() are usually used together to +temporarily stop receiving interrupts/data for a given serio port. +Define "serio_pause_rx" guard for this so that the port is always +resumed once critical section is over. 
+ +Example: + + scoped_guard(serio_pause_rx, elo->serio) { + elo->expected_packet = toupper(packet[0]); + init_completion(&elo->cmd_done); + } + +Link: https://lore.kernel.org/r/20240905041732.2034348-2-dmitry.torokhov@gmail.com +Signed-off-by: Dmitry Torokhov +Stable-dep-of: 08bd5b7c9a24 ("Input: synaptics - fix crash when enabling pass-through port") +Signed-off-by: Sasha Levin +--- + include/linux/serio.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/include/linux/serio.h b/include/linux/serio.h +index bf2191f253509..69a47674af653 100644 +--- a/include/linux/serio.h ++++ b/include/linux/serio.h +@@ -6,6 +6,7 @@ + #define _SERIO_H + + ++#include + #include + #include + #include +@@ -161,4 +162,6 @@ static inline void serio_continue_rx(struct serio *serio) + spin_unlock_irq(&serio->lock); + } + ++DEFINE_GUARD(serio_pause_rx, struct serio *, serio_pause_rx(_T), serio_continue_rx(_T)) ++ + #endif +-- +2.39.5 + diff --git a/queue-6.12/input-synaptics-fix-crash-when-enabling-pass-through.patch b/queue-6.12/input-synaptics-fix-crash-when-enabling-pass-through.patch new file mode 100644 index 0000000000..328a7f3306 --- /dev/null +++ b/queue-6.12/input-synaptics-fix-crash-when-enabling-pass-through.patch @@ -0,0 +1,138 @@ +From 3d625035e86a1478849408f54e50523d7f86083c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Jan 2025 09:23:40 -0800 +Subject: Input: synaptics - fix crash when enabling pass-through port + +From: Dmitry Torokhov + +[ Upstream commit 08bd5b7c9a2401faabdaa1472d45c7de0755fd7e ] + +When enabling a pass-through port an interrupt might come before psmouse +driver binds to the pass-through port. However synaptics sub-driver +tries to access psmouse instance presumably associated with the +pass-through port to figure out if only 1 byte of response or entire +protocol packet needs to be forwarded to the pass-through port and may +crash if psmouse instance has not been attached to the port yet. + +Fix the crash by introducing open() and close() methods for the port and +check if the port is open before trying to access psmouse instance. +Because psmouse calls serio_open() only after attaching psmouse instance +to serio port instance this prevents the potential crash. 
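+
+Put differently: serio_open() on the pass-through port can only run after
+psmouse has bound to it and attached its instance as drvdata, so
+pt_port_open == true (checked under the pause-rx guard) guarantees that
+psmouse_from_serio() returns a valid child instance; until then only the
+first byte of the packet is forwarded, matching the previous behaviour
+for a not-yet-activated child.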
+ +Reported-by: Takashi Iwai +Fixes: 100e16959c3c ("Input: libps2 - attach ps2dev instances as serio port's drvdata") +Link: https://bugzilla.suse.com/show_bug.cgi?id=1219522 +Cc: stable@vger.kernel.org +Reviewed-by: Takashi Iwai +Link: https://lore.kernel.org/r/Z4qSHORvPn7EU2j1@google.com +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/mouse/synaptics.c | 56 ++++++++++++++++++++++++--------- + drivers/input/mouse/synaptics.h | 1 + + 2 files changed, 43 insertions(+), 14 deletions(-) + +diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c +index 380aa1614442f..3d1459b551bb2 100644 +--- a/drivers/input/mouse/synaptics.c ++++ b/drivers/input/mouse/synaptics.c +@@ -667,23 +667,50 @@ static void synaptics_pt_stop(struct serio *serio) + serio_continue_rx(parent->ps2dev.serio); + } + ++static int synaptics_pt_open(struct serio *serio) ++{ ++ struct psmouse *parent = psmouse_from_serio(serio->parent); ++ struct synaptics_data *priv = parent->private; ++ ++ guard(serio_pause_rx)(parent->ps2dev.serio); ++ priv->pt_port_open = true; ++ ++ return 0; ++} ++ ++static void synaptics_pt_close(struct serio *serio) ++{ ++ struct psmouse *parent = psmouse_from_serio(serio->parent); ++ struct synaptics_data *priv = parent->private; ++ ++ guard(serio_pause_rx)(parent->ps2dev.serio); ++ priv->pt_port_open = false; ++} ++ + static int synaptics_is_pt_packet(u8 *buf) + { + return (buf[0] & 0xFC) == 0x84 && (buf[3] & 0xCC) == 0xC4; + } + +-static void synaptics_pass_pt_packet(struct serio *ptport, u8 *packet) ++static void synaptics_pass_pt_packet(struct synaptics_data *priv, u8 *packet) + { +- struct psmouse *child = psmouse_from_serio(ptport); ++ struct serio *ptport; + +- if (child && child->state == PSMOUSE_ACTIVATED) { +- serio_interrupt(ptport, packet[1], 0); +- serio_interrupt(ptport, packet[4], 0); +- serio_interrupt(ptport, packet[5], 0); +- if (child->pktsize == 4) +- serio_interrupt(ptport, packet[2], 0); +- } else { +- serio_interrupt(ptport, packet[1], 0); ++ ptport = priv->pt_port; ++ if (!ptport) ++ return; ++ ++ serio_interrupt(ptport, packet[1], 0); ++ ++ if (priv->pt_port_open) { ++ struct psmouse *child = psmouse_from_serio(ptport); ++ ++ if (child->state == PSMOUSE_ACTIVATED) { ++ serio_interrupt(ptport, packet[4], 0); ++ serio_interrupt(ptport, packet[5], 0); ++ if (child->pktsize == 4) ++ serio_interrupt(ptport, packet[2], 0); ++ } + } + } + +@@ -722,6 +749,8 @@ static void synaptics_pt_create(struct psmouse *psmouse) + serio->write = synaptics_pt_write; + serio->start = synaptics_pt_start; + serio->stop = synaptics_pt_stop; ++ serio->open = synaptics_pt_open; ++ serio->close = synaptics_pt_close; + serio->parent = psmouse->ps2dev.serio; + + psmouse->pt_activate = synaptics_pt_activate; +@@ -1218,11 +1247,10 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse) + + if (SYN_CAP_PASS_THROUGH(priv->info.capabilities) && + synaptics_is_pt_packet(psmouse->packet)) { +- if (priv->pt_port) +- synaptics_pass_pt_packet(priv->pt_port, +- psmouse->packet); +- } else ++ synaptics_pass_pt_packet(priv, psmouse->packet); ++ } else { + synaptics_process_packet(psmouse); ++ } + + return PSMOUSE_FULL_PACKET; + } +diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h +index 08533d1b1b16f..4b34f13b9f761 100644 +--- a/drivers/input/mouse/synaptics.h ++++ b/drivers/input/mouse/synaptics.h +@@ -188,6 +188,7 @@ struct synaptics_data { + bool disable_gesture; /* disable gestures */ + + struct serio *pt_port; /* 
Pass-through serio port */ ++ bool pt_port_open; + + /* + * Last received Advanced Gesture Mode (AGM) packet. An AGM packet +-- +2.39.5 + diff --git a/queue-6.12/kvm-nvmx-defer-svi-update-to-vmcs01-on-eoi-when-l2-i.patch b/queue-6.12/kvm-nvmx-defer-svi-update-to-vmcs01-on-eoi-when-l2-i.patch new file mode 100644 index 0000000000..a67d52503f --- /dev/null +++ b/queue-6.12/kvm-nvmx-defer-svi-update-to-vmcs01-on-eoi-when-l2-i.patch @@ -0,0 +1,158 @@ +From 500cc2aceba1d8916edd8a253d146af2145cfc5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Nov 2024 16:00:10 -0800 +Subject: KVM: nVMX: Defer SVI update to vmcs01 on EOI when L2 is active w/o + VID +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chao Gao + +[ Upstream commit 04bc93cf49d16d01753b95ddb5d4f230b809a991 ] + +If KVM emulates an EOI for L1's virtual APIC while L2 is active, defer +updating GUEST_INTERUPT_STATUS.SVI, i.e. the VMCS's cache of the highest +in-service IRQ, until L1 is active, as vmcs01, not vmcs02, needs to track +vISR. The missed SVI update for vmcs01 can result in L1 interrupts being +incorrectly blocked, e.g. if there is a pending interrupt with lower +priority than the interrupt that was EOI'd. + +This bug only affects use cases where L1's vAPIC is effectively passed +through to L2, e.g. in a pKVM scenario where L2 is L1's depriveleged host, +as KVM will only emulate an EOI for L1's vAPIC if Virtual Interrupt +Delivery (VID) is disabled in vmc12, and L1 isn't intercepting L2 accesses +to its (virtual) APIC page (or if x2APIC is enabled, the EOI MSR). + +WARN() if KVM updates L1's ISR while L2 is active with VID enabled, as an +EOI from L2 is supposed to affect L2's vAPIC, but still defer the update, +to try to keep L1 alive. Specifically, KVM forwards all APICv-related +VM-Exits to L1 via nested_vmx_l1_wants_exit(): + + case EXIT_REASON_APIC_ACCESS: + case EXIT_REASON_APIC_WRITE: + case EXIT_REASON_EOI_INDUCED: + /* + * The controls for "virtualize APIC accesses," "APIC- + * register virtualization," and "virtual-interrupt + * delivery" only come from vmcs12. 
+ */ + return true; + +Fixes: c7c9c56ca26f ("x86, apicv: add virtual interrupt delivery support") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/kvm/20230312180048.1778187-1-jason.cj.chen@intel.com +Reported-by: Markku Ahvenjärvi +Closes: https://lore.kernel.org/all/20240920080012.74405-1-mankku@gmail.com +Cc: Janne Karhunen +Signed-off-by: Chao Gao +[sean: drop request, handle in VMX, write changelog] +Tested-by: Chao Gao +Link: https://lore.kernel.org/r/20241128000010.4051275-3-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/lapic.c | 11 +++++++++++ + arch/x86/kvm/lapic.h | 1 + + arch/x86/kvm/vmx/nested.c | 5 +++++ + arch/x86/kvm/vmx/vmx.c | 21 +++++++++++++++++++++ + arch/x86/kvm/vmx/vmx.h | 1 + + 5 files changed, 39 insertions(+) + +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index 375bbb9600d3c..1a8148dec4afe 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -816,6 +816,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) + } + } + ++void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_lapic *apic = vcpu->arch.apic; ++ ++ if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) ++ return; ++ ++ kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); ++} ++EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); ++ + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) + { + /* This may race with setting of irr in __apic_accept_irq() and +diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h +index fc4bd36d44cfc..3aa599db77968 100644 +--- a/arch/x86/kvm/lapic.h ++++ b/arch/x86/kvm/lapic.h +@@ -120,6 +120,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); + int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); + int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); ++void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); + + u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index 931a7361c30f2..22bee8a711442 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -5043,6 +5043,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + } + ++ if (vmx->nested.update_vmcs01_hwapic_isr) { ++ vmx->nested.update_vmcs01_hwapic_isr = false; ++ kvm_apic_update_hwapic_isr(vcpu); ++ } ++ + if ((vm_exit_reason != -1) && + (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx))) + vmx->nested.need_vmcs12_to_shadow_sync = true; +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index f06d443ec3c68..1af30e3472cdd 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6858,6 +6858,27 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) + u16 status; + u8 old; + ++ /* ++ * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI ++ * is only relevant for if and only if Virtual Interrupt Delivery is ++ * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's ++ * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested ++ * VM-Exit, otherwise L1 with run with a stale SVI. 
++ */ ++ if (is_guest_mode(vcpu)) { ++ /* ++ * KVM is supposed to forward intercepted L2 EOIs to L1 if VID ++ * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. ++ * Note, userspace can stuff state while L2 is active; assert ++ * that VID is disabled if and only if the vCPU is in KVM_RUN ++ * to avoid false positives if userspace is setting APIC state. ++ */ ++ WARN_ON_ONCE(vcpu->wants_to_run && ++ nested_cpu_has_vid(get_vmcs12(vcpu))); ++ to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; ++ return; ++ } ++ + if (max_isr == -1) + max_isr = 0; + +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 2325f773a20be..41bf59bbc6426 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -176,6 +176,7 @@ struct nested_vmx { + bool reload_vmcs01_apic_access_page; + bool update_vmcs01_cpu_dirty_logging; + bool update_vmcs01_apicv_status; ++ bool update_vmcs01_hwapic_isr; + + /* + * Enlightened VMCS has been enabled. It does not mean that L1 has to +-- +2.39.5 + diff --git a/queue-6.12/kvm-x86-get-vcpu-arch.apic_base-directly-and-drop-kv.patch b/queue-6.12/kvm-x86-get-vcpu-arch.apic_base-directly-and-drop-kv.patch new file mode 100644 index 0000000000..8c511edde2 --- /dev/null +++ b/queue-6.12/kvm-x86-get-vcpu-arch.apic_base-directly-and-drop-kv.patch @@ -0,0 +1,90 @@ +From 0589a65dcc94f80629bb99a5e0dfc766b357cf44 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Nov 2024 11:35:49 -0700 +Subject: KVM: x86: Get vcpu->arch.apic_base directly and drop + kvm_get_apic_base() + +From: Sean Christopherson + +[ Upstream commit d91060e342a66b52d9bd64f0b123b9c306293b76 ] + +Access KVM's emulated APIC base MSR value directly instead of bouncing +through a helper, as there is no reason to add a layer of indirection, and +there are other MSRs with a "set" but no "get", e.g. EFER. + +No functional change intended. 
+ +Reviewed-by: Kai Huang +Reviewed-by: Paolo Bonzini +Link: https://lore.kernel.org/r/20241009181742.1128779-4-seanjc@google.com +Link: https://lore.kernel.org/r/20241101183555.1794700-4-seanjc@google.com +Signed-off-by: Sean Christopherson +Stable-dep-of: 04bc93cf49d1 ("KVM: nVMX: Defer SVI update to vmcs01 on EOI when L2 is active w/o VID") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/lapic.h | 1 - + arch/x86/kvm/x86.c | 13 ++++--------- + 2 files changed, 4 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h +index 1b8ef9856422a..441abc4f4afd9 100644 +--- a/arch/x86/kvm/lapic.h ++++ b/arch/x86/kvm/lapic.h +@@ -117,7 +117,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); + void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); + +-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); + int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); + int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 0846e3af5f6c5..36bedf235340c 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -667,14 +667,9 @@ static void drop_user_return_notifiers(void) + kvm_on_user_return(&msrs->urn); + } + +-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) +-{ +- return vcpu->arch.apic_base; +-} +- + enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) + { +- return kvm_apic_mode(kvm_get_apic_base(vcpu)); ++ return kvm_apic_mode(vcpu->arch.apic_base); + } + EXPORT_SYMBOL_GPL(kvm_get_apic_mode); + +@@ -4314,7 +4309,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + msr_info->data = 1 << 24; + break; + case MSR_IA32_APICBASE: +- msr_info->data = kvm_get_apic_base(vcpu); ++ msr_info->data = vcpu->arch.apic_base; + break; + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: + return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); +@@ -10159,7 +10154,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) + + kvm_run->if_flag = kvm_x86_call(get_if_flag)(vcpu); + kvm_run->cr8 = kvm_get_cr8(vcpu); +- kvm_run->apic_base = kvm_get_apic_base(vcpu); ++ kvm_run->apic_base = vcpu->arch.apic_base; + + kvm_run->ready_for_interrupt_injection = + pic_in_kernel(vcpu->kvm) || +@@ -11718,7 +11713,7 @@ static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) + sregs->cr4 = kvm_read_cr4(vcpu); + sregs->cr8 = kvm_get_cr8(vcpu); + sregs->efer = vcpu->arch.efer; +- sregs->apic_base = kvm_get_apic_base(vcpu); ++ sregs->apic_base = vcpu->arch.apic_base; + } + + static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +-- +2.39.5 + diff --git a/queue-6.12/kvm-x86-inline-kvm_get_apic_mode-in-lapic.h.patch b/queue-6.12/kvm-x86-inline-kvm_get_apic_mode-in-lapic.h.patch new file mode 100644 index 0000000000..ab5a87159a --- /dev/null +++ b/queue-6.12/kvm-x86-inline-kvm_get_apic_mode-in-lapic.h.patch @@ -0,0 +1,71 @@ +From 2b1009e4a0c08036eaf5f9b83ed0a37c1ec4410f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Nov 2024 11:35:50 -0700 +Subject: KVM: x86: Inline kvm_get_apic_mode() in lapic.h + +From: Sean Christopherson + +[ Upstream commit adfec1f4591cf8c69664104eaf41e06b2e7b767e ] + +Inline kvm_get_apic_mode() in lapic.h to avoid a CALL+RET as well as an +export. The underlying kvm_apic_mode() helper is public information, i.e. 
+there is no state/information that needs to be hidden from vendor modules. + +No functional change intended. + +Reviewed-by: Kai Huang +Reviewed-by: Paolo Bonzini +Link: https://lore.kernel.org/r/20241009181742.1128779-5-seanjc@google.com +Link: https://lore.kernel.org/r/20241101183555.1794700-5-seanjc@google.com +Signed-off-by: Sean Christopherson +Stable-dep-of: 04bc93cf49d1 ("KVM: nVMX: Defer SVI update to vmcs01 on EOI when L2 is active w/o VID") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/lapic.h | 6 +++++- + arch/x86/kvm/x86.c | 6 ------ + 2 files changed, 5 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h +index 441abc4f4afd9..fc4bd36d44cfc 100644 +--- a/arch/x86/kvm/lapic.h ++++ b/arch/x86/kvm/lapic.h +@@ -120,7 +120,6 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); + int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); + int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +-enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); + + u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); +@@ -270,6 +269,11 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base) + return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + } + ++static inline enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ return kvm_apic_mode(vcpu->arch.apic_base); ++} ++ + static inline u8 kvm_xapic_id(struct kvm_lapic *apic) + { + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 36bedf235340c..b67a2f46e40b0 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -667,12 +667,6 @@ static void drop_user_return_notifiers(void) + kvm_on_user_return(&msrs->urn); + } + +-enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) +-{ +- return kvm_apic_mode(vcpu->arch.apic_base); +-} +-EXPORT_SYMBOL_GPL(kvm_get_apic_mode); +- + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + { + enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); +-- +2.39.5 + diff --git a/queue-6.12/net-add-non-rcu-dev_getbyhwaddr-helper.patch b/queue-6.12/net-add-non-rcu-dev_getbyhwaddr-helper.patch new file mode 100644 index 0000000000..f6689867b1 --- /dev/null +++ b/queue-6.12/net-add-non-rcu-dev_getbyhwaddr-helper.patch @@ -0,0 +1,118 @@ +From ec9c532651d73baa333fd6fa9361c0797c9a4ef6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 05:49:30 -0800 +Subject: net: Add non-RCU dev_getbyhwaddr() helper + +From: Breno Leitao + +[ Upstream commit 4b5a28b38c4a0106c64416a1b2042405166b26ce ] + +Add dedicated helper for finding devices by hardware address when +holding rtnl_lock, similar to existing dev_getbyhwaddr_rcu(). This prevents +PROVE_LOCKING warnings when rtnl_lock is held but RCU read lock is not. + +Extract common address comparison logic into dev_addr_cmp(). 
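+
+A minimal usage sketch (illustrative only; the real consumer is the arp
+change this helper is a dependency of):
+
+    ASSERT_RTNL();
+    dev = dev_getbyhwaddr(net, ARPHRD_ETHER, ha);
+    if (dev)
+            netdev_info(dev, "matched by hardware address\n");
+
+As with dev_getbyhwaddr_rcu(), no reference is taken on the returned
+device, so it must only be used while the RTNL is held.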
+ +The context about this change could be found in the following +discussion: + +Link: https://lore.kernel.org/all/20250206-scarlet-ermine-of-improvement-1fcac5@leitao/ + +Cc: kuniyu@amazon.com +Cc: ushankar@purestorage.com +Suggested-by: Eric Dumazet +Signed-off-by: Breno Leitao +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-1-d3d6892db9e1@debian.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 4eae0ee0f1e6 ("arp: switch to dev_getbyhwaddr() in arp_req_set_public()") +Signed-off-by: Sasha Levin +--- + include/linux/netdevice.h | 2 ++ + net/core/dev.c | 37 ++++++++++++++++++++++++++++++++++--- + 2 files changed, 36 insertions(+), 3 deletions(-) + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 4f17b786828af..35b886385f329 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3064,6 +3064,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net) + } + + int netdev_boot_setup_check(struct net_device *dev); ++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, ++ const char *hwaddr); + struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *hwaddr); + struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); +diff --git a/net/core/dev.c b/net/core/dev.c +index 2e0fe38d0e877..c761f862bc5a2 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1012,6 +1012,12 @@ int netdev_get_name(struct net *net, char *name, int ifindex) + return ret; + } + ++static bool dev_addr_cmp(struct net_device *dev, unsigned short type, ++ const char *ha) ++{ ++ return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len); ++} ++ + /** + * dev_getbyhwaddr_rcu - find a device by its hardware address + * @net: the applicable net namespace +@@ -1020,7 +1026,7 @@ int netdev_get_name(struct net *net, char *name, int ifindex) + * + * Search for an interface by MAC address. Returns NULL if the device + * is not found or a pointer to the device. +- * The caller must hold RCU or RTNL. ++ * The caller must hold RCU. + * The returned device has not had its ref count increased + * and the caller must therefore be careful about locking + * +@@ -1032,14 +1038,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + struct net_device *dev; + + for_each_netdev_rcu(net, dev) +- if (dev->type == type && +- !memcmp(dev->dev_addr, ha, dev->addr_len)) ++ if (dev_addr_cmp(dev, type, ha)) + return dev; + + return NULL; + } + EXPORT_SYMBOL(dev_getbyhwaddr_rcu); + ++/** ++ * dev_getbyhwaddr() - find a device by its hardware address ++ * @net: the applicable net namespace ++ * @type: media type of device ++ * @ha: hardware address ++ * ++ * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold ++ * rtnl_lock. ++ * ++ * Context: rtnl_lock() must be held. 
++ * Return: pointer to the net_device, or NULL if not found ++ */ ++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, ++ const char *ha) ++{ ++ struct net_device *dev; ++ ++ ASSERT_RTNL(); ++ for_each_netdev(net, dev) ++ if (dev_addr_cmp(dev, type, ha)) ++ return dev; ++ ++ return NULL; ++} ++EXPORT_SYMBOL(dev_getbyhwaddr); ++ + struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) + { + struct net_device *dev, *ret = NULL; +-- +2.39.5 + diff --git a/queue-6.12/net-allow-small-head-cache-usage-with-large-max_skb_.patch b/queue-6.12/net-allow-small-head-cache-usage-with-large-max_skb_.patch new file mode 100644 index 0000000000..b91da475cf --- /dev/null +++ b/queue-6.12/net-allow-small-head-cache-usage-with-large-max_skb_.patch @@ -0,0 +1,146 @@ +From b8a54a88a0ef1d7017fdb2f29f082351e36ddb47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 19:29:39 +0100 +Subject: net: allow small head cache usage with large MAX_SKB_FRAGS values + +From: Paolo Abeni + +[ Upstream commit 14ad6ed30a10afbe91b0749d6378285f4225d482 ] + +Sabrina reported the following splat: + + WARNING: CPU: 0 PID: 1 at net/core/dev.c:6935 netif_napi_add_weight_locked+0x8f2/0xba0 + Modules linked in: + CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc1-net-00092-g011b03359038 #996 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 + RIP: 0010:netif_napi_add_weight_locked+0x8f2/0xba0 + Code: e8 c3 e6 6a fe 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc c7 44 24 10 ff ff ff ff e9 8f fb ff ff e8 9e e6 6a fe <0f> 0b e9 d3 fe ff ff e8 92 e6 6a fe 48 8b 04 24 be ff ff ff ff 48 + RSP: 0000:ffffc9000001fc60 EFLAGS: 00010293 + RAX: 0000000000000000 RBX: ffff88806ce48128 RCX: 1ffff11001664b9e + RDX: ffff888008f00040 RSI: ffffffff8317ca42 RDI: ffff88800b325cb6 + RBP: ffff88800b325c40 R08: 0000000000000001 R09: ffffed100167502c + R10: ffff88800b3a8163 R11: 0000000000000000 R12: ffff88800ac1c168 + R13: ffff88800ac1c168 R14: ffff88800ac1c168 R15: 0000000000000007 + FS: 0000000000000000(0000) GS:ffff88806ce00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: ffff888008201000 CR3: 0000000004c94001 CR4: 0000000000370ef0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + gro_cells_init+0x1ba/0x270 + xfrm_input_init+0x4b/0x2a0 + xfrm_init+0x38/0x50 + ip_rt_init+0x2d7/0x350 + ip_init+0xf/0x20 + inet_init+0x406/0x590 + do_one_initcall+0x9d/0x2e0 + do_initcalls+0x23b/0x280 + kernel_init_freeable+0x445/0x490 + kernel_init+0x20/0x1d0 + ret_from_fork+0x46/0x80 + ret_from_fork_asm+0x1a/0x30 + + irq event stamp: 584330 + hardirqs last enabled at (584338): [] __up_console_sem+0x77/0xb0 + hardirqs last disabled at (584345): [] __up_console_sem+0x5c/0xb0 + softirqs last enabled at (583242): [] netlink_insert+0x14d/0x470 + softirqs last disabled at (583754): [] netif_napi_add_weight_locked+0x77d/0xba0 + +on kernel built with MAX_SKB_FRAGS=45, where SKB_WITH_OVERHEAD(1024) +is smaller than GRO_MAX_HEAD. + +Such built additionally contains the revert of the single page frag cache +so that napi_get_frags() ends up using the page frag allocator, triggering +the splat. 
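+
+Back-of-the-envelope, assuming a 64-bit build where each skb_frag_t is 16
+bytes: with MAX_SKB_FRAGS=45 the frag array alone is ~720 bytes, so the
+aligned struct skb_shared_info comes to roughly 768 bytes and
+SKB_WITH_OVERHEAD(1024) shrinks to about 256 bytes, which can end up
+below GRO_MAX_HEAD (MAX_HEADER + 128) depending on MAX_HEADER. The exact
+figures are config dependent; this is only meant to show the order of
+magnitude.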
+ +Note that the underlying issue is independent from the mentioned +revert; address it ensuring that the small head cache will fit either TCP +and GRO allocation and updating napi_alloc_skb() and __netdev_alloc_skb() +to select kmalloc() usage for any allocation fitting such cache. + +Reported-by: Sabrina Dubroca +Suggested-by: Eric Dumazet +Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/gro.h | 3 +++ + net/core/gro.c | 3 --- + net/core/skbuff.c | 10 +++++++--- + 3 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/include/net/gro.h b/include/net/gro.h +index b9b58c1f8d190..7b548f91754bf 100644 +--- a/include/net/gro.h ++++ b/include/net/gro.h +@@ -11,6 +11,9 @@ + #include + #include + ++/* This should be increased if a protocol with a bigger head is added. */ ++#define GRO_MAX_HEAD (MAX_HEADER + 128) ++ + struct napi_gro_cb { + union { + struct { +diff --git a/net/core/gro.c b/net/core/gro.c +index d1f44084e978f..78b320b631744 100644 +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -7,9 +7,6 @@ + + #define MAX_GRO_SKBS 8 + +-/* This should be increased if a protocol with a bigger head is added. */ +-#define GRO_MAX_HEAD (MAX_HEADER + 128) +- + static DEFINE_SPINLOCK(offload_lock); + + /** +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 74149dc4ee318..61a950f13a91c 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -69,6 +69,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -95,7 +96,9 @@ + static struct kmem_cache *skbuff_ext_cache __ro_after_init; + #endif + +-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) ++#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) ++#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ ++ GRO_MAX_HEAD_PAD)) + + /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. + * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique +@@ -736,7 +739,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. 
+ */ +- if (len <= SKB_WITH_OVERHEAD(1024) || ++ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); +@@ -816,7 +819,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) + * When the small frag allocator is available, prefer it over kmalloc + * for small fragments + */ +- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || ++ if ((!NAPI_HAS_SMALL_PAGE_FRAG && ++ len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, +-- +2.39.5 + diff --git a/queue-6.12/net-axienet-set-mac_managed_pm.patch b/queue-6.12/net-axienet-set-mac_managed_pm.patch new file mode 100644 index 0000000000..603afc3539 --- /dev/null +++ b/queue-6.12/net-axienet-set-mac_managed_pm.patch @@ -0,0 +1,43 @@ +From 71267ef342cf667623bbb7f1e62113a5dd513f09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 13:58:42 +0800 +Subject: net: axienet: Set mac_managed_pm + +From: Nick Hu + +[ Upstream commit a370295367b55662a32a4be92565fe72a5aa79bb ] + +The external PHY will undergo a soft reset twice during the resume process +when it wake up from suspend. The first reset occurs when the axienet +driver calls phylink_of_phy_connect(), and the second occurs when +mdio_bus_phy_resume() invokes phy_init_hw(). The second soft reset of the +external PHY does not reinitialize the internal PHY, which causes issues +with the internal PHY, resulting in the PHY link being down. To prevent +this, setting the mac_managed_pm flag skips the mdio_bus_phy_resume() +function. + +Fixes: a129b41fe0a8 ("Revert "net: phy: dp83867: perform soft reset and retain established link"") +Signed-off-by: Nick Hu +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20250217055843.19799-1-nick.hu@sifive.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +index de10a2d08c428..fe3438abcd253 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -2888,6 +2888,7 @@ static int axienet_probe(struct platform_device *pdev) + + lp->phylink_config.dev = &ndev->dev; + lp->phylink_config.type = PHYLINK_NETDEV; ++ lp->phylink_config.mac_managed_pm = true; + lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | + MAC_10FD | MAC_100FD | MAC_1000FD; + +-- +2.39.5 + diff --git a/queue-6.12/net-pse-pd-avoid-setting-max_ua-in-regulator-constra.patch b/queue-6.12/net-pse-pd-avoid-setting-max_ua-in-regulator-constra.patch new file mode 100644 index 0000000000..774598a70f --- /dev/null +++ b/queue-6.12/net-pse-pd-avoid-setting-max_ua-in-regulator-constra.patch @@ -0,0 +1,56 @@ +From 908ab0a06a1b2e0ef6ee4275c5aae369721eb243 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Jan 2025 10:40:21 +0100 +Subject: net: pse-pd: Avoid setting max_uA in regulator constraints + +From: Kory Maincent + +[ Upstream commit 675d0e3cacc3ae7c29294a5f6a820187f862ad8b ] + +Setting the max_uA constraint in the regulator API imposes a current +limit during the regulator registration process. 
This behavior conflicts +with preserving the maximum PI power budget configuration across reboots. + +Instead, compare the desired current limit to MAX_PI_CURRENT in the +pse_pi_set_current_limit() function to ensure proper handling of the +power budget. + +Acked-by: Oleksij Rempel +Signed-off-by: Kory Maincent +Signed-off-by: Paolo Abeni +Stable-dep-of: f6093c5ec74d ("net: pse-pd: pd692x0: Fix power limit retrieval") +Signed-off-by: Sasha Levin +--- + drivers/net/pse-pd/pse_core.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c +index 2906ce173f66c..9fee4dd53515a 100644 +--- a/drivers/net/pse-pd/pse_core.c ++++ b/drivers/net/pse-pd/pse_core.c +@@ -357,6 +357,9 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, + if (!ops->pi_set_current_limit) + return -EOPNOTSUPP; + ++ if (max_uA > MAX_PI_CURRENT) ++ return -ERANGE; ++ + id = rdev_get_id(rdev); + mutex_lock(&pcdev->lock); + ret = ops->pi_set_current_limit(pcdev, id, max_uA); +@@ -403,11 +406,9 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, + + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + +- if (pcdev->ops->pi_set_current_limit) { ++ if (pcdev->ops->pi_set_current_limit) + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; +- rinit_data->constraints.max_uA = MAX_PI_CURRENT; +- } + + rinit_data->supply_regulator = "vpwr"; + +-- +2.39.5 + diff --git a/queue-6.12/net-pse-pd-pd692x0-fix-power-limit-retrieval.patch b/queue-6.12/net-pse-pd-pd692x0-fix-power-limit-retrieval.patch new file mode 100644 index 0000000000..6169ebb34c --- /dev/null +++ b/queue-6.12/net-pse-pd-pd692x0-fix-power-limit-retrieval.patch @@ -0,0 +1,45 @@ +From 34fcb0a4ea6260c506f7fac650ca4787e2286bd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 14:48:11 +0100 +Subject: net: pse-pd: pd692x0: Fix power limit retrieval + +From: Kory Maincent + +[ Upstream commit f6093c5ec74d5cc495f89bd359253d9c738d04d9 ] + +Fix incorrect data offset read in the pd692x0_pi_get_pw_limit callback. +The issue was previously unnoticed as it was only used by the regulator +API and not thoroughly tested, since the PSE is mainly controlled via +ethtool. + +The function became actively used by ethtool after commit 3e9dbfec4998 +("net: pse-pd: Split ethtool_get_status into multiple callbacks"), +which led to the discovery of this issue. + +Fix it by using the correct data offset. 
+ +Fixes: a87e699c9d33 ("net: pse-pd: pd692x0: Enhance with new current limit and voltage read callbacks") +Signed-off-by: Kory Maincent +Link: https://patch.msgid.link/20250217134812.1925345-1-kory.maincent@bootlin.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/pse-pd/pd692x0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c +index 9f00538f7e450..7cfc36cadb576 100644 +--- a/drivers/net/pse-pd/pd692x0.c ++++ b/drivers/net/pse-pd/pd692x0.c +@@ -1012,7 +1012,7 @@ static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev, + if (ret < 0) + return ret; + +- return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]); ++ return pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]); + } + + static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev, +-- +2.39.5 + diff --git a/queue-6.12/net-pse-pd-use-power-limit-at-driver-side-instead-of.patch b/queue-6.12/net-pse-pd-use-power-limit-at-driver-side-instead-of.patch new file mode 100644 index 0000000000..6d2a99ac2c --- /dev/null +++ b/queue-6.12/net-pse-pd-use-power-limit-at-driver-side-instead-of.patch @@ -0,0 +1,311 @@ +From 83a014e0e1cb5c4e2960c0f0587c82964776c07a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Jan 2025 10:40:26 +0100 +Subject: net: pse-pd: Use power limit at driver side instead of current limit + +From: Kory Maincent + +[ Upstream commit e0a5e2bba38aa61a900934b45d6e846e0a6d7524 ] + +The regulator framework uses current limits, but the PSE standard and +known PSE controllers rely on power limits. Instead of converting +current to power within each driver, perform the conversion in the PSE +core. This avoids redundancy in driver implementation and aligns better +with the standard, simplifying driver development. + +Remove at the same time the _pse_ethtool_get_status() function which is +not needed anymore. 
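+
+The conversion now done once in the core is plain P = U * I scaling with
+a 64-bit intermediate to avoid overflow; a condensed sketch of both
+directions (see the pse_core.c hunks below for the real code):
+
+  /* regulator set_current_limit (uA) -> PSE power limit (mW) */
+  tmp_64 = (s64)uV * max_uA;
+  mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000);
+
+  /* PSE power limit (mW) -> regulator get_current_limit (uA) */
+  tmp_64 = (s64)mW * 1000000000ull;
+  uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);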
+ +Acked-by: Oleksij Rempel +Signed-off-by: Kory Maincent +Signed-off-by: Paolo Abeni +Stable-dep-of: f6093c5ec74d ("net: pse-pd: pd692x0: Fix power limit retrieval") +Signed-off-by: Sasha Levin +--- + drivers/net/pse-pd/pd692x0.c | 45 ++++------------- + drivers/net/pse-pd/pse_core.c | 91 ++++++++++++++++------------------- + include/linux/pse-pd/pse.h | 16 +++--- + 3 files changed, 57 insertions(+), 95 deletions(-) + +diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c +index 0af7db80b2f88..9f00538f7e450 100644 +--- a/drivers/net/pse-pd/pd692x0.c ++++ b/drivers/net/pse-pd/pd692x0.c +@@ -999,13 +999,12 @@ static int pd692x0_pi_get_voltage(struct pse_controller_dev *pcdev, int id) + return (buf.sub[0] << 8 | buf.sub[1]) * 100000; + } + +-static int pd692x0_pi_get_current_limit(struct pse_controller_dev *pcdev, +- int id) ++static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev, ++ int id) + { + struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); + struct pd692x0_msg msg, buf = {0}; +- int mW, uV, uA, ret; +- s64 tmp_64; ++ int ret; + + msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_PARAM]; + msg.sub[2] = id; +@@ -1013,48 +1012,24 @@ static int pd692x0_pi_get_current_limit(struct pse_controller_dev *pcdev, + if (ret < 0) + return ret; + +- ret = pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]); +- if (ret < 0) +- return ret; +- mW = ret; +- +- ret = pd692x0_pi_get_voltage(pcdev, id); +- if (ret < 0) +- return ret; +- uV = ret; +- +- tmp_64 = mW; +- tmp_64 *= 1000000000ull; +- /* uA = mW * 1000000000 / uV */ +- uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV); +- return uA; ++ return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]); + } + +-static int pd692x0_pi_set_current_limit(struct pse_controller_dev *pcdev, +- int id, int max_uA) ++static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev, ++ int id, int max_mW) + { + struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); + struct device *dev = &priv->client->dev; + struct pd692x0_msg msg, buf = {0}; +- int uV, ret, mW; +- s64 tmp_64; ++ int ret; + + ret = pd692x0_fw_unavailable(priv); + if (ret) + return ret; + +- ret = pd692x0_pi_get_voltage(pcdev, id); +- if (ret < 0) +- return ret; +- uV = ret; +- + msg = pd692x0_msg_template_list[PD692X0_MSG_SET_PORT_PARAM]; + msg.sub[2] = id; +- tmp_64 = uV; +- tmp_64 *= max_uA; +- /* mW = uV * uA / 1000000000 */ +- mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000); +- ret = pd692x0_pi_set_pw_from_table(dev, &msg, mW); ++ ret = pd692x0_pi_set_pw_from_table(dev, &msg, max_mW); + if (ret) + return ret; + +@@ -1068,8 +1043,8 @@ static const struct pse_controller_ops pd692x0_ops = { + .pi_disable = pd692x0_pi_disable, + .pi_is_enabled = pd692x0_pi_is_enabled, + .pi_get_voltage = pd692x0_pi_get_voltage, +- .pi_get_current_limit = pd692x0_pi_get_current_limit, +- .pi_set_current_limit = pd692x0_pi_set_current_limit, ++ .pi_get_pw_limit = pd692x0_pi_get_pw_limit, ++ .pi_set_pw_limit = pd692x0_pi_set_pw_limit, + }; + + #define PD692X0_FW_LINE_MAX_SZ 0xff +diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c +index 9fee4dd53515a..4c5abef9e94ee 100644 +--- a/drivers/net/pse-pd/pse_core.c ++++ b/drivers/net/pse-pd/pse_core.c +@@ -291,33 +291,25 @@ static int pse_pi_get_voltage(struct regulator_dev *rdev) + return ret; + } + +-static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev, +- int id, +- struct netlink_ext_ack *extack, +- struct pse_control_status *status); +- + static int pse_pi_get_current_limit(struct regulator_dev 
*rdev) + { + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + const struct pse_controller_ops *ops; +- struct netlink_ext_ack extack = {}; +- struct pse_control_status st = {}; +- int id, uV, ret; ++ int id, uV, mW, ret; + s64 tmp_64; + + ops = pcdev->ops; + id = rdev_get_id(rdev); ++ if (!ops->pi_get_pw_limit || !ops->pi_get_voltage) ++ return -EOPNOTSUPP; ++ + mutex_lock(&pcdev->lock); +- if (ops->pi_get_current_limit) { +- ret = ops->pi_get_current_limit(pcdev, id); ++ ret = ops->pi_get_pw_limit(pcdev, id); ++ if (ret < 0) + goto out; +- } ++ mW = ret; + +- /* If pi_get_current_limit() callback not populated get voltage +- * from pi_get_voltage() and power limit from ethtool_get_status() +- * to calculate current limit. +- */ +- ret = _pse_pi_get_voltage(rdev); ++ ret = pse_pi_get_voltage(rdev); + if (!ret) { + dev_err(pcdev->dev, "Voltage null\n"); + ret = -ERANGE; +@@ -327,16 +319,7 @@ static int pse_pi_get_current_limit(struct regulator_dev *rdev) + goto out; + uV = ret; + +- ret = _pse_ethtool_get_status(pcdev, id, &extack, &st); +- if (ret) +- goto out; +- +- if (!st.c33_avail_pw_limit) { +- ret = -ENODATA; +- goto out; +- } +- +- tmp_64 = st.c33_avail_pw_limit; ++ tmp_64 = mW; + tmp_64 *= 1000000000ull; + /* uA = mW * 1000000000 / uV */ + ret = DIV_ROUND_CLOSEST_ULL(tmp_64, uV); +@@ -351,10 +334,11 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, + { + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + const struct pse_controller_ops *ops; +- int id, ret; ++ int id, mW, ret; ++ s64 tmp_64; + + ops = pcdev->ops; +- if (!ops->pi_set_current_limit) ++ if (!ops->pi_set_pw_limit || !ops->pi_get_voltage) + return -EOPNOTSUPP; + + if (max_uA > MAX_PI_CURRENT) +@@ -362,7 +346,21 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, + + id = rdev_get_id(rdev); + mutex_lock(&pcdev->lock); +- ret = ops->pi_set_current_limit(pcdev, id, max_uA); ++ ret = pse_pi_get_voltage(rdev); ++ if (!ret) { ++ dev_err(pcdev->dev, "Voltage null\n"); ++ ret = -ERANGE; ++ goto out; ++ } ++ if (ret < 0) ++ goto out; ++ ++ tmp_64 = ret; ++ tmp_64 *= max_uA; ++ /* mW = uA * uV / 1000000000 */ ++ mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000); ++ ret = ops->pi_set_pw_limit(pcdev, id, mW); ++out: + mutex_unlock(&pcdev->lock); + + return ret; +@@ -406,7 +404,7 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, + + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + +- if (pcdev->ops->pi_set_current_limit) ++ if (pcdev->ops->pi_set_pw_limit) + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; + +@@ -737,23 +735,6 @@ struct pse_control *of_pse_control_get(struct device_node *node) + } + EXPORT_SYMBOL_GPL(of_pse_control_get); + +-static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev, +- int id, +- struct netlink_ext_ack *extack, +- struct pse_control_status *status) +-{ +- const struct pse_controller_ops *ops; +- +- ops = pcdev->ops; +- if (!ops->ethtool_get_status) { +- NL_SET_ERR_MSG(extack, +- "PSE driver does not support status report"); +- return -EOPNOTSUPP; +- } +- +- return ops->ethtool_get_status(pcdev, id, extack, status); +-} +- + /** + * pse_ethtool_get_status - get status of PSE control + * @psec: PSE control pointer +@@ -766,11 +747,21 @@ int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status) + { ++ const struct pse_controller_ops *ops; ++ struct pse_controller_dev *pcdev; + int err; + +- 
mutex_lock(&psec->pcdev->lock); +- err = _pse_ethtool_get_status(psec->pcdev, psec->id, extack, status); +- mutex_unlock(&psec->pcdev->lock); ++ pcdev = psec->pcdev; ++ ops = pcdev->ops; ++ if (!ops->ethtool_get_status) { ++ NL_SET_ERR_MSG(extack, ++ "PSE driver does not support status report"); ++ return -EOPNOTSUPP; ++ } ++ ++ mutex_lock(&pcdev->lock); ++ err = ops->ethtool_get_status(pcdev, psec->id, extack, status); ++ mutex_unlock(&pcdev->lock); + + return err; + } +diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h +index 591a53e082e65..df1592022d938 100644 +--- a/include/linux/pse-pd/pse.h ++++ b/include/linux/pse-pd/pse.h +@@ -75,12 +75,8 @@ struct pse_control_status { + * @pi_disable: Configure the PSE PI as disabled. + * @pi_get_voltage: Return voltage similarly to get_voltage regulator + * callback. +- * @pi_get_current_limit: Get the configured current limit similarly to +- * get_current_limit regulator callback. +- * @pi_set_current_limit: Configure the current limit similarly to +- * set_current_limit regulator callback. +- * Should not return an error in case of MAX_PI_CURRENT +- * current value set. ++ * @pi_get_pw_limit: Get the configured power limit of the PSE PI. ++ * @pi_set_pw_limit: Configure the power limit of the PSE PI. + */ + struct pse_controller_ops { + int (*ethtool_get_status)(struct pse_controller_dev *pcdev, +@@ -91,10 +87,10 @@ struct pse_controller_ops { + int (*pi_enable)(struct pse_controller_dev *pcdev, int id); + int (*pi_disable)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id); +- int (*pi_get_current_limit)(struct pse_controller_dev *pcdev, +- int id); +- int (*pi_set_current_limit)(struct pse_controller_dev *pcdev, +- int id, int max_uA); ++ int (*pi_get_pw_limit)(struct pse_controller_dev *pcdev, ++ int id); ++ int (*pi_set_pw_limit)(struct pse_controller_dev *pcdev, ++ int id, int max_mW); + }; + + struct module; +-- +2.39.5 + diff --git a/queue-6.12/net-sched-cls_api-fix-error-handling-causing-null-de.patch b/queue-6.12/net-sched-cls_api-fix-error-handling-causing-null-de.patch new file mode 100644 index 0000000000..ec28ab51da --- /dev/null +++ b/queue-6.12/net-sched-cls_api-fix-error-handling-causing-null-de.patch @@ -0,0 +1,54 @@ +From 1467432376c47eb8f7d571c552346e263aad4c3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 23:36:10 +0100 +Subject: net/sched: cls_api: fix error handling causing NULL dereference + +From: Pierre Riteau + +[ Upstream commit 071ed42cff4fcdd89025d966d48eabef59913bf2 ] + +tcf_exts_miss_cookie_base_alloc() calls xa_alloc_cyclic() which can +return 1 if the allocation succeeded after wrapping. This was treated as +an error, with value 1 returned to caller tcf_exts_init_ex() which sets +exts->actions to NULL and returns 1 to caller fl_change(). + +fl_change() treats err == 1 as success, calling tcf_exts_validate_ex() +which calls tcf_action_init() with exts->actions as argument, where it +is dereferenced. 
+ +Example trace: + +BUG: kernel NULL pointer dereference, address: 0000000000000000 +CPU: 114 PID: 16151 Comm: handler114 Kdump: loaded Not tainted 5.14.0-503.16.1.el9_5.x86_64 #1 +RIP: 0010:tcf_action_init+0x1f8/0x2c0 +Call Trace: + tcf_action_init+0x1f8/0x2c0 + tcf_exts_validate_ex+0x175/0x190 + fl_change+0x537/0x1120 [cls_flower] + +Fixes: 80cd22c35c90 ("net/sched: cls_api: Support hardware miss to tc action") +Signed-off-by: Pierre Riteau +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/20250213223610.320278-1-pierre@stackhpc.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c +index dfa3067084948..998ea3b5badfc 100644 +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, + + err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, + n, xa_limit_32b, &next, GFP_KERNEL); +- if (err) ++ if (err < 0) + goto err_xa_alloc; + + exts->miss_cookie_node = n; +-- +2.39.5 + diff --git a/queue-6.12/pci-export-pci_intx_unmanaged-and-pcim_intx.patch b/queue-6.12/pci-export-pci_intx_unmanaged-and-pcim_intx.patch new file mode 100644 index 0000000000..d94fadc4ed --- /dev/null +++ b/queue-6.12/pci-export-pci_intx_unmanaged-and-pcim_intx.patch @@ -0,0 +1,147 @@ +From 153cbf56ef7ce1288abf905b43b3fb96e53e464d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 14:06:23 +0100 +Subject: PCI: Export pci_intx_unmanaged() and pcim_intx() + +From: Philipp Stanner + +[ Upstream commit f546e8033d8f3e45d49622f04ca2fde650b80f6d ] + +pci_intx() is a hybrid function which sometimes performs devres operations, +depending on whether pcim_enable_device() has been used to enable the +pci_dev. This sometimes-managed nature of the function is problematic. +Notably, it causes the function to allocate under some circumstances which +makes it unusable from interrupt context. + +Export pcim_intx() (which is always managed) and rename __pcim_intx() +(which is never managed) to pci_intx_unmanaged() and export it as well. + +Then all callers of pci_intx() can be ported to the version they need, +depending whether they use pci_enable_device() or pcim_enable_device(). + +Link: https://lore.kernel.org/r/20241209130632.132074-3-pstanner@redhat.com +Signed-off-by: Philipp Stanner +[bhelgaas: commit log] +Signed-off-by: Bjorn Helgaas +Reviewed-by: Damien Le Moal +Stable-dep-of: d555ed45a5a1 ("PCI: Restore original INTX_DISABLE bit by pcim_intx()") +Signed-off-by: Sasha Levin +--- + drivers/pci/devres.c | 24 +++--------------------- + drivers/pci/pci.c | 29 +++++++++++++++++++++++++++++ + include/linux/pci.h | 2 ++ + 3 files changed, 34 insertions(+), 21 deletions(-) + +diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c +index 2a64da5c91fb9..c3699105656a7 100644 +--- a/drivers/pci/devres.c ++++ b/drivers/pci/devres.c +@@ -411,31 +411,12 @@ static inline bool mask_contains_bar(int mask, int bar) + return mask & BIT(bar); + } + +-/* +- * This is a copy of pci_intx() used to bypass the problem of recursive +- * function calls due to the hybrid nature of pci_intx(). 
+- */ +-static void __pcim_intx(struct pci_dev *pdev, int enable) +-{ +- u16 pci_command, new; +- +- pci_read_config_word(pdev, PCI_COMMAND, &pci_command); +- +- if (enable) +- new = pci_command & ~PCI_COMMAND_INTX_DISABLE; +- else +- new = pci_command | PCI_COMMAND_INTX_DISABLE; +- +- if (new != pci_command) +- pci_write_config_word(pdev, PCI_COMMAND, new); +-} +- + static void pcim_intx_restore(struct device *dev, void *data) + { + struct pci_dev *pdev = to_pci_dev(dev); + struct pcim_intx_devres *res = data; + +- __pcim_intx(pdev, res->orig_intx); ++ pci_intx_unmanaged(pdev, res->orig_intx); + } + + static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) +@@ -472,10 +453,11 @@ int pcim_intx(struct pci_dev *pdev, int enable) + return -ENOMEM; + + res->orig_intx = !enable; +- __pcim_intx(pdev, enable); ++ pci_intx_unmanaged(pdev, enable); + + return 0; + } ++EXPORT_SYMBOL_GPL(pcim_intx); + + static void pcim_disable_device(void *pdev_raw) + { +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index dd3c6dcb47ae4..3916e0b23cdaf 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -4480,6 +4480,35 @@ void pci_disable_parity(struct pci_dev *dev) + } + } + ++/** ++ * pci_intx_unmanaged - enables/disables PCI INTx for device dev, ++ * unmanaged version ++ * @pdev: the PCI device to operate on ++ * @enable: boolean: whether to enable or disable PCI INTx ++ * ++ * Enables/disables PCI INTx for device @pdev ++ * ++ * This function behavios identically to pci_intx(), but is never managed with ++ * devres. ++ */ ++void pci_intx_unmanaged(struct pci_dev *pdev, int enable) ++{ ++ u16 pci_command, new; ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command); ++ ++ if (enable) ++ new = pci_command & ~PCI_COMMAND_INTX_DISABLE; ++ else ++ new = pci_command | PCI_COMMAND_INTX_DISABLE; ++ ++ if (new == pci_command) ++ return; ++ ++ pci_write_config_word(pdev, PCI_COMMAND, new); ++} ++EXPORT_SYMBOL_GPL(pci_intx_unmanaged); ++ + /** + * pci_intx - enables/disables PCI INTx for device dev + * @pdev: the PCI device to operate on +diff --git a/include/linux/pci.h b/include/linux/pci.h +index 000965a713edf..6ef32a8d146b1 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -1353,6 +1353,7 @@ int __must_check pcim_set_mwi(struct pci_dev *dev); + int pci_try_set_mwi(struct pci_dev *dev); + void pci_clear_mwi(struct pci_dev *dev); + void pci_disable_parity(struct pci_dev *dev); ++void pci_intx_unmanaged(struct pci_dev *pdev, int enable); + void pci_intx(struct pci_dev *dev, int enable); + bool pci_check_and_mask_intx(struct pci_dev *dev); + bool pci_check_and_unmask_intx(struct pci_dev *dev); +@@ -2293,6 +2294,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, + struct pci_dev *dev) { } + #endif + ++int pcim_intx(struct pci_dev *pdev, int enabled); + int pcim_request_all_regions(struct pci_dev *pdev, const char *name); + void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); + void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, +-- +2.39.5 + diff --git a/queue-6.12/pci-make-pcim_request_all_regions-a-public-function.patch b/queue-6.12/pci-make-pcim_request_all_regions-a-public-function.patch new file mode 100644 index 0000000000..e0f3ae8a3f --- /dev/null +++ b/queue-6.12/pci-make-pcim_request_all_regions-a-public-function.patch @@ -0,0 +1,66 @@ +From d6e8e5ea0e9129df2c32812c53a8d11d08d962a8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 12:27:34 +0100 +Subject: PCI: Make pcim_request_all_regions() 
a public function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Philipp Stanner + +[ Upstream commit d9d959c36bec59f11c0eb6b5308729e3c4901b5e ] + +In order to remove the deprecated function +pcim_iomap_regions_request_all(), a few drivers need an interface to +request all BARs a PCI device offers. + +Make pcim_request_all_regions() a public interface. + +Link: https://lore.kernel.org/r/20241030112743.104395-2-pstanner@redhat.com +Signed-off-by: Philipp Stanner +Signed-off-by: Bjorn Helgaas +Reviewed-by: Damien Le Moal +Reviewed-by: Ilpo Järvinen +Stable-dep-of: d555ed45a5a1 ("PCI: Restore original INTX_DISABLE bit by pcim_intx()") +Signed-off-by: Sasha Levin +--- + drivers/pci/devres.c | 3 ++- + include/linux/pci.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c +index b133967faef84..2a64da5c91fb9 100644 +--- a/drivers/pci/devres.c ++++ b/drivers/pci/devres.c +@@ -939,7 +939,7 @@ static void pcim_release_all_regions(struct pci_dev *pdev) + * desired, release individual regions with pcim_release_region() or all of + * them at once with pcim_release_all_regions(). + */ +-static int pcim_request_all_regions(struct pci_dev *pdev, const char *name) ++int pcim_request_all_regions(struct pci_dev *pdev, const char *name) + { + int ret; + int bar; +@@ -957,6 +957,7 @@ static int pcim_request_all_regions(struct pci_dev *pdev, const char *name) + + return ret; + } ++EXPORT_SYMBOL(pcim_request_all_regions); + + /** + * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones +diff --git a/include/linux/pci.h b/include/linux/pci.h +index 4e77c4230c0a1..000965a713edf 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -2293,6 +2293,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, + struct pci_dev *dev) { } + #endif + ++int pcim_request_all_regions(struct pci_dev *pdev, const char *name); + void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); + void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, + const char *name); +-- +2.39.5 + diff --git a/queue-6.12/pci-remove-devres-from-pci_intx.patch b/queue-6.12/pci-remove-devres-from-pci_intx.patch new file mode 100644 index 0000000000..d9e67bb206 --- /dev/null +++ b/queue-6.12/pci-remove-devres-from-pci_intx.patch @@ -0,0 +1,137 @@ +From 0af882dfedb2f1b98cfece017e98031e38a761d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 14:06:33 +0100 +Subject: PCI: Remove devres from pci_intx() + +From: Philipp Stanner + +[ Upstream commit dfa2f4d5f9e5d757700cefa8ee480099889f1c69 ] + +pci_intx() is a hybrid function which can sometimes be managed through +devres. This hybrid nature is undesirable. + +Since all users of pci_intx() have by now been ported either to +always-managed pcim_intx() or never-managed pci_intx_unmanaged(), the +devres functionality can be removed from pci_intx(). + +Consequently, pci_intx_unmanaged() is now redundant, because pci_intx() +itself is now unmanaged. + +Remove the devres functionality from pci_intx(). Have all users of +pci_intx_unmanaged() call pci_intx(). Remove pci_intx_unmanaged(). 
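+
+After this series the choice is explicit at every call site; a minimal
+sketch of the two variants from a driver's point of view:
+
+  pci_intx(pdev, 1);          /* unmanaged: plain config write, no undo */
+
+  ret = pcim_intx(pdev, 1);   /* managed: restored on driver detach */
+  if (ret)
+          return ret;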
+ +Link: https://lore.kernel.org/r/20241209130632.132074-13-pstanner@redhat.com +Signed-off-by: Philipp Stanner +Signed-off-by: Bjorn Helgaas +Acked-by: Paolo Abeni +Stable-dep-of: d555ed45a5a1 ("PCI: Restore original INTX_DISABLE bit by pcim_intx()") +Signed-off-by: Sasha Levin +--- + drivers/pci/devres.c | 4 ++-- + drivers/pci/pci.c | 43 ++----------------------------------------- + include/linux/pci.h | 1 - + 3 files changed, 4 insertions(+), 44 deletions(-) + +diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c +index c3699105656a7..70f1a46d07c5e 100644 +--- a/drivers/pci/devres.c ++++ b/drivers/pci/devres.c +@@ -416,7 +416,7 @@ static void pcim_intx_restore(struct device *dev, void *data) + struct pci_dev *pdev = to_pci_dev(dev); + struct pcim_intx_devres *res = data; + +- pci_intx_unmanaged(pdev, res->orig_intx); ++ pci_intx(pdev, res->orig_intx); + } + + static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) +@@ -453,7 +453,7 @@ int pcim_intx(struct pci_dev *pdev, int enable) + return -ENOMEM; + + res->orig_intx = !enable; +- pci_intx_unmanaged(pdev, enable); ++ pci_intx(pdev, enable); + + return 0; + } +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index 3916e0b23cdaf..1aa5d6f98ebda 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -4481,17 +4481,13 @@ void pci_disable_parity(struct pci_dev *dev) + } + + /** +- * pci_intx_unmanaged - enables/disables PCI INTx for device dev, +- * unmanaged version ++ * pci_intx - enables/disables PCI INTx for device dev + * @pdev: the PCI device to operate on + * @enable: boolean: whether to enable or disable PCI INTx + * + * Enables/disables PCI INTx for device @pdev +- * +- * This function behavios identically to pci_intx(), but is never managed with +- * devres. + */ +-void pci_intx_unmanaged(struct pci_dev *pdev, int enable) ++void pci_intx(struct pci_dev *pdev, int enable) + { + u16 pci_command, new; + +@@ -4507,41 +4503,6 @@ void pci_intx_unmanaged(struct pci_dev *pdev, int enable) + + pci_write_config_word(pdev, PCI_COMMAND, new); + } +-EXPORT_SYMBOL_GPL(pci_intx_unmanaged); +- +-/** +- * pci_intx - enables/disables PCI INTx for device dev +- * @pdev: the PCI device to operate on +- * @enable: boolean: whether to enable or disable PCI INTx +- * +- * Enables/disables PCI INTx for device @pdev +- * +- * NOTE: +- * This is a "hybrid" function: It's normally unmanaged, but becomes managed +- * when pcim_enable_device() has been called in advance. This hybrid feature is +- * DEPRECATED! If you want managed cleanup, use pcim_intx() instead. 
+- */ +-void pci_intx(struct pci_dev *pdev, int enable) +-{ +- u16 pci_command, new; +- +- pci_read_config_word(pdev, PCI_COMMAND, &pci_command); +- +- if (enable) +- new = pci_command & ~PCI_COMMAND_INTX_DISABLE; +- else +- new = pci_command | PCI_COMMAND_INTX_DISABLE; +- +- if (new != pci_command) { +- /* Preserve the "hybrid" behavior for backwards compatibility */ +- if (pci_is_managed(pdev)) { +- WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); +- return; +- } +- +- pci_write_config_word(pdev, PCI_COMMAND, new); +- } +-} + EXPORT_SYMBOL_GPL(pci_intx); + + /** +diff --git a/include/linux/pci.h b/include/linux/pci.h +index 6ef32a8d146b1..74114acbb07fb 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -1353,7 +1353,6 @@ int __must_check pcim_set_mwi(struct pci_dev *dev); + int pci_try_set_mwi(struct pci_dev *dev); + void pci_clear_mwi(struct pci_dev *dev); + void pci_disable_parity(struct pci_dev *dev); +-void pci_intx_unmanaged(struct pci_dev *pdev, int enable); + void pci_intx(struct pci_dev *dev, int enable); + bool pci_check_and_mask_intx(struct pci_dev *dev); + bool pci_check_and_unmask_intx(struct pci_dev *dev); +-- +2.39.5 + diff --git a/queue-6.12/pci-restore-original-intx_disable-bit-by-pcim_intx.patch b/queue-6.12/pci-restore-original-intx_disable-bit-by-pcim_intx.patch new file mode 100644 index 0000000000..04a4232179 --- /dev/null +++ b/queue-6.12/pci-restore-original-intx_disable-bit-by-pcim_intx.patch @@ -0,0 +1,111 @@ +From 67f6dd8c21b0b01ba4b3c88703096e2ecc117332 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 31 Oct 2024 14:42:56 +0100 +Subject: PCI: Restore original INTX_DISABLE bit by pcim_intx() + +From: Takashi Iwai + +[ Upstream commit d555ed45a5a10a813528c7685f432369d536ae3d ] + +pcim_intx() tries to restore the INTx bit at removal via devres, but there +is a chance that it restores a wrong value. + +Because the value to be restored is blindly assumed to be the negative of +the enable argument, when a driver calls pcim_intx() unnecessarily for the +already enabled state, it'll restore to the disabled state in turn. That +is, the function assumes the case like: + + // INTx == 1 + pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> correct + +but it might be like the following, too: + + // INTx == 0 + pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> wrong + +Also, when a driver calls pcim_intx() multiple times with different enable +argument values, the last one will win no matter what value it is. This +can lead to inconsistency, e.g. + + // INTx == 1 + pcim_intx(pdev, 0); // OK + ... + pcim_intx(pdev, 1); // now old INTx wrongly assumed to be 0 + +This patch addresses those inconsistencies by saving the original INTx +state at the first pcim_intx() call. For that, get_or_create_intx_devres() +is folded into pcim_intx() caller side; it allows us to simply check the +already allocated devres and record the original INTx along with the +devres_alloc() call. 
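+
+With the original state recorded at the first call, the problematic
+sequences above now restore the pre-probe value; for example (sketch):
+
+  // INTx == 0 before the driver is bound
+  pcim_intx(pdev, 0);  // first call: devres saves orig_intx = 0
+  pcim_intx(pdev, 1);  // later calls do not touch the saved value
+  ...
+  // on driver detach devres restores INTx to 0, the pre-probe state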
+ +Link: https://lore.kernel.org/r/20241031134300.10296-1-tiwai@suse.de +Fixes: 25216afc9db5 ("PCI: Add managed pcim_intx()") +Link: https://lore.kernel.org/87v7xk2ps5.wl-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Bjorn Helgaas +Reviewed-by: Philipp Stanner +Cc: stable@vger.kernel.org # v6.11+ +Signed-off-by: Sasha Levin +--- + drivers/pci/devres.c | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c +index 70f1a46d07c5e..643f85849ef64 100644 +--- a/drivers/pci/devres.c ++++ b/drivers/pci/devres.c +@@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data) + pci_intx(pdev, res->orig_intx); + } + +-static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) ++static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res) + { +- struct pcim_intx_devres *res; +- +- res = devres_find(dev, pcim_intx_restore, NULL, NULL); +- if (res) +- return res; ++ u16 pci_command; + +- res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); +- if (res) +- devres_add(dev, res); +- +- return res; ++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command); ++ res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE); + } + + /** +@@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) + int pcim_intx(struct pci_dev *pdev, int enable) + { + struct pcim_intx_devres *res; ++ struct device *dev = &pdev->dev; + +- res = get_or_create_intx_devres(&pdev->dev); +- if (!res) +- return -ENOMEM; ++ /* ++ * pcim_intx() must only restore the INTx value that existed before the ++ * driver was loaded, i.e., before it called pcim_intx() for the ++ * first time. ++ */ ++ res = devres_find(dev, pcim_intx_restore, NULL, NULL); ++ if (!res) { ++ res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); ++ if (!res) ++ return -ENOMEM; ++ ++ save_orig_intx(pdev, res); ++ devres_add(dev, res); ++ } + +- res->orig_intx = !enable; + pci_intx(pdev, enable); + + return 0; +-- +2.39.5 + diff --git a/queue-6.12/powerpc-64s-rewrite-__real_pte-and-__rpte_to_hidx-as.patch b/queue-6.12/powerpc-64s-rewrite-__real_pte-and-__rpte_to_hidx-as.patch new file mode 100644 index 0000000000..b56d8c4d8c --- /dev/null +++ b/queue-6.12/powerpc-64s-rewrite-__real_pte-and-__rpte_to_hidx-as.patch @@ -0,0 +1,64 @@ +From 3d2caebfc20e7affe828ab13284b9574f41b15e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 12 Jan 2025 19:24:46 +0100 +Subject: powerpc/64s: Rewrite __real_pte() and __rpte_to_hidx() as static + inline + +From: Christophe Leroy + +[ Upstream commit 61bcc752d1b81fde3cae454ff20c1d3c359df500 ] + +Rewrite __real_pte() and __rpte_to_hidx() as static inline in order to +avoid following warnings/errors when building with 4k page size: + + CC arch/powerpc/mm/book3s64/hash_tlb.o + arch/powerpc/mm/book3s64/hash_tlb.c: In function 'hpte_need_flush': + arch/powerpc/mm/book3s64/hash_tlb.c:49:16: error: variable 'offset' set but not used [-Werror=unused-but-set-variable] + 49 | int i, offset; + | ^~~~~~ + + CC arch/powerpc/mm/book3s64/hash_native.o + arch/powerpc/mm/book3s64/hash_native.c: In function 'native_flush_hash_range': + arch/powerpc/mm/book3s64/hash_native.c:782:29: error: variable 'index' set but not used [-Werror=unused-but-set-variable] + 782 | unsigned long hash, index, hidx, shift, slot; + | ^~~~~ + +Reported-by: kernel test robot +Closes: 
https://lore.kernel.org/oe-kbuild-all/202501081741.AYFwybsq-lkp@intel.com/ +Fixes: ff31e105464d ("powerpc/mm/hash64: Store the slot information at the right offset for hugetlb") +Signed-off-by: Christophe Leroy +Reviewed-by: Ritesh Harjani (IBM) +Signed-off-by: Madhavan Srinivasan +Link: https://patch.msgid.link/e0d340a5b7bd478ecbf245d826e6ab2778b74e06.1736706263.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/book3s/64/hash-4k.h | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h +index c3efacab4b941..aa90a048f319a 100644 +--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h ++++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h +@@ -77,9 +77,17 @@ + /* + * With 4K page size the real_pte machinery is all nops. + */ +-#define __real_pte(e, p, o) ((real_pte_t){(e)}) ++static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) ++{ ++ return (real_pte_t){pte}; ++} ++ + #define __rpte_to_pte(r) ((r).pte) +-#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) ++ ++static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) ++{ ++ return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT; ++} + + #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ +-- +2.39.5 + diff --git a/queue-6.12/powerpc-code-patching-disable-kasan-report-during-pa.patch b/queue-6.12/powerpc-code-patching-disable-kasan-report-during-pa.patch new file mode 100644 index 0000000000..8636620c8a --- /dev/null +++ b/queue-6.12/powerpc-code-patching-disable-kasan-report-during-pa.patch @@ -0,0 +1,104 @@ +From f2d35dea482df603137f7febbcd49ba1a3fac1dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Feb 2025 11:14:57 +0100 +Subject: powerpc/code-patching: Disable KASAN report during patching via + temporary mm + +From: Christophe Leroy + +[ Upstream commit dc9c5166c3cb044f8a001e397195242fd6796eee ] + +Erhard reports the following KASAN hit on Talos II (power9) with kernel 6.13: + +[ 12.028126] ================================================================== +[ 12.028198] BUG: KASAN: user-memory-access in copy_to_kernel_nofault+0x8c/0x1a0 +[ 12.028260] Write of size 8 at addr 0000187e458f2000 by task systemd/1 + +[ 12.028346] CPU: 87 UID: 0 PID: 1 Comm: systemd Tainted: G T 6.13.0-P9-dirty #3 +[ 12.028408] Tainted: [T]=RANDSTRUCT +[ 12.028446] Hardware name: T2P9D01 REV 1.01 POWER9 0x4e1202 opal:skiboot-bc106a0 PowerNV +[ 12.028500] Call Trace: +[ 12.028536] [c000000008dbf3b0] [c000000001656a48] dump_stack_lvl+0xbc/0x110 (unreliable) +[ 12.028609] [c000000008dbf3f0] [c0000000006e2fc8] print_report+0x6b0/0x708 +[ 12.028666] [c000000008dbf4e0] [c0000000006e2454] kasan_report+0x164/0x300 +[ 12.028725] [c000000008dbf600] [c0000000006e54d4] kasan_check_range+0x314/0x370 +[ 12.028784] [c000000008dbf640] [c0000000006e6310] __kasan_check_write+0x20/0x40 +[ 12.028842] [c000000008dbf660] [c000000000578e8c] copy_to_kernel_nofault+0x8c/0x1a0 +[ 12.028902] [c000000008dbf6a0] [c0000000000acfe4] __patch_instructions+0x194/0x210 +[ 12.028965] [c000000008dbf6e0] [c0000000000ade80] patch_instructions+0x150/0x590 +[ 12.029026] [c000000008dbf7c0] [c0000000001159bc] bpf_arch_text_copy+0x6c/0xe0 +[ 12.029085] [c000000008dbf800] [c000000000424250] bpf_jit_binary_pack_finalize+0x40/0xc0 +[ 12.029147] [c000000008dbf830] [c000000000115dec] bpf_int_jit_compile+0x3bc/0x930 +[ 12.029206] 
[c000000008dbf990] [c000000000423720] bpf_prog_select_runtime+0x1f0/0x280 +[ 12.029266] [c000000008dbfa00] [c000000000434b18] bpf_prog_load+0xbb8/0x1370 +[ 12.029324] [c000000008dbfb70] [c000000000436ebc] __sys_bpf+0x5ac/0x2e00 +[ 12.029379] [c000000008dbfd00] [c00000000043a228] sys_bpf+0x28/0x40 +[ 12.029435] [c000000008dbfd20] [c000000000038eb4] system_call_exception+0x334/0x610 +[ 12.029497] [c000000008dbfe50] [c00000000000c270] system_call_vectored_common+0xf0/0x280 +[ 12.029561] --- interrupt: 3000 at 0x3fff82f5cfa8 +[ 12.029608] NIP: 00003fff82f5cfa8 LR: 00003fff82f5cfa8 CTR: 0000000000000000 +[ 12.029660] REGS: c000000008dbfe80 TRAP: 3000 Tainted: G T (6.13.0-P9-dirty) +[ 12.029735] MSR: 900000000280f032 CR: 42004848 XER: 00000000 +[ 12.029855] IRQMASK: 0 + GPR00: 0000000000000169 00003fffdcf789a0 00003fff83067100 0000000000000005 + GPR04: 00003fffdcf78a98 0000000000000090 0000000000000000 0000000000000008 + GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + GPR12: 0000000000000000 00003fff836ff7e0 c000000000010678 0000000000000000 + GPR16: 0000000000000000 0000000000000000 00003fffdcf78f28 00003fffdcf78f90 + GPR20: 0000000000000000 0000000000000000 0000000000000000 00003fffdcf78f80 + GPR24: 00003fffdcf78f70 00003fffdcf78d10 00003fff835c7239 00003fffdcf78bd8 + GPR28: 00003fffdcf78a98 0000000000000000 0000000000000000 000000011f547580 +[ 12.030316] NIP [00003fff82f5cfa8] 0x3fff82f5cfa8 +[ 12.030361] LR [00003fff82f5cfa8] 0x3fff82f5cfa8 +[ 12.030405] --- interrupt: 3000 +[ 12.030444] ================================================================== + +Commit c28c15b6d28a ("powerpc/code-patching: Use temporary mm for +Radix MMU") is inspired from x86 but unlike x86 is doesn't disable +KASAN reports during patching. This wasn't a problem at the begining +because __patch_mem() is not instrumented. + +Commit 465cabc97b42 ("powerpc/code-patching: introduce +patch_instructions()") use copy_to_kernel_nofault() to copy several +instructions at once. But when using temporary mm the destination is +not regular kernel memory but a kind of kernel-like memory located +in user address space. Because it is not in kernel address space it is +not covered by KASAN shadow memory. Since commit e4137f08816b ("mm, +kasan, kmsan: instrument copy_from/to_kernel_nofault") KASAN reports +bad accesses from copy_to_kernel_nofault(). Here a bad access to user +memory is reported because KASAN detects the lack of shadow memory and +the address is below TASK_SIZE. + +Do like x86 in commit b3fd8e83ada0 ("x86/alternatives: Use temporary +mm for text poking") and disable KASAN reports during patching when +using temporary mm. 
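+
+kasan_disable_current()/kasan_enable_current() only silence reports for
+the current task (with generic KASAN they bump current->kasan_depth), so
+the window without coverage is limited to the copy into the temporary
+mm; the shape of the fix is simply (see the hunk below):
+
+  kasan_disable_current();
+  err = __patch_instructions(patch_addr, code, len, repeat_instr);
+  kasan_enable_current();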
+ +Reported-by: Erhard Furtner +Close: https://lore.kernel.org/all/20250201151435.48400261@yea/ +Fixes: 465cabc97b42 ("powerpc/code-patching: introduce patch_instructions()") +Signed-off-by: Christophe Leroy +Acked-by: Michael Ellerman +Signed-off-by: Madhavan Srinivasan +Link: https://patch.msgid.link/1c05b2a1b02ad75b981cfc45927e0b4a90441046.1738577687.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/lib/code-patching.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c +index acdab294b340a..2685d7efea511 100644 +--- a/arch/powerpc/lib/code-patching.c ++++ b/arch/powerpc/lib/code-patching.c +@@ -493,7 +493,9 @@ static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool rep + + orig_mm = start_using_temp_mm(patching_mm); + ++ kasan_disable_current(); + err = __patch_instructions(patch_addr, code, len, repeat_instr); ++ kasan_enable_current(); + + /* context synchronisation performed by __patch_instructions */ + stop_using_temp_mm(patching_mm, orig_mm); +-- +2.39.5 + diff --git a/queue-6.12/powerpc-code-patching-fix-kasan-hit-by-not-flagging-.patch b/queue-6.12/powerpc-code-patching-fix-kasan-hit-by-not-flagging-.patch new file mode 100644 index 0000000000..2b2ba4e154 --- /dev/null +++ b/queue-6.12/powerpc-code-patching-fix-kasan-hit-by-not-flagging-.patch @@ -0,0 +1,112 @@ +From 12ebcd30942c2bb25ff6ec9634e20a3c1a625440 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Feb 2025 07:46:28 +0100 +Subject: powerpc/code-patching: Fix KASAN hit by not flagging text patching + area as VM_ALLOC + +From: Christophe Leroy + +[ Upstream commit d262a192d38e527faa5984629aabda2e0d1c4f54 ] + +Erhard reported the following KASAN hit while booting his PowerMac G4 +with a KASAN-enabled kernel 6.13-rc6: + + BUG: KASAN: vmalloc-out-of-bounds in copy_to_kernel_nofault+0xd8/0x1c8 + Write of size 8 at addr f1000000 by task chronyd/1293 + + CPU: 0 UID: 123 PID: 1293 Comm: chronyd Tainted: G W 6.13.0-rc6-PMacG4 #2 + Tainted: [W]=WARN + Hardware name: PowerMac3,6 7455 0x80010303 PowerMac + Call Trace: + [c2437590] [c1631a84] dump_stack_lvl+0x70/0x8c (unreliable) + [c24375b0] [c0504998] print_report+0xdc/0x504 + [c2437610] [c050475c] kasan_report+0xf8/0x108 + [c2437690] [c0505a3c] kasan_check_range+0x24/0x18c + [c24376a0] [c03fb5e4] copy_to_kernel_nofault+0xd8/0x1c8 + [c24376c0] [c004c014] patch_instructions+0x15c/0x16c + [c2437710] [c00731a8] bpf_arch_text_copy+0x60/0x7c + [c2437730] [c0281168] bpf_jit_binary_pack_finalize+0x50/0xac + [c2437750] [c0073cf4] bpf_int_jit_compile+0xb30/0xdec + [c2437880] [c0280394] bpf_prog_select_runtime+0x15c/0x478 + [c24378d0] [c1263428] bpf_prepare_filter+0xbf8/0xc14 + [c2437990] [c12677ec] bpf_prog_create_from_user+0x258/0x2b4 + [c24379d0] [c027111c] do_seccomp+0x3dc/0x1890 + [c2437ac0] [c001d8e0] system_call_exception+0x2dc/0x420 + [c2437f30] [c00281ac] ret_from_syscall+0x0/0x2c + --- interrupt: c00 at 0x5a1274 + NIP: 005a1274 LR: 006a3b3c CTR: 005296c8 + REGS: c2437f40 TRAP: 0c00 Tainted: G W (6.13.0-rc6-PMacG4) + MSR: 0200f932 CR: 24004422 XER: 00000000 + + GPR00: 00000166 af8f3fa0 a7ee3540 00000001 00000000 013b6500 005a5858 0200f932 + GPR08: 00000000 00001fe9 013d5fc8 005296c8 2822244c 00b2fcd8 00000000 af8f4b57 + GPR16: 00000000 00000001 00000000 00000000 00000000 00000001 00000000 00000002 + GPR24: 00afdbb0 00000000 00000000 00000000 006e0004 013ce060 006e7c1c 00000001 + NIP [005a1274] 0x5a1274 + LR [006a3b3c] 0x6a3b3c + --- interrupt: c00 + + 
The buggy address belongs to the virtual mapping at + [f1000000, f1002000) created by: + text_area_cpu_up+0x20/0x190 + + The buggy address belongs to the physical page: + page: refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x76e30 + flags: 0x80000000(zone=2) + raw: 80000000 00000000 00000122 00000000 00000000 00000000 ffffffff 00000001 + raw: 00000000 + page dumped because: kasan: bad access detected + + Memory state around the buggy address: + f0ffff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + f0ffff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + >f1000000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 + ^ + f1000080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 + f1000100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 + ================================================================== + +f8 corresponds to KASAN_VMALLOC_INVALID which means the area is not +initialised hence not supposed to be used yet. + +Powerpc text patching infrastructure allocates a virtual memory area +using get_vm_area() and flags it as VM_ALLOC. But that flag is meant +to be used for vmalloc() and vmalloc() allocated memory is not +supposed to be used before a call to __vmalloc_node_range() which is +never called for that area. + +That went undetected until commit e4137f08816b ("mm, kasan, kmsan: +instrument copy_from/to_kernel_nofault") + +The area allocated by text_area_cpu_up() is not vmalloc memory, it is +mapped directly on demand when needed by map_kernel_page(). There is +no VM flag corresponding to such usage, so just pass no flag. That way +the area will be unpoisonned and usable immediately. + +Reported-by: Erhard Furtner +Closes: https://lore.kernel.org/all/20250112135832.57c92322@yea/ +Fixes: 37bc3e5fd764 ("powerpc/lib/code-patching: Use alternate map for patch_instruction()") +Signed-off-by: Christophe Leroy +Signed-off-by: Madhavan Srinivasan +Link: https://patch.msgid.link/06621423da339b374f48c0886e3a5db18e896be8.1739342693.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/lib/code-patching.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c +index 2685d7efea511..c1d9b031f0d57 100644 +--- a/arch/powerpc/lib/code-patching.c ++++ b/arch/powerpc/lib/code-patching.c +@@ -108,7 +108,7 @@ static int text_area_cpu_up(unsigned int cpu) + unsigned long addr; + int err; + +- area = get_vm_area(PAGE_SIZE, VM_ALLOC); ++ area = get_vm_area(PAGE_SIZE, 0); + if (!area) { + WARN_ONCE(1, "Failed to create text area for cpu %d\n", + cpu); +-- +2.39.5 + diff --git a/queue-6.12/s390-ism-add-release-function-for-struct-device.patch b/queue-6.12/s390-ism-add-release-function-for-struct-device.patch new file mode 100644 index 0000000000..b8186d3099 --- /dev/null +++ b/queue-6.12/s390-ism-add-release-function-for-struct-device.patch @@ -0,0 +1,81 @@ +From 8c4ad73abf8516648028b31c22dd28de97ec3ca0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 13:01:37 +0100 +Subject: s390/ism: add release function for struct device + +From: Julian Ruess + +[ Upstream commit 915e34d5ad35a6a9e56113f852ade4a730fb88f0 ] + +According to device_release() in /drivers/base/core.c, +a device without a release function is a broken device +and must be fixed. + +The current code directly frees the device after calling device_add() +without waiting for other kernel parts to release their references. 
+Thus, a reference could still be held to a struct device, +e.g., by sysfs, leading to potential use-after-free +issues if a proper release function is not set. + +Fixes: 8c81ba20349d ("net/smc: De-tangle ism and smc device initialization") +Reviewed-by: Alexandra Winter +Reviewed-by: Wenjia Zhang +Signed-off-by: Julian Ruess +Signed-off-by: Alexandra Winter +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250214120137.563409-1-wintera@linux.ibm.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ism_drv.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index e36e3ea165d3b..2f34761e64135 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -588,6 +588,15 @@ static int ism_dev_init(struct ism_dev *ism) + return ret; + } + ++static void ism_dev_release(struct device *dev) ++{ ++ struct ism_dev *ism; ++ ++ ism = container_of(dev, struct ism_dev, dev); ++ ++ kfree(ism); ++} ++ + static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { + struct ism_dev *ism; +@@ -601,6 +610,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) + dev_set_drvdata(&pdev->dev, ism); + ism->pdev = pdev; + ism->dev.parent = &pdev->dev; ++ ism->dev.release = ism_dev_release; + device_initialize(&ism->dev); + dev_set_name(&ism->dev, dev_name(&pdev->dev)); + ret = device_add(&ism->dev); +@@ -637,7 +647,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) + device_del(&ism->dev); + err_dev: + dev_set_drvdata(&pdev->dev, NULL); +- kfree(ism); ++ put_device(&ism->dev); + + return ret; + } +@@ -682,7 +692,7 @@ static void ism_remove(struct pci_dev *pdev) + pci_disable_device(pdev); + device_del(&ism->dev); + dev_set_drvdata(&pdev->dev, NULL); +- kfree(ism); ++ put_device(&ism->dev); + } + + static struct pci_driver ism_driver = { +-- +2.39.5 + diff --git a/queue-6.12/sched_ext-factor-out-move_task_between_dsqs-from-scx.patch b/queue-6.12/sched_ext-factor-out-move_task_between_dsqs-from-scx.patch new file mode 100644 index 0000000000..27c1cb5f9d --- /dev/null +++ b/queue-6.12/sched_ext-factor-out-move_task_between_dsqs-from-scx.patch @@ -0,0 +1,168 @@ +From cd59bfedb7998c5e3dfe075536bd2e28df4a0732 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Sep 2024 14:08:52 -1000 +Subject: sched_ext: Factor out move_task_between_dsqs() from + scx_dispatch_from_dsq() + +From: Tejun Heo + +[ Upstream commit 8427acb6b5861d205abca7afa656a897bbae34b7 ] + +Pure reorganization. No functional changes. 
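+
+The locking contract of the new helper, per its kerneldoc, from a
+caller's point of view (sketch):
+
+  /* task_rq(p) and src_dsq->lock are held here, src_dsq is non-local */
+  locked_rq = move_task_between_dsqs(p, enq_flags, src_dsq, dst_dsq);
+  /* src_dsq->lock has been dropped; only locked_rq, which is p's
+   * (possibly new) task_rq, is still locked */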
+ +Signed-off-by: Tejun Heo +Stable-dep-of: 32966821574c ("sched_ext: Fix migration disabled handling in targeted dispatches") +Signed-off-by: Sasha Levin +--- + kernel/sched/ext.c | 116 +++++++++++++++++++++++++++++---------------- + 1 file changed, 75 insertions(+), 41 deletions(-) + +diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c +index 689f7e8f69f54..97076748dee0e 100644 +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -2397,6 +2397,73 @@ static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *r + static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; } + #endif /* CONFIG_SMP */ + ++/** ++ * move_task_between_dsqs() - Move a task from one DSQ to another ++ * @p: target task ++ * @enq_flags: %SCX_ENQ_* ++ * @src_dsq: DSQ @p is currently on, must not be a local DSQ ++ * @dst_dsq: DSQ @p is being moved to, can be any DSQ ++ * ++ * Must be called with @p's task_rq and @src_dsq locked. If @dst_dsq is a local ++ * DSQ and @p is on a different CPU, @p will be migrated and thus its task_rq ++ * will change. As @p's task_rq is locked, this function doesn't need to use the ++ * holding_cpu mechanism. ++ * ++ * On return, @src_dsq is unlocked and only @p's new task_rq, which is the ++ * return value, is locked. ++ */ ++static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, ++ struct scx_dispatch_q *src_dsq, ++ struct scx_dispatch_q *dst_dsq) ++{ ++ struct rq *src_rq = task_rq(p), *dst_rq; ++ ++ BUG_ON(src_dsq->id == SCX_DSQ_LOCAL); ++ lockdep_assert_held(&src_dsq->lock); ++ lockdep_assert_rq_held(src_rq); ++ ++ if (dst_dsq->id == SCX_DSQ_LOCAL) { ++ dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); ++ if (!task_can_run_on_remote_rq(p, dst_rq, true)) { ++ dst_dsq = find_global_dsq(p); ++ dst_rq = src_rq; ++ } ++ } else { ++ /* no need to migrate if destination is a non-local DSQ */ ++ dst_rq = src_rq; ++ } ++ ++ /* ++ * Move @p into $dst_dsq. If $dst_dsq is the local DSQ of a different ++ * CPU, @p will be migrated. ++ */ ++ if (dst_dsq->id == SCX_DSQ_LOCAL) { ++ /* @p is going from a non-local DSQ to a local DSQ */ ++ if (src_rq == dst_rq) { ++ task_unlink_from_dsq(p, src_dsq); ++ move_local_task_to_local_dsq(p, enq_flags, ++ src_dsq, dst_rq); ++ raw_spin_unlock(&src_dsq->lock); ++ } else { ++ raw_spin_unlock(&src_dsq->lock); ++ move_remote_task_to_local_dsq(p, enq_flags, ++ src_rq, dst_rq); ++ } ++ } else { ++ /* ++ * @p is going from a non-local DSQ to a non-local DSQ. As ++ * $src_dsq is already locked, do an abbreviated dequeue. 
++ */ ++ task_unlink_from_dsq(p, src_dsq); ++ p->scx.dsq = NULL; ++ raw_spin_unlock(&src_dsq->lock); ++ ++ dispatch_enqueue(dst_dsq, p, enq_flags); ++ } ++ ++ return dst_rq; ++} ++ + static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) + { + struct task_struct *p; +@@ -6134,7 +6201,7 @@ static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit, + u64 enq_flags) + { + struct scx_dispatch_q *src_dsq = kit->dsq, *dst_dsq; +- struct rq *this_rq, *src_rq, *dst_rq, *locked_rq; ++ struct rq *this_rq, *src_rq, *locked_rq; + bool dispatched = false; + bool in_balance; + unsigned long flags; +@@ -6180,51 +6247,18 @@ static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit, + /* @p is still on $src_dsq and stable, determine the destination */ + dst_dsq = find_dsq_for_dispatch(this_rq, dsq_id, p); + +- if (dst_dsq->id == SCX_DSQ_LOCAL) { +- dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); +- if (!task_can_run_on_remote_rq(p, dst_rq, true)) { +- dst_dsq = find_global_dsq(p); +- dst_rq = src_rq; +- } +- } else { +- /* no need to migrate if destination is a non-local DSQ */ +- dst_rq = src_rq; +- } +- + /* +- * Move @p into $dst_dsq. If $dst_dsq is the local DSQ of a different +- * CPU, @p will be migrated. ++ * Apply vtime and slice updates before moving so that the new time is ++ * visible before inserting into $dst_dsq. @p is still on $src_dsq but ++ * this is safe as we're locking it. + */ +- if (dst_dsq->id == SCX_DSQ_LOCAL) { +- /* @p is going from a non-local DSQ to a local DSQ */ +- if (src_rq == dst_rq) { +- task_unlink_from_dsq(p, src_dsq); +- move_local_task_to_local_dsq(p, enq_flags, +- src_dsq, dst_rq); +- raw_spin_unlock(&src_dsq->lock); +- } else { +- raw_spin_unlock(&src_dsq->lock); +- move_remote_task_to_local_dsq(p, enq_flags, +- src_rq, dst_rq); +- locked_rq = dst_rq; +- } +- } else { +- /* +- * @p is going from a non-local DSQ to a non-local DSQ. As +- * $src_dsq is already locked, do an abbreviated dequeue. +- */ +- task_unlink_from_dsq(p, src_dsq); +- p->scx.dsq = NULL; +- raw_spin_unlock(&src_dsq->lock); +- +- if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_VTIME) +- p->scx.dsq_vtime = kit->vtime; +- dispatch_enqueue(dst_dsq, p, enq_flags); +- } +- ++ if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_VTIME) ++ p->scx.dsq_vtime = kit->vtime; + if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_SLICE) + p->scx.slice = kit->slice; + ++ /* execute move */ ++ locked_rq = move_task_between_dsqs(p, enq_flags, src_dsq, dst_dsq); + dispatched = true; + out: + if (in_balance) { +-- +2.39.5 + diff --git a/queue-6.12/sched_ext-fix-migration-disabled-handling-in-targete.patch b/queue-6.12/sched_ext-fix-migration-disabled-handling-in-targete.patch new file mode 100644 index 0000000000..f17a43e674 --- /dev/null +++ b/queue-6.12/sched_ext-fix-migration-disabled-handling-in-targete.patch @@ -0,0 +1,101 @@ +From 4ed1445add52861d73826ed98afbc340b141ef3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Feb 2025 10:59:06 -1000 +Subject: sched_ext: Fix migration disabled handling in targeted dispatches + +From: Tejun Heo + +[ Upstream commit 32966821574cd2917bd60f2554f435fe527f4702 ] + +A dispatch operation that can target a specific local DSQ - +scx_bpf_dsq_move_to_local() or scx_bpf_dsq_move() - checks whether the task +can be migrated to the target CPU using task_can_run_on_remote_rq(). If the +task can't be migrated to the targeted CPU, it is bounced through a global +DSQ. 
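+
+A condensed sketch of that decision as it appears in the dispatch path
+(trimmed from move_task_between_dsqs() introduced by the previous
+patch):
+
+  if (dst_dsq->id == SCX_DSQ_LOCAL) {
+          dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
+          if (!task_can_run_on_remote_rq(p, dst_rq, true)) {
+                  /* cannot migrate there: bounce via a global DSQ */
+                  dst_dsq = find_global_dsq(p);
+                  dst_rq = src_rq;
+          }
+  }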
+ +task_can_run_on_remote_rq() assumes that the task is on a CPU that's +different from the targeted CPU but the callers doesn't uphold the +assumption and may call the function when the task is already on the target +CPU. When such task has migration disabled, task_can_run_on_remote_rq() ends +up returning %false incorrectly unnecessarily bouncing the task to a global +DSQ. + +Fix it by updating the callers to only call task_can_run_on_remote_rq() when +the task is on a different CPU than the target CPU. As this is a bit subtle, +for clarity and documentation: + +- Make task_can_run_on_remote_rq() trigger SCHED_WARN_ON() if the task is on + the same CPU as the target CPU. + +- is_migration_disabled() test in task_can_run_on_remote_rq() cannot trigger + if the task is on a different CPU than the target CPU as the preceding + task_allowed_on_cpu() test should fail beforehand. Convert the test into + SCHED_WARN_ON(). + +Signed-off-by: Tejun Heo +Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") +Fixes: 0366017e0973 ("sched_ext: Use task_can_run_on_remote_rq() test in dispatch_to_local_dsq()") +Cc: stable@vger.kernel.org # v6.12+ +Signed-off-by: Sasha Levin +--- + kernel/sched/ext.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c +index 97076748dee0e..0fc18fc5753b0 100644 +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -2300,12 +2300,16 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, + * + * - The BPF scheduler is bypassed while the rq is offline and we can always say + * no to the BPF scheduler initiated migrations while offline. ++ * ++ * The caller must ensure that @p and @rq are on different CPUs. + */ + static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, + bool trigger_error) + { + int cpu = cpu_of(rq); + ++ SCHED_WARN_ON(task_cpu(p) == cpu); ++ + /* + * We don't require the BPF scheduler to avoid dispatching to offline + * CPUs mostly for convenience but also because CPUs can go offline +@@ -2319,8 +2323,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, + return false; + } + +- if (unlikely(is_migration_disabled(p))) +- return false; ++ /* ++ * If @p has migration disabled, @p->cpus_ptr only contains its current ++ * CPU and the above task_allowed_on_cpu() test should have failed. 
++ */ ++ SCHED_WARN_ON(is_migration_disabled(p)); + + if (!scx_rq_online(rq)) + return false; +@@ -2424,7 +2431,8 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, + + if (dst_dsq->id == SCX_DSQ_LOCAL) { + dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); +- if (!task_can_run_on_remote_rq(p, dst_rq, true)) { ++ if (src_rq != dst_rq && ++ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { + dst_dsq = find_global_dsq(p); + dst_rq = src_rq; + } +@@ -2541,7 +2549,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, + } + + #ifdef CONFIG_SMP +- if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { ++ if (src_rq != dst_rq && ++ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { + dispatch_enqueue(find_global_dsq(p), p, + enq_flags | SCX_ENQ_CLEAR_OPSS); + return; +-- +2.39.5 + diff --git a/queue-6.12/series b/queue-6.12/series index 23ce9dbdbe..0fd67fec7f 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -1 +1,73 @@ arm64-mte-do-not-allow-prot_mte-on-map_hugetlb-user-mappings.patch +drm-xe-oa-separate-batch-submission-from-waiting-for.patch +drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch +drm-xe-oa-add-input-fence-dependencies.patch +xe-oa-fix-query-mode-of-operation-for-oar-oac.patch +btrfs-do-not-assume-the-full-page-range-is-not-dirty.patch +btrfs-move-the-delalloc-range-bitmap-search-into-ext.patch +btrfs-mark-all-dirty-sectors-as-locked-inside-writep.patch +btrfs-remove-unused-btrfs_folio_start_writer_lock.patch +btrfs-unify-to-use-writer-locks-for-subpage-locking.patch +btrfs-rename-btrfs_folio_-set-start-end-_writer_lock.patch +btrfs-use-btrfs_inode-in-extent_writepage.patch +btrfs-fix-double-accounting-race-when-btrfs_run_dela.patch +btrfs-fix-double-accounting-race-when-extent_writepa.patch +kvm-x86-get-vcpu-arch.apic_base-directly-and-drop-kv.patch +kvm-x86-inline-kvm_get_apic_mode-in-lapic.h.patch +kvm-nvmx-defer-svi-update-to-vmcs01-on-eoi-when-l2-i.patch +drm-amd-display-refactoring-if-and-endif-statements-.patch +drm-amd-display-update-dcn351-used-clock-offset.patch +drm-amd-display-correct-register-address-in-dcn35.patch +bluetooth-qca-update-firmware-name-to-support-board-.patch +bluetooth-qca-fix-poor-rf-performance-for-wcn6855.patch +input-serio-define-serio_pause_rx-guard-to-pause-and.patch +input-synaptics-fix-crash-when-enabling-pass-through.patch +asoc-renesas-rz-ssi-terminate-all-the-dma-transactio.patch +asoc-renesas-rz-ssi-add-a-check-for-negative-sample_.patch +pci-make-pcim_request_all_regions-a-public-function.patch +pci-export-pci_intx_unmanaged-and-pcim_intx.patch +pci-remove-devres-from-pci_intx.patch +pci-restore-original-intx_disable-bit-by-pcim_intx.patch +arm64-dts-mediatek-mt8183-pumpkin-add-hdmi-support.patch +arm64-dts-mediatek-mt8183-disable-dsi-display-output.patch +accel-ivpu-limit-fw-version-string-length.patch +accel-ivpu-add-coredump-support.patch +accel-ivpu-add-fw-state-dump-on-tdr.patch +accel-ivpu-fix-error-handling-in-recovery-reset.patch +drm-amdkfd-move-gfx12-trap-handler-to-separate-file.patch +drm-amdkfd-ensure-consistent-barrier-state-saved-in-.patch +tracing-switch-trace.c-code-over-to-use-guard.patch +tracing-have-the-error-of-__tracing_resize_ring_buff.patch +usb-gadget-f_midi-f_midi_complete-to-call-queue_work.patch +sched_ext-factor-out-move_task_between_dsqs-from-scx.patch +sched_ext-fix-migration-disabled-handling-in-targete.patch +asoc-rockchip-i2s-tdm-fix-shift-config-for-snd_soc_d.patch 
+asoc-sof-ipc4-topology-harden-loops-for-looking-up-a.patch +powerpc-code-patching-disable-kasan-report-during-pa.patch +powerpc-64s-rewrite-__real_pte-and-__rpte_to_hidx-as.patch +alsa-hda-realtek-fixup-alc225-depop-procedure.patch +powerpc-code-patching-fix-kasan-hit-by-not-flagging-.patch +asoc-imx-audmix-remove-cpu_mclk-which-is-from-cpu-da.patch +vsock-virtio-fix-variables-initialization-during-res.patch +geneve-fix-use-after-free-in-geneve_find_dev.patch +alsa-hda-cirrus-correct-the-full-scale-volume-set-lo.patch +net-sched-cls_api-fix-error-handling-causing-null-de.patch +alsa-seq-drop-ump-events-when-no-ump-conversion-is-s.patch +s390-ism-add-release-function-for-struct-device.patch +ibmvnic-add-stat-for-tx-direct-vs-tx-batched.patch +ibmvnic-don-t-reference-skb-after-sending-to-vios.patch +sockmap-vsock-for-connectible-sockets-allow-only-con.patch +vsock-bpf-warn-on-socket-without-transport.patch +tcp-adjust-rcvq_space-after-updating-scaling-ratio.patch +net-pse-pd-avoid-setting-max_ua-in-regulator-constra.patch +net-pse-pd-use-power-limit-at-driver-side-instead-of.patch +net-pse-pd-pd692x0-fix-power-limit-retrieval.patch +gtp-suppress-list-corruption-splat-in-gtp_net_exit_b.patch +geneve-suppress-list-corruption-splat-in-geneve_dest.patch +flow_dissector-fix-handling-of-mixed-port-and-port-r.patch +flow_dissector-fix-port-range-key-handling-in-bpf-co.patch +net-add-non-rcu-dev_getbyhwaddr-helper.patch +arp-switch-to-dev_getbyhwaddr-in-arp_req_set_public.patch +net-axienet-set-mac_managed_pm.patch +tcp-drop-secpath-at-the-same-time-as-we-currently-dr.patch +net-allow-small-head-cache-usage-with-large-max_skb_.patch diff --git a/queue-6.12/sockmap-vsock-for-connectible-sockets-allow-only-con.patch b/queue-6.12/sockmap-vsock-for-connectible-sockets-allow-only-con.patch new file mode 100644 index 0000000000..a218af2aec --- /dev/null +++ b/queue-6.12/sockmap-vsock-for-connectible-sockets-allow-only-con.patch @@ -0,0 +1,65 @@ +From d453859cb951dd6f29208d7d5cb91ef415cb67ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 12:58:49 +0100 +Subject: sockmap, vsock: For connectible sockets allow only connected + +From: Michal Luczaj + +[ Upstream commit 8fb5bb169d17cdd12c2dcc2e96830ed487d77a0f ] + +sockmap expects all vsocks to have a transport assigned, which is expressed +in vsock_proto::psock_update_sk_prot(). However, there is an edge case +where an unconnected (connectible) socket may lose its previously assigned +transport. This is handled with a NULL check in the vsock/BPF recv path. + +Another design detail is that listening vsocks are not supposed to have any +transport assigned at all. Which implies they are not supported by the +sockmap. But this is complicated by the fact that a socket, before +switching to TCP_LISTEN, may have had some transport assigned during a +failed connect() attempt. 
Hence, we may end up with a listening vsock in a +sockmap, which blows up quickly: + +KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127] +CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+ +Workqueue: vsock-loopback vsock_loopback_work +RIP: 0010:vsock_read_skb+0x4b/0x90 +Call Trace: + sk_psock_verdict_data_ready+0xa4/0x2e0 + virtio_transport_recv_pkt+0x1ca8/0x2acc + vsock_loopback_work+0x27d/0x3f0 + process_one_work+0x846/0x1420 + worker_thread+0x5b3/0xf80 + kthread+0x35a/0x700 + ret_from_fork+0x2d/0x70 + ret_from_fork_asm+0x1a/0x30 + +For connectible sockets, instead of relying solely on the state of +vsk->transport, tell sockmap to only allow those representing established +connections. This aligns with the behaviour for AF_INET and AF_UNIX. + +Fixes: 634f1a7110b4 ("vsock: support sockmap") +Signed-off-by: Michal Luczaj +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/sock_map.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/core/sock_map.c b/net/core/sock_map.c +index f1b9b3958792c..2f1be9baad057 100644 +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -541,6 +541,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; ++ if (sk_is_vsock(sk) && ++ (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) ++ return (1 << sk->sk_state) & TCPF_ESTABLISHED; + return true; + } + +-- +2.39.5 + diff --git a/queue-6.12/tcp-adjust-rcvq_space-after-updating-scaling-ratio.patch b/queue-6.12/tcp-adjust-rcvq_space-after-updating-scaling-ratio.patch new file mode 100644 index 0000000000..5cbbe26e33 --- /dev/null +++ b/queue-6.12/tcp-adjust-rcvq_space-after-updating-scaling-ratio.patch @@ -0,0 +1,67 @@ +From 3c0760d24dbaac751009183e1e63a1da8284df84 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 15:29:05 -0800 +Subject: tcp: adjust rcvq_space after updating scaling ratio + +From: Jakub Kicinski + +[ Upstream commit f5da7c45188eea71394bf445655cae2df88a7788 ] + +Since commit under Fixes we set the window clamp in accordance +to newly measured rcvbuf scaling_ratio. If the scaling_ratio +decreased significantly we may put ourselves in a situation +where windows become smaller than rcvq_space, preventing +tcp_rcv_space_adjust() from increasing rcvbuf. + +The significant decrease of scaling_ratio is far more likely +since commit 697a6c8cec03 ("tcp: increase the default TCP scaling ratio"), +which increased the "default" scaling ratio from ~30% to 50%. + +Hitting the bad condition depends a lot on TCP tuning, and +drivers at play. One of Meta's workloads hits it reliably +under following conditions: + - default rcvbuf of 125k + - sender MTU 1500, receiver MTU 5000 + - driver settles on scaling_ratio of 78 for the config above. +Initial rcvq_space gets calculated as TCP_INIT_CWND * tp->advmss +(10 * 5k = 50k). Once we find out the true scaling ratio and +MSS we clamp the windows to 38k. Triggering the condition also +depends on the message sequence of this workload. I can't repro +the problem with simple iperf or TCP_RR-style tests. 
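+
+Worked numbers for the setup above (assuming tcp_win_from_space() applies
+scaling_ratio out of 256, as __tcp_win_from_space() does):
+
+	initial rcvq_space = TCP_INIT_CWND * advmss = 10 * 5000 ~= 50k
+	clamped window     = rcvbuf * scaling_ratio / 256
+	                   = 125000 * 78 / 256      ~= 38k
+
+With the clamp (38k) below rcvq_space (50k), tcp_rcv_space_adjust() can no
+longer grow the receive buffer; the fix below caps rcvq_space at the new
+clamp so this situation cannot persist.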
+ +Fixes: a2cbb1603943 ("tcp: Update window clamping condition") +Reviewed-by: Eric Dumazet +Reviewed-by: Neal Cardwell +Link: https://patch.msgid.link/20250217232905.3162187-1-kuba@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 2d43b29da15e2..bb17add6e4a78 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) + do_div(val, skb->truesize); + tcp_sk(sk)->scaling_ratio = val ? val : 1; + +- if (old_ratio != tcp_sk(sk)->scaling_ratio) +- WRITE_ONCE(tcp_sk(sk)->window_clamp, +- tcp_win_from_space(sk, sk->sk_rcvbuf)); ++ if (old_ratio != tcp_sk(sk)->scaling_ratio) { ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ val = tcp_win_from_space(sk, sk->sk_rcvbuf); ++ tcp_set_window_clamp(sk, val); ++ ++ if (tp->window_clamp < tp->rcvq_space.space) ++ tp->rcvq_space.space = tp->window_clamp; ++ } + } + icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, + tcp_sk(sk)->advmss); +-- +2.39.5 + diff --git a/queue-6.12/tcp-drop-secpath-at-the-same-time-as-we-currently-dr.patch b/queue-6.12/tcp-drop-secpath-at-the-same-time-as-we-currently-dr.patch new file mode 100644 index 0000000000..d1ef4f55a4 --- /dev/null +++ b/queue-6.12/tcp-drop-secpath-at-the-same-time-as-we-currently-dr.patch @@ -0,0 +1,158 @@ +From 9057546afb3d9a721e21dfc7a91362acb5135982 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 11:23:35 +0100 +Subject: tcp: drop secpath at the same time as we currently drop dst + +From: Sabrina Dubroca + +[ Upstream commit 9b6412e6979f6f9e0632075f8f008937b5cd4efd ] + +Xiumei reported hitting the WARN in xfrm6_tunnel_net_exit while +running tests that boil down to: + - create a pair of netns + - run a basic TCP test over ipcomp6 + - delete the pair of netns + +The xfrm_state found on spi_byaddr was not deleted at the time we +delete the netns, because we still have a reference on it. This +lingering reference comes from a secpath (which holds a ref on the +xfrm_state), which is still attached to an skb. This skb is not +leaked, it ends up on sk_receive_queue and then gets defer-free'd by +skb_attempt_defer_free. + +The problem happens when we defer freeing an skb (push it on one CPU's +defer_list), and don't flush that list before the netns is deleted. In +that case, we still have a reference on the xfrm_state that we don't +expect at this point. + +We already drop the skb's dst in the TCP receive path when it's no +longer needed, so let's also drop the secpath. At this point, +tcp_filter has already called into the LSM hooks that may require the +secpath, so it should not be needed anymore. However, in some of those +places, the MPTCP extension has just been attached to the skb, so we +cannot simply drop all extensions. 
+ +Fixes: 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists") +Reported-by: Xiumei Mu +Signed-off-by: Sabrina Dubroca +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/5055ba8f8f72bdcb602faa299faca73c280b7735.1739743613.git.sd@queasysnail.net +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/tcp.h | 14 ++++++++++++++ + net/ipv4/tcp_fastopen.c | 4 ++-- + net/ipv4/tcp_input.c | 8 ++++---- + net/ipv4/tcp_ipv4.c | 2 +- + 4 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index d1948d357dade..6cd0fde806519 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk); + void tcp_check_space(struct sock *sk); + void tcp_sack_compress_send_ack(struct sock *sk); + ++static inline void tcp_cleanup_skb(struct sk_buff *skb) ++{ ++ skb_dst_drop(skb); ++ secpath_reset(skb); ++} ++ ++static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb) ++{ ++ DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); ++ DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb)); ++ __skb_queue_tail(&sk->sk_receive_queue, skb); ++} ++ + /* tcp_timer.c */ + void tcp_init_xmit_timers(struct sock *); + static inline void tcp_clear_xmit_timers(struct sock *sk) +diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c +index 0f523cbfe329e..32b28fc21b63c 100644 +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) + if (!skb) + return; + +- skb_dst_drop(skb); ++ tcp_cleanup_skb(skb); + /* segs_in has been initialized to 1 in tcp_create_openreq_child(). + * Hence, reset segs_in to 0 before calling tcp_segs_in() + * to avoid double counting. Also, tcp_segs_in() expects +@@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; +- __skb_queue_tail(&sk->sk_receive_queue, skb); ++ tcp_add_receive_queue(sk, skb); + tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index bb17add6e4a78..d93a5a89c5692 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4970,7 +4970,7 @@ static void tcp_ofo_queue(struct sock *sk) + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; + if (!eaten) +- __skb_queue_tail(&sk->sk_receive_queue, skb); ++ tcp_add_receive_queue(sk, skb); + else + kfree_skb_partial(skb, fragstolen); + +@@ -5162,7 +5162,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, + skb, fragstolen)) ? 
1 : 0; + tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); + if (!eaten) { +- __skb_queue_tail(&sk->sk_receive_queue, skb); ++ tcp_add_receive_queue(sk, skb); + skb_set_owner_r(skb, sk); + } + return eaten; +@@ -5245,7 +5245,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) + __kfree_skb(skb); + return; + } +- skb_dst_drop(skb); ++ tcp_cleanup_skb(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + + reason = SKB_DROP_REASON_NOT_SPECIFIED; +@@ -6214,7 +6214,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + + /* Bulk data transfer: receiver */ +- skb_dst_drop(skb); ++ tcp_cleanup_skb(skb); + __skb_pull(skb, tcp_header_len); + eaten = tcp_queue_rcv(sk, skb, &fragstolen); + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index bcc2f1e090c7d..824048679e1b8 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -2025,7 +2025,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + */ + skb_condense(skb); + +- skb_dst_drop(skb); ++ tcp_cleanup_skb(skb); + + if (unlikely(tcp_checksum_complete(skb))) { + bh_unlock_sock(sk); +-- +2.39.5 + diff --git a/queue-6.12/tracing-have-the-error-of-__tracing_resize_ring_buff.patch b/queue-6.12/tracing-have-the-error-of-__tracing_resize_ring_buff.patch new file mode 100644 index 0000000000..2d045cb5a9 --- /dev/null +++ b/queue-6.12/tracing-have-the-error-of-__tracing_resize_ring_buff.patch @@ -0,0 +1,59 @@ +From a13b273d400ae4e89ecbb4378f7de52eb4a7e20b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 13:41:32 -0500 +Subject: tracing: Have the error of __tracing_resize_ring_buffer() passed to + user + +From: Steven Rostedt + +[ Upstream commit 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7 ] + +Currently if __tracing_resize_ring_buffer() returns an error, the +tracing_resize_ringbuffer() returns -ENOMEM. But it may not be a memory +issue that caused the function to fail. If the ring buffer is memory +mapped, then the resizing of the ring buffer will be disabled. But if the +user tries to resize the buffer, it will get an -ENOMEM returned, which is +confusing because there is plenty of memory. The actual error returned was +-EBUSY, which would make much more sense to the user. 
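+
+As a concrete (hypothetical) example, with the ring buffer memory-mapped a
+write of, say, 8192 to /sys/kernel/tracing/buffer_size_kb used to fail with
+ENOMEM ("Cannot allocate memory") even though memory was plentiful; after
+this change the underlying -EBUSY is reported to the user instead.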
+ +Cc: stable@vger.kernel.org +Cc: Mathieu Desnoyers +Cc: Vincent Donnefort +Link: https://lore.kernel.org/20250213134132.7e4505d7@gandalf.local.home +Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions") +Signed-off-by: Steven Rostedt (Google) +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index f03eef90de54c..1142a7802bb60 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5998,8 +5998,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, + ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + unsigned long size, int cpu_id) + { +- int ret; +- + guard(mutex)(&trace_types_lock); + + if (cpu_id != RING_BUFFER_ALL_CPUS) { +@@ -6008,11 +6006,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + return -EINVAL; + } + +- ret = __tracing_resize_ring_buffer(tr, size, cpu_id); +- if (ret < 0) +- ret = -ENOMEM; +- +- return ret; ++ return __tracing_resize_ring_buffer(tr, size, cpu_id); + } + + static void update_last_data(struct trace_array *tr) +-- +2.39.5 + diff --git a/queue-6.12/tracing-switch-trace.c-code-over-to-use-guard.patch b/queue-6.12/tracing-switch-trace.c-code-over-to-use-guard.patch new file mode 100644 index 0000000000..516d9d566e --- /dev/null +++ b/queue-6.12/tracing-switch-trace.c-code-over-to-use-guard.patch @@ -0,0 +1,709 @@ +From 33009aa3a0ed4ef92ebe12c7eb7c006592131cbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Dec 2024 22:14:13 -0500 +Subject: tracing: Switch trace.c code over to use guard() + +From: Steven Rostedt + +[ Upstream commit d33b10c0c73adca00f72bf4a153a07b7f5f34715 ] + +There are several functions in trace.c that have "goto out;" or +equivalent on error in order to release locks or free values that were +allocated. This can be error prone or just simply make the code more +complex. + +Switch every location that ends with unlocking a mutex or freeing on error +over to using the guard(mutex)() and __free() infrastructure to let the +compiler worry about releasing locks. This makes the code easier to read +and understand. + +There's one place that should probably return an error but instead return +0. This does not change the return as the only changes are to do the +conversion without changing the logic. Fixing that location will have to +come later. 
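+
+For readers unfamiliar with the cleanup helpers, the converted pattern is
+roughly the following (a minimal sketch with made-up names, not code taken
+from this patch):
+
+	guard(mutex)(&trace_types_lock);	/* unlocked on every return path */
+
+	if (!some_condition)
+		return -EINVAL;			/* no "goto out" / unlock needed */
+
+	return do_the_work();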
+ +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Peter Zijlstra +Cc: Andrew Morton +Acked-by: Masami Hiramatsu (Google) +Link: https://lore.kernel.org/20241224221413.7b8c68c3@batman.local.home +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 60b8f711143d ("tracing: Have the error of __tracing_resize_ring_buffer() passed to user") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 266 +++++++++++++++---------------------------- + 1 file changed, 94 insertions(+), 172 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index bfc4ac265c2c3..f03eef90de54c 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -535,19 +536,16 @@ LIST_HEAD(ftrace_trace_arrays); + int trace_array_get(struct trace_array *this_tr) + { + struct trace_array *tr; +- int ret = -ENODEV; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; +- ret = 0; +- break; ++ return 0; + } + } +- mutex_unlock(&trace_types_lock); + +- return ret; ++ return -ENODEV; + } + + static void __trace_array_put(struct trace_array *this_tr) +@@ -1456,22 +1454,20 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); + int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, + cond_update_fn_t update) + { +- struct cond_snapshot *cond_snapshot; +- int ret = 0; ++ struct cond_snapshot *cond_snapshot __free(kfree) = ++ kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); ++ int ret; + +- cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); + if (!cond_snapshot) + return -ENOMEM; + + cond_snapshot->cond_data = cond_data; + cond_snapshot->update = update; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + +- if (tr->current_trace->use_max_tr) { +- ret = -EBUSY; +- goto fail_unlock; +- } ++ if (tr->current_trace->use_max_tr) ++ return -EBUSY; + + /* + * The cond_snapshot can only change to NULL without the +@@ -1481,29 +1477,20 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, + * do safely with only holding the trace_types_lock and not + * having to take the max_lock. 
+ */ +- if (tr->cond_snapshot) { +- ret = -EBUSY; +- goto fail_unlock; +- } ++ if (tr->cond_snapshot) ++ return -EBUSY; + + ret = tracing_arm_snapshot_locked(tr); + if (ret) +- goto fail_unlock; ++ return ret; + + local_irq_disable(); + arch_spin_lock(&tr->max_lock); +- tr->cond_snapshot = cond_snapshot; ++ tr->cond_snapshot = no_free_ptr(cond_snapshot); + arch_spin_unlock(&tr->max_lock); + local_irq_enable(); + +- mutex_unlock(&trace_types_lock); +- +- return ret; +- +- fail_unlock: +- mutex_unlock(&trace_types_lock); +- kfree(cond_snapshot); +- return ret; ++ return 0; + } + EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); + +@@ -2216,10 +2203,10 @@ static __init int init_trace_selftests(void) + + selftests_can_run = true; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + + if (list_empty(&postponed_selftests)) +- goto out; ++ return 0; + + pr_info("Running postponed tracer tests:\n"); + +@@ -2248,9 +2235,6 @@ static __init int init_trace_selftests(void) + } + tracing_selftest_running = false; + +- out: +- mutex_unlock(&trace_types_lock); +- + return 0; + } + core_initcall(init_trace_selftests); +@@ -2818,7 +2802,7 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write, + int save_tracepoint_printk; + int ret; + +- mutex_lock(&tracepoint_printk_mutex); ++ guard(mutex)(&tracepoint_printk_mutex); + save_tracepoint_printk = tracepoint_printk; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); +@@ -2831,16 +2815,13 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write, + tracepoint_printk = 0; + + if (save_tracepoint_printk == tracepoint_printk) +- goto out; ++ return ret; + + if (tracepoint_printk) + static_key_enable(&tracepoint_printk_key.key); + else + static_key_disable(&tracepoint_printk_key.key); + +- out: +- mutex_unlock(&tracepoint_printk_mutex); +- + return ret; + } + +@@ -5150,7 +5131,8 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) + u32 tracer_flags; + int i; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); ++ + tracer_flags = tr->current_trace->flags->val; + trace_opts = tr->current_trace->flags->opts; + +@@ -5167,7 +5149,6 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) + else + seq_printf(m, "no%s\n", trace_opts[i].name); + } +- mutex_unlock(&trace_types_lock); + + return 0; + } +@@ -5832,7 +5813,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, + return; + } + +- mutex_lock(&trace_eval_mutex); ++ guard(mutex)(&trace_eval_mutex); + + if (!trace_eval_maps) + trace_eval_maps = map_array; +@@ -5856,8 +5837,6 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, + map_array++; + } + memset(map_array, 0, sizeof(*map_array)); +- +- mutex_unlock(&trace_eval_mutex); + } + + static void trace_create_eval_file(struct dentry *d_tracer) +@@ -6021,23 +6000,18 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, + { + int ret; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + + if (cpu_id != RING_BUFFER_ALL_CPUS) { + /* make sure, this cpu is enabled in the mask */ +- if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { +- ret = -EINVAL; +- goto out; +- } ++ if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) ++ return -EINVAL; + } + + ret = __tracing_resize_ring_buffer(tr, size, cpu_id); + if (ret < 0) + ret = -ENOMEM; + +-out: +- mutex_unlock(&trace_types_lock); +- + return ret; + } + +@@ -6129,9 +6103,9 @@ int tracing_set_tracer(struct trace_array *tr, const 
char *buf) + #ifdef CONFIG_TRACER_MAX_TRACE + bool had_max_tr; + #endif +- int ret = 0; ++ int ret; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + + update_last_data(tr); + +@@ -6139,7 +6113,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + ret = __tracing_resize_ring_buffer(tr, trace_buf_size, + RING_BUFFER_ALL_CPUS); + if (ret < 0) +- goto out; ++ return ret; + ret = 0; + } + +@@ -6147,12 +6121,11 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + if (strcmp(t->name, buf) == 0) + break; + } +- if (!t) { +- ret = -EINVAL; +- goto out; +- } ++ if (!t) ++ return -EINVAL; ++ + if (t == tr->current_trace) +- goto out; ++ return 0; + + #ifdef CONFIG_TRACER_SNAPSHOT + if (t->use_max_tr) { +@@ -6163,27 +6136,23 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + arch_spin_unlock(&tr->max_lock); + local_irq_enable(); + if (ret) +- goto out; ++ return ret; + } + #endif + /* Some tracers won't work on kernel command line */ + if (system_state < SYSTEM_RUNNING && t->noboot) { + pr_warn("Tracer '%s' is not allowed on command line, ignored\n", + t->name); +- goto out; ++ return 0; + } + + /* Some tracers are only allowed for the top level buffer */ +- if (!trace_ok_for_array(t, tr)) { +- ret = -EINVAL; +- goto out; +- } ++ if (!trace_ok_for_array(t, tr)) ++ return -EINVAL; + + /* If trace pipe files are being read, we can't change the tracer */ +- if (tr->trace_ref) { +- ret = -EBUSY; +- goto out; +- } ++ if (tr->trace_ref) ++ return -EBUSY; + + trace_branch_disable(); + +@@ -6214,7 +6183,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + if (!had_max_tr && t->use_max_tr) { + ret = tracing_arm_snapshot_locked(tr); + if (ret) +- goto out; ++ return ret; + } + #else + tr->current_trace = &nop_trace; +@@ -6227,17 +6196,15 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) + if (t->use_max_tr) + tracing_disarm_snapshot(tr); + #endif +- goto out; ++ return ret; + } + } + + tr->current_trace = t; + tr->current_trace->enabled++; + trace_branch_enable(tr); +- out: +- mutex_unlock(&trace_types_lock); + +- return ret; ++ return 0; + } + + static ssize_t +@@ -6315,22 +6282,18 @@ tracing_thresh_write(struct file *filp, const char __user *ubuf, + struct trace_array *tr = filp->private_data; + int ret; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); + if (ret < 0) +- goto out; ++ return ret; + + if (tr->current_trace->update_thresh) { + ret = tr->current_trace->update_thresh(tr); + if (ret < 0) +- goto out; ++ return ret; + } + +- ret = cnt; +-out: +- mutex_unlock(&trace_types_lock); +- +- return ret; ++ return cnt; + } + + #ifdef CONFIG_TRACER_MAX_TRACE +@@ -6549,31 +6512,29 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, + * This is just a matter of traces coherency, the ring buffer itself + * is protected. 
+ */ +- mutex_lock(&iter->mutex); ++ guard(mutex)(&iter->mutex); + + /* return any leftover data */ + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) +- goto out; ++ return sret; + + trace_seq_init(&iter->seq); + + if (iter->trace->read) { + sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (sret) +- goto out; ++ return sret; + } + + waitagain: + sret = tracing_wait_pipe(filp); + if (sret <= 0) +- goto out; ++ return sret; + + /* stop when tracing is finished */ +- if (trace_empty(iter)) { +- sret = 0; +- goto out; +- } ++ if (trace_empty(iter)) ++ return 0; + + if (cnt >= TRACE_SEQ_BUFFER_SIZE) + cnt = TRACE_SEQ_BUFFER_SIZE - 1; +@@ -6637,9 +6598,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, + if (sret == -EBUSY) + goto waitagain; + +-out: +- mutex_unlock(&iter->mutex); +- + return sret; + } + +@@ -7231,25 +7189,19 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve + */ + int tracing_set_filter_buffering(struct trace_array *tr, bool set) + { +- int ret = 0; +- +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + + if (set && tr->no_filter_buffering_ref++) +- goto out; ++ return 0; + + if (!set) { +- if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) { +- ret = -EINVAL; +- goto out; +- } ++ if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) ++ return -EINVAL; + + --tr->no_filter_buffering_ref; + } +- out: +- mutex_unlock(&trace_types_lock); + +- return ret; ++ return 0; + } + + struct ftrace_buffer_info { +@@ -7325,12 +7277,10 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + if (ret) + return ret; + +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&trace_types_lock); + +- if (tr->current_trace->use_max_tr) { +- ret = -EBUSY; +- goto out; +- } ++ if (tr->current_trace->use_max_tr) ++ return -EBUSY; + + local_irq_disable(); + arch_spin_lock(&tr->max_lock); +@@ -7339,24 +7289,20 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + arch_spin_unlock(&tr->max_lock); + local_irq_enable(); + if (ret) +- goto out; ++ return ret; + + switch (val) { + case 0: +- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { +- ret = -EINVAL; +- break; +- } ++ if (iter->cpu_file != RING_BUFFER_ALL_CPUS) ++ return -EINVAL; + if (tr->allocated_snapshot) + free_snapshot(tr); + break; + case 1: + /* Only allow per-cpu swap if the ring buffer supports it */ + #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP +- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { +- ret = -EINVAL; +- break; +- } ++ if (iter->cpu_file != RING_BUFFER_ALL_CPUS) ++ return -EINVAL; + #endif + if (tr->allocated_snapshot) + ret = resize_buffer_duplicate_size(&tr->max_buffer, +@@ -7364,7 +7310,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + + ret = tracing_arm_snapshot_locked(tr); + if (ret) +- break; ++ return ret; + + /* Now, we're going to swap */ + if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { +@@ -7391,8 +7337,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + *ppos += cnt; + ret = cnt; + } +-out: +- mutex_unlock(&trace_types_lock); ++ + return ret; + } + +@@ -7778,12 +7723,11 @@ void tracing_log_err(struct trace_array *tr, + + len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; + +- mutex_lock(&tracing_err_log_lock); ++ guard(mutex)(&tracing_err_log_lock); ++ + err = get_tracing_log_err(tr, len); +- if (PTR_ERR(err) == -ENOMEM) { +- mutex_unlock(&tracing_err_log_lock); ++ if (PTR_ERR(err) == -ENOMEM) + return; +- } + + 
snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); + snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); +@@ -7794,7 +7738,6 @@ void tracing_log_err(struct trace_array *tr, + err->info.ts = local_clock(); + + list_add_tail(&err->list, &tr->err_log); +- mutex_unlock(&tracing_err_log_lock); + } + + static void clear_tracing_err_log(struct trace_array *tr) +@@ -9535,20 +9478,17 @@ static int instance_mkdir(const char *name) + struct trace_array *tr; + int ret; + +- mutex_lock(&event_mutex); +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&event_mutex); ++ guard(mutex)(&trace_types_lock); + + ret = -EEXIST; + if (trace_array_find(name)) +- goto out_unlock; ++ return -EEXIST; + + tr = trace_array_create(name); + + ret = PTR_ERR_OR_ZERO(tr); + +-out_unlock: +- mutex_unlock(&trace_types_lock); +- mutex_unlock(&event_mutex); + return ret; + } + +@@ -9598,24 +9538,23 @@ struct trace_array *trace_array_get_by_name(const char *name, const char *system + { + struct trace_array *tr; + +- mutex_lock(&event_mutex); +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&event_mutex); ++ guard(mutex)(&trace_types_lock); + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { +- if (tr->name && strcmp(tr->name, name) == 0) +- goto out_unlock; ++ if (tr->name && strcmp(tr->name, name) == 0) { ++ tr->ref++; ++ return tr; ++ } + } + + tr = trace_array_create_systems(name, systems, 0, 0); + + if (IS_ERR(tr)) + tr = NULL; +-out_unlock: +- if (tr) ++ else + tr->ref++; + +- mutex_unlock(&trace_types_lock); +- mutex_unlock(&event_mutex); + return tr; + } + EXPORT_SYMBOL_GPL(trace_array_get_by_name); +@@ -9666,48 +9605,36 @@ static int __remove_instance(struct trace_array *tr) + int trace_array_destroy(struct trace_array *this_tr) + { + struct trace_array *tr; +- int ret; + + if (!this_tr) + return -EINVAL; + +- mutex_lock(&event_mutex); +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&event_mutex); ++ guard(mutex)(&trace_types_lock); + +- ret = -ENODEV; + + /* Making sure trace array exists before destroying it. 
*/ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { +- if (tr == this_tr) { +- ret = __remove_instance(tr); +- break; +- } ++ if (tr == this_tr) ++ return __remove_instance(tr); + } + +- mutex_unlock(&trace_types_lock); +- mutex_unlock(&event_mutex); +- +- return ret; ++ return -ENODEV; + } + EXPORT_SYMBOL_GPL(trace_array_destroy); + + static int instance_rmdir(const char *name) + { + struct trace_array *tr; +- int ret; + +- mutex_lock(&event_mutex); +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&event_mutex); ++ guard(mutex)(&trace_types_lock); + +- ret = -ENODEV; + tr = trace_array_find(name); +- if (tr) +- ret = __remove_instance(tr); +- +- mutex_unlock(&trace_types_lock); +- mutex_unlock(&event_mutex); ++ if (!tr) ++ return -ENODEV; + +- return ret; ++ return __remove_instance(tr); + } + + static __init void create_trace_instances(struct dentry *d_tracer) +@@ -9720,19 +9647,16 @@ static __init void create_trace_instances(struct dentry *d_tracer) + if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) + return; + +- mutex_lock(&event_mutex); +- mutex_lock(&trace_types_lock); ++ guard(mutex)(&event_mutex); ++ guard(mutex)(&trace_types_lock); + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->name) + continue; + if (MEM_FAIL(trace_array_create_dir(tr) < 0, + "Failed to create instance directory\n")) +- break; ++ return; + } +- +- mutex_unlock(&trace_types_lock); +- mutex_unlock(&event_mutex); + } + + static void +@@ -9946,7 +9870,7 @@ static void trace_module_remove_evals(struct module *mod) + if (!mod->num_trace_evals) + return; + +- mutex_lock(&trace_eval_mutex); ++ guard(mutex)(&trace_eval_mutex); + + map = trace_eval_maps; + +@@ -9958,12 +9882,10 @@ static void trace_module_remove_evals(struct module *mod) + map = map->tail.next; + } + if (!map) +- goto out; ++ return; + + *last = trace_eval_jmp_to_tail(map)->tail.next; + kfree(map); +- out: +- mutex_unlock(&trace_eval_mutex); + } + #else + static inline void trace_module_remove_evals(struct module *mod) { } +-- +2.39.5 + diff --git a/queue-6.12/usb-gadget-f_midi-f_midi_complete-to-call-queue_work.patch b/queue-6.12/usb-gadget-f_midi-f_midi_complete-to-call-queue_work.patch new file mode 100644 index 0000000000..011c727fab --- /dev/null +++ b/queue-6.12/usb-gadget-f_midi-f_midi_complete-to-call-queue_work.patch @@ -0,0 +1,42 @@ +From c1a6df0244e33970a6828c664554c6a972f3639f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Feb 2025 10:48:05 -0700 +Subject: USB: gadget: f_midi: f_midi_complete to call queue_work + +From: Jill Donahue + +[ Upstream commit 4ab37fcb42832cdd3e9d5e50653285ca84d6686f ] + +When using USB MIDI, a lock is attempted to be acquired twice through a +re-entrant call to f_midi_transmit, causing a deadlock. + +Fix it by using queue_work() to schedule the inner f_midi_transmit() via +a high priority work queue from the completion handler. 
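+
+The change itself is a one-liner; the resulting flow in the completion
+handler is roughly (see the hunk below):
+
+	/* before: re-entrant call that can take the transmit lock twice */
+	f_midi_transmit(midi);
+
+	/* after: defer the transmit to the driver's work item instead */
+	queue_work(system_highpri_wq, &midi->work);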
+ +Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGVAX6g@mail.gmail.com/ +Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver") +Cc: stable +Signed-off-by: Jill Donahue +Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/gadget/function/f_midi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c +index 4153643c67dce..1f18f15dba277 100644 +--- a/drivers/usb/gadget/function/f_midi.c ++++ b/drivers/usb/gadget/function/f_midi.c +@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req) + /* Our transmit completed. See if there's more to go. + * f_midi_transmit eats req, don't queue it again. */ + req->length = 0; +- f_midi_transmit(midi); ++ queue_work(system_highpri_wq, &midi->work); + return; + } + break; +-- +2.39.5 + diff --git a/queue-6.12/vsock-bpf-warn-on-socket-without-transport.patch b/queue-6.12/vsock-bpf-warn-on-socket-without-transport.patch new file mode 100644 index 0000000000..5ca0cd371a --- /dev/null +++ b/queue-6.12/vsock-bpf-warn-on-socket-without-transport.patch @@ -0,0 +1,53 @@ +From 85632c1b0929dafe8737c6c91cd4f0da0ee43543 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 12:58:50 +0100 +Subject: vsock/bpf: Warn on socket without transport + +From: Michal Luczaj + +[ Upstream commit 857ae05549ee2542317e7084ecaa5f8536634dd9 ] + +In the spirit of commit 91751e248256 ("vsock: prevent null-ptr-deref in +vsock_*[has_data|has_space]"), armorize the "impossible" cases with a +warning. + +Fixes: 634f1a7110b4 ("vsock: support sockmap") +Signed-off-by: Michal Luczaj +Reviewed-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/af_vsock.c | 3 +++ + net/vmw_vsock/vsock_bpf.c | 2 +- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index 37299a7ca1876..eb6ea26b390ee 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1189,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) + { + struct vsock_sock *vsk = vsock_sk(sk); + ++ if (WARN_ON_ONCE(!vsk->transport)) ++ return -ENODEV; ++ + return vsk->transport->read_skb(vsk, read_actor); + } + +diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c +index f201d9eca1df2..07b96d56f3a57 100644 +--- a/net/vmw_vsock/vsock_bpf.c ++++ b/net/vmw_vsock/vsock_bpf.c +@@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + lock_sock(sk); + vsk = vsock_sk(sk); + +- if (!vsk->transport) { ++ if (WARN_ON_ONCE(!vsk->transport)) { + copied = -ENODEV; + goto out; + } +-- +2.39.5 + diff --git a/queue-6.12/vsock-virtio-fix-variables-initialization-during-res.patch b/queue-6.12/vsock-virtio-fix-variables-initialization-during-res.patch new file mode 100644 index 0000000000..e03796b92e --- /dev/null +++ b/queue-6.12/vsock-virtio-fix-variables-initialization-during-res.patch @@ -0,0 +1,79 @@ +From b1803e0e1f9844e3cd555e579be5e90a919948cc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 09:22:00 +0800 +Subject: vsock/virtio: fix variables initialization during resuming + +From: Junnan Wu + +[ Upstream commit 55eff109e76a14e5ed10c8c3c3978d20a35e2a4d ] + +When executing suspend to ram twice in a row, +the `rx_buf_nr` and `rx_buf_max_nr` increase to three 
times vq->num_free. +Then after virtqueue_get_buf and `rx_buf_nr` decreased +in function virtio_transport_rx_work, +the condition to fill rx buffer +(rx_buf_nr < rx_buf_max_nr / 2) will never be met. + +It is because that `rx_buf_nr` and `rx_buf_max_nr` +are initialized only in virtio_vsock_probe(), +but they should be reset whenever virtqueues are recreated, +like after a suspend/resume. + +Move the `rx_buf_nr` and `rx_buf_max_nr` initialization in +virtio_vsock_vqs_init(), so we are sure that they are properly +initialized, every time we initialize the virtqueues, either when we +load the driver or after a suspend/resume. + +To prevent erroneous atomic load operations on the `queued_replies` +in the virtio_transport_send_pkt_work() function +which may disrupt the scheduling of vsock->rx_work +when transmitting reply-required socket packets, +this atomic variable must undergo synchronized initialization +alongside the preceding two variables after a suspend/resume. + +Fixes: bd50c5dc182b ("vsock/virtio: add support for device suspend/resume") +Link: https://lore.kernel.org/virtualization/20250207052033.2222629-1-junnan01.wu@samsung.com/ +Co-developed-by: Ying Gao +Signed-off-by: Ying Gao +Signed-off-by: Junnan Wu +Reviewed-by: Luigi Leonardi +Acked-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Link: https://patch.msgid.link/20250214012200.1883896-1-junnan01.wu@samsung.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/virtio_transport.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +index b58c3818f284f..f0e48e6911fc4 100644 +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) + }; + int ret; + ++ mutex_lock(&vsock->rx_lock); ++ vsock->rx_buf_nr = 0; ++ vsock->rx_buf_max_nr = 0; ++ mutex_unlock(&vsock->rx_lock); ++ ++ atomic_set(&vsock->queued_replies, 0); ++ + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); + if (ret < 0) + return ret; +@@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) + + vsock->vdev = vdev; + +- vsock->rx_buf_nr = 0; +- vsock->rx_buf_max_nr = 0; +- atomic_set(&vsock->queued_replies, 0); + + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); +-- +2.39.5 + diff --git a/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch b/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch new file mode 100644 index 0000000000..7a3d7f5456 --- /dev/null +++ b/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch @@ -0,0 +1,359 @@ +From 1af0bbe1ff1ab3181cc86532d051c73f1f73d10d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Dec 2024 09:19:18 -0800 +Subject: xe/oa: Fix query mode of operation for OAR/OAC + +From: Umesh Nerlige Ramappa + +[ Upstream commit 55039832f98c7e05f1cf9e0d8c12b2490abd0f16 ] + +This is a set of squashed commits to facilitate smooth applying to +stable. Each commit message is retained for reference. + +1) Allow a GGTT mapped batch to be submitted to user exec queue + +For a OA use case, one of the HW registers needs to be modified by +submitting an MI_LOAD_REGISTER_IMM command to the users exec queue, so +that the register is modified in the user's hardware context. In order +to do this a batch that is mapped in GGTT, needs to be submitted to the +user exec queue. 
Since all user submissions use q->vm and hence PPGTT, +add some plumbing to enable submission of batches mapped in GGTT. + +v2: ggtt is zero-initialized, so no need to set it false (Matt Brost) + +2) xe/oa: Use MI_LOAD_REGISTER_IMMEDIATE to enable OAR/OAC + +To enable OAR/OAC, a bit in RING_CONTEXT_CONTROL needs to be set. +Setting this bit cause the context image size to change and if not done +correct, can cause undesired hangs. + +Current code uses a separate exec_queue to modify this bit and is +error-prone. As per HW recommendation, submit MI_LOAD_REGISTER_IMM to +the target hardware context to modify the relevant bit. + +In v2 version, an attempt to submit everything to the user-queue was +made, but it failed the unprivileged-single-ctx-counters test. It +appears that the OACTXCONTROL must be modified from a remote context. + +In v3 version, all context specific register configurations were moved +to use LOAD_REGISTER_IMMEDIATE and that seems to work well. This is a +cleaner way, since we can now submit all configuration to user +exec_queue and the fence handling is simplified. + +v2: +(Matt) +- set job->ggtt to true if create job is successful +- unlock vm on job error + +(Ashutosh) +- don't wait on job submission +- use kernel exec queue where possible + +v3: +(Ashutosh) +- Fix checkpatch issues +- Remove extra spaces/new-lines +- Add Fixes: and Cc: tags +- Reset context control bit when OA stream is closed +- Submit all config via MI_LOAD_REGISTER_IMMEDIATE + +(Umesh) +- Update commit message for v3 experiment +- Squash patches for easier port to stable + +v4: +(Ashutosh) +- No need to pass q to xe_oa_submit_bb +- Do not support exec queues with width > 1 +- Fix disabling of CTX_CTRL_OAC_CONTEXT_ENABLE + +v5: +(Ashutosh) +- Drop reg_lri related comments +- Use XE_OA_SUBMIT_NO_DEPS in xe_oa_load_with_lri + +Fixes: 8135f1c09dd2 ("drm/xe/oa: Don't reset OAC_CONTEXT_ENABLE on OA stream close") +Signed-off-by: Umesh Nerlige Ramappa +Reviewed-by: Matthew Brost # commit 1 +Reviewed-by: Ashutosh Dixit +Cc: stable@vger.kernel.org +Reviewed-by: Jonathan Cavitt +Signed-off-by: Ashutosh Dixit +Link: https://patchwork.freedesktop.org/patch/msgid/20241220171919.571528-2-umesh.nerlige.ramappa@intel.com +Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_oa.c | 134 ++++++++---------------- + drivers/gpu/drm/xe/xe_ring_ops.c | 5 +- + drivers/gpu/drm/xe/xe_sched_job_types.h | 2 + + 3 files changed, 51 insertions(+), 90 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c +index 1bfc4b58b5c17..e6744422dee49 100644 +--- a/drivers/gpu/drm/xe/xe_oa.c ++++ b/drivers/gpu/drm/xe/xe_oa.c +@@ -69,12 +69,6 @@ struct xe_oa_config { + struct rcu_head rcu; + }; + +-struct flex { +- struct xe_reg reg; +- u32 offset; +- u32 value; +-}; +- + struct xe_oa_open_param { + struct xe_file *xef; + u32 oa_unit_id; +@@ -577,19 +571,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) + return ret; + } + ++static void xe_oa_lock_vma(struct xe_exec_queue *q) ++{ ++ if (q->vm) { ++ down_read(&q->vm->lock); ++ xe_vm_lock(q->vm, false); ++ } ++} ++ ++static void xe_oa_unlock_vma(struct xe_exec_queue *q) ++{ ++ if (q->vm) { ++ xe_vm_unlock(q->vm); ++ up_read(&q->vm->lock); ++ } ++} ++ + static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, + struct xe_bb *bb) + { ++ struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; + struct xe_sched_job 
*job; + struct dma_fence *fence; + int err = 0; + +- /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ +- job = xe_bb_create_job(stream->k_exec_q, bb); ++ xe_oa_lock_vma(q); ++ ++ job = xe_bb_create_job(q, bb); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto exit; + } ++ job->ggtt = true; + + if (deps == XE_OA_SUBMIT_ADD_DEPS) { + for (int i = 0; i < stream->num_syncs && !err; i++) +@@ -604,10 +617,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + ++ xe_oa_unlock_vma(q); ++ + return fence; + err_put_job: + xe_sched_job_put(job); + exit: ++ xe_oa_unlock_vma(q); + return ERR_PTR(err); + } + +@@ -655,63 +671,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) + free_oa_config_bo(oa_bo); + } + +-static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, +- struct xe_bb *bb, const struct flex *flex, u32 count) +-{ +- u32 offset = xe_bo_ggtt_addr(lrc->bo); +- +- do { +- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); +- bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); +- bb->cs[bb->len++] = 0; +- bb->cs[bb->len++] = flex->value; +- +- } while (flex++, --count); +-} +- +-static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, +- const struct flex *flex, u32 count) +-{ +- struct dma_fence *fence; +- struct xe_bb *bb; +- int err; +- +- bb = xe_bb_new(stream->gt, 4 * count, false); +- if (IS_ERR(bb)) { +- err = PTR_ERR(bb); +- goto exit; +- } +- +- xe_oa_store_flex(stream, lrc, bb, flex, count); +- +- fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); +- if (IS_ERR(fence)) { +- err = PTR_ERR(fence); +- goto free_bb; +- } +- xe_bb_free(bb, fence); +- dma_fence_put(fence); +- +- return 0; +-free_bb: +- xe_bb_free(bb, NULL); +-exit: +- return err; +-} +- +-static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) ++static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) + { + struct dma_fence *fence; + struct xe_bb *bb; + int err; + +- bb = xe_bb_new(stream->gt, 3, false); ++ bb = xe_bb_new(stream->gt, 2 * count + 1, false); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto exit; + } + +- write_cs_mi_lri(bb, reg_lri, 1); ++ write_cs_mi_lri(bb, reg_lri, count); + + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { +@@ -731,70 +703,54 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re + static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) + { + const struct xe_oa_format *format = stream->oa_buffer.format; +- struct xe_lrc *lrc = stream->exec_q->lrc[0]; +- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); + +- struct flex regs_context[] = { ++ struct xe_oa_reg reg_lri[] = { + { + OACTXCONTROL(stream->hwe->mmio_base), +- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, + enable ? OA_COUNTER_RESUME : 0, + }, ++ { ++ OAR_OACONTROL, ++ oacontrol, ++ }, + { + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), +- regs_offset + CTX_CONTEXT_CONTROL, +- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), ++ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, ++ enable ? 
CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + }, + }; +- struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; +- int err; +- +- /* Modify stream hwe context image with regs_context */ +- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], +- regs_context, ARRAY_SIZE(regs_context)); +- if (err) +- return err; + +- /* Apply reg_lri using LRI */ +- return xe_oa_load_with_lri(stream, ®_lri); ++ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); + } + + static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) + { + const struct xe_oa_format *format = stream->oa_buffer.format; +- struct xe_lrc *lrc = stream->exec_q->lrc[0]; +- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); +- struct flex regs_context[] = { ++ struct xe_oa_reg reg_lri[] = { + { + OACTXCONTROL(stream->hwe->mmio_base), +- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, + enable ? OA_COUNTER_RESUME : 0, + }, ++ { ++ OAC_OACONTROL, ++ oacontrol ++ }, + { + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), +- regs_offset + CTX_CONTEXT_CONTROL, +- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | ++ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, ++ enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), + }, + }; +- struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; +- int err; + + /* Set ccs select to enable programming of OAC_OACONTROL */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); + +- /* Modify stream hwe context image with regs_context */ +- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], +- regs_context, ARRAY_SIZE(regs_context)); +- if (err) +- return err; +- +- /* Apply reg_lri using LRI */ +- return xe_oa_load_with_lri(stream, ®_lri); ++ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); + } + + static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) +@@ -1933,8 +1889,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + +- if (param.exec_q->width > 1) +- drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); ++ if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) ++ return -EOPNOTSUPP; + } + + /* +diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c +index 0be4f489d3e12..9f327f27c0726 100644 +--- a/drivers/gpu/drm/xe/xe_ring_ops.c ++++ b/drivers/gpu/drm/xe/xe_ring_ops.c +@@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, + + static u32 get_ppgtt_flag(struct xe_sched_job *job) + { +- return job->q->vm ? BIT(8) : 0; ++ if (job->q->vm && !job->ggtt) ++ return BIT(8); ++ ++ return 0; + } + + static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) +diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h +index 0d3f76fb05cea..c207361bf43e1 100644 +--- a/drivers/gpu/drm/xe/xe_sched_job_types.h ++++ b/drivers/gpu/drm/xe/xe_sched_job_types.h +@@ -57,6 +57,8 @@ struct xe_sched_job { + u32 migrate_flush_flags; + /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ + bool ring_ops_flush_tlb; ++ /** @ggtt: mapped in ggtt. */ ++ bool ggtt; + /** @ptrs: per instance pointers. */ + struct xe_job_ptrs ptrs[]; + }; +-- +2.39.5 +