From 98f74ffd9ced10d2f21097d3160fce59971403ee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Dec 2024 13:40:35 +0100 Subject: [PATCH] 6.1-stable patches added patches: arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch bpf-fix-helper-writes-to-read-only-maps.patch drm-amdgpu-rework-resume-handling-for-display-v2.patch drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch drm-ttm-print-the-memory-decryption-status-just-once.patch gve-fixes-for-napi_poll-when-budget-is-0.patch mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch veth-use-tstats-per-cpu-traffic-counters.patch --- ...pport-for-smcccv1.3-sve-discard-hint.patch | 224 +++++++++++++ ...le-cpu-state-when-handling-sve-traps.patch | 91 ++++++ ...tatus-of-avs_pcm_hw_constraints_init.patch | 48 +++ ...-stats-for-bpf_redirect_peer-traffic.patch | 69 ++++ ...-fix-helper-writes-to-read-only-maps.patch | 235 ++++++++++++++ ...ework-resume-handling-for-display-v2.patch | 116 +++++++ ...d-tt-pages-are-decrypted-when-needed.patch | 153 +++++++++ ...e-memory-decryption-status-just-once.patch | 44 +++ ...fixes-for-napi_poll-when-budget-is-0.patch | 78 +++++ ...ap_file-lsm-hook-in-remap_file_pages.patch | 79 +++++ ...ocation-to-core-and-convert-veth-vrf.patch | 295 ++++++++++++++++++ ...check-in-amdgpu_debugfs_gprwave_read.patch | 34 ++ queue-6.1/series | 13 + ...-use-tstats-per-cpu-traffic-counters.patch | 114 +++++++ 14 files changed, 1593 insertions(+) create mode 100644 queue-6.1/arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch create mode 100644 
queue-6.1/arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch create mode 100644 queue-6.1/asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch create mode 100644 queue-6.1/bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch create mode 100644 queue-6.1/bpf-fix-helper-writes-to-read-only-maps.patch create mode 100644 queue-6.1/drm-amdgpu-rework-resume-handling-for-display-v2.patch create mode 100644 queue-6.1/drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch create mode 100644 queue-6.1/drm-ttm-print-the-memory-decryption-status-just-once.patch create mode 100644 queue-6.1/gve-fixes-for-napi_poll-when-budget-is-0.patch create mode 100644 queue-6.1/mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch create mode 100644 queue-6.1/net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch create mode 100644 queue-6.1/revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch create mode 100644 queue-6.1/veth-use-tstats-per-cpu-traffic-counters.patch diff --git a/queue-6.1/arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch b/queue-6.1/arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch new file mode 100644 index 00000000000..60a002e1b0a --- /dev/null +++ b/queue-6.1/arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch @@ -0,0 +1,224 @@ +From 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Wed, 6 Nov 2024 16:04:48 +0000 +Subject: arm64: smccc: Remove broken support for SMCCCv1.3 SVE discard hint + +From: Mark Rutland + +commit 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 upstream. + +SMCCCv1.3 added a hint bit which callers can set in an SMCCC function ID +(AKA "FID") to indicate that it is acceptable for the SMCCC +implementation to discard SVE and/or SME state over a specific SMCCC +call. 
The kernel support for using this hint is broken and SMCCC calls +may clobber the SVE and/or SME state of arbitrary tasks, though FPSIMD +state is unaffected. + +The kernel support is intended to use the hint when there is no SVE or +SME state to save, and to do this it checks whether TIF_FOREIGN_FPSTATE +is set or TIF_SVE is clear in assembly code: + +| ldr <flags>, [<current_task>, #TSK_TI_FLAGS] +| tbnz <flags>, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? +| tbnz <flags>, #TIF_SVE, 2f // Does that state include SVE? +| +| 1: orr <fid>, <fid>, ARM_SMCCC_1_3_SVE_HINT +| 2: +| << SMCCC call using FID >> + +This is not safe as-is: + +(1) SMCCC calls can be made in a preemptible context and preemption can + result in TIF_FOREIGN_FPSTATE being set or cleared at arbitrary + points in time. Thus checking for TIF_FOREIGN_FPSTATE provides no + guarantee. + +(2) TIF_FOREIGN_FPSTATE only indicates that the live FP/SVE/SME state in + the CPU does not belong to the current task, and does not indicate + that clobbering this state is acceptable. + + When the live CPU state is clobbered it is necessary to update + fpsimd_last_state.st to ensure that a subsequent context switch will + reload FP/SVE/SME state from memory rather than consuming the + clobbered state. This and the SMCCC call itself must happen in a + critical section with preemption disabled to avoid races. + +(3) Live SVE/SME state can exist with TIF_SVE clear (e.g. with only + TIF_SME set), and checking TIF_SVE alone is insufficient. + +Remove the broken support for the SMCCCv1.3 SVE saving hint. This is +effectively a revert of commits: + +* cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") +* a7c3acca5380 ("arm64: smccc: Save lr before calling __arm_smccc_sve_check()") + +... leaving behind the ARM_SMCCC_VERSION_1_3 and ARM_SMCCC_1_3_SVE_HINT +definitions, since these are simply definitions from the SMCCC +specification, and the latter is used in KVM via ARM_SMCCC_CALL_HINTS. 
+ +If we want to bring this back in future, we'll probably want to handle +this logic in C where we can use all the usual FPSIMD/SVE/SME helper +functions, and that'll likely require some rework of the SMCCC code +and/or its callers. + +Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") +Signed-off-by: Mark Rutland +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: Marc Zyngier +Cc: Mark Brown +Cc: Will Deacon +Cc: stable@vger.kernel.org +Reviewed-by: Mark Brown +Link: https://lore.kernel.org/r/20241106160448.2712997-1-mark.rutland@arm.com +Signed-off-by: Will Deacon +[ Mark: fix conflicts in ] +Signed-off-by: Mark Rutland +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/smccc-call.S | 35 +++-------------------------------- + drivers/firmware/smccc/smccc.c | 4 ---- + include/linux/arm-smccc.h | 30 ++---------------------------- + 3 files changed, 5 insertions(+), 64 deletions(-) + +--- a/arch/arm64/kernel/smccc-call.S ++++ b/arch/arm64/kernel/smccc-call.S +@@ -7,48 +7,19 @@ + + #include + #include +-#include +- +-/* +- * If we have SMCCC v1.3 and (as is likely) no SVE state in +- * the registers then set the SMCCC hint bit to say there's no +- * need to preserve it. Do this by directly adjusting the SMCCC +- * function value which is already stored in x0 ready to be called. +- */ +-SYM_FUNC_START(__arm_smccc_sve_check) +- +- ldr_l x16, smccc_has_sve_hint +- cbz x16, 2f +- +- get_current_task x16 +- ldr x16, [x16, #TSK_TI_FLAGS] +- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? +- tbnz x16, #TIF_SVE, 2f // Does that state include SVE? +- +-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT +- +-2: ret +-SYM_FUNC_END(__arm_smccc_sve_check) +-EXPORT_SYMBOL(__arm_smccc_sve_check) + + .macro SMCCC instr +- stp x29, x30, [sp, #-16]! 
+- mov x29, sp +-alternative_if ARM64_SVE +- bl __arm_smccc_sve_check +-alternative_else_nop_endif + \instr #0 +- ldr x4, [sp, #16] ++ ldr x4, [sp] + stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] + stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] +- ldr x4, [sp, #24] ++ ldr x4, [sp, #8] + cbz x4, 1f /* no quirk structure */ + ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] + cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 + b.ne 1f + str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] +-1: ldp x29, x30, [sp], #16 +- ret ++1: ret + .endm + + /* +--- a/drivers/firmware/smccc/smccc.c ++++ b/drivers/firmware/smccc/smccc.c +@@ -16,7 +16,6 @@ static u32 smccc_version = ARM_SMCCC_VER + static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE; + + bool __ro_after_init smccc_trng_available = false; +-u64 __ro_after_init smccc_has_sve_hint = false; + s32 __ro_after_init smccc_soc_id_version = SMCCC_RET_NOT_SUPPORTED; + s32 __ro_after_init smccc_soc_id_revision = SMCCC_RET_NOT_SUPPORTED; + +@@ -28,9 +27,6 @@ void __init arm_smccc_version_init(u32 v + smccc_conduit = conduit; + + smccc_trng_available = smccc_probe_trng(); +- if (IS_ENABLED(CONFIG_ARM64_SVE) && +- smccc_version >= ARM_SMCCC_VERSION_1_3) +- smccc_has_sve_hint = true; + + if ((smccc_version >= ARM_SMCCC_VERSION_1_2) && + (smccc_conduit != SMCCC_CONDUIT_NONE)) { +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -224,8 +224,6 @@ u32 arm_smccc_get_version(void); + + void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); + +-extern u64 smccc_has_sve_hint; +- + /** + * arm_smccc_get_soc_id_version() + * +@@ -324,15 +322,6 @@ struct arm_smccc_quirk { + }; + + /** +- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls +- * +- * Sets the SMCCC hint bit to indicate if there is live state in the SVE +- * registers, this modifies x0 in place and should never be called from C +- * code. 
+- */ +-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); +- +-/** + * __arm_smccc_smc() - make SMC calls + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 +@@ -399,20 +388,6 @@ asmlinkage void __arm_smccc_hvc(unsigned + + #endif + +-/* nVHE hypervisor doesn't have a current thread so needs separate checks */ +-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__) +- +-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \ +- ARM64_SVE) +-#define smccc_sve_clobbers "x16", "x30", "cc", +- +-#else +- +-#define SMCCC_SVE_CHECK +-#define smccc_sve_clobbers +- +-#endif +- + #define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x + + #define __count_args(...) \ +@@ -480,7 +455,7 @@ asmlinkage void __arm_smccc_hvc(unsigned + + #define ___constraints(count) \ + : __constraint_read_ ## count \ +- : smccc_sve_clobbers "memory" ++ : "memory" + #define __constraints(count) ___constraints(count) + + /* +@@ -495,8 +470,7 @@ asmlinkage void __arm_smccc_hvc(unsigned + register unsigned long r2 asm("r2"); \ + register unsigned long r3 asm("r3"); \ + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ +- asm volatile(SMCCC_SVE_CHECK \ +- inst "\n" : \ ++ asm volatile(inst "\n" : \ + "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ + __constraints(__count_args(__VA_ARGS__))); \ + if (___res) \ diff --git a/queue-6.1/arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch b/queue-6.1/arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch new file mode 100644 index 00000000000..7e9ee3a0021 --- /dev/null +++ b/queue-6.1/arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch @@ -0,0 +1,91 @@ +From 751ecf6afd6568adc98f2a6052315552c0483d18 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Wed, 30 Oct 2024 20:23:50 +0000 +Subject: arm64/sve: Discard stale CPU state when handling SVE traps + +From: Mark Brown + +commit 
751ecf6afd6568adc98f2a6052315552c0483d18 upstream. + +The logic for handling SVE traps manipulates saved FPSIMD/SVE state +incorrectly, and a race with preemption can result in a task having +TIF_SVE set and TIF_FOREIGN_FPSTATE clear even though the live CPU state +is stale (e.g. with SVE traps enabled). This has been observed to result +in warnings from do_sve_acc() where SVE traps are not expected while +TIF_SVE is set: + +| if (test_and_set_thread_flag(TIF_SVE)) +| WARN_ON(1); /* SVE access shouldn't have trapped */ + +Warnings of this form have been reported intermittently, e.g. + + https://lore.kernel.org/linux-arm-kernel/CA+G9fYtEGe_DhY2Ms7+L7NKsLYUomGsgqpdBj+QwDLeSg=JhGg@mail.gmail.com/ + https://lore.kernel.org/linux-arm-kernel/000000000000511e9a060ce5a45c@google.com/ + +The race can occur when the SVE trap handler is preempted before and +after manipulating the saved FPSIMD/SVE state, starting and ending on +the same CPU, e.g. + +| void do_sve_acc(unsigned long esr, struct pt_regs *regs) +| { +| // Trap on CPU 0 with TIF_SVE clear, SVE traps enabled +| // task->fpsimd_cpu is 0. +| // per_cpu_ptr(&fpsimd_last_state, 0) is task. +| +| ... +| +| // Preempted; migrated from CPU 0 to CPU 1. +| // TIF_FOREIGN_FPSTATE is set. +| +| get_cpu_fpsimd_context(); +| +| if (test_and_set_thread_flag(TIF_SVE)) +| WARN_ON(1); /* SVE access shouldn't have trapped */ +| +| sve_init_regs() { +| if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { +| ... +| } else { +| fpsimd_to_sve(current); +| current->thread.fp_type = FP_STATE_SVE; +| } +| } +| +| put_cpu_fpsimd_context(); +| +| // Preempted; migrated from CPU 1 to CPU 0. 
+| // task->fpsimd_cpu is still 0 +| // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then: +| // - Stale HW state is reused (with SVE traps enabled) +| // - TIF_FOREIGN_FPSTATE is cleared +| // - A return to userspace skips HW state restore +| } + +Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set +by calling fpsimd_flush_task_state() to detach from the saved CPU +state. This ensures that a subsequent context switch will not reuse the +stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the +new state to be reloaded from memory prior to a return to userspace. + +Fixes: cccb78ce89c4 ("arm64/sve: Rework SVE access trap to convert state in registers") +Reported-by: Mark Rutland +Signed-off-by: Mark Brown +Cc: stable@vger.kernel.org +Reviewed-by: Mark Rutland +Link: https://lore.kernel.org/r/20241030-arm64-fpsimd-foreign-flush-v1-1-bd7bd66905a2@kernel.org +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/fpsimd.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -1383,6 +1383,7 @@ static void sve_init_regs(void) + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); ++ fpsimd_flush_task_state(current); + } + } + diff --git a/queue-6.1/asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch b/queue-6.1/asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch new file mode 100644 index 00000000000..e28a044d7f3 --- /dev/null +++ b/queue-6.1/asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch @@ -0,0 +1,48 @@ +From a0aae96be5ffc5b456ca07bfe1385b721c20e184 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= + +Date: Thu, 10 Oct 2024 13:20:08 +0200 +Subject: ASoC: Intel: avs: Fix return status of avs_pcm_hw_constraints_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Amadeusz Sławiński + +commit 
a0aae96be5ffc5b456ca07bfe1385b721c20e184 upstream. + +Check for return code from avs_pcm_hw_constraints_init() in +avs_dai_fe_startup() only checks if value is different from 0. Currently +function can return positive value, change it to return 0 on success. + +Reviewed-by: Cezary Rojewski +Signed-off-by: Amadeusz Sławiński +I've observed KASAN on our setups and while patch itself is correct +regardless. Problem seems to be caused by recent changes to rates, as +this started happening after recent patchsets and doesn't reproduce with +those reverted +https://lore.kernel.org/linux-sound/20240905-alsa-12-24-128-v1-0-8371948d3921@baylibre.com/ +https://lore.kernel.org/linux-sound/20240911135756.24434-1-tiwai@suse.de/ +I've tested using Mark tree, where they are both applied and for some +reason snd_pcm_hw_constraint_minmax() started returning positive value, +while previously it returned 0. I'm bit worried if it signals some +potential deeper problem regarding constraints with above changes. 
+ +Link: https://patch.msgid.link/20241010112008.545526-1-amadeuszx.slawinski@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/intel/avs/pcm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/sound/soc/intel/avs/pcm.c ++++ b/sound/soc/intel/avs/pcm.c +@@ -540,7 +540,7 @@ static int avs_dai_fe_hw_free(struct snd + if (ret < 0) + dev_dbg(dai->dev, "Failed to free pages!\n"); + +- return ret; ++ return 0; + } + + static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) diff --git a/queue-6.1/bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch b/queue-6.1/bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch new file mode 100644 index 00000000000..e5ae369670b --- /dev/null +++ b/queue-6.1/bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch @@ -0,0 +1,69 @@ +From daniel@iogearbox.net Fri Dec 6 16:34:43 2024 +From: Daniel Borkmann +Date: Fri, 6 Dec 2024 16:34:02 +0100 +Subject: bpf: Fix dev's rx stats for bpf_redirect_peer traffic +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Youlun Zhang , Nikolay Aleksandrov , Martin KaFai Lau +Message-ID: <20241206153403.273068-2-daniel@iogearbox.net> + +From: Peilin Ye + +[ Upstream commit 024ee930cb3c9ae49e4266aee89cfde0ebb407e1 ] + +Traffic redirected by bpf_redirect_peer() (used by recent CNIs like Cilium) +is not accounted for in the RX stats of supported devices (that is, veth +and netkit), confusing user space metrics collectors such as cAdvisor [0], +as reported by Youlun. + +Fix it by calling dev_sw_netstats_rx_add() in skb_do_redirect(), to update +RX traffic counters. Devices that support ndo_get_peer_dev _must_ use the +@tstats per-CPU counters (instead of @lstats, or @dstats). 
+ +To make this more fool-proof, error out when ndo_get_peer_dev is set but +@tstats are not selected. + + [0] Specifically, the "container_network_receive_{byte,packet}s_total" + counters are affected. + +Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper") +Reported-by: Youlun Zhang +Signed-off-by: Peilin Ye +Co-developed-by: Daniel Borkmann +Signed-off-by: Daniel Borkmann +Reviewed-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20231114004220.6495-6-daniel@iogearbox.net +Signed-off-by: Martin KaFai Lau +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 8 ++++++++ + net/core/filter.c | 1 + + 2 files changed, 9 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9995,6 +9995,14 @@ static int netdev_do_alloc_pcpu_stats(st + { + void __percpu *v; + ++ /* Drivers implementing ndo_get_peer_dev must support tstat ++ * accounting, so that skb_do_redirect() can bump the dev's ++ * RX stats upon network namespace switch. ++ */ ++ if (dev->netdev_ops->ndo_get_peer_dev && ++ dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS) ++ return -EOPNOTSUPP; ++ + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return 0; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2491,6 +2491,7 @@ int skb_do_redirect(struct sk_buff *skb) + net_eq(net, dev_net(dev)))) + goto out_drop; + skb->dev = dev; ++ dev_sw_netstats_rx_add(dev, skb->len); + return -EAGAIN; + } + return flags & BPF_F_NEIGH ? 
diff --git a/queue-6.1/bpf-fix-helper-writes-to-read-only-maps.patch b/queue-6.1/bpf-fix-helper-writes-to-read-only-maps.patch new file mode 100644 index 00000000000..9382c9cc531 --- /dev/null +++ b/queue-6.1/bpf-fix-helper-writes-to-read-only-maps.patch @@ -0,0 +1,235 @@ +From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Fri, 13 Sep 2024 21:17:48 +0200 +Subject: bpf: Fix helper writes to read-only maps + +From: Daniel Borkmann + +commit 32556ce93bc45c730829083cb60f95a2728ea48b upstream. + +Lonial found an issue that despite user- and BPF-side frozen BPF map +(like in case of .rodata), it was still possible to write into it from +a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT} +as arguments. + +In check_func_arg() when the argument is as mentioned, the meta->raw_mode +is never set. Later, check_helper_mem_access(), under the case of +PTR_TO_MAP_VALUE as register base type, it assumes BPF_READ for the +subsequent call to check_map_access_type() and given the BPF map is +read-only it succeeds. + +The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT +when results are written into them as opposed to read out of them. The +latter indicates that it's okay to pass a pointer to uninitialized memory +as the memory is written to anyway. + +However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM +just with additional alignment requirement. So it is better to just get +rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the +fixed size memory types. For this, add MEM_ALIGNED to additionally ensure +alignment given these helpers write directly into the args via *<ptr> = val. +The .arg*_size has been initialized reflecting the actual sizeof(*<ptr>). 
+ +MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated +argument types, since in !MEM_FIXED_SIZE cases the verifier does not know +the buffer size a priori and therefore cannot blindly write * = val. + +Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") +Reported-by: Lonial Con +Signed-off-by: Daniel Borkmann +Acked-by: Andrii Nakryiko +Acked-by: Shung-Hsi Yu +Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net +Signed-off-by: Alexei Starovoitov +[ Resolve merge conflict in include/linux/bpf.h and merge conflict in + kernel/bpf/verifier.c.] +Signed-off-by: Bin Lan +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf.h | 7 +++++-- + kernel/bpf/helpers.c | 6 ++++-- + kernel/bpf/syscall.c | 3 ++- + kernel/bpf/verifier.c | 41 +++++------------------------------------ + kernel/trace/bpf_trace.c | 6 ++++-- + net/core/filter.c | 6 ++++-- + 6 files changed, 24 insertions(+), 45 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -475,6 +475,11 @@ enum bpf_type_flag { + /* Size is known at compile time. */ + MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + ++ /* Memory must be aligned on some architectures, used in combination with ++ * MEM_FIXED_SIZE. 
++ */ ++ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), ++ + __BPF_TYPE_FLAG_MAX, + __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, + }; +@@ -510,8 +515,6 @@ enum bpf_arg_type { + ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ +- ARG_PTR_TO_INT, /* pointer to int */ +- ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ + ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ + ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -531,7 +531,8 @@ const struct bpf_func_proto bpf_strtol_p + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_LONG, ++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg4_size = sizeof(s64), + }; + + BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, +@@ -560,7 +561,8 @@ const struct bpf_func_proto bpf_strtoul_ + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_LONG, ++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg4_size = sizeof(u64), + }; + + BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -5260,7 +5260,8 @@ static const struct bpf_func_proto bpf_k + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_LONG, ++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg4_size = sizeof(u64), + }; + + static const struct bpf_func_proto * +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5818,16 +5818,6 @@ static bool arg_type_is_dynptr(enum bpf_ + return base_type(type) == ARG_PTR_TO_DYNPTR; + } + 
+-static int int_ptr_type_to_size(enum bpf_arg_type type) +-{ +- if (type == ARG_PTR_TO_INT) +- return sizeof(u32); +- else if (type == ARG_PTR_TO_LONG) +- return sizeof(u64); +- +- return -EINVAL; +-} +- + static int resolve_map_arg_type(struct bpf_verifier_env *env, + const struct bpf_call_arg_meta *meta, + enum bpf_arg_type *arg_type) +@@ -5908,16 +5898,6 @@ static const struct bpf_reg_types mem_ty + }, + }; + +-static const struct bpf_reg_types int_ptr_types = { +- .types = { +- PTR_TO_STACK, +- PTR_TO_PACKET, +- PTR_TO_PACKET_META, +- PTR_TO_MAP_KEY, +- PTR_TO_MAP_VALUE, +- }, +-}; +- + static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; + static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; + static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; +@@ -5955,8 +5935,6 @@ static const struct bpf_reg_types *compa + [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, + [ARG_PTR_TO_MEM] = &mem_types, + [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, +- [ARG_PTR_TO_INT] = &int_ptr_types, +- [ARG_PTR_TO_LONG] = &int_ptr_types, + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK] = &stack_ptr_types, +@@ -6303,9 +6281,11 @@ skip_type_check: + */ + meta->raw_mode = arg_type & MEM_UNINIT; + if (arg_type & MEM_FIXED_SIZE) { +- err = check_helper_mem_access(env, regno, +- fn->arg_size[arg], false, +- meta); ++ err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); ++ if (err) ++ return err; ++ if (arg_type & MEM_ALIGNED) ++ err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); + } + break; + case ARG_CONST_SIZE: +@@ -6373,17 +6353,6 @@ skip_type_check: + if (err) + return err; + break; +- case ARG_PTR_TO_INT: +- case ARG_PTR_TO_LONG: +- { +- int size = int_ptr_type_to_size(arg_type); +- +- err = check_helper_mem_access(env, regno, size, false, meta); +- if (err) +- return err; +- err = check_ptr_alignment(env, reg, 
0, size, true); +- break; +- } + case ARG_PTR_TO_CONST_STR: + { + struct bpf_map *map = reg->map_ptr; +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -1192,7 +1192,8 @@ static const struct bpf_func_proto bpf_g + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_LONG, ++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg3_size = sizeof(u64), + }; + + BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) +@@ -1208,7 +1209,8 @@ static const struct bpf_func_proto bpf_g + .func = get_func_ret, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_LONG, ++ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg2_size = sizeof(u64), + }; + + BPF_CALL_1(get_func_arg_cnt, void *, ctx) +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -6233,7 +6233,8 @@ static const struct bpf_func_proto bpf_s + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_INT, ++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg3_size = sizeof(u32), + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, + }; +@@ -6244,7 +6245,8 @@ static const struct bpf_func_proto bpf_x + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_INT, ++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, ++ .arg3_size = sizeof(u32), + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, + }; diff --git a/queue-6.1/drm-amdgpu-rework-resume-handling-for-display-v2.patch b/queue-6.1/drm-amdgpu-rework-resume-handling-for-display-v2.patch new file mode 100644 index 00000000000..f83bf01c217 --- /dev/null +++ b/queue-6.1/drm-amdgpu-rework-resume-handling-for-display-v2.patch @@ -0,0 +1,116 @@ +From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Mon, 25 Nov 2024 
13:59:09 -0500 +Subject: drm/amdgpu: rework resume handling for display (v2) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher + +commit 73dae652dcac776296890da215ee7dec357a1032 upstream. + +Split resume into a 3rd step to handle displays when DCC is +enabled on DCN 4.0.1. Move display after the buffer funcs +have been re-enabled so that the GPU will do the move and +properly set the DCC metadata for DCN. + +v2: fix fence irq resume ordering + +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org # 6.11.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++++++++++++++++++-- + 1 file changed, 43 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -3242,7 +3242,7 @@ static int amdgpu_device_ip_resume_phase + * + * @adev: amdgpu_device pointer + * +- * First resume function for hardware IPs. The list of all the hardware ++ * Second resume function for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the resume callbacks are run for + * all blocks except COMMON, GMC, and IH. 
resume puts the hardware into a + * functional state after a suspend and updates the software state as +@@ -3260,6 +3260,7 @@ static int amdgpu_device_ip_resume_phase + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || ++ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) + continue; + r = adev->ip_blocks[i].version->funcs->resume(adev); +@@ -3284,6 +3285,36 @@ static int amdgpu_device_ip_resume_phase + } + + /** ++ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs ++ * ++ * @adev: amdgpu_device pointer ++ * ++ * Third resume function for hardware IPs. The list of all the hardware ++ * IPs that make up the asic is walked and the resume callbacks are run for ++ * all DCE. resume puts the hardware into a functional state after a suspend ++ * and updates the software state as necessary. This function is also used ++ * for restoring the GPU after a GPU reset. ++ * ++ * Returns 0 on success, negative error code on failure. 
++ */ ++static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) ++{ ++ int i, r; ++ ++ for (i = 0; i < adev->num_ip_blocks; i++) { ++ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) ++ continue; ++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { ++ r = adev->ip_blocks[i].version->funcs->resume(adev); ++ if (r) ++ return r; ++ } ++ } ++ ++ return 0; ++} ++ ++/** + * amdgpu_device_ip_resume - run resume for hardware IPs + * + * @adev: amdgpu_device pointer +@@ -3313,6 +3344,13 @@ static int amdgpu_device_ip_resume(struc + + r = amdgpu_device_ip_resume_phase2(adev); + ++ if (r) ++ return r; ++ ++ amdgpu_fence_driver_hw_init(adev); ++ ++ r = amdgpu_device_ip_resume_phase3(adev); ++ + return r; + } + +@@ -4311,7 +4349,6 @@ int amdgpu_device_resume(struct drm_devi + dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); + return r; + } +- amdgpu_fence_driver_hw_init(adev); + + r = amdgpu_device_ip_late_init(adev); + if (r) +@@ -5065,6 +5102,10 @@ int amdgpu_do_asic_reset(struct list_hea + if (r) + goto out; + ++ r = amdgpu_device_ip_resume_phase3(tmp_adev); ++ if (r) ++ goto out; ++ + if (vram_lost) + amdgpu_device_fill_reset_magic(tmp_adev); + diff --git a/queue-6.1/drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch b/queue-6.1/drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch new file mode 100644 index 00000000000..31345343602 --- /dev/null +++ b/queue-6.1/drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch @@ -0,0 +1,153 @@ +From stable+bounces-100113-greg=kroah.com@vger.kernel.org Mon Dec 9 10:49:51 2024 +From: Ajay Kaher +Date: Mon, 9 Dec 2024 09:49:03 +0000 +Subject: drm/ttm: Make sure the mapped tt pages are decrypted when needed +To: stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, 
dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Sasha Levin , Ye Li +Message-ID: <20241209094904.2547579-2-ajay.kaher@broadcom.com> + +From: Zack Rusin + +commit 71ce046327cfd3aef3f93d1c44e091395eb03f8f upstream. + +Some drivers require the mapped tt pages to be decrypted. In an ideal +world this would have been handled by the dma layer, but the TTM page +fault handling would have to be rewritten to be able to do that. + +A side-effect of the TTM page fault handling is using a dma allocation +per order (via ttm_pool_alloc_page) which makes it impossible to just +trivially use dma_mmap_attrs. As a result ttm has to be very careful +about trying to make its pgprot for the mapped tt pages match what +the dma layer thinks it is. At the ttm layer it's possible to +deduce the requirement to have tt pages decrypted by checking +whether coherent dma allocations have been requested and the system +is running with confidential computing technologies. + +This approach isn't ideal but keeping TTM matching DMA's expectations +for the page properties is in general fragile; unfortunately a proper +fix would require a rewrite of TTM's page fault handling. + +Fixes vmwgfx with SEV enabled. + +v2: Explicitly include cc_platform.h +v3: Use CC_ATTR_GUEST_MEM_ENCRYPT instead of CC_ATTR_MEM_ENCRYPT to +limit the scope to guests and log when memory decryption is enabled. 
+ +Signed-off-by: Zack Rusin +Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem") +Reviewed-by: Thomas Hellström +Acked-by: Christian König +Cc: Huang Rui +Cc: dri-devel@lists.freedesktop.org +Cc: linux-kernel@vger.kernel.org +Cc: # v5.14+ +Link: https://patchwork.freedesktop.org/patch/msgid/20230926040359.3040017-1-zack@kde.org +Signed-off-by: Sasha Levin +Signed-off-by: Ye Li +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++++++-- + drivers/gpu/drm/ttm/ttm_tt.c | 12 ++++++++++++ + include/drm/ttm/ttm_tt.h | 7 +++++++ + 3 files changed, 30 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/ttm/ttm_bo_util.c ++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c +@@ -274,7 +274,13 @@ pgprot_t ttm_io_prot(struct ttm_buffer_o + enum ttm_caching caching; + + man = ttm_manager_type(bo->bdev, res->mem_type); +- caching = man->use_tt ? bo->ttm->caching : res->bus.caching; ++ if (man->use_tt) { ++ caching = bo->ttm->caching; ++ if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED) ++ tmp = pgprot_decrypted(tmp); ++ } else { ++ caching = res->bus.caching; ++ } + + return ttm_prot_from_caching(caching, tmp); + } +@@ -317,6 +323,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu + .no_wait_gpu = false + }; + struct ttm_tt *ttm = bo->ttm; ++ struct ttm_resource_manager *man = ++ ttm_manager_type(bo->bdev, bo->resource->mem_type); + pgprot_t prot; + int ret; + +@@ -326,7 +334,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu + if (ret) + return ret; + +- if (num_pages == 1 && ttm->caching == ttm_cached) { ++ if (num_pages == 1 && ttm->caching == ttm_cached && ++ !(man->use_tt && (ttm->page_flags & TTM_TT_FLAG_DECRYPTED))) { + /* + * We're mapping a single page, and the desired + * page protection is consistent with the bo. 
+--- a/drivers/gpu/drm/ttm/ttm_tt.c ++++ b/drivers/gpu/drm/ttm/ttm_tt.c +@@ -31,11 +31,13 @@ + + #define pr_fmt(fmt) "[TTM] " fmt + ++#include + #include + #include + #include + #include + #include ++#include + #include + + #include "ttm_module.h" +@@ -59,6 +61,7 @@ static atomic_long_t ttm_dma32_pages_all + int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) + { + struct ttm_device *bdev = bo->bdev; ++ struct drm_device *ddev = bo->base.dev; + uint32_t page_flags = 0; + + dma_resv_assert_held(bo->base.resv); +@@ -80,6 +83,15 @@ int ttm_tt_create(struct ttm_buffer_obje + pr_err("Illegal buffer object type\n"); + return -EINVAL; + } ++ /* ++ * When using dma_alloc_coherent with memory encryption the ++ * mapped TT pages need to be decrypted or otherwise the drivers ++ * will end up sending encrypted mem to the gpu. ++ */ ++ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { ++ page_flags |= TTM_TT_FLAG_DECRYPTED; ++ drm_info(ddev, "TT memory decryption enabled."); ++ } + + bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags); + if (unlikely(bo->ttm == NULL)) +--- a/include/drm/ttm/ttm_tt.h ++++ b/include/drm/ttm/ttm_tt.h +@@ -79,6 +79,12 @@ struct ttm_tt { + * page_flags = TTM_TT_FLAG_EXTERNAL | + * TTM_TT_FLAG_EXTERNAL_MAPPABLE; + * ++ * TTM_TT_FLAG_DECRYPTED: The mapped ttm pages should be marked as ++ * not encrypted. The framework will try to match what the dma layer ++ * is doing, but note that it is a little fragile because ttm page ++ * fault handling abuses the DMA api a bit and dma_map_attrs can't be ++ * used to assure pgprot always matches. ++ * + * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is + * set by TTM after ttm_tt_populate() has successfully returned, and is + * then unset when TTM calls ttm_tt_unpopulate(). 
+@@ -87,6 +93,7 @@ struct ttm_tt { + #define TTM_TT_FLAG_ZERO_ALLOC (1 << 1) + #define TTM_TT_FLAG_EXTERNAL (1 << 2) + #define TTM_TT_FLAG_EXTERNAL_MAPPABLE (1 << 3) ++#define TTM_TT_FLAG_DECRYPTED (1 << 4) + + #define TTM_TT_FLAG_PRIV_POPULATED (1U << 31) + uint32_t page_flags; diff --git a/queue-6.1/drm-ttm-print-the-memory-decryption-status-just-once.patch b/queue-6.1/drm-ttm-print-the-memory-decryption-status-just-once.patch new file mode 100644 index 00000000000..e6d5e0f958b --- /dev/null +++ b/queue-6.1/drm-ttm-print-the-memory-decryption-status-just-once.patch @@ -0,0 +1,44 @@ +From stable+bounces-100114-greg=kroah.com@vger.kernel.org Mon Dec 9 10:50:05 2024 +From: Ajay Kaher +Date: Mon, 9 Dec 2024 09:49:04 +0000 +Subject: drm/ttm: Print the memory decryption status just once +To: stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Ye Li +Message-ID: <20241209094904.2547579-3-ajay.kaher@broadcom.com> + +From: Zack Rusin + +commit 27906e5d78248b19bcdfdae72049338c828897bb upstream. + +Stop printing the TT memory decryption status info each time tt is created +and instead print it just once. + +Reduces the spam in the system logs when running guests with SEV enabled. 
+ +Signed-off-by: Zack Rusin +Fixes: 71ce046327cf ("drm/ttm: Make sure the mapped tt pages are decrypted when needed") +Reviewed-by: Christian König +Cc: Thomas Hellström +Cc: dri-devel@lists.freedesktop.org +Cc: linux-kernel@vger.kernel.org +Cc: # v5.14+ +Link: https://patchwork.freedesktop.org/patch/msgid/20240408155605.1398631-1-zack.rusin@broadcom.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ye Li +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/ttm/ttm_tt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/ttm/ttm_tt.c ++++ b/drivers/gpu/drm/ttm/ttm_tt.c +@@ -90,7 +90,7 @@ int ttm_tt_create(struct ttm_buffer_obje + */ + if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + page_flags |= TTM_TT_FLAG_DECRYPTED; +- drm_info(ddev, "TT memory decryption enabled."); ++ drm_info_once(ddev, "TT memory decryption enabled."); + } + + bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags); diff --git a/queue-6.1/gve-fixes-for-napi_poll-when-budget-is-0.patch b/queue-6.1/gve-fixes-for-napi_poll-when-budget-is-0.patch new file mode 100644 index 00000000000..be2ff160e34 --- /dev/null +++ b/queue-6.1/gve-fixes-for-napi_poll-when-budget-is-0.patch @@ -0,0 +1,78 @@ +From 278a370c1766060d2144d6cf0b06c101e1043b6d Mon Sep 17 00:00:00 2001 +From: Ziwei Xiao +Date: Mon, 13 Nov 2023 16:41:44 -0800 +Subject: gve: Fixes for napi_poll when budget is 0 + +From: Ziwei Xiao + +commit 278a370c1766060d2144d6cf0b06c101e1043b6d upstream. + +Netpoll will explicitly pass the polling call with a budget of 0 to +indicate it's clearing the Tx path only. For the gve_rx_poll and +gve_xdp_poll, they were mistakenly taking the 0 budget as the indication +to do all the work. Add check to avoid the rx path and xdp path being +called when budget is 0. And also avoid napi_complete_done being called +when budget is 0 for netpoll. 
+ +Fixes: f5cedc84a30d ("gve: Add transmit and receive support") +Signed-off-by: Ziwei Xiao +Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com +Signed-off-by: Jakub Kicinski +Reviewed-by: Praveen Kaligineedi +Signed-off-by: Praveen Kaligineedi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/google/gve/gve_main.c | 7 +++++++ + drivers/net/ethernet/google/gve/gve_rx.c | 4 ---- + drivers/net/ethernet/google/gve/gve_tx.c | 4 ---- + 3 files changed, 7 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/google/gve/gve_main.c ++++ b/drivers/net/ethernet/google/gve/gve_main.c +@@ -202,6 +202,10 @@ static int gve_napi_poll(struct napi_str + + if (block->tx) + reschedule |= gve_tx_poll(block, budget); ++ ++ if (!budget) ++ return 0; ++ + if (block->rx) { + work_done = gve_rx_poll(block, budget); + reschedule |= work_done == budget; +@@ -242,6 +246,9 @@ static int gve_napi_poll_dqo(struct napi + if (block->tx) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + ++ if (!budget) ++ return 0; ++ + if (block->rx) { + work_done = gve_rx_poll_dqo(block, budget); + reschedule |= work_done == budget; +--- a/drivers/net/ethernet/google/gve/gve_rx.c ++++ b/drivers/net/ethernet/google/gve/gve_rx.c +@@ -778,10 +778,6 @@ int gve_rx_poll(struct gve_notify_block + + feat = block->napi.dev->features; + +- /* If budget is 0, do all the work */ +- if (budget == 0) +- budget = INT_MAX; +- + if (budget > 0) + work_done = gve_clean_rx_done(rx, budget, feat); + +--- a/drivers/net/ethernet/google/gve/gve_tx.c ++++ b/drivers/net/ethernet/google/gve/gve_tx.c +@@ -725,10 +725,6 @@ bool gve_tx_poll(struct gve_notify_block + u32 nic_done; + u32 to_do; + +- /* If budget is 0, do all the work */ +- if (budget == 0) +- budget = INT_MAX; +- + /* In TX path, it may try to clean completed pkts in order to xmit, + * to avoid cleaning conflict, use spin_lock(), it yields better + * concurrency between xmit/clean than netif's lock. 
diff --git a/queue-6.1/mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch b/queue-6.1/mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch new file mode 100644 index 00000000000..7d001b5896e --- /dev/null +++ b/queue-6.1/mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch @@ -0,0 +1,79 @@ +From ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 Mon Sep 17 00:00:00 2001 +From: Shu Han +Date: Tue, 17 Sep 2024 17:41:04 +0800 +Subject: mm: call the security_mmap_file() LSM hook in remap_file_pages() + +From: Shu Han + +commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 upstream. + +The remap_file_pages syscall handler calls do_mmap() directly, which +doesn't contain the LSM security check. And if the process has called +personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for +RW pages, this will actually result in remapping the pages to RWX, +bypassing a W^X policy enforced by SELinux. + +So we should check prot with the security_mmap_file LSM hook in the +remap_file_pages syscall handler before do_mmap() is called. Otherwise, it +potentially permits an attacker to bypass a W^X policy enforced by +SELinux. + +The bypass is similar to CVE-2016-10044, which bypasses the same thing via +AIO and can be found in [1]. 
+ +The PoC: + +$ cat > test.c + +int main(void) { + size_t pagesz = sysconf(_SC_PAGE_SIZE); + int mfd = syscall(SYS_memfd_create, "test", 0); + const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE, + MAP_SHARED, mfd, 0); + unsigned int old = syscall(SYS_personality, 0xffffffff); + syscall(SYS_personality, READ_IMPLIES_EXEC | old); + syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0); + syscall(SYS_personality, old); + // show the RWX page exists even if W^X policy is enforced + int fd = open("/proc/self/maps", O_RDONLY); + unsigned char buf2[1024]; + while (1) { + int ret = read(fd, buf2, 1024); + if (ret <= 0) break; + write(1, buf2, ret); + } + close(fd); +} + +$ gcc test.c -o test +$ ./test | grep rwx +7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted) + +Link: https://project-zero.issues.chromium.org/issues/42452389 [1] +Cc: stable@vger.kernel.org +Signed-off-by: Shu Han +Acked-by: Stephen Smalley +[PM: subject line tweaks] +Signed-off-by: Paul Moore +[ Resolve merge conflict in mm/mmap.c. 
] +Signed-off-by: Bin Lan +Signed-off-by: Greg Kroah-Hartman +--- + mm/mmap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -3021,8 +3021,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsign + flags |= MAP_LOCKED; + + file = get_file(vma->vm_file); ++ ret = security_mmap_file(vma->vm_file, prot, flags); ++ if (ret) ++ goto out_fput; + ret = do_mmap(vma->vm_file, start, size, + prot, flags, pgoff, &populate, NULL); ++out_fput: + fput(file); + out: + mmap_write_unlock(mm); diff --git a/queue-6.1/net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch b/queue-6.1/net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch new file mode 100644 index 00000000000..cfe6e2f9ff6 --- /dev/null +++ b/queue-6.1/net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch @@ -0,0 +1,295 @@ +From daniel@iogearbox.net Fri Dec 6 16:34:37 2024 +From: Daniel Borkmann +Date: Fri, 6 Dec 2024 16:34:01 +0100 +Subject: net: Move {l,t,d}stats allocation to core and convert veth & vrf +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov , David Ahern , Martin KaFai Lau +Message-ID: <20241206153403.273068-1-daniel@iogearbox.net> + +From: Daniel Borkmann + +[ Upstream commit 34d21de99cea9cb17967874313e5b0262527833c ] + +Move {l,t,d}stats allocation to the core and let netdevs pick the stats +type they need. That way the driver doesn't have to bother with error +handling (allocation failure checking, making sure free happens in the +right spot, etc) - all happening in the core. 
+ +Co-developed-by: Jakub Kicinski +Signed-off-by: Jakub Kicinski +Signed-off-by: Daniel Borkmann +Reviewed-by: Nikolay Aleksandrov +Cc: David Ahern +Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net +Signed-off-by: Martin KaFai Lau +Stable-dep-of: 024ee930cb3c ("bpf: Fix dev's rx stats for bpf_redirect_peer traffic") +[ Note: Simplified vrf bits to reduce patch given unrelated to the fix ] +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/veth.c | 16 +------------ + drivers/net/vrf.c | 24 ++++++-------------- + include/linux/netdevice.h | 30 ++++++++++++++++++++++---- + net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++--- + 4 files changed, 85 insertions(+), 38 deletions(-) + +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1381,25 +1381,12 @@ static void veth_free_queues(struct net_ + + static int veth_dev_init(struct net_device *dev) + { +- int err; +- +- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); +- if (!dev->lstats) +- return -ENOMEM; +- +- err = veth_alloc_queues(dev); +- if (err) { +- free_percpu(dev->lstats); +- return err; +- } +- +- return 0; ++ return veth_alloc_queues(dev); + } + + static void veth_dev_free(struct net_device *dev) + { + veth_free_queues(dev); +- free_percpu(dev->lstats); + } + + #ifdef CONFIG_NET_POLL_CONTROLLER +@@ -1625,6 +1612,7 @@ static void veth_setup(struct net_device + NETIF_F_HW_VLAN_STAG_RX); + dev->needs_free_netdev = true; + dev->priv_destructor = veth_dev_free; ++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; + dev->max_mtu = ETH_MAX_MTU; + + dev->hw_features = VETH_FEATURES; +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -121,22 +121,12 @@ struct net_vrf { + int ifindex; + }; + +-struct pcpu_dstats { +- u64 tx_pkts; +- u64 tx_bytes; +- u64 tx_drps; +- u64 rx_pkts; +- u64 rx_bytes; +- u64 rx_drps; +- struct u64_stats_sync syncp; +-}; +- + static void vrf_rx_stats(struct net_device *dev, int len) + { + struct 
pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); +- dstats->rx_pkts++; ++ dstats->rx_packets++; + dstats->rx_bytes += len; + u64_stats_update_end(&dstats->syncp); + } +@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_d + do { + start = u64_stats_fetch_begin_irq(&dstats->syncp); + tbytes = dstats->tx_bytes; +- tpkts = dstats->tx_pkts; +- tdrops = dstats->tx_drps; ++ tpkts = dstats->tx_packets; ++ tdrops = dstats->tx_drops; + rbytes = dstats->rx_bytes; +- rpkts = dstats->rx_pkts; ++ rpkts = dstats->rx_packets; + } while (u64_stats_fetch_retry_irq(&dstats->syncp, start)); + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; +@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + vrf_rx_stats(dev, len); + else +- this_cpu_inc(dev->dstats->rx_drps); ++ this_cpu_inc(dev->dstats->rx_drops); + + return NETDEV_TX_OK; + } +@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_bu + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); +- dstats->tx_pkts++; ++ dstats->tx_packets++; + dstats->tx_bytes += len; + u64_stats_update_end(&dstats->syncp); + } else { +- this_cpu_inc(dev->dstats->tx_drps); ++ this_cpu_inc(dev->dstats->tx_drops); + } + + return ret; +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1747,6 +1747,13 @@ enum netdev_ml_priv_type { + ML_PRIV_CAN, + }; + ++enum netdev_stat_type { ++ NETDEV_PCPU_STAT_NONE, ++ NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ ++ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ ++ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ ++}; ++ + /** + * struct net_device - The DEVICE structure. 
+ * +@@ -1941,10 +1948,14 @@ enum netdev_ml_priv_type { + * + * @ml_priv: Mid-layer private + * @ml_priv_type: Mid-layer private type +- * @lstats: Loopback statistics +- * @tstats: Tunnel statistics +- * @dstats: Dummy statistics +- * @vstats: Virtual ethernet statistics ++ * ++ * @pcpu_stat_type: Type of device statistics which the core should ++ * allocate/free: none, lstats, tstats, dstats. none ++ * means the driver is handling statistics allocation/ ++ * freeing internally. ++ * @lstats: Loopback statistics: packets, bytes ++ * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes ++ * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes + * + * @garp_port: GARP + * @mrp_port: MRP +@@ -2287,6 +2298,7 @@ struct net_device { + void *ml_priv; + enum netdev_ml_priv_type ml_priv_type; + ++ enum netdev_stat_type pcpu_stat_type:8; + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; +@@ -2670,6 +2682,16 @@ struct pcpu_sw_netstats { + struct u64_stats_sync syncp; + } __aligned(4 * sizeof(u64)); + ++struct pcpu_dstats { ++ u64 rx_packets; ++ u64 rx_bytes; ++ u64 rx_drops; ++ u64 tx_packets; ++ u64 tx_bytes; ++ u64 tx_drops; ++ struct u64_stats_sync syncp; ++} __aligned(8 * sizeof(u64)); ++ + struct pcpu_lstats { + u64_stats_t packets; + u64_stats_t bytes; +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9991,6 +9991,46 @@ void netif_tx_stop_all_queues(struct net + } + EXPORT_SYMBOL(netif_tx_stop_all_queues); + ++static int netdev_do_alloc_pcpu_stats(struct net_device *dev) ++{ ++ void __percpu *v; ++ ++ switch (dev->pcpu_stat_type) { ++ case NETDEV_PCPU_STAT_NONE: ++ return 0; ++ case NETDEV_PCPU_STAT_LSTATS: ++ v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); ++ break; ++ case NETDEV_PCPU_STAT_TSTATS: ++ v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ++ break; ++ case NETDEV_PCPU_STAT_DSTATS: ++ v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); ++ break; ++ default: ++ 
return -EINVAL; ++ } ++ ++ return v ? 0 : -ENOMEM; ++} ++ ++static void netdev_do_free_pcpu_stats(struct net_device *dev) ++{ ++ switch (dev->pcpu_stat_type) { ++ case NETDEV_PCPU_STAT_NONE: ++ return; ++ case NETDEV_PCPU_STAT_LSTATS: ++ free_percpu(dev->lstats); ++ break; ++ case NETDEV_PCPU_STAT_TSTATS: ++ free_percpu(dev->tstats); ++ break; ++ case NETDEV_PCPU_STAT_DSTATS: ++ free_percpu(dev->dstats); ++ break; ++ } ++} ++ + /** + * register_netdevice() - register a network device + * @dev: device to register +@@ -10051,11 +10091,15 @@ int register_netdevice(struct net_device + goto err_uninit; + } + ++ ret = netdev_do_alloc_pcpu_stats(dev); ++ if (ret) ++ goto err_uninit; ++ + ret = -EBUSY; + if (!dev->ifindex) + dev->ifindex = dev_new_index(net); + else if (__dev_get_by_index(net, dev->ifindex)) +- goto err_uninit; ++ goto err_free_pcpu; + + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). +@@ -10102,14 +10146,14 @@ int register_netdevice(struct net_device + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); + ret = notifier_to_errno(ret); + if (ret) +- goto err_uninit; ++ goto err_free_pcpu; + + ret = netdev_register_kobject(dev); + write_lock(&dev_base_lock); + dev->reg_state = ret ? 
NETREG_UNREGISTERED : NETREG_REGISTERED; + write_unlock(&dev_base_lock); + if (ret) +- goto err_uninit; ++ goto err_free_pcpu; + + __netdev_update_features(dev); + +@@ -10156,6 +10200,8 @@ int register_netdevice(struct net_device + out: + return ret; + ++err_free_pcpu: ++ netdev_do_free_pcpu_stats(dev); + err_uninit: + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); +@@ -10409,6 +10455,7 @@ void netdev_run_todo(void) + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); + ++ netdev_do_free_pcpu_stats(dev); + if (dev->priv_destructor) + dev->priv_destructor(dev); + if (dev->needs_free_netdev) diff --git a/queue-6.1/revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch b/queue-6.1/revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch new file mode 100644 index 00000000000..8fe8eb0061e --- /dev/null +++ b/queue-6.1/revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch @@ -0,0 +1,34 @@ +From zhangzekun11@huawei.com Thu Dec 12 12:45:53 2024 +From: Zhang Zekun +Date: Wed, 4 Dec 2024 16:27:52 +0800 +Subject: Revert "drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read()" +To: +Cc: , , , , , +Message-ID: <20241204082752.18498-1-zhangzekun11@huawei.com> + +From: Zhang Zekun + +This reverts commit 25d7e84343e1235b667cf5226c3934fdf36f0df6. + +The origin mainline patch fix a buffer overflow issue in +amdgpu_debugfs_gprwave_read(), but it has not been introduced in kernel +6.1 and older kernels. This patch add a check in a wrong function in the +same file. 
+ +Signed-off-by: Zhang Zekun +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +@@ -419,7 +419,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_ + ssize_t result = 0; + int r; + +- if (size > 4096 || size & 0x3 || *pos & 0x3) ++ if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); diff --git a/queue-6.1/series b/queue-6.1/series index 5e37dda7d72..0f0bfe1d7a1 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -752,3 +752,16 @@ xhci-dbc-fix-stall-transfer-event-handling.patch mmc-mtk-sd-fix-error-handle-of-probe-function.patch drm-amd-display-check-bios-images-before-it-is-used.patch ocfs2-revert-ocfs2-fix-the-la-space-leak-when-unmounting-an-ocfs2-volume.patch +revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch +gve-fixes-for-napi_poll-when-budget-is-0.patch +arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch +arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch +asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch +mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch +bpf-fix-helper-writes-to-read-only-maps.patch +net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch +bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch +veth-use-tstats-per-cpu-traffic-counters.patch +drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch +drm-ttm-print-the-memory-decryption-status-just-once.patch +drm-amdgpu-rework-resume-handling-for-display-v2.patch diff --git a/queue-6.1/veth-use-tstats-per-cpu-traffic-counters.patch b/queue-6.1/veth-use-tstats-per-cpu-traffic-counters.patch new file mode 100644 index 00000000000..6c5371aab80 --- /dev/null +++ b/queue-6.1/veth-use-tstats-per-cpu-traffic-counters.patch @@ -0,0 +1,114 
@@ +From daniel@iogearbox.net Fri Dec 6 16:34:42 2024 +From: Daniel Borkmann +Date: Fri, 6 Dec 2024 16:34:03 +0100 +Subject: veth: Use tstats per-CPU traffic counters +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov , Martin KaFai Lau +Message-ID: <20241206153403.273068-3-daniel@iogearbox.net> + +From: Peilin Ye + +[ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ] + +Currently veth devices use the lstats per-CPU traffic counters, which only +cover TX traffic. veth_get_stats64() actually populates RX stats of a veth +device from its peer's TX counters, based on the assumption that a veth +device can _only_ receive packets from its peer, which is no longer true: + +For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF +helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in +a different netns), skipping veth's peer device. Unfortunately, this kind +of traffic isn't currently accounted for in veth's RX stats. + +In preparation for the fix, use tstats (instead of lstats) to maintain +both RX and TX counters for each veth device. We'll use RX counters for +bpf_redirect_peer() traffic, and keep using TX counters for the usual +"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by +_adding_ RX count to peer's TX count, in order to cover both kinds of +traffic. + +veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()") +for less confusion, but let's leave it to another patch to keep the fix +minimal. 
+ +Signed-off-by: Peilin Ye +Co-developed-by: Daniel Borkmann +Signed-off-by: Daniel Borkmann +Reviewed-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net +Signed-off-by: Martin KaFai Lau +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/veth.c | 30 +++++++++++------------------- + 1 file changed, 11 insertions(+), 19 deletions(-) + +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -342,7 +342,7 @@ static netdev_tx_t veth_xmit(struct sk_b + skb_tx_timestamp(skb); + if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { + if (!use_napi) +- dev_lstats_add(dev, length); ++ dev_sw_netstats_tx_add(dev, 1, length); + } else { + drop: + atomic64_inc(&priv->dropped); +@@ -357,14 +357,6 @@ drop: + return ret; + } + +-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) +-{ +- struct veth_priv *priv = netdev_priv(dev); +- +- dev_lstats_read(dev, packets, bytes); +- return atomic64_read(&priv->dropped); +-} +- + static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) + { + struct veth_priv *priv = netdev_priv(dev); +@@ -402,24 +394,24 @@ static void veth_get_stats64(struct net_ + struct veth_priv *priv = netdev_priv(dev); + struct net_device *peer; + struct veth_stats rx; +- u64 packets, bytes; + +- tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); +- tot->tx_bytes = bytes; +- tot->tx_packets = packets; ++ tot->tx_dropped = atomic64_read(&priv->dropped); ++ dev_fetch_sw_netstats(tot, dev->tstats); + + veth_stats_rx(&rx, dev); + tot->tx_dropped += rx.xdp_tx_err; + tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; +- tot->rx_bytes = rx.xdp_bytes; +- tot->rx_packets = rx.xdp_packets; ++ tot->rx_bytes += rx.xdp_bytes; ++ tot->rx_packets += rx.xdp_packets; + + rcu_read_lock(); + peer = rcu_dereference(priv->peer); + if (peer) { +- veth_stats_tx(peer, &packets, &bytes); +- tot->rx_bytes += bytes; +- tot->rx_packets += 
packets; ++ struct rtnl_link_stats64 tot_peer = {}; ++ ++ dev_fetch_sw_netstats(&tot_peer, peer->tstats); ++ tot->rx_bytes += tot_peer.tx_bytes; ++ tot->rx_packets += tot_peer.tx_packets; + + veth_stats_rx(&rx, peer); + tot->tx_dropped += rx.peer_tq_xdp_xmit_err; +@@ -1612,7 +1604,7 @@ static void veth_setup(struct net_device + NETIF_F_HW_VLAN_STAG_RX); + dev->needs_free_netdev = true; + dev->priv_destructor = veth_dev_free; +- dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; ++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + dev->max_mtu = ETH_MAX_MTU; + + dev->hw_features = VETH_FEATURES; -- 2.47.3