--- /dev/null
+From 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Wed, 6 Nov 2024 16:04:48 +0000
+Subject: arm64: smccc: Remove broken support for SMCCCv1.3 SVE discard hint
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 upstream.
+
+SMCCCv1.3 added a hint bit which callers can set in an SMCCC function ID
+(AKA "FID") to indicate that it is acceptable for the SMCCC
+implementation to discard SVE and/or SME state over a specific SMCCC
+call. The kernel support for using this hint is broken and SMCCC calls
+may clobber the SVE and/or SME state of arbitrary tasks, though FPSIMD
+state is unaffected.
+
+The kernel support is intended to use the hint when there is no SVE or
+SME state to save, and to do this it checks whether TIF_FOREIGN_FPSTATE
+is set or TIF_SVE is clear in assembly code:
+
+| ldr <flags>, [<current_task>, #TSK_TI_FLAGS]
+| tbnz <flags>, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+| tbnz <flags>, #TIF_SVE, 2f // Does that state include SVE?
+|
+| 1: orr <fid>, <fid>, ARM_SMCCC_1_3_SVE_HINT
+| 2:
+| << SMCCC call using FID >>
+
+This is not safe as-is:
+
+(1) SMCCC calls can be made in a preemptible context and preemption can
+ result in TIF_FOREIGN_FPSTATE being set or cleared at arbitrary
+ points in time. Thus checking for TIF_FOREIGN_FPSTATE provides no
+ guarantee.
+
+(2) TIF_FOREIGN_FPSTATE only indicates that the live FP/SVE/SME state in
+ the CPU does not belong to the current task, and does not indicate
+ that clobbering this state is acceptable.
+
+ When the live CPU state is clobbered it is necessary to update
+ fpsimd_last_state.st to ensure that a subsequent context switch will
+ reload FP/SVE/SME state from memory rather than consuming the
+ clobbered state. This and the SMCCC call itself must happen in a
+ critical section with preemption disabled to avoid races.
+
+(3) Live SVE/SME state can exist with TIF_SVE clear (e.g. with only
+ TIF_SME set), and checking TIF_SVE alone is insufficient.
+
+Remove the broken support for the SMCCCv1.3 SVE saving hint. This is
+effectively a revert of commits:
+
+* cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
+* a7c3acca5380 ("arm64: smccc: Save lr before calling __arm_smccc_sve_check()")
+
+... leaving behind the ARM_SMCCC_VERSION_1_3 and ARM_SMCCC_1_3_SVE_HINT
+definitions, since these are simply definitions from the SMCCC
+specification, and the latter is used in KVM via ARM_SMCCC_CALL_HINTS.
+
+If we want to bring this back in future, we'll probably want to handle
+this logic in C where we can use all the usual FPSIMD/SVE/SME helper
+functions, and that'll likely require some rework of the SMCCC code
+and/or its callers.
+
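+As a hedged illustration only (assuming helpers such as
+get_cpu_fpsimd_context(), put_cpu_fpsimd_context() and
+fpsimd_flush_cpu_state() were usable from the SMCCC code), a safe C
+shape might look like:
+
+| get_cpu_fpsimd_context();			/* disables preemption */
+| if (test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+|         /* Live state belongs to another task; mark it stale so the
+|          * next context switch reloads from memory.
+|          */
+|         fpsimd_flush_cpu_state();
+|         fid |= ARM_SMCCC_1_3_SVE_HINT;
+| } else if (!test_thread_flag(TIF_SVE) && !test_thread_flag(TIF_SME)) {
+|         /* Current task owns the live state but has no SVE/SME state. */
+|         fid |= ARM_SMCCC_1_3_SVE_HINT;
+| }
+| << SMCCC call using FID >>
+| put_cpu_fpsimd_context();
+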
+Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20241106160448.2712997-1-mark.rutland@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Mark: fix conflicts in <linux/arm-smccc.h> ]
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/smccc-call.S | 35 +++--------------------------------
+ drivers/firmware/smccc/smccc.c | 4 ----
+ include/linux/arm-smccc.h | 30 ++----------------------------
+ 3 files changed, 5 insertions(+), 64 deletions(-)
+
+--- a/arch/arm64/kernel/smccc-call.S
++++ b/arch/arm64/kernel/smccc-call.S
+@@ -7,48 +7,19 @@
+
+ #include <asm/asm-offsets.h>
+ #include <asm/assembler.h>
+-#include <asm/thread_info.h>
+-
+-/*
+- * If we have SMCCC v1.3 and (as is likely) no SVE state in
+- * the registers then set the SMCCC hint bit to say there's no
+- * need to preserve it. Do this by directly adjusting the SMCCC
+- * function value which is already stored in x0 ready to be called.
+- */
+-SYM_FUNC_START(__arm_smccc_sve_check)
+-
+- ldr_l x16, smccc_has_sve_hint
+- cbz x16, 2f
+-
+- get_current_task x16
+- ldr x16, [x16, #TSK_TI_FLAGS]
+- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
+-
+-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
+-
+-2: ret
+-SYM_FUNC_END(__arm_smccc_sve_check)
+-EXPORT_SYMBOL(__arm_smccc_sve_check)
+
+ .macro SMCCC instr
+- stp x29, x30, [sp, #-16]!
+- mov x29, sp
+-alternative_if ARM64_SVE
+- bl __arm_smccc_sve_check
+-alternative_else_nop_endif
+ \instr #0
+- ldr x4, [sp, #16]
++ ldr x4, [sp]
+ stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
+ stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
+- ldr x4, [sp, #24]
++ ldr x4, [sp, #8]
+ cbz x4, 1f /* no quirk structure */
+ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
+ cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
+ b.ne 1f
+ str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+-1: ldp x29, x30, [sp], #16
+- ret
++1: ret
+ .endm
+
+ /*
+--- a/drivers/firmware/smccc/smccc.c
++++ b/drivers/firmware/smccc/smccc.c
+@@ -16,7 +16,6 @@ static u32 smccc_version = ARM_SMCCC_VER
+ static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
+
+ bool __ro_after_init smccc_trng_available = false;
+-u64 __ro_after_init smccc_has_sve_hint = false;
+ s32 __ro_after_init smccc_soc_id_version = SMCCC_RET_NOT_SUPPORTED;
+ s32 __ro_after_init smccc_soc_id_revision = SMCCC_RET_NOT_SUPPORTED;
+
+@@ -28,9 +27,6 @@ void __init arm_smccc_version_init(u32 v
+ smccc_conduit = conduit;
+
+ smccc_trng_available = smccc_probe_trng();
+- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+- smccc_version >= ARM_SMCCC_VERSION_1_3)
+- smccc_has_sve_hint = true;
+
+ if ((smccc_version >= ARM_SMCCC_VERSION_1_2) &&
+ (smccc_conduit != SMCCC_CONDUIT_NONE)) {
+--- a/include/linux/arm-smccc.h
++++ b/include/linux/arm-smccc.h
+@@ -224,8 +224,6 @@ u32 arm_smccc_get_version(void);
+
+ void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
+
+-extern u64 smccc_has_sve_hint;
+-
+ /**
+ * arm_smccc_get_soc_id_version()
+ *
+@@ -324,15 +322,6 @@ struct arm_smccc_quirk {
+ };
+
+ /**
+- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
+- *
+- * Sets the SMCCC hint bit to indicate if there is live state in the SVE
+- * registers, this modifies x0 in place and should never be called from C
+- * code.
+- */
+-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
+-
+-/**
+ * __arm_smccc_smc() - make SMC calls
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+@@ -399,20 +388,6 @@ asmlinkage void __arm_smccc_hvc(unsigned
+
+ #endif
+
+-/* nVHE hypervisor doesn't have a current thread so needs separate checks */
+-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
+-
+-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \
+- ARM64_SVE)
+-#define smccc_sve_clobbers "x16", "x30", "cc",
+-
+-#else
+-
+-#define SMCCC_SVE_CHECK
+-#define smccc_sve_clobbers
+-
+-#endif
+-
+ #define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x
+
+ #define __count_args(...) \
+@@ -480,7 +455,7 @@ asmlinkage void __arm_smccc_hvc(unsigned
+
+ #define ___constraints(count) \
+ : __constraint_read_ ## count \
+- : smccc_sve_clobbers "memory"
++ : "memory"
+ #define __constraints(count) ___constraints(count)
+
+ /*
+@@ -495,8 +470,7 @@ asmlinkage void __arm_smccc_hvc(unsigned
+ register unsigned long r2 asm("r2"); \
+ register unsigned long r3 asm("r3"); \
+ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
+- asm volatile(SMCCC_SVE_CHECK \
+- inst "\n" : \
++ asm volatile(inst "\n" : \
+ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
+ __constraints(__count_args(__VA_ARGS__))); \
+ if (___res) \
--- /dev/null
+From 751ecf6afd6568adc98f2a6052315552c0483d18 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 30 Oct 2024 20:23:50 +0000
+Subject: arm64/sve: Discard stale CPU state when handling SVE traps
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 751ecf6afd6568adc98f2a6052315552c0483d18 upstream.
+
+The logic for handling SVE traps manipulates saved FPSIMD/SVE state
+incorrectly, and a race with preemption can result in a task having
+TIF_SVE set and TIF_FOREIGN_FPSTATE clear even though the live CPU state
+is stale (e.g. with SVE traps enabled). This has been observed to result
+in warnings from do_sve_acc() where SVE traps are not expected while
+TIF_SVE is set:
+
+| if (test_and_set_thread_flag(TIF_SVE))
+| WARN_ON(1); /* SVE access shouldn't have trapped */
+
+Warnings of this form have been reported intermittently, e.g.
+
+ https://lore.kernel.org/linux-arm-kernel/CA+G9fYtEGe_DhY2Ms7+L7NKsLYUomGsgqpdBj+QwDLeSg=JhGg@mail.gmail.com/
+ https://lore.kernel.org/linux-arm-kernel/000000000000511e9a060ce5a45c@google.com/
+
+The race can occur when the SVE trap handler is preempted before and
+after manipulating the saved FPSIMD/SVE state, starting and ending on
+the same CPU, e.g.
+
+| void do_sve_acc(unsigned long esr, struct pt_regs *regs)
+| {
+| // Trap on CPU 0 with TIF_SVE clear, SVE traps enabled
+| // task->fpsimd_cpu is 0.
+| // per_cpu_ptr(&fpsimd_last_state, 0) is task.
+|
+| ...
+|
+| // Preempted; migrated from CPU 0 to CPU 1.
+| // TIF_FOREIGN_FPSTATE is set.
+|
+| get_cpu_fpsimd_context();
+|
+| if (test_and_set_thread_flag(TIF_SVE))
+| WARN_ON(1); /* SVE access shouldn't have trapped */
+|
+| sve_init_regs() {
+| if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+| ...
+| } else {
+| fpsimd_to_sve(current);
+| current->thread.fp_type = FP_STATE_SVE;
+| }
+| }
+|
+| put_cpu_fpsimd_context();
+|
+| // Preempted; migrated from CPU 1 to CPU 0.
+| // task->fpsimd_cpu is still 0
+| // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then:
+| // - Stale HW state is reused (with SVE traps enabled)
+| // - TIF_FOREIGN_FPSTATE is cleared
+| // - A return to userspace skips HW state restore
+| }
+
+Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set
+by calling fpsimd_flush_task_state() to detach from the saved CPU
+state. This ensures that a subsequent context switch will not reuse the
+stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the
+new state to be reloaded from memory prior to a return to userspace.
+
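+For reference, fpsimd_flush_task_state() amounts to roughly (a
+paraphrase, not a verbatim quote of the kernel code):
+
+| t->thread.fpsimd_cpu = NR_CPUS;	/* state not live on any CPU */
+| barrier();
+| set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
+| barrier();
+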
+Fixes: cccb78ce89c4 ("arm64/sve: Rework SVE access trap to convert state in registers")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/r/20241030-arm64-fpsimd-foreign-flush-v1-1-bd7bd66905a2@kernel.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -1383,6 +1383,7 @@ static void sve_init_regs(void)
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
++ fpsimd_flush_task_state(current);
+ }
+ }
+
--- /dev/null
+From a0aae96be5ffc5b456ca07bfe1385b721c20e184 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
+ <amadeuszx.slawinski@linux.intel.com>
+Date: Thu, 10 Oct 2024 13:20:08 +0200
+Subject: ASoC: Intel: avs: Fix return status of avs_pcm_hw_constraints_init()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+
+commit a0aae96be5ffc5b456ca07bfe1385b721c20e184 upstream.
+
+The check of the return code from avs_pcm_hw_constraints_init() in
+avs_dai_fe_startup() only tests whether the value differs from 0.
+Currently the function can return a positive value; change it to return
+0 on success.
+
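+For illustration, the caller-side idiom the message describes is of the
+form (a sketch, not the exact source):
+
+	ret = avs_pcm_hw_constraints_init(substream);
+	if (ret)	/* any non-zero value, even positive, is a failure */
+		return ret;
+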
+Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+I've observed KASAN splats on our setups, and while the patch itself is
+correct regardless, the problem seems to be caused by recent changes to
+rates: it started happening after recent patchsets and doesn't reproduce
+with those reverted:
+https://lore.kernel.org/linux-sound/20240905-alsa-12-24-128-v1-0-8371948d3921@baylibre.com/
+https://lore.kernel.org/linux-sound/20240911135756.24434-1-tiwai@suse.de/
+I've tested using Mark's tree, where both are applied, and for some
+reason snd_pcm_hw_constraint_minmax() started returning a positive value
+where it previously returned 0. I'm a bit worried that this signals some
+deeper problem regarding constraints with the above changes.
+
+Link: https://patch.msgid.link/20241010112008.545526-1-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/intel/avs/pcm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/soc/intel/avs/pcm.c
++++ b/sound/soc/intel/avs/pcm.c
+@@ -540,7 +540,7 @@ static int avs_dai_fe_hw_free(struct snd
+ if (ret < 0)
+ dev_dbg(dai->dev, "Failed to free pages!\n");
+
+- return ret;
++ return 0;
+ }
+
+ static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:43 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:02 +0100
+Subject: bpf: Fix dev's rx stats for bpf_redirect_peer traffic
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Youlun Zhang <zhangyoulun@bytedance.com>, Nikolay Aleksandrov <razor@blackwall.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-2-daniel@iogearbox.net>
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 024ee930cb3c9ae49e4266aee89cfde0ebb407e1 ]
+
+Traffic redirected by bpf_redirect_peer() (used by recent CNIs like Cilium)
+is not accounted for in the RX stats of supported devices (that is, veth
+and netkit), confusing user space metrics collectors such as cAdvisor [0],
+as reported by Youlun.
+
+Fix it by calling dev_sw_netstats_rx_add() in skb_do_redirect(), to update
+RX traffic counters. Devices that support ndo_get_peer_dev _must_ use the
+@tstats per-CPU counters (instead of @lstats, or @dstats).
+
+To make this more fool-proof, error out when ndo_get_peer_dev is set but
+@tstats are not selected.
+
+ [0] Specifically, the "container_network_receive_{byte,packet}s_total"
+ counters are affected.
+
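+For reference, the effect of dev_sw_netstats_rx_add() is roughly (a
+sketch of the inline helper's body):
+
+	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	u64_stats_inc(&tstats->rx_packets);
+	u64_stats_add(&tstats->rx_bytes, len);
+	u64_stats_update_end(&tstats->syncp);
+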
+Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
+Reported-by: Youlun Zhang <zhangyoulun@bytedance.com>
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-6-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 8 ++++++++
+ net/core/filter.c | 1 +
+ 2 files changed, 9 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9995,6 +9995,14 @@ static int netdev_do_alloc_pcpu_stats(st
+ {
+ void __percpu *v;
+
++ /* Drivers implementing ndo_get_peer_dev must support tstat
++ * accounting, so that skb_do_redirect() can bump the dev's
++ * RX stats upon network namespace switch.
++ */
++ if (dev->netdev_ops->ndo_get_peer_dev &&
++ dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
++ return -EOPNOTSUPP;
++
+ switch (dev->pcpu_stat_type) {
+ case NETDEV_PCPU_STAT_NONE:
+ return 0;
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2491,6 +2491,7 @@ int skb_do_redirect(struct sk_buff *skb)
+ net_eq(net, dev_net(dev))))
+ goto out_drop;
+ skb->dev = dev;
++ dev_sw_netstats_rx_add(dev, skb->len);
+ return -EAGAIN;
+ }
+ return flags & BPF_F_NEIGH ?
--- /dev/null
+From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 13 Sep 2024 21:17:48 +0200
+Subject: bpf: Fix helper writes to read-only maps
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 32556ce93bc45c730829083cb60f95a2728ea48b upstream.
+
+Lonial found an issue that despite user- and BPF-side frozen BPF map
+(like in case of .rodata), it was still possible to write into it from
+a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT}
+as arguments.
+
+In check_func_arg(), when the argument is of the mentioned type,
+meta->raw_mode is never set. Later, in check_helper_mem_access(), under
+the case of PTR_TO_MAP_VALUE as the register base type, BPF_READ is
+assumed for the subsequent call to check_map_access_type(), and given
+the BPF map is read-only, it succeeds.
+
+The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT
+when results are written into them as opposed to read out of them. The
+latter indicates that it's okay to pass a pointer to uninitialized memory
+as the memory is written to anyway.
+
+However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM
+just with additional alignment requirement. So it is better to just get
+rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the
+fixed size memory types. For this, add MEM_ALIGNED to additionally ensure
+alignment given these helpers write directly into the args via *<ptr> = val.
+The .arg*_size has been initialized reflecting the actual sizeof(*<ptr>).
+
+MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated
+argument types, since in !MEM_FIXED_SIZE cases the verifier does not know
+the buffer size a priori and therefore cannot blindly write *<ptr> = val.
+
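+As a hedged illustration of the reported pattern (hypothetical BPF
+program fragment; the map, key and buf names are made up), a helper
+taking ARG_PTR_TO_LONG could be pointed at a frozen, read-only map
+value:
+
+	long *res = bpf_map_lookup_elem(&frozen_map, &key);
+	if (res)
+		/* pre-fix: the verifier assumed BPF_READ here, so
+		 * bpf_strtol() could write into the read-only value
+		 */
+		bpf_strtol(buf, sizeof(buf), 10, res);
+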
+Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types")
+Reported-by: Lonial Con <kongln9170@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[ Resolve merge conflict in include/linux/bpf.h and merge conflict in
+ kernel/bpf/verifier.c.]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 7 +++++--
+ kernel/bpf/helpers.c | 6 ++++--
+ kernel/bpf/syscall.c | 3 ++-
+ kernel/bpf/verifier.c | 41 +++++------------------------------------
+ kernel/trace/bpf_trace.c | 6 ++++--
+ net/core/filter.c | 6 ++++--
+ 6 files changed, 24 insertions(+), 45 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -475,6 +475,11 @@ enum bpf_type_flag {
+ /* Size is known at compile time. */
+ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+
++ /* Memory must be aligned on some architectures, used in combination with
++ * MEM_FIXED_SIZE.
++ */
++ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
++
+ __BPF_TYPE_FLAG_MAX,
+ __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
+ };
+@@ -510,8 +515,6 @@ enum bpf_arg_type {
+ ARG_ANYTHING, /* any (initialized) argument is ok */
+ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
+ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+- ARG_PTR_TO_INT, /* pointer to int */
+- ARG_PTR_TO_LONG, /* pointer to long */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
+ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
+--- a/kernel/bpf/helpers.c
++++ b/kernel/bpf/helpers.c
+@@ -531,7 +531,8 @@ const struct bpf_func_proto bpf_strtol_p
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(s64),
+ };
+
+ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
+@@ -560,7 +561,8 @@ const struct bpf_func_proto bpf_strtoul_
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(u64),
+ };
+
+ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5260,7 +5260,8 @@ static const struct bpf_func_proto bpf_k
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(u64),
+ };
+
+ static const struct bpf_func_proto *
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5818,16 +5818,6 @@ static bool arg_type_is_dynptr(enum bpf_
+ return base_type(type) == ARG_PTR_TO_DYNPTR;
+ }
+
+-static int int_ptr_type_to_size(enum bpf_arg_type type)
+-{
+- if (type == ARG_PTR_TO_INT)
+- return sizeof(u32);
+- else if (type == ARG_PTR_TO_LONG)
+- return sizeof(u64);
+-
+- return -EINVAL;
+-}
+-
+ static int resolve_map_arg_type(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta,
+ enum bpf_arg_type *arg_type)
+@@ -5908,16 +5898,6 @@ static const struct bpf_reg_types mem_ty
+ },
+ };
+
+-static const struct bpf_reg_types int_ptr_types = {
+- .types = {
+- PTR_TO_STACK,
+- PTR_TO_PACKET,
+- PTR_TO_PACKET_META,
+- PTR_TO_MAP_KEY,
+- PTR_TO_MAP_VALUE,
+- },
+-};
+-
+ static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+ static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+ static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+@@ -5955,8 +5935,6 @@ static const struct bpf_reg_types *compa
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+- [ARG_PTR_TO_INT] = &int_ptr_types,
+- [ARG_PTR_TO_LONG] = &int_ptr_types,
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
+ [ARG_PTR_TO_FUNC] = &func_ptr_types,
+ [ARG_PTR_TO_STACK] = &stack_ptr_types,
+@@ -6303,9 +6281,11 @@ skip_type_check:
+ */
+ meta->raw_mode = arg_type & MEM_UNINIT;
+ if (arg_type & MEM_FIXED_SIZE) {
+- err = check_helper_mem_access(env, regno,
+- fn->arg_size[arg], false,
+- meta);
++ err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
++ if (err)
++ return err;
++ if (arg_type & MEM_ALIGNED)
++ err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
+ }
+ break;
+ case ARG_CONST_SIZE:
+@@ -6373,17 +6353,6 @@ skip_type_check:
+ if (err)
+ return err;
+ break;
+- case ARG_PTR_TO_INT:
+- case ARG_PTR_TO_LONG:
+- {
+- int size = int_ptr_type_to_size(arg_type);
+-
+- err = check_helper_mem_access(env, regno, size, false, meta);
+- if (err)
+- return err;
+- err = check_ptr_alignment(env, reg, 0, size, true);
+- break;
+- }
+ case ARG_PTR_TO_CONST_STR:
+ {
+ struct bpf_map *map = reg->map_ptr;
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1192,7 +1192,8 @@ static const struct bpf_func_proto bpf_g
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_LONG,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u64),
+ };
+
+ BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
+@@ -1208,7 +1209,8 @@ static const struct bpf_func_proto bpf_g
+ .func = get_func_ret,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_LONG,
++ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg2_size = sizeof(u64),
+ };
+
+ BPF_CALL_1(get_func_arg_cnt, void *, ctx)
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -6233,7 +6233,8 @@ static const struct bpf_func_proto bpf_s
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_INT,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+ };
+@@ -6244,7 +6245,8 @@ static const struct bpf_func_proto bpf_x
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_INT,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+ };
--- /dev/null
+From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Mon, 25 Nov 2024 13:59:09 -0500
+Subject: drm/amdgpu: rework resume handling for display (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 73dae652dcac776296890da215ee7dec357a1032 upstream.
+
+Split resume into a 3rd step to handle displays when DCC is
+enabled on DCN 4.0.1. Move display after the buffer funcs
+have been re-enabled so that the GPU will do the move and
+properly set the DCC metadata for DCN.
+
+v2: fix fence irq resume ordering
+
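+The resulting resume ordering is then roughly (summarizing the diff
+below):
+
+	phase1: COMMON, GMC, IH
+	phase2: all remaining blocks except COMMON/GMC/IH/PSP and DCE
+	amdgpu_fence_driver_hw_init()
+	phase3: DCE (display) only
+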
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3242,7 +3242,7 @@ static int amdgpu_device_ip_resume_phase
+ *
+ * @adev: amdgpu_device pointer
+ *
+- * First resume function for hardware IPs. The list of all the hardware
++ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+@@ -3260,6 +3260,7 @@ static int amdgpu_device_ip_resume_phase
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
++ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+@@ -3284,6 +3285,36 @@ static int amdgpu_device_ip_resume_phase
+ }
+
+ /**
++ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * Third resume function for hardware IPs. The list of all the hardware
++ * IPs that make up the asic is walked and the resume callbacks are run for
++ * all DCE. resume puts the hardware into a functional state after a suspend
++ * and updates the software state as necessary. This function is also used
++ * for restoring the GPU after a GPU reset.
++ *
++ * Returns 0 on success, negative error code on failure.
++ */
++static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
++{
++ int i, r;
++
++ for (i = 0; i < adev->num_ip_blocks; i++) {
++ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
++ continue;
++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
++ r = adev->ip_blocks[i].version->funcs->resume(adev);
++ if (r)
++ return r;
++ }
++ }
++
++ return 0;
++}
++
++/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+@@ -3313,6 +3344,13 @@ static int amdgpu_device_ip_resume(struc
+
+ r = amdgpu_device_ip_resume_phase2(adev);
+
++ if (r)
++ return r;
++
++ amdgpu_fence_driver_hw_init(adev);
++
++ r = amdgpu_device_ip_resume_phase3(adev);
++
+ return r;
+ }
+
+@@ -4311,7 +4349,6 @@ int amdgpu_device_resume(struct drm_devi
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ return r;
+ }
+- amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_late_init(adev);
+ if (r)
+@@ -5065,6 +5102,10 @@ int amdgpu_do_asic_reset(struct list_hea
+ if (r)
+ goto out;
+
++ r = amdgpu_device_ip_resume_phase3(tmp_adev);
++ if (r)
++ goto out;
++
+ if (vram_lost)
+ amdgpu_device_fill_reset_magic(tmp_adev);
+
--- /dev/null
+From stable+bounces-100113-greg=kroah.com@vger.kernel.org Mon Dec 9 10:49:51 2024
+From: Ajay Kaher <ajay.kaher@broadcom.com>
+Date: Mon, 9 Dec 2024 09:49:03 +0000
+Subject: drm/ttm: Make sure the mapped tt pages are decrypted when needed
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Sasha Levin <sashal@kernel.org>, Ye Li <ye.li@broadcom.com>
+Message-ID: <20241209094904.2547579-2-ajay.kaher@broadcom.com>
+
+From: Zack Rusin <zack.rusin@broadcom.com>
+
+commit 71ce046327cfd3aef3f93d1c44e091395eb03f8f upstream.
+
+Some drivers require the mapped tt pages to be decrypted. In an ideal
+world this would have been handled by the dma layer, but the TTM page
+fault handling would have to be rewritten to able to do that.
+
+A side-effect of the TTM page fault handling is using a dma allocation
+per order (via ttm_pool_alloc_page) which makes it impossible to just
+trivially use dma_mmap_attrs. As a result ttm has to be very careful
+about trying to make its pgprot for the mapped tt pages match what
+the dma layer thinks it is. At the ttm layer it's possible to
+deduce the requirement to have tt pages decrypted by checking
+whether coherent dma allocations have been requested and the system
+is running with confidential computing technologies.
+
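+A condensed sketch of the two sides of the change (full context in the
+diff below):
+
+	/* at tt create time: flag pages as needing decrypted mappings */
+	if (bdev->pool.use_dma_alloc &&
+	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		page_flags |= TTM_TT_FLAG_DECRYPTED;
+
+	/* at map time: make the pgprot match what the dma layer set up */
+	if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED)
+		tmp = pgprot_decrypted(tmp);
+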
+This approach isn't ideal, but keeping TTM matching the DMA layer's
+expectations for the page properties is fragile in general;
+unfortunately, a proper fix would require a rewrite of TTM's page
+fault handling.
+
+Fixes vmwgfx with SEV enabled.
+
+v2: Explicitly include cc_platform.h
+v3: Use CC_ATTR_GUEST_MEM_ENCRYPT instead of CC_ATTR_MEM_ENCRYPT to
+limit the scope to guests and log when memory decryption is enabled.
+
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem")
+Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v5.14+
+Link: https://patchwork.freedesktop.org/patch/msgid/20230926040359.3040017-1-zack@kde.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Ye Li <ye.li@broadcom.com>
+Signed-off-by: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++++++--
+ drivers/gpu/drm/ttm/ttm_tt.c | 12 ++++++++++++
+ include/drm/ttm/ttm_tt.h | 7 +++++++
+ 3 files changed, 30 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
+@@ -274,7 +274,13 @@ pgprot_t ttm_io_prot(struct ttm_buffer_o
+ enum ttm_caching caching;
+
+ man = ttm_manager_type(bo->bdev, res->mem_type);
+- caching = man->use_tt ? bo->ttm->caching : res->bus.caching;
++ if (man->use_tt) {
++ caching = bo->ttm->caching;
++ if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED)
++ tmp = pgprot_decrypted(tmp);
++ } else {
++ caching = res->bus.caching;
++ }
+
+ return ttm_prot_from_caching(caching, tmp);
+ }
+@@ -317,6 +323,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu
+ .no_wait_gpu = false
+ };
+ struct ttm_tt *ttm = bo->ttm;
++ struct ttm_resource_manager *man =
++ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+ pgprot_t prot;
+ int ret;
+
+@@ -326,7 +334,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu
+ if (ret)
+ return ret;
+
+- if (num_pages == 1 && ttm->caching == ttm_cached) {
++ if (num_pages == 1 && ttm->caching == ttm_cached &&
++ !(man->use_tt && (ttm->page_flags & TTM_TT_FLAG_DECRYPTED))) {
+ /*
+ * We're mapping a single page, and the desired
+ * page protection is consistent with the bo.
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -31,11 +31,13 @@
+
+ #define pr_fmt(fmt) "[TTM] " fmt
+
++#include <linux/cc_platform.h>
+ #include <linux/sched.h>
+ #include <linux/shmem_fs.h>
+ #include <linux/file.h>
+ #include <linux/module.h>
+ #include <drm/drm_cache.h>
++#include <drm/drm_device.h>
+ #include <drm/ttm/ttm_bo_driver.h>
+
+ #include "ttm_module.h"
+@@ -59,6 +61,7 @@ static atomic_long_t ttm_dma32_pages_all
+ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
+ {
+ struct ttm_device *bdev = bo->bdev;
++ struct drm_device *ddev = bo->base.dev;
+ uint32_t page_flags = 0;
+
+ dma_resv_assert_held(bo->base.resv);
+@@ -80,6 +83,15 @@ int ttm_tt_create(struct ttm_buffer_obje
+ pr_err("Illegal buffer object type\n");
+ return -EINVAL;
+ }
++ /*
++ * When using dma_alloc_coherent with memory encryption the
++ * mapped TT pages need to be decrypted or otherwise the drivers
++ * will end up sending encrypted mem to the gpu.
++ */
++ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
++ page_flags |= TTM_TT_FLAG_DECRYPTED;
++ drm_info(ddev, "TT memory decryption enabled.");
++ }
+
+ bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
+ if (unlikely(bo->ttm == NULL))
+--- a/include/drm/ttm/ttm_tt.h
++++ b/include/drm/ttm/ttm_tt.h
+@@ -79,6 +79,12 @@ struct ttm_tt {
+ * page_flags = TTM_TT_FLAG_EXTERNAL |
+ * TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+ *
++ * TTM_TT_FLAG_DECRYPTED: The mapped ttm pages should be marked as
++ * not encrypted. The framework will try to match what the dma layer
++ * is doing, but note that it is a little fragile because ttm page
++ * fault handling abuses the DMA api a bit and dma_map_attrs can't be
++ * used to assure pgprot always matches.
++ *
+ * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
+ * set by TTM after ttm_tt_populate() has successfully returned, and is
+ * then unset when TTM calls ttm_tt_unpopulate().
+@@ -87,6 +93,7 @@ struct ttm_tt {
+ #define TTM_TT_FLAG_ZERO_ALLOC (1 << 1)
+ #define TTM_TT_FLAG_EXTERNAL (1 << 2)
+ #define TTM_TT_FLAG_EXTERNAL_MAPPABLE (1 << 3)
++#define TTM_TT_FLAG_DECRYPTED (1 << 4)
+
+ #define TTM_TT_FLAG_PRIV_POPULATED (1U << 31)
+ uint32_t page_flags;
--- /dev/null
+From stable+bounces-100114-greg=kroah.com@vger.kernel.org Mon Dec 9 10:50:05 2024
+From: Ajay Kaher <ajay.kaher@broadcom.com>
+Date: Mon, 9 Dec 2024 09:49:04 +0000
+Subject: drm/ttm: Print the memory decryption status just once
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Ye Li <ye.li@broadcom.com>
+Message-ID: <20241209094904.2547579-3-ajay.kaher@broadcom.com>
+
+From: Zack Rusin <zack.rusin@broadcom.com>
+
+commit 27906e5d78248b19bcdfdae72049338c828897bb upstream.
+
+Stop printing the TT memory decryption status info each time tt is created
+and instead print it just once.
+
+Reduces the spam in the system logs when running guests with SEV enabled.
+
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Fixes: 71ce046327cf ("drm/ttm: Make sure the mapped tt pages are decrypted when needed")
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v5.14+
+Link: https://patchwork.freedesktop.org/patch/msgid/20240408155605.1398631-1-zack.rusin@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ye Li <ye.li@broadcom.com>
+Signed-off-by: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -90,7 +90,7 @@ int ttm_tt_create(struct ttm_buffer_obje
+ */
+ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+ page_flags |= TTM_TT_FLAG_DECRYPTED;
+- drm_info(ddev, "TT memory decryption enabled.");
++ drm_info_once(ddev, "TT memory decryption enabled.");
+ }
+
+ bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
--- /dev/null
+From 278a370c1766060d2144d6cf0b06c101e1043b6d Mon Sep 17 00:00:00 2001
+From: Ziwei Xiao <ziweixiao@google.com>
+Date: Mon, 13 Nov 2023 16:41:44 -0800
+Subject: gve: Fixes for napi_poll when budget is 0
+
+From: Ziwei Xiao <ziweixiao@google.com>
+
+commit 278a370c1766060d2144d6cf0b06c101e1043b6d upstream.
+
+Netpoll will explicitly pass the polling call with a budget of 0 to
+indicate it's clearing the Tx path only. gve_rx_poll and gve_xdp_poll
+were mistakenly taking the 0 budget as an indication to do all the
+work. Add checks to avoid the rx path and xdp path being called when
+budget is 0, and also avoid calling napi_complete_done when budget is
+0 for netpoll.
+
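+For reference, the NAPI contract being honored here, as a sketch with
+hypothetical foo_* names (not driver code):
+
+	static int foo_napi_poll(struct napi_struct *napi, int budget)
+	{
+		int work;
+
+		foo_clean_tx(napi);	/* Tx cleanup is always allowed */
+		if (!budget)		/* netpoll: Tx only, and no */
+			return 0;	/* napi_complete_done() call  */
+		work = foo_clean_rx(napi, budget);
+		if (work < budget && napi_complete_done(napi, work))
+			foo_enable_irq(napi);
+		return work;
+	}
+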
+Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
+Signed-off-by: Ziwei Xiao <ziweixiao@google.com>
+Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Praveen Kaligineedi <pkaligineedi@google.com>
+Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/google/gve/gve_main.c | 7 +++++++
+ drivers/net/ethernet/google/gve/gve_rx.c | 4 ----
+ drivers/net/ethernet/google/gve/gve_tx.c | 4 ----
+ 3 files changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/google/gve/gve_main.c
++++ b/drivers/net/ethernet/google/gve/gve_main.c
+@@ -202,6 +202,10 @@ static int gve_napi_poll(struct napi_str
+
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, budget);
++
++ if (!budget)
++ return 0;
++
+ if (block->rx) {
+ work_done = gve_rx_poll(block, budget);
+ reschedule |= work_done == budget;
+@@ -242,6 +246,9 @@ static int gve_napi_poll_dqo(struct napi
+ if (block->tx)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
++ if (!budget)
++ return 0;
++
+ if (block->rx) {
+ work_done = gve_rx_poll_dqo(block, budget);
+ reschedule |= work_done == budget;
+--- a/drivers/net/ethernet/google/gve/gve_rx.c
++++ b/drivers/net/ethernet/google/gve/gve_rx.c
+@@ -778,10 +778,6 @@ int gve_rx_poll(struct gve_notify_block
+
+ feat = block->napi.dev->features;
+
+- /* If budget is 0, do all the work */
+- if (budget == 0)
+- budget = INT_MAX;
+-
+ if (budget > 0)
+ work_done = gve_clean_rx_done(rx, budget, feat);
+
+--- a/drivers/net/ethernet/google/gve/gve_tx.c
++++ b/drivers/net/ethernet/google/gve/gve_tx.c
+@@ -725,10 +725,6 @@ bool gve_tx_poll(struct gve_notify_block
+ u32 nic_done;
+ u32 to_do;
+
+- /* If budget is 0, do all the work */
+- if (budget == 0)
+- budget = INT_MAX;
+-
+ /* In TX path, it may try to clean completed pkts in order to xmit,
+ * to avoid cleaning conflict, use spin_lock(), it yields better
+ * concurrency between xmit/clean than netif's lock.
--- /dev/null
+From ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 Mon Sep 17 00:00:00 2001
+From: Shu Han <ebpqwerty472123@gmail.com>
+Date: Tue, 17 Sep 2024 17:41:04 +0800
+Subject: mm: call the security_mmap_file() LSM hook in remap_file_pages()
+
+From: Shu Han <ebpqwerty472123@gmail.com>
+
+commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 upstream.
+
+The remap_file_pages syscall handler calls do_mmap() directly, which
+doesn't contain the LSM security check. And if the process has called
+personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for
+RW pages, this will actually result in remapping the pages to RWX,
+bypassing a W^X policy enforced by SELinux.
+
+So we should check prot with the security_mmap_file() LSM hook in the
+remap_file_pages syscall handler before do_mmap() is called. Otherwise,
+it potentially permits an attacker to bypass a W^X policy enforced by
+SELinux.
+
+The bypass is similar to CVE-2016-10044, which bypasses the same
+protection via AIO; see [1].
+
+The PoC:
+
+$ cat > test.c
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/personality.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int main(void) {
+ size_t pagesz = sysconf(_SC_PAGE_SIZE);
+ int mfd = syscall(SYS_memfd_create, "test", 0);
+ const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, mfd, 0);
+ unsigned int old = syscall(SYS_personality, 0xffffffff);
+ syscall(SYS_personality, READ_IMPLIES_EXEC | old);
+ syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0);
+ syscall(SYS_personality, old);
+ // show the RWX page exists even if W^X policy is enforced
+ int fd = open("/proc/self/maps", O_RDONLY);
+ unsigned char buf2[1024];
+ while (1) {
+ int ret = read(fd, buf2, 1024);
+ if (ret <= 0) break;
+ write(1, buf2, ret);
+ }
+ close(fd);
+}
+
+$ gcc test.c -o test
+$ ./test | grep rwx
+7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted)
+
+Link: https://project-zero.issues.chromium.org/issues/42452389 [1]
+Cc: stable@vger.kernel.org
+Signed-off-by: Shu Han <ebpqwerty472123@gmail.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+[PM: subject line tweaks]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+[ Resolve merge conflict in mm/mmap.c. ]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -3021,8 +3021,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
+ flags |= MAP_LOCKED;
+
+ file = get_file(vma->vm_file);
++ ret = security_mmap_file(vma->vm_file, prot, flags);
++ if (ret)
++ goto out_fput;
+ ret = do_mmap(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate, NULL);
++out_fput:
+ fput(file);
+ out:
+ mmap_write_unlock(mm);
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:37 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:01 +0100
+Subject: net: Move {l,t,d}stats allocation to core and convert veth & vrf
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov <razor@blackwall.org>, David Ahern <dsahern@kernel.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-1-daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 34d21de99cea9cb17967874313e5b0262527833c ]
+
+Move {l,t,d}stats allocation to the core and let netdevs pick the stats
+type they need. That way the driver doesn't have to bother with error
+handling (allocation failure checking, making sure free happens in the
+right spot, etc) - all happening in the core.
+
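+A driver-side sketch (hypothetical "foo" device) of opting in:
+
+	static void foo_setup(struct net_device *dev)
+	{
+		/* the core now allocates dev->lstats in
+		 * register_netdevice() and frees it on unregister;
+		 * no driver error handling needed
+		 */
+		dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+	}
+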
+Co-developed-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Cc: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Stable-dep-of: 024ee930cb3c ("bpf: Fix dev's rx stats for bpf_redirect_peer traffic")
+[ Note: Simplified vrf bits to reduce patch given unrelated to the fix ]
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/veth.c | 16 +------------
+ drivers/net/vrf.c | 24 ++++++--------------
+ include/linux/netdevice.h | 30 ++++++++++++++++++++++----
+ net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++---
+ 4 files changed, 85 insertions(+), 38 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1381,25 +1381,12 @@ static void veth_free_queues(struct net_
+
+ static int veth_dev_init(struct net_device *dev)
+ {
+- int err;
+-
+- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+- if (!dev->lstats)
+- return -ENOMEM;
+-
+- err = veth_alloc_queues(dev);
+- if (err) {
+- free_percpu(dev->lstats);
+- return err;
+- }
+-
+- return 0;
++ return veth_alloc_queues(dev);
+ }
+
+ static void veth_dev_free(struct net_device *dev)
+ {
+ veth_free_queues(dev);
+- free_percpu(dev->lstats);
+ }
+
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+@@ -1625,6 +1612,7 @@ static void veth_setup(struct net_device
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = veth_dev_free;
++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ dev->hw_features = VETH_FEATURES;
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -121,22 +121,12 @@ struct net_vrf {
+ int ifindex;
+ };
+
+-struct pcpu_dstats {
+- u64 tx_pkts;
+- u64 tx_bytes;
+- u64 tx_drps;
+- u64 rx_pkts;
+- u64 rx_bytes;
+- u64 rx_drps;
+- struct u64_stats_sync syncp;
+-};
+-
+ static void vrf_rx_stats(struct net_device *dev, int len)
+ {
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+- dstats->rx_pkts++;
++ dstats->rx_packets++;
+ dstats->rx_bytes += len;
+ u64_stats_update_end(&dstats->syncp);
+ }
+@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_d
+ do {
+ start = u64_stats_fetch_begin_irq(&dstats->syncp);
+ tbytes = dstats->tx_bytes;
+- tpkts = dstats->tx_pkts;
+- tdrops = dstats->tx_drps;
++ tpkts = dstats->tx_packets;
++ tdrops = dstats->tx_drops;
+ rbytes = dstats->rx_bytes;
+- rpkts = dstats->rx_pkts;
++ rpkts = dstats->rx_packets;
+ } while (u64_stats_fetch_retry_irq(&dstats->syncp, start));
+ stats->tx_bytes += tbytes;
+ stats->tx_packets += tpkts;
+@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff
+ if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
+ vrf_rx_stats(dev, len);
+ else
+- this_cpu_inc(dev->dstats->rx_drps);
++ this_cpu_inc(dev->dstats->rx_drops);
+
+ return NETDEV_TX_OK;
+ }
+@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_bu
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+- dstats->tx_pkts++;
++ dstats->tx_packets++;
+ dstats->tx_bytes += len;
+ u64_stats_update_end(&dstats->syncp);
+ } else {
+- this_cpu_inc(dev->dstats->tx_drps);
++ this_cpu_inc(dev->dstats->tx_drops);
+ }
+
+ return ret;
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1747,6 +1747,13 @@ enum netdev_ml_priv_type {
+ ML_PRIV_CAN,
+ };
+
++enum netdev_stat_type {
++ NETDEV_PCPU_STAT_NONE,
++ NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
++ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
++ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
++};
++
+ /**
+ * struct net_device - The DEVICE structure.
+ *
+@@ -1941,10 +1948,14 @@ enum netdev_ml_priv_type {
+ *
+ * @ml_priv: Mid-layer private
+ * @ml_priv_type: Mid-layer private type
+- * @lstats: Loopback statistics
+- * @tstats: Tunnel statistics
+- * @dstats: Dummy statistics
+- * @vstats: Virtual ethernet statistics
++ *
++ * @pcpu_stat_type: Type of device statistics which the core should
++ * allocate/free: none, lstats, tstats, dstats. none
++ * means the driver is handling statistics allocation/
++ * freeing internally.
++ * @lstats: Loopback statistics: packets, bytes
++ * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes
++ * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes
+ *
+ * @garp_port: GARP
+ * @mrp_port: MRP
+@@ -2287,6 +2298,7 @@ struct net_device {
+ void *ml_priv;
+ enum netdev_ml_priv_type ml_priv_type;
+
++ enum netdev_stat_type pcpu_stat_type:8;
+ union {
+ struct pcpu_lstats __percpu *lstats;
+ struct pcpu_sw_netstats __percpu *tstats;
+@@ -2670,6 +2682,16 @@ struct pcpu_sw_netstats {
+ struct u64_stats_sync syncp;
+ } __aligned(4 * sizeof(u64));
+
++struct pcpu_dstats {
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 rx_drops;
++ u64 tx_packets;
++ u64 tx_bytes;
++ u64 tx_drops;
++ struct u64_stats_sync syncp;
++} __aligned(8 * sizeof(u64));
++
+ struct pcpu_lstats {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9991,6 +9991,46 @@ void netif_tx_stop_all_queues(struct net
+ }
+ EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
++static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
++{
++ void __percpu *v;
++
++ switch (dev->pcpu_stat_type) {
++ case NETDEV_PCPU_STAT_NONE:
++ return 0;
++ case NETDEV_PCPU_STAT_LSTATS:
++ v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
++ break;
++ case NETDEV_PCPU_STAT_TSTATS:
++ v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ break;
++ case NETDEV_PCPU_STAT_DSTATS:
++ v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return v ? 0 : -ENOMEM;
++}
++
++static void netdev_do_free_pcpu_stats(struct net_device *dev)
++{
++ switch (dev->pcpu_stat_type) {
++ case NETDEV_PCPU_STAT_NONE:
++ return;
++ case NETDEV_PCPU_STAT_LSTATS:
++ free_percpu(dev->lstats);
++ break;
++ case NETDEV_PCPU_STAT_TSTATS:
++ free_percpu(dev->tstats);
++ break;
++ case NETDEV_PCPU_STAT_DSTATS:
++ free_percpu(dev->dstats);
++ break;
++ }
++}
++
+ /**
+ * register_netdevice() - register a network device
+ * @dev: device to register
+@@ -10051,11 +10091,15 @@ int register_netdevice(struct net_device
+ goto err_uninit;
+ }
+
++ ret = netdev_do_alloc_pcpu_stats(dev);
++ if (ret)
++ goto err_uninit;
++
+ ret = -EBUSY;
+ if (!dev->ifindex)
+ dev->ifindex = dev_new_index(net);
+ else if (__dev_get_by_index(net, dev->ifindex))
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ /* Transfer changeable features to wanted_features and enable
+ * software offloads (GSO and GRO).
+@@ -10102,14 +10146,14 @@ int register_netdevice(struct net_device
+ ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ ret = netdev_register_kobject(dev);
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ __netdev_update_features(dev);
+
+@@ -10156,6 +10200,8 @@ int register_netdevice(struct net_device
+ out:
+ return ret;
+
++err_free_pcpu:
++ netdev_do_free_pcpu_stats(dev);
+ err_uninit:
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+@@ -10409,6 +10455,7 @@ void netdev_run_todo(void)
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+
++ netdev_do_free_pcpu_stats(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
--- /dev/null
+From zhangzekun11@huawei.com Thu Dec 12 12:45:53 2024
+From: Zhang Zekun <zhangzekun11@huawei.com>
+Date: Wed, 4 Dec 2024 16:27:52 +0800
+Subject: Revert "drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read()"
+To: <gregkh@linuxfoundation.org>
+Cc: <cve@kernel.org>, <stable@vger.kernel.org>, <kevinyang.wang@amd.com>, <alexander.deucher@amd.com>, <liuyongqiang13@huawei.com>, <zhangzekun11@huawei.com>
+Message-ID: <20241204082752.18498-1-zhangzekun11@huawei.com>
+
+From: Zhang Zekun <zhangzekun11@huawei.com>
+
+This reverts commit 25d7e84343e1235b667cf5226c3934fdf36f0df6.
+
+The original mainline patch fixes a buffer overflow issue in
+amdgpu_debugfs_gprwave_read(), but that issue was never introduced in
+kernel 6.1 and older kernels. The backported patch added the check to
+the wrong function in the same file.
+
+Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+@@ -419,7 +419,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_
+ ssize_t result = 0;
+ int r;
+
+- if (size > 4096 || size & 0x3 || *pos & 0x3)
++ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
mmc-mtk-sd-fix-error-handle-of-probe-function.patch
drm-amd-display-check-bios-images-before-it-is-used.patch
ocfs2-revert-ocfs2-fix-the-la-space-leak-when-unmounting-an-ocfs2-volume.patch
+revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch
+gve-fixes-for-napi_poll-when-budget-is-0.patch
+arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch
+arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch
+asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch
+mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch
+bpf-fix-helper-writes-to-read-only-maps.patch
+net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch
+bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch
+veth-use-tstats-per-cpu-traffic-counters.patch
+drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch
+drm-ttm-print-the-memory-decryption-status-just-once.patch
+drm-amdgpu-rework-resume-handling-for-display-v2.patch
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:42 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:03 +0100
+Subject: veth: Use tstats per-CPU traffic counters
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov <razor@blackwall.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-3-daniel@iogearbox.net>
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ]
+
+Currently veth devices use the lstats per-CPU traffic counters, which only
+cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
+device from its peer's TX counters, based on the assumption that a veth
+device can _only_ receive packets from its peer, which is no longer true:
+
+For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
+helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
+a different netns), skipping veth's peer device. Unfortunately, this kind
+of traffic isn't currently accounted for in veth's RX stats.
+
+In preparation for the fix, use tstats (instead of lstats) to maintain
+both RX and TX counters for each veth device. We'll use RX counters for
+bpf_redirect_peer() traffic, and keep using TX counters for the usual
+"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
+_adding_ RX count to peer's TX count, in order to cover both kinds of
+traffic.
+
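+In short, the resulting accounting is (a sketch; see the diff):
+
+	rx(dev) = tstats_rx(dev)	/* bpf_redirect_peer() traffic */
+	        + tstats_tx(peer)	/* usual peer-to-peer traffic  */
+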
+veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
+for less confusion, but let's leave it to another patch to keep the fix
+minimal.
+
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/veth.c | 30 +++++++++++-------------------
+ 1 file changed, 11 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -342,7 +342,7 @@ static netdev_tx_t veth_xmit(struct sk_b
+ skb_tx_timestamp(skb);
+ if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+ if (!use_napi)
+- dev_lstats_add(dev, length);
++ dev_sw_netstats_tx_add(dev, 1, length);
+ } else {
+ drop:
+ atomic64_inc(&priv->dropped);
+@@ -357,14 +357,6 @@ drop:
+ return ret;
+ }
+
+-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+-{
+- struct veth_priv *priv = netdev_priv(dev);
+-
+- dev_lstats_read(dev, packets, bytes);
+- return atomic64_read(&priv->dropped);
+-}
+-
+ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+ {
+ struct veth_priv *priv = netdev_priv(dev);
+@@ -402,24 +394,24 @@ static void veth_get_stats64(struct net_
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
+ struct veth_stats rx;
+- u64 packets, bytes;
+
+- tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+- tot->tx_bytes = bytes;
+- tot->tx_packets = packets;
++ tot->tx_dropped = atomic64_read(&priv->dropped);
++ dev_fetch_sw_netstats(tot, dev->tstats);
+
+ veth_stats_rx(&rx, dev);
+ tot->tx_dropped += rx.xdp_tx_err;
+ tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+- tot->rx_bytes = rx.xdp_bytes;
+- tot->rx_packets = rx.xdp_packets;
++ tot->rx_bytes += rx.xdp_bytes;
++ tot->rx_packets += rx.xdp_packets;
+
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ if (peer) {
+- veth_stats_tx(peer, &packets, &bytes);
+- tot->rx_bytes += bytes;
+- tot->rx_packets += packets;
++ struct rtnl_link_stats64 tot_peer = {};
++
++ dev_fetch_sw_netstats(&tot_peer, peer->tstats);
++ tot->rx_bytes += tot_peer.tx_bytes;
++ tot->rx_packets += tot_peer.tx_packets;
+
+ veth_stats_rx(&rx, peer);
+ tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+@@ -1612,7 +1604,7 @@ static void veth_setup(struct net_device
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = veth_dev_free;
+- dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ dev->hw_features = VETH_FEATURES;