--- /dev/null
+From 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Wed, 6 Nov 2024 16:04:48 +0000
+Subject: arm64: smccc: Remove broken support for SMCCCv1.3 SVE discard hint
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 upstream.
+
+SMCCCv1.3 added a hint bit which callers can set in an SMCCC function ID
+(AKA "FID") to indicate that it is acceptable for the SMCCC
+implementation to discard SVE and/or SME state over a specific SMCCC
+call. The kernel support for using this hint is broken and SMCCC calls
+may clobber the SVE and/or SME state of arbitrary tasks, though FPSIMD
+state is unaffected.
+
+The kernel support is intended to use the hint when there is no SVE or
+SME state to save, and to do this it checks whether TIF_FOREIGN_FPSTATE
+is set or TIF_SVE is clear in assembly code:
+
+| ldr <flags>, [<current_task>, #TSK_TI_FLAGS]
+| tbnz <flags>, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+| tbnz <flags>, #TIF_SVE, 2f // Does that state include SVE?
+|
+| 1: orr <fid>, <fid>, ARM_SMCCC_1_3_SVE_HINT
+| 2:
+| << SMCCC call using FID >>
+
+This is not safe as-is:
+
+(1) SMCCC calls can be made in a preemptible context and preemption can
+ result in TIF_FOREIGN_FPSTATE being set or cleared at arbitrary
+ points in time. Thus checking for TIF_FOREIGN_FPSTATE provides no
+ guarantee.
+
+(2) TIF_FOREIGN_FPSTATE only indicates that the live FP/SVE/SME state in
+ the CPU does not belong to the current task, and does not indicate
+ that clobbering this state is acceptable.
+
+ When the live CPU state is clobbered it is necessary to update
+ fpsimd_last_state.st to ensure that a subsequent context switch will
+ reload FP/SVE/SME state from memory rather than consuming the
+ clobbered state. This and the SMCCC call itself must happen in a
+ critical section with preemption disabled to avoid races.
+
+(3) Live SVE/SME state can exist with TIF_SVE clear (e.g. with only
+ TIF_SME set), and checking TIF_SVE alone is insufficient.
+
+Remove the broken support for the SMCCCv1.3 SVE saving hint. This is
+effectively a revert of commits:
+
+* cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
+* a7c3acca5380 ("arm64: smccc: Save lr before calling __arm_smccc_sve_check()")
+
+... leaving behind the ARM_SMCCC_VERSION_1_3 and ARM_SMCCC_1_3_SVE_HINT
+definitions, since these are simply definitions from the SMCCC
+specification, and the latter is used in KVM via ARM_SMCCC_CALL_HINTS.
+
+If we want to bring this back in future, we'll probably want to handle
+this logic in C where we can use all the usual FPSIMD/SVE/SME helper
+functions, and that'll likely require some rework of the SMCCC code
+and/or its callers.
+
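+As a hedged illustration only (assuming helpers such as
+get_cpu_fpsimd_context(), put_cpu_fpsimd_context() and
+fpsimd_flush_cpu_state() were usable from the SMCCC code), a safe C
+shape might look like:
+
+| get_cpu_fpsimd_context();			/* disables preemption */
+| if (test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+|         /* Live state belongs to another task; mark it stale so the
+|          * next context switch reloads from memory.
+|          */
+|         fpsimd_flush_cpu_state();
+|         fid |= ARM_SMCCC_1_3_SVE_HINT;
+| } else if (!test_thread_flag(TIF_SVE) && !test_thread_flag(TIF_SME)) {
+|         /* Current task owns the live state but has no SVE/SME state. */
+|         fid |= ARM_SMCCC_1_3_SVE_HINT;
+| }
+| << SMCCC call using FID >>
+| put_cpu_fpsimd_context();
+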
+Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20241106160448.2712997-1-mark.rutland@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Mark: fix conflicts in <linux/arm-smccc.h> ]
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/smccc-call.S | 35 +++--------------------------------
+ drivers/firmware/smccc/smccc.c | 4 ----
+ include/linux/arm-smccc.h | 30 ++----------------------------
+ 3 files changed, 5 insertions(+), 64 deletions(-)
+
+--- a/arch/arm64/kernel/smccc-call.S
++++ b/arch/arm64/kernel/smccc-call.S
+@@ -7,48 +7,19 @@
+
+ #include <asm/asm-offsets.h>
+ #include <asm/assembler.h>
+-#include <asm/thread_info.h>
+-
+-/*
+- * If we have SMCCC v1.3 and (as is likely) no SVE state in
+- * the registers then set the SMCCC hint bit to say there's no
+- * need to preserve it. Do this by directly adjusting the SMCCC
+- * function value which is already stored in x0 ready to be called.
+- */
+-SYM_FUNC_START(__arm_smccc_sve_check)
+-
+- ldr_l x16, smccc_has_sve_hint
+- cbz x16, 2f
+-
+- get_current_task x16
+- ldr x16, [x16, #TSK_TI_FLAGS]
+- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
+-
+-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
+-
+-2: ret
+-SYM_FUNC_END(__arm_smccc_sve_check)
+-EXPORT_SYMBOL(__arm_smccc_sve_check)
+
+ .macro SMCCC instr
+- stp x29, x30, [sp, #-16]!
+- mov x29, sp
+-alternative_if ARM64_SVE
+- bl __arm_smccc_sve_check
+-alternative_else_nop_endif
+ \instr #0
+- ldr x4, [sp, #16]
++ ldr x4, [sp]
+ stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
+ stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
+- ldr x4, [sp, #24]
++ ldr x4, [sp, #8]
+ cbz x4, 1f /* no quirk structure */
+ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
+ cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
+ b.ne 1f
+ str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
+-1: ldp x29, x30, [sp], #16
+- ret
++1: ret
+ .endm
+
+ /*
+--- a/drivers/firmware/smccc/smccc.c
++++ b/drivers/firmware/smccc/smccc.c
+@@ -16,7 +16,6 @@ static u32 smccc_version = ARM_SMCCC_VER
+ static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
+
+ bool __ro_after_init smccc_trng_available = false;
+-u64 __ro_after_init smccc_has_sve_hint = false;
+ s32 __ro_after_init smccc_soc_id_version = SMCCC_RET_NOT_SUPPORTED;
+ s32 __ro_after_init smccc_soc_id_revision = SMCCC_RET_NOT_SUPPORTED;
+
+@@ -28,9 +27,6 @@ void __init arm_smccc_version_init(u32 v
+ smccc_conduit = conduit;
+
+ smccc_trng_available = smccc_probe_trng();
+- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+- smccc_version >= ARM_SMCCC_VERSION_1_3)
+- smccc_has_sve_hint = true;
+
+ if ((smccc_version >= ARM_SMCCC_VERSION_1_2) &&
+ (smccc_conduit != SMCCC_CONDUIT_NONE)) {
+--- a/include/linux/arm-smccc.h
++++ b/include/linux/arm-smccc.h
+@@ -224,8 +224,6 @@ u32 arm_smccc_get_version(void);
+
+ void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
+
+-extern u64 smccc_has_sve_hint;
+-
+ /**
+ * arm_smccc_get_soc_id_version()
+ *
+@@ -324,15 +322,6 @@ struct arm_smccc_quirk {
+ };
+
+ /**
+- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
+- *
+- * Sets the SMCCC hint bit to indicate if there is live state in the SVE
+- * registers, this modifies x0 in place and should never be called from C
+- * code.
+- */
+-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
+-
+-/**
+ * __arm_smccc_smc() - make SMC calls
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+@@ -399,20 +388,6 @@ asmlinkage void __arm_smccc_hvc(unsigned
+
+ #endif
+
+-/* nVHE hypervisor doesn't have a current thread so needs separate checks */
+-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
+-
+-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \
+- ARM64_SVE)
+-#define smccc_sve_clobbers "x16", "x30", "cc",
+-
+-#else
+-
+-#define SMCCC_SVE_CHECK
+-#define smccc_sve_clobbers
+-
+-#endif
+-
+ #define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x
+
+ #define __count_args(...) \
+@@ -480,7 +455,7 @@ asmlinkage void __arm_smccc_hvc(unsigned
+
+ #define ___constraints(count) \
+ : __constraint_read_ ## count \
+- : smccc_sve_clobbers "memory"
++ : "memory"
+ #define __constraints(count) ___constraints(count)
+
+ /*
+@@ -495,8 +470,7 @@ asmlinkage void __arm_smccc_hvc(unsigned
+ register unsigned long r2 asm("r2"); \
+ register unsigned long r3 asm("r3"); \
+ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
+- asm volatile(SMCCC_SVE_CHECK \
+- inst "\n" : \
++ asm volatile(inst "\n" : \
+ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
+ __constraints(__count_args(__VA_ARGS__))); \
+ if (___res) \
--- /dev/null
+From 751ecf6afd6568adc98f2a6052315552c0483d18 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 30 Oct 2024 20:23:50 +0000
+Subject: arm64/sve: Discard stale CPU state when handling SVE traps
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 751ecf6afd6568adc98f2a6052315552c0483d18 upstream.
+
+The logic for handling SVE traps manipulates saved FPSIMD/SVE state
+incorrectly, and a race with preemption can result in a task having
+TIF_SVE set and TIF_FOREIGN_FPSTATE clear even though the live CPU state
+is stale (e.g. with SVE traps enabled). This has been observed to result
+in warnings from do_sve_acc() where SVE traps are not expected while
+TIF_SVE is set:
+
+| if (test_and_set_thread_flag(TIF_SVE))
+| WARN_ON(1); /* SVE access shouldn't have trapped */
+
+Warnings of this form have been reported intermittently, e.g.
+
+ https://lore.kernel.org/linux-arm-kernel/CA+G9fYtEGe_DhY2Ms7+L7NKsLYUomGsgqpdBj+QwDLeSg=JhGg@mail.gmail.com/
+ https://lore.kernel.org/linux-arm-kernel/000000000000511e9a060ce5a45c@google.com/
+
+The race can occur when the SVE trap handler is preempted before and
+after manipulating the saved FPSIMD/SVE state, starting and ending on
+the same CPU, e.g.
+
+| void do_sve_acc(unsigned long esr, struct pt_regs *regs)
+| {
+| // Trap on CPU 0 with TIF_SVE clear, SVE traps enabled
+| // task->fpsimd_cpu is 0.
+| // per_cpu_ptr(&fpsimd_last_state, 0) is task.
+|
+| ...
+|
+| // Preempted; migrated from CPU 0 to CPU 1.
+| // TIF_FOREIGN_FPSTATE is set.
+|
+| get_cpu_fpsimd_context();
+|
+| if (test_and_set_thread_flag(TIF_SVE))
+| WARN_ON(1); /* SVE access shouldn't have trapped */
+|
+| sve_init_regs() {
+| if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+| ...
+| } else {
+| fpsimd_to_sve(current);
+| current->thread.fp_type = FP_STATE_SVE;
+| }
+| }
+|
+| put_cpu_fpsimd_context();
+|
+| // Preempted; migrated from CPU 1 to CPU 0.
+| // task->fpsimd_cpu is still 0
+| // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then:
+| // - Stale HW state is reused (with SVE traps enabled)
+| // - TIF_FOREIGN_FPSTATE is cleared
+| // - A return to userspace skips HW state restore
+| }
+
+Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set
+by calling fpsimd_flush_task_state() to detach from the saved CPU
+state. This ensures that a subsequent context switch will not reuse the
+stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the
+new state to be reloaded from memory prior to a return to userspace.
+
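+For reference, fpsimd_flush_task_state() amounts to roughly (a
+paraphrase, not a verbatim quote of the kernel code):
+
+| t->thread.fpsimd_cpu = NR_CPUS;	/* state not live on any CPU */
+| barrier();
+| set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
+| barrier();
+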
+Fixes: cccb78ce89c4 ("arm64/sve: Rework SVE access trap to convert state in registers")
+Reported-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Cc: stable@vger.kernel.org
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/r/20241030-arm64-fpsimd-foreign-flush-v1-1-bd7bd66905a2@kernel.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -1383,6 +1383,7 @@ static void sve_init_regs(void)
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
++ fpsimd_flush_task_state(current);
+ }
+ }
+
--- /dev/null
+From a0aae96be5ffc5b456ca07bfe1385b721c20e184 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
+ <amadeuszx.slawinski@linux.intel.com>
+Date: Thu, 10 Oct 2024 13:20:08 +0200
+Subject: ASoC: Intel: avs: Fix return status of avs_pcm_hw_constraints_init()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+
+commit a0aae96be5ffc5b456ca07bfe1385b721c20e184 upstream.
+
+The check of the return code from avs_pcm_hw_constraints_init() in
+avs_dai_fe_startup() only tests whether the value differs from 0.
+Currently the function can return a positive value; change it to return
+0 on success.
+
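+For illustration, the caller-side idiom the message describes is of the
+form (a sketch, not the exact source):
+
+	ret = avs_pcm_hw_constraints_init(substream);
+	if (ret)	/* any non-zero value, even positive, is a failure */
+		return ret;
+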
+Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+I've observed KASAN splats on our setups, and while the patch itself is
+correct regardless, the problem seems to be caused by recent changes to
+rates: it started happening after recent patchsets and doesn't reproduce
+with those reverted:
+https://lore.kernel.org/linux-sound/20240905-alsa-12-24-128-v1-0-8371948d3921@baylibre.com/
+https://lore.kernel.org/linux-sound/20240911135756.24434-1-tiwai@suse.de/
+I've tested using Mark's tree, where both are applied, and for some
+reason snd_pcm_hw_constraint_minmax() started returning a positive value
+where it previously returned 0. I'm a bit worried that this signals some
+deeper problem regarding constraints with the above changes.
+
+Link: https://patch.msgid.link/20241010112008.545526-1-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/intel/avs/pcm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/soc/intel/avs/pcm.c
++++ b/sound/soc/intel/avs/pcm.c
+@@ -540,7 +540,7 @@ static int avs_dai_fe_hw_free(struct snd
+ if (ret < 0)
+ dev_dbg(dai->dev, "Failed to free pages!\n");
+
+- return ret;
++ return 0;
+ }
+
+ static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:43 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:02 +0100
+Subject: bpf: Fix dev's rx stats for bpf_redirect_peer traffic
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Youlun Zhang <zhangyoulun@bytedance.com>, Nikolay Aleksandrov <razor@blackwall.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-2-daniel@iogearbox.net>
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 024ee930cb3c9ae49e4266aee89cfde0ebb407e1 ]
+
+Traffic redirected by bpf_redirect_peer() (used by recent CNIs like Cilium)
+is not accounted for in the RX stats of supported devices (that is, veth
+and netkit), confusing user space metrics collectors such as cAdvisor [0],
+as reported by Youlun.
+
+Fix it by calling dev_sw_netstats_rx_add() in skb_do_redirect(), to update
+RX traffic counters. Devices that support ndo_get_peer_dev _must_ use the
+@tstats per-CPU counters (instead of @lstats, or @dstats).
+
+To make this more fool-proof, error out when ndo_get_peer_dev is set but
+@tstats are not selected.
+
+ [0] Specifically, the "container_network_receive_{byte,packet}s_total"
+ counters are affected.
+
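+For reference, the effect of dev_sw_netstats_rx_add() is roughly (a
+sketch of the inline helper's body):
+
+	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	u64_stats_inc(&tstats->rx_packets);
+	u64_stats_add(&tstats->rx_bytes, len);
+	u64_stats_update_end(&tstats->syncp);
+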
+Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
+Reported-by: Youlun Zhang <zhangyoulun@bytedance.com>
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-6-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 8 ++++++++
+ net/core/filter.c | 1 +
+ 2 files changed, 9 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9995,6 +9995,14 @@ static int netdev_do_alloc_pcpu_stats(st
+ {
+ void __percpu *v;
+
++ /* Drivers implementing ndo_get_peer_dev must support tstat
++ * accounting, so that skb_do_redirect() can bump the dev's
++ * RX stats upon network namespace switch.
++ */
++ if (dev->netdev_ops->ndo_get_peer_dev &&
++ dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
++ return -EOPNOTSUPP;
++
+ switch (dev->pcpu_stat_type) {
+ case NETDEV_PCPU_STAT_NONE:
+ return 0;
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2491,6 +2491,7 @@ int skb_do_redirect(struct sk_buff *skb)
+ net_eq(net, dev_net(dev))))
+ goto out_drop;
+ skb->dev = dev;
++ dev_sw_netstats_rx_add(dev, skb->len);
+ return -EAGAIN;
+ }
+ return flags & BPF_F_NEIGH ?
--- /dev/null
+From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 13 Sep 2024 21:17:48 +0200
+Subject: bpf: Fix helper writes to read-only maps
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 32556ce93bc45c730829083cb60f95a2728ea48b upstream.
+
+Lonial found an issue that despite user- and BPF-side frozen BPF map
+(like in case of .rodata), it was still possible to write into it from
+a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT}
+as arguments.
+
+In check_func_arg(), when the argument is of the mentioned type,
+meta->raw_mode is never set. Later, in check_helper_mem_access(), under
+the case of PTR_TO_MAP_VALUE as the register base type, BPF_READ is
+assumed for the subsequent call to check_map_access_type(), and given
+the BPF map is read-only, it succeeds.
+
+The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT
+when results are written into them as opposed to read out of them. The
+latter indicates that it's okay to pass a pointer to uninitialized memory
+as the memory is written to anyway.
+
+However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM
+just with additional alignment requirement. So it is better to just get
+rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the
+fixed size memory types. For this, add MEM_ALIGNED to additionally ensure
+alignment given these helpers write directly into the args via *<ptr> = val.
+The .arg*_size has been initialized reflecting the actual sizeof(*<ptr>).
+
+MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated
+argument types, since in !MEM_FIXED_SIZE cases the verifier does not know
+the buffer size a priori and therefore cannot blindly write *<ptr> = val.
+
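+As a hedged illustration of the reported pattern (hypothetical BPF
+program fragment; the map, key and buf names are made up), a helper
+taking ARG_PTR_TO_LONG could be pointed at a frozen, read-only map
+value:
+
+	long *res = bpf_map_lookup_elem(&frozen_map, &key);
+	if (res)
+		/* pre-fix: the verifier assumed BPF_READ here, so
+		 * bpf_strtol() could write into the read-only value
+		 */
+		bpf_strtol(buf, sizeof(buf), 10, res);
+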
+Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types")
+Reported-by: Lonial Con <kongln9170@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[ Resolve merge conflict in include/linux/bpf.h and merge conflict in
+ kernel/bpf/verifier.c.]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 7 +++++--
+ kernel/bpf/helpers.c | 6 ++++--
+ kernel/bpf/syscall.c | 3 ++-
+ kernel/bpf/verifier.c | 41 +++++------------------------------------
+ kernel/trace/bpf_trace.c | 6 ++++--
+ net/core/filter.c | 6 ++++--
+ 6 files changed, 24 insertions(+), 45 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -475,6 +475,11 @@ enum bpf_type_flag {
+ /* Size is known at compile time. */
+ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+
++ /* Memory must be aligned on some architectures, used in combination with
++ * MEM_FIXED_SIZE.
++ */
++ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
++
+ __BPF_TYPE_FLAG_MAX,
+ __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
+ };
+@@ -510,8 +515,6 @@ enum bpf_arg_type {
+ ARG_ANYTHING, /* any (initialized) argument is ok */
+ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
+ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+- ARG_PTR_TO_INT, /* pointer to int */
+- ARG_PTR_TO_LONG, /* pointer to long */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
+ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
+--- a/kernel/bpf/helpers.c
++++ b/kernel/bpf/helpers.c
+@@ -531,7 +531,8 @@ const struct bpf_func_proto bpf_strtol_p
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(s64),
+ };
+
+ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
+@@ -560,7 +561,8 @@ const struct bpf_func_proto bpf_strtoul_
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(u64),
+ };
+
+ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5260,7 +5260,8 @@ static const struct bpf_func_proto bpf_k
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_LONG,
++ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg4_size = sizeof(u64),
+ };
+
+ static const struct bpf_func_proto *
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5818,16 +5818,6 @@ static bool arg_type_is_dynptr(enum bpf_
+ return base_type(type) == ARG_PTR_TO_DYNPTR;
+ }
+
+-static int int_ptr_type_to_size(enum bpf_arg_type type)
+-{
+- if (type == ARG_PTR_TO_INT)
+- return sizeof(u32);
+- else if (type == ARG_PTR_TO_LONG)
+- return sizeof(u64);
+-
+- return -EINVAL;
+-}
+-
+ static int resolve_map_arg_type(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta,
+ enum bpf_arg_type *arg_type)
+@@ -5908,16 +5898,6 @@ static const struct bpf_reg_types mem_ty
+ },
+ };
+
+-static const struct bpf_reg_types int_ptr_types = {
+- .types = {
+- PTR_TO_STACK,
+- PTR_TO_PACKET,
+- PTR_TO_PACKET_META,
+- PTR_TO_MAP_KEY,
+- PTR_TO_MAP_VALUE,
+- },
+-};
+-
+ static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+ static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+ static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+@@ -5955,8 +5935,6 @@ static const struct bpf_reg_types *compa
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+- [ARG_PTR_TO_INT] = &int_ptr_types,
+- [ARG_PTR_TO_LONG] = &int_ptr_types,
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
+ [ARG_PTR_TO_FUNC] = &func_ptr_types,
+ [ARG_PTR_TO_STACK] = &stack_ptr_types,
+@@ -6303,9 +6281,11 @@ skip_type_check:
+ */
+ meta->raw_mode = arg_type & MEM_UNINIT;
+ if (arg_type & MEM_FIXED_SIZE) {
+- err = check_helper_mem_access(env, regno,
+- fn->arg_size[arg], false,
+- meta);
++ err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
++ if (err)
++ return err;
++ if (arg_type & MEM_ALIGNED)
++ err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
+ }
+ break;
+ case ARG_CONST_SIZE:
+@@ -6373,17 +6353,6 @@ skip_type_check:
+ if (err)
+ return err;
+ break;
+- case ARG_PTR_TO_INT:
+- case ARG_PTR_TO_LONG:
+- {
+- int size = int_ptr_type_to_size(arg_type);
+-
+- err = check_helper_mem_access(env, regno, size, false, meta);
+- if (err)
+- return err;
+- err = check_ptr_alignment(env, reg, 0, size, true);
+- break;
+- }
+ case ARG_PTR_TO_CONST_STR:
+ {
+ struct bpf_map *map = reg->map_ptr;
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1192,7 +1192,8 @@ static const struct bpf_func_proto bpf_g
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_LONG,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u64),
+ };
+
+ BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
+@@ -1208,7 +1209,8 @@ static const struct bpf_func_proto bpf_g
+ .func = get_func_ret,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_LONG,
++ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg2_size = sizeof(u64),
+ };
+
+ BPF_CALL_1(get_func_arg_cnt, void *, ctx)
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -6233,7 +6233,8 @@ static const struct bpf_func_proto bpf_s
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_INT,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+ };
+@@ -6244,7 +6245,8 @@ static const struct bpf_func_proto bpf_x
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_INT,
++ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
++ .arg3_size = sizeof(u32),
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+ };
--- /dev/null
+From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Mon, 25 Nov 2024 13:59:09 -0500
+Subject: drm/amdgpu: rework resume handling for display (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 73dae652dcac776296890da215ee7dec357a1032 upstream.
+
+Split resume into a 3rd step to handle displays when DCC is
+enabled on DCN 4.0.1. Move display after the buffer funcs
+have been re-enabled so that the GPU will do the move and
+properly set the DCC metadata for DCN.
+
+v2: fix fence irq resume ordering
+
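+The resulting resume ordering is then roughly (summarizing the diff
+below):
+
+	phase1: COMMON, GMC, IH
+	phase2: all remaining blocks except COMMON/GMC/IH/PSP and DCE
+	amdgpu_fence_driver_hw_init()
+	phase3: DCE (display) only
+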
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3242,7 +3242,7 @@ static int amdgpu_device_ip_resume_phase
+ *
+ * @adev: amdgpu_device pointer
+ *
+- * First resume function for hardware IPs. The list of all the hardware
++ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+@@ -3260,6 +3260,7 @@ static int amdgpu_device_ip_resume_phase
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
++ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+@@ -3284,6 +3285,36 @@ static int amdgpu_device_ip_resume_phase
+ }
+
+ /**
++ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * Third resume function for hardware IPs. The list of all the hardware
++ * IPs that make up the asic is walked and the resume callbacks are run for
++ * all DCE. resume puts the hardware into a functional state after a suspend
++ * and updates the software state as necessary. This function is also used
++ * for restoring the GPU after a GPU reset.
++ *
++ * Returns 0 on success, negative error code on failure.
++ */
++static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
++{
++ int i, r;
++
++ for (i = 0; i < adev->num_ip_blocks; i++) {
++ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
++ continue;
++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
++ r = adev->ip_blocks[i].version->funcs->resume(adev);
++ if (r)
++ return r;
++ }
++ }
++
++ return 0;
++}
++
++/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+@@ -3313,6 +3344,13 @@ static int amdgpu_device_ip_resume(struc
+
+ r = amdgpu_device_ip_resume_phase2(adev);
+
++ if (r)
++ return r;
++
++ amdgpu_fence_driver_hw_init(adev);
++
++ r = amdgpu_device_ip_resume_phase3(adev);
++
+ return r;
+ }
+
+@@ -4311,7 +4349,6 @@ int amdgpu_device_resume(struct drm_devi
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ return r;
+ }
+- amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_late_init(adev);
+ if (r)
+@@ -5065,6 +5102,10 @@ int amdgpu_do_asic_reset(struct list_hea
+ if (r)
+ goto out;
+
++ r = amdgpu_device_ip_resume_phase3(tmp_adev);
++ if (r)
++ goto out;
++
+ if (vram_lost)
+ amdgpu_device_fill_reset_magic(tmp_adev);
+
--- /dev/null
+From stable+bounces-100113-greg=kroah.com@vger.kernel.org Mon Dec 9 10:49:51 2024
+From: Ajay Kaher <ajay.kaher@broadcom.com>
+Date: Mon, 9 Dec 2024 09:49:03 +0000
+Subject: drm/ttm: Make sure the mapped tt pages are decrypted when needed
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Sasha Levin <sashal@kernel.org>, Ye Li <ye.li@broadcom.com>
+Message-ID: <20241209094904.2547579-2-ajay.kaher@broadcom.com>
+
+From: Zack Rusin <zack.rusin@broadcom.com>
+
+commit 71ce046327cfd3aef3f93d1c44e091395eb03f8f upstream.
+
+Some drivers require the mapped tt pages to be decrypted. In an ideal
+world this would have been handled by the dma layer, but the TTM page
+fault handling would have to be rewritten to able to do that.
+
+A side-effect of the TTM page fault handling is using a dma allocation
+per order (via ttm_pool_alloc_page) which makes it impossible to just
+trivially use dma_mmap_attrs. As a result ttm has to be very careful
+about trying to make its pgprot for the mapped tt pages match what
+the dma layer thinks it is. At the ttm layer it's possible to
+deduce the requirement to have tt pages decrypted by checking
+whether coherent dma allocations have been requested and the system
+is running with confidential computing technologies.
+
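+A condensed sketch of the two sides of the change (full context in the
+diff below):
+
+	/* at tt create time: flag pages as needing decrypted mappings */
+	if (bdev->pool.use_dma_alloc &&
+	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		page_flags |= TTM_TT_FLAG_DECRYPTED;
+
+	/* at map time: make the pgprot match what the dma layer set up */
+	if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED)
+		tmp = pgprot_decrypted(tmp);
+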
+This approach isn't ideal, but keeping TTM matching the DMA layer's
+expectations for the page properties is fragile in general;
+unfortunately, a proper fix would require a rewrite of TTM's page
+fault handling.
+
+Fixes vmwgfx with SEV enabled.
+
+v2: Explicitly include cc_platform.h
+v3: Use CC_ATTR_GUEST_MEM_ENCRYPT instead of CC_ATTR_MEM_ENCRYPT to
+limit the scope to guests and log when memory decryption is enabled.
+
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem")
+Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v5.14+
+Link: https://patchwork.freedesktop.org/patch/msgid/20230926040359.3040017-1-zack@kde.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Ye Li <ye.li@broadcom.com>
+Signed-off-by: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++++++--
+ drivers/gpu/drm/ttm/ttm_tt.c | 12 ++++++++++++
+ include/drm/ttm/ttm_tt.h | 7 +++++++
+ 3 files changed, 30 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
+@@ -274,7 +274,13 @@ pgprot_t ttm_io_prot(struct ttm_buffer_o
+ enum ttm_caching caching;
+
+ man = ttm_manager_type(bo->bdev, res->mem_type);
+- caching = man->use_tt ? bo->ttm->caching : res->bus.caching;
++ if (man->use_tt) {
++ caching = bo->ttm->caching;
++ if (bo->ttm->page_flags & TTM_TT_FLAG_DECRYPTED)
++ tmp = pgprot_decrypted(tmp);
++ } else {
++ caching = res->bus.caching;
++ }
+
+ return ttm_prot_from_caching(caching, tmp);
+ }
+@@ -317,6 +323,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu
+ .no_wait_gpu = false
+ };
+ struct ttm_tt *ttm = bo->ttm;
++ struct ttm_resource_manager *man =
++ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+ pgprot_t prot;
+ int ret;
+
+@@ -326,7 +334,8 @@ static int ttm_bo_kmap_ttm(struct ttm_bu
+ if (ret)
+ return ret;
+
+- if (num_pages == 1 && ttm->caching == ttm_cached) {
++ if (num_pages == 1 && ttm->caching == ttm_cached &&
++ !(man->use_tt && (ttm->page_flags & TTM_TT_FLAG_DECRYPTED))) {
+ /*
+ * We're mapping a single page, and the desired
+ * page protection is consistent with the bo.
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -31,11 +31,13 @@
+
+ #define pr_fmt(fmt) "[TTM] " fmt
+
++#include <linux/cc_platform.h>
+ #include <linux/sched.h>
+ #include <linux/shmem_fs.h>
+ #include <linux/file.h>
+ #include <linux/module.h>
+ #include <drm/drm_cache.h>
++#include <drm/drm_device.h>
+ #include <drm/ttm/ttm_bo_driver.h>
+
+ #include "ttm_module.h"
+@@ -59,6 +61,7 @@ static atomic_long_t ttm_dma32_pages_all
+ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
+ {
+ struct ttm_device *bdev = bo->bdev;
++ struct drm_device *ddev = bo->base.dev;
+ uint32_t page_flags = 0;
+
+ dma_resv_assert_held(bo->base.resv);
+@@ -80,6 +83,15 @@ int ttm_tt_create(struct ttm_buffer_obje
+ pr_err("Illegal buffer object type\n");
+ return -EINVAL;
+ }
++ /*
++ * When using dma_alloc_coherent with memory encryption the
++ * mapped TT pages need to be decrypted or otherwise the drivers
++ * will end up sending encrypted mem to the gpu.
++ */
++ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
++ page_flags |= TTM_TT_FLAG_DECRYPTED;
++ drm_info(ddev, "TT memory decryption enabled.");
++ }
+
+ bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
+ if (unlikely(bo->ttm == NULL))
+--- a/include/drm/ttm/ttm_tt.h
++++ b/include/drm/ttm/ttm_tt.h
+@@ -79,6 +79,12 @@ struct ttm_tt {
+ * page_flags = TTM_TT_FLAG_EXTERNAL |
+ * TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+ *
++ * TTM_TT_FLAG_DECRYPTED: The mapped ttm pages should be marked as
++ * not encrypted. The framework will try to match what the dma layer
++ * is doing, but note that it is a little fragile because ttm page
++ * fault handling abuses the DMA api a bit and dma_map_attrs can't be
++ * used to assure pgprot always matches.
++ *
+ * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
+ * set by TTM after ttm_tt_populate() has successfully returned, and is
+ * then unset when TTM calls ttm_tt_unpopulate().
+@@ -87,6 +93,7 @@ struct ttm_tt {
+ #define TTM_TT_FLAG_ZERO_ALLOC (1 << 1)
+ #define TTM_TT_FLAG_EXTERNAL (1 << 2)
+ #define TTM_TT_FLAG_EXTERNAL_MAPPABLE (1 << 3)
++#define TTM_TT_FLAG_DECRYPTED (1 << 4)
+
+ #define TTM_TT_FLAG_PRIV_POPULATED (1U << 31)
+ uint32_t page_flags;
--- /dev/null
+From stable+bounces-100114-greg=kroah.com@vger.kernel.org Mon Dec 9 10:50:05 2024
+From: Ajay Kaher <ajay.kaher@broadcom.com>
+Date: Mon, 9 Dec 2024 09:49:04 +0000
+Subject: drm/ttm: Print the memory decryption status just once
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: zack.rusin@broadcom.com, thomas.hellstrom@linux.intel.com, christian.koenig@amd.com, ray.huang@amd.com, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, ajay.kaher@broadcom.com, alexey.makhalov@broadcom.com, vasavi.sirnapalli@broadcom.com, Ye Li <ye.li@broadcom.com>
+Message-ID: <20241209094904.2547579-3-ajay.kaher@broadcom.com>
+
+From: Zack Rusin <zack.rusin@broadcom.com>
+
+commit 27906e5d78248b19bcdfdae72049338c828897bb upstream.
+
+Stop printing the TT memory decryption status info each time tt is created
+and instead print it just once.
+
+Reduces the spam in the system logs when running guests with SEV enabled.
+
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Fixes: 71ce046327cf ("drm/ttm: Make sure the mapped tt pages are decrypted when needed")
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v5.14+
+Link: https://patchwork.freedesktop.org/patch/msgid/20240408155605.1398631-1-zack.rusin@broadcom.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Ye Li <ye.li@broadcom.com>
+Signed-off-by: Ajay Kaher <ajay.kaher@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -90,7 +90,7 @@ int ttm_tt_create(struct ttm_buffer_obje
+ */
+ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+ page_flags |= TTM_TT_FLAG_DECRYPTED;
+- drm_info(ddev, "TT memory decryption enabled.");
++ drm_info_once(ddev, "TT memory decryption enabled.");
+ }
+
+ bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
--- /dev/null
+From 278a370c1766060d2144d6cf0b06c101e1043b6d Mon Sep 17 00:00:00 2001
+From: Ziwei Xiao <ziweixiao@google.com>
+Date: Mon, 13 Nov 2023 16:41:44 -0800
+Subject: gve: Fixes for napi_poll when budget is 0
+
+From: Ziwei Xiao <ziweixiao@google.com>
+
+commit 278a370c1766060d2144d6cf0b06c101e1043b6d upstream.
+
+Netpoll will explicitly pass the polling call with a budget of 0 to
+indicate it's clearing the Tx path only. gve_rx_poll and gve_xdp_poll
+were mistakenly taking the 0 budget as an indication to do all the
+work. Add checks to avoid the rx path and xdp path being called when
+budget is 0, and also avoid calling napi_complete_done when budget is
+0 for netpoll.
+
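+For reference, the NAPI contract being honored here, as a sketch with
+hypothetical foo_* names (not driver code):
+
+	static int foo_napi_poll(struct napi_struct *napi, int budget)
+	{
+		int work;
+
+		foo_clean_tx(napi);	/* Tx cleanup is always allowed */
+		if (!budget)		/* netpoll: Tx only, and no */
+			return 0;	/* napi_complete_done() call  */
+		work = foo_clean_rx(napi, budget);
+		if (work < budget && napi_complete_done(napi, work))
+			foo_enable_irq(napi);
+		return work;
+	}
+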
+Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
+Signed-off-by: Ziwei Xiao <ziweixiao@google.com>
+Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Praveen Kaligineedi <pkaligineedi@google.com>
+Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/google/gve/gve_main.c | 7 +++++++
+ drivers/net/ethernet/google/gve/gve_rx.c | 4 ----
+ drivers/net/ethernet/google/gve/gve_tx.c | 4 ----
+ 3 files changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/google/gve/gve_main.c
++++ b/drivers/net/ethernet/google/gve/gve_main.c
+@@ -202,6 +202,10 @@ static int gve_napi_poll(struct napi_str
+
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, budget);
++
++ if (!budget)
++ return 0;
++
+ if (block->rx) {
+ work_done = gve_rx_poll(block, budget);
+ reschedule |= work_done == budget;
+@@ -242,6 +246,9 @@ static int gve_napi_poll_dqo(struct napi
+ if (block->tx)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
++ if (!budget)
++ return 0;
++
+ if (block->rx) {
+ work_done = gve_rx_poll_dqo(block, budget);
+ reschedule |= work_done == budget;
+--- a/drivers/net/ethernet/google/gve/gve_rx.c
++++ b/drivers/net/ethernet/google/gve/gve_rx.c
+@@ -778,10 +778,6 @@ int gve_rx_poll(struct gve_notify_block
+
+ feat = block->napi.dev->features;
+
+- /* If budget is 0, do all the work */
+- if (budget == 0)
+- budget = INT_MAX;
+-
+ if (budget > 0)
+ work_done = gve_clean_rx_done(rx, budget, feat);
+
+--- a/drivers/net/ethernet/google/gve/gve_tx.c
++++ b/drivers/net/ethernet/google/gve/gve_tx.c
+@@ -725,10 +725,6 @@ bool gve_tx_poll(struct gve_notify_block
+ u32 nic_done;
+ u32 to_do;
+
+- /* If budget is 0, do all the work */
+- if (budget == 0)
+- budget = INT_MAX;
+-
+ /* In TX path, it may try to clean completed pkts in order to xmit,
+ * to avoid cleaning conflict, use spin_lock(), it yields better
+ * concurrency between xmit/clean than netif's lock.
--- /dev/null
+From ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 Mon Sep 17 00:00:00 2001
+From: Shu Han <ebpqwerty472123@gmail.com>
+Date: Tue, 17 Sep 2024 17:41:04 +0800
+Subject: mm: call the security_mmap_file() LSM hook in remap_file_pages()
+
+From: Shu Han <ebpqwerty472123@gmail.com>
+
+commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 upstream.
+
+The remap_file_pages syscall handler calls do_mmap() directly, which
+doesn't contain the LSM security check. And if the process has called
+personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for
+RW pages, this will actually result in remapping the pages to RWX,
+bypassing a W^X policy enforced by SELinux.
+
+So we should check prot with the security_mmap_file() LSM hook in the
+remap_file_pages syscall handler before do_mmap() is called. Otherwise,
+it potentially permits an attacker to bypass a W^X policy enforced by
+SELinux.
+
+The bypass is similar to CVE-2016-10044, which bypasses the same
+protection via AIO; see [1].
+
+The PoC:
+
+$ cat > test.c
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/personality.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int main(void) {
+ size_t pagesz = sysconf(_SC_PAGE_SIZE);
+ int mfd = syscall(SYS_memfd_create, "test", 0);
+ const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, mfd, 0);
+ unsigned int old = syscall(SYS_personality, 0xffffffff);
+ syscall(SYS_personality, READ_IMPLIES_EXEC | old);
+ syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0);
+ syscall(SYS_personality, old);
+ // show the RWX page exists even if W^X policy is enforced
+ int fd = open("/proc/self/maps", O_RDONLY);
+ unsigned char buf2[1024];
+ while (1) {
+ int ret = read(fd, buf2, 1024);
+ if (ret <= 0) break;
+ write(1, buf2, ret);
+ }
+ close(fd);
+}
+
+$ gcc test.c -o test
+$ ./test | grep rwx
+7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted)
+
+Link: https://project-zero.issues.chromium.org/issues/42452389 [1]
+Cc: stable@vger.kernel.org
+Signed-off-by: Shu Han <ebpqwerty472123@gmail.com>
+Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+[PM: subject line tweaks]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+[ Resolve merge conflict in mm/mmap.c. ]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -3021,8 +3021,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
+ flags |= MAP_LOCKED;
+
+ file = get_file(vma->vm_file);
++ ret = security_mmap_file(vma->vm_file, prot, flags);
++ if (ret)
++ goto out_fput;
+ ret = do_mmap(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate, NULL);
++out_fput:
+ fput(file);
+ out:
+ mmap_write_unlock(mm);
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:37 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:01 +0100
+Subject: net: Move {l,t,d}stats allocation to core and convert veth & vrf
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov <razor@blackwall.org>, David Ahern <dsahern@kernel.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-1-daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 34d21de99cea9cb17967874313e5b0262527833c ]
+
+Move {l,t,d}stats allocation to the core and let netdevs pick the stats
+type they need. That way the driver doesn't have to bother with error
+handling (allocation failure checking, making sure free happens in the
+right spot, etc) - all happening in the core.
+
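+A driver-side sketch (hypothetical "foo" device) of opting in:
+
+	static void foo_setup(struct net_device *dev)
+	{
+		/* the core now allocates dev->lstats in
+		 * register_netdevice() and frees it on unregister;
+		 * no driver error handling needed
+		 */
+		dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+	}
+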
+Co-developed-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Cc: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Stable-dep-of: 024ee930cb3c ("bpf: Fix dev's rx stats for bpf_redirect_peer traffic")
+[ Note: Simplified vrf bits to reduce patch given unrelated to the fix ]
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/veth.c | 16 +------------
+ drivers/net/vrf.c | 24 ++++++--------------
+ include/linux/netdevice.h | 30 ++++++++++++++++++++++----
+ net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++---
+ 4 files changed, 85 insertions(+), 38 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1381,25 +1381,12 @@ static void veth_free_queues(struct net_
+
+ static int veth_dev_init(struct net_device *dev)
+ {
+- int err;
+-
+- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+- if (!dev->lstats)
+- return -ENOMEM;
+-
+- err = veth_alloc_queues(dev);
+- if (err) {
+- free_percpu(dev->lstats);
+- return err;
+- }
+-
+- return 0;
++ return veth_alloc_queues(dev);
+ }
+
+ static void veth_dev_free(struct net_device *dev)
+ {
+ veth_free_queues(dev);
+- free_percpu(dev->lstats);
+ }
+
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+@@ -1625,6 +1612,7 @@ static void veth_setup(struct net_device
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = veth_dev_free;
++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ dev->hw_features = VETH_FEATURES;
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -121,22 +121,12 @@ struct net_vrf {
+ int ifindex;
+ };
+
+-struct pcpu_dstats {
+- u64 tx_pkts;
+- u64 tx_bytes;
+- u64 tx_drps;
+- u64 rx_pkts;
+- u64 rx_bytes;
+- u64 rx_drps;
+- struct u64_stats_sync syncp;
+-};
+-
+ static void vrf_rx_stats(struct net_device *dev, int len)
+ {
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+- dstats->rx_pkts++;
++ dstats->rx_packets++;
+ dstats->rx_bytes += len;
+ u64_stats_update_end(&dstats->syncp);
+ }
+@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_d
+ do {
+ start = u64_stats_fetch_begin_irq(&dstats->syncp);
+ tbytes = dstats->tx_bytes;
+- tpkts = dstats->tx_pkts;
+- tdrops = dstats->tx_drps;
++ tpkts = dstats->tx_packets;
++ tdrops = dstats->tx_drops;
+ rbytes = dstats->rx_bytes;
+- rpkts = dstats->rx_pkts;
++ rpkts = dstats->rx_packets;
+ } while (u64_stats_fetch_retry_irq(&dstats->syncp, start));
+ stats->tx_bytes += tbytes;
+ stats->tx_packets += tpkts;
+@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff
+ if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
+ vrf_rx_stats(dev, len);
+ else
+- this_cpu_inc(dev->dstats->rx_drps);
++ this_cpu_inc(dev->dstats->rx_drops);
+
+ return NETDEV_TX_OK;
+ }
+@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_bu
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+- dstats->tx_pkts++;
++ dstats->tx_packets++;
+ dstats->tx_bytes += len;
+ u64_stats_update_end(&dstats->syncp);
+ } else {
+- this_cpu_inc(dev->dstats->tx_drps);
++ this_cpu_inc(dev->dstats->tx_drops);
+ }
+
+ return ret;
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1747,6 +1747,13 @@ enum netdev_ml_priv_type {
+ ML_PRIV_CAN,
+ };
+
++enum netdev_stat_type {
++ NETDEV_PCPU_STAT_NONE,
++ NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
++ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
++ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
++};
++
+ /**
+ * struct net_device - The DEVICE structure.
+ *
+@@ -1941,10 +1948,14 @@ enum netdev_ml_priv_type {
+ *
+ * @ml_priv: Mid-layer private
+ * @ml_priv_type: Mid-layer private type
+- * @lstats: Loopback statistics
+- * @tstats: Tunnel statistics
+- * @dstats: Dummy statistics
+- * @vstats: Virtual ethernet statistics
++ *
++ * @pcpu_stat_type: Type of device statistics which the core should
++ * allocate/free: none, lstats, tstats, dstats. none
++ * means the driver is handling statistics allocation/
++ * freeing internally.
++ * @lstats: Loopback statistics: packets, bytes
++ * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes
++ * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes
+ *
+ * @garp_port: GARP
+ * @mrp_port: MRP
+@@ -2287,6 +2298,7 @@ struct net_device {
+ void *ml_priv;
+ enum netdev_ml_priv_type ml_priv_type;
+
++ enum netdev_stat_type pcpu_stat_type:8;
+ union {
+ struct pcpu_lstats __percpu *lstats;
+ struct pcpu_sw_netstats __percpu *tstats;
+@@ -2670,6 +2682,16 @@ struct pcpu_sw_netstats {
+ struct u64_stats_sync syncp;
+ } __aligned(4 * sizeof(u64));
+
++struct pcpu_dstats {
++ u64 rx_packets;
++ u64 rx_bytes;
++ u64 rx_drops;
++ u64 tx_packets;
++ u64 tx_bytes;
++ u64 tx_drops;
++ struct u64_stats_sync syncp;
++} __aligned(8 * sizeof(u64));
++
+ struct pcpu_lstats {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9991,6 +9991,46 @@ void netif_tx_stop_all_queues(struct net
+ }
+ EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
++static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
++{
++ void __percpu *v;
++
++ switch (dev->pcpu_stat_type) {
++ case NETDEV_PCPU_STAT_NONE:
++ return 0;
++ case NETDEV_PCPU_STAT_LSTATS:
++ v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
++ break;
++ case NETDEV_PCPU_STAT_TSTATS:
++ v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ break;
++ case NETDEV_PCPU_STAT_DSTATS:
++ v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return v ? 0 : -ENOMEM;
++}
++
++static void netdev_do_free_pcpu_stats(struct net_device *dev)
++{
++ switch (dev->pcpu_stat_type) {
++ case NETDEV_PCPU_STAT_NONE:
++ return;
++ case NETDEV_PCPU_STAT_LSTATS:
++ free_percpu(dev->lstats);
++ break;
++ case NETDEV_PCPU_STAT_TSTATS:
++ free_percpu(dev->tstats);
++ break;
++ case NETDEV_PCPU_STAT_DSTATS:
++ free_percpu(dev->dstats);
++ break;
++ }
++}
++
+ /**
+ * register_netdevice() - register a network device
+ * @dev: device to register
+@@ -10051,11 +10091,15 @@ int register_netdevice(struct net_device
+ goto err_uninit;
+ }
+
++ ret = netdev_do_alloc_pcpu_stats(dev);
++ if (ret)
++ goto err_uninit;
++
+ ret = -EBUSY;
+ if (!dev->ifindex)
+ dev->ifindex = dev_new_index(net);
+ else if (__dev_get_by_index(net, dev->ifindex))
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ /* Transfer changeable features to wanted_features and enable
+ * software offloads (GSO and GRO).
+@@ -10102,14 +10146,14 @@ int register_netdevice(struct net_device
+ ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ ret = netdev_register_kobject(dev);
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
+- goto err_uninit;
++ goto err_free_pcpu;
+
+ __netdev_update_features(dev);
+
+@@ -10156,6 +10200,8 @@ int register_netdevice(struct net_device
+ out:
+ return ret;
+
++err_free_pcpu:
++ netdev_do_free_pcpu_stats(dev);
+ err_uninit:
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+@@ -10409,6 +10455,7 @@ void netdev_run_todo(void)
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+
++ netdev_do_free_pcpu_stats(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
--- /dev/null
+From zhangzekun11@huawei.com Thu Dec 12 12:45:53 2024
+From: Zhang Zekun <zhangzekun11@huawei.com>
+Date: Wed, 4 Dec 2024 16:27:52 +0800
+Subject: Revert "drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read()"
+To: <gregkh@linuxfoundation.org>
+Cc: <cve@kernel.org>, <stable@vger.kernel.org>, <kevinyang.wang@amd.com>, <alexander.deucher@amd.com>, <liuyongqiang13@huawei.com>, <zhangzekun11@huawei.com>
+Message-ID: <20241204082752.18498-1-zhangzekun11@huawei.com>
+
+From: Zhang Zekun <zhangzekun11@huawei.com>
+
+This reverts commit 25d7e84343e1235b667cf5226c3934fdf36f0df6.
+
+The original mainline patch fixes a buffer overflow issue in
+amdgpu_debugfs_gprwave_read(), but that issue was never introduced in
+kernel 6.1 and older kernels. The backported patch added the check to
+the wrong function in the same file.
+
+Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+@@ -419,7 +419,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_
+ ssize_t result = 0;
+ int r;
+
+- if (size > 4096 || size & 0x3 || *pos & 0x3)
++ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
mmc-mtk-sd-fix-error-handle-of-probe-function.patch
drm-amd-display-check-bios-images-before-it-is-used.patch
ocfs2-revert-ocfs2-fix-the-la-space-leak-when-unmounting-an-ocfs2-volume.patch
+revert-drm-amdgpu-add-missing-size-check-in-amdgpu_debugfs_gprwave_read.patch
+gve-fixes-for-napi_poll-when-budget-is-0.patch
+arm64-sve-discard-stale-cpu-state-when-handling-sve-traps.patch
+arm64-smccc-remove-broken-support-for-smcccv1.3-sve-discard-hint.patch
+asoc-intel-avs-fix-return-status-of-avs_pcm_hw_constraints_init.patch
+mm-call-the-security_mmap_file-lsm-hook-in-remap_file_pages.patch
+bpf-fix-helper-writes-to-read-only-maps.patch
+net-move-l-t-d-stats-allocation-to-core-and-convert-veth-vrf.patch
+bpf-fix-dev-s-rx-stats-for-bpf_redirect_peer-traffic.patch
+veth-use-tstats-per-cpu-traffic-counters.patch
+drm-ttm-make-sure-the-mapped-tt-pages-are-decrypted-when-needed.patch
+drm-ttm-print-the-memory-decryption-status-just-once.patch
+drm-amdgpu-rework-resume-handling-for-display-v2.patch
--- /dev/null
+From daniel@iogearbox.net Fri Dec 6 16:34:42 2024
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 6 Dec 2024 16:34:03 +0100
+Subject: veth: Use tstats per-CPU traffic counters
+To: gregkh@linuxfoundation.org
+Cc: stable@vger.kernel.org, netdev@vger.kernel.org, bpf@vger.kernel.org, leitao@debian.org, martin.lau@linux.dev, peilin.ye@bytedance.com, kuba@kernel.org, Nikolay Aleksandrov <razor@blackwall.org>, Martin KaFai Lau <martin.lau@kernel.org>
+Message-ID: <20241206153403.273068-3-daniel@iogearbox.net>
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ]
+
+Currently veth devices use the lstats per-CPU traffic counters, which only
+cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
+device from its peer's TX counters, based on the assumption that a veth
+device can _only_ receive packets from its peer, which is no longer true:
+
+For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
+helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
+a different netns), skipping veth's peer device. Unfortunately, this kind
+of traffic isn't currently accounted for in veth's RX stats.
+
+In preparation for the fix, use tstats (instead of lstats) to maintain
+both RX and TX counters for each veth device. We'll use RX counters for
+bpf_redirect_peer() traffic, and keep using TX counters for the usual
+"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
+_adding_ RX count to peer's TX count, in order to cover both kinds of
+traffic.
+
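+In short, the resulting accounting is (a sketch; see the diff):
+
+	rx(dev) = tstats_rx(dev)	/* bpf_redirect_peer() traffic */
+	        + tstats_tx(peer)	/* usual peer-to-peer traffic  */
+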
+veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
+for less confusion, but let's leave it to another patch to keep the fix
+minimal.
+
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/veth.c | 30 +++++++++++-------------------
+ 1 file changed, 11 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -342,7 +342,7 @@ static netdev_tx_t veth_xmit(struct sk_b
+ skb_tx_timestamp(skb);
+ if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+ if (!use_napi)
+- dev_lstats_add(dev, length);
++ dev_sw_netstats_tx_add(dev, 1, length);
+ } else {
+ drop:
+ atomic64_inc(&priv->dropped);
+@@ -357,14 +357,6 @@ drop:
+ return ret;
+ }
+
+-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+-{
+- struct veth_priv *priv = netdev_priv(dev);
+-
+- dev_lstats_read(dev, packets, bytes);
+- return atomic64_read(&priv->dropped);
+-}
+-
+ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+ {
+ struct veth_priv *priv = netdev_priv(dev);
+@@ -402,24 +394,24 @@ static void veth_get_stats64(struct net_
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
+ struct veth_stats rx;
+- u64 packets, bytes;
+
+- tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+- tot->tx_bytes = bytes;
+- tot->tx_packets = packets;
++ tot->tx_dropped = atomic64_read(&priv->dropped);
++ dev_fetch_sw_netstats(tot, dev->tstats);
+
+ veth_stats_rx(&rx, dev);
+ tot->tx_dropped += rx.xdp_tx_err;
+ tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+- tot->rx_bytes = rx.xdp_bytes;
+- tot->rx_packets = rx.xdp_packets;
++ tot->rx_bytes += rx.xdp_bytes;
++ tot->rx_packets += rx.xdp_packets;
+
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ if (peer) {
+- veth_stats_tx(peer, &packets, &bytes);
+- tot->rx_bytes += bytes;
+- tot->rx_packets += packets;
++ struct rtnl_link_stats64 tot_peer = {};
++
++ dev_fetch_sw_netstats(&tot_peer, peer->tstats);
++ tot->rx_bytes += tot_peer.tx_bytes;
++ tot->rx_packets += tot_peer.tx_packets;
+
+ veth_stats_rx(&rx, peer);
+ tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+@@ -1612,7 +1604,7 @@ static void veth_setup(struct net_device
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = veth_dev_free;
+- dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
++ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ dev->hw_features = VETH_FEATURES;