git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
arm64: errata: Work around early CME DVMSync acknowledgement
author: Catalin Marinas <catalin.marinas@arm.com>
Tue, 7 Apr 2026 10:28:44 +0000 (11:28 +0100)
committer: Catalin Marinas <catalin.marinas@arm.com>
Fri, 10 Apr 2026 18:46:14 +0000 (19:46 +0100)
C1-Pro acknowledges DVMSync messages before completing the SME/CME
memory accesses. Work around this by issuing an IPI to the affected CPUs
if they are running in EL0 with SME enabled.

Note that we avoid the local DSB in the IPI handler as the kernel runs
with SCTLR_EL1.IESB=1. This is sufficient to complete SME memory
accesses at EL0 on taking an exception to EL1. On the return to user
path, no barrier is necessary either. See the comment in
sme_set_active() and the more detailed explanation in the link below.

To avoid a potential IPI flood from malicious applications (e.g.
madvise(MADV_PAGEOUT) in a tight loop), track where a process is active
via mm_cpumask() and only interrupt those CPUs.

Link: https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Reviewed-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Documentation/arch/arm64/silicon-errata.rst
arch/arm64/Kconfig
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/tlbbatch.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/process.c
arch/arm64/tools/cpucaps

index 4c300caad90112d8bba72c73bf43e49e6e820915..282ad425798397a8d97a2f585b52935aae3c1b3b 100644 (file)
@@ -202,6 +202,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-V3AE   | #3312417        | ARM64_ERRATUM_3194386       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | C1-Pro          | #4193714        | ARM64_ERRATUM_4193714       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | ARM_SMMU_MMU_500_CPRE_ERRATA|
 |                |                 | #562869,1047329 |                             |
 +----------------+-----------------+-----------------+-----------------------------+
index 38dba5f7e4d2d7e6d2ea4ef696578b5dae8d1192..9b419f1a9ae67dfecf18777d53b2e8b5099a683e 100644 (file)
@@ -1175,6 +1175,18 @@ config ARM64_ERRATUM_4311569
 
          If unsure, say Y.
 
+config ARM64_ERRATUM_4193714
+       bool "C1-Pro: 4193714: SME DVMSync early acknowledgement"
+       depends on ARM64_SME
+       default y
+       help
+         Enable workaround for C1-Pro acknowledging the DVMSync before
+         the SME memory accesses are complete. This will cause TLB
+         maintenance for processes using SME to also issue an IPI to
+         the affected CPUs.
+
+         If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
        bool "Cavium erratum 22375, 24313"
        default y
index 177c691914f8796f0e29710776c6fb0cddfe4e90..0b1b78a4c03ea166ac146082ee84ddb6c4f133a1 100644 (file)
@@ -64,6 +64,8 @@ cpucap_is_possible(const unsigned int cap)
                return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
        case ARM64_WORKAROUND_SPECULATIVE_SSBS:
                return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+       case ARM64_WORKAROUND_4193714:
+               return IS_ENABLED(CONFIG_ARM64_ERRATUM_4193714);
        case ARM64_MPAM:
                /*
                 * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
index 1d2e33559bd5229cdd9bd51c2a7bbc7b1ec7f4b7..d9d00b45ab11507f23a0897136d375b6dad007b3 100644 (file)
@@ -428,6 +428,24 @@ static inline size_t sme_state_size(struct task_struct const *task)
        return __sme_state_size(task_get_sme_vl(task));
 }
 
+void sme_enable_dvmsync(void);
+void sme_set_active(void);
+void sme_clear_active(void);
+
+static inline void sme_enter_from_user_mode(void)
+{
+       if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+           test_thread_flag(TIF_SME))
+               sme_clear_active();
+}
+
+static inline void sme_exit_to_user_mode(void)
+{
+       if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+           test_thread_flag(TIF_SME))
+               sme_set_active();
+}
+
 #else
 
 static inline void sme_user_disable(void) { BUILD_BUG(); }
@@ -456,6 +474,9 @@ static inline size_t sme_state_size(struct task_struct const *task)
        return 0;
 }
 
+static inline void sme_enter_from_user_mode(void) { }
+static inline void sme_exit_to_user_mode(void) { }
+
 #endif /* ! CONFIG_ARM64_SME */
 
 /* For use by EFI runtime services calls only */
index fedb0b87b8db45dbb5228f41c587efbcff9ef004..6297631532e59f49de44d53ecdeb22944c39d0b3 100644 (file)
@@ -2,11 +2,17 @@
 #ifndef _ARCH_ARM64_TLBBATCH_H
 #define _ARCH_ARM64_TLBBATCH_H
 
+#include <linux/cpumask.h>
+
 struct arch_tlbflush_unmap_batch {
+#ifdef CONFIG_ARM64_ERRATUM_4193714
        /*
-        * For arm64, HW can do tlb shootdown, so we don't
-        * need to record cpumask for sending IPI
+        * Track CPUs that need SME DVMSync on completion of this batch.
+        * Otherwise, the arm64 HW can do tlb shootdown, so we don't need to
+        * record cpumask for sending IPI
         */
+       cpumask_var_t cpumask;
+#endif
 };
 
 #endif /* _ARCH_ARM64_TLBBATCH_H */
index 2627911919350350f6cc36686c60781480fa8e1c..4aae42b83049017bdf49a1ceb075646d35d5c2de 100644 (file)
@@ -80,6 +80,71 @@ static inline unsigned long get_trans_granule(void)
        }
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+void sme_do_dvmsync(const struct cpumask *mask);
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+       if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+               return;
+
+       sme_do_dvmsync(mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+                                          struct mm_struct *mm)
+{
+       if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+               return;
+
+       /*
+        * Order the mm_cpumask() read after the hardware DVMSync.
+        */
+       dsb(ish);
+       if (cpumask_empty(mm_cpumask(mm)))
+               return;
+
+       /*
+        * Allocate the batch cpumask on first use. Fall back to an immediate
+        * IPI for this mm in case of failure.
+        */
+       if (!cpumask_available(batch->cpumask) &&
+           !zalloc_cpumask_var(&batch->cpumask, GFP_ATOMIC)) {
+               sme_do_dvmsync(mm_cpumask(mm));
+               return;
+       }
+
+       cpumask_or(batch->cpumask, batch->cpumask, mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+       if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+               return;
+
+       if (!cpumask_available(batch->cpumask))
+               return;
+
+       sme_do_dvmsync(batch->cpumask);
+       cpumask_clear(batch->cpumask);
+}
+
+#else
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+                                          struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Level-based TLBI operations.
  *
@@ -189,12 +254,14 @@ static inline void __tlbi_sync_s1ish(struct mm_struct *mm)
 {
        dsb(ish);
        __repeat_tlbi_sync(vale1is, 0);
+       sme_dvmsync(mm);
 }
 
-static inline void __tlbi_sync_s1ish_batch(void)
+static inline void __tlbi_sync_s1ish_batch(struct arch_tlbflush_unmap_batch *batch)
 {
        dsb(ish);
        __repeat_tlbi_sync(vale1is, 0);
+       sme_dvmsync_batch(batch);
 }
 
 static inline void __tlbi_sync_s1ish_kernel(void)
@@ -397,7 +464,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
  */
 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
-       __tlbi_sync_s1ish_batch();
+       __tlbi_sync_s1ish_batch(batch);
 }
 
 /*
@@ -602,6 +669,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
                struct mm_struct *mm, unsigned long start, unsigned long end)
 {
        __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+       sme_dvmsync_add_pending(batch, mm);
 }
 
 static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
index 5c0ab6bfd44a6ae2b5de459a1b4693a83a29ce91..5377e4c2eba2beaaeb5af77d9795fa9710d47a82 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/kvm_asm.h>
 #include <asm/smp_plat.h>
 
@@ -575,6 +576,23 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+static bool has_sme_dvmsync_erratum(const struct arm64_cpu_capabilities *entry,
+                                   int scope)
+{
+       if (!id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)))
+               return false;
+
+       return is_affected_midr_range(entry, scope);
+}
+
+static void cpu_enable_sme_dvmsync(const struct arm64_cpu_capabilities *__unused)
+{
+       if (this_cpu_has_cap(ARM64_WORKAROUND_4193714))
+               sme_enable_dvmsync();
+}
+#endif
+
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
 static const struct midr_range erratum_ac03_cpu_38_list[] = {
        MIDR_ALL_VERSIONS(MIDR_AMPERE1),
@@ -901,6 +919,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .matches = need_arm_si_l1_workaround_4311569,
        },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+       {
+               .desc = "C1-Pro SME DVMSync early acknowledgement",
+               .capability = ARM64_WORKAROUND_4193714,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = has_sme_dvmsync_erratum,
+               .cpu_enable = cpu_enable_sme_dvmsync,
+               /* C1-Pro r0p0 - r1p2 (the latter only when REVIDR_EL1[0]==0) */
+               .midr_range = MIDR_RANGE(MIDR_C1_PRO, 0, 0, 1, 2),
+               MIDR_FIXED(MIDR_CPU_VAR_REV(1, 2), BIT(0)),
+       },
+#endif
 #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
        {
                .desc = "ARM errata 2966298, 3117295",
index 3625797e9ee8f9ab2136d6128a0196773d8178c7..fb1e374af62299d1eabcb5261d01f8d6dbf2e6c8 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/daifflags.h>
 #include <asm/esr.h>
 #include <asm/exception.h>
+#include <asm/fpsimd.h>
 #include <asm/irq_regs.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
@@ -67,6 +68,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
 {
        enter_from_user_mode(regs);
        mte_disable_tco_entry(current);
+       sme_enter_from_user_mode();
 }
 
 /*
@@ -80,6 +82,7 @@ static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
        local_irq_disable();
        exit_to_user_mode_prepare_legacy(regs);
        local_daif_mask();
+       sme_exit_to_user_mode();
        mte_check_tfsr_exit();
        exit_to_user_mode();
 }
index 9de1d8a604cbf2f7d0147a9322316856202e33c1..60a45d600b460a6cb104c4228ca426f8a72d1005 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
+#include <linux/cpumask.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
@@ -28,6 +29,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/swab.h>
@@ -1358,6 +1360,83 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
        put_cpu_fpsimd_context();
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+/*
+ * SME/CME erratum handling.
+ */
+static cpumask_t sme_dvmsync_cpus;
+
+/*
+ * These helpers are only called from non-preemptible contexts, so
+ * smp_processor_id() is safe here.
+ */
+void sme_set_active(void)
+{
+       unsigned int cpu = smp_processor_id();
+
+       if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+               return;
+
+       cpumask_set_cpu(cpu, mm_cpumask(current->mm));
+
+       /*
+        * A subsequent (post ERET) SME access may use a stale address
+        * translation. On C1-Pro, a TLBI+DSB on a different CPU will wait for
+        * the completion of cpumask_set_cpu() above as it appears in program
+        * order before the SME access. The post-TLBI+DSB read of mm_cpumask()
+        * will lead to the IPI being issued.
+        *
+        * https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
+        */
+}
+
+void sme_clear_active(void)
+{
+       unsigned int cpu = smp_processor_id();
+
+       if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+               return;
+
+       /*
+        * With SCTLR_EL1.IESB enabled, the SME memory transactions are
+        * completed on entering EL1.
+        */
+       cpumask_clear_cpu(cpu, mm_cpumask(current->mm));
+}
+
+static void sme_dvmsync_ipi(void *unused)
+{
+       /*
+        * With SCTLR_EL1.IESB on, taking an exception is sufficient to ensure
+        * the completion of the SME memory accesses, so no need for an
+        * explicit DSB.
+        */
+}
+
+void sme_do_dvmsync(const struct cpumask *mask)
+{
+       /*
+        * This is called from the TLB maintenance functions after the DSB ISH
+        * to send the hardware DVMSync message. If this CPU sees the mask as
+        * empty, the remote CPU executing sme_set_active() would have seen
+        * the DVMSync and no IPI required.
+        */
+       if (cpumask_empty(mask))
+               return;
+
+       preempt_disable();
+       smp_call_function_many(mask, sme_dvmsync_ipi, NULL, true);
+       preempt_enable();
+}
+
+void sme_enable_dvmsync(void)
+{
+       cpumask_set_cpu(smp_processor_id(), &sme_dvmsync_cpus);
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Trapped SME access
  *
index 489554931231e6dfa10d838dbced5b0c7e18a9fd..4c328b7c79ba3a470f4ab096c1f61e9fd60162f7 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/reboot.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/elfcore.h>
 #include <linux/pm.h>
@@ -339,8 +340,41 @@ void flush_thread(void)
        flush_gcs();
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+static void arch_dup_tlbbatch_mask(struct task_struct *dst)
+{
+       /*
+        * Clear the inherited cpumask with memset() to cover both cases where
+        * cpumask_var_t is a pointer or an array. It will be allocated lazily
+        * in sme_dvmsync_add_pending() if CPUMASK_OFFSTACK=y.
+        */
+       if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+               memset(&dst->tlb_ubc.arch.cpumask, 0,
+                      sizeof(dst->tlb_ubc.arch.cpumask));
+}
+
+static void arch_release_tlbbatch_mask(struct task_struct *tsk)
+{
+       if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+               free_cpumask_var(tsk->tlb_ubc.arch.cpumask);
+}
+
+#else
+
+static void arch_dup_tlbbatch_mask(struct task_struct *dst)
+{
+}
+
+static void arch_release_tlbbatch_mask(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 void arch_release_task_struct(struct task_struct *tsk)
 {
+       arch_release_tlbbatch_mask(tsk);
        fpsimd_release_task(tsk);
 }
 
@@ -356,6 +390,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        *dst = *src;
 
+       arch_dup_tlbbatch_mask(dst);
+
        /*
         * Drop stale reference to src's sve_state and convert dst to
         * non-streaming FPSIMD mode.
index 7261553b644b2b01500154485b456d2ee5c5926f..8946be60a4099893fdbfc06f49daf1bf87ccf994 100644 (file)
@@ -105,6 +105,7 @@ WORKAROUND_2077057
 WORKAROUND_2457168
 WORKAROUND_2645198
 WORKAROUND_2658417
+WORKAROUND_4193714
 WORKAROUND_4311569
 WORKAROUND_AMPERE_AC03_CPU_38
 WORKAROUND_AMPERE_AC04_CPU_23