x86/mm: Eliminate window where TLB flushes may be inadvertently skipped

author Dave Hansen <dave.hansen@linux.intel.com>
Thu, 8 May 2025 22:41:32 +0000 (15:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 May 2025 15:00:31 +0000 (08:00 -0700)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index eb83348f930512b24bae96b426a6b0a39b200fd9..b6d6750e4bd1212a4136cb1f3a79b41e2f5ebf05 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -899,8 +899,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
                 cond_mitigation(tsk);
  
                 /*
-                * Let nmi_uaccess_okay() and finish_asid_transition()
-                * know that CR3 is changing.
+                * Indicate that CR3 is about to change. nmi_uaccess_okay()
+                * and others are sensitive to the window where mm_cpumask(),
+                * CR3 and cpu_tlbstate.loaded_mm are not all in sync.
                  */
                 this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
                 barrier();
@@ -1204,8 +1205,16 @@ done:
  
  static bool should_flush_tlb(int cpu, void *data)
  {
+       struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu);
         struct flush_tlb_info *info = data;
  
+       /*
+        * Order the 'loaded_mm' and 'is_lazy' against their
+        * write ordering in switch_mm_irqs_off(). Ensure
+        * 'is_lazy' is at least as new as 'loaded_mm'.
+        */
+       smp_rmb();
+
         /* Lazy TLB will get flushed at the next context switch. */
         if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
                 return false;
@@ -1214,8 +1223,15 @@ static bool should_flush_tlb(int cpu, void *data)
         if (!info->mm)
                 return true;
  
+       /*
+        * While switching, the remote CPU could have state from
+        * either the prev or next mm. Assume the worst and flush.
+        */
+       if (loaded_mm == LOADED_MM_SWITCHING)
+               return true;
+
         /* The target mm is loaded, and the CPU is not lazy. */
-       if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm)
+       if (loaded_mm == info->mm)
                 return true;
  
         /* In cpumask, but not the loaded mm? Periodically remove by flushing. */
author	Dave Hansen <dave.hansen@linux.intel.com>
	Thu, 8 May 2025 22:41:32 +0000 (15:41 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 9 May 2025 15:00:31 +0000 (08:00 -0700)