x86/mm/tlb: Update mm_cpumask lazily

author Rik van Riel <riel@surriel.com>

Thu, 14 Nov 2024 15:26:16 +0000 (10:26 -0500)

committer Ingo Molnar <mingo@kernel.org>

Tue, 19 Nov 2024 11:02:46 +0000 (12:02 +0100)
author Rik van Riel <riel@surriel.com>
Thu, 14 Nov 2024 15:26:16 +0000 (10:26 -0500)
committer Ingo Molnar <mingo@kernel.org>
Tue, 19 Nov 2024 11:02:46 +0000 (12:02 +0100)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c

index d17518ca19b8b82a94678569ceb0f5871ecd07b4..8b66a555d2f03540ec2861c9eacd55f900145950 100644 (file)
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1825,11 +1825,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
         return temp_state;
  }
  
+__ro_after_init struct mm_struct *poking_mm;
+__ro_after_init unsigned long poking_addr;
+
  static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
  {
         lockdep_assert_irqs_disabled();
+
         switch_mm_irqs_off(NULL, prev_state.mm, current);
  
+       /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
+       cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm));
+
         /*
          * Restore the breakpoints if they were disabled before the temporary mm
          * was loaded.
@@ -1838,9 +1845,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
                 hw_breakpoint_restore();
  }
  
-__ro_after_init struct mm_struct *poking_mm;
-__ro_after_init unsigned long poking_addr;
-
  static void text_poke_memcpy(void *dst, const void *src, size_t len)
  {
         memcpy(dst, src, len);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index b0d5a644fc84dc2c405d42bb102fd20873441744..cc4e57ae690f5cff6536a9e31cddf60ff646f120 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -606,18 +606,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
                 cond_mitigation(tsk);
  
                 /*
-                * Stop remote flushes for the previous mm.
-                * Skip kernel threads; we never send init_mm TLB flushing IPIs,
-                * but the bitmap manipulation can cause cache line contention.
+                * Leave this CPU in prev's mm_cpumask. Atomic writes to
+                * mm_cpumask can be expensive under contention. The CPU
+                * will be removed lazily at TLB flush time.
                  */
-               if (prev != &init_mm) {
-                       VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
-                                               mm_cpumask(prev)));
-                       cpumask_clear_cpu(cpu, mm_cpumask(prev));
-               }
+               VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu,
+                               mm_cpumask(prev)));
  
                 /* Start receiving IPIs and then read tlb_gen (and LAM below) */
-               if (next != &init_mm)
+               if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))
                         cpumask_set_cpu(cpu, mm_cpumask(next));
                 next_tlb_gen = atomic64_read(&next->context.tlb_gen);
  
@@ -761,8 +758,10 @@ static void flush_tlb_func(void *info)
                 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
  
                 /* Can only happen on remote CPUs */
-               if (f->mm && f->mm != loaded_mm)
+               if (f->mm && f->mm != loaded_mm) {
+                       cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm));
                         return;
+               }
         }
  
         if (unlikely(loaded_mm == &init_mm))
author	Rik van Riel <riel@surriel.com>
	Thu, 14 Nov 2024 15:26:16 +0000 (10:26 -0500)
committer	Ingo Molnar <mingo@kernel.org>
	Tue, 19 Nov 2024 11:02:46 +0000 (12:02 +0100)
arch/x86/kernel/alternative.c		patch | blob | blame | history
arch/x86/mm/tlb.c		patch | blob | blame | history