x86/mm: Enable broadcast TLB invalidation for multi-threaded processes
author     Rik van Riel <riel@surriel.com>
           Wed, 26 Feb 2025 03:00:45 +0000 (22:00 -0500)
committer  Ingo Molnar <mingo@kernel.org>
           Wed, 19 Mar 2025 10:12:29 +0000 (11:12 +0100)
There is not enough room in the 12-bit ASID address space to hand out
broadcast ASIDs to every process. Only hand out broadcast ASIDs to processes
when they are observed to be simultaneously running on 4 or more CPUs.

This also allows single-threaded processes to keep using the cheaper, local
TLB invalidation instructions instead of INVLPGB.
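
The threshold check lives in consider_global_asid() in the tlb.c hunk below,
which calls mm_active_cpus_exceeds(); that helper comes from an earlier patch
in this series and is not part of this diff. A minimal sketch of such a check,
assuming it only needs mm_cpumask() and the per-CPU loaded_mm state (an
illustration, not the exact upstream helper):

  /* Sketch only: is @mm currently active on more than @threshold CPUs? */
  static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
  {
          int count = 0;
          int cpu;

          /* Cheap upper bound first: mm_cpumask() can only overestimate. */
          if (cpumask_weight(mm_cpumask(mm)) <= threshold)
                  return false;

          /* Count only the CPUs that really have this mm loaded right now. */
          for_each_cpu(cpu, mm_cpumask(mm)) {
                  if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
                          continue;
                  if (++count > threshold)
                          return true;
          }
          return false;
  }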

Due to the structure of flush_tlb_mm_range(), the INVLPGB flushing is done in
a generically named broadcast_tlb_flush() function which can later also be
used for Intel RAR.
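
broadcast_tlb_flush() relies on INVLPGB wrapper helpers, such as
invlpgb_flush_user_nr_nosync(), invlpgb_flush_single_pcid_nosync() and
__tlbsync(), which come from an earlier patch in this series and are not part
of this diff. A rough sketch of the underlying primitives, with the register
layout taken from the AMD APM and the raw byte encodings assumed for
toolchains without the INVLPGB/TLBSYNC mnemonics; an illustration, not the
upstream implementation:

  /*
   * Sketch only. INVLPGB takes its operands in rAX (address plus valid-bit
   * flags), ECX (extra page count in bits 15:0, bit 31 selects a 2MB
   * stride) and EDX (PCID in bits 27:16, ASID in bits 15:0). It kicks off
   * the broadcast flush and returns without waiting for completion.
   */
  static inline void __invlpgb(unsigned long asid, unsigned long pcid,
                               unsigned long addr, u16 nr_pages,
                               bool pmd_stride, u8 flags)
  {
          u32 edx = (pcid << 16) | asid;
          u32 ecx = ((u32)pmd_stride << 31) | (nr_pages - 1); /* nr_pages >= 1 */
          u64 rax = addr | flags;

          /* INVLPGB: opcode 0f 01 fe. */
          asm volatile(".byte 0x0f, 0x01, 0xfe"
                       : : "a" (rax), "c" (ecx), "d" (edx) : "memory");
  }

  /* TLBSYNC (opcode 0f 01 ff): wait for this CPU's pending INVLPGBs. */
  static inline void __tlbsync(void)
  {
          asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
  }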

Combined with the removal of unnecessary lru_add_drain() calls (see
https://lore.kernel.org/r/20241219153253.3da9e8aa@fangorn) this results in
a nice performance boost for the will-it-scale tlb_flush2_threads test on an
AMD Milan system with 36 cores:

  - vanilla kernel:           527k loops/second
  - lru_add_drain removal:    731k loops/second
  - only INVLPGB:             527k loops/second
  - lru_add_drain + INVLPGB: 1157k loops/second

Profiling with only the INVLPGB changes showed that, while TLB invalidation
went down from 40% of the total CPU time to only around 4%, the contention
simply moved to the LRU lock.

Fixing both at the same time roughly doubles the number of iterations per
second in this case.

Comparing will-it-scale tlb_flush2_threads with several different numbers of
threads on a 72 CPU AMD Milan shows similar results. Each value is the total
number of loops per second across all the threads:

  threads     tip       INVLPGB

     1        315k       304k
     2        423k       424k
     4        644k      1032k
     8        652k      1267k
    16        737k      1368k
    32        759k      1199k
    64        636k      1094k
    72        609k       993k

1 and 2 thread performance is similar with and without INVLPGB, because
INVLPGB is only used on processes using 4 or more CPUs simultaneously.

Each value is the median across 5 runs.

Some numbers closer to real-world performance can be found at Phoronix, thanks
to Michael:

https://www.phoronix.com/news/AMD-INVLPGB-Linux-Benefits

  [ bp:
   - Massage
   - :%s/\<static_cpu_has\>/cpu_feature_enabled/cgi
   - :%s/\<clear_asid_transition\>/mm_clear_asid_transition/cgi
   - Fold in a 0day bot fix: https://lore.kernel.org/oe-kbuild-all/202503040000.GtiWUsBm-lkp@intel.com
   ]

Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Link: https://lore.kernel.org/r/20250226030129.530345-11-riel@surriel.com
arch/x86/include/asm/tlbflush.h
arch/x86/mm/tlb.c

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e6c3be06dd21cb90ab48e430f36fe0413da1d914..7cad283d502d012a66aeb265a4f8db8c17657b86 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -280,6 +280,11 @@ static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
        smp_store_release(&mm->context.global_asid, asid);
 }
 
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+       WRITE_ONCE(mm->context.asid_transition, false);
+}
+
 static inline bool mm_in_asid_transition(struct mm_struct *mm)
 {
        if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
@@ -291,6 +296,7 @@ static inline bool mm_in_asid_transition(struct mm_struct *mm)
 static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
 static inline void mm_init_global_asid(struct mm_struct *mm) { }
 static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
 static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
 #endif /* CONFIG_BROADCAST_TLB_FLUSH */
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b5681e6f2333e725d8594e98ce271bedfdb08610..0efd99053c09d1f4899e0096bc940413e3fdb8e4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -430,6 +430,105 @@ static bool mm_needs_global_asid(struct mm_struct *mm, u16 asid)
        return false;
 }
 
+/*
+ * x86 has 4k ASIDs (2k when compiled with KPTI), but the largest x86
+ * systems have over 8k CPUs. Because of this potential ASID shortage,
+ * global ASIDs are handed out to processes that have frequent TLB
+ * flushes and are active on 4 or more CPUs simultaneously.
+ */
+static void consider_global_asid(struct mm_struct *mm)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+               return;
+
+       /* Check every once in a while. */
+       if ((current->pid & 0x1f) != (jiffies & 0x1f))
+               return;
+
+       /*
+        * Assign a global ASID if the process is active on
+        * 4 or more CPUs simultaneously.
+        */
+       if (mm_active_cpus_exceeds(mm, 3))
+               use_global_asid(mm);
+}
+
+static void finish_asid_transition(struct flush_tlb_info *info)
+{
+       struct mm_struct *mm = info->mm;
+       int bc_asid = mm_global_asid(mm);
+       int cpu;
+
+       if (!mm_in_asid_transition(mm))
+               return;
+
+       for_each_cpu(cpu, mm_cpumask(mm)) {
+               /*
+                * The remote CPU is context switching. Wait for that to
+                * finish, to catch the unlikely case of it switching to
+                * the target mm with an out of date ASID.
+                */
+               while (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) == LOADED_MM_SWITCHING)
+                       cpu_relax();
+
+               if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm, cpu)) != mm)
+                       continue;
+
+               /*
+                * If at least one CPU is not using the global ASID yet,
+                * send a TLB flush IPI. The IPI should cause stragglers
+                * to transition soon.
+                *
+                * This can race with the CPU switching to another task;
+                * that results in a (harmless) extra IPI.
+                */
+               if (READ_ONCE(per_cpu(cpu_tlbstate.loaded_mm_asid, cpu)) != bc_asid) {
+                       flush_tlb_multi(mm_cpumask(info->mm), info);
+                       return;
+               }
+       }
+
+       /* All the CPUs running this process are using the global ASID. */
+       mm_clear_asid_transition(mm);
+}
+
+static void broadcast_tlb_flush(struct flush_tlb_info *info)
+{
+       bool pmd = info->stride_shift == PMD_SHIFT;
+       unsigned long asid = mm_global_asid(info->mm);
+       unsigned long addr = info->start;
+
+       /*
+        * TLB flushes with INVLPGB are kicked off asynchronously.
+        * The inc_mm_tlb_gen() guarantees page table updates are done
+        * before these TLB flushes happen.
+        */
+       if (info->end == TLB_FLUSH_ALL) {
+               invlpgb_flush_single_pcid_nosync(kern_pcid(asid));
+               /* Do any CPUs supporting INVLPGB need PTI? */
+               if (cpu_feature_enabled(X86_FEATURE_PTI))
+                       invlpgb_flush_single_pcid_nosync(user_pcid(asid));
+       } else do {
+               unsigned long nr = 1;
+
+               if (info->stride_shift <= PMD_SHIFT) {
+                       nr = (info->end - addr) >> info->stride_shift;
+                       nr = clamp_val(nr, 1, invlpgb_count_max);
+               }
+
+               invlpgb_flush_user_nr_nosync(kern_pcid(asid), addr, nr, pmd);
+               if (cpu_feature_enabled(X86_FEATURE_PTI))
+                       invlpgb_flush_user_nr_nosync(user_pcid(asid), addr, nr, pmd);
+
+               addr += nr << info->stride_shift;
+       } while (addr < info->end);
+
+       finish_asid_transition(info);
+
+       /* Wait for the INVLPGBs kicked off above to finish. */
+       __tlbsync();
+}
+
 /*
  * Given an ASID, flush the corresponding user ASID.  We can delay this
  * until the next time we switch to it.
@@ -1260,9 +1359,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
         * a local TLB flush is needed. Optimize this use-case by calling
         * flush_tlb_func_local() directly in this case.
         */
-       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+       if (mm_global_asid(mm)) {
+               broadcast_tlb_flush(info);
+       } else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
                info->trim_cpumask = should_trim_cpumask(mm);
                flush_tlb_multi(mm_cpumask(mm), info);
+               consider_global_asid(mm);
        } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();