Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

[thirdparty/linux.git] / include / linux / pgtable.h
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h

index dd4637d6cfaa1d96b1bc28986452648b935c9e07..1fba072b3dac3dd613270fc8ec86922e2a3f4627 100644 (file)
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -5,6 +5,9 @@
  #include <linux/pfn.h>
  #include <asm/pgtable.h>
  
+#define PMD_ORDER      (PMD_SHIFT - PAGE_SHIFT)
+#define PUD_ORDER      (PUD_SHIFT - PAGE_SHIFT)
+
  #ifndef __ASSEMBLY__
  #ifdef CONFIG_MMU
  
@@ -63,7 +66,6 @@ static inline unsigned long pte_index(unsigned long address)
  {
         return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
  }
-#define pte_index pte_index
  
  #ifndef pmd_index
  static inline unsigned long pmd_index(unsigned long address)
@@ -99,7 +101,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
         ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
  #define pte_unmap(pte) do {    \
         kunmap_local((pte));    \
-       /* rcu_read_unlock() to be added later */       \
+       rcu_read_unlock();      \
  } while (0)
  #else
  static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
@@ -108,10 +110,12 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
  }
  static inline void pte_unmap(pte_t *pte)
  {
-       /* rcu_read_unlock() to be added later */
+       rcu_read_unlock();
  }
  #endif
  
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
  /* Find an entry in the second-level page table.. */
  #ifndef pmd_offset
  static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -180,6 +184,60 @@ static inline int pmd_young(pmd_t pmd)
  }
  #endif
  
+/*
+ * A facility to provide lazy MMU batching.  This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued.  Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window.  Note that using this
+ * interface requires that read hazards be removed from the code.  A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads through
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date.  This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified.  In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode()     do {} while (0)
+#define arch_leave_lazy_mmu_mode()     do {} while (0)
+#define arch_flush_lazy_mmu_mode()     do {} while (0)
+#endif
+
+#ifndef set_ptes
+/**
+ * set_ptes - Map consecutive pages to a contiguous range of addresses.
+ * @mm: Address space to map the pages into.
+ * @addr: Address to map the first page at (unused by this generic version).
+ * @ptep: Page table pointer for the first entry.
+ * @pte: Page table entry for the first page.
+ * @nr: Number of pages to map; must be at least 1 (the loop stores first).
+ *
+ * May be overridden by the architecture, or the architecture can define
+ * set_pte() and PFN_PTE_SHIFT.
+ *
+ * Context: The caller holds the page table lock.  The pages all belong
+ * to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+               pte_t *ptep, pte_t pte, unsigned int nr)
+{
+       page_table_check_ptes_set(mm, ptep, pte, nr);
+
+       arch_enter_lazy_mmu_mode();
+       for (;;) {
+               set_pte(ptep, pte);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+       }
+       arch_leave_lazy_mmu_mode();
+}
+#endif
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
  #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                  unsigned long address, pte_t *ptep,
@@ -334,7 +392,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
  {
         pte_t pte = ptep_get(ptep);
         pte_clear(mm, address, ptep);
-       page_table_check_pte_clear(mm, address, pte);
+       page_table_check_pte_clear(mm, pte);
         return pte;
  }
  #endif
@@ -404,6 +462,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
         return pmd;
  }
  #define pmdp_get_lockless pmdp_get_lockless
+#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
  #endif /* CONFIG_PGTABLE_LEVELS > 2 */
  #endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
  
@@ -422,6 +481,9 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
  {
         return pmdp_get(pmdp);
  }
+static inline void pmdp_get_lockless_sync(void)
+{
+}
  #endif
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -433,7 +495,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
         pmd_t pmd = *pmdp;
  
         pmd_clear(pmdp);
-       page_table_check_pmd_clear(mm, address, pmd);
+       page_table_check_pmd_clear(mm, pmd);
  
         return pmd;
  }
@@ -446,7 +508,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
         pud_t pud = *pudp;
  
         pud_clear(pudp);
-       page_table_check_pud_clear(mm, address, pud);
+       page_table_check_pud_clear(mm, pud);
  
         return pud;
  }
@@ -464,11 +526,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
  #endif
  
  #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
-static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
                                             unsigned long address, pud_t *pudp,
                                             int full)
  {
-       return pudp_huge_get_and_clear(mm, address, pudp);
+       return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
  }
  #endif
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -586,6 +648,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
  #endif
  #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
  static inline void pudp_set_wrprotect(struct mm_struct *mm,
                                       unsigned long address, pud_t *pudp)
  {
@@ -599,6 +662,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm,
  {
         BUILD_BUG();
  }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
  #endif
  
@@ -721,11 +785,14 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
  {
         return pmd_val(pmd_a) == pmd_val(pmd_b);
  }
+#endif
  
+#ifndef pud_same
  static inline int pud_same(pud_t pud_a, pud_t pud_b)
  {
         return pud_val(pud_a) == pud_val(pud_b);
  }
+#define pud_same pud_same
  #endif
  
  #ifndef __HAVE_ARCH_P4D_SAME
@@ -1068,27 +1135,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
  #define pgprot_decrypted(prot) (prot)
  #endif
  
-/*
- * A facility to provide lazy MMU batching.  This allows PTE updates and
- * page invalidations to be delayed until a call to leave lazy MMU mode
- * is issued.  Some architectures may benefit from doing this, and it is
- * beneficial for both shadow and direct mode hypervisors, which may batch
- * the PTE updates which happen during this window.  Note that using this
- * interface requires that read hazards be removed from the code.  A read
- * hazard could result in the direct mode hypervisor case, since the actual
- * write to the page tables may not yet have taken place, so reads though
- * a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date.  This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified.  In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode()     do {} while (0)
-#define arch_leave_lazy_mmu_mode()     do {} while (0)
-#define arch_flush_lazy_mmu_mode()     do {} while (0)
-#endif
-
  /*
   * A facility to provide batching of the reload of page tables and
   * other process state with the actual context switch code for
@@ -1350,12 +1396,16 @@ static inline int pud_trans_unstable(pud_t *pud)
  
  #ifndef CONFIG_NUMA_BALANCING
  /*
- * Technically a PTE can be PROTNONE even when not doing NUMA balancing but
- * the only case the kernel cares is for NUMA balancing and is only ever set
- * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked
- * _PAGE_PROTNONE so by default, implement the helper as "always no". It
- * is the responsibility of the caller to distinguish between PROT_NONE
- * protections and NUMA hinting fault protections.
+ * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
+ * perfectly valid to indicate "no" in that case, which is why our default
+ * implementation is "always no".
+ *
+ * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
+ * page protection due to NUMA hinting. NUMA hinting faults only apply in
+ * accessible VMAs.
+ *
+ * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
+ * looking at the VMA accessibility is sufficient.
   */
  static inline int pte_protnone(pte_t pte)
  {
@@ -1527,6 +1577,9 @@ typedef unsigned int pgtbl_mod_mask;
  #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
  #endif
  
+#ifndef has_transparent_pud_hugepage
+#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+#endif
  /*
   * On some architectures it depends on the mm if the p4d/pud or pmd
   * layer of the page table hierarchy is folded or not.