int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-void huge_pmd_set_accessed(struct vm_fault *vmf);
+bool huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
struct vm_area_struct *vma);
EXPORT_SYMBOL_GPL(vmf_insert_folio_pud);
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+/**
+ * touch_pmd - Mark page table pmd entry as accessed and dirty (for write)
+ * @vma: The VMA covering @addr
+ * @addr: The virtual address
+ * @pmd: pmd pointer into the page table mapping @addr
+ * @write: Whether it's a write access
+ *
+ * Return: whether the pmd entry is changed
+ */
+bool touch_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, bool write)
{
- pmd_t _pmd;
+ pmd_t entry;
- _pmd = pmd_mkyoung(*pmd);
+ entry = pmd_mkyoung(*pmd);
if (write)
- _pmd = pmd_mkdirty(_pmd);
+ entry = pmd_mkdirty(entry);
if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
- pmd, _pmd, write))
+ pmd, entry, write)) {
update_mmu_cache_pmd(vma, addr, pmd);
+ return true;
+ }
+
+ return false;
}
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-void huge_pmd_set_accessed(struct vm_fault *vmf)
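+/**
+ * huge_pmd_set_accessed - Mark the pmd entry mapping @vmf->address as accessed
+ * @vmf: The page fault descriptor; the caller holds vmf->ptl, the pmd lock
+ *
+ * The entry is also marked dirty for a write fault.
+ *
+ * Return: whether the pmd entry is changed
+ */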
+bool huge_pmd_set_accessed(struct vm_fault *vmf)
{
bool write = vmf->flags & FAULT_FLAG_WRITE;
- vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd)))
- goto unlock;
-
- touch_pmd(vmf->vma, vmf->address, vmf->pmd, write);
+ return false;
-unlock:
- spin_unlock(vmf->ptl);
+ return touch_pmd(vmf->vma, vmf->address, vmf->pmd, write);
}
static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
return VM_FAULT_FALLBACK;
}
+/*
+ * A page fault may be spurious because of racy access to the page
+ * table. For example, if a non-populated virtual page is accessed on
+ * two CPUs simultaneously, a page fault is triggered on both CPUs.
+ * One CPU (say CPU A) may then find no reason for its page fault,
+ * because the other CPU (say CPU B) has already changed the page
+ * table before the PTE is checked on CPU A. Most of the time such
+ * spurious page faults can be ignored safely. However, if the page
+ * fault is for a write access, a stale read-only TLB entry may still
+ * exist on the local CPU and needs to be flushed on some
+ * architectures. Doing that is called spurious page fault fixing.
+ *
+ * Note: most architectures use the defaults, where
+ * flush_tlb_fix_spurious_fault() is defined as flush_tlb_page() and
+ * flush_tlb_fix_spurious_fault_pmd() is defined as a NOP.
+ */
+static void fix_spurious_fault(struct vm_fault *vmf,
+ enum pgtable_level ptlevel)
+{
+ /* Skip spurious TLB flush for retried page fault */
+ if (vmf->flags & FAULT_FLAG_TRIED)
+ return;
+ /*
+ * This is needed only for protection faults but the arch code
+ * is not yet telling us if this is a protection fault or not.
+ * This still avoids useless tlb flushes for .text page faults
+ * with threads.
+ */
+ if (vmf->flags & FAULT_FLAG_WRITE) {
+ if (ptlevel == PGTABLE_LEVEL_PTE)
+ flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
+ vmf->pte);
+ else
+ flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address,
+ vmf->pmd);
+ }
+}
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
}
entry = pte_mkyoung(entry);
if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
- vmf->flags & FAULT_FLAG_WRITE)) {
+ vmf->flags & FAULT_FLAG_WRITE))
update_mmu_cache_range(vmf, vmf->vma, vmf->address,
vmf->pte, 1);
- } else {
- /* Skip spurious TLB flush for retried page fault */
- if (vmf->flags & FAULT_FLAG_TRIED)
- goto unlock;
- /*
- * This is needed only for protection faults but the arch code
- * is not yet telling us if this is a protection fault or not.
- * This still avoids useless tlb flushes for .text page faults
- * with threads.
- */
- if (vmf->flags & FAULT_FLAG_WRITE)
- flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
- vmf->pte);
- }
+ else
+ fix_spurious_fault(vmf, PGTABLE_LEVEL_PTE);
unlock:
pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0;
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
- huge_pmd_set_accessed(&vmf);
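+ /*
+ * Take the pmd lock here so that huge_pmd_set_accessed() and any
+ * spurious-fault TLB flush run under the same lock, as is done
+ * at the pte level.
+ */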
+ vmf.ptl = pmd_lock(mm, vmf.pmd);
+ if (!huge_pmd_set_accessed(&vmf))
+ fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD);
+ spin_unlock(vmf.ptl);
return 0;
}
}