From: Dev Jain <dev.jain@arm.com>
Date: Tue, 10 Jun 2025 03:50:42 +0000 (+0530)
Subject: mm: call pointers to ptes as ptep
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=94dab12d86cf77ff0b8f667dc98af6d997422cb4;p=thirdparty%2Fkernel%2Flinux.git

mm: call pointers to ptes as ptep

Patch series "Optimize mremap() for large folios", v4.

Currently move_ptes() iterates through PTEs one by one.  If the
underlying folio mapped by the PTEs is large, we can process those PTEs
in a batch using folio_pte_batch(), thus clearing and setting the PTEs
in one go.

For arm64 specifically, this results in a 16x reduction in the number of
ptep_get() calls (on a contig block, ptep_get() on arm64 iterates
through all 16 entries to collect the a/d bits), and extra TLBIs are
elided by using get_and_clear_full_ptes() in place of
ptep_get_and_clear().

Mapping 1M of memory with 64K folios, memsetting it, remapping it to
src + 1M, and munmapping it, repeated 10,000 times, reduces the average
execution time from 1.9 to 1.2 seconds on Apple M3 (arm64), a 37%
improvement.  No regression is observed for small folios.

Test program for reference:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>

#define SIZE (1UL << 20) // 1M

int main(void)
{
	void *new_addr, *addr;

	for (int i = 0; i < 10000; ++i) {
		addr = mmap((void *)(1UL << 30), SIZE, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		memset(addr, 0xAA, SIZE);
		new_addr = mremap(addr, SIZE, SIZE,
				  MREMAP_MAYMOVE | MREMAP_FIXED, addr + SIZE);
		if (new_addr != (addr + SIZE)) {
			perror("mremap");
			return 1;
		}
		munmap(new_addr, SIZE);
	}
}

This patch (of 2):

Avoid confusion between the pte_t * and pte_t data types by suffixing
pointer-type variables with p.  No functional change.

Link: https://lkml.kernel.org/r/20250610035043.75448-1-dev.jain@arm.com
Link: https://lkml.kernel.org/r/20250610035043.75448-2-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Barry Song
Reviewed-by: Anshuman Khandual
Reviewed-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Reviewed-by: Pedro Falcato
Cc: Bang Li
Cc: Baolin Wang
Cc: bibo mao
Cc: Hugh Dickins
Cc: Ingo Molnar
Cc: Jann Horn
Cc: Lance Yang
Cc: Liam Howlett
Cc: Matthew Wilcox (Oracle)
Cc: Peter Xu
Cc: Qi Zheng
Cc: Ryan Roberts
Cc: Vlastimil Babka
Cc: Yang Shi
Cc: Zi Yan
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

diff --git a/mm/mremap.c b/mm/mremap.c
index 60f6b8d0d5f0b..180b12225368e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -176,7 +176,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
 	struct vm_area_struct *vma = pmc->old;
 	bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
 	struct mm_struct *mm = vma->vm_mm;
-	pte_t *old_pte, *new_pte, pte;
+	pte_t *old_ptep, *new_ptep;
+	pte_t pte;
 	pmd_t dummy_pmdval;
 	spinlock_t *old_ptl, *new_ptl;
 	bool force_flush = false;
@@ -211,8 +212,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
 	 * We don't have to worry about the ordering of src and dst
 	 * pte locks because exclusive mmap_lock prevents deadlock.
 	 */
-	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
-	if (!old_pte) {
+	old_ptep = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
+	if (!old_ptep) {
 		err = -EAGAIN;
 		goto out;
 	}
@@ -223,10 +224,10 @@ static int move_ptes(struct pagetable_move_control *pmc,
 	 * mmap_lock, so this new_pte page is stable, so there is no need to get
 	 * pmdval and do pmd_same() check.
 	 */
-	new_pte = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval,
+	new_ptep = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval,
 					   &new_ptl);
-	if (!new_pte) {
-		pte_unmap_unlock(old_pte, old_ptl);
+	if (!new_ptep) {
+		pte_unmap_unlock(old_ptep, old_ptl);
 		err = -EAGAIN;
 		goto out;
 	}
@@ -235,14 +236,14 @@ static int move_ptes(struct pagetable_move_control *pmc,
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
-	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
-				   new_pte++, new_addr += PAGE_SIZE) {
-		VM_WARN_ON_ONCE(!pte_none(*new_pte));
+	for (; old_addr < old_end; old_ptep++, old_addr += PAGE_SIZE,
+				   new_ptep++, new_addr += PAGE_SIZE) {
+		VM_WARN_ON_ONCE(!pte_none(*new_ptep));
 
-		if (pte_none(ptep_get(old_pte)))
+		if (pte_none(ptep_get(old_ptep)))
 			continue;
 
-		pte = ptep_get_and_clear(mm, old_addr, old_pte);
+		pte = ptep_get_and_clear(mm, old_addr, old_ptep);
 		/*
 		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
@@ -260,7 +261,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
 		pte = move_soft_dirty_pte(pte);
 
 		if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
-			pte_clear(mm, new_addr, new_pte);
+			pte_clear(mm, new_addr, new_ptep);
 		else {
 			if (need_clear_uffd_wp) {
 				if (pte_present(pte))
@@ -268,7 +269,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
 				else if (is_swap_pte(pte))
 					pte = pte_swp_clear_uffd_wp(pte);
 			}
-			set_pte_at(mm, new_addr, new_pte, pte);
+			set_pte_at(mm, new_addr, new_ptep, pte);
 		}
 	}
 
@@ -277,8 +278,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
 		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
-	pte_unmap(new_pte - 1);
-	pte_unmap_unlock(old_pte - 1, old_ptl);
+	pte_unmap(new_ptep - 1);
+	pte_unmap_unlock(old_ptep - 1, old_ptl);
 out:
 	if (pmc->need_rmap_locks)
 		drop_rmap_locks(vma);
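
For reference, below is a minimal sketch of how the per-PTE loop above
could be batched, which is the direction patch 2 of this series takes.
It is illustrative only, not the code from that patch:
mremap_folio_pte_batch() is a hypothetical helper assumed to return how
many consecutive PTEs (at most max_nr) map the same large folio, e.g.
by combining vm_normal_folio() and folio_pte_batch(); only
get_and_clear_full_ptes(), set_ptes(), ptep_get() and pte_none() are
existing helpers already named above.

	/*
	 * Sketch only.  mremap_folio_pte_batch() is a hypothetical helper
	 * that returns 1 for small folios or non-present entries, and the
	 * number of consecutive PTEs mapping the same large folio otherwise.
	 */
	int nr_ptes;

	for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE,
				   new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) {
		int max_nr = (old_end - old_addr) >> PAGE_SHIFT;

		nr_ptes = 1;
		pte = ptep_get(old_ptep);
		if (pte_none(pte))
			continue;

		if (pte_present(pte))
			nr_ptes = mremap_folio_pte_batch(vma, old_addr, old_ptep,
							 pte, max_nr);

		/*
		 * One batched clear accumulates the access/dirty bits of the
		 * whole contig block and avoids the per-PTE TLB invalidations
		 * that nr_ptes separate ptep_get_and_clear() calls would issue.
		 */
		pte = get_and_clear_full_ptes(mm, old_addr, old_ptep, nr_ptes, 0);

		/* force_flush, soft-dirty and uffd-wp fixups as in the loop above */

		set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
	}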