From: Lorenzo Stoakes Date: Sun, 18 Jan 2026 14:50:40 +0000 (+0000) Subject: mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=535f6b8df17d4350dc37b2635b52fa8ab964132c;p=thirdparty%2Flinux.git mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap The root anon_vma of all anon_vma's linked to a VMA must by definition be the same - a VMA and all of its descendants/ancestors must exist in the same CoW chain. Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in anon_vma_clone()") introduced paranoid checking of the root anon_vma remaining the same throughout all AVC's in 2011. I think 15 years later we can safely assume that this is always the case. Additionally, since unfaulted VMAs being cloned from or unlinked are no-op's, we can simply lock the anon_vma's associated with this rather than doing any specific dance around this. This removes unnecessary checks and makes it clear that the root anon_vma is shared between all anon_vma's in a given VMA's anon_vma_chain. Link: https://lkml.kernel.org/r/838030d2f0772b99fa99ff4b4fd571353f14a1a9.1768746221.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: Harry Yoo Cc: Jann Horn Cc: Michal Hocko Cc: Mike Rapoport Cc: Pedro Falcato Cc: Rik van Riel Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- diff --git a/mm/rmap.c b/mm/rmap.c index 3c5fb8fb105ff..d4d2e7b9fe5f5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -231,32 +231,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma) return -ENOMEM; } -/* - * This is a useful helper function for locking the anon_vma root as - * we traverse the vma->anon_vma_chain, looping over anon_vma's that - * have the same vma. - * - * Such anon_vma's should have the same root, so you'd expect to see - * just a single mutex_lock for the whole traversal. - */ -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma) -{ - struct anon_vma *new_root = anon_vma->root; - if (new_root != root) { - if (WARN_ON_ONCE(root)) - up_write(&root->rwsem); - root = new_root; - down_write(&root->rwsem); - } - return root; -} - -static inline void unlock_anon_vma_root(struct anon_vma *root) -{ - if (root) - up_write(&root->rwsem); -} - static void check_anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { @@ -309,26 +283,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma); int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { struct anon_vma_chain *avc, *pavc; - struct anon_vma *root = NULL; check_anon_vma_clone(dst, src); if (!src->anon_vma) return 0; + check_anon_vma_clone(dst, src); + + /* All anon_vma's share the same root. */ + anon_vma_lock_write(src->anon_vma); list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { struct anon_vma *anon_vma; avc = anon_vma_chain_alloc(GFP_NOWAIT); if (unlikely(!avc)) { - unlock_anon_vma_root(root); - root = NULL; + anon_vma_unlock_write(src->anon_vma); avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto enomem_failure; + anon_vma_lock_write(src->anon_vma); } anon_vma = pavc->anon_vma; - root = lock_anon_vma_root(root, anon_vma); anon_vma_chain_link(dst, avc, anon_vma); /* @@ -345,7 +321,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) } if (dst->anon_vma) dst->anon_vma->num_active_vmas++; - unlock_anon_vma_root(root); + + anon_vma_unlock_write(src->anon_vma); return 0; enomem_failure: @@ -475,17 +452,19 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma) void unlink_anon_vmas(struct vm_area_struct *vma) { struct anon_vma_chain *avc, *next; - struct anon_vma *root = NULL; + struct anon_vma *active_anon_vma = vma->anon_vma; /* Always hold mmap lock, read-lock on unmap possibly. */ mmap_assert_locked(vma->vm_mm); /* Unfaulted is a no-op. */ - if (!vma->anon_vma) { + if (!active_anon_vma) { VM_WARN_ON_ONCE(!list_empty(&vma->anon_vma_chain)); return; } + anon_vma_lock_write(active_anon_vma); + /* * Unlink each anon_vma chained to the VMA. This list is ordered * from newest to oldest, ensuring the root anon_vma gets freed last. @@ -493,7 +472,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; - root = lock_anon_vma_root(root, anon_vma); anon_vma_interval_tree_remove(avc, &anon_vma->rb_root); /* @@ -509,13 +487,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma) anon_vma_chain_free(avc); } - vma->anon_vma->num_active_vmas--; + active_anon_vma->num_active_vmas--; /* * vma would still be needed after unlink, and anon_vma will be prepared * when handle fault. */ vma->anon_vma = NULL; - unlock_anon_vma_root(root); + anon_vma_unlock_write(active_anon_vma); + /* * Iterate the list once more, it now only contains empty and unlinked