git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
mm/vma: document possible vma->vm_refcnt values and reference comment
author Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Fri, 23 Jan 2026 20:12:12 +0000 (20:12 +0000)
committer Andrew Morton <akpm@linux-foundation.org>
Sat, 31 Jan 2026 22:22:49 +0000 (14:22 -0800)
The possible vma->vm_refcnt values are confusing and vague, explain in
detail what these can be in a comment describing the vma->vm_refcnt field
and reference this comment in various places that read/write this field.

No functional change intended.

[akpm@linux-foundation.org: fix typo, per Suren]
Link: https://lkml.kernel.org/r/d462e7678c6cc7461f94e5b26c776547d80a67e8.1769198904.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Waiman Long <longman@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm_types.h
include/linux/mmap_lock.h
mm/mmap_lock.c

index bdbf17c4f26b5c3d714054983462a66cee5aa3d3..3e608d22cab0dbd0ce5ecf6dcd91f814720b0f13 100644 (file)
@@ -758,7 +758,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
  * set the VM_REFCNT_EXCLUDE_READERS_FLAG in vma->vm_refcnt to indicate to
  * vma_start_read() that the reference count should be left alone.
  *
- * Once the operation is complete, this value is subtracted from vma->vm_refcnt.
+ * See the comment describing vm_refcnt in vm_area_struct for details as to
+ * which values the VMA reference count can be.
  */
 #define VM_REFCNT_EXCLUDE_READERS_BIT  (30)
 #define VM_REFCNT_EXCLUDE_READERS_FLAG (1U << VM_REFCNT_EXCLUDE_READERS_BIT)
@@ -989,7 +990,44 @@ struct vm_area_struct {
        struct vma_numab_state *numab_state;    /* NUMA Balancing state */
 #endif
 #ifdef CONFIG_PER_VMA_LOCK
-       /* Unstable RCU readers are allowed to read this. */
+       /*
+        * Used to keep track of firstly, whether the VMA is attached, secondly,
+        * if attached, how many read locks are taken, and thirdly, if the
+        * VM_REFCNT_EXCLUDE_READERS_FLAG is set, whether any read locks held
+        * are currently in the process of being excluded.
+        *
+        * This value can be equal to:
+        *
+        * 0 - Detached. IMPORTANT: when the refcnt is zero, readers cannot
+        * increment it.
+        *
+        * 1 - Attached and either unlocked or write-locked. Write locks are
+        * identified via __is_vma_write_locked() which checks for equality of
+        * vma->vm_lock_seq and mm->mm_lock_seq.
+        *
+        * >1, < VM_REFCNT_EXCLUDE_READERS_FLAG - Read-locked or (unlikely)
+        * write-locked with other threads having temporarily incremented the
+        * reference count prior to determining it is write-locked and
+        * decrementing it again.
+        *
+        * VM_REFCNT_EXCLUDE_READERS_FLAG - Detached, pending
+        * __vma_exit_locked() completion which will decrement the reference
+        * count to zero. IMPORTANT - at this stage no further readers can
+        * increment the reference count. It can only be reduced.
+        *
+        * VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either write-locking
+        * an attached VMA and has yet to invoke __vma_exit_locked(), OR a
+        * thread is detaching a VMA and is waiting on a single spurious reader
+        * in order to decrement the reference count. IMPORTANT - as above, no
+        * further readers can increment the reference count.
+        *
+        * > VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either
+        * write-locking or detaching a VMA and is waiting on readers to
+        * exit. IMPORTANT - as above, no further readers can increment the
+        * reference count.
+        *
+        * NOTE: Unstable RCU readers are allowed to read this.
+        */
        refcount_t vm_refcnt ____cacheline_aligned_in_smp;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map vmlock_dep_map;
index 5acbd4ba1b52ca71dc5e715e86f4dbcb97e9c9ad..a764439d027683a8be56350a394d3d987c8e6b49 100644 (file)
@@ -130,6 +130,9 @@ static inline bool is_vma_writer_only(int refcnt)
         * attached. Waiting on a detached vma happens only in
         * vma_mark_detached() and is a rare case, therefore most of the time
         * there will be no unnecessary wakeup.
+        *
+        * See the comment describing the vm_area_struct->vm_refcnt field for
+        * details of possible refcnt values.
         */
        return (refcnt & VM_REFCNT_EXCLUDE_READERS_FLAG) &&
                refcnt <= VM_REFCNT_EXCLUDE_READERS_FLAG + 1;
@@ -249,6 +252,10 @@ static inline void vma_assert_locked(struct vm_area_struct *vma)
 {
        unsigned int mm_lock_seq;
 
+       /*
+        * See the comment describing the vm_area_struct->vm_refcnt field for
+        * details of possible refcnt values.
+        */
        VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
                      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
 }
index 1d23b48552e94d4c4d0407ddf5070b66b1fdd2d7..75dc098aea14c9142bd75219eed9f442d1512644 100644 (file)
@@ -65,6 +65,9 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma,
        /*
         * If vma is detached then only vma_mark_attached() can raise the
         * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached().
+        *
+        * See the comment describing the vm_area_struct->vm_refcnt field for
+        * details of possible refcnt values.
         */
        if (!refcount_add_not_zero(VM_REFCNT_EXCLUDE_READERS_FLAG, &vma->vm_refcnt))
                return 0;
@@ -137,6 +140,9 @@ void vma_mark_detached(struct vm_area_struct *vma)
         * before they check vm_lock_seq, realize the vma is locked and drop
         * back the vm_refcnt. That is a narrow window for observing a raised
         * vm_refcnt.
+        *
+        * See the comment describing the vm_area_struct->vm_refcnt field for
+        * details of possible refcnt values.
         */
        if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
                /* Wait until vma is detached with no readers. */