mm: convert mm_lock_seq to a proper seqcount
author	Suren Baghdasaryan <surenb@google.com>
	Fri, 22 Nov 2024 17:44:15 +0000 (09:44 -0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Tue, 14 Jan 2025 06:40:50 +0000 (22:40 -0800)
Convert mm_lock_seq to a seqcount_t and change all mmap_write_lock
variants to increment it, in line with the usual seqcount usage pattern.
This lets us check whether the mmap_lock is write-locked by checking the
mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be
used when implementing mmap_lock speculation functions.
As a result, vm_lock_seq is also changed to unsigned to match the type
of mm_lock_seq.sequence.
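
For illustration only (the speculation functions mentioned above are not part
of this patch, and the helper name below is hypothetical), the parity check
this conversion enables looks roughly like:

static inline bool mm_lock_seq_is_write_locked(struct mm_struct *mm)
{
        /* Odd sequence => mmap_lock is write-locked, even => released. */
        return raw_read_seqcount(&mm->mm_lock_seq) & 1;
}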

Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmap_lock.h
kernel/fork.c
mm/init-mm.c
tools/testing/vma/vma.c
tools/testing/vma/vma_internal.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9372bc058b43c46a3fec8a792840c43874410e50..a3a50c37603ed5cb52826c6febfab8ce5e509730 100644
@@ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
         * we don't rely on for anything - the mm_lock_seq read against which we
         * need ordering is below.
         */
-       if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
+       if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
                return false;
 
        if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
         * after it has been unlocked.
         * This pairs with RELEASE semantics in vma_end_write_all().
         */
-       if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
+       if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
                up_read(&vma->vm_lock->lock);
                return false;
        }
@@ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
 }
 
 /* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
 {
        mmap_assert_write_locked(vma->vm_mm);
 
@@ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
         * current task is holding mmap_write_lock, both vma->vm_lock_seq and
         * mm->mm_lock_seq can't be concurrently modified.
         */
-       *mm_lock_seq = vma->vm_mm->mm_lock_seq;
+       *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
        return (vma->vm_lock_seq == *mm_lock_seq);
 }
 
@@ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
  */
 static inline void vma_start_write(struct vm_area_struct *vma)
 {
-       int mm_lock_seq;
+       unsigned int mm_lock_seq;
 
        if (__is_vma_write_locked(vma, &mm_lock_seq))
                return;
@@ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 {
-       int mm_lock_seq;
+       unsigned int mm_lock_seq;
 
        VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7490d84af310af20e8ed65d1409a62b298d3dde2..5f1b2dc788e24378b5806272f674e803f6409503 100644
@@ -729,7 +729,7 @@ struct vm_area_struct {
         * counter reuse can only lead to occasional unnecessary use of the
         * slowpath.
         */
-       int vm_lock_seq;
+       unsigned int vm_lock_seq;
        /* Unstable RCU readers are allowed to read this. */
        struct vma_lock *vm_lock;
 #endif
@@ -923,6 +923,9 @@ struct mm_struct {
                 * Roughly speaking, incrementing the sequence number is
                 * equivalent to releasing locks on VMAs; reading the sequence
                 * number can be part of taking a read lock on a VMA.
+                * Incremented every time mmap_lock is write-locked/unlocked.
+                * Initialized to 0, therefore odd values indicate mmap_lock
+                * is write-locked and even values that it's released.
                 *
                 * Can be modified under write mmap_lock using RELEASE
                 * semantics.
@@ -931,7 +934,7 @@ struct mm_struct {
                 * Can be read with ACQUIRE semantics if not holding write
                 * mmap_lock.
                 */
-               int mm_lock_seq;
+               seqcount_t mm_lock_seq;
 #endif
 
 
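As the comment above describes, one write-lock/unlock cycle moves the counter
as follows (illustrative sketch, assuming the counter starts at 0):

        mmap_write_lock(mm);    /* mm_lock_seq.sequence: 0 -> 1 (odd: write-locked) */
        /* ... modify VMAs ... */
        mmap_write_unlock(mm);  /* mm_lock_seq.sequence: 1 -> 2 (even: released) */
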
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index de9dc20b01ba7471d288c58a3b0d600e3b1eaa62..9715326f5a85fb2bbd0c539bbefdeda9a0a1aa2e 100644
@@ -71,39 +71,39 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
-/*
- * Drop all currently-held per-VMA locks.
- * This is called from the mmap_lock implementation directly before releasing
- * a write-locked mmap_lock (or downgrading it to read-locked).
- * This should normally NOT be called manually from other places.
- * If you want to call this manually anyway, keep in mind that this will release
- * *all* VMA write locks, including ones from further up the stack.
- */
-static inline void vma_end_write_all(struct mm_struct *mm)
+static inline void mm_lock_seqcount_init(struct mm_struct *mm)
 {
-       mmap_assert_write_locked(mm);
-       /*
-        * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
-        * mmap_lock being held.
-        * We need RELEASE semantics here to ensure that preceding stores into
-        * the VMA take effect before we unlock it with this store.
-        * Pairs with ACQUIRE semantics in vma_start_read().
-        */
-       smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
+       seqcount_init(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
+{
+       do_raw_write_seqcount_begin(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_end(struct mm_struct *mm)
+{
+       ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
+       do_raw_write_seqcount_end(&mm->mm_lock_seq);
 }
+
 #else
-static inline void vma_end_write_all(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
 #endif
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
        init_rwsem(&mm->mmap_lock);
+       mm_lock_seqcount_init(mm);
 }
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
        __mmap_lock_trace_start_locking(mm, true);
        down_write(&mm->mmap_lock);
+       mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -111,6 +111,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
 {
        __mmap_lock_trace_start_locking(mm, true);
        down_write_nested(&mm->mmap_lock, subclass);
+       mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -120,10 +121,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
 
        __mmap_lock_trace_start_locking(mm, true);
        ret = down_write_killable(&mm->mmap_lock);
+       if (!ret)
+               mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
        return ret;
 }
 
+/*
+ * Drop all currently-held per-VMA locks.
+ * This is called from the mmap_lock implementation directly before releasing
+ * a write-locked mmap_lock (or downgrading it to read-locked).
+ * This should normally NOT be called manually from other places.
+ * If you want to call this manually anyway, keep in mind that this will release
+ * *all* VMA write locks, including ones from further up the stack.
+ */
+static inline void vma_end_write_all(struct mm_struct *mm)
+{
+       mmap_assert_write_locked(mm);
+       mm_lock_seqcount_end(mm);
+}
+
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
        __mmap_lock_trace_released(mm, true);
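
With mm_lock_seqcount_begin()/mm_lock_seqcount_end() wired into the write-lock
paths above, a lockless reader can validate a speculative walk in the usual
seqcount manner. A minimal sketch under assumed semantics (the helper name is
hypothetical and not part of this patch):

static bool mm_read_speculative(struct mm_struct *mm)
{
        unsigned int seq = raw_read_seqcount(&mm->mm_lock_seq);

        if (seq & 1)            /* a writer currently holds mmap_lock */
                return false;

        /* ... lockless read of mm state goes here ... */

        /* Fail if a writer took mmap_lock since the first sample. */
        return !read_seqcount_retry(&mm->mm_lock_seq, seq);
}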
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b301180fd41628f55fc318d8899cab2698ba6f5..ded49f18cd95c0922dc5c73889c87245d92658bd 100644
@@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
                return false;
 
        init_rwsem(&vma->vm_lock->lock);
-       vma->vm_lock_seq = -1;
+       vma->vm_lock_seq = UINT_MAX;
 
        return true;
 }
@@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        seqcount_init(&mm->write_protect_seq);
        mmap_init_lock(mm);
        INIT_LIST_HEAD(&mm->mmlist);
-#ifdef CONFIG_PER_VMA_LOCK
-       mm->mm_lock_seq = 0;
-#endif
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
        mm->locked_vm = 0;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 24c809379274503ac4f261fe7cfdbab3cb1ed1e7..6af3ad675930be588956591201f52c8b4e7a8f62 100644
@@ -40,7 +40,7 @@ struct mm_struct init_mm = {
        .arg_lock       =  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
 #ifdef CONFIG_PER_VMA_LOCK
-       .mm_lock_seq    = 0,
+       .mm_lock_seq    = SEQCNT_ZERO(init_mm.mm_lock_seq),
 #endif
        .user_ns        = &init_user_ns,
        .cpu_bitmap     = CPU_BITS_NONE,
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 891d87a9ad6bcb961c2e9551816d0ff746e41927..920fba58884e3e90e15388322d4e4b3253aeeb43 100644
@@ -100,7 +100,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
         * begun. Linking to the tree will have caused this to be incremented,
         * which means we will get a false positive otherwise.
         */
-       vma->vm_lock_seq = -1;
+       vma->vm_lock_seq = UINT_MAX;
 
        return vma;
 }
@@ -225,7 +225,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
        int seq = vma->vm_lock_seq;
 
        /* We reset after each check. */
-       vma->vm_lock_seq = -1;
+       vma->vm_lock_seq = UINT_MAX;
 
        /* The vma_start_write() stub simply increments this value. */
        return seq > -1;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index a7de59a0d694dcb6913037f2ef40aa862884b0d8..b973b3e41c834215732dbe1220d49aa6869cd102 100644
@@ -281,7 +281,7 @@ struct vm_area_struct {
         * counter reuse can only lead to occasional unnecessary use of the
         * slowpath.
         */
-       int vm_lock_seq;
+       unsigned int vm_lock_seq;
        struct vma_lock *vm_lock;
 #endif
 
@@ -467,7 +467,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
                return false;
 
        init_rwsem(&vma->vm_lock->lock);
-       vma->vm_lock_seq = -1;
+       vma->vm_lock_seq = UINT_MAX;
 
        return true;
 }