]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm: introduce vma_start_read_locked{_nested} helpers
author: Suren Baghdasaryan <surenb@google.com>
Thu, 13 Feb 2025 22:46:38 +0000 (14:46 -0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Mon, 17 Mar 2025 05:06:17 +0000 (22:06 -0700)
Patch series "reimplement per-vma lock as a refcount", v10.

Back when per-vma locks were introduced, vm_lock was moved out of
vm_area_struct in [1] because of the performance regression caused by
false cacheline sharing.  Recent investigation [2] revealed that the
regression is limited to a rather old Broadwell microarchitecture and
even there it can be mitigated by disabling adjacent cacheline
prefetching, see [3].

Splitting a single logical structure into multiple ones leads to more
complicated management, extra pointer dereferences and overall less
maintainable code.  When that split-away part is a lock, it complicates
things even further.  With no performance benefits, there are no reasons
for this split.  Merging the vm_lock back into vm_area_struct also allows
vm_area_struct to use SLAB_TYPESAFE_BY_RCU later in this patchset.

This patchset:

1. moves vm_lock back into vm_area_struct, aligning it at the
   cacheline boundary and changing the cache to be cacheline-aligned to
   minimize cacheline sharing;

2. changes vm_area_struct initialization to mark new vma as detached
   until it is inserted into vma tree;

3. replaces vm_lock and vma->detached flag with a reference counter;

4. regroups vm_area_struct members to fit them into 3 cachelines;

5. changes vm_area_struct cache to SLAB_TYPESAFE_BY_RCU to allow for
   their reuse and to minimize call_rcu() calls.

Pagefault microbenchmarks show performance improvement:
Hmean     faults/cpu-1    507926.5547 (   0.00%)   506519.3692 *  -0.28%*
Hmean     faults/cpu-4    479119.7051 (   0.00%)   481333.6802 *   0.46%*
Hmean     faults/cpu-7    452880.2961 (   0.00%)   455845.6211 *   0.65%*
Hmean     faults/cpu-12   347639.1021 (   0.00%)   352004.2254 *   1.26%*
Hmean     faults/cpu-21   200061.2238 (   0.00%)   229597.0317 *  14.76%*
Hmean     faults/cpu-30   145251.2001 (   0.00%)   164202.5067 *  13.05%*
Hmean     faults/cpu-48   106848.4434 (   0.00%)   120641.5504 *  12.91%*
Hmean     faults/cpu-56    92472.3835 (   0.00%)   103464.7916 *  11.89%*
Hmean     faults/sec-1    507566.1468 (   0.00%)   506139.0811 *  -0.28%*
Hmean     faults/sec-4   1880478.2402 (   0.00%)  1886795.6329 *   0.34%*
Hmean     faults/sec-7   3106394.3438 (   0.00%)  3140550.7485 *   1.10%*
Hmean     faults/sec-12  4061358.4795 (   0.00%)  4112477.0206 *   1.26%*
Hmean     faults/sec-21  3988619.1169 (   0.00%)  4577747.1436 *  14.77%*
Hmean     faults/sec-30  3909839.5449 (   0.00%)  4311052.2787 *  10.26%*
Hmean     faults/sec-48  4761108.4691 (   0.00%)  5283790.5026 *  10.98%*
Hmean     faults/sec-56  4885561.4590 (   0.00%)  5415839.4045 *  10.85%*

This patch (of 18):

Introduce helper functions which can be used to read-lock a VMA when
holding mmap_lock for read.  Replace direct accesses to vma->vm_lock with
these new helpers.

Link: https://lkml.kernel.org/r/20250213224655.1680278-1-surenb@google.com
Link: https://lkml.kernel.org/r/20250213224655.1680278-2-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Tested-by: Shivank Garg <shivankg@amd.com>
Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Klara Modin <klarasmodin@gmail.com>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Will Deacon <will@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h
mm/userfaultfd.c

index fd1e85b4b48a23216313137d9c05a054f720b56d..6a4914bc1a38b2b4d78542dc3780baabe184e8fc 100644 (file)
@@ -735,6 +735,30 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
        return true;
 }
 
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+{
+       mmap_assert_locked(vma->vm_mm);
+       down_read_nested(&vma->vm_lock->lock, subclass);
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline void vma_start_read_locked(struct vm_area_struct *vma)
+{
+       mmap_assert_locked(vma->vm_mm);
+       down_read(&vma->vm_lock->lock);
+}
+
 static inline void vma_end_read(struct vm_area_struct *vma)
 {
        rcu_read_lock(); /* keeps vma alive till the end of up_read */
index d06453fa8abae6b8e52666b7e946f3b5fbc86acc..48ac81bbfee60cb531e882a9f38bc21949623af6 100644 (file)
@@ -85,16 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
 
        mmap_read_lock(mm);
        vma = find_vma_and_prepare_anon(mm, address);
-       if (!IS_ERR(vma)) {
-               /*
-                * We cannot use vma_start_read() as it may fail due to
-                * false locked (see comment in vma_start_read()). We
-                * can avoid that by directly locking vm_lock under
-                * mmap_lock, which guarantees that nobody can lock the
-                * vma for write (vma_start_write()) under us.
-                */
-               down_read(&vma->vm_lock->lock);
-       }
+       if (!IS_ERR(vma))
+               vma_start_read_locked(vma);
 
        mmap_read_unlock(mm);
        return vma;
@@ -1564,14 +1556,10 @@ static int uffd_move_lock(struct mm_struct *mm,
        mmap_read_lock(mm);
        err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
        if (!err) {
-               /*
-                * See comment in uffd_lock_vma() as to why not using
-                * vma_start_read() here.
-                */
-               down_read(&(*dst_vmap)->vm_lock->lock);
+               vma_start_read_locked(*dst_vmap);
                if (*dst_vmap != *src_vmap)
-                       down_read_nested(&(*src_vmap)->vm_lock->lock,
-                                        SINGLE_DEPTH_NESTING);
+                       vma_start_read_locked_nested(*src_vmap,
+                                               SINGLE_DEPTH_NESTING);
        }
        mmap_read_unlock(mm);
        return err;