git.ipfire.org Git - thirdparty/linux.git/commitdiff
hugetlb: remove VMEMMAP_SYNCHRONIZE_RCU
authorKiryl Shutsemau <kas@kernel.org>
Fri, 27 Feb 2026 19:42:52 +0000 (19:42 +0000)
committerAndrew Morton <akpm@linux-foundation.org>
Sun, 5 Apr 2026 20:53:09 +0000 (13:53 -0700)
The VMEMMAP_SYNCHRONIZE_RCU flag triggered synchronize_rcu() calls to
prevent a race between HVO remapping and page_ref_add_unless().  The race
could occur when a speculative PFN walker tried to modify the refcount on
a struct page that was in the process of being remapped to a fake head.

With fake heads eliminated, page_ref_add_unless() no longer needs RCU
protection.

Remove the flag and synchronize_rcu() calls.

Link: https://lkml.kernel.org/r/20260227194302.274384-15-kas@kernel.org
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Baoquan He <bhe@redhat.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb_vmemmap.c

index 92330f172eb7c7c9f7307fda39dc9e9909d2dfac..fd1d5d5d12b4621065ef6eeec2558cdd7dc0638b 100644 (file)
@@ -47,8 +47,6 @@ struct vmemmap_remap_walk {
 #define VMEMMAP_SPLIT_NO_TLB_FLUSH     BIT(0)
 /* Skip the TLB flush when we remap the PTE */
 #define VMEMMAP_REMAP_NO_TLB_FLUSH     BIT(1)
-/* synchronize_rcu() to avoid writes from page_ref_add_unless() */
-#define VMEMMAP_SYNCHRONIZE_RCU                BIT(2)
        unsigned long           flags;
 };
 
@@ -409,9 +407,6 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
        if (!folio_test_hugetlb_vmemmap_optimized(folio))
                return 0;
 
-       if (flags & VMEMMAP_SYNCHRONIZE_RCU)
-               synchronize_rcu();
-
        vmemmap_start   = (unsigned long)&folio->page;
        vmemmap_end     = vmemmap_start + hugetlb_vmemmap_size(h);
 
@@ -444,7 +439,7 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
  */
 int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
 {
-       return __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_SYNCHRONIZE_RCU);
+       return __hugetlb_vmemmap_restore_folio(h, folio, 0);
 }
 
 /**
@@ -467,14 +462,11 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
        struct folio *folio, *t_folio;
        long restored = 0;
        long ret = 0;
-       unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
+       unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH;
 
        list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
                if (folio_test_hugetlb_vmemmap_optimized(folio)) {
                        ret = __hugetlb_vmemmap_restore_folio(h, folio, flags);
-                       /* only need to synchronize_rcu() once for each batch */
-                       flags &= ~VMEMMAP_SYNCHRONIZE_RCU;
-
                        if (ret)
                                break;
                        restored++;
@@ -554,8 +546,6 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 
        static_branch_inc(&hugetlb_optimize_vmemmap_key);
 
-       if (flags & VMEMMAP_SYNCHRONIZE_RCU)
-               synchronize_rcu();
        /*
         * Very Subtle
         * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed
@@ -613,7 +603,7 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
 {
        LIST_HEAD(vmemmap_pages);
 
-       __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_SYNCHRONIZE_RCU);
+       __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
        free_vmemmap_page_list(&vmemmap_pages);
 }
 
@@ -641,7 +631,7 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
        struct folio *folio;
        int nr_to_optimize;
        LIST_HEAD(vmemmap_pages);
-       unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
+       unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH;
 
        nr_to_optimize = 0;
        list_for_each_entry(folio, folio_list, lru) {
@@ -694,8 +684,6 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
                int ret;
 
                ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags);
-               /* only need to synchronize_rcu() once for each batch */
-               flags &= ~VMEMMAP_SYNCHRONIZE_RCU;
 
                /*
                 * Pages to be freed may have been accumulated.  If we