]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm: move _pincount in folio to page[2] on 32bit
author David Hildenbrand <david@redhat.com>
Mon, 3 Mar 2025 16:29:58 +0000 (17:29 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 18 Mar 2025 05:06:44 +0000 (22:06 -0700)
Let's free up some space on 32bit in page[1] by moving the _pincount to
page[2].

For order-1 folios (never anon folios!) on 32bit, we will now also use the
GUP_PIN_COUNTING_BIAS approach.  A fully-mapped order-1 folio requires 2
references.  With GUP_PIN_COUNTING_BIAS being 1024, we'd detect such
folios as "maybe pinned" with 512 full mappings, instead of 1024 for
order-0.  As anon folios are out of the picture (which are the most
relevant users of checking for pinnings on *mapped* pages) and we are
talking about 32bit, this is not expected to cause any trouble.

In __dump_page(), copy one additional folio page if we detect a folio with
an order > 1, so we can dump the pincount on order > 1 folios reliably.

Note that THPs on 32bit are not particularly common (and we don't care too
much about performance), but we want to keep it working reliably, because
likely we want to use large folios there as well in the future,
independent of PMD leaf support.

Once we dynamically allocate "struct folio", fortunately the 32bit
specifics will likely go away again; even small folios could then have a
pincount and folio_has_pincount() would essentially always return "true".

Link: https://lkml.kernel.org/r/20250303163014.1128035-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h
include/linux/mm_types.h
mm/debug.c
mm/gup.c
mm/internal.h
mm/page_alloc.c

index c9c2ca34535022ad462932e50cc51654f4cfb374..860082ba89781bc5ed292b2cc1fdcca9d3f6e1a0 100644 (file)
@@ -2010,6 +2010,13 @@ static inline struct folio *pfn_folio(unsigned long pfn)
        return page_folio(pfn_to_page(pfn));
 }
 
+static inline bool folio_has_pincount(const struct folio *folio)
+{
+       if (IS_ENABLED(CONFIG_64BIT))
+               return folio_test_large(folio);
+       return folio_order(folio) > 1;
+}
+
 /**
  * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
  * @folio: The folio.
@@ -2026,7 +2033,7 @@ static inline struct folio *pfn_folio(unsigned long pfn)
  * get that many refcounts, and b) all the callers of this routine are
  * expected to be able to deal gracefully with a false positive.
  *
- * For large folios, the result will be exactly correct. That's because
+ * For most large folios, the result will be exactly correct. That's because
  * we have more tracking data available: the _pincount field is used
  * instead of the GUP_PIN_COUNTING_BIAS scheme.
  *
@@ -2037,7 +2044,7 @@ static inline struct folio *pfn_folio(unsigned long pfn)
  */
 static inline bool folio_maybe_dma_pinned(struct folio *folio)
 {
-       if (folio_test_large(folio))
+       if (folio_has_pincount(folio))
                return atomic_read(&folio->_pincount) > 0;
 
        /*
index 727322ecbfddd18750ad61c48a68d75e0f4d24c3..3ea2019a1aacf8a79cc5218f51ccc05f52ab216e 100644 (file)
@@ -387,7 +387,9 @@ struct folio {
                                        atomic_t _large_mapcount;
                                        atomic_t _entire_mapcount;
                                        atomic_t _nr_pages_mapped;
+#ifdef CONFIG_64BIT
                                        atomic_t _pincount;
+#endif /* CONFIG_64BIT */
        /* private: the union with struct page is transitional */
                                };
                                unsigned long _usable_1[4];
@@ -408,6 +410,9 @@ struct folio {
                        unsigned long _head_2;
        /* public: */
                        struct list_head _deferred_list;
+#ifndef CONFIG_64BIT
+                       atomic_t _pincount;
+#endif /* !CONFIG_64BIT */
        /* private: the union with struct page is transitional */
                };
                struct page __page_2;
index 2d1bd67d957bc9688f8d76afd04288d0e78045c1..83ef3bd0ccd328ea2c1256bc09c5021425ed01d4 100644 (file)
@@ -79,12 +79,17 @@ static void __dump_folio(struct folio *folio, struct page *page,
                        folio_ref_count(folio), mapcount, mapping,
                        folio->index + idx, pfn);
        if (folio_test_large(folio)) {
+               int pincount = 0;
+
+               if (folio_has_pincount(folio))
+                       pincount = atomic_read(&folio->_pincount);
+
                pr_warn("head: order:%u mapcount:%d entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
                                folio_order(folio),
                                folio_mapcount(folio),
                                folio_entire_mapcount(folio),
                                folio_nr_pages_mapped(folio),
-                               atomic_read(&folio->_pincount));
+                               pincount);
        }
 
 #ifdef CONFIG_MEMCG
@@ -146,6 +151,9 @@ again:
        if (idx < MAX_FOLIO_NR_PAGES) {
                memcpy(&folio, foliop, 2 * sizeof(struct page));
                nr_pages = folio_nr_pages(&folio);
+               if (nr_pages > 1)
+                       memcpy(&folio.__page_2, &foliop->__page_2,
+                              sizeof(struct page));
                foliop = &folio;
        }
 
index e5040657870ea0001a8fd9b86c7b97099745d803..2944fe8cf3174797f52f0cdda5cc3cd28849ca5d 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -109,7 +109,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
                if (is_zero_folio(folio))
                        return;
                node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
-               if (folio_test_large(folio))
+               if (folio_has_pincount(folio))
                        atomic_sub(refs, &folio->_pincount);
                else
                        refs *= GUP_PIN_COUNTING_BIAS;
@@ -164,7 +164,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
                 * Increment the normal page refcount field at least once,
                 * so that the page really is pinned.
                 */
-               if (folio_test_large(folio)) {
+               if (folio_has_pincount(folio)) {
                        folio_ref_add(folio, refs);
                        atomic_add(refs, &folio->_pincount);
                } else {
@@ -223,7 +223,7 @@ void folio_add_pin(struct folio *folio)
         * page refcount field at least once, so that the page really is
         * pinned.
         */
-       if (folio_test_large(folio)) {
+       if (folio_has_pincount(folio)) {
                WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
                folio_ref_inc(folio);
                atomic_inc(&folio->_pincount);
@@ -575,7 +575,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
         * is pinned.  That's why the refcount from the earlier
         * try_get_folio() is left intact.
         */
-       if (folio_test_large(folio))
+       if (folio_has_pincount(folio))
                atomic_add(refs, &folio->_pincount);
        else
                folio_ref_add(folio,
index 1cd977413859dca051ff44bef362e7216b245851..2d44a4c9d2820a7be4e36dca7539e9d413c69da0 100644 (file)
@@ -764,7 +764,8 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
        atomic_set(&folio->_large_mapcount, -1);
        atomic_set(&folio->_entire_mapcount, -1);
        atomic_set(&folio->_nr_pages_mapped, 0);
-       atomic_set(&folio->_pincount, 0);
+       if (IS_ENABLED(CONFIG_64BIT) || order > 1)
+               atomic_set(&folio->_pincount, 0);
        if (order > 1)
                INIT_LIST_HEAD(&folio->_deferred_list);
 }
index 735192222c364d04bce91f2bac3e283b398b1a07..2a9aa4439a662550e31564d1a735739fd6fa1e39 100644 (file)
@@ -959,9 +959,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
                        bad_page(page, "nonzero nr_pages_mapped");
                        goto out;
                }
-               if (unlikely(atomic_read(&folio->_pincount))) {
-                       bad_page(page, "nonzero pincount");
-                       goto out;
+               if (IS_ENABLED(CONFIG_64BIT)) {
+                       if (unlikely(atomic_read(&folio->_pincount))) {
+                               bad_page(page, "nonzero pincount");
+                               goto out;
+                       }
                }
                break;
        case 2:
@@ -970,6 +972,12 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
                        bad_page(page, "on deferred list");
                        goto out;
                }
+               if (!IS_ENABLED(CONFIG_64BIT)) {
+                       if (unlikely(atomic_read(&folio->_pincount))) {
+                               bad_page(page, "nonzero pincount");
+                               goto out;
+                       }
+               }
                break;
        case 3:
                /* the third tail page: hugetlb specifics overlap ->mappings */