--- /dev/null
+From 94efde1d15399f5c88e576923db9bcd422d217f2 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Mon, 4 Nov 2024 19:29:44 -0800
+Subject: mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit 94efde1d15399f5c88e576923db9bcd422d217f2 upstream.
+
+commit 53ba78de064b ("mm/gup: introduce
+check_and_migrate_movable_folios()") created a new constraint on the
+pin_user_pages*() API family: a potentially large internal allocation must
+now occur, for FOLL_LONGTERM cases.
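+
+For reference, the affected call pattern looks roughly like this (a
+hypothetical driver-side sketch; names and error handling are
+illustrative, not taken from any real driver):
+
+	struct page **pages;
+	int pinned;
+
+	pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
+	/* FOLL_LONGTERM is what triggers the internal allocation */
+	pinned = pin_user_pages_fast(start, nr_pages,
+				     FOLL_WRITE | FOLL_LONGTERM, pages);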
+
+A user-visible consequence has now appeared: user space can no longer
+pin more than 2GB of memory on x86_64. That's because, on a 4KB
+PAGE_SIZE system, when user space tries to (indirectly, via a device
+driver that calls pin_user_pages()) pin 2GB, this requires allocating a
+folio pointers array at MAX_PAGE_ORDER size, which is the upper limit
+for kmalloc().
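+
+To make the arithmetic concrete (assuming x86_64 defaults: 4KB pages,
+8-byte pointers, MAX_PAGE_ORDER == 10):
+
+	2GB / 4KB PAGE_SIZE       = 524288 entries
+	524288 entries * 8 bytes  = 4MB
+	kmalloc() upper bound     = 4KB << MAX_PAGE_ORDER = 4MB
+
+so pinning anything beyond 2GB pushes the folio pointers array past
+what kmalloc() can provide.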
+
+In addition to the directly visible effect described above, there is also
+the problem of adding an unnecessary allocation. The **pages array
+argument has already been allocated, and there is no need for a redundant
+**folios array allocation in this case.
+
+Fix this by avoiding the new allocation entirely. This is done by
+referring to either the original page[i] within **pages, or to the
+associated folio. Thanks to David Hildenbrand for suggesting this
+approach and for providing the initial implementation (which I've tested
+and adjusted slightly).
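+
+Concretely, both wrapper functions now build a small on-stack
+descriptor and hand it to a common helper; the descriptor (quoted from
+the diff below) is:
+
+	struct pages_or_folios {
+		union {
+			struct page **pages;
+			struct folio **folios;
+			void **entries;
+		};
+		bool has_folios;
+		long nr_entries;
+	};
+
+pofs_get_folio() then resolves entry i to a folio either directly
+(folios[i]) or via page_folio(pages[i]), so no intermediate array is
+needed.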
+
+[jhubbard@nvidia.com: whitespace tweak, per David]
+ Link: https://lkml.kernel.org/r/131cf9c8-ebc0-4cbb-b722-22fa8527bf3c@nvidia.com
+[jhubbard@nvidia.com: bypass pofs_get_folio(), per Oscar]
+ Link: https://lkml.kernel.org/r/c1587c7f-9155-45be-bd62-1e36c0dd6923@nvidia.com
+Link: https://lkml.kernel.org/r/20241105032944.141488-2-jhubbard@nvidia.com
+Fixes: 53ba78de064b ("mm/gup: introduce check_and_migrate_movable_folios()")
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Gerd Hoffmann <kraxel@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Dongwon Kim <dongwon.kim@intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Junxiao Chang <junxiao.chang@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 114 ++++++++++++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 77 insertions(+), 37 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2282,20 +2282,57 @@ struct page *get_dump_page(unsigned long
+ #endif /* CONFIG_ELF_CORE */
+
+ #ifdef CONFIG_MIGRATION
++
++/*
++ * An array of either pages or folios ("pofs"). Although it may seem tempting to
++ * avoid this complication, by simply interpreting a list of folios as a list of
++ * pages, that approach won't work in the longer term, because eventually the
++ * layouts of struct page and struct folio will become completely different.
++ * Furthermore, this pofs approach avoids excessive page_folio() calls.
++ */
++struct pages_or_folios {
++ union {
++ struct page **pages;
++ struct folio **folios;
++ void **entries;
++ };
++ bool has_folios;
++ long nr_entries;
++};
++
++static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
++{
++ if (pofs->has_folios)
++ return pofs->folios[i];
++ return page_folio(pofs->pages[i]);
++}
++
++static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
++{
++ pofs->entries[i] = NULL;
++}
++
++static void pofs_unpin(struct pages_or_folios *pofs)
++{
++ if (pofs->has_folios)
++ unpin_folios(pofs->folios, pofs->nr_entries);
++ else
++ unpin_user_pages(pofs->pages, pofs->nr_entries);
++}
++
+ /*
+ * Returns the number of collected folios. Return value is always >= 0.
+ */
+ static unsigned long collect_longterm_unpinnable_folios(
+- struct list_head *movable_folio_list,
+- unsigned long nr_folios,
+- struct folio **folios)
++ struct list_head *movable_folio_list,
++ struct pages_or_folios *pofs)
+ {
+ unsigned long i, collected = 0;
+ struct folio *prev_folio = NULL;
+ bool drain_allow = true;
+
+- for (i = 0; i < nr_folios; i++) {
+- struct folio *folio = folios[i];
++ for (i = 0; i < pofs->nr_entries; i++) {
++ struct folio *folio = pofs_get_folio(pofs, i);
+
+ if (folio == prev_folio)
+ continue;
+@@ -2336,16 +2373,15 @@ static unsigned long collect_longterm_un
+ * Returns -EAGAIN if all folios were successfully migrated or -errno for
+ * failure (or partial success).
+ */
+-static int migrate_longterm_unpinnable_folios(
+- struct list_head *movable_folio_list,
+- unsigned long nr_folios,
+- struct folio **folios)
++static int
++migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
++ struct pages_or_folios *pofs)
+ {
+ int ret;
+ unsigned long i;
+
+- for (i = 0; i < nr_folios; i++) {
+- struct folio *folio = folios[i];
++ for (i = 0; i < pofs->nr_entries; i++) {
++ struct folio *folio = pofs_get_folio(pofs, i);
+
+ if (folio_is_device_coherent(folio)) {
+ /*
+@@ -2353,7 +2389,7 @@ static int migrate_longterm_unpinnable_f
+ * convert the pin on the source folio to a normal
+ * reference.
+ */
+- folios[i] = NULL;
++ pofs_clear_entry(pofs, i);
+ folio_get(folio);
+ gup_put_folio(folio, 1, FOLL_PIN);
+
+@@ -2372,8 +2408,8 @@ static int migrate_longterm_unpinnable_f
+ * calling folio_isolate_lru() which takes a reference so the
+ * folio won't be freed if it's migrating.
+ */
+- unpin_folio(folios[i]);
+- folios[i] = NULL;
++ unpin_folio(folio);
++ pofs_clear_entry(pofs, i);
+ }
+
+ if (!list_empty(movable_folio_list)) {
+@@ -2396,12 +2432,26 @@ static int migrate_longterm_unpinnable_f
+ return -EAGAIN;
+
+ err:
+- unpin_folios(folios, nr_folios);
++ pofs_unpin(pofs);
+ putback_movable_pages(movable_folio_list);
+
+ return ret;
+ }
+
++static long
++check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
++{
++ LIST_HEAD(movable_folio_list);
++ unsigned long collected;
++
++ collected = collect_longterm_unpinnable_folios(&movable_folio_list,
++ pofs);
++ if (!collected)
++ return 0;
++
++ return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
++}
++
+ /*
+ * Check whether all folios are *allowed* to be pinned indefinitely (longterm).
+ * Rather confusingly, all folios in the range are required to be pinned via
+@@ -2421,16 +2471,13 @@ err:
+ static long check_and_migrate_movable_folios(unsigned long nr_folios,
+ struct folio **folios)
+ {
+- unsigned long collected;
+- LIST_HEAD(movable_folio_list);
++ struct pages_or_folios pofs = {
++ .folios = folios,
++ .has_folios = true,
++ .nr_entries = nr_folios,
++ };
+
+- collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+- nr_folios, folios);
+- if (!collected)
+- return 0;
+-
+- return migrate_longterm_unpinnable_folios(&movable_folio_list,
+- nr_folios, folios);
++ return check_and_migrate_movable_pages_or_folios(&pofs);
+ }
+
+ /*
+@@ -2442,20 +2489,13 @@ static long check_and_migrate_movable_fo
+ static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ struct page **pages)
+ {
+- struct folio **folios;
+- long i, ret;
++ struct pages_or_folios pofs = {
++ .pages = pages,
++ .has_folios = false,
++ .nr_entries = nr_pages,
++ };
+
+- folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
+- if (!folios)
+- return -ENOMEM;
+-
+- for (i = 0; i < nr_pages; i++)
+- folios[i] = page_folio(pages[i]);
+-
+- ret = check_and_migrate_movable_folios(nr_pages, folios);
+-
+- kfree(folios);
+- return ret;
++ return check_and_migrate_movable_pages_or_folios(&pofs);
+ }
+ #else
+ static long check_and_migrate_movable_pages(unsigned long nr_pages,