+++ /dev/null
-From 94efde1d15399f5c88e576923db9bcd422d217f2 Mon Sep 17 00:00:00 2001
-From: John Hubbard <jhubbard@nvidia.com>
-Date: Mon, 4 Nov 2024 19:29:44 -0800
-Subject: mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
-
-From: John Hubbard <jhubbard@nvidia.com>
-
-commit 94efde1d15399f5c88e576923db9bcd422d217f2 upstream.
-
-commit 53ba78de064b ("mm/gup: introduce
-check_and_migrate_movable_folios()") created a new constraint on the
-pin_user_pages*() API family: a potentially large internal allocation must
-now occur, for FOLL_LONGTERM cases.
-
-A user-visible consequence has now appeared: user space can no longer
-pin more than 2GB of memory on x86_64. That's because, on a 4KB
-PAGE_SIZE system, pinning 2GB (indirectly, via a device driver that
-calls pin_user_pages()) requires allocating a folio pointer array that
-is MAX_PAGE_ORDER in size, which is the upper limit for kmalloc().
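-
-For illustration (assuming 8-byte pointers and the default
-MAX_PAGE_ORDER of 10): pinning 2GB at 4KB PAGE_SIZE spans
-2GB / 4KB = 524,288 pages, so the folio pointer array is
-524,288 * 8 bytes = 4MB, which is already an order-10 (MAX_PAGE_ORDER)
-allocation, the largest that kmalloc() can provide. Any larger pin
-therefore cannot allocate its folio array.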
-
-In addition to the directly visible effect described above, there is also
-the problem of adding an unnecessary allocation. The **pages array
-argument has already been allocated, and there is no need for a redundant
-**folios array allocation in this case.
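-
-Concretely, the redundant pattern being removed (condensed from the
-final hunk below) was:
-
-    folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
-    if (!folios)
-        return -ENOMEM;
-
-    for (i = 0; i < nr_pages; i++)
-        folios[i] = page_folio(pages[i]);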
-
-Fix this by avoiding the new allocation entirely. This is done by
-referring either to the original page[i] within **pages, or to the
-associated folio. Thanks to David Hildenbrand for suggesting this
-approach and for providing the initial implementation (which I've
-tested and adjusted slightly).
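-
-For illustration, the shape of that approach, condensed from the hunks
-below: the caller's existing **pages array is wrapped in a small
-pages_or_folios descriptor and each folio is resolved on demand, so no
-second array is ever allocated:
-
-    struct pages_or_folios pofs = {
-        .pages = pages,
-        .has_folios = false,
-        .nr_entries = nr_pages,
-    };
-
-    /* Per entry: either the stored folio, or page_folio(pages[i]). */
-    struct folio *folio = pofs_get_folio(&pofs, i);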
-
-[jhubbard@nvidia.com: whitespace tweak, per David]
- Link: https://lkml.kernel.org/r/131cf9c8-ebc0-4cbb-b722-22fa8527bf3c@nvidia.com
-[jhubbard@nvidia.com: bypass pofs_get_folio(), per Oscar]
- Link: https://lkml.kernel.org/r/c1587c7f-9155-45be-bd62-1e36c0dd6923@nvidia.com
-Link: https://lkml.kernel.org/r/20241105032944.141488-2-jhubbard@nvidia.com
-Fixes: 53ba78de064b ("mm/gup: introduce check_and_migrate_movable_folios()")
-Signed-off-by: John Hubbard <jhubbard@nvidia.com>
-Suggested-by: David Hildenbrand <david@redhat.com>
-Acked-by: David Hildenbrand <david@redhat.com>
-Reviewed-by: Oscar Salvador <osalvador@suse.de>
-Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
-Cc: Dave Airlie <airlied@redhat.com>
-Cc: Gerd Hoffmann <kraxel@redhat.com>
-Cc: Matthew Wilcox <willy@infradead.org>
-Cc: Christoph Hellwig <hch@infradead.org>
-Cc: Jason Gunthorpe <jgg@nvidia.com>
-Cc: Peter Xu <peterx@redhat.com>
-Cc: Arnd Bergmann <arnd@arndb.de>
-Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
-Cc: Dongwon Kim <dongwon.kim@intel.com>
-Cc: Hugh Dickins <hughd@google.com>
-Cc: Junxiao Chang <junxiao.chang@intel.com>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- mm/gup.c | 114 ++++++++++++++++++++++++++++++++++++++++++---------------------
- 1 file changed, 77 insertions(+), 37 deletions(-)
-
---- a/mm/gup.c
-+++ b/mm/gup.c
-@@ -2282,20 +2282,57 @@ struct page *get_dump_page(unsigned long
- #endif /* CONFIG_ELF_CORE */
-
- #ifdef CONFIG_MIGRATION
-+
-+/*
-+ * An array of either pages or folios ("pofs"). Although it may seem tempting to
-+ * avoid this complication by simply interpreting a list of folios as a list of
-+ * pages, that approach won't work in the longer term, because eventually the
-+ * layouts of struct page and struct folio will become completely different.
-+ * Furthermore, this pof approach avoids excessive page_folio() calls.
-+ */
-+struct pages_or_folios {
-+ union {
-+ struct page **pages;
-+ struct folio **folios;
-+ void **entries;
-+ };
-+ bool has_folios;
-+ long nr_entries;
-+};
-+
-+static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
-+{
-+ if (pofs->has_folios)
-+ return pofs->folios[i];
-+ return page_folio(pofs->pages[i]);
-+}
-+
-+static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
-+{
-+ pofs->entries[i] = NULL;
-+}
-+
-+static void pofs_unpin(struct pages_or_folios *pofs)
-+{
-+ if (pofs->has_folios)
-+ unpin_folios(pofs->folios, pofs->nr_entries);
-+ else
-+ unpin_user_pages(pofs->pages, pofs->nr_entries);
-+}
-+
- /*
- * Returns the number of collected folios. Return value is always >= 0.
- */
- static unsigned long collect_longterm_unpinnable_folios(
-- struct list_head *movable_folio_list,
-- unsigned long nr_folios,
-- struct folio **folios)
-+ struct list_head *movable_folio_list,
-+ struct pages_or_folios *pofs)
- {
- unsigned long i, collected = 0;
- struct folio *prev_folio = NULL;
- bool drain_allow = true;
-
-- for (i = 0; i < nr_folios; i++) {
-- struct folio *folio = folios[i];
-+ for (i = 0; i < pofs->nr_entries; i++) {
-+ struct folio *folio = pofs_get_folio(pofs, i);
-
- if (folio == prev_folio)
- continue;
-@@ -2336,16 +2373,15 @@ static unsigned long collect_longterm_un
- * Returns -EAGAIN if all folios were successfully migrated or -errno for
- * failure (or partial success).
- */
--static int migrate_longterm_unpinnable_folios(
-- struct list_head *movable_folio_list,
-- unsigned long nr_folios,
-- struct folio **folios)
-+static int
-+migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
-+ struct pages_or_folios *pofs)
- {
- int ret;
- unsigned long i;
-
-- for (i = 0; i < nr_folios; i++) {
-- struct folio *folio = folios[i];
-+ for (i = 0; i < pofs->nr_entries; i++) {
-+ struct folio *folio = pofs_get_folio(pofs, i);
-
- if (folio_is_device_coherent(folio)) {
- /*
-@@ -2353,7 +2389,7 @@ static int migrate_longterm_unpinnable_f
- * convert the pin on the source folio to a normal
- * reference.
- */
-- folios[i] = NULL;
-+ pofs_clear_entry(pofs, i);
- folio_get(folio);
- gup_put_folio(folio, 1, FOLL_PIN);
-
-@@ -2372,8 +2408,8 @@ static int migrate_longterm_unpinnable_f
- * calling folio_isolate_lru() which takes a reference so the
- * folio won't be freed if it's migrating.
- */
-- unpin_folio(folios[i]);
-- folios[i] = NULL;
-+ unpin_folio(folio);
-+ pofs_clear_entry(pofs, i);
- }
-
- if (!list_empty(movable_folio_list)) {
-@@ -2396,12 +2432,26 @@ static int migrate_longterm_unpinnable_f
- return -EAGAIN;
-
- err:
-- unpin_folios(folios, nr_folios);
-+ pofs_unpin(pofs);
- putback_movable_pages(movable_folio_list);
-
- return ret;
- }
-
-+static long
-+check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
-+{
-+ LIST_HEAD(movable_folio_list);
-+ unsigned long collected;
-+
-+ collected = collect_longterm_unpinnable_folios(&movable_folio_list,
-+ pofs);
-+ if (!collected)
-+ return 0;
-+
-+ return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
-+}
-+
- /*
- * Check whether all folios are *allowed* to be pinned indefinitely (longterm).
- * Rather confusingly, all folios in the range are required to be pinned via
-@@ -2421,16 +2471,13 @@ err:
- static long check_and_migrate_movable_folios(unsigned long nr_folios,
- struct folio **folios)
- {
-- unsigned long collected;
-- LIST_HEAD(movable_folio_list);
-+ struct pages_or_folios pofs = {
-+ .folios = folios,
-+ .has_folios = true,
-+ .nr_entries = nr_folios,
-+ };
-
-- collected = collect_longterm_unpinnable_folios(&movable_folio_list,
-- nr_folios, folios);
-- if (!collected)
-- return 0;
--
-- return migrate_longterm_unpinnable_folios(&movable_folio_list,
-- nr_folios, folios);
-+ return check_and_migrate_movable_pages_or_folios(&pofs);
- }
-
- /*
-@@ -2442,20 +2489,13 @@ static long check_and_migrate_movable_fo
- static long check_and_migrate_movable_pages(unsigned long nr_pages,
- struct page **pages)
- {
-- struct folio **folios;
-- long i, ret;
-+ struct pages_or_folios pofs = {
-+ .pages = pages,
-+ .has_folios = false,
-+ .nr_entries = nr_pages,
-+ };
-
-- folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
-- if (!folios)
-- return -ENOMEM;
--
-- for (i = 0; i < nr_pages; i++)
-- folios[i] = page_folio(pages[i]);
--
-- ret = check_and_migrate_movable_folios(nr_pages, folios);
--
-- kfree(folios);
-- return ret;
-+ return check_and_migrate_movable_pages_or_folios(&pofs);
- }
- #else
- static long check_and_migrate_movable_pages(unsigned long nr_pages,