From: Sasha Levin
Date: Sun, 2 Dec 2018 08:31:56 +0000 (-0500)
Subject: hugepage backports for 4.19
X-Git-Tag: v4.19.7~45
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4067483ad4f3faaaada358c4128ad208fe53c651;p=thirdparty%2Fkernel%2Fstable-queue.git

hugepage backports for 4.19

Signed-off-by: Sasha Levin
---

diff --git a/queue-4.19/mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_siz.patch b/queue-4.19/mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_siz.patch
new file mode 100644
index 00000000000..b22f28c7448
--- /dev/null
+++ b/queue-4.19/mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_siz.patch
@@ -0,0 +1,104 @@
+From 3e6e59a4b47e060cb860957aadeb476e2194503a Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:21 -0800
+Subject: mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
+
+commit 006d3ff27e884f80bd7d306b041afc415f63598f upstream.
+
+Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that
+__split_huge_page() was using i_size_read() while holding the irq-safe
+lru_lock and page tree lock, but the 32-bit i_size_read() uses an
+irq-unsafe seqlock which should not be nested inside them.
+
+Instead, read the i_size earlier in split_huge_page_to_list(), and pass
+the end offset down to __split_huge_page(): all while holding head page
+lock, which is enough to prevent truncation of that extent before the
+page tree lock has been taken.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils
+Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/huge_memory.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index c12b441a99f9..15310f14c25e 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2410,12 +2410,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
+ }
+ 
+ static void __split_huge_page(struct page *page, struct list_head *list,
+-		unsigned long flags)
++		pgoff_t end, unsigned long flags)
+ {
+ 	struct page *head = compound_head(page);
+ 	struct zone *zone = page_zone(head);
+ 	struct lruvec *lruvec;
+-	pgoff_t end = -1;
+ 	int i;
+ 
+ 	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
+@@ -2423,9 +2422,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
+ 	/* complete memcg works before add pages to LRU */
+ 	mem_cgroup_split_huge_fixup(head);
+ 
+-	if (!PageAnon(page))
+-		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
+-
+ 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
+ 		__split_huge_page_tail(head, i, lruvec, list);
+ 		/* Some pages can be beyond i_size: drop them from page cache */
+@@ -2597,6 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 	int count, mapcount, extra_pins, ret;
+ 	bool mlocked;
+ 	unsigned long flags;
++	pgoff_t end;
+ 
+ 	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
+ 	VM_BUG_ON_PAGE(!PageLocked(page), page);
+@@ -2619,6 +2616,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 			ret = -EBUSY;
+ 			goto out;
+ 		}
++		end = -1;
+ 		mapping = NULL;
+ 		anon_vma_lock_write(anon_vma);
+ 	} else {
+@@ -2632,6 +2630,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 
+ 		anon_vma = NULL;
+ 		i_mmap_lock_read(mapping);
++
++		/*
++		 * __split_huge_page() may need to trim off pages beyond EOF:
++		 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
++		 * which cannot be nested inside the page tree lock. So note
++		 * end now: i_size itself may be changed at any moment, but
++		 * head page lock is good enough to serialize the trimming.
++		 */
++		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+ 	}
+ 
+ 	/*
+@@ -2681,7 +2688,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 		if (mapping)
+ 			__dec_node_page_state(page, NR_SHMEM_THPS);
+ 		spin_unlock(&pgdata->split_queue_lock);
+-		__split_huge_page(page, list, flags);
++		__split_huge_page(page, list, end, flags);
+ 		if (PageSwapCache(head)) {
+ 			swp_entry_t entry = { .val = page_private(head) };
+ 
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-huge_memory-rename-freeze_page-to-unmap_page.patch b/queue-4.19/mm-huge_memory-rename-freeze_page-to-unmap_page.patch
new file mode 100644
index 00000000000..4530d3d7d40
--- /dev/null
+++ b/queue-4.19/mm-huge_memory-rename-freeze_page-to-unmap_page.patch
@@ -0,0 +1,117 @@
+From 02500e569e9e6a2feac492fd1837cde5c4a56abc Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:13 -0800
+Subject: mm/huge_memory: rename freeze_page() to unmap_page()
+
+commit 906f9cdfc2a0800f13683f9e4ebdfd08c12ee81b upstream.
+
+The term "freeze" is used in several ways in the kernel, and in mm it
+has the particular meaning of forcing page refcount temporarily to 0.
+freeze_page() is just too confusing a name for a function that unmaps a
+page: rename it unmap_page(), and rename unfreeze_page() remap_page().
+
+Went to change the mention of freeze_page() added later in mm/rmap.c,
+but found it to be incorrect: ordinary page reclaim reaches there too;
+but the substance of the comment still seems correct, so edit it down.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils
+Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/huge_memory.c | 12 ++++++------
+ mm/rmap.c        | 13 +++----------
+ 2 files changed, 9 insertions(+), 16 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index deed97fba979..ef573d719eb4 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2322,7 +2322,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ 	}
+ }
+ 
+-static void freeze_page(struct page *page)
++static void unmap_page(struct page *page)
+ {
+ 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+ 		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+@@ -2337,7 +2337,7 @@ static void freeze_page(struct page *page)
+ 	VM_BUG_ON_PAGE(!unmap_success, page);
+ }
+ 
+-static void unfreeze_page(struct page *page)
++static void remap_page(struct page *page)
+ {
+ 	int i;
+ 	if (PageTransHuge(page)) {
+@@ -2454,7 +2454,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
+ 
+ 	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+ 
+-	unfreeze_page(head);
++	remap_page(head);
+ 
+ 	for (i = 0; i < HPAGE_PMD_NR; i++) {
+ 		struct page *subpage = head + i;
+@@ -2635,7 +2635,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 	}
+ 
+ 	/*
+-	 * Racy check if we can split the page, before freeze_page() will
++	 * Racy check if we can split the page, before unmap_page() will
+ 	 * split PMDs
+ 	 */
+ 	if (!can_split_huge_page(head, &extra_pins)) {
+@@ -2644,7 +2644,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 	}
+ 
+ 	mlocked = PageMlocked(page);
+-	freeze_page(head);
++	unmap_page(head);
+ 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
+ 
+ 	/* Make sure the page is not on per-CPU pagevec as it takes pin */
+@@ -2701,7 +2701,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ fail:		if (mapping)
+ 			xa_unlock(&mapping->i_pages);
+ 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+-		unfreeze_page(head);
++		remap_page(head);
+ 		ret = -EBUSY;
+ 	}
+ 
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 1e79fac3186b..85b7f9423352 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ 						      address + PAGE_SIZE);
+ 		} else {
+ 			/*
+-			 * We should not need to notify here as we reach this
+-			 * case only from freeze_page() itself only call from
+-			 * split_huge_page_to_list() so everything below must
+-			 * be true:
+-			 * - page is not anonymous
+-			 * - page is locked
+-			 *
+-			 * So as it is a locked file back page thus it can not
+-			 * be remove from the page cache and replace by a new
+-			 * page before mmu_notifier_invalidate_range_end so no
++			 * This is a locked file-backed page, thus it cannot
++			 * be removed from the page cache and replaced by a new
++			 * page before mmu_notifier_invalidate_range_end, so no
+ 			 * concurrent thread might update its page table to
+ 			 * point at new page while a device still is using this
+ 			 * page.
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-huge_memory-splitting-set-mapping-index-before-un.patch b/queue-4.19/mm-huge_memory-splitting-set-mapping-index-before-un.patch
new file mode 100644
index 00000000000..9e646cdbb2b
--- /dev/null
+++ b/queue-4.19/mm-huge_memory-splitting-set-mapping-index-before-un.patch
@@ -0,0 +1,71 @@
+From 84bfc6c34def1b6df2bb76c6ba2fa4aa822e179f Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:16 -0800
+Subject: mm/huge_memory: splitting set mapping+index before unfreeze
+
+commit 173d9d9fd3ddae84c110fea8aedf1f26af6be9ec upstream.
+
+Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s
+VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page).
+
+Move the setting of mapping and index up before the page_ref_unfreeze()
+in __split_huge_page_tail() to fix this: so that a page cache lookup
+cannot get a reference while the tail's mapping and index are unstable.
+
+In fact, might as well move them up before the smp_wmb(): I don't see an
+actual need for that, but if I'm missing something, this way round is
+safer than the other, and no less efficient.
+
+You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is
+misplaced, and should be left until after the trylock_page(); but left as
+is has not crashed since, and gives more stringent assurance.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils
+Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
+Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Konstantin Khlebnikov
+Cc: Jerome Glisse
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/huge_memory.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index ef573d719eb4..c12b441a99f9 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2373,6 +2373,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
+ 			 (1L << PG_unevictable) |
+ 			 (1L << PG_dirty)));
+ 
++	/* ->mapping in first tail page is compound_mapcount */
++	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
++			page_tail);
++	page_tail->mapping = head->mapping;
++	page_tail->index = head->index + tail;
++
+ 	/* Page flags must be visible before we make the page non-compound. */
+ 	smp_wmb();
+ 
+@@ -2393,12 +2399,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
+ 	if (page_is_idle(head))
+ 		set_page_idle(page_tail);
+ 
+-	/* ->mapping in first tail page is compound_mapcount */
+-	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+-			page_tail);
+-	page_tail->mapping = head->mapping;
+-
+-	page_tail->index = head->index + tail;
+ 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+ 
+ 	/*
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-collapse_shmem-do-not-crash-on-compoun.patch b/queue-4.19/mm-khugepaged-collapse_shmem-do-not-crash-on-compoun.patch
new file mode 100644
index 00000000000..737f5467900
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-collapse_shmem-do-not-crash-on-compoun.patch
@@ -0,0 +1,53 @@
+From 46366d4787e7be1accb41349b9c84b9c4af3dbe0 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:47 -0800
+Subject: mm/khugepaged: collapse_shmem() do not crash on Compound
+
+commit 06a5e1268a5fb9c2b346a3da6b97e85f2eba0f07 upstream.
+
+collapse_shmem()'s VM_BUG_ON_PAGE(PageTransCompound) was unsafe: before
+it holds page lock of the first page, racing truncation then extension
+might conceivably have inserted a hugepage there already. Fail with the
+SCAN_PAGE_COMPOUND result, instead of crashing (CONFIG_DEBUG_VM=y) or
+otherwise mishandling the unexpected hugepage - though later we might
+code up a more constructive way of handling it, with SCAN_SUCCESS.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261529310.2275@eggly.anvils
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Cc: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index e2b13c04626e..fde5820be24d 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1398,7 +1398,15 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		 */
+ 		VM_BUG_ON_PAGE(!PageLocked(page), page);
+ 		VM_BUG_ON_PAGE(!PageUptodate(page), page);
+-		VM_BUG_ON_PAGE(PageTransCompound(page), page);
++
++		/*
++		 * If file was truncated then extended, or hole-punched, before
++		 * we locked the first page, then a THP might be there already.
++		 */
++		if (PageTransCompound(page)) {
++			result = SCAN_PAGE_COMPOUND;
++			goto out_unlock;
++		}
+ 
+ 		if (page_mapping(page) != mapping) {
+ 			result = SCAN_TRUNCATED;
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch b/queue-4.19/mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
new file mode 100644
index 00000000000..4cf859ef404
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
@@ -0,0 +1,64 @@
+From d8d532f288c7cfa5637232f1e05a5b692787af70 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:35 -0800
+Subject: mm/khugepaged: collapse_shmem() remember to clear holes
+
+commit 2af8ff291848cc4b1cce24b6c943394eb2c761e8 upstream.
+
+Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp
+flags khugepaged uses to allocate a huge page - in all common cases it
+would just be a waste of effort - so collapse_shmem() must remember to
+clear out any holes that it instantiates.
+
+The obvious place to do so, where they are put into the page cache tree,
+is not a good choice: because interrupts are disabled there. Leave it
+until further down, once success is assured, where the other pages are
+copied (before setting PageUptodate).
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 87dbb0fcfa1a..068868763b78 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1498,7 +1498,12 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		 * Replacing old pages with new one has succeed, now we need to
+ 		 * copy the content and free old pages.
+ 		 */
++		index = start;
+ 		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
++			while (index < page->index) {
++				clear_highpage(new_page + (index % HPAGE_PMD_NR));
++				index++;
++			}
+ 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
+ 					page);
+ 			list_del(&page->lru);
+@@ -1508,6 +1513,11 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			ClearPageActive(page);
+ 			ClearPageUnevictable(page);
+ 			put_page(page);
++			index++;
++		}
++		while (index < end) {
++			clear_highpage(new_page + (index % HPAGE_PMD_NR));
++			index++;
+ 		}
+ 
+ 		local_irq_save(flags);
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-collapse_shmem-stop-if-punched-or-trun.patch b/queue-4.19/mm-khugepaged-collapse_shmem-stop-if-punched-or-trun.patch
new file mode 100644
index 00000000000..2d00003326d
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-collapse_shmem-stop-if-punched-or-trun.patch
@@ -0,0 +1,62 @@
+From 85da1c220da9ef73ddabcb2c56a537e786cba8c3 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:25 -0800
+Subject: mm/khugepaged: collapse_shmem() stop if punched or truncated
+
+commit 701270fa193aadf00bdcf607738f64997275d4c7 upstream.
+
+Huge tmpfs testing showed that although collapse_shmem() recognizes a
+concurrently truncated or hole-punched page correctly, its handling of
+holes was liable to refill an emptied extent. Add check to stop that.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Reviewed-by: Matthew Wilcox
+Cc: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index a31d740e6cd1..0378f758b065 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1349,6 +1349,16 @@ static void collapse_shmem(struct mm_struct *mm,
+ 	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
+ 		int n = min(iter.index, end) - index;
+ 
++		/*
++		 * Stop if extent has been hole-punched, and is now completely
++		 * empty (the more obvious i_size_read() check would take an
++		 * irq-unsafe seqlock on 32-bit).
++		 */
++		if (n >= HPAGE_PMD_NR) {
++			result = SCAN_TRUNCATED;
++			goto tree_locked;
++		}
++
+ 		/*
+ 		 * Handle holes in the radix tree: charge it from shmem and
+ 		 * insert relevant subpage of new_page into the radix-tree.
+@@ -1459,6 +1469,11 @@ static void collapse_shmem(struct mm_struct *mm,
+ 	if (result == SCAN_SUCCEED && index < end) {
+ 		int n = end - index;
+ 
++		/* Stop if extent has been truncated, and is now empty */
++		if (n >= HPAGE_PMD_NR) {
++			result = SCAN_TRUNCATED;
++			goto tree_locked;
++		}
+ 		if (!shmem_charge(mapping->host, n)) {
+ 			result = SCAN_FAIL;
+ 			goto tree_locked;
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-collapse_shmem-without-freezing-new_pa.patch b/queue-4.19/mm-khugepaged-collapse_shmem-without-freezing-new_pa.patch
new file mode 100644
index 00000000000..eca799b7d07
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-collapse_shmem-without-freezing-new_pa.patch
@@ -0,0 +1,117 @@
+From efc355653067ff3abfbe9e5e29512edf0ff3ec5f Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:43 -0800
+Subject: mm/khugepaged: collapse_shmem() without freezing new_page
+
+commit 87c460a0bded56195b5eb497d44709777ef7b415 upstream.
+
+khugepaged's collapse_shmem() does almost all of its work, to assemble
+the huge new_page from 512 scattered old pages, with the new_page's
+refcount frozen to 0 (and refcounts of all old pages so far also frozen
+to 0). Including shmem_getpage() to read in any which were out on swap,
+memory reclaim if necessary to allocate their intermediate pages, and
+copying over all the data from old to new.
+
+Imagine the frozen refcount as a spinlock held, but without any lock
+debugging to highlight the abuse: it's not good, and under serious load
+heads into lockups - speculative getters of the page are not expecting
+to spin while khugepaged is rescheduled.
+
+One can get a little further under load by hacking around elsewhere; but
+fortunately, freezing the new_page turns out to have been entirely
+unnecessary, with no hacks needed elsewhere.
+
+The huge new_page lock is already held throughout, and guards all its
+subpages as they are brought one by one into the page cache tree; and
+anything reading the data in that page, without the lock, before it has
+been marked PageUptodate, would already be in the wrong. So simply
+eliminate the freezing of the new_page.
+
+Each of the old pages remains frozen with refcount 0 after it has been
+replaced by a new_page subpage in the page cache tree, until they are
+all unfrozen on success or failure: just as before. They could be
+unfrozen sooner, but cause no problem once no longer visible to
+find_get_entry(), filemap_map_pages() and other speculative lookups.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index d0a347e6fd08..e2b13c04626e 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+  * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+  *
+  * Basic scheme is simple, details are more complex:
+- *  - allocate and freeze a new huge page;
++ *  - allocate and lock a new huge page;
+  *  - scan over radix tree replacing old pages the new one
+  *    + swap in pages if necessary;
+  *    + fill in gaps;
+@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+  *  - if replacing succeed:
+  *    + copy data over;
+  *    + free old pages;
+- *    + unfreeze huge page;
++ *    + unlock huge page;
+  *  - if replacing failed;
+  *    + put all pages back and unfreeze them;
+  *    + restore gaps in the radix-tree;
+- *    + free huge page;
++ *    + unlock and free huge page;
+  */
+ static void collapse_shmem(struct mm_struct *mm,
+ 		struct address_space *mapping, pgoff_t start,
+@@ -1334,13 +1334,11 @@ static void collapse_shmem(struct mm_struct *mm,
+ 	__SetPageSwapBacked(new_page);
+ 	new_page->index = start;
+ 	new_page->mapping = mapping;
+-	BUG_ON(!page_ref_freeze(new_page, 1));
+ 
+ 	/*
+-	 * At this point the new_page is 'frozen' (page_count() is zero), locked
+-	 * and not up-to-date. It's safe to insert it into radix tree, because
+-	 * nobody would be able to map it or use it in other way until we
+-	 * unfreeze it.
++	 * At this point the new_page is locked and not up-to-date.
++	 * It's safe to insert it into the page cache, because nobody would
++	 * be able to map it or use it in another way until we unlock it.
+ 	 */
+ 
+ 	index = start;
+@@ -1517,9 +1515,8 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			index++;
+ 		}
+ 
+-		/* Everything is ready, let's unfreeze the new_page */
+ 		SetPageUptodate(new_page);
+-		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
++		page_ref_add(new_page, HPAGE_PMD_NR - 1);
+ 		set_page_dirty(new_page);
+ 		mem_cgroup_commit_charge(new_page, memcg, false, true);
+ 		lru_cache_add_anon(new_page);
+@@ -1566,8 +1563,6 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		VM_BUG_ON(nr_none);
+ 		xa_unlock_irq(&mapping->i_pages);
+ 
+-		/* Unfreeze new_page, caller would take care about freeing it */
+-		page_ref_unfreeze(new_page, 1);
+ 		mem_cgroup_cancel_charge(new_page, memcg, true);
+ 		new_page->mapping = NULL;
+ 	}
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch b/queue-4.19/mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
new file mode 100644
index 00000000000..80ff30f84e4
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
@@ -0,0 +1,92 @@
+From c594e1cb91df327a54f89971c1368d893c8a071c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:29 -0800
+Subject: mm/khugepaged: fix crashes due to misaccounted holes
+
+commit aaa52e340073b7f4593b3c4ddafcafa70cf838b5 upstream.
+
+Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent
+hit shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by
+clear_inode()'s BUG_ON(inode->i_data.nrpages) when the file was later
+closed and unlinked.
+
+khugepaged's collapse_shmem() was forgetting to update mapping->nrpages
+on the rollback path, after it had added but then needs to undo some
+holes.
+
+There is indeed an irritating asymmetry between shmem_charge(), whose
+callers want it to increment nrpages after successfully accounting
+blocks, and shmem_uncharge(), when __delete_from_page_cache() already
+decremented nrpages itself: oh well, just add a comment on that to them
+both.
+
+And shmem_recalc_inode() is supposed to be called when the accounting is
+expected to be in balance (so it can deduce from imbalance that reclaim
+discarded some pages): so change shmem_charge() to update nrpages
+earlier (though it's rare for the difference to matter at all).
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils
+Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 4 +++-
+ mm/shmem.c      | 6 +++++-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 0378f758b065..87dbb0fcfa1a 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1537,8 +1537,10 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		khugepaged_pages_collapsed++;
+ 	} else {
+ 		/* Something went wrong: rollback changes to the radix-tree */
+-		shmem_uncharge(mapping->host, nr_none);
+ 		xa_lock_irq(&mapping->i_pages);
++		mapping->nrpages -= nr_none;
++		shmem_uncharge(mapping->host, nr_none);
++
+ 		radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
+ 			if (iter.index >= end)
+ 				break;
+diff --git a/mm/shmem.c b/mm/shmem.c
+index 38d228a30fdc..cd6b4bc221eb 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, long pages)
+ 	if (!shmem_inode_acct_block(inode, pages))
+ 		return false;
+ 
++	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
++	inode->i_mapping->nrpages += pages;
++
+ 	spin_lock_irqsave(&info->lock, flags);
+ 	info->alloced += pages;
+ 	inode->i_blocks += pages * BLOCKS_PER_PAGE;
+ 	shmem_recalc_inode(inode);
+ 	spin_unlock_irqrestore(&info->lock, flags);
+-	inode->i_mapping->nrpages += pages;
+ 
+ 	return true;
+ }
+@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode, long pages)
+ 	struct shmem_inode_info *info = SHMEM_I(inode);
+ 	unsigned long flags;
+ 
++	/* nrpages adjustment done by __delete_from_page_cache() or caller */
++
+ 	spin_lock_irqsave(&info->lock, flags);
+ 	info->alloced -= pages;
+ 	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
+-- 
+2.17.1
+
diff --git a/queue-4.19/mm-khugepaged-minor-reorderings-in-collapse_shmem.patch b/queue-4.19/mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
new file mode 100644
index 00000000000..25169c0c42a
--- /dev/null
+++ b/queue-4.19/mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
@@ -0,0 +1,234 @@
+From 709c42273f8d869886a13494176d82ebda1a01d3 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 30 Nov 2018 14:10:39 -0800
+Subject: mm/khugepaged: minor reorderings in collapse_shmem()
+
+commit 042a30824871fa3149b0127009074b75cc25863c upstream.
+
+Several cleanups in collapse_shmem(): most of which probably do not
+really matter, beyond doing things in a more familiar and reassuring
+order. Simplify the failure gotos in the main loop, and on success
+update stats while interrupts still disabled from the last iteration.
+
+Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils
+Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Jerome Glisse
+Cc: Konstantin Khlebnikov
+Cc: Matthew Wilcox
+Cc: [4.8+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/khugepaged.c | 73 ++++++++++++++++++++-----------------------
+ 1 file changed, 30 insertions(+), 43 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 068868763b78..d0a347e6fd08 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1330,13 +1330,12 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		goto out;
+ 	}
+ 
++	__SetPageLocked(new_page);
++	__SetPageSwapBacked(new_page);
+ 	new_page->index = start;
+ 	new_page->mapping = mapping;
+-	__SetPageSwapBacked(new_page);
+-	__SetPageLocked(new_page);
+ 	BUG_ON(!page_ref_freeze(new_page, 1));
+ 
+-
+ 	/*
+ 	 * At this point the new_page is 'frozen' (page_count() is zero), locked
+ 	 * and not up-to-date. It's safe to insert it into radix tree, because
+@@ -1365,13 +1364,13 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		 */
+ 		if (n && !shmem_charge(mapping->host, n)) {
+ 			result = SCAN_FAIL;
+-			break;
++			goto tree_locked;
+ 		}
+-		nr_none += n;
+ 		for (; index < min(iter.index, end); index++) {
+ 			radix_tree_insert(&mapping->i_pages, index,
+ 					new_page + (index % HPAGE_PMD_NR));
+ 		}
++		nr_none += n;
+ 
+ 		/* We are done. */
+ 		if (index >= end)
+@@ -1387,12 +1386,12 @@ static void collapse_shmem(struct mm_struct *mm,
+ 				result = SCAN_FAIL;
+ 				goto tree_unlocked;
+ 			}
+-			xa_lock_irq(&mapping->i_pages);
+ 		} else if (trylock_page(page)) {
+ 			get_page(page);
++			xa_unlock_irq(&mapping->i_pages);
+ 		} else {
+ 			result = SCAN_PAGE_LOCK;
+-			break;
++			goto tree_locked;
+ 		}
+ 
+ 		/*
+@@ -1407,11 +1406,10 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			result = SCAN_TRUNCATED;
+ 			goto out_unlock;
+ 		}
+-		xa_unlock_irq(&mapping->i_pages);
+ 
+ 		if (isolate_lru_page(page)) {
+ 			result = SCAN_DEL_PAGE_LRU;
+-			goto out_isolate_failed;
++			goto out_unlock;
+ 		}
+ 
+ 		if (page_mapped(page))
+@@ -1432,7 +1430,9 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		 */
+ 		if (!page_ref_freeze(page, 3)) {
+ 			result = SCAN_PAGE_COUNT;
+-			goto out_lru;
++			xa_unlock_irq(&mapping->i_pages);
++			putback_lru_page(page);
++			goto out_unlock;
+ 		}
+ 
+ 		/*
+@@ -1448,17 +1448,10 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		slot = radix_tree_iter_resume(slot, &iter);
+ 		index++;
+ 		continue;
+-out_lru:
+-		xa_unlock_irq(&mapping->i_pages);
+-		putback_lru_page(page);
+-out_isolate_failed:
+-		unlock_page(page);
+-		put_page(page);
+-		goto tree_unlocked;
+ out_unlock:
+ 		unlock_page(page);
+ 		put_page(page);
+-		break;
++		goto tree_unlocked;
+ 	}
+ 
+ 	/*
+@@ -1466,7 +1459,7 @@ static void collapse_shmem(struct mm_struct *mm,
+ 	 * This code only triggers if there's nothing in radix tree
+ 	 * beyond 'end'.
+ 	 */
+-	if (result == SCAN_SUCCEED && index < end) {
++	if (index < end) {
+ 		int n = end - index;
+ 
+ 		/* Stop if extent has been truncated, and is now empty */
+@@ -1478,7 +1471,6 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			result = SCAN_FAIL;
+ 			goto tree_locked;
+ 		}
+-
+ 		for (; index < end; index++) {
+ 			radix_tree_insert(&mapping->i_pages, index,
+ 					new_page + (index % HPAGE_PMD_NR));
+@@ -1486,14 +1478,19 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		nr_none += n;
+ 	}
+ 
++	__inc_node_page_state(new_page, NR_SHMEM_THPS);
++	if (nr_none) {
++		struct zone *zone = page_zone(new_page);
++
++		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
++		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
++	}
++
+ tree_locked:
+ 	xa_unlock_irq(&mapping->i_pages);
+ tree_unlocked:
+ 
+ 	if (result == SCAN_SUCCEED) {
+-		unsigned long flags;
+-		struct zone *zone = page_zone(new_page);
+-
+ 		/*
+ 		 * Replacing old pages with new one has succeed, now we need to
+ 		 * copy the content and free old pages.
+@@ -1507,11 +1504,11 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
+ 					page);
+ 			list_del(&page->lru);
+-			unlock_page(page);
+-			page_ref_unfreeze(page, 1);
+ 			page->mapping = NULL;
++			page_ref_unfreeze(page, 1);
+ 			ClearPageActive(page);
+ 			ClearPageUnevictable(page);
++			unlock_page(page);
+ 			put_page(page);
+ 			index++;
+ 		}
+@@ -1520,28 +1517,17 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			index++;
+ 		}
+ 
+-		local_irq_save(flags);
+-		__inc_node_page_state(new_page, NR_SHMEM_THPS);
+-		if (nr_none) {
+-			__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+-			__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+-		}
+-		local_irq_restore(flags);
+-
+-		/*
+-		 * Remove pte page tables, so we can re-faulti
+-		 * the page as huge.
+-		 */
+-		retract_page_tables(mapping, start);
+-
+ 		/* Everything is ready, let's unfreeze the new_page */
+-		set_page_dirty(new_page);
+ 		SetPageUptodate(new_page);
+ 		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
++		set_page_dirty(new_page);
+ 		mem_cgroup_commit_charge(new_page, memcg, false, true);
+ 		lru_cache_add_anon(new_page);
+-		unlock_page(new_page);
+ 
++		/*
++		 * Remove pte page tables, so we can re-fault the page as huge.
++		 */
++		retract_page_tables(mapping, start);
+ 		*hpage = NULL;
+ 
+ 		khugepaged_pages_collapsed++;
+@@ -1573,8 +1559,8 @@ static void collapse_shmem(struct mm_struct *mm,
+ 			radix_tree_replace_slot(&mapping->i_pages, slot, page);
+ 			slot = radix_tree_iter_resume(slot, &iter);
+ 			xa_unlock_irq(&mapping->i_pages);
+-			putback_lru_page(page);
+ 			unlock_page(page);
++			putback_lru_page(page);
+ 			xa_lock_irq(&mapping->i_pages);
+ 		}
+ 		VM_BUG_ON(nr_none);
+@@ -1583,9 +1569,10 @@ static void collapse_shmem(struct mm_struct *mm,
+ 		/* Unfreeze new_page, caller would take care about freeing it */
+ 		page_ref_unfreeze(new_page, 1);
+ 		mem_cgroup_cancel_charge(new_page, memcg, true);
+-		unlock_page(new_page);
+ 		new_page->mapping = NULL;
+ 	}
++
++	unlock_page(new_page);
+ out:
+ 	VM_BUG_ON(!list_empty(&pagelist));
+ 	/* TODO: tracepoints */
+-- 
+2.17.1
+
diff --git a/queue-4.19/series b/queue-4.19/series
new file mode 100644
index 00000000000..181684e82e8
--- /dev/null
+++ b/queue-4.19/series
@@ -0,0 +1,9 @@
+mm-huge_memory-rename-freeze_page-to-unmap_page.patch
+mm-huge_memory-splitting-set-mapping-index-before-un.patch
+mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_siz.patch
+mm-khugepaged-collapse_shmem-stop-if-punched-or-trun.patch
+mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
+mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
+mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
+mm-khugepaged-collapse_shmem-without-freezing-new_pa.patch
+mm-khugepaged-collapse_shmem-do-not-crash-on-compoun.patch