--- /dev/null
+From 0a352554da69b02f75ca3389c885c741f1f63235 Mon Sep 17 00:00:00 2001
+From: Nicolas Boichat <drinkcat@chromium.org>
+Date: Thu, 28 Mar 2019 20:43:46 -0700
+Subject: iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging
+
+From: Nicolas Boichat <drinkcat@chromium.org>
+
+commit 0a352554da69b02f75ca3389c885c741f1f63235 upstream.
+
+IOMMUs using ARMv7 short-descriptor format require page tables (level 1
+and 2) to be allocated within the first 4GB of RAM, even on 64-bit
+systems.
+
+For level 1/2 pages, ensure GFP_DMA32 is used if CONFIG_ZONE_DMA32 is
+defined (e.g. on arm64 platforms).
+
+For level 2 pages, allocate a slab cache in SLAB_CACHE_DMA32. Note that
+we do not explicitly pass GFP_DMA[32] to kmem_cache_zalloc, as this is
+not strictly necessary, and would cause a warning in mm/sl*b.c, as we
+did not update GFP_SLAB_BUG_MASK.
+
+Also, print an error when the physical address does not fit in
+32-bit, to make debugging easier in the future.
+
+Link: http://lkml.kernel.org/r/20181210011504.122604-3-drinkcat@chromium.org
+Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
+Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Hsin-Yi Wang <hsinyi@chromium.org>
+Cc: Huaisheng Ye <yehs1@lenovo.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Matthias Brugger <matthias.bgg@gmail.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: Sasha Levin <Alexander.Levin@microsoft.com>
+Cc: Tomasz Figa <tfiga@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yingjoe Chen <yingjoe.chen@mediatek.com>
+Cc: Yong Wu <yong.wu@mediatek.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/drivers/iommu/io-pgtable-arm-v7s.c
++++ b/drivers/iommu/io-pgtable-arm-v7s.c
+@@ -161,6 +161,14 @@
+
+ #define ARM_V7S_TCR_PD1 BIT(5)
+
++#ifdef CONFIG_ZONE_DMA32
++#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
++#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
++#else
++#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
++#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
++#endif
++
+ typedef u32 arm_v7s_iopte;
+
+ static bool selftest_running;
+@@ -198,13 +206,16 @@ static void *__arm_v7s_alloc_table(int l
+ void *table = NULL;
+
+ if (lvl == 1)
+- table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
++ table = (void *)__get_free_pages(
++ __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
+ else if (lvl == 2)
+- table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
++ table = kmem_cache_zalloc(data->l2_tables, gfp);
+ phys = virt_to_phys(table);
+- if (phys != (arm_v7s_iopte)phys)
++ if (phys != (arm_v7s_iopte)phys) {
+ /* Doesn't fit in PTE */
++ dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
+ goto out_free;
++ }
+ if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
+ dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma))
+@@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_
+ data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
+ ARM_V7S_TABLE_SIZE(2),
+ ARM_V7S_TABLE_SIZE(2),
+- SLAB_CACHE_DMA, NULL);
++ ARM_V7S_TABLE_SLAB_FLAGS, NULL);
+ if (!data->l2_tables)
+ goto out_free_data;
+
--- /dev/null
+From 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a Mon Sep 17 00:00:00 2001
+From: Nicolas Boichat <drinkcat@chromium.org>
+Date: Thu, 28 Mar 2019 20:43:42 -0700
+Subject: mm: add support for kmem caches in DMA32 zone
+
+From: Nicolas Boichat <drinkcat@chromium.org>
+
+commit 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a upstream.
+
+Patch series "iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables",
+v6.
+
+This is a followup to the discussion in [1], [2].
+
+IOMMUs using ARMv7 short-descriptor format require page tables (level 1
+and 2) to be allocated within the first 4GB of RAM, even on 64-bit
+systems.
+
+For L1 tables that are bigger than a page, we can just use
+__get_free_pages with GFP_DMA32 (on arm64 systems only, arm would still
+use GFP_DMA).
+
+For L2 tables that only take 1KB, it would be a waste to allocate a full
+page, so we considered 3 approaches:
+ 1. This series, adding support for GFP_DMA32 slab caches.
+ 2. genalloc, which requires pre-allocating the maximum number of L2 page
+ tables (4096, so 4MB of memory).
+ 3. page_frag, which is not very memory-efficient as it is unable to reuse
+ freed fragments until the whole page is freed. [3]
+
+This series is the most memory-efficient approach.
+
+stable@ note:
+ We confirmed that this is a regression, and IOMMU errors happen on 4.19
+ and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue
+ most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA
+ with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek
+ platforms (and maybe others?).
+
+[1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html
+[2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html
+[3] https://patchwork.codeaurora.org/patch/671639/
+
+This patch (of 3):
+
+IOMMUs using ARMv7 short-descriptor format require page tables to be
+allocated within the first 4GB of RAM, even on 64-bit systems. On arm64,
+this is done by passing GFP_DMA32 flag to memory allocation functions.
+
+For IOMMU L2 tables that only take 1KB, it would be a waste to allocate
+a full page using get_free_pages, so we considered 3 approaches:
+ 1. This patch, adding support for GFP_DMA32 slab caches.
+ 2. genalloc, which requires pre-allocating the maximum number of L2
+ page tables (4096, so 4MB of memory).
+ 3. page_frag, which is not very memory-efficient as it is unable
+ to reuse freed fragments until the whole page is freed.
+
+This change makes it possible to create a custom cache in DMA32 zone using
+kmem_cache_create, then allocate memory using kmem_cache_alloc.
+
+We do not create a DMA32 kmalloc cache array, as there are currently no
+users of kmalloc(..., GFP_DMA32). These calls will continue to trigger a
+warning, as we keep GFP_DMA32 in GFP_SLAB_BUG_MASK.
+
+This implies that calls to kmem_cache_*alloc on a SLAB_CACHE_DMA32
+kmem_cache must _not_ use GFP_DMA32 (it is anyway redundant and
+unnecessary).
+
+Link: http://lkml.kernel.org/r/20181210011504.122604-2-drinkcat@chromium.org
+Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Sasha Levin <Alexander.Levin@microsoft.com>
+Cc: Huaisheng Ye <yehs1@lenovo.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Yong Wu <yong.wu@mediatek.com>
+Cc: Matthias Brugger <matthias.bgg@gmail.com>
+Cc: Tomasz Figa <tfiga@google.com>
+Cc: Yingjoe Chen <yingjoe.chen@mediatek.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Hsin-Yi Wang <hsinyi@chromium.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/slab.h | 2 ++
+ mm/slab.c | 2 ++
+ mm/slab.h | 3 ++-
+ mm/slab_common.c | 2 +-
+ mm/slub.c | 5 +++++
+ 5 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/include/linux/slab.h
++++ b/include/linux/slab.h
+@@ -32,6 +32,8 @@
+ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
+ /* Use GFP_DMA memory */
+ #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U)
++/* Use GFP_DMA32 memory */
++#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U)
+ /* DEBUG: Store the last owner for bug hunting */
+ #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U)
+ /* Panic if kmem_cache_create() fails */
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -2111,6 +2111,8 @@ done:
+ cachep->allocflags = __GFP_COMP;
+ if (flags & SLAB_CACHE_DMA)
+ cachep->allocflags |= GFP_DMA;
++ if (flags & SLAB_CACHE_DMA32)
++ cachep->allocflags |= GFP_DMA32;
+ if (flags & SLAB_RECLAIM_ACCOUNT)
+ cachep->allocflags |= __GFP_RECLAIMABLE;
+ cachep->size = size;
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_fl
+
+
+ /* Legal flag mask for kmem_cache_create(), for various configurations */
+-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
++#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
++ SLAB_CACHE_DMA32 | SLAB_PANIC | \
+ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
+
+ #if defined(CONFIG_DEBUG_SLAB)
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_d
+ SLAB_FAILSLAB | SLAB_KASAN)
+
+ #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
+- SLAB_ACCOUNT)
++ SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
+
+ /*
+ * Merge control. If this is set then no merging of slab caches will occur.
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3591,6 +3591,9 @@ static int calculate_sizes(struct kmem_c
+ if (s->flags & SLAB_CACHE_DMA)
+ s->allocflags |= GFP_DMA;
+
++ if (s->flags & SLAB_CACHE_DMA32)
++ s->allocflags |= GFP_DMA32;
++
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
+ s->allocflags |= __GFP_RECLAIMABLE;
+
+@@ -5681,6 +5684,8 @@ static char *create_unique_id(struct kme
+ */
+ if (s->flags & SLAB_CACHE_DMA)
+ *p++ = 'd';
++ if (s->flags & SLAB_CACHE_DMA32)
++ *p++ = 'D';
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
+ *p++ = 'a';
+ if (s->flags & SLAB_CONSISTENCY_CHECKS)
--- /dev/null
+From 5ae2efb1dea9f537453e841714e3ee2757595aec Mon Sep 17 00:00:00 2001
+From: Oscar Salvador <osalvador@suse.de>
+Date: Thu, 28 Mar 2019 20:44:01 -0700
+Subject: mm/debug.c: fix __dump_page when mapping->host is not set
+
+From: Oscar Salvador <osalvador@suse.de>
+
+commit 5ae2efb1dea9f537453e841714e3ee2757595aec upstream.
+
+While debugging something, I added a dump_page() into do_swap_page(),
+and I got the splat from below. The issue happens when dereferencing
+mapping->host in __dump_page():
+
+ ...
+ else if (mapping) {
+ pr_warn("%ps ", mapping->a_ops);
+ if (mapping->host->i_dentry.first) {
+ struct dentry *dentry;
+ dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
+ pr_warn("name:\"%pd\" ", dentry);
+ }
+ }
+ ...
+
+The swap address space does not contain any inode information, and so
+mapping->host is NULL.
+
+Although the dump_page() call was added artificially into
+do_swap_page(), I am not sure if we can hit this from any other path, so
+it looks worth fixing it. We can easily do that by checking
+mapping->host first.
+
+Link: http://lkml.kernel.org/r/20190318072931.29094-1-osalvador@suse.de
+Fixes: 1c6fb1d89e73c ("mm: print more information about mapping in __dump_page")
+Signed-off-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/debug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -79,7 +79,7 @@ void __dump_page(struct page *page, cons
+ pr_warn("ksm ");
+ else if (mapping) {
+ pr_warn("%ps ", mapping->a_ops);
+- if (mapping->host->i_dentry.first) {
++ if (mapping->host && mapping->host->i_dentry.first) {
+ struct dentry *dentry;
+ dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
+ pr_warn("name:\"%pd\" ", dentry);
--- /dev/null
+From 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Thu, 28 Mar 2019 20:43:34 -0700
+Subject: mm/hotplug: fix offline undo_isolate_page_range()
+
+From: Qian Cai <cai@lca.pw>
+
+commit 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab upstream.
+
+Commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded
+memory to zones until online") introduced move_pfn_range_to_zone() which
+calls memmap_init_zone() during onlining a memory block.
+memmap_init_zone() will reset pagetype flags and makes migrate type to
+be MOVABLE.
+
+However, __offline_pages() also calls undo_isolate_page_range()
+after offline_isolated_pages() to do the same thing. Since commit
+2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") changed
+__first_valid_page() to skip offline pages, undo_isolate_page_range()
+here just wastes CPU cycles looping around the offlining PFN range while
+doing nothing, because __first_valid_page() will return NULL as
+offline_isolated_pages() has already marked all memory sections within
+the pfn range as offline via offline_mem_sections().
+
+Also, after calling the "useless" undo_isolate_page_range() here, it
+reaches the point of no return by notifying MEM_OFFLINE. Those pages
+will be marked as MIGRATE_MOVABLE again once onlined. The only thing
+left to do is to decrease the number of isolated pageblocks zone counter
+which would make some paths of the page allocation slower that the above
+commit introduced.
+
+Even if alloc_contig_range() can be used to isolate 16GB-hugetlb pages
+on ppc64, an "int" should still be enough to represent the number of
+pageblocks there. Fix an incorrect comment along the way.
+
+[cai@lca.pw: v4]
+ Link: http://lkml.kernel.org/r/20190314150641.59358-1-cai@lca.pw
+Link: http://lkml.kernel.org/r/20190313143133.46200-1-cai@lca.pw
+Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages")
+Signed-off-by: Qian Cai <cai@lca.pw>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org> [4.13+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/page-isolation.h | 10 --------
+ mm/memory_hotplug.c | 17 +++++++++++---
+ mm/page_alloc.c | 2 -
+ mm/page_isolation.c | 48 +++++++++++++++++++++++++----------------
+ mm/sparse.c | 2 -
+ 5 files changed, 45 insertions(+), 34 deletions(-)
+
+--- a/include/linux/page-isolation.h
++++ b/include/linux/page-isolation.h
+@@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zo
+
+ /*
+ * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
+- * If specified range includes migrate types other than MOVABLE or CMA,
+- * this will fail with -EBUSY.
+- *
+- * For isolating all pages in the range finally, the caller have to
+- * free all pages in the range. test_page_isolated() can be used for
+- * test it.
+- *
+- * The following flags are allowed (they can be combined in a bit mask)
+- * SKIP_HWPOISON - ignore hwpoison pages
+- * REPORT_FAILURE - report details about the failure to isolate the range
+ */
+ int
+ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1560,7 +1560,7 @@ static int __ref __offline_pages(unsigne
+ {
+ unsigned long pfn, nr_pages;
+ long offlined_pages;
+- int ret, node;
++ int ret, node, nr_isolate_pageblock;
+ unsigned long flags;
+ unsigned long valid_start, valid_end;
+ struct zone *zone;
+@@ -1586,10 +1586,11 @@ static int __ref __offline_pages(unsigne
+ ret = start_isolate_page_range(start_pfn, end_pfn,
+ MIGRATE_MOVABLE,
+ SKIP_HWPOISON | REPORT_FAILURE);
+- if (ret) {
++ if (ret < 0) {
+ reason = "failure to isolate range";
+ goto failed_removal;
+ }
++ nr_isolate_pageblock = ret;
+
+ arg.start_pfn = start_pfn;
+ arg.nr_pages = nr_pages;
+@@ -1642,8 +1643,16 @@ static int __ref __offline_pages(unsigne
+ /* Ok, all of our target is isolated.
+ We cannot do rollback at this point. */
+ offline_isolated_pages(start_pfn, end_pfn);
+- /* reset pagetype flags and makes migrate type to be MOVABLE */
+- undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
++
++ /*
++ * Onlining will reset pagetype flags and makes migrate type
++ * MOVABLE, so just need to decrease the number of isolated
++ * pageblocks zone counter here.
++ */
++ spin_lock_irqsave(&zone->lock, flags);
++ zone->nr_isolate_pageblock -= nr_isolate_pageblock;
++ spin_unlock_irqrestore(&zone->lock, flags);
++
+ /* removal success */
+ adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
+ zone->present_pages -= offlined_pages;
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -8160,7 +8160,7 @@ int alloc_contig_range(unsigned long sta
+
+ ret = start_isolate_page_range(pfn_max_align_down(start),
+ pfn_max_align_up(end), migratetype, 0);
+- if (ret)
++ if (ret < 0)
+ return ret;
+
+ /*
+--- a/mm/page_isolation.c
++++ b/mm/page_isolation.c
+@@ -160,27 +160,36 @@ __first_valid_page(unsigned long pfn, un
+ return NULL;
+ }
+
+-/*
+- * start_isolate_page_range() -- make page-allocation-type of range of pages
+- * to be MIGRATE_ISOLATE.
+- * @start_pfn: The lower PFN of the range to be isolated.
+- * @end_pfn: The upper PFN of the range to be isolated.
+- * @migratetype: migrate type to set in error recovery.
++/**
++ * start_isolate_page_range() - make page-allocation-type of range of pages to
++ * be MIGRATE_ISOLATE.
++ * @start_pfn: The lower PFN of the range to be isolated.
++ * @end_pfn: The upper PFN of the range to be isolated.
++ * start_pfn/end_pfn must be aligned to pageblock_order.
++ * @migratetype: Migrate type to set in error recovery.
++ * @flags: The following flags are allowed (they can be combined in
++ * a bit mask)
++ * SKIP_HWPOISON - ignore hwpoison pages
++ * REPORT_FAILURE - report details about the failure to
++ * isolate the range
+ *
+ * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
+ * the range will never be allocated. Any free pages and pages freed in the
+- * future will not be allocated again.
+- *
+- * start_pfn/end_pfn must be aligned to pageblock_order.
+- * Return 0 on success and -EBUSY if any part of range cannot be isolated.
++ * future will not be allocated again. If specified range includes migrate types
++ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
++ * pages in the range finally, the caller have to free all pages in the range.
++ * test_page_isolated() can be used for test it.
+ *
+ * There is no high level synchronization mechanism that prevents two threads
+- * from trying to isolate overlapping ranges. If this happens, one thread
++ * from trying to isolate overlapping ranges. If this happens, one thread
+ * will notice pageblocks in the overlapping range already set to isolate.
+ * This happens in set_migratetype_isolate, and set_migratetype_isolate
+- * returns an error. We then clean up by restoring the migration type on
+- * pageblocks we may have modified and return -EBUSY to caller. This
++ * returns an error. We then clean up by restoring the migration type on
++ * pageblocks we may have modified and return -EBUSY to caller. This
+ * prevents two threads from simultaneously working on overlapping ranges.
++ *
++ * Return: the number of isolated pageblocks on success and -EBUSY if any part
++ * of range cannot be isolated.
+ */
+ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+ unsigned migratetype, int flags)
+@@ -188,6 +197,7 @@ int start_isolate_page_range(unsigned lo
+ unsigned long pfn;
+ unsigned long undo_pfn;
+ struct page *page;
++ int nr_isolate_pageblock = 0;
+
+ BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
+ BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
+@@ -196,13 +206,15 @@ int start_isolate_page_range(unsigned lo
+ pfn < end_pfn;
+ pfn += pageblock_nr_pages) {
+ page = __first_valid_page(pfn, pageblock_nr_pages);
+- if (page &&
+- set_migratetype_isolate(page, migratetype, flags)) {
+- undo_pfn = pfn;
+- goto undo;
++ if (page) {
++ if (set_migratetype_isolate(page, migratetype, flags)) {
++ undo_pfn = pfn;
++ goto undo;
++ }
++ nr_isolate_pageblock++;
+ }
+ }
+- return 0;
++ return nr_isolate_pageblock;
+ undo:
+ for (pfn = start_pfn;
+ pfn < undo_pfn;
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -556,7 +556,7 @@ void online_mem_sections(unsigned long s
+ }
+
+ #ifdef CONFIG_MEMORY_HOTREMOVE
+-/* Mark all memory sections within the pfn range as online */
++/* Mark all memory sections within the pfn range as offline */
+ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+ {
+ unsigned long pfn;
--- /dev/null
+From c4efe484b5f0d768e23c9731082fec827723e738 Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Thu, 28 Mar 2019 20:44:16 -0700
+Subject: mm/memory_hotplug.c: fix notification in offline error path
+
+From: Qian Cai <cai@lca.pw>
+
+commit c4efe484b5f0d768e23c9731082fec827723e738 upstream.
+
+When start_isolate_page_range() returns -EBUSY in __offline_pages(), it
+calls memory_notify(MEM_CANCEL_OFFLINE, &arg) with an uninitialized
+"arg". As a result, it triggers the warnings below. Also, it is only
+necessary to notify MEM_CANCEL_OFFLINE after MEM_GOING_OFFLINE.
+
+ page:ffffea0001200000 count:1 mapcount:0 mapping:0000000000000000
+ index:0x0
+ flags: 0x3fffe000001000(reserved)
+ raw: 003fffe000001000 ffffea0001200008 ffffea0001200008 0000000000000000
+ raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+ page dumped because: unmovable page
+ WARNING: CPU: 25 PID: 1665 at mm/kasan/common.c:665
+ kasan_mem_notifier+0x34/0x23b
+ CPU: 25 PID: 1665 Comm: bash Tainted: G W 5.0.0+ #94
+ Hardware name: HP ProLiant DL180 Gen9/ProLiant DL180 Gen9, BIOS U20
+ 10/25/2017
+ RIP: 0010:kasan_mem_notifier+0x34/0x23b
+ RSP: 0018:ffff8883ec737890 EFLAGS: 00010206
+ RAX: 0000000000000246 RBX: ff10f0f4435f1000 RCX: f887a7a21af88000
+ RDX: dffffc0000000000 RSI: 0000000000000020 RDI: ffff8881f221af88
+ RBP: ffff8883ec737898 R08: ffff888000000000 R09: ffffffffb0bddcd0
+ R10: ffffed103e857088 R11: ffff8881f42b8443 R12: dffffc0000000000
+ R13: 00000000fffffff9 R14: dffffc0000000000 R15: 0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000560fbd31d730 CR3: 00000004049c6003 CR4: 00000000001606a0
+ Call Trace:
+ notifier_call_chain+0xbf/0x130
+ __blocking_notifier_call_chain+0x76/0xc0
+ blocking_notifier_call_chain+0x16/0x20
+ memory_notify+0x1b/0x20
+ __offline_pages+0x3e2/0x1210
+ offline_pages+0x11/0x20
+ memory_block_action+0x144/0x300
+ memory_subsys_offline+0xe5/0x170
+ device_offline+0x13f/0x1e0
+ state_store+0xeb/0x110
+ dev_attr_store+0x3f/0x70
+ sysfs_kf_write+0x104/0x150
+ kernfs_fop_write+0x25c/0x410
+ __vfs_write+0x66/0x120
+ vfs_write+0x15a/0x4f0
+ ksys_write+0xd2/0x1b0
+ __x64_sys_write+0x73/0xb0
+ do_syscall_64+0xeb/0xb78
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x7f14f75cc3b8
+ RSP: 002b:00007ffe84d01d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+ RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f14f75cc3b8
+ RDX: 0000000000000008 RSI: 0000563f8e433d70 RDI: 0000000000000001
+ RBP: 0000563f8e433d70 R08: 000000000000000a R09: 00007ffe84d018f0
+ R10: 000000000000000a R11: 0000000000000246 R12: 00007f14f789e780
+ R13: 0000000000000008 R14: 00007f14f7899740 R15: 0000000000000008
+
+Link: http://lkml.kernel.org/r/20190320204255.53571-1-cai@lca.pw
+Fixes: 7960509329c2 ("mm, memory_hotplug: print reason for the offlining failure")
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Qian Cai <cai@lca.pw>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: <stable@vger.kernel.org> [5.0.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory_hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1684,12 +1684,12 @@ static int __ref __offline_pages(unsigne
+
+ failed_removal_isolated:
+ undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
++ memory_notify(MEM_CANCEL_OFFLINE, &arg);
+ failed_removal:
+ pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
+ (unsigned long long) start_pfn << PAGE_SHIFT,
+ ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
+ reason);
+- memory_notify(MEM_CANCEL_OFFLINE, &arg);
+ /* pushback to free area */
+ mem_hotplug_done();
+ return ret;
--- /dev/null
+From a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 Mon Sep 17 00:00:00 2001
+From: Yang Shi <yang.shi@linux.alibaba.com>
+Date: Thu, 28 Mar 2019 20:43:55 -0700
+Subject: mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified
+
+From: Yang Shi <yang.shi@linux.alibaba.com>
+
+commit a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 upstream.
+
+When MPOL_MF_STRICT was specified and an existing page was already on a
+node that does not follow the policy, mbind() should return -EIO. But
+commit 6f4576e3687b ("mempolicy: apply page table walker on
+queue_pages_range()") broke the rule.
+
+And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support
+thp migration") didn't return the correct value for THP mbind() either.
+
+If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it
+reaches queue_pages_pte_range() or queue_pages_pmd() to check if an
+existing page was already on a node that does not follow the policy.
+And, since a non-migratable vma may be used, return -EIO too if
+MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified.
+
+Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c
+
+[akpm@linux-foundation.org: tweak code comment]
+Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com
+Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
+Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Oscar Salvador <osalvador@suse.de>
+Reported-by: Cyril Hrubis <chrubis@suse.cz>
+Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
+Acked-by: Rafael Aquini <aquini@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: David Rientjes <rientjes@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 33 insertions(+), 7 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -428,6 +428,13 @@ static inline bool queue_pages_required(
+ return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
+ }
+
++/*
++ * queue_pages_pmd() has three possible return values:
++ * 1 - pages are placed on the right node or queued successfully.
++ * 0 - THP was split.
++ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
++ * page was already on a node that does not follow the policy.
++ */
+ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+ {
+@@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, s
+ unsigned long flags;
+
+ if (unlikely(is_pmd_migration_entry(*pmd))) {
+- ret = 1;
++ ret = -EIO;
+ goto unlock;
+ }
+ page = pmd_page(*pmd);
+@@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, s
+ ret = 1;
+ flags = qp->flags;
+ /* go to thp migration */
+- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
++ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
++ if (!vma_migratable(walk->vma)) {
++ ret = -EIO;
++ goto unlock;
++ }
++
+ migrate_page_add(page, qp->pagelist, flags);
++ } else
++ ret = -EIO;
+ unlock:
+ spin_unlock(ptl);
+ out:
+@@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
+- if (ret)
++ if (ret > 0)
+ return 0;
++ else if (ret < 0)
++ return ret;
+ }
+
+ if (pmd_trans_unstable(pmd))
+@@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *
+ continue;
+ if (!queue_pages_required(page, qp))
+ continue;
+- migrate_page_add(page, qp->pagelist, flags);
++ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
++ if (!vma_migratable(vma))
++ break;
++ migrate_page_add(page, qp->pagelist, flags);
++ } else
++ break;
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
+- return 0;
++ return addr != end ? -EIO : 0;
+ }
+
+ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+@@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigne
+ unsigned long endvma = vma->vm_end;
+ unsigned long flags = qp->flags;
+
+- if (!vma_migratable(vma))
++ /*
++ * Need check MPOL_MF_STRICT to return -EIO if possible
++ * regardless of vma_migratable
++ */
++ if (!vma_migratable(vma) &&
++ !(flags & MPOL_MF_STRICT))
+ return 1;
+
+ if (endvma > end)
+@@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigne
+ }
+
+ /* queue pages from current vma */
+- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
++ if (flags & MPOL_MF_VALID)
+ return 0;
+ return 1;
+ }
--- /dev/null
+From d2b2c6dd227ba5b8a802858748ec9a780cb75b47 Mon Sep 17 00:00:00 2001
+From: Lars Persson <lars.persson@axis.com>
+Date: Thu, 28 Mar 2019 20:44:28 -0700
+Subject: mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate
+
+From: Lars Persson <lars.persson@axis.com>
+
+commit d2b2c6dd227ba5b8a802858748ec9a780cb75b47 upstream.
+
+Our MIPS 1004Kc SoCs were seeing random userspace crashes with SIGILL
+and SIGSEGV that could not be traced back to a userspace code bug. They
+had all the magic signs of an I/D cache coherency issue.
+
+Now recently we noticed that the /proc/sys/vm/compact_memory interface
+was quite efficient at provoking this class of userspace crashes.
+
+Studying the code in mm/migrate.c there is a distinction made between
+migrating a page that is mapped at the instant of migration and one that
+is not mapped. Our problem turned out to be the non-mapped pages.
+
+For the non-mapped page the code performs a copy of the page content and
+all relevant meta-data of the page without doing the required D-cache
+maintenance. This leaves dirty data in the D-cache of the CPU and on
+the 1004K cores this data is not visible to the I-cache. A subsequent
+page-fault that triggers a mapping of the page will happily serve the
+process with potentially stale code.
+
+What about ARM then? Shouldn't this bug have seen greater exposure
+there? Well, ARM became immune to this flaw back in 2010, see commit
+c01778001a4f ("ARM: 6379/1: Assume new page cache pages have dirty D-cache").
+
+My proposed fix moves the D-cache maintenance inside move_to_new_page to
+make it common for both cases.
+
+Link: http://lkml.kernel.org/r/20190315083502.11849-1-larper@axis.com
+Fixes: 97ee0524614 ("flush cache before installing new page at migraton")
+Signed-off-by: Lars Persson <larper@axis.com>
+Reviewed-by: Paul Burton <paul.burton@mips.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -248,10 +248,8 @@ static bool remove_migration_pte(struct
+ pte = swp_entry_to_pte(entry);
+ } else if (is_device_public_page(new)) {
+ pte = pte_mkdevmap(pte);
+- flush_dcache_page(new);
+ }
+- } else
+- flush_dcache_page(new);
++ }
+
+ #ifdef CONFIG_HUGETLB_PAGE
+ if (PageHuge(new)) {
+@@ -995,6 +993,13 @@ static int move_to_new_page(struct page
+ */
+ if (!PageMappingFlags(page))
+ page->mapping = NULL;
++
++ if (unlikely(is_zone_device_page(newpage))) {
++ if (is_device_public_page(newpage))
++ flush_dcache_page(newpage);
++ } else
++ flush_dcache_page(newpage);
++
+ }
+ out:
+ return rc;
--- /dev/null
+From f5777bc2d9cf0712554228b1a7927b6f13f5c1f0 Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Thu, 28 Mar 2019 20:44:21 -0700
+Subject: mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate()
+
+From: Qian Cai <cai@lca.pw>
+
+commit f5777bc2d9cf0712554228b1a7927b6f13f5c1f0 upstream.
+
+Due to has_unmovable_pages() taking an incorrect irqsave flag instead of
+the isolation flag in set_migratetype_isolate(), there are issues with
+HWPOISON and error reporting where dump_page() is not called when there
+is an unmovable page.
+
+Link: http://lkml.kernel.org/r/20190320204941.53731-1-cai@lca.pw
+Fixes: d381c54760dc ("mm: only report isolation failures when offlining memory")
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Signed-off-by: Qian Cai <cai@lca.pw>
+Cc: <stable@vger.kernel.org> [5.0.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_isolation.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/page_isolation.c
++++ b/mm/page_isolation.c
+@@ -59,7 +59,8 @@ static int set_migratetype_isolate(struc
+ * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
+ * We just check MOVABLE pages.
+ */
+- if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags))
++ if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype,
++ isol_flags))
+ ret = 0;
+
+ /*
usb-cdc-acm-fix-race-during-wakeup-blocking-tx-traffic.patch
usb-typec-tcpm-try-pd-2.0-if-sink-does-not-respond-to-3.0-source-caps.patch
usb-typec-fix-unchecked-return-value.patch
+mm-hotplug-fix-offline-undo_isolate_page_range.patch
+mm-add-support-for-kmem-caches-in-dma32-zone.patch
+iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch
+mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch
+mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch
+mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch
+mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch
+mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch