From: Greg Kroah-Hartman Date: Sun, 31 Mar 2019 15:01:32 +0000 (+0200) Subject: 5.0-stable patches X-Git-Tag: v3.18.138~23 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2939670ceb69e13152ad461b3e02f295e283720d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.0-stable patches added patches: iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch mm-add-support-for-kmem-caches-in-dma32-zone.patch mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch mm-hotplug-fix-offline-undo_isolate_page_range.patch mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch --- diff --git a/queue-5.0/iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch b/queue-5.0/iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch new file mode 100644 index 00000000000..dd02cbd6535 --- /dev/null +++ b/queue-5.0/iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch @@ -0,0 +1,102 @@ +From 0a352554da69b02f75ca3389c885c741f1f63235 Mon Sep 17 00:00:00 2001 +From: Nicolas Boichat +Date: Thu, 28 Mar 2019 20:43:46 -0700 +Subject: iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging + +From: Nicolas Boichat + +commit 0a352554da69b02f75ca3389c885c741f1f63235 upstream. + +IOMMUs using ARMv7 short-descriptor format require page tables (level 1 +and 2) to be allocated within the first 4GB of RAM, even on 64-bit +systems. + +For level 1/2 pages, ensure GFP_DMA32 is used if CONFIG_ZONE_DMA32 is +defined (e.g. on arm64 platforms). + +For level 2 pages, allocate a slab cache in SLAB_CACHE_DMA32. Note that +we do not explicitly pass GFP_DMA[32] to kmem_cache_zalloc, as this is +not strictly necessary, and would cause a warning in mm/sl*b.c, as we +did not update GFP_SLAB_BUG_MASK. + +Also, print an error when the physical address does not fit in +32-bit, to make debugging easier in the future. 
+ +Link: http://lkml.kernel.org/r/20181210011504.122604-3-drinkcat@chromium.org +Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32") +Signed-off-by: Nicolas Boichat +Acked-by: Will Deacon +Cc: Christoph Hellwig +Cc: Christoph Lameter +Cc: David Rientjes +Cc: Hsin-Yi Wang +Cc: Huaisheng Ye +Cc: Joerg Roedel +Cc: Joonsoo Kim +Cc: Matthew Wilcox +Cc: Matthias Brugger +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Pekka Enberg +Cc: Robin Murphy +Cc: Sasha Levin +Cc: Tomasz Figa +Cc: Vlastimil Babka +Cc: Yingjoe Chen +Cc: Yong Wu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/drivers/iommu/io-pgtable-arm-v7s.c ++++ b/drivers/iommu/io-pgtable-arm-v7s.c +@@ -161,6 +161,14 @@ + + #define ARM_V7S_TCR_PD1 BIT(5) + ++#ifdef CONFIG_ZONE_DMA32 ++#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 ++#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 ++#else ++#define ARM_V7S_TABLE_GFP_DMA GFP_DMA ++#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA ++#endif ++ + typedef u32 arm_v7s_iopte; + + static bool selftest_running; +@@ -198,13 +206,16 @@ static void *__arm_v7s_alloc_table(int l + void *table = NULL; + + if (lvl == 1) +- table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); ++ table = (void *)__get_free_pages( ++ __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); + else if (lvl == 2) +- table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); ++ table = kmem_cache_zalloc(data->l2_tables, gfp); + phys = virt_to_phys(table); +- if (phys != (arm_v7s_iopte)phys) ++ if (phys != (arm_v7s_iopte)phys) { + /* Doesn't fit in PTE */ ++ dev_err(dev, "Page table does not fit in PTE: %pa", &phys); + goto out_free; ++ } + if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) +@@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_ + data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", + ARM_V7S_TABLE_SIZE(2), + ARM_V7S_TABLE_SIZE(2), +- SLAB_CACHE_DMA, NULL); ++ ARM_V7S_TABLE_SLAB_FLAGS, NULL); + if (!data->l2_tables) + goto out_free_data; + diff --git a/queue-5.0/mm-add-support-for-kmem-caches-in-dma32-zone.patch b/queue-5.0/mm-add-support-for-kmem-caches-in-dma32-zone.patch new file mode 100644 index 00000000000..a61ed736084 --- /dev/null +++ b/queue-5.0/mm-add-support-for-kmem-caches-in-dma32-zone.patch @@ -0,0 +1,169 @@ +From 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a Mon Sep 17 00:00:00 2001 +From: Nicolas Boichat +Date: Thu, 28 Mar 2019 20:43:42 -0700 +Subject: mm: add support for kmem caches in DMA32 zone + +From: Nicolas Boichat + +commit 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a upstream. + +Patch series "iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables", +v6. + +This is a followup to the discussion in [1], [2]. + +IOMMUs using ARMv7 short-descriptor format require page tables (level 1 +and 2) to be allocated within the first 4GB of RAM, even on 64-bit +systems. + +For L1 tables that are bigger than a page, we can just use +__get_free_pages with GFP_DMA32 (on arm64 systems only, arm would still +use GFP_DMA). + +For L2 tables that only take 1KB, it would be a waste to allocate a full +page, so we considered 3 approaches: + 1. This series, adding support for GFP_DMA32 slab caches. + 2. genalloc, which requires pre-allocating the maximum number of L2 page + tables (4096, so 4MB of memory). + 3. 
page_frag, which is not very memory-efficient as it is unable to reuse + freed fragments until the whole page is freed. [3] + +This series is the most memory-efficient approach. + +stable@ note: + We confirmed that this is a regression, and IOMMU errors happen on 4.19 + and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue + most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA + with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek + platforms (and maybe others?). + +[1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html +[2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html +[3] https://patchwork.codeaurora.org/patch/671639/ + +This patch (of 3): + +IOMMUs using ARMv7 short-descriptor format require page tables to be +allocated within the first 4GB of RAM, even on 64-bit systems. On arm64, +this is done by passing GFP_DMA32 flag to memory allocation functions. + +For IOMMU L2 tables that only take 1KB, it would be a waste to allocate +a full page using get_free_pages, so we considered 3 approaches: + 1. This patch, adding support for GFP_DMA32 slab caches. + 2. genalloc, which requires pre-allocating the maximum number of L2 + page tables (4096, so 4MB of memory). + 3. page_frag, which is not very memory-efficient as it is unable + to reuse freed fragments until the whole page is freed. + +This change makes it possible to create a custom cache in DMA32 zone using +kmem_cache_create, then allocate memory using kmem_cache_alloc. + +We do not create a DMA32 kmalloc cache array, as there are currently no +users of kmalloc(..., GFP_DMA32). These calls will continue to trigger a +warning, as we keep GFP_DMA32 in GFP_SLAB_BUG_MASK. + +This implies that calls to kmem_cache_*alloc on a SLAB_CACHE_DMA32 +kmem_cache must _not_ use GFP_DMA32 (it is anyway redundant and +unnecessary). 
+ +Link: http://lkml.kernel.org/r/20181210011504.122604-2-drinkcat@chromium.org +Signed-off-by: Nicolas Boichat +Acked-by: Vlastimil Babka +Acked-by: Will Deacon +Cc: Robin Murphy +Cc: Joerg Roedel +Cc: Christoph Lameter +Cc: Pekka Enberg +Cc: David Rientjes +Cc: Joonsoo Kim +Cc: Michal Hocko +Cc: Mel Gorman +Cc: Sasha Levin +Cc: Huaisheng Ye +Cc: Mike Rapoport +Cc: Yong Wu +Cc: Matthias Brugger +Cc: Tomasz Figa +Cc: Yingjoe Chen +Cc: Christoph Hellwig +Cc: Matthew Wilcox +Cc: Hsin-Yi Wang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/slab.h | 2 ++ + mm/slab.c | 2 ++ + mm/slab.h | 3 ++- + mm/slab_common.c | 2 +- + mm/slub.c | 5 +++++ + 5 files changed, 12 insertions(+), 2 deletions(-) + +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -32,6 +32,8 @@ + #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) + /* Use GFP_DMA memory */ + #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) ++/* Use GFP_DMA32 memory */ ++#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) + /* DEBUG: Store the last owner for bug hunting */ + #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) + /* Panic if kmem_cache_create() fails */ +--- a/mm/slab.c ++++ b/mm/slab.c +@@ -2111,6 +2111,8 @@ done: + cachep->allocflags = __GFP_COMP; + if (flags & SLAB_CACHE_DMA) + cachep->allocflags |= GFP_DMA; ++ if (flags & SLAB_CACHE_DMA32) ++ cachep->allocflags |= GFP_DMA32; + if (flags & SLAB_RECLAIM_ACCOUNT) + cachep->allocflags |= __GFP_RECLAIMABLE; + cachep->size = size; +--- a/mm/slab.h ++++ b/mm/slab.h +@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_fl + + + /* Legal flag mask for kmem_cache_create(), for various configurations */ +-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ ++#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ ++ SLAB_CACHE_DMA32 | SLAB_PANIC | \ + SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) + + #if defined(CONFIG_DEBUG_SLAB) +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_d + SLAB_FAILSLAB | SLAB_KASAN) + + #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ +- SLAB_ACCOUNT) ++ SLAB_CACHE_DMA32 | SLAB_ACCOUNT) + + /* + * Merge control. If this is set then no merging of slab caches will occur. +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -3591,6 +3591,9 @@ static int calculate_sizes(struct kmem_c + if (s->flags & SLAB_CACHE_DMA) + s->allocflags |= GFP_DMA; + ++ if (s->flags & SLAB_CACHE_DMA32) ++ s->allocflags |= GFP_DMA32; ++ + if (s->flags & SLAB_RECLAIM_ACCOUNT) + s->allocflags |= __GFP_RECLAIMABLE; + +@@ -5681,6 +5684,8 @@ static char *create_unique_id(struct kme + */ + if (s->flags & SLAB_CACHE_DMA) + *p++ = 'd'; ++ if (s->flags & SLAB_CACHE_DMA32) ++ *p++ = 'D'; + if (s->flags & SLAB_RECLAIM_ACCOUNT) + *p++ = 'a'; + if (s->flags & SLAB_CONSISTENCY_CHECKS) diff --git a/queue-5.0/mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch b/queue-5.0/mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch new file mode 100644 index 00000000000..e79d9a5a58c --- /dev/null +++ b/queue-5.0/mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch @@ -0,0 +1,57 @@ +From 5ae2efb1dea9f537453e841714e3ee2757595aec Mon Sep 17 00:00:00 2001 +From: Oscar Salvador +Date: Thu, 28 Mar 2019 20:44:01 -0700 +Subject: mm/debug.c: fix __dump_page when mapping->host is not set + +From: Oscar Salvador + +commit 5ae2efb1dea9f537453e841714e3ee2757595aec upstream. 
+ +While debugging something, I added a dump_page() into do_swap_page(), +and I got the splat from below. The issue happens when dereferencing +mapping->host in __dump_page(): + + ... + else if (mapping) { + pr_warn("%ps ", mapping->a_ops); + if (mapping->host->i_dentry.first) { + struct dentry *dentry; + dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); + pr_warn("name:\"%pd\" ", dentry); + } + } + ... + +Swap address space does not contain an inode information, and so +mapping->host equals NULL. + +Although the dump_page() call was added artificially into +do_swap_page(), I am not sure if we can hit this from any other path, so +it looks worth fixing it. We can easily do that by checking +mapping->host first. + +Link: http://lkml.kernel.org/r/20190318072931.29094-1-osalvador@suse.de +Fixes: 1c6fb1d89e73c ("mm: print more information about mapping in __dump_page") +Signed-off-by: Oscar Salvador +Acked-by: Michal Hocko +Acked-by: Hugh Dickins +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/debug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/debug.c ++++ b/mm/debug.c +@@ -79,7 +79,7 @@ void __dump_page(struct page *page, cons + pr_warn("ksm "); + else if (mapping) { + pr_warn("%ps ", mapping->a_ops); +- if (mapping->host->i_dentry.first) { ++ if (mapping->host && mapping->host->i_dentry.first) { + struct dentry *dentry; + dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); + pr_warn("name:\"%pd\" ", dentry); diff --git a/queue-5.0/mm-hotplug-fix-offline-undo_isolate_page_range.patch b/queue-5.0/mm-hotplug-fix-offline-undo_isolate_page_range.patch new file mode 100644 index 00000000000..14ea342280c --- /dev/null +++ b/queue-5.0/mm-hotplug-fix-offline-undo_isolate_page_range.patch @@ -0,0 +1,221 @@ +From 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab Mon Sep 17 00:00:00 2001 +From: Qian Cai +Date: Thu, 28 Mar 2019 20:43:34 -0700 +Subject: mm/hotplug: fix offline undo_isolate_page_range() + +From: Qian Cai + +commit 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab upstream. + +Commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded +memory to zones until online") introduced move_pfn_range_to_zone() which +calls memmap_init_zone() during onlining a memory block. +memmap_init_zone() will reset pagetype flags and makes migrate type to +be MOVABLE. + +However, in __offline_pages(), it also call undo_isolate_page_range() +after offline_isolated_pages() to do the same thing. Due to commit +2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") changed +__first_valid_page() to skip offline pages, undo_isolate_page_range() +here just waste CPU cycles looping around the offlining PFN range while +doing nothing, because __first_valid_page() will return NULL as +offline_isolated_pages() has already marked all memory sections within +the pfn range as offline via offline_mem_sections(). + +Also, after calling the "useless" undo_isolate_page_range() here, it +reaches the point of no returning by notifying MEM_OFFLINE. Those pages +will be marked as MIGRATE_MOVABLE again once onlining. The only thing +left to do is to decrease the number of isolated pageblocks zone counter +which would make some paths of the page allocation slower that the above +commit introduced. + +Even if alloc_contig_range() can be used to isolate 16GB-hugetlb pages +on ppc64, an "int" should still be enough to represent the number of +pageblocks there. 
Fix an incorrect comment along the way. + +[cai@lca.pw: v4] + Link: http://lkml.kernel.org/r/20190314150641.59358-1-cai@lca.pw +Link: http://lkml.kernel.org/r/20190313143133.46200-1-cai@lca.pw +Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") +Signed-off-by: Qian Cai +Acked-by: Michal Hocko +Reviewed-by: Oscar Salvador +Cc: Vlastimil Babka +Cc: [4.13+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/page-isolation.h | 10 -------- + mm/memory_hotplug.c | 17 +++++++++++--- + mm/page_alloc.c | 2 - + mm/page_isolation.c | 48 +++++++++++++++++++++++++---------------- + mm/sparse.c | 2 - + 5 files changed, 45 insertions(+), 34 deletions(-) + +--- a/include/linux/page-isolation.h ++++ b/include/linux/page-isolation.h +@@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zo + + /* + * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. +- * If specified range includes migrate types other than MOVABLE or CMA, +- * this will fail with -EBUSY. +- * +- * For isolating all pages in the range finally, the caller have to +- * free all pages in the range. test_page_isolated() can be used for +- * test it. +- * +- * The following flags are allowed (they can be combined in a bit mask) +- * SKIP_HWPOISON - ignore hwpoison pages +- * REPORT_FAILURE - report details about the failure to isolate the range + */ + int + start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1560,7 +1560,7 @@ static int __ref __offline_pages(unsigne + { + unsigned long pfn, nr_pages; + long offlined_pages; +- int ret, node; ++ int ret, node, nr_isolate_pageblock; + unsigned long flags; + unsigned long valid_start, valid_end; + struct zone *zone; +@@ -1586,10 +1586,11 @@ static int __ref __offline_pages(unsigne + ret = start_isolate_page_range(start_pfn, end_pfn, + MIGRATE_MOVABLE, + SKIP_HWPOISON | REPORT_FAILURE); +- if (ret) { ++ if (ret < 0) { + reason = "failure to isolate range"; + goto failed_removal; + } ++ nr_isolate_pageblock = ret; + + arg.start_pfn = start_pfn; + arg.nr_pages = nr_pages; +@@ -1642,8 +1643,16 @@ static int __ref __offline_pages(unsigne + /* Ok, all of our target is isolated. + We cannot do rollback at this point. */ + offline_isolated_pages(start_pfn, end_pfn); +- /* reset pagetype flags and makes migrate type to be MOVABLE */ +- undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); ++ ++ /* ++ * Onlining will reset pagetype flags and makes migrate type ++ * MOVABLE, so just need to decrease the number of isolated ++ * pageblocks zone counter here. ++ */ ++ spin_lock_irqsave(&zone->lock, flags); ++ zone->nr_isolate_pageblock -= nr_isolate_pageblock; ++ spin_unlock_irqrestore(&zone->lock, flags); ++ + /* removal success */ + adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); + zone->present_pages -= offlined_pages; +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -8160,7 +8160,7 @@ int alloc_contig_range(unsigned long sta + + ret = start_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end), migratetype, 0); +- if (ret) ++ if (ret < 0) + return ret; + + /* +--- a/mm/page_isolation.c ++++ b/mm/page_isolation.c +@@ -160,27 +160,36 @@ __first_valid_page(unsigned long pfn, un + return NULL; + } + +-/* +- * start_isolate_page_range() -- make page-allocation-type of range of pages +- * to be MIGRATE_ISOLATE. +- * @start_pfn: The lower PFN of the range to be isolated. 
+- * @end_pfn: The upper PFN of the range to be isolated. +- * @migratetype: migrate type to set in error recovery. ++/** ++ * start_isolate_page_range() - make page-allocation-type of range of pages to ++ * be MIGRATE_ISOLATE. ++ * @start_pfn: The lower PFN of the range to be isolated. ++ * @end_pfn: The upper PFN of the range to be isolated. ++ * start_pfn/end_pfn must be aligned to pageblock_order. ++ * @migratetype: Migrate type to set in error recovery. ++ * @flags: The following flags are allowed (they can be combined in ++ * a bit mask) ++ * SKIP_HWPOISON - ignore hwpoison pages ++ * REPORT_FAILURE - report details about the failure to ++ * isolate the range + * + * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in + * the range will never be allocated. Any free pages and pages freed in the +- * future will not be allocated again. +- * +- * start_pfn/end_pfn must be aligned to pageblock_order. +- * Return 0 on success and -EBUSY if any part of range cannot be isolated. ++ * future will not be allocated again. If specified range includes migrate types ++ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all ++ * pages in the range finally, the caller have to free all pages in the range. ++ * test_page_isolated() can be used for test it. + * + * There is no high level synchronization mechanism that prevents two threads +- * from trying to isolate overlapping ranges. If this happens, one thread ++ * from trying to isolate overlapping ranges. If this happens, one thread + * will notice pageblocks in the overlapping range already set to isolate. + * This happens in set_migratetype_isolate, and set_migratetype_isolate +- * returns an error. We then clean up by restoring the migration type on +- * pageblocks we may have modified and return -EBUSY to caller. This ++ * returns an error. We then clean up by restoring the migration type on ++ * pageblocks we may have modified and return -EBUSY to caller. This + * prevents two threads from simultaneously working on overlapping ranges. ++ * ++ * Return: the number of isolated pageblocks on success and -EBUSY if any part ++ * of range cannot be isolated. 
+ */ + int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype, int flags) +@@ -188,6 +197,7 @@ int start_isolate_page_range(unsigned lo + unsigned long pfn; + unsigned long undo_pfn; + struct page *page; ++ int nr_isolate_pageblock = 0; + + BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); + BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); +@@ -196,13 +206,15 @@ int start_isolate_page_range(unsigned lo + pfn < end_pfn; + pfn += pageblock_nr_pages) { + page = __first_valid_page(pfn, pageblock_nr_pages); +- if (page && +- set_migratetype_isolate(page, migratetype, flags)) { +- undo_pfn = pfn; +- goto undo; ++ if (page) { ++ if (set_migratetype_isolate(page, migratetype, flags)) { ++ undo_pfn = pfn; ++ goto undo; ++ } ++ nr_isolate_pageblock++; + } + } +- return 0; ++ return nr_isolate_pageblock; + undo: + for (pfn = start_pfn; + pfn < undo_pfn; +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -556,7 +556,7 @@ void online_mem_sections(unsigned long s + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-/* Mark all memory sections within the pfn range as online */ ++/* Mark all memory sections within the pfn range as offline */ + void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) + { + unsigned long pfn; diff --git a/queue-5.0/mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch b/queue-5.0/mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch new file mode 100644 index 00000000000..398ee661676 --- /dev/null +++ b/queue-5.0/mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch @@ -0,0 +1,93 @@ +From c4efe484b5f0d768e23c9731082fec827723e738 Mon Sep 17 00:00:00 2001 +From: Qian Cai +Date: Thu, 28 Mar 2019 20:44:16 -0700 +Subject: mm/memory_hotplug.c: fix notification in offline error path + +From: Qian Cai + +commit c4efe484b5f0d768e23c9731082fec827723e738 upstream. + +When start_isolate_page_range() returned -EBUSY in __offline_pages(), it +calls memory_notify(MEM_CANCEL_OFFLINE, &arg) with an uninitialized +"arg". As the result, it triggers warnings below. Also, it is only +necessary to notify MEM_CANCEL_OFFLINE after MEM_GOING_OFFLINE. 
+ + page:ffffea0001200000 count:1 mapcount:0 mapping:0000000000000000 + index:0x0 + flags: 0x3fffe000001000(reserved) + raw: 003fffe000001000 ffffea0001200008 ffffea0001200008 0000000000000000 + raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 + page dumped because: unmovable page + WARNING: CPU: 25 PID: 1665 at mm/kasan/common.c:665 + kasan_mem_notifier+0x34/0x23b + CPU: 25 PID: 1665 Comm: bash Tainted: G W 5.0.0+ #94 + Hardware name: HP ProLiant DL180 Gen9/ProLiant DL180 Gen9, BIOS U20 + 10/25/2017 + RIP: 0010:kasan_mem_notifier+0x34/0x23b + RSP: 0018:ffff8883ec737890 EFLAGS: 00010206 + RAX: 0000000000000246 RBX: ff10f0f4435f1000 RCX: f887a7a21af88000 + RDX: dffffc0000000000 RSI: 0000000000000020 RDI: ffff8881f221af88 + RBP: ffff8883ec737898 R08: ffff888000000000 R09: ffffffffb0bddcd0 + R10: ffffed103e857088 R11: ffff8881f42b8443 R12: dffffc0000000000 + R13: 00000000fffffff9 R14: dffffc0000000000 R15: 0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000560fbd31d730 CR3: 00000004049c6003 CR4: 00000000001606a0 + Call Trace: + notifier_call_chain+0xbf/0x130 + __blocking_notifier_call_chain+0x76/0xc0 + blocking_notifier_call_chain+0x16/0x20 + memory_notify+0x1b/0x20 + __offline_pages+0x3e2/0x1210 + offline_pages+0x11/0x20 + memory_block_action+0x144/0x300 + memory_subsys_offline+0xe5/0x170 + device_offline+0x13f/0x1e0 + state_store+0xeb/0x110 + dev_attr_store+0x3f/0x70 + sysfs_kf_write+0x104/0x150 + kernfs_fop_write+0x25c/0x410 + __vfs_write+0x66/0x120 + vfs_write+0x15a/0x4f0 + ksys_write+0xd2/0x1b0 + __x64_sys_write+0x73/0xb0 + do_syscall_64+0xeb/0xb78 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x7f14f75cc3b8 + RSP: 002b:00007ffe84d01d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 + RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f14f75cc3b8 + RDX: 0000000000000008 RSI: 0000563f8e433d70 RDI: 0000000000000001 + RBP: 0000563f8e433d70 R08: 000000000000000a R09: 00007ffe84d018f0 + R10: 000000000000000a R11: 0000000000000246 R12: 00007f14f789e780 + R13: 0000000000000008 R14: 00007f14f7899740 R15: 0000000000000008 + +Link: http://lkml.kernel.org/r/20190320204255.53571-1-cai@lca.pw +Fixes: 7960509329c2 ("mm, memory_hotplug: print reason for the offlining failure") +Reviewed-by: Oscar Salvador +Acked-by: Michal Hocko +Signed-off-by: Qian Cai +Reviewed-by: Andrew Morton +Cc: [5.0.x] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory_hotplug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1684,12 +1684,12 @@ static int __ref __offline_pages(unsigne + + failed_removal_isolated: + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); ++ memory_notify(MEM_CANCEL_OFFLINE, &arg); + failed_removal: + pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", + (unsigned long long) start_pfn << PAGE_SHIFT, + ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, + reason); +- memory_notify(MEM_CANCEL_OFFLINE, &arg); + /* pushback to free area */ + mem_hotplug_done(); + return ret; diff --git a/queue-5.0/mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch b/queue-5.0/mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch new file mode 100644 index 00000000000..402794a52ee --- /dev/null +++ b/queue-5.0/mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch @@ -0,0 +1,141 @@ +From a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 Mon Sep 17 
00:00:00 2001 +From: Yang Shi +Date: Thu, 28 Mar 2019 20:43:55 -0700 +Subject: mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified + +From: Yang Shi + +commit a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 upstream. + +When MPOL_MF_STRICT was specified and an existing page was already on a +node that does not follow the policy, mbind() should return -EIO. But +commit 6f4576e3687b ("mempolicy: apply page table walker on +queue_pages_range()") broke the rule. + +And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support +thp migration") didn't return the correct value for THP mbind() too. + +If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it +reaches queue_pages_to_pte_range() or queue_pages_pmd() to check if an +existing page was already on a node that does not follow the policy. +And, non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or +MPOL_MF_MOVE_ALL was specified. + +Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c + +[akpm@linux-foundation.org: tweak code comment] +Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com +Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") +Signed-off-by: Yang Shi +Signed-off-by: Oscar Salvador +Reported-by: Cyril Hrubis +Suggested-by: Kirill A. Shutemov +Acked-by: Rafael Aquini +Reviewed-by: Oscar Salvador +Acked-by: David Rientjes +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- + 1 file changed, 33 insertions(+), 7 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -428,6 +428,13 @@ static inline bool queue_pages_required( + return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); + } + ++/* ++ * queue_pages_pmd() has three possible return values: ++ * 1 - pages are placed on the right node or queued successfully. ++ * 0 - THP was split. ++ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing ++ * page was already on a node that does not follow the policy. 
++ */ + static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + unsigned long end, struct mm_walk *walk) + { +@@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, s + unsigned long flags; + + if (unlikely(is_pmd_migration_entry(*pmd))) { +- ret = 1; ++ ret = -EIO; + goto unlock; + } + page = pmd_page(*pmd); +@@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, s + ret = 1; + flags = qp->flags; + /* go to thp migration */ +- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ++ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { ++ if (!vma_migratable(walk->vma)) { ++ ret = -EIO; ++ goto unlock; ++ } ++ + migrate_page_add(page, qp->pagelist, flags); ++ } else ++ ret = -EIO; + unlock: + spin_unlock(ptl); + out: +@@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t * + ptl = pmd_trans_huge_lock(pmd, vma); + if (ptl) { + ret = queue_pages_pmd(pmd, ptl, addr, end, walk); +- if (ret) ++ if (ret > 0) + return 0; ++ else if (ret < 0) ++ return ret; + } + + if (pmd_trans_unstable(pmd)) +@@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t * + continue; + if (!queue_pages_required(page, qp)) + continue; +- migrate_page_add(page, qp->pagelist, flags); ++ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { ++ if (!vma_migratable(vma)) ++ break; ++ migrate_page_add(page, qp->pagelist, flags); ++ } else ++ break; + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); +- return 0; ++ return addr != end ? -EIO : 0; + } + + static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, +@@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigne + unsigned long endvma = vma->vm_end; + unsigned long flags = qp->flags; + +- if (!vma_migratable(vma)) ++ /* ++ * Need check MPOL_MF_STRICT to return -EIO if possible ++ * regardless of vma_migratable ++ */ ++ if (!vma_migratable(vma) && ++ !(flags & MPOL_MF_STRICT)) + return 1; + + if (endvma > end) +@@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigne + } + + /* queue pages from current vma */ +- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ++ if (flags & MPOL_MF_VALID) + return 0; + return 1; + } diff --git a/queue-5.0/mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch b/queue-5.0/mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch new file mode 100644 index 00000000000..474f8dce768 --- /dev/null +++ b/queue-5.0/mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch @@ -0,0 +1,77 @@ +From d2b2c6dd227ba5b8a802858748ec9a780cb75b47 Mon Sep 17 00:00:00 2001 +From: Lars Persson +Date: Thu, 28 Mar 2019 20:44:28 -0700 +Subject: mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate + +From: Lars Persson + +commit d2b2c6dd227ba5b8a802858748ec9a780cb75b47 upstream. + +Our MIPS 1004Kc SoCs were seeing random userspace crashes with SIGILL +and SIGSEGV that could not be traced back to a userspace code bug. They +had all the magic signs of an I/D cache coherency issue. + +Now recently we noticed that the /proc/sys/vm/compact_memory interface +was quite efficient at provoking this class of userspace crashes. + +Studying the code in mm/migrate.c there is a distinction made between +migrating a page that is mapped at the instant of migration and one that +is not mapped. Our problem turned out to be the non-mapped pages. + +For the non-mapped page the code performs a copy of the page content and +all relevant meta-data of the page without doing the required D-cache +maintenance. 
This leaves dirty data in the D-cache of the CPU and on +the 1004K cores this data is not visible to the I-cache. A subsequent +page-fault that triggers a mapping of the page will happily serve the +process with potentially stale code. + +What about ARM then, this bug should have seen greater exposure? Well +ARM became immune to this flaw back in 2010, see commit c01778001a4f +("ARM: 6379/1: Assume new page cache pages have dirty D-cache"). + +My proposed fix moves the D-cache maintenance inside move_to_new_page to +make it common for both cases. + +Link: http://lkml.kernel.org/r/20190315083502.11849-1-larper@axis.com +Fixes: 97ee0524614 ("flush cache before installing new page at migraton") +Signed-off-by: Lars Persson +Reviewed-by: Paul Burton +Acked-by: Mel Gorman +Cc: Ralf Baechle +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/migrate.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -248,10 +248,8 @@ static bool remove_migration_pte(struct + pte = swp_entry_to_pte(entry); + } else if (is_device_public_page(new)) { + pte = pte_mkdevmap(pte); +- flush_dcache_page(new); + } +- } else +- flush_dcache_page(new); ++ } + + #ifdef CONFIG_HUGETLB_PAGE + if (PageHuge(new)) { +@@ -995,6 +993,13 @@ static int move_to_new_page(struct page + */ + if (!PageMappingFlags(page)) + page->mapping = NULL; ++ ++ if (unlikely(is_zone_device_page(newpage))) { ++ if (is_device_public_page(newpage)) ++ flush_dcache_page(newpage); ++ } else ++ flush_dcache_page(newpage); ++ + } + out: + return rc; diff --git a/queue-5.0/mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch b/queue-5.0/mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch new file mode 100644 index 00000000000..8807f191eff --- /dev/null +++ b/queue-5.0/mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch @@ -0,0 +1,40 @@ +From f5777bc2d9cf0712554228b1a7927b6f13f5c1f0 Mon Sep 17 00:00:00 2001 +From: Qian Cai +Date: Thu, 28 Mar 2019 20:44:21 -0700 +Subject: mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate() + +From: Qian Cai + +commit f5777bc2d9cf0712554228b1a7927b6f13f5c1f0 upstream. + +Due to has_unmovable_pages() taking an incorrect irqsave flag instead of +the isolation flag in set_migratetype_isolate(), there are issues with +HWPOSION and error reporting where dump_page() is not called when there +is an unmovable page. + +Link: http://lkml.kernel.org/r/20190320204941.53731-1-cai@lca.pw +Fixes: d381c54760dc ("mm: only report isolation failures when offlining memory") +Acked-by: Michal Hocko +Reviewed-by: Oscar Salvador +Signed-off-by: Qian Cai +Cc: [5.0.x] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_isolation.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/page_isolation.c ++++ b/mm/page_isolation.c +@@ -59,7 +59,8 @@ static int set_migratetype_isolate(struc + * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. + * We just check MOVABLE pages. 
+ */ +- if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags)) ++ if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, ++ isol_flags)) + ret = 0; + + /* diff --git a/queue-5.0/series b/queue-5.0/series index 24540ca4438..247b57a11ca 100644 --- a/queue-5.0/series +++ b/queue-5.0/series @@ -122,3 +122,11 @@ xhci-don-t-let-usb3-ports-stuck-in-polling-state-prevent-suspend.patch usb-cdc-acm-fix-race-during-wakeup-blocking-tx-traffic.patch usb-typec-tcpm-try-pd-2.0-if-sink-does-not-respond-to-3.0-source-caps.patch usb-typec-fix-unchecked-return-value.patch +mm-hotplug-fix-offline-undo_isolate_page_range.patch +mm-add-support-for-kmem-caches-in-dma32-zone.patch +iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch +mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified.patch +mm-debug.c-fix-__dump_page-when-mapping-host-is-not-set.patch +mm-memory_hotplug.c-fix-notification-in-offline-error-path.patch +mm-page_isolation.c-fix-a-wrong-flag-in-set_migratetype_isolate.patch +mm-migrate.c-add-missing-flush_dcache_page-for-non-mapped-page-migrate.patch
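
Usage note for the DMA32 slab-cache patches above (a minimal sketch, not taken from any patch in this queue; the example_l2_* names are made up for illustration): a driver creates its kmem cache with SLAB_CACHE_DMA32 and then allocates from it without passing GFP_DMA32, since GFP_DMA32 stays in GFP_SLAB_BUG_MASK and would trigger a warning. This is roughly what io-pgtable-arm-v7s does for its 1KB level-2 tables.

    #include <linux/slab.h>

    /* hypothetical driver cache, shown only to illustrate the new flag */
    static struct kmem_cache *example_l2_cache;

    static int example_l2_init(void)
    {
            /* The DMA32 constraint lives on the cache flags, not on each allocation. */
            example_l2_cache = kmem_cache_create("example_l2", 1024, 1024,
                                                 SLAB_CACHE_DMA32, NULL);
            return example_l2_cache ? 0 : -ENOMEM;
    }

    static void *example_l2_alloc(gfp_t gfp)
    {
            /*
             * Do not OR in GFP_DMA32 here: it remains in GFP_SLAB_BUG_MASK,
             * so the slab allocators would warn (see the
             * mm-add-support-for-kmem-caches-in-dma32-zone patch).
             */
            return kmem_cache_zalloc(example_l2_cache, gfp);
    }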