From: Greg Kroah-Hartman
Date: Tue, 28 Jan 2020 13:49:35 +0000 (+0100)
Subject: 4.19-stable patches
X-Git-Tag: v4.4.212~1
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1cf61743148addef9ffd567f8bc494d79365da25;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	drivers-base-memory-pass-a-block_id-to-init_memory_block.patch
	drivers-base-memory.c-clean-up-relics-in-function-parameters.patch
	drivers-base-memory.c-remove-an-unnecessary-check-on-nr_mem_sections.patch
	drivers-base-node.c-simplify-unregister_memory_block_under_nodes.patch
	mm-hotplug-kill-is_dev_zone-usage-in-__remove_pages.patch
	mm-memory_hotplug-add-nid-parameter-to-arch_remove_memory.patch
	mm-memory_hotplug-allow-arch_remove_memory-without-config_memory_hotremove.patch
	mm-memory_hotplug-create-memory-block-devices-after-arch_add_memory.patch
	mm-memory_hotplug-fix-try_offline_node.patch
	mm-memory_hotplug-make-__remove_pages-and-arch_remove_memory-never-fail.patch
	mm-memory_hotplug-make-__remove_section-never-fail.patch
	mm-memory_hotplug-make-remove_memory-take-the-device_hotplug_lock.patch
	mm-memory_hotplug-make-unregister_memory_block_under_nodes-never-fail.patch
	mm-memory_hotplug-make-unregister_memory_section-never-fail.patch
	mm-memory_hotplug-release-memory-resource-after-arch_remove_memory.patch
	mm-memory_hotplug-remove-memory-block-devices-before-arch_remove_memory.patch
	mm-memory_hotplug-remove-zone-parameter-from-sparse_remove_one_section.patch
	mm-memory_hotplug-shrink-zones-when-offlining-memory.patch
	mm-memory_hotplug-update-a-comment-in-unregister_memory.patch
	mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch
	mm-sparse-drop-pgdat_resize_lock-in-sparse_add-remove_one_section.patch
	mm-sparse-pass-nid-instead-of-pgdat-to-sparse_add_one_section.patch
	powerpc-mm-fix-section-mismatch-warning.patch
	s390x-mm-implement-arch_remove_memory.patch
---

diff --git a/queue-4.19/drivers-base-memory-pass-a-block_id-to-init_memory_block.patch b/queue-4.19/drivers-base-memory-pass-a-block_id-to-init_memory_block.patch
new file mode 100644
index 00000000000..21090394ba2
--- /dev/null
+++ b/queue-4.19/drivers-base-memory-pass-a-block_id-to-init_memory_block.patch
@@ -0,0 +1,151 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:12 +0100
+Subject: drivers/base/memory: pass a block_id to init_memory_block()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K.V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-16-david@redhat.com>
+
+From: David Hildenbrand
+
+commit 1811582587c43bdf13d690d83345610d4df433bb upstream.
+
+We'll rework hotplug_memory_register() shortly, so that it is no longer
+passed a section.
+
+[cai@lca.pw: fix a compilation warning]
+ Link: http://lkml.kernel.org/r/1559320186-28337-1-git-send-email-cai@lca.pw
+Link: http://lkml.kernel.org/r/20190527111152.16324-6-david@redhat.com
+Signed-off-by: David Hildenbrand
+Signed-off-by: Qian Cai
+Acked-by: Michal Hocko
+Cc: Greg Kroah-Hartman
+Cc: "Rafael J. Wysocki"
+Cc: Alex Deucher
+Cc: Andrew Banman
+Cc: Andy Lutomirski
+Cc: Anshuman Khandual
+Cc: Ard Biesheuvel
+Cc: Arun KS
+Cc: Baoquan He
+Cc: Benjamin Herrenschmidt
+Cc: Borislav Petkov
+Cc: Catalin Marinas
+Cc: Chintan Pandya
+Cc: Christophe Leroy
+Cc: Chris Wilson
+Cc: Dan Williams
+Cc: Dave Hansen
+Cc: "David S. Miller"
+Cc: Fenghua Yu
+Cc: Heiko Carstens
+Cc: "H.
Peter Anvin" +Cc: Ingo Molnar +Cc: Jonathan Cameron +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: "Kirill A. Shutemov" +Cc: Logan Gunthorpe +Cc: Mark Brown +Cc: Mark Rutland +Cc: Masahiro Yamada +Cc: Mathieu Malaterre +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: "mike.travis@hpe.com" +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Pavel Tatashin +Cc: Peter Zijlstra +Cc: Rich Felker +Cc: Rob Herring +Cc: Robin Murphy +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Wei Yang +Cc: Will Deacon +Cc: Yoshinori Sato +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 27 +++++++++++---------------- + 1 file changed, 11 insertions(+), 16 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -660,21 +660,18 @@ int register_memory(struct memory_block + return ret; + } + +-static int init_memory_block(struct memory_block **memory, +- struct mem_section *section, unsigned long state) ++static int init_memory_block(struct memory_block **memory, int block_id, ++ unsigned long state) + { + struct memory_block *mem; + unsigned long start_pfn; +- int scn_nr; + int ret = 0; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; + +- scn_nr = __section_nr(section); +- mem->start_section_nr = +- base_memory_block_id(scn_nr) * sections_per_block; ++ mem->start_section_nr = block_id * sections_per_block; + mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; + mem->state = state; + start_pfn = section_nr_to_pfn(mem->start_section_nr); +@@ -689,21 +686,18 @@ static int init_memory_block(struct memo + static int add_memory_block(int base_section_nr) + { + struct memory_block *mem; +- int i, ret, section_count = 0, section_nr; ++ int i, ret, section_count = 0; + + for (i = base_section_nr; + i < base_section_nr + sections_per_block; +- i++) { +- if (!present_section_nr(i)) +- continue; +- if (section_count == 0) +- section_nr = i; +- section_count++; +- } ++ i++) ++ if (present_section_nr(i)) ++ section_count++; + + if (section_count == 0) + return 0; +- ret = init_memory_block(&mem, __nr_to_section(section_nr), MEM_ONLINE); ++ ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), ++ MEM_ONLINE); + if (ret) + return ret; + mem->section_count = section_count; +@@ -716,6 +710,7 @@ static int add_memory_block(int base_sec + */ + int hotplug_memory_register(int nid, struct mem_section *section) + { ++ int block_id = base_memory_block_id(__section_nr(section)); + int ret = 0; + struct memory_block *mem; + +@@ -726,7 +721,7 @@ int hotplug_memory_register(int nid, str + mem->section_count++; + put_device(&mem->dev); + } else { +- ret = init_memory_block(&mem, section, MEM_OFFLINE); ++ ret = init_memory_block(&mem, block_id, MEM_OFFLINE); + if (ret) + goto out; + mem->section_count++; diff --git a/queue-4.19/drivers-base-memory.c-clean-up-relics-in-function-parameters.patch b/queue-4.19/drivers-base-memory.c-clean-up-relics-in-function-parameters.patch new file mode 100644 index 00000000000..2aabae38625 --- /dev/null +++ b/queue-4.19/drivers-base-memory.c-clean-up-relics-in-function-parameters.patch @@ -0,0 +1,81 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:04 +0100 +Subject: drivers/base/memory.c: clean up relics in function parameters +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew 
Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-8-david@redhat.com> + +From: Baoquan He + +commit 063b8a4cee8088224bcdb79bcd08db98df16178e upstream. + +The input parameter 'phys_index' of memory_block_action() is actually the +section number, but not the phys_index of memory_block. This is a relic +from the past when one memory block could only contain one section. +Rename it to start_section_nr. + +And also in remove_memory_section(), the 'node_id' and 'phys_device' +arguments are not used by anyone. Remove them. + +Link: http://lkml.kernel.org/r/20190329144250.14315-2-bhe@redhat.com +Signed-off-by: Baoquan He +Acked-by: Michal Hocko +Reviewed-by: Rafael J. Wysocki +Reviewed-by: Mukesh Ojha +Reviewed-by: Oscar Salvador +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -230,13 +230,14 @@ static bool pages_correctly_probed(unsig + * OK to have direct references to sparsemem variables in here. + */ + static int +-memory_block_action(unsigned long phys_index, unsigned long action, int online_type) ++memory_block_action(unsigned long start_section_nr, unsigned long action, ++ int online_type) + { + unsigned long start_pfn; + unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + int ret; + +- start_pfn = section_nr_to_pfn(phys_index); ++ start_pfn = section_nr_to_pfn(start_section_nr); + + switch (action) { + case MEM_ONLINE: +@@ -250,7 +251,7 @@ memory_block_action(unsigned long phys_i + break; + default: + WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " +- "%ld\n", __func__, phys_index, action, action); ++ "%ld\n", __func__, start_section_nr, action, action); + ret = -EINVAL; + } + +@@ -747,8 +748,7 @@ unregister_memory(struct memory_block *m + device_unregister(&memory->dev); + } + +-static int remove_memory_section(unsigned long node_id, +- struct mem_section *section, int phys_device) ++static int remove_memory_section(struct mem_section *section) + { + struct memory_block *mem; + +@@ -780,7 +780,7 @@ int unregister_memory_section(struct mem + if (!present_section(section)) + return -EINVAL; + +- return remove_memory_section(0, section, 0); ++ return remove_memory_section(section); + } + #endif /* CONFIG_MEMORY_HOTREMOVE */ + diff --git a/queue-4.19/drivers-base-memory.c-remove-an-unnecessary-check-on-nr_mem_sections.patch b/queue-4.19/drivers-base-memory.c-remove-an-unnecessary-check-on-nr_mem_sections.patch new file mode 100644 index 00000000000..d698f1c5a23 --- /dev/null +++ b/queue-4.19/drivers-base-memory.c-remove-an-unnecessary-check-on-nr_mem_sections.patch @@ -0,0 +1,54 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:01 +0100 +Subject: drivers/base/memory.c: remove an unnecessary check on NR_MEM_SECTIONS +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-5-david@redhat.com> + +From: Wei Yang + +commit 3b6fd6ffb27c2efa003c6d4d15ca72c054b71d7c upstream. 
+
+In cb5e39b8038b ("drivers: base: refactor add_memory_section() to
+add_memory_block()"), add_memory_block() is introduced, which is only
+invoked in memory_dev_init().
+
+When combining these two loops in memory_dev_init() and
+add_memory_block(), they look like this:
+
+  for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block)
+      for (j = i;
+           (j < i + sections_per_block) && j < NR_MEM_SECTIONS;
+           j++)
+
+Since (i < NR_MEM_SECTIONS) is guaranteed and j always sits within its
+own memory block, the check of (j < NR_MEM_SECTIONS) is not necessary.
+
+This patch just removes this check.
+
+Link: http://lkml.kernel.org/r/20181123222811.18216-1-richard.weiyang@gmail.com
+Signed-off-by: Wei Yang
+Reviewed-by: Andrew Morton
+Cc: Greg Kroah-Hartman
+Cc: "Rafael J. Wysocki"
+Cc: Seth Jennings
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/base/memory.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/memory.c
++++ b/drivers/base/memory.c
+@@ -691,7 +691,7 @@ static int add_memory_block(int base_sec
+ 	int i, ret, section_count = 0, section_nr;
+ 
+ 	for (i = base_section_nr;
+-	     (i < base_section_nr + sections_per_block) && i < NR_MEM_SECTIONS;
++	     i < base_section_nr + sections_per_block;
+ 	     i++) {
+ 		if (!present_section_nr(i))
+ 			continue;
diff --git a/queue-4.19/drivers-base-node.c-simplify-unregister_memory_block_under_nodes.patch b/queue-4.19/drivers-base-node.c-simplify-unregister_memory_block_under_nodes.patch
new file mode 100644
index 00000000000..0ee012d6685
--- /dev/null
+++ b/queue-4.19/drivers-base-node.c-simplify-unregister_memory_block_under_nodes.patch
@@ -0,0 +1,118 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:18 +0100
+Subject: drivers/base/node.c: simplify unregister_memory_block_under_nodes()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K.V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-22-david@redhat.com>
+
+From: David Hildenbrand
+
+commit d84f2f5a755208da3f93e17714631485cb3da11c upstream.
+
+We don't allow offlining memory block devices that belong to multiple
+numa nodes. Therefore, such devices can never get removed. It is
+sufficient to process a single node when removing the memory block. No
+need to iterate over each and every PFN.
+
+We already have the nid stored for each memory block. Make sure that the
+nid always has a sane value.
+
+Please note that checking for node_online(nid) is not required. If we
+would have a memory block belonging to a node that is no longer online,
+then we would have a BUG in the node offlining code.
+
+Link: http://lkml.kernel.org/r/20190719135244.15242-1-david@redhat.com
+Signed-off-by: David Hildenbrand
+Cc: Greg Kroah-Hartman
+Cc: "Rafael J.
Wysocki" +Cc: David Hildenbrand +Cc: Stephen Rothwell +Cc: Pavel Tatashin +Cc: Michal Hocko +Cc: Oscar Salvador +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 1 + + drivers/base/node.c | 39 +++++++++++++++------------------------ + 2 files changed, 16 insertions(+), 24 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -693,6 +693,7 @@ static int init_memory_block(struct memo + mem->state = state; + start_pfn = section_nr_to_pfn(mem->start_section_nr); + mem->phys_device = arch_get_memory_phys_device(start_pfn); ++ mem->nid = NUMA_NO_NODE; + + ret = register_memory(mem); + +--- a/drivers/base/node.c ++++ b/drivers/base/node.c +@@ -409,8 +409,6 @@ int register_mem_sect_under_node(struct + int ret, nid = *(int *)arg; + unsigned long pfn, sect_start_pfn, sect_end_pfn; + +- mem_blk->nid = nid; +- + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); + sect_end_pfn += PAGES_PER_SECTION - 1; +@@ -439,6 +437,13 @@ int register_mem_sect_under_node(struct + if (page_nid != nid) + continue; + } ++ ++ /* ++ * If this memory block spans multiple nodes, we only indicate ++ * the last processed node. ++ */ ++ mem_blk->nid = nid; ++ + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, + &mem_blk->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); +@@ -454,32 +459,18 @@ int register_mem_sect_under_node(struct + } + + /* +- * Unregister memory block device under all nodes that it spans. +- * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). ++ * Unregister a memory block device under the node it spans. Memory blocks ++ * with multiple nodes cannot be offlined and therefore also never be removed. + */ + void unregister_memory_block_under_nodes(struct memory_block *mem_blk) + { +- unsigned long pfn, sect_start_pfn, sect_end_pfn; +- static nodemask_t unlinked_nodes; +- +- nodes_clear(unlinked_nodes); +- sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); +- sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); +- for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { +- int nid; ++ if (mem_blk->nid == NUMA_NO_NODE) ++ return; + +- nid = get_nid_for_pfn(pfn); +- if (nid < 0) +- continue; +- if (!node_online(nid)) +- continue; +- if (node_test_and_set(nid, unlinked_nodes)) +- continue; +- sysfs_remove_link(&node_devices[nid]->dev.kobj, +- kobject_name(&mem_blk->dev.kobj)); +- sysfs_remove_link(&mem_blk->dev.kobj, +- kobject_name(&node_devices[nid]->dev.kobj)); +- } ++ sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj, ++ kobject_name(&mem_blk->dev.kobj)); ++ sysfs_remove_link(&mem_blk->dev.kobj, ++ kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); + } + + int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) diff --git a/queue-4.19/mm-hotplug-kill-is_dev_zone-usage-in-__remove_pages.patch b/queue-4.19/mm-hotplug-kill-is_dev_zone-usage-in-__remove_pages.patch new file mode 100644 index 00000000000..862d24560dc --- /dev/null +++ b/queue-4.19/mm-hotplug-kill-is_dev_zone-usage-in-__remove_pages.patch @@ -0,0 +1,64 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:17 +0100 +Subject: mm/hotplug: kill is_dev_zone() usage in __remove_pages() +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . 
V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-21-david@redhat.com> + +From: Dan Williams + +commit 96da4350000973ef9310a10d077d65bbc017f093 upstream. + +-- snip -- + +Minor conflict, keep the altmap check. + +-- snip -- + +The zone type check was a leftover from the cleanup that plumbed altmap +through the memory hotplug path, i.e. commit da024512a1fa "mm: pass the +vmem_altmap to arch_remove_memory and __remove_pages". + +Link: http://lkml.kernel.org/r/156092352642.979959.6664333788149363039.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Dan Williams +Reviewed-by: David Hildenbrand +Reviewed-by: Oscar Salvador +Tested-by: Aneesh Kumar K.V [ppc64] +Cc: Michal Hocko +Cc: Logan Gunthorpe +Cc: Pavel Tatashin +Cc: Jane Chu +Cc: Jeff Moyer +Cc: Jérôme Glisse +Cc: Jonathan Corbet +Cc: Mike Rapoport +Cc: Toshi Kani +Cc: Vlastimil Babka +Cc: Wei Yang +Cc: Jason Gunthorpe +Cc: Christoph Hellwig +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory_hotplug.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -507,11 +507,8 @@ void __remove_pages(struct zone *zone, u + unsigned long map_offset = 0; + int sections_to_remove; + +- /* In the ZONE_DEVICE case device driver owns the memory region */ +- if (is_dev_zone(zone)) { +- if (altmap) +- map_offset = vmem_altmap_offset(altmap); +- } ++ if (altmap) ++ map_offset = vmem_altmap_offset(altmap); + + clear_zone_contiguous(zone); + diff --git a/queue-4.19/mm-memory_hotplug-add-nid-parameter-to-arch_remove_memory.patch b/queue-4.19/mm-memory_hotplug-add-nid-parameter-to-arch_remove_memory.patch new file mode 100644 index 00000000000..ebb0f2b80e9 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-add-nid-parameter-to-arch_remove_memory.patch @@ -0,0 +1,210 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:02 +0100 +Subject: mm, memory_hotplug: add nid parameter to arch_remove_memory +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-6-david@redhat.com> + +From: Oscar Salvador + +commit 2c2a5af6fed20cf74401c9d64319c76c5ff81309 upstream. + +-- snip -- + +Missing unification of mm/hmm.c and kernel/memremap.c + +-- snip -- + +Patch series "Do not touch pages in hot-remove path", v2. + +This patchset aims for two things: + + 1) A better definition about offline and hot-remove stage + 2) Solving bugs where we can access non-initialized pages + during hot-remove operations [2] [3]. + +This is achieved by moving all page/zone handling to the offline +stage, so we do not need to access pages when hot-removing memory. + +[1] https://patchwork.kernel.org/cover/10691415/ +[2] https://patchwork.kernel.org/patch/10547445/ +[3] https://www.spinics.net/lists/linux-mm/msg161316.html + +This patch (of 5): + +This is a preparation for the following-up patches. The idea of passing +the nid is that it will allow us to get rid of the zone parameter +afterwards. 
+ +Link: http://lkml.kernel.org/r/20181127162005.15833-2-osalvador@suse.de +Signed-off-by: Oscar Salvador +Reviewed-by: David Hildenbrand +Reviewed-by: Pavel Tatashin +Cc: Michal Hocko +Cc: Dan Williams +Cc: Jerome Glisse +Cc: Jonathan Cameron +Cc: "Rafael J. Wysocki" + +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/mm/init.c | 2 +- + arch/powerpc/mm/mem.c | 3 ++- + arch/s390/mm/init.c | 2 +- + arch/sh/mm/init.c | 2 +- + arch/x86/mm/init_32.c | 2 +- + arch/x86/mm/init_64.c | 3 ++- + include/linux/memory_hotplug.h | 4 ++-- + kernel/memremap.c | 5 ++++- + mm/hmm.c | 4 +++- + mm/memory_hotplug.c | 2 +- + 10 files changed, 18 insertions(+), 11 deletions(-) + +--- a/arch/ia64/mm/init.c ++++ b/arch/ia64/mm/init.c +@@ -662,7 +662,7 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -140,7 +140,8 @@ int __meminit arch_add_memory(int nid, u + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int __meminit arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +--- a/arch/s390/mm/init.c ++++ b/arch/s390/mm/init.c +@@ -240,7 +240,7 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) + { + /* + * There is no hardware or firmware interface which could trigger a +--- a/arch/sh/mm/init.c ++++ b/arch/sh/mm/init.c +@@ -444,7 +444,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + #endif + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) + { + unsigned long start_pfn = PFN_DOWN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -861,7 +861,7 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1142,7 +1142,8 @@ kernel_physical_mapping_remove(unsigned + remove_pagetable(start, end, true, NULL); + } + +-int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) ++int __ref arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -109,8 +109,8 @@ static inline bool movable_node_is_enabl + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-extern int arch_remove_memory(u64 start, u64 size, +- struct vmem_altmap *altmap); ++extern int 
arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap); + extern int __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); + #endif /* CONFIG_MEMORY_HOTREMOVE */ +--- a/kernel/memremap.c ++++ b/kernel/memremap.c +@@ -121,6 +121,7 @@ static void devm_memremap_pages_release( + struct resource *res = &pgmap->res; + resource_size_t align_start, align_size; + unsigned long pfn; ++ int nid; + + pgmap->kill(pgmap->ref); + for_each_device_pfn(pfn, pgmap) +@@ -131,13 +132,15 @@ static void devm_memremap_pages_release( + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) + - align_start; + ++ nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT)); ++ + mem_hotplug_begin(); + if (pgmap->type == MEMORY_DEVICE_PRIVATE) { + pfn = align_start >> PAGE_SHIFT; + __remove_pages(page_zone(pfn_to_page(pfn)), pfn, + align_size >> PAGE_SHIFT, NULL); + } else { +- arch_remove_memory(align_start, align_size, ++ arch_remove_memory(nid, align_start, align_size, + pgmap->altmap_valid ? &pgmap->altmap : NULL); + kasan_remove_zero_shadow(__va(align_start), align_size); + } +--- a/mm/hmm.c ++++ b/mm/hmm.c +@@ -999,6 +999,7 @@ static void hmm_devmem_release(void *dat + unsigned long start_pfn, npages; + struct zone *zone; + struct page *page; ++ int nid; + + /* pages are dead and unused, undo the arch mapping */ + start_pfn = (resource->start & ~(PA_SECTION_SIZE - 1)) >> PAGE_SHIFT; +@@ -1006,12 +1007,13 @@ static void hmm_devmem_release(void *dat + + page = pfn_to_page(start_pfn); + zone = page_zone(page); ++ nid = page_to_nid(page); + + mem_hotplug_begin(); + if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) + __remove_pages(zone, start_pfn, npages, NULL); + else +- arch_remove_memory(start_pfn << PAGE_SHIFT, ++ arch_remove_memory(nid, start_pfn << PAGE_SHIFT, + npages << PAGE_SHIFT, NULL); + mem_hotplug_done(); + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1916,7 +1916,7 @@ void __ref __remove_memory(int nid, u64 + memblock_free(start, size); + memblock_remove(start, size); + +- arch_remove_memory(start, size, NULL); ++ arch_remove_memory(nid, start, size, NULL); + + try_offline_node(nid); + diff --git a/queue-4.19/mm-memory_hotplug-allow-arch_remove_memory-without-config_memory_hotremove.patch b/queue-4.19/mm-memory_hotplug-allow-arch_remove_memory-without-config_memory_hotremove.patch new file mode 100644 index 00000000000..d8c00978f8a --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-allow-arch_remove_memory-without-config_memory_hotremove.patch @@ -0,0 +1,316 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:11 +0100 +Subject: mm/memory_hotplug: allow arch_remove_memory() without CONFIG_MEMORY_HOTREMOVE +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-15-david@redhat.com> + +From: David Hildenbrand + +commit 80ec922dbd87fd38d15719c86a94457204648aeb upstream. + +-- snip -- + +Missing arm64 memory hot(un)plug support. 
+ +-- snip -- + +We want to improve error handling while adding memory by allowing to use +arch_remove_memory() and __remove_pages() even if +CONFIG_MEMORY_HOTREMOVE is not set to e.g., implement something like: + + arch_add_memory() + rc = do_something(); + if (rc) { + arch_remove_memory(); + } + +We won't get rid of CONFIG_MEMORY_HOTREMOVE for now, as it will require +quite some dependencies for memory offlining. + +Link: http://lkml.kernel.org/r/20190527111152.16324-7-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Pavel Tatashin +Cc: Tony Luck +Cc: Fenghua Yu +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: Heiko Carstens +Cc: Yoshinori Sato +Cc: Rich Felker +Cc: Dave Hansen +Cc: Andy Lutomirski +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Borislav Petkov +Cc: "H. Peter Anvin" +Cc: Greg Kroah-Hartman +Cc: "Rafael J. Wysocki" +Cc: Michal Hocko +Cc: David Hildenbrand +Cc: Oscar Salvador +Cc: "Kirill A. Shutemov" +Cc: Alex Deucher +Cc: "David S. Miller" +Cc: Mark Brown +Cc: Chris Wilson +Cc: Christophe Leroy +Cc: Nicholas Piggin +Cc: Vasily Gorbik +Cc: Rob Herring +Cc: Masahiro Yamada +Cc: "mike.travis@hpe.com" +Cc: Andrew Banman +Cc: Arun KS +Cc: Qian Cai +Cc: Mathieu Malaterre +Cc: Baoquan He +Cc: Logan Gunthorpe +Cc: Anshuman Khandual +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: Chintan Pandya +Cc: Dan Williams +Cc: Ingo Molnar +Cc: Jonathan Cameron +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: Mark Rutland +Cc: Mike Rapoport +Cc: Oscar Salvador +Cc: Robin Murphy +Cc: Wei Yang +Cc: Will Deacon +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/mm/init.c | 2 -- + arch/powerpc/mm/mem.c | 2 -- + arch/s390/mm/init.c | 2 -- + arch/sh/mm/init.c | 2 -- + arch/x86/mm/init_32.c | 2 -- + arch/x86/mm/init_64.c | 2 -- + drivers/base/memory.c | 2 -- + include/linux/memory.h | 2 -- + include/linux/memory_hotplug.h | 2 -- + mm/memory_hotplug.c | 2 -- + mm/sparse.c | 6 ------ + 11 files changed, 26 deletions(-) + +--- a/arch/ia64/mm/init.c ++++ b/arch/ia64/mm/init.c +@@ -661,7 +661,6 @@ int arch_add_memory(int nid, u64 start, + return ret; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { +@@ -673,4 +672,3 @@ void arch_remove_memory(int nid, u64 sta + __remove_pages(zone, start_pfn, nr_pages, altmap); + } + #endif +-#endif +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -139,7 +139,6 @@ int __ref arch_add_memory(int nid, u64 s + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { +@@ -172,7 +171,6 @@ void __ref arch_remove_memory(int nid, u + resize_hpt_for_hotplug(memblock_phys_mem_size()); + } + #endif +-#endif /* CONFIG_MEMORY_HOTPLUG */ + + /* + * walk_memory_resource() needs to make sure there is no holes in a given +--- a/arch/s390/mm/init.c ++++ b/arch/s390/mm/init.c +@@ -239,7 +239,6 @@ int arch_add_memory(int nid, u64 start, + return rc; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { +@@ -251,5 +250,4 @@ void arch_remove_memory(int nid, u64 sta + __remove_pages(zone, start_pfn, nr_pages, altmap); + vmem_remove_mapping(start, size); + } +-#endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/sh/mm/init.c ++++ b/arch/sh/mm/init.c 
+@@ -443,7 +443,6 @@ int memory_add_physaddr_to_nid(u64 addr) + EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + #endif + +-#ifdef CONFIG_MEMORY_HOTREMOVE + void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { +@@ -454,5 +453,4 @@ void arch_remove_memory(int nid, u64 sta + zone = page_zone(pfn_to_page(start_pfn)); + __remove_pages(zone, start_pfn, nr_pages, altmap); + } +-#endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -860,7 +860,6 @@ int arch_add_memory(int nid, u64 start, + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { +@@ -872,7 +871,6 @@ void arch_remove_memory(int nid, u64 sta + __remove_pages(zone, start_pfn, nr_pages, altmap); + } + #endif +-#endif + + int kernel_set_to_readonly __read_mostly; + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1132,7 +1132,6 @@ void __ref vmemmap_free(unsigned long st + remove_pagetable(start, end, false, altmap); + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + static void __meminit + kernel_physical_mapping_remove(unsigned long start, unsigned long end) + { +@@ -1157,7 +1156,6 @@ void __ref arch_remove_memory(int nid, u + __remove_pages(zone, start_pfn, nr_pages, altmap); + kernel_physical_mapping_remove(start, start + size); + } +-#endif + #endif /* CONFIG_MEMORY_HOTPLUG */ + + static struct kcore_list kcore_vsyscall; +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -737,7 +737,6 @@ out: + return ret; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + static void + unregister_memory(struct memory_block *memory) + { +@@ -776,7 +775,6 @@ void unregister_memory_section(struct me + out_unlock: + mutex_unlock(&mem_sysfs_mutex); + } +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + + /* return true if the memory block is offlined, otherwise, return false */ + bool is_memblock_offlined(struct memory_block *mem) +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -112,9 +112,7 @@ extern void unregister_memory_notifier(s + extern int register_memory_isolate_notifier(struct notifier_block *nb); + extern void unregister_memory_isolate_notifier(struct notifier_block *nb); + int hotplug_memory_register(int nid, struct mem_section *section); +-#ifdef CONFIG_MEMORY_HOTREMOVE + extern void unregister_memory_section(struct mem_section *); +-#endif + extern int memory_dev_init(void); + extern int memory_notify(unsigned long val, void *v); + extern int memory_isolate_notify(unsigned long val, void *v); +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -108,12 +108,10 @@ static inline bool movable_node_is_enabl + return movable_node_enabled; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + extern void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); + extern void __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + + /* reasonably generic interface to expand the physical pages */ + extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -315,7 +315,6 @@ out: + return err; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ + static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, + unsigned long 
start_pfn, +@@ -542,7 +541,6 @@ void __remove_pages(struct zone *zone, u + + set_zone_contiguous(zone); + } +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + + int set_online_page_callback(online_page_callback_t callback) + { +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -576,7 +576,6 @@ static void __kfree_section_memmap(struc + + vmemmap_free(start, end, altmap); + } +-#ifdef CONFIG_MEMORY_HOTREMOVE + static void free_map_bootmem(struct page *memmap) + { + unsigned long start = (unsigned long)memmap; +@@ -584,7 +583,6 @@ static void free_map_bootmem(struct page + + vmemmap_free(start, end, NULL); + } +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + #else + static struct page *__kmalloc_section_memmap(void) + { +@@ -623,7 +621,6 @@ static void __kfree_section_memmap(struc + get_order(sizeof(struct page) * PAGES_PER_SECTION)); + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + static void free_map_bootmem(struct page *memmap) + { + unsigned long maps_section_nr, removing_section_nr, i; +@@ -653,7 +650,6 @@ static void free_map_bootmem(struct page + put_page_bootmem(page); + } + } +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + + /* +@@ -712,7 +708,6 @@ out: + return ret; + } + +-#ifdef CONFIG_MEMORY_HOTREMOVE + #ifdef CONFIG_MEMORY_FAILURE + static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) + { +@@ -780,5 +775,4 @@ void sparse_remove_one_section(struct zo + PAGES_PER_SECTION - map_offset); + free_section_usemap(memmap, usemap, altmap); + } +-#endif /* CONFIG_MEMORY_HOTREMOVE */ + #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/queue-4.19/mm-memory_hotplug-create-memory-block-devices-after-arch_add_memory.patch b/queue-4.19/mm-memory_hotplug-create-memory-block-devices-after-arch_add_memory.patch new file mode 100644 index 00000000000..46a25b7f40f --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-create-memory-block-devices-after-arch_add_memory.patch @@ -0,0 +1,270 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:13 +0100 +Subject: mm/memory_hotplug: create memory block devices after arch_add_memory() +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-17-david@redhat.com> + +From: David Hildenbrand + +commit db051a0dac13db24d58470d75cee0ce7c6b031a1 upstream. + +Only memory to be added to the buddy and to be onlined/offlined by user +space using /sys/devices/system/memory/... needs (and should have!) +memory block devices. + +Factor out creation of memory block devices. Create all devices after +arch_add_memory() succeeded. We can later drop the want_memblock +parameter, because it is now effectively stale. + +Only after memory block devices have been added, memory can be onlined +by user space. This implies, that memory is not visible to user space +at all before arch_add_memory() succeeded. + +While at it + - use WARN_ON_ONCE instead of BUG_ON in moved unregister_memory() + - introduce find_memory_block_by_id() to search via block id + - Use find_memory_block_by_id() in init_memory_block() to catch + duplicates + +Link: http://lkml.kernel.org/r/20190527111152.16324-8-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Pavel Tatashin +Acked-by: Michal Hocko +Cc: Greg Kroah-Hartman +Cc: "Rafael J. 
Wysocki" +Cc: David Hildenbrand +Cc: "mike.travis@hpe.com" +Cc: Ingo Molnar +Cc: Andrew Banman +Cc: Oscar Salvador +Cc: Qian Cai +Cc: Wei Yang +Cc: Arun KS +Cc: Mathieu Malaterre +Cc: Alex Deucher +Cc: Andy Lutomirski +Cc: Anshuman Khandual +Cc: Ard Biesheuvel +Cc: Baoquan He +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Catalin Marinas +Cc: Chintan Pandya +Cc: Christophe Leroy +Cc: Chris Wilson +Cc: Dan Williams +Cc: Dave Hansen +Cc: "David S. Miller" +Cc: Fenghua Yu +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Jonathan Cameron +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: "Kirill A. Shutemov" +Cc: Logan Gunthorpe +Cc: Mark Brown +Cc: Mark Rutland +Cc: Masahiro Yamada +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Peter Zijlstra +Cc: Rich Felker +Cc: Rob Herring +Cc: Robin Murphy +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Will Deacon +Cc: Yoshinori Sato +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 82 ++++++++++++++++++++++++++++++++----------------- + include/linux/memory.h | 2 - + mm/memory_hotplug.c | 15 ++++---- + 3 files changed, 63 insertions(+), 36 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -39,6 +39,11 @@ static inline int base_memory_block_id(i + return section_nr / sections_per_block; + } + ++static inline int pfn_to_block_id(unsigned long pfn) ++{ ++ return base_memory_block_id(pfn_to_section_nr(pfn)); ++} ++ + static int memory_subsys_online(struct device *dev); + static int memory_subsys_offline(struct device *dev); + +@@ -591,10 +596,9 @@ int __weak arch_get_memory_phys_device(u + * A reference for the returned object is held and the reference for the + * hinted object is released. + */ +-struct memory_block *find_memory_block_hinted(struct mem_section *section, +- struct memory_block *hint) ++static struct memory_block *find_memory_block_by_id(int block_id, ++ struct memory_block *hint) + { +- int block_id = base_memory_block_id(__section_nr(section)); + struct device *hintdev = hint ? &hint->dev : NULL; + struct device *dev; + +@@ -606,6 +610,14 @@ struct memory_block *find_memory_block_h + return to_memory_block(dev); + } + ++struct memory_block *find_memory_block_hinted(struct mem_section *section, ++ struct memory_block *hint) ++{ ++ int block_id = base_memory_block_id(__section_nr(section)); ++ ++ return find_memory_block_by_id(block_id, hint); ++} ++ + /* + * For now, we have a linear search to go find the appropriate + * memory_block corresponding to a particular phys_index. If +@@ -667,6 +679,11 @@ static int init_memory_block(struct memo + unsigned long start_pfn; + int ret = 0; + ++ mem = find_memory_block_by_id(block_id, NULL); ++ if (mem) { ++ put_device(&mem->dev); ++ return -EEXIST; ++ } + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; +@@ -704,44 +721,53 @@ static int add_memory_block(int base_sec + return 0; + } + ++static void unregister_memory(struct memory_block *memory) ++{ ++ if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) ++ return; ++ ++ /* drop the ref. we got via find_memory_block() */ ++ put_device(&memory->dev); ++ device_unregister(&memory->dev); ++} ++ + /* +- * need an interface for the VM to add new memory regions, +- * but without onlining it. ++ * Create memory block devices for the given memory area. Start and size ++ * have to be aligned to memory block granularity. 
Memory block devices ++ * will be initialized as offline. + */ +-int hotplug_memory_register(int nid, struct mem_section *section) ++int create_memory_block_devices(unsigned long start, unsigned long size) + { +- int block_id = base_memory_block_id(__section_nr(section)); +- int ret = 0; ++ const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); ++ int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + struct memory_block *mem; ++ unsigned long block_id; ++ int ret = 0; + +- mutex_lock(&mem_sysfs_mutex); ++ if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || ++ !IS_ALIGNED(size, memory_block_size_bytes()))) ++ return -EINVAL; + +- mem = find_memory_block(section); +- if (mem) { +- mem->section_count++; +- put_device(&mem->dev); +- } else { ++ mutex_lock(&mem_sysfs_mutex); ++ for (block_id = start_block_id; block_id != end_block_id; block_id++) { + ret = init_memory_block(&mem, block_id, MEM_OFFLINE); + if (ret) +- goto out; +- mem->section_count++; ++ break; ++ mem->section_count = sections_per_block; ++ } ++ if (ret) { ++ end_block_id = block_id; ++ for (block_id = start_block_id; block_id != end_block_id; ++ block_id++) { ++ mem = find_memory_block_by_id(block_id, NULL); ++ mem->section_count = 0; ++ unregister_memory(mem); ++ } + } +- +-out: + mutex_unlock(&mem_sysfs_mutex); + return ret; + } + +-static void +-unregister_memory(struct memory_block *memory) +-{ +- BUG_ON(memory->dev.bus != &memory_subsys); +- +- /* drop the ref. we got via find_memory_block() */ +- put_device(&memory->dev); +- device_unregister(&memory->dev); +-} +- + void unregister_memory_section(struct mem_section *section) + { + struct memory_block *mem; +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -111,7 +111,7 @@ extern int register_memory_notifier(stru + extern void unregister_memory_notifier(struct notifier_block *nb); + extern int register_memory_isolate_notifier(struct notifier_block *nb); + extern void unregister_memory_isolate_notifier(struct notifier_block *nb); +-int hotplug_memory_register(int nid, struct mem_section *section); ++int create_memory_block_devices(unsigned long start, unsigned long size); + extern void unregister_memory_section(struct mem_section *); + extern int memory_dev_init(void); + extern int memory_notify(unsigned long val, void *v); +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -256,13 +256,7 @@ static int __meminit __add_section(int n + return -EEXIST; + + ret = sparse_add_one_section(nid, phys_start_pfn, altmap); +- if (ret < 0) +- return ret; +- +- if (!want_memblock) +- return 0; +- +- return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); ++ return ret < 0 ? ret : 0; + } + + /* +@@ -1096,6 +1090,13 @@ int __ref add_memory_resource(int nid, s + if (ret < 0) + goto error; + ++ /* create memory block devices after memory was added */ ++ ret = create_memory_block_devices(start, size); ++ if (ret) { ++ arch_remove_memory(nid, start, size, NULL); ++ goto error; ++ } ++ + if (new_node) { + /* If sysfs file of new node can't be created, cpu on the node + * can't be hot-added. There is no rollback way now. 
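Condensing the add_memory_resource() hunk just above into an editorial sketch (not patch content; labels and the 4.19 arch_add_memory() signature abbreviated), the new ordering makes the memory block devices the rollback point once the mapping exists:

    /*
     * Sketch of the reworked add_memory_resource() flow: create memory
     * block devices only after arch_add_memory() succeeded, and tear the
     * mapping down again if device creation fails.
     */
    ret = arch_add_memory(nid, start, size, NULL, true);
    if (ret < 0)
            goto error;

    /* create memory block devices after memory was added */
    ret = create_memory_block_devices(start, size);
    if (ret) {
            arch_remove_memory(nid, start, size, NULL);
            goto error;
    }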
diff --git a/queue-4.19/mm-memory_hotplug-fix-try_offline_node.patch b/queue-4.19/mm-memory_hotplug-fix-try_offline_node.patch
new file mode 100644
index 00000000000..37f84d860bb
--- /dev/null
+++ b/queue-4.19/mm-memory_hotplug-fix-try_offline_node.patch
@@ -0,0 +1,238 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:20 +0100
+Subject: mm/memory_hotplug: fix try_offline_node()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K.V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-24-david@redhat.com>
+
+From: David Hildenbrand
+
+commit 2c91f8fc6c999fe10185d8ad99fda1759f662f70 upstream.
+
+-- snip --
+
+Only contextual issues:
+- Unrelated check_and_unmap_cpu_on_node() changes are missing.
+- Unrelated walk_memory_blocks() has not been moved/refactored yet.
+
+-- snip --
+
+try_offline_node() is pretty much broken right now:
+
+ - The node span is updated when onlining memory, not when adding it. We
+   ignore memory that was never onlined. Bad.
+
+ - We touch possible garbage memmaps. The pfn_to_nid(pfn) can easily
+   trigger a kernel panic. Bad for memory that is offline but also bad
+   for subsection hotadd with ZONE_DEVICE, whereby the memmap of the
+   first PFN of a section might contain garbage.
+
+ - Sections belonging to mixed nodes are not properly considered.
+
+As memory blocks might belong to multiple nodes, we would have to walk
+all pageblocks (or at least subsections) within present sections.
+However, we don't have a way to identify whether a memmap that is not
+online was initialized (relevant for ZONE_DEVICE). This makes things
+more complicated.
+
+Luckily, we can piggyback on the node span and the nid stored in memory
+blocks. Currently, the node span is grown when calling
+move_pfn_range_to_zone() - e.g., when onlining memory, and shrunk when
+removing memory, before calling try_offline_node(). Sysfs links are
+created via link_mem_sections(), e.g., during boot or when adding
+memory.
+
+If the node still spans memory or if any memory block belongs to the
+nid, we don't set the node offline. As memory blocks that span multiple
+nodes cannot get offlined, the nid stored in memory blocks is reliable
+enough (for such online memory blocks, the node still spans the memory).
+
+Introduce for_each_memory_block() to efficiently walk all memory blocks.
+
+Note: We will soon stop shrinking the ZONE_DEVICE zone and the node span
+when removing ZONE_DEVICE memory to fix similar issues (access of
+garbage memmaps) - until we have a reliable way to identify whether
+these memmaps were properly initialized. This implies that once a node
+had ZONE_DEVICE memory, we will later not be able to set the node
+offline - which should be acceptable.
+
+Since commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate
+hotadded memory to zones until online") memory that is added is not
+associated with a zone/node (memmap not initialized). The introducing
+commit 60a5a19e7419 ("memory-hotplug: remove sysfs file of node")
+already missed that we could have multiple nodes for a section and that
+the zone/node span is updated when onlining pages, not when adding them.
+
+I tested this by hotplugging two DIMMs to a memory-less and cpu-less
+NUMA node. The node is properly onlined when adding the DIMMs. When
+removing the DIMMs, the node is properly offlined.
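Before the reported panic trace and the diff itself, the fixed logic can be condensed into a short editorial sketch (not patch content; simplified from the mm/memory_hotplug.c hunk below, with the surrounding CPU checks elided):

    /*
     * Condensed sketch of the fixed try_offline_node(): the node stays
     * online while it still spans pages (e.g., ZONE_DEVICE) or while any
     * memory block device - even an offline one - still links to it.
     */
    void try_offline_node(int nid)
    {
            pg_data_t *pgdat = NODE_DATA(nid);

            if (pgdat->node_spanned_pages)
                    return;

            if (for_each_memory_block(&nid, check_no_memblock_for_node_cb))
                    return;

            /* ... then unmap CPUs and mark the node offline ... */
    }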
+ +Masayoshi Mizuma reported: + +: Without this patch, memory hotplug fails as panic: +: +: BUG: kernel NULL pointer dereference, address: 0000000000000000 +: ... +: Call Trace: +: remove_memory_block_devices+0x81/0xc0 +: try_remove_memory+0xb4/0x130 +: __remove_memory+0xa/0x20 +: acpi_memory_device_remove+0x84/0x100 +: acpi_bus_trim+0x57/0x90 +: acpi_bus_trim+0x2e/0x90 +: acpi_device_hotplug+0x2b2/0x4d0 +: acpi_hotplug_work_fn+0x1a/0x30 +: process_one_work+0x171/0x380 +: worker_thread+0x49/0x3f0 +: kthread+0xf8/0x130 +: ret_from_fork+0x35/0x40 + +[david@redhat.com: v3] + Link: http://lkml.kernel.org/r/20191102120221.7553-1-david@redhat.com +Link: http://lkml.kernel.org/r/20191028105458.28320-1-david@redhat.com +Fixes: 60a5a19e7419 ("memory-hotplug: remove sysfs file of node") +Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visiable after d0dc12e86b319 +Signed-off-by: David Hildenbrand +Tested-by: Masayoshi Mizuma +Cc: Tang Chen +Cc: Greg Kroah-Hartman +Cc: "Rafael J. Wysocki" +Cc: Keith Busch +Cc: Jiri Olsa +Cc: "Peter Zijlstra (Intel)" +Cc: Jani Nikula +Cc: Nayna Jain +Cc: Michal Hocko +Cc: Oscar Salvador +Cc: Stephen Rothwell +Cc: Dan Williams +Cc: Pavel Tatashin +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 36 ++++++++++++++++++++++++++++++++++++ + include/linux/memory.h | 2 ++ + mm/memory_hotplug.c | 47 +++++++++++++++++++++++++++++------------------ + 3 files changed, 67 insertions(+), 18 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -862,3 +862,39 @@ out: + printk(KERN_ERR "%s() failed: %d\n", __func__, ret); + return ret; + } ++ ++struct for_each_memory_block_cb_data { ++ walk_memory_blocks_func_t func; ++ void *arg; ++}; ++ ++static int for_each_memory_block_cb(struct device *dev, void *data) ++{ ++ struct memory_block *mem = to_memory_block(dev); ++ struct for_each_memory_block_cb_data *cb_data = data; ++ ++ return cb_data->func(mem, cb_data->arg); ++} ++ ++/** ++ * for_each_memory_block - walk through all present memory blocks ++ * ++ * @arg: argument passed to func ++ * @func: callback for each memory block walked ++ * ++ * This function walks through all present memory blocks, calling func on ++ * each memory block. ++ * ++ * In case func() returns an error, walking is aborted and the error is ++ * returned. ++ */ ++int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) ++{ ++ struct for_each_memory_block_cb_data cb_data = { ++ .func = func, ++ .arg = arg, ++ }; ++ ++ return bus_for_each_dev(&memory_subsys, NULL, &cb_data, ++ for_each_memory_block_cb); ++} +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -119,6 +119,8 @@ extern int memory_isolate_notify(unsigne + extern struct memory_block *find_memory_block_hinted(struct mem_section *, + struct memory_block *); + extern struct memory_block *find_memory_block(struct mem_section *); ++typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); ++extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); + #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<nid == nid ? 
-EEXIST : 0; ++} ++ + /** + * try_offline_node + * @nid: the node ID +@@ -1824,25 +1836,24 @@ static int check_and_unmap_cpu_on_node(p + void try_offline_node(int nid) + { + pg_data_t *pgdat = NODE_DATA(nid); +- unsigned long start_pfn = pgdat->node_start_pfn; +- unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; +- unsigned long pfn; +- +- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { +- unsigned long section_nr = pfn_to_section_nr(pfn); +- +- if (!present_section_nr(section_nr)) +- continue; +- +- if (pfn_to_nid(pfn) != nid) +- continue; +- +- /* +- * some memory sections of this node are not removed, and we +- * can't offline node now. +- */ ++ int rc; ++ ++ /* ++ * If the node still spans pages (especially ZONE_DEVICE), don't ++ * offline it. A node spans memory after move_pfn_range_to_zone(), ++ * e.g., after the memory block was onlined. ++ */ ++ if (pgdat->node_spanned_pages) ++ return; ++ ++ /* ++ * Especially offline memory blocks might not be spanned by the ++ * node. They will get spanned by the node once they get onlined. ++ * However, they link to the node in sysfs and can get onlined later. ++ */ ++ rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb); ++ if (rc) + return; +- } + + if (check_and_unmap_cpu_on_node(pgdat)) + return; diff --git a/queue-4.19/mm-memory_hotplug-make-__remove_pages-and-arch_remove_memory-never-fail.patch b/queue-4.19/mm-memory_hotplug-make-__remove_pages-and-arch_remove_memory-never-fail.patch new file mode 100644 index 00000000000..990c154d87d --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-make-__remove_pages-and-arch_remove_memory-never-fail.patch @@ -0,0 +1,278 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:09 +0100 +Subject: mm/memory_hotplug: make __remove_pages() and arch_remove_memory() never fail +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-13-david@redhat.com> + +From: David Hildenbrand + +commit ac5c94264580f498e484c854031d0226b3c1038f upstream. + +-- snip -- + +Minor conflict in arch/powerpc/mm/mem.c + +-- snip -- + +All callers of arch_remove_memory() ignore errors. And we should really +try to remove any errors from the memory removal path. No more errors are +reported from __remove_pages(). BUG() in s390x code in case +arch_remove_memory() is triggered. We may implement that properly later. +WARN in case powerpc code failed to remove the section mapping, which is +better than ignoring the error completely right now. + +Link: http://lkml.kernel.org/r/20190409100148.24703-5-david@redhat.com +Signed-off-by: David Hildenbrand +Cc: Tony Luck +Cc: Fenghua Yu +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: Yoshinori Sato +Cc: Rich Felker +Cc: Dave Hansen +Cc: Andy Lutomirski +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: "H. Peter Anvin" +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Oscar Salvador +Cc: "Kirill A. 
Shutemov" +Cc: Christophe Leroy +Cc: Stefan Agner +Cc: Nicholas Piggin +Cc: Pavel Tatashin +Cc: Vasily Gorbik +Cc: Arun KS +Cc: Geert Uytterhoeven +Cc: Masahiro Yamada +Cc: Rob Herring +Cc: Joonsoo Kim +Cc: Wei Yang +Cc: Qian Cai +Cc: Mathieu Malaterre +Cc: Andrew Banman +Cc: Greg Kroah-Hartman +Cc: Ingo Molnar +Cc: Mike Travis +Cc: Oscar Salvador +Cc: "Rafael J. Wysocki" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/mm/init.c | 11 +++-------- + arch/powerpc/mm/mem.c | 9 +++------ + arch/s390/mm/init.c | 5 +++-- + arch/sh/mm/init.c | 11 +++-------- + arch/x86/mm/init_32.c | 5 +++-- + arch/x86/mm/init_64.c | 10 +++------- + include/linux/memory_hotplug.h | 8 ++++---- + mm/memory_hotplug.c | 5 ++--- + 8 files changed, 24 insertions(+), 40 deletions(-) + +--- a/arch/ia64/mm/init.c ++++ b/arch/ia64/mm/init.c +@@ -662,20 +662,15 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) ++void arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; +- int ret; + + zone = page_zone(pfn_to_page(start_pfn)); +- ret = __remove_pages(zone, start_pfn, nr_pages, altmap); +- if (ret) +- pr_warn("%s: Problem encountered in __remove_pages() as" +- " ret=%d\n", __func__, ret); +- +- return ret; ++ __remove_pages(zone, start_pfn, nr_pages, altmap); + } + #endif + #endif +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -140,7 +140,7 @@ int __ref arch_add_memory(int nid, u64 s + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int __ref arch_remove_memory(int nid, u64 start, u64 size, ++void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; +@@ -156,14 +156,13 @@ int __ref arch_remove_memory(int nid, u6 + if (altmap) + page += vmem_altmap_offset(altmap); + +- ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); +- if (ret) +- return ret; ++ __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + flush_inval_dcache_range(start, start + size); + ret = remove_section_mapping(start, start + size); ++ WARN_ON_ONCE(ret); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory +@@ -171,8 +170,6 @@ int __ref arch_remove_memory(int nid, u6 + vm_unmap_aliases(); + + resize_hpt_for_hotplug(memblock_phys_mem_size()); +- +- return ret; + } + #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/s390/mm/init.c ++++ b/arch/s390/mm/init.c +@@ -240,14 +240,15 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) ++void arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + /* + * There is no hardware or firmware interface which could trigger a + * hot memory remove on s390. So there is nothing that needs to be + * implemented. 
+ */ +- return -EBUSY; ++ BUG(); + } + #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/sh/mm/init.c ++++ b/arch/sh/mm/init.c +@@ -444,20 +444,15 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + #endif + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) ++void arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = PFN_DOWN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; +- int ret; + + zone = page_zone(pfn_to_page(start_pfn)); +- ret = __remove_pages(zone, start_pfn, nr_pages, altmap); +- if (unlikely(ret)) +- pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, +- ret); +- +- return ret; ++ __remove_pages(zone, start_pfn, nr_pages, altmap); + } + #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -861,14 +861,15 @@ int arch_add_memory(int nid, u64 start, + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) ++void arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); +- return __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(zone, start_pfn, nr_pages, altmap); + } + #endif + #endif +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1142,24 +1142,20 @@ kernel_physical_mapping_remove(unsigned + remove_pagetable(start, end, true, NULL); + } + +-int __ref arch_remove_memory(int nid, u64 start, u64 size, +- struct vmem_altmap *altmap) ++void __ref arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct zone *zone; +- int ret; + + /* With altmap the first mapped page is offset from @start */ + if (altmap) + page += vmem_altmap_offset(altmap); + zone = page_zone(page); +- ret = __remove_pages(zone, start_pfn, nr_pages, altmap); +- WARN_ON_ONCE(ret); ++ __remove_pages(zone, start_pfn, nr_pages, altmap); + kernel_physical_mapping_remove(start, start + size); +- +- return ret; + } + #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -109,10 +109,10 @@ static inline bool movable_node_is_enabl + } + + #ifdef CONFIG_MEMORY_HOTREMOVE +-extern int arch_remove_memory(int nid, u64 start, u64 size, +- struct vmem_altmap *altmap); +-extern int __remove_pages(struct zone *zone, unsigned long start_pfn, +- unsigned long nr_pages, struct vmem_altmap *altmap); ++extern void arch_remove_memory(int nid, u64 start, u64 size, ++ struct vmem_altmap *altmap); ++extern void __remove_pages(struct zone *zone, unsigned long start_pfn, ++ unsigned long nr_pages, struct vmem_altmap *altmap); + #endif /* CONFIG_MEMORY_HOTREMOVE */ + + /* reasonably generic interface to expand the physical pages */ +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -509,8 +509,8 @@ static void __remove_section(struct zone + * sure that pages are marked reserved and zones are adjust properly by + * calling offline_pages(). 
+ */ +-int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, +- unsigned long nr_pages, struct vmem_altmap *altmap) ++void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, ++ unsigned long nr_pages, struct vmem_altmap *altmap) + { + unsigned long i; + unsigned long map_offset = 0; +@@ -541,7 +541,6 @@ int __remove_pages(struct zone *zone, un + } + + set_zone_contiguous(zone); +- return 0; + } + #endif /* CONFIG_MEMORY_HOTREMOVE */ + diff --git a/queue-4.19/mm-memory_hotplug-make-__remove_section-never-fail.patch b/queue-4.19/mm-memory_hotplug-make-__remove_section-never-fail.patch new file mode 100644 index 00000000000..5ed9ad2fe23 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-make-__remove_section-never-fail.patch @@ -0,0 +1,126 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:07 +0100 +Subject: mm/memory_hotplug: make __remove_section() never fail +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-11-david@redhat.com> + +From: David Hildenbrand + +commit 9d1d887d785b4fe0590bd3c5e71acaa3908044e2 upstream. + +Let's just warn in case a section is not valid instead of failing to +remove somewhere in the middle of the process, returning an error that +will be mostly ignored by callers. + +Link: http://lkml.kernel.org/r/20190409100148.24703-4-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Oscar Salvador +Cc: Michal Hocko +Cc: David Hildenbrand +Cc: Pavel Tatashin +Cc: Qian Cai +Cc: Wei Yang +Cc: Arun KS +Cc: Mathieu Malaterre +Cc: Andrew Banman +Cc: Andy Lutomirski +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: Fenghua Yu +Cc: Geert Uytterhoeven +Cc: Greg Kroah-Hartman +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Ingo Molnar +Cc: Joonsoo Kim +Cc: "Kirill A. Shutemov" +Cc: Martin Schwidefsky +Cc: Masahiro Yamada +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: Mike Travis +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Peter Zijlstra +Cc: "Rafael J. 
Wysocki" +Cc: Rich Felker +Cc: Rob Herring +Cc: Stefan Agner +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Yoshinori Sato +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory_hotplug.c | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -478,15 +478,15 @@ static void __remove_zone(struct zone *z + pgdat_resize_unlock(zone->zone_pgdat, &flags); + } + +-static int __remove_section(struct zone *zone, struct mem_section *ms, +- unsigned long map_offset, struct vmem_altmap *altmap) ++static void __remove_section(struct zone *zone, struct mem_section *ms, ++ unsigned long map_offset, ++ struct vmem_altmap *altmap) + { + unsigned long start_pfn; + int scn_nr; +- int ret = -EINVAL; + +- if (!valid_section(ms)) +- return ret; ++ if (WARN_ON_ONCE(!valid_section(ms))) ++ return; + + unregister_memory_section(ms); + +@@ -495,7 +495,6 @@ static int __remove_section(struct zone + __remove_zone(zone, start_pfn); + + sparse_remove_one_section(zone, ms, map_offset, altmap); +- return 0; + } + + /** +@@ -515,7 +514,7 @@ int __remove_pages(struct zone *zone, un + { + unsigned long i; + unsigned long map_offset = 0; +- int sections_to_remove, ret = 0; ++ int sections_to_remove; + + /* In the ZONE_DEVICE case device driver owns the memory region */ + if (is_dev_zone(zone)) { +@@ -536,16 +535,13 @@ int __remove_pages(struct zone *zone, un + unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + + cond_resched(); +- ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, +- altmap); ++ __remove_section(zone, __pfn_to_section(pfn), map_offset, ++ altmap); + map_offset = 0; +- if (ret) +- break; + } + + set_zone_contiguous(zone); +- +- return ret; ++ return 0; + } + #endif /* CONFIG_MEMORY_HOTREMOVE */ + diff --git a/queue-4.19/mm-memory_hotplug-make-remove_memory-take-the-device_hotplug_lock.patch b/queue-4.19/mm-memory_hotplug-make-remove_memory-take-the-device_hotplug_lock.patch new file mode 100644 index 00000000000..7be0687db5d --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-make-remove_memory-take-the-device_hotplug_lock.patch @@ -0,0 +1,216 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:49:58 +0100 +Subject: mm/memory_hotplug: make remove_memory() take the device_hotplug_lock +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-2-david@redhat.com> + +From: David Hildenbrand + +commit d15e59260f62bd5e0f625cf5f5240f6ffac78ab6 upstream. + +Patch series "mm: online/offline_pages called w.o. mem_hotplug_lock", v3. + +Reading through the code and studying how mem_hotplug_lock is to be used, +I noticed that there are two places where we can end up calling +device_online()/device_offline() - online_pages()/offline_pages() without +the mem_hotplug_lock. And there are other places where we call +device_online()/device_offline() without the device_hotplug_lock. + +While e.g. + echo "online" > /sys/devices/system/memory/memory9/state +is fine, e.g. + echo 1 > /sys/devices/system/memory/memory9/online +Will not take the mem_hotplug_lock. However the device_lock() and +device_hotplug_lock. + +E.g. 
via memory_probe_store(), we can end up calling +add_memory()->online_pages() without the device_hotplug_lock. So we can +have concurrent callers in online_pages(). We e.g. touch in +online_pages() basically unprotected zone->present_pages then. + +Looks like there is a longer history to that (see Patch #2 for details), +and fixing it to work the way it was intended is not really possible. We +would e.g. have to take the mem_hotplug_lock in device/base/core.c, which +sounds wrong. + +Summary: We had a lock inversion on mem_hotplug_lock and device_lock(). +More details can be found in patch 3 and patch 6. + +I propose the general rules (documentation added in patch 6): + +1. add_memory/add_memory_resource() must only be called with + device_hotplug_lock. +2. remove_memory() must only be called with device_hotplug_lock. This is + already documented and holds for all callers. +3. device_online()/device_offline() must only be called with + device_hotplug_lock. This is already documented and true for now in core + code. Other callers (related to memory hotplug) have to be fixed up. +4. mem_hotplug_lock is taken inside of add_memory/remove_memory/ + online_pages/offline_pages. + +To me, this looks way cleaner than what we have right now (and easier to +verify). And looking at the documentation of remove_memory, using +lock_device_hotplug also for add_memory() feels natural. + +This patch (of 6): + +remove_memory() is exported right now but requires the +device_hotplug_lock, which is not exported. So let's provide a variant +that takes the lock and only export that one. + +The lock is already held in + arch/powerpc/platforms/pseries/hotplug-memory.c + drivers/acpi/acpi_memhotplug.c + arch/powerpc/platforms/powernv/memtrace.c + +Apart from that, there are not other users in the tree. + +Link: http://lkml.kernel.org/r/20180925091457.28651-2-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Pavel Tatashin +Reviewed-by: Rafael J. Wysocki +Reviewed-by: Rashmica Gupta +Reviewed-by: Oscar Salvador +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: "Rafael J. Wysocki" +Cc: Len Brown +Cc: Rashmica Gupta +Cc: Michael Neuling +Cc: Balbir Singh +Cc: Nathan Fontenot +Cc: John Allen +Cc: Michal Hocko +Cc: Dan Williams +Cc: Joonsoo Kim +Cc: Vlastimil Babka +Cc: Greg Kroah-Hartman +Cc: YASUAKI ISHIMATSU +Cc: Mathieu Malaterre +Cc: Boris Ostrovsky +Cc: Haiyang Zhang +Cc: Heiko Carstens +Cc: Jonathan Corbet +Cc: Juergen Gross +Cc: Kate Stewart +Cc: "K. Y. 
Srinivasan" +Cc: Martin Schwidefsky +Cc: Philippe Ombredanne +Cc: Stephen Hemminger +Cc: Thomas Gleixner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/platforms/powernv/memtrace.c | 2 +- + arch/powerpc/platforms/pseries/hotplug-memory.c | 6 +++--- + drivers/acpi/acpi_memhotplug.c | 2 +- + include/linux/memory_hotplug.h | 3 ++- + mm/memory_hotplug.c | 9 ++++++++- + 5 files changed, 15 insertions(+), 7 deletions(-) + +--- a/arch/powerpc/platforms/powernv/memtrace.c ++++ b/arch/powerpc/platforms/powernv/memtrace.c +@@ -122,7 +122,7 @@ static u64 memtrace_alloc_node(u32 nid, + */ + end_pfn = base_pfn + nr_pages; + for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { +- remove_memory(nid, pfn << PAGE_SHIFT, bytes); ++ __remove_memory(nid, pfn << PAGE_SHIFT, bytes); + } + unlock_device_hotplug(); + return base_pfn << PAGE_SHIFT; +--- a/arch/powerpc/platforms/pseries/hotplug-memory.c ++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c +@@ -301,7 +301,7 @@ static int pseries_remove_memblock(unsig + nid = memory_add_physaddr_to_nid(base); + + for (i = 0; i < sections_per_block; i++) { +- remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); ++ __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + base += MIN_MEMORY_BLOCK_SIZE; + } + +@@ -393,7 +393,7 @@ static int dlpar_remove_lmb(struct drmem + block_sz = pseries_memory_block_size(); + nid = memory_add_physaddr_to_nid(lmb->base_addr); + +- remove_memory(nid, lmb->base_addr, block_sz); ++ __remove_memory(nid, lmb->base_addr, block_sz); + + /* Update memory regions for memory remove */ + memblock_remove(lmb->base_addr, block_sz); +@@ -680,7 +680,7 @@ static int dlpar_add_lmb(struct drmem_lm + + rc = dlpar_online_lmb(lmb); + if (rc) { +- remove_memory(nid, lmb->base_addr, block_sz); ++ __remove_memory(nid, lmb->base_addr, block_sz); + invalidate_lmb_associativity_index(lmb); + } else { + lmb->flags |= DRCONF_MEM_ASSIGNED; +--- a/drivers/acpi/acpi_memhotplug.c ++++ b/drivers/acpi/acpi_memhotplug.c +@@ -282,7 +282,7 @@ static void acpi_memory_remove_memory(st + nid = memory_add_physaddr_to_nid(info->start_addr); + + acpi_unbind_memory_blocks(info); +- remove_memory(nid, info->start_addr, info->length); ++ __remove_memory(nid, info->start_addr, info->length); + list_del(&info->list); + kfree(info); + } +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -303,6 +303,7 @@ extern bool is_mem_section_removable(uns + extern void try_offline_node(int nid); + extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); + extern void remove_memory(int nid, u64 start, u64 size); ++extern void __remove_memory(int nid, u64 start, u64 size); + + #else + static inline bool is_mem_section_removable(unsigned long pfn, +@@ -319,6 +320,7 @@ static inline int offline_pages(unsigned + } + + static inline void remove_memory(int nid, u64 start, u64 size) {} ++static inline void __remove_memory(int nid, u64 start, u64 size) {} + #endif /* CONFIG_MEMORY_HOTREMOVE */ + + extern void __ref free_area_init_core_hotplug(int nid); +@@ -333,7 +335,6 @@ extern void move_pfn_range_to_zone(struc + unsigned long nr_pages, struct vmem_altmap *altmap); + extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); + extern bool is_memblock_offlined(struct memory_block *mem); +-extern void remove_memory(int nid, u64 start, u64 size); + extern int sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, 
struct vmem_altmap *altmap); + extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1893,7 +1893,7 @@ EXPORT_SYMBOL(try_offline_node); + * and online/offline operations before this call, as required by + * try_offline_node(). + */ +-void __ref remove_memory(int nid, u64 start, u64 size) ++void __ref __remove_memory(int nid, u64 start, u64 size) + { + int ret; + +@@ -1922,5 +1922,12 @@ void __ref remove_memory(int nid, u64 st + + mem_hotplug_done(); + } ++ ++void remove_memory(int nid, u64 start, u64 size) ++{ ++ lock_device_hotplug(); ++ __remove_memory(nid, start, size); ++ unlock_device_hotplug(); ++} + EXPORT_SYMBOL_GPL(remove_memory); + #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/queue-4.19/mm-memory_hotplug-make-unregister_memory_block_under_nodes-never-fail.patch b/queue-4.19/mm-memory_hotplug-make-unregister_memory_block_under_nodes-never-fail.patch new file mode 100644 index 00000000000..f9c9fcb1db0 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-make-unregister_memory_block_under_nodes-never-fail.patch @@ -0,0 +1,155 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:15 +0100 +Subject: mm/memory_hotplug: make unregister_memory_block_under_nodes() never fail +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-19-david@redhat.com> + +From: David Hildenbrand + +commit a31b264c2b415b29660da0bc2ba291a98629ce51 upstream. + +We really don't want anything during memory hotunplug to fail. We +always pass a valid memory block device, that check can go. Avoid +allocating memory and eventually failing. As we are always called under +lock, we can use a static piece of memory. This avoids having to put +the structure onto the stack, having to guess about the stack size of +callers. + +Patch inspired by a patch from Oscar Salvador. + +In the future, there might be no need to iterate over nodes at all. +mem->nid should tell us exactly what to remove. Memory block devices +with mixed nodes (added during boot) should properly fenced off and +never removed. + +Link: http://lkml.kernel.org/r/20190527111152.16324-11-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Wei Yang +Reviewed-by: Oscar Salvador +Acked-by: Michal Hocko +Cc: Greg Kroah-Hartman +Cc: "Rafael J. Wysocki" +Cc: Alex Deucher +Cc: "David S. Miller" +Cc: Mark Brown +Cc: Chris Wilson +Cc: David Hildenbrand +Cc: Jonathan Cameron +Cc: Andrew Banman +Cc: Andy Lutomirski +Cc: Anshuman Khandual +Cc: Ard Biesheuvel +Cc: Arun KS +Cc: Baoquan He +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Catalin Marinas +Cc: Chintan Pandya +Cc: Christophe Leroy +Cc: Dan Williams +Cc: Dave Hansen +Cc: Fenghua Yu +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: "Kirill A. 
Shutemov" +Cc: Logan Gunthorpe +Cc: Mark Rutland +Cc: Masahiro Yamada +Cc: Mathieu Malaterre +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: "mike.travis@hpe.com" +Cc: Nicholas Piggin +Cc: Paul Mackerras +Cc: Pavel Tatashin +Cc: Peter Zijlstra +Cc: Qian Cai +Cc: Rich Felker +Cc: Rob Herring +Cc: Robin Murphy +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Will Deacon +Cc: Yoshinori Sato +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/node.c | 18 +++++------------- + include/linux/node.h | 5 ++--- + 2 files changed, 7 insertions(+), 16 deletions(-) + +--- a/drivers/base/node.c ++++ b/drivers/base/node.c +@@ -455,20 +455,14 @@ int register_mem_sect_under_node(struct + + /* + * Unregister memory block device under all nodes that it spans. ++ * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). + */ +-int unregister_memory_block_under_nodes(struct memory_block *mem_blk) ++void unregister_memory_block_under_nodes(struct memory_block *mem_blk) + { +- NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); + unsigned long pfn, sect_start_pfn, sect_end_pfn; ++ static nodemask_t unlinked_nodes; + +- if (!mem_blk) { +- NODEMASK_FREE(unlinked_nodes); +- return -EFAULT; +- } +- if (!unlinked_nodes) +- return -ENOMEM; +- nodes_clear(*unlinked_nodes); +- ++ nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { +@@ -479,15 +473,13 @@ int unregister_memory_block_under_nodes( + continue; + if (!node_online(nid)) + continue; +- if (node_test_and_set(nid, *unlinked_nodes)) ++ if (node_test_and_set(nid, unlinked_nodes)) + continue; + sysfs_remove_link(&node_devices[nid]->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + sysfs_remove_link(&mem_blk->dev.kobj, + kobject_name(&node_devices[nid]->dev.kobj)); + } +- NODEMASK_FREE(unlinked_nodes); +- return 0; + } + + int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) +--- a/include/linux/node.h ++++ b/include/linux/node.h +@@ -72,7 +72,7 @@ extern int register_cpu_under_node(unsig + extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); + extern int register_mem_sect_under_node(struct memory_block *mem_blk, + void *arg); +-extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk); ++extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); + + #ifdef CONFIG_HUGETLBFS + extern void register_hugetlbfs_with_node(node_registration_func_t doregister, +@@ -104,9 +104,8 @@ static inline int register_mem_sect_unde + { + return 0; + } +-static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) ++static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) + { +- return 0; + } + + static inline void register_hugetlbfs_with_node(node_registration_func_t reg, diff --git a/queue-4.19/mm-memory_hotplug-make-unregister_memory_section-never-fail.patch b/queue-4.19/mm-memory_hotplug-make-unregister_memory_section-never-fail.patch new file mode 100644 index 00000000000..7824da0b778 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-make-unregister_memory_section-never-fail.patch @@ -0,0 +1,131 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:06 +0100 +Subject: mm/memory_hotplug: make 
unregister_memory_section() never fail +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-10-david@redhat.com> + +From: David Hildenbrand + +commit cb7b3a3685b20d3b5900ff24b2cb96d002960189 upstream. + +Failing while removing memory is mostly ignored and cannot really be +handled. Let's treat errors in unregister_memory_section() in a nice way, +warning, but continuing. + +Link: http://lkml.kernel.org/r/20190409100148.24703-3-david@redhat.com +Signed-off-by: David Hildenbrand +Cc: Greg Kroah-Hartman +Cc: "Rafael J. Wysocki" +Cc: Ingo Molnar +Cc: Andrew Banman +Cc: Mike Travis +Cc: David Hildenbrand +Cc: Oscar Salvador +Cc: Michal Hocko +Cc: Pavel Tatashin +Cc: Qian Cai +Cc: Wei Yang +Cc: Arun KS +Cc: Mathieu Malaterre +Cc: Andy Lutomirski +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: Fenghua Yu +Cc: Geert Uytterhoeven +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Joonsoo Kim +Cc: "Kirill A. Shutemov" +Cc: Martin Schwidefsky +Cc: Masahiro Yamada +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Peter Zijlstra +Cc: Rich Felker +Cc: Rob Herring +Cc: Stefan Agner +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Yoshinori Sato +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 16 +++++----------- + include/linux/memory.h | 2 +- + mm/memory_hotplug.c | 4 +--- + 3 files changed, 7 insertions(+), 15 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -743,15 +743,18 @@ unregister_memory(struct memory_block *m + { + BUG_ON(memory->dev.bus != &memory_subsys); + +- /* drop the ref. we got in remove_memory_section() */ ++ /* drop the ref. 
we got via find_memory_block() */ + put_device(&memory->dev); + device_unregister(&memory->dev); + } + +-static int remove_memory_section(struct mem_section *section) ++void unregister_memory_section(struct mem_section *section) + { + struct memory_block *mem; + ++ if (WARN_ON_ONCE(!present_section(section))) ++ return; ++ + mutex_lock(&mem_sysfs_mutex); + + /* +@@ -772,15 +775,6 @@ static int remove_memory_section(struct + + out_unlock: + mutex_unlock(&mem_sysfs_mutex); +- return 0; +-} +- +-int unregister_memory_section(struct mem_section *section) +-{ +- if (!present_section(section)) +- return -EINVAL; +- +- return remove_memory_section(section); + } + #endif /* CONFIG_MEMORY_HOTREMOVE */ + +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -113,7 +113,7 @@ extern int register_memory_isolate_notif + extern void unregister_memory_isolate_notifier(struct notifier_block *nb); + int hotplug_memory_register(int nid, struct mem_section *section); + #ifdef CONFIG_MEMORY_HOTREMOVE +-extern int unregister_memory_section(struct mem_section *); ++extern void unregister_memory_section(struct mem_section *); + #endif + extern int memory_dev_init(void); + extern int memory_notify(unsigned long val, void *v); +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -488,9 +488,7 @@ static int __remove_section(struct zone + if (!valid_section(ms)) + return ret; + +- ret = unregister_memory_section(ms); +- if (ret) +- return ret; ++ unregister_memory_section(ms); + + scn_nr = __section_nr(ms); + start_pfn = section_nr_to_pfn((unsigned long)scn_nr); diff --git a/queue-4.19/mm-memory_hotplug-release-memory-resource-after-arch_remove_memory.patch b/queue-4.19/mm-memory_hotplug-release-memory-resource-after-arch_remove_memory.patch new file mode 100644 index 00000000000..c1f5c8e2509 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-release-memory-resource-after-arch_remove_memory.patch @@ -0,0 +1,179 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:03 +0100 +Subject: mm/memory_hotplug: release memory resource after arch_remove_memory() +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-7-david@redhat.com> + +From: David Hildenbrand + +commit d9eb1417c77df7ce19abd2e41619e9dceccbdf2a upstream. + +Patch series "mm/memory_hotplug: Better error handling when removing +memory", v1. + +Error handling when removing memory is somewhat messed up right now. Some +errors result in warnings, others are completely ignored. Memory unplug +code can essentially not deal with errors properly as of now. +remove_memory() will never fail. + +We have basically two choices: +1. Allow arch_remov_memory() and friends to fail, propagating errors via + remove_memory(). Might be problematic (e.g. DIMMs consisting of multiple + pieces added/removed separately). +2. Don't allow the functions to fail, handling errors in a nicer way. + +It seems like most errors that can theoretically happen are really corner +cases and mostly theoretical (e.g. "section not valid"). However e.g. +aborting removal of sections while all callers simply continue in case of +errors is not nice. + +If we can gurantee that removal of memory always works (and WARN/skip in +case of theoretical errors so we can figure out what is going on), we can +go ahead and implement better error handling when adding memory. 
+ +E.g. via add_memory(): + +arch_add_memory() +ret = do_stuff() +if (ret) { + arch_remove_memory(); + goto error; +} + +Handling here that arch_remove_memory() might fail is basically +impossible. So I suggest, let's avoid reporting errors while removing +memory, warning on theoretical errors instead and continuing instead of +aborting. + +This patch (of 4): + +__add_pages() doesn't add the memory resource, so __remove_pages() +shouldn't remove it. Let's factor it out. Especially as it is a special +case for memory used as system memory, added via add_memory() and friends. + +We now remove the resource after removing the sections instead of doing it +the other way around. I don't think this change is problematic. + +add_memory() + register memory resource + arch_add_memory() + +remove_memory + arch_remove_memory() + release memory resource + +While at it, explain why we ignore errors and that it only happeny if +we remove memory in a different granularity as we added it. + +[david@redhat.com: fix printk warning] + Link: http://lkml.kernel.org/r/20190417120204.6997-1-david@redhat.com +Link: http://lkml.kernel.org/r/20190409100148.24703-2-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Oscar Salvador +Cc: Michal Hocko +Cc: David Hildenbrand +Cc: Pavel Tatashin +Cc: Wei Yang +Cc: Qian Cai +Cc: Arun KS +Cc: Mathieu Malaterre +Cc: Andrew Banman +Cc: Andy Lutomirski +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: Fenghua Yu +Cc: Geert Uytterhoeven +Cc: Greg Kroah-Hartman +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Ingo Molnar +Cc: Joonsoo Kim +Cc: "Kirill A. Shutemov" +Cc: Martin Schwidefsky +Cc: Masahiro Yamada +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: Mike Travis +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Peter Zijlstra +Cc: "Rafael J. Wysocki" +Cc: Rich Felker +Cc: Rob Herring +Cc: Stefan Agner +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Yoshinori Sato +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory_hotplug.c | 35 +++++++++++++++++++++-------------- + 1 file changed, 21 insertions(+), 14 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -523,20 +523,6 @@ int __remove_pages(struct zone *zone, un + if (is_dev_zone(zone)) { + if (altmap) + map_offset = vmem_altmap_offset(altmap); +- } else { +- resource_size_t start, size; +- +- start = phys_start_pfn << PAGE_SHIFT; +- size = nr_pages * PAGE_SIZE; +- +- ret = release_mem_region_adjustable(&iomem_resource, start, +- size); +- if (ret) { +- resource_size_t endres = start + size - 1; +- +- pr_warn("Unable to release resource <%pa-%pa> (%d)\n", +- &start, &endres, ret); +- } + } + + clear_zone_contiguous(zone); +@@ -1883,6 +1869,26 @@ void try_offline_node(int nid) + } + EXPORT_SYMBOL(try_offline_node); + ++static void __release_memory_resource(resource_size_t start, ++ resource_size_t size) ++{ ++ int ret; ++ ++ /* ++ * When removing memory in the same granularity as it was added, ++ * this function never fails. It might only fail if resources ++ * have to be adjusted or split. We'll ignore the error, as ++ * removing of memory cannot fail. 
++ */ ++ ret = release_mem_region_adjustable(&iomem_resource, start, size); ++ if (ret) { ++ resource_size_t endres = start + size - 1; ++ ++ pr_warn("Unable to release resource <%pa-%pa> (%d)\n", ++ &start, &endres, ret); ++ } ++} ++ + /** + * remove_memory + * @nid: the node ID +@@ -1917,6 +1923,7 @@ void __ref __remove_memory(int nid, u64 + memblock_remove(start, size); + + arch_remove_memory(nid, start, size, NULL); ++ __release_memory_resource(start, size); + + try_offline_node(nid); + diff --git a/queue-4.19/mm-memory_hotplug-remove-memory-block-devices-before-arch_remove_memory.patch b/queue-4.19/mm-memory_hotplug-remove-memory-block-devices-before-arch_remove_memory.patch new file mode 100644 index 00000000000..d754dd73b24 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-remove-memory-block-devices-before-arch_remove_memory.patch @@ -0,0 +1,221 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:14 +0100 +Subject: mm/memory_hotplug: remove memory block devices before arch_remove_memory() +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-18-david@redhat.com> + +From: David Hildenbrand + +commit 4c4b7f9ba9486c565aead99a198ceeef73ae81f6 upstream. + +Let's factor out removing of memory block devices, which is only +necessary for memory added via add_memory() and friends that created +memory block devices. Remove the devices before calling +arch_remove_memory(). + +This finishes factoring out memory block device handling from +arch_add_memory() and arch_remove_memory(). + +Link: http://lkml.kernel.org/r/20190527111152.16324-10-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Dan Williams +Acked-by: Michal Hocko +Cc: Greg Kroah-Hartman +Cc: "Rafael J. Wysocki" +Cc: David Hildenbrand +Cc: "mike.travis@hpe.com" +Cc: Andrew Banman +Cc: Ingo Molnar +Cc: Alex Deucher +Cc: "David S. Miller" +Cc: Mark Brown +Cc: Chris Wilson +Cc: Oscar Salvador +Cc: Jonathan Cameron +Cc: Arun KS +Cc: Mathieu Malaterre +Cc: Andy Lutomirski +Cc: Anshuman Khandual +Cc: Ard Biesheuvel +Cc: Baoquan He +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Catalin Marinas +Cc: Chintan Pandya +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: Fenghua Yu +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: "Kirill A. 
Shutemov" +Cc: Logan Gunthorpe +Cc: Mark Rutland +Cc: Masahiro Yamada +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: Nicholas Piggin +Cc: Oscar Salvador +Cc: Paul Mackerras +Cc: Pavel Tatashin +Cc: Peter Zijlstra +Cc: Qian Cai +Cc: Rich Felker +Cc: Rob Herring +Cc: Robin Murphy +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Wei Yang +Cc: Will Deacon +Cc: Yoshinori Sato +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/memory.c | 37 ++++++++++++++++++------------------- + drivers/base/node.c | 11 ++++++----- + include/linux/memory.h | 2 +- + include/linux/node.h | 6 ++---- + mm/memory_hotplug.c | 5 +++-- + 5 files changed, 30 insertions(+), 31 deletions(-) + +--- a/drivers/base/memory.c ++++ b/drivers/base/memory.c +@@ -768,32 +768,31 @@ int create_memory_block_devices(unsigned + return ret; + } + +-void unregister_memory_section(struct mem_section *section) ++/* ++ * Remove memory block devices for the given memory area. Start and size ++ * have to be aligned to memory block granularity. Memory block devices ++ * have to be offline. ++ */ ++void remove_memory_block_devices(unsigned long start, unsigned long size) + { ++ const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); ++ const int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + struct memory_block *mem; ++ int block_id; + +- if (WARN_ON_ONCE(!present_section(section))) ++ if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || ++ !IS_ALIGNED(size, memory_block_size_bytes()))) + return; + + mutex_lock(&mem_sysfs_mutex); +- +- /* +- * Some users of the memory hotplug do not want/need memblock to +- * track all sections. Skip over those. +- */ +- mem = find_memory_block(section); +- if (!mem) +- goto out_unlock; +- +- unregister_mem_sect_under_nodes(mem, __section_nr(section)); +- +- mem->section_count--; +- if (mem->section_count == 0) ++ for (block_id = start_block_id; block_id != end_block_id; block_id++) { ++ mem = find_memory_block_by_id(block_id, NULL); ++ if (WARN_ON_ONCE(!mem)) ++ continue; ++ mem->section_count = 0; ++ unregister_memory_block_under_nodes(mem); + unregister_memory(mem); +- else +- put_device(&mem->dev); +- +-out_unlock: ++ } + mutex_unlock(&mem_sysfs_mutex); + } + +--- a/drivers/base/node.c ++++ b/drivers/base/node.c +@@ -453,9 +453,10 @@ int register_mem_sect_under_node(struct + return 0; + } + +-/* unregister memory section under all nodes that it spans */ +-int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, +- unsigned long phys_index) ++/* ++ * Unregister memory block device under all nodes that it spans. 
++ */ ++int unregister_memory_block_under_nodes(struct memory_block *mem_blk) + { + NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); + unsigned long pfn, sect_start_pfn, sect_end_pfn; +@@ -468,8 +469,8 @@ int unregister_mem_sect_under_nodes(stru + return -ENOMEM; + nodes_clear(*unlinked_nodes); + +- sect_start_pfn = section_nr_to_pfn(phys_index); +- sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; ++ sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); ++ sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + int nid; + +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -112,7 +112,7 @@ extern void unregister_memory_notifier(s + extern int register_memory_isolate_notifier(struct notifier_block *nb); + extern void unregister_memory_isolate_notifier(struct notifier_block *nb); + int create_memory_block_devices(unsigned long start, unsigned long size); +-extern void unregister_memory_section(struct mem_section *); ++void remove_memory_block_devices(unsigned long start, unsigned long size); + extern int memory_dev_init(void); + extern int memory_notify(unsigned long val, void *v); + extern int memory_isolate_notify(unsigned long val, void *v); +--- a/include/linux/node.h ++++ b/include/linux/node.h +@@ -72,8 +72,7 @@ extern int register_cpu_under_node(unsig + extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); + extern int register_mem_sect_under_node(struct memory_block *mem_blk, + void *arg); +-extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, +- unsigned long phys_index); ++extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk); + + #ifdef CONFIG_HUGETLBFS + extern void register_hugetlbfs_with_node(node_registration_func_t doregister, +@@ -105,8 +104,7 @@ static inline int register_mem_sect_unde + { + return 0; + } +-static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, +- unsigned long phys_index) ++static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) + { + return 0; + } +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -481,8 +481,6 @@ static void __remove_section(struct zone + if (WARN_ON_ONCE(!valid_section(ms))) + return; + +- unregister_memory_section(ms); +- + scn_nr = __section_nr(ms); + start_pfn = section_nr_to_pfn((unsigned long)scn_nr); + __remove_zone(zone, start_pfn); +@@ -1914,6 +1912,9 @@ void __ref __remove_memory(int nid, u64 + memblock_free(start, size); + memblock_remove(start, size); + ++ /* remove memory block devices before removing memory */ ++ remove_memory_block_devices(start, size); ++ + arch_remove_memory(nid, start, size, NULL); + __release_memory_resource(start, size); + diff --git a/queue-4.19/mm-memory_hotplug-remove-zone-parameter-from-sparse_remove_one_section.patch b/queue-4.19/mm-memory_hotplug-remove-zone-parameter-from-sparse_remove_one_section.patch new file mode 100644 index 00000000000..faa1714d790 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-remove-zone-parameter-from-sparse_remove_one_section.patch @@ -0,0 +1,114 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET +From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:16 +0100 +Subject: mm/memory_hotplug: remove "zone" parameter from sparse_remove_one_section +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . 
V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-20-david@redhat.com> + +From: David Hildenbrand + +commit b9bf8d342d9b443c0d19aa57883d8ddb38d965de upstream. + +The parameter is unused, so let's drop it. Memory removal paths should +never care about zones. This is the job of memory offlining and will +require more refactorings. + +Link: http://lkml.kernel.org/r/20190527111152.16324-12-david@redhat.com +Signed-off-by: David Hildenbrand +Reviewed-by: Dan Williams +Reviewed-by: Wei Yang +Reviewed-by: Oscar Salvador +Acked-by: Michal Hocko +Cc: Alex Deucher +Cc: Andrew Banman +Cc: Andy Lutomirski +Cc: Anshuman Khandual +Cc: Ard Biesheuvel +Cc: Arun KS +Cc: Baoquan He +Cc: Benjamin Herrenschmidt +Cc: Borislav Petkov +Cc: Catalin Marinas +Cc: Chintan Pandya +Cc: Christophe Leroy +Cc: Chris Wilson +Cc: Dave Hansen +Cc: "David S. Miller" +Cc: Fenghua Yu +Cc: Greg Kroah-Hartman +Cc: Heiko Carstens +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Jonathan Cameron +Cc: Joonsoo Kim +Cc: Jun Yao +Cc: "Kirill A. Shutemov" +Cc: Logan Gunthorpe +Cc: Mark Brown +Cc: Mark Rutland +Cc: Masahiro Yamada +Cc: Mathieu Malaterre +Cc: Michael Ellerman +Cc: Mike Rapoport +Cc: "mike.travis@hpe.com" +Cc: Nicholas Piggin +Cc: Paul Mackerras +Cc: Pavel Tatashin +Cc: Peter Zijlstra +Cc: Qian Cai +Cc: "Rafael J. Wysocki" +Cc: Rich Felker +Cc: Rob Herring +Cc: Robin Murphy +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Vasily Gorbik +Cc: Will Deacon +Cc: Yoshinori Sato +Cc: Yu Zhao +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memory_hotplug.h | 2 +- + mm/memory_hotplug.c | 2 +- + mm/sparse.c | 4 ++-- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -335,7 +335,7 @@ extern int offline_pages(unsigned long s + extern bool is_memblock_offlined(struct memory_block *mem); + extern int sparse_add_one_section(int nid, unsigned long start_pfn, + struct vmem_altmap *altmap); +-extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, ++extern void sparse_remove_one_section(struct mem_section *ms, + unsigned long map_offset, struct vmem_altmap *altmap); + extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, + unsigned long pnum); +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -485,7 +485,7 @@ static void __remove_section(struct zone + start_pfn = section_nr_to_pfn((unsigned long)scn_nr); + __remove_zone(zone, start_pfn); + +- sparse_remove_one_section(zone, ms, map_offset, altmap); ++ sparse_remove_one_section(ms, map_offset, altmap); + } + + /** +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -757,8 +757,8 @@ static void free_section_usemap(struct p + free_map_bootmem(memmap); + } + +-void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +- unsigned long map_offset, struct vmem_altmap *altmap) ++void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, ++ struct vmem_altmap *altmap) + { + struct page *memmap = NULL; + unsigned long *usemap = NULL; diff --git a/queue-4.19/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch b/queue-4.19/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch new file mode 100644 index 00000000000..62bf3d17654 --- /dev/null +++ b/queue-4.19/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch @@ -0,0 +1,360 @@ +From foo@baz Tue 28 Jan 2020 02:32:10 PM CET 
+From: David Hildenbrand +Date: Tue, 28 Jan 2020 10:50:21 +0100 +Subject: mm/memory_hotplug: shrink zones when offlining memory +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand +Message-ID: <20200128095021.8076-25-david@redhat.com> + +From: David Hildenbrand + +commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream. + +-- snip -- + +- Missing arm64 hot(un)plug support +- Missing some vmem_altmap_offset() cleanups +- Missing sub-section hotadd support +- Missing unification of mm/hmm.c and kernel/memremap.c + +-- snip -- + +We currently try to shrink a single zone when removing memory. We use +the zone of the first page of the memory we are removing. If that +memmap was never initialized (e.g., memory was never onlined), we will +read garbage and can trigger kernel BUGs (due to a stale pointer): + + BUG: unable to handle page fault for address: 000000000000353d + #PF: supervisor write access in kernel mode + #PF: error_code(0x0002) - not-present page + PGD 0 P4D 0 + Oops: 0002 [#1] SMP PTI + CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4 + Workqueue: kacpi_hotplug acpi_hotplug_work_fn + RIP: 0010:clear_zone_contiguous+0x5/0x10 + Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840 + RSP: 0018:ffffad2400043c98 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000 + RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40 + RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001 + R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000 + R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680 + FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + __remove_pages+0x4b/0x640 + arch_remove_memory+0x63/0x8d + try_remove_memory+0xdb/0x130 + __remove_memory+0xa/0x11 + acpi_memory_device_remove+0x70/0x100 + acpi_bus_trim+0x55/0x90 + acpi_device_hotplug+0x227/0x3a0 + acpi_hotplug_work_fn+0x1a/0x30 + process_one_work+0x221/0x550 + worker_thread+0x50/0x3b0 + kthread+0x105/0x140 + ret_from_fork+0x3a/0x50 + Modules linked in: + CR2: 000000000000353d + +Instead, shrink the zones when offlining memory or when onlining failed. +Introduce and use remove_pfn_range_from_zone(() for that. We now +properly shrink the zones, even if we have DIMMs whereby + + - Some memory blocks fall into no zone (never onlined) + + - Some memory blocks fall into multiple zones (offlined+re-onlined) + + - Multiple memory blocks that fall into different zones + +Drop the zone parameter (with a potential dubious value) from +__remove_pages() and __remove_section(). 
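As an aside for readers, the span-shrinking arithmetic behind remove_pfn_range_from_zone() can be illustrated with a minimal user-space C sketch. This is not kernel code and not part of the patch: zone_model and remove_pfn_range_from_zone_model() are hypothetical stand-ins for struct zone and remove_pfn_range_from_zone(), the section walk that the kernel's shrink_zone_span() uses to skip holes is omitted, and only ranges abutting a zone edge shrink the span.

#include <stdio.h>

/* Toy model of the two span fields the kernel adjusts when shrinking. */
struct zone_model {
	unsigned long start_pfn;     /* first pfn spanned by the zone */
	unsigned long spanned_pages; /* number of pfns spanned */
};

/* Shrink the modelled zone after [start_pfn, start_pfn + nr_pages) is
 * offlined, while the zone is still known from the online operation. */
static void remove_pfn_range_from_zone_model(struct zone_model *z,
					     unsigned long start_pfn,
					     unsigned long nr_pages)
{
	unsigned long zone_end = z->start_pfn + z->spanned_pages;
	unsigned long end = start_pfn + nr_pages;

	if (start_pfn == z->start_pfn) {
		/* Range at the low end: advance the zone start. */
		z->start_pfn = end;
		z->spanned_pages = (zone_end > end) ? zone_end - end : 0;
	} else if (end == zone_end) {
		/* Range at the high end: pull the zone end back. */
		z->spanned_pages = start_pfn - z->start_pfn;
	}
	/* A range in the middle leaves the span unchanged in this sketch;
	 * the real shrink_zone_span() rescans the remaining sections. */
}

int main(void)
{
	struct zone_model movable = { 0x8000, 0x18000 };

	/* Offline the tail 0x8000 pfns of the modelled zone. */
	remove_pfn_range_from_zone_model(&movable, 0x18000, 0x8000);
	printf("start_pfn=%#lx spanned=%#lx\n",
	       movable.start_pfn, movable.spanned_pages);
	return 0;
}

The point of doing this at offline time is that the zone is still the one the range was onlined into; deriving it at remove time from pfn_to_page(start_pfn) can read a memmap that was never initialized, which is exactly what the crash above shows.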
+ +Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com +Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] +Signed-off-by: David Hildenbrand +Reviewed-by: Oscar Salvador +Cc: Michal Hocko +Cc: "Matthew Wilcox (Oracle)" +Cc: "Aneesh Kumar K.V" +Cc: Pavel Tatashin +Cc: Greg Kroah-Hartman +Cc: Dan Williams +Cc: Logan Gunthorpe +Cc: [5.0+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/mm/init.c | 4 +--- + arch/powerpc/mm/mem.c | 11 +---------- + arch/s390/mm/init.c | 4 +--- + arch/sh/mm/init.c | 4 +--- + arch/x86/mm/init_32.c | 4 +--- + arch/x86/mm/init_64.c | 8 +------- + include/linux/memory_hotplug.h | 7 +++++-- + kernel/memremap.c | 3 +-- + mm/hmm.c | 4 +--- + mm/memory_hotplug.c | 29 ++++++++++++++--------------- + 10 files changed, 27 insertions(+), 51 deletions(-) + +--- a/arch/ia64/mm/init.c ++++ b/arch/ia64/mm/init.c +@@ -666,9 +666,7 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -144,18 +144,9 @@ void __ref arch_remove_memory(int nid, u + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct page *page; + int ret; + +- /* +- * If we have an altmap then we need to skip over any reserved PFNs +- * when querying the zone. +- */ +- page = pfn_to_page(start_pfn); +- if (altmap) +- page += vmem_altmap_offset(altmap); +- +- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); +--- a/arch/s390/mm/init.c ++++ b/arch/s390/mm/init.c +@@ -244,10 +244,8 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + vmem_remove_mapping(start, size); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/sh/mm/init.c ++++ b/arch/sh/mm/init.c +@@ -448,9 +448,7 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = PFN_DOWN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1146,14 +1146,8 @@ void __ref arch_remove_memory(int nid, u + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> 
PAGE_SHIFT; +- struct page *page = pfn_to_page(start_pfn); +- struct zone *zone; + +- /* With altmap the first mapped page is offset from @start */ +- if (altmap) +- page += vmem_altmap_offset(altmap); +- zone = page_zone(page); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + kernel_physical_mapping_remove(start, start + size); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -110,8 +110,8 @@ static inline bool movable_node_is_enabl + + extern void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); +-extern void __remove_pages(struct zone *zone, unsigned long start_pfn, +- unsigned long nr_pages, struct vmem_altmap *altmap); ++extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, ++ struct vmem_altmap *altmap); + + /* reasonably generic interface to expand the physical pages */ + extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, +@@ -331,6 +331,9 @@ extern int arch_add_memory(int nid, u64 + struct vmem_altmap *altmap, bool want_memblock); + extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); ++extern void remove_pfn_range_from_zone(struct zone *zone, ++ unsigned long start_pfn, ++ unsigned long nr_pages); + extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); + extern bool is_memblock_offlined(struct memory_block *mem); + extern int sparse_add_one_section(int nid, unsigned long start_pfn, +--- a/kernel/memremap.c ++++ b/kernel/memremap.c +@@ -141,8 +141,7 @@ static void devm_memremap_pages_release( + mem_hotplug_begin(); + if (pgmap->type == MEMORY_DEVICE_PRIVATE) { + pfn = align_start >> PAGE_SHIFT; +- __remove_pages(page_zone(first_page), pfn, +- align_size >> PAGE_SHIFT, NULL); ++ __remove_pages(pfn, align_size >> PAGE_SHIFT, NULL); + } else { + arch_remove_memory(nid, align_start, align_size, + pgmap->altmap_valid ? 
&pgmap->altmap : NULL);
+--- a/mm/hmm.c
++++ b/mm/hmm.c
+@@ -997,7 +997,6 @@ static void hmm_devmem_release(void *dat
+ struct hmm_devmem *devmem = data;
+ struct resource *resource = devmem->resource;
+ unsigned long start_pfn, npages;
+- struct zone *zone;
+ struct page *page;
+ int nid;
+
+@@ -1006,12 +1005,11 @@ static void hmm_devmem_release(void *dat
+ npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT;
+
+ page = pfn_to_page(start_pfn);
+- zone = page_zone(page);
+ nid = page_to_nid(page);
+
+ mem_hotplug_begin();
+ if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
+- __remove_pages(zone, start_pfn, npages, NULL);
++ __remove_pages(start_pfn, npages, NULL);
+ else
+ arch_remove_memory(nid, start_pfn << PAGE_SHIFT,
+ npages << PAGE_SHIFT, NULL);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -449,10 +449,11 @@ static void update_pgdat_span(struct pgl
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
+ }
+
+-static void __remove_zone(struct zone *zone, unsigned long start_pfn)
++void __ref remove_pfn_range_from_zone(struct zone *zone,
++ unsigned long start_pfn,
++ unsigned long nr_pages)
+ {
+ struct pglist_data *pgdat = zone->zone_pgdat;
+- int nr_pages = PAGES_PER_SECTION;
+ unsigned long flags;
+
+ #ifdef CONFIG_ZONE_DEVICE
+@@ -465,14 +466,17 @@ static void __remove_zone(struct zone *z
+ return;
+ #endif
+
++ clear_zone_contiguous(zone);
++
+ pgdat_resize_lock(zone->zone_pgdat, &flags);
+ shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+ update_pgdat_span(pgdat);
+ pgdat_resize_unlock(zone->zone_pgdat, &flags);
++
++ set_zone_contiguous(zone);
+ }
+
+-static void __remove_section(struct zone *zone, struct mem_section *ms,
+- unsigned long map_offset,
++static void __remove_section(struct mem_section *ms, unsigned long map_offset,
+ struct vmem_altmap *altmap)
+ {
+ unsigned long start_pfn;
+@@ -483,14 +487,12 @@ static void __remove_section(struct zone
+
+ scn_nr = __section_nr(ms);
+ start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
+- __remove_zone(zone, start_pfn);
+
+ sparse_remove_one_section(ms, map_offset, altmap);
+ }
+
+ /**
+- * __remove_pages() - remove sections of pages from a zone
+- * @zone: zone from which pages need to be removed
++ * __remove_pages() - remove sections of pages
+ * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ * @altmap: alternative device page map or %NULL if default memmap is used
+@@ -500,8 +502,8 @@ static void __remove_section(struct zone
+ * sure that pages are marked reserved and zones are adjust properly by
+ * calling offline_pages().
+ */
+-void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+- unsigned long nr_pages, struct vmem_altmap *altmap)
++void __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages,
++ struct vmem_altmap *altmap)
+ {
+ unsigned long i;
+ unsigned long map_offset = 0;
+@@ -510,8 +512,6 @@ void __remove_pages(struct zone *zone, u
+ if (altmap)
+ map_offset = vmem_altmap_offset(altmap);
+
+- clear_zone_contiguous(zone);
+-
+ /*
+ * We can only remove entire sections
+ */
+@@ -523,12 +523,9 @@ void __remove_pages(struct zone *zone, u
+ unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+
+ cond_resched();
+- __remove_section(zone, __pfn_to_section(pfn), map_offset,
+- altmap);
++ __remove_section(__pfn_to_section(pfn), map_offset, altmap);
+ map_offset = 0;
+ }
+-
+- set_zone_contiguous(zone);
+ }
+
+ int set_online_page_callback(online_page_callback_t callback)
+@@ -898,6 +895,7 @@ failed_addition:
+ (unsigned long long) pfn << PAGE_SHIFT,
+ (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+ memory_notify(MEM_CANCEL_ONLINE, &arg);
++ remove_pfn_range_from_zone(zone, pfn, nr_pages);
+ mem_hotplug_done();
+ return ret;
+ }
+@@ -1682,6 +1680,7 @@ repeat:
+ writeback_set_ratelimit();
+
+ memory_notify(MEM_OFFLINE, &arg);
++ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+ mem_hotplug_done();
+ return 0;
+
diff --git a/queue-4.19/mm-memory_hotplug-update-a-comment-in-unregister_memory.patch b/queue-4.19/mm-memory_hotplug-update-a-comment-in-unregister_memory.patch
new file mode 100644
index 00000000000..54bb7ac0b03
--- /dev/null
+++ b/queue-4.19/mm-memory_hotplug-update-a-comment-in-unregister_memory.patch
@@ -0,0 +1,35 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:05 +0100
+Subject: mm, memory_hotplug: update a comment in unregister_memory()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-9-david@redhat.com>
+
+From: Dan Carpenter
+
+commit 16df1456aa858a86f398dbc7d27649eb6662b0cc upstream.
+
+The remove_memory_block() function was renamed to remove_memory_section()
+in commit cc292b0b4302 ("drivers/base/memory.c: rename
+remove_memory_block() to remove_memory_section()").
+
+Signed-off-by: Dan Carpenter
+Signed-off-by: Greg Kroah-Hartman
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/base/memory.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/memory.c
++++ b/drivers/base/memory.c
+@@ -743,7 +743,7 @@ unregister_memory(struct memory_block *m
+ {
+ BUG_ON(memory->dev.bus != &memory_subsys);
+
+- /* drop the ref. we got in remove_memory_block() */
++ /* drop the ref. we got in remove_memory_section() */
+ put_device(&memory->dev);
+ device_unregister(&memory->dev);
+ }
diff --git a/queue-4.19/mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch b/queue-4.19/mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch
new file mode 100644
index 00000000000..93384cc57d4
--- /dev/null
+++ b/queue-4.19/mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch
@@ -0,0 +1,198 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:19 +0100
+Subject: mm/memunmap: don't access uninitialized memmap in memunmap_pages()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-23-david@redhat.com>
+
+From: "Aneesh Kumar K.V"
+
+commit 77e080e7680e1e615587352f70c87b9e98126d03 upstream.
+
+-- snip --
+
+- Missing mm/hmm.c and kernel/memremap.c unification.
+-- hmm code does not need fixes (no altmap)
+- Missing 7cc7867fb061 ("mm/devm_memremap_pages: enable sub-section remap")
+
+-- snip --
+
+Patch series "mm/memory_hotplug: Shrink zones before removing memory",
+v6.
+
+This series fixes the access of uninitialized memmaps when shrinking
+zones/nodes and when removing memory. Also, it contains all fixes for
+crashes that can be triggered when removing certain namespaces using
+memunmap_pages() - ZONE_DEVICE, reported by Aneesh.
+
+We stop trying to shrink ZONE_DEVICE, as it's buggy, fixing it would be
+more involved (we don't have SECTION_IS_ONLINE as an indicator), and
+shrinking is only of limited use (set_zone_contiguous() cannot detect
+the ZONE_DEVICE as contiguous).
+
+We continue shrinking !ZONE_DEVICE zones, however, I reduced the amount
+of code to a minimum. Shrinking is especially necessary to keep
+zone->contiguous set where possible, especially on memory unplug of
+DIMMs at zone boundaries.
+
+--------------------------------------------------------------------------
+
+Zones are now properly shrunk when offlining memory blocks or when
+onlining failed. This allows zones to be shrunk properly on memory
+unplug even if the separate memory blocks of a DIMM were onlined to
+different zones or re-onlined to a different zone after offlining.
+
+Example:
+
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 0
+ present 0
+ managed 0
+ :/# echo "online_movable" > /sys/devices/system/memory/memory41/state
+ :/# echo "online_movable" > /sys/devices/system/memory/memory43/state
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 98304
+ present 65536
+ managed 65536
+ :/# echo 0 > /sys/devices/system/memory/memory43/online
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 32768
+ present 32768
+ managed 32768
+ :/# echo 0 > /sys/devices/system/memory/memory41/online
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 0
+ present 0
+ managed 0
+
+This patch (of 10):
+
+With an altmap, the memmap falling into the reserved altmap space is not
+initialized and, therefore, contains a garbage NID and a garbage zone.
+Make sure to read the NID/zone from a memmap that was initialized.
+
+This fixes a kernel crash that is observed when destroying a namespace:
+
+ kernel BUG at include/linux/mm.h:1107!
+ cpu 0x1: Vector: 700 (Program Check) at [c000000274087890]
+ pc: c0000000004b9728: memunmap_pages+0x238/0x340
+ lr: c0000000004b9724: memunmap_pages+0x234/0x340
+ ...
+ pid = 3669, comm = ndctl
+ kernel BUG at include/linux/mm.h:1107!
+ devm_action_release+0x30/0x50
+ release_nodes+0x268/0x2d0
+ device_release_driver_internal+0x174/0x240
+ unbind_store+0x13c/0x190
+ drv_attr_store+0x44/0x60
+ sysfs_kf_write+0x70/0xa0
+ kernfs_fop_write+0x1ac/0x290
+ __vfs_write+0x3c/0x70
+ vfs_write+0xe4/0x200
+ ksys_write+0x7c/0x140
+ system_call+0x5c/0x68
+
+The "page_zone(pfn_to_page(pfn))" was introduced by 69324b8f4833 ("mm,
+devm_memremap_pages: add MEMORY_DEVICE_PRIVATE support"), however, I
+think we will never have driver reserved memory with
+MEMORY_DEVICE_PRIVATE (no altmap AFAIKS).
+
+[david@redhat.com: minimize code changes, rephrase description]
+Link: http://lkml.kernel.org/r/20191006085646.5768-2-david@redhat.com
+Fixes: 2c2a5af6fed2 ("mm, memory_hotplug: add nid parameter to arch_remove_memory")
+Signed-off-by: Aneesh Kumar K.V
+Signed-off-by: David Hildenbrand
+Cc: Dan Williams
+Cc: Jason Gunthorpe
+Cc: Logan Gunthorpe
+Cc: Ira Weiny
+Cc: Damian Tometzki
+Cc: Alexander Duyck
+Cc: Alexander Potapenko
+Cc: Andy Lutomirski
+Cc: Anshuman Khandual
+Cc: Benjamin Herrenschmidt
+Cc: Borislav Petkov
+Cc: Catalin Marinas
+Cc: Christian Borntraeger
+Cc: Christophe Leroy
+Cc: Dave Hansen
+Cc: Fenghua Yu
+Cc: Gerald Schaefer
+Cc: Greg Kroah-Hartman
+Cc: Halil Pasic
+Cc: Heiko Carstens
+Cc: "H. Peter Anvin"
+Cc: Ingo Molnar
+Cc: Jun Yao
+Cc: Mark Rutland
+Cc: Masahiro Yamada
+Cc: "Matthew Wilcox (Oracle)"
+Cc: Mel Gorman
+Cc: Michael Ellerman
+Cc: Michal Hocko
+Cc: Mike Rapoport
+Cc: Oscar Salvador
+Cc: Pankaj Gupta
+Cc: Paul Mackerras
+Cc: Pavel Tatashin
+Cc: Pavel Tatashin
+Cc: Peter Zijlstra
+Cc: Qian Cai
+Cc: Rich Felker
+Cc: Robin Murphy
+Cc: Steve Capper
+Cc: Thomas Gleixner
+Cc: Tom Lendacky
+Cc: Tony Luck
+Cc: Vasily Gorbik
+Cc: Vlastimil Babka
+Cc: Wei Yang
+Cc: Wei Yang
+Cc: Will Deacon
+Cc: Yoshinori Sato
+Cc: Yu Zhao
+Cc: [5.0+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/memremap.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -120,6 +120,7 @@ static void devm_memremap_pages_release(
+ struct device *dev = pgmap->dev;
+ struct resource *res = &pgmap->res;
+ resource_size_t align_start, align_size;
++ struct page *first_page;
+ unsigned long pfn;
+ int nid;
+
+@@ -132,13 +133,16 @@ static void devm_memremap_pages_release(
+ align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+ - align_start;
+
+- nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT));
++ /* make sure to access a memmap that was actually initialized */
++ first_page = pfn_to_page(pfn_first(pgmap));
++
++ nid = page_to_nid(first_page);
+
+ mem_hotplug_begin();
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+ pfn = align_start >> PAGE_SHIFT;
+- __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
+- align_size >> PAGE_SHIFT, NULL);
++ __remove_pages(page_zone(first_page), pfn,
++ align_size >> PAGE_SHIFT, NULL);
+ } else {
+ arch_remove_memory(nid, align_start, align_size,
+ pgmap->altmap_valid ? &pgmap->altmap : NULL);
diff --git a/queue-4.19/mm-sparse-drop-pgdat_resize_lock-in-sparse_add-remove_one_section.patch b/queue-4.19/mm-sparse-drop-pgdat_resize_lock-in-sparse_add-remove_one_section.patch
new file mode 100644
index 00000000000..a06b7c8d5a9
--- /dev/null
+++ b/queue-4.19/mm-sparse-drop-pgdat_resize_lock-in-sparse_add-remove_one_section.patch
@@ -0,0 +1,122 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:49:59 +0100
+Subject: mm, sparse: drop pgdat_resize_lock in sparse_add/remove_one_section()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-3-david@redhat.com>
+
+From: Wei Yang
+
+commit 83af658898cb292a32d8b6cd9b51266d7cfc4b6a upstream.
+
+pgdat_resize_lock is used to protect pgdat's memory region information
+like: node_start_pfn, node_present_pages, etc. While in function
+sparse_add/remove_one_section(), pgdat_resize_lock is used to protect
+initialization/release of one mem_section. This does not look proper.
+
+These code paths are currently protected by mem_hotplug_lock, but should
+there ever be any reason for locking at the sparse layer, a dedicated
+lock should be introduced.
+
+Following is the current call trace of sparse_add/remove_one_section()
+
+ mem_hotplug_begin()
+ arch_add_memory()
+ add_pages()
+ __add_pages()
+ __add_section()
+ sparse_add_one_section()
+ mem_hotplug_done()
+
+ mem_hotplug_begin()
+ arch_remove_memory()
+ __remove_pages()
+ __remove_section()
+ sparse_remove_one_section()
+ mem_hotplug_done()
+
+The comment above the pgdat_resize_lock also mentions "Holding this will
+also guarantee that any pfn_valid() stays that way.", which is true with
+the current implementation and false after this patch. But the current
+implementation doesn't live up to this comment: there aren't any pfn
+walkers that take the lock, so this looks like a relic from the past.
+This patch also removes this comment.
+
+[richard.weiyang@gmail.com: v4]
+ Link: http://lkml.kernel.org/r/20181204085657.20472-1-richard.weiyang@gmail.com
+[mhocko@suse.com: changelog suggestion]
+Link: http://lkml.kernel.org/r/20181128091243.19249-1-richard.weiyang@gmail.com
+Signed-off-by: Wei Yang
+Reviewed-by: David Hildenbrand
+Acked-by: Michal Hocko
+Cc: Dave Hansen
+Cc: Oscar Salvador
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/mmzone.h | 3 +--
+ mm/sparse.c | 9 +--------
+ 2 files changed, 2 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -637,8 +637,7 @@ typedef struct pglist_data {
+ #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
+ /*
+ * Must be held any time you expect node_start_pfn, node_present_pages
+- * or node_spanned_pages stay constant. Holding this will also
+- * guarantee that any pfn_valid() stays that way.
++ * or node_spanned_pages stay constant.
+ *
+ * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
+ * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -668,7 +668,6 @@ int __meminit sparse_add_one_section(str
+ struct mem_section *ms;
+ struct page *memmap;
+ unsigned long *usemap;
+- unsigned long flags;
+ int ret;
+
+ /*
+@@ -688,8 +687,6 @@ int __meminit sparse_add_one_section(str
+ return -ENOMEM;
+ }
+
+- pgdat_resize_lock(pgdat, &flags);
+-
+ ms = __pfn_to_section(start_pfn);
+ if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
+ ret = -EEXIST;
+@@ -708,7 +705,6 @@ int __meminit sparse_add_one_section(str
+ sparse_init_one_section(ms, section_nr, memmap, usemap);
+
+ out:
+- pgdat_resize_unlock(pgdat, &flags);
+ if (ret < 0) {
+ kfree(usemap);
+ __kfree_section_memmap(memmap, altmap);
+@@ -770,10 +766,8 @@ void sparse_remove_one_section(struct zo
+ unsigned long map_offset, struct vmem_altmap *altmap)
+ {
+ struct page *memmap = NULL;
+- unsigned long *usemap = NULL, flags;
+- struct pglist_data *pgdat = zone->zone_pgdat;
++ unsigned long *usemap = NULL;
+
+- pgdat_resize_lock(pgdat, &flags);
+ if (ms->section_mem_map) {
+ usemap = ms->pageblock_flags;
+ memmap = sparse_decode_mem_map(ms->section_mem_map,
+@@ -781,7 +775,6 @@ void sparse_remove_one_section(struct zo
+ ms->section_mem_map = 0;
+ ms->pageblock_flags = NULL;
+ }
+- pgdat_resize_unlock(pgdat, &flags);
+
+ clear_hwpoisoned_pages(memmap + map_offset,
+ PAGES_PER_SECTION - map_offset);
diff --git a/queue-4.19/mm-sparse-pass-nid-instead-of-pgdat-to-sparse_add_one_section.patch b/queue-4.19/mm-sparse-pass-nid-instead-of-pgdat-to-sparse_add_one_section.patch
new file mode 100644
index 00000000000..a01de5c76db
--- /dev/null
+++ b/queue-4.19/mm-sparse-pass-nid-instead-of-pgdat-to-sparse_add_one_section.patch
@@ -0,0 +1,86 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:00 +0100
+Subject: mm, sparse: pass nid instead of pgdat to sparse_add_one_section()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-4-david@redhat.com>
+
+From: Wei Yang
+
+commit 4e0d2e7ef14d9e1c900dac909db45263822b824f upstream.
+
+Since the information needed in sparse_add_one_section() is node id to
+allocate proper memory, it is not necessary to pass its pgdat.
+
+This patch changes the prototype of sparse_add_one_section() to pass node
+id directly. This is intended to avoid the misleading impression that
+sparse_add_one_section() would touch the pgdat.
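
[editor's note: a stand-alone sketch of the calling-convention change
described above. Everything in it is hypothetical: "toy_pgdat" merely
stands in for the kernel's pglist_data, and the two functions only model
the old and new prototypes, not the real sparse_add_one_section(). Both
calls resolve the same nid; the callee simply no longer sees the pgdat.]

 #include <stdio.h>

 /* hypothetical stand-in for pglist_data; only node_id matters here */
 struct toy_pgdat {
         int node_id;
 };

 /* old convention: the whole pgdat is passed, but only node_id is read */
 static int add_section_old(struct toy_pgdat *pgdat, unsigned long start_pfn)
 {
         (void)start_pfn;
         return pgdat->node_id;  /* nid drives the node-local allocations */
 }

 /* new convention: the caller resolves the nid and passes it directly */
 static int add_section_new(int nid, unsigned long start_pfn)
 {
         (void)start_pfn;
         return nid;
 }

 int main(void)
 {
         struct toy_pgdat node = { .node_id = 1 };

         printf("old: nid %d\n", add_section_old(&node, 0x8000UL));
         printf("new: nid %d\n", add_section_new(node.node_id, 0x8000UL));
         return 0;
 }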
+
+Link: http://lkml.kernel.org/r/20181204085657.20472-2-richard.weiyang@gmail.com
+Signed-off-by: Wei Yang
+Reviewed-by: David Hildenbrand
+Acked-by: Michal Hocko
+Cc: Dave Hansen
+Cc: Oscar Salvador
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/memory_hotplug.h | 4 ++--
+ mm/memory_hotplug.c | 2 +-
+ mm/sparse.c | 8 ++++----
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/include/linux/memory_hotplug.h
++++ b/include/linux/memory_hotplug.h
+@@ -335,8 +335,8 @@ extern void move_pfn_range_to_zone(struc
+ unsigned long nr_pages, struct vmem_altmap *altmap);
+ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+ extern bool is_memblock_offlined(struct memory_block *mem);
+-extern int sparse_add_one_section(struct pglist_data *pgdat,
+- unsigned long start_pfn, struct vmem_altmap *altmap);
++extern int sparse_add_one_section(int nid, unsigned long start_pfn,
++ struct vmem_altmap *altmap);
+ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+ unsigned long map_offset, struct vmem_altmap *altmap);
+ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -255,7 +255,7 @@ static int __meminit __add_section(int n
+ if (pfn_valid(phys_start_pfn))
+ return -EEXIST;
+
+- ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
++ ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
+ if (ret < 0)
+ return ret;
+
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -661,8 +661,8 @@ static void free_map_bootmem(struct page
+ * set. If this is <=0, then that means that the passed-in
+ * map was not consumed and must be freed.
+ */
+-int __meminit sparse_add_one_section(struct pglist_data *pgdat,
+- unsigned long start_pfn, struct vmem_altmap *altmap)
++int __meminit sparse_add_one_section(int nid, unsigned long start_pfn,
++ struct vmem_altmap *altmap)
+ {
+ unsigned long section_nr = pfn_to_section_nr(start_pfn);
+ struct mem_section *ms;
+@@ -674,11 +674,11 @@ int __meminit sparse_add_one_section(str
+ * no locking for this, because it does its own
+ * plus, it does a kmalloc
+ */
+- ret = sparse_index_init(section_nr, pgdat->node_id);
++ ret = sparse_index_init(section_nr, nid);
+ if (ret < 0 && ret != -EEXIST)
+ return ret;
+ ret = 0;
+- memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
++ memmap = kmalloc_section_memmap(section_nr, nid, altmap);
+ if (!memmap)
+ return -ENOMEM;
+ usemap = __kmalloc_section_usemap();
diff --git a/queue-4.19/powerpc-mm-fix-section-mismatch-warning.patch b/queue-4.19/powerpc-mm-fix-section-mismatch-warning.patch
new file mode 100644
index 00000000000..f5b2d1a2868
--- /dev/null
+++ b/queue-4.19/powerpc-mm-fix-section-mismatch-warning.patch
@@ -0,0 +1,49 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:08 +0100
+Subject: powerpc/mm: Fix section mismatch warning
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-12-david@redhat.com>
+
+From: "Aneesh Kumar K.V"
+
+commit 26ad26718dfaa7cf49d106d212ebf2370076c253 upstream.
+
+This patch fixes the below section mismatch warnings.
+
+WARNING: vmlinux.o(.text+0x2d1f44): Section mismatch in reference from the function devm_memremap_pages_release() to the function .meminit.text:arch_remove_memory()
+WARNING: vmlinux.o(.text+0x2d265c): Section mismatch in reference from the function devm_memremap_pages() to the function .meminit.text:arch_add_memory()
+
+Signed-off-by: Aneesh Kumar K.V
+Signed-off-by: Michael Ellerman
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/powerpc/mm/mem.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -118,8 +118,8 @@ int __weak remove_section_mapping(unsign
+ return -ENODEV;
+ }
+
+-int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+- bool want_memblock)
++int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
++ bool want_memblock)
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+@@ -140,8 +140,8 @@ int __meminit arch_add_memory(int nid, u
+ }
+
+ #ifdef CONFIG_MEMORY_HOTREMOVE
+-int __meminit arch_remove_memory(int nid, u64 start, u64 size,
+- struct vmem_altmap *altmap)
++int __ref arch_remove_memory(int nid, u64 start, u64 size,
++ struct vmem_altmap *altmap)
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/queue-4.19/s390x-mm-implement-arch_remove_memory.patch b/queue-4.19/s390x-mm-implement-arch_remove_memory.patch
new file mode 100644
index 00000000000..2e4accecdef
--- /dev/null
+++ b/queue-4.19/s390x-mm-implement-arch_remove_memory.patch
@@ -0,0 +1,101 @@
+From foo@baz Tue 28 Jan 2020 02:32:10 PM CET
+From: David Hildenbrand
+Date: Tue, 28 Jan 2020 10:50:10 +0100
+Subject: s390x/mm: implement arch_remove_memory()
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, Michal Hocko , Greg Kroah-Hartman , Andrew Morton , "Aneesh Kumar K . V" , Baoquan He , Dan Williams , Oscar Salvador , Wei Yang , David Hildenbrand
+Message-ID: <20200128095021.8076-14-david@redhat.com>
+
+From: David Hildenbrand
+
+commit 18c86506c80f6b6b5e67d95bf0d6f7e665de5239 upstream.
+
+Will come in handy when wanting to handle errors after
+arch_add_memory().
+
+Link: http://lkml.kernel.org/r/20190527111152.16324-4-david@redhat.com
+Signed-off-by: David Hildenbrand
+Cc: Heiko Carstens
+Cc: Michal Hocko
+Cc: Mike Rapoport
+Cc: David Hildenbrand
+Cc: Vasily Gorbik
+Cc: Oscar Salvador
+Cc: Alex Deucher
+Cc: Andrew Banman
+Cc: Andy Lutomirski
+Cc: Anshuman Khandual
+Cc: Ard Biesheuvel
+Cc: Arun KS
+Cc: Baoquan He
+Cc: Benjamin Herrenschmidt
+Cc: Borislav Petkov
+Cc: Catalin Marinas
+Cc: Chintan Pandya
+Cc: Christophe Leroy
+Cc: Chris Wilson
+Cc: Dan Williams
+Cc: Dave Hansen
+Cc: "David S. Miller"
+Cc: Fenghua Yu
+Cc: Greg Kroah-Hartman
+Cc: "H. Peter Anvin"
+Cc: Ingo Molnar
+Cc: Jonathan Cameron
+Cc: Joonsoo Kim
+Cc: Jun Yao
+Cc: "Kirill A. Shutemov"
+Cc: Logan Gunthorpe
+Cc: Mark Brown
+Cc: Mark Rutland
+Cc: Masahiro Yamada
+Cc: Mathieu Malaterre
+Cc: Michael Ellerman
+Cc: Mike Rapoport
+Cc: "mike.travis@hpe.com"
+Cc: Nicholas Piggin
+Cc: Oscar Salvador
+Cc: Paul Mackerras
+Cc: Pavel Tatashin
+Cc: Peter Zijlstra
+Cc: Qian Cai
+Cc: "Rafael J. Wysocki"
+Cc: Rich Felker
+Cc: Rob Herring
+Cc: Robin Murphy
+Cc: Thomas Gleixner
+Cc: Tony Luck
+Cc: Wei Yang
+Cc: Will Deacon
+Cc: Yoshinori Sato
+Cc: Yu Zhao
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: David Hildenbrand
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/mm/init.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/mm/init.c
++++ b/arch/s390/mm/init.c
+@@ -243,12 +243,13 @@ int arch_add_memory(int nid, u64 start,
+ void arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
+ {
+- /*
+- * There is no hardware or firmware interface which could trigger a
+- * hot memory remove on s390. So there is nothing that needs to be
+- * implemented.
+- */
+- BUG();
++ unsigned long start_pfn = start >> PAGE_SHIFT;
++ unsigned long nr_pages = size >> PAGE_SHIFT;
++ struct zone *zone;
++
++ zone = page_zone(pfn_to_page(start_pfn));
++ __remove_pages(zone, start_pfn, nr_pages, altmap);
++ vmem_remove_mapping(start, size);
+ }
+ #endif
+ #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/queue-4.19/series b/queue-4.19/series
index f29d9fa1303..12d534d5085 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -66,3 +66,27 @@ scsi-iscsi-avoid-potential-deadlock-in-iscsi_if_rx-func.patch
 netfilter-ipset-use-bitmap-infrastructure-completely.patch
 netfilter-nf_tables-add-__nft_chain_type_get.patch
 net-x25-fix-nonblocking-connect.patch
+mm-memory_hotplug-make-remove_memory-take-the-device_hotplug_lock.patch
+mm-sparse-drop-pgdat_resize_lock-in-sparse_add-remove_one_section.patch
+mm-sparse-pass-nid-instead-of-pgdat-to-sparse_add_one_section.patch
+drivers-base-memory.c-remove-an-unnecessary-check-on-nr_mem_sections.patch
+mm-memory_hotplug-add-nid-parameter-to-arch_remove_memory.patch
+mm-memory_hotplug-release-memory-resource-after-arch_remove_memory.patch
+drivers-base-memory.c-clean-up-relics-in-function-parameters.patch
+mm-memory_hotplug-update-a-comment-in-unregister_memory.patch
+mm-memory_hotplug-make-unregister_memory_section-never-fail.patch
+mm-memory_hotplug-make-__remove_section-never-fail.patch
+powerpc-mm-fix-section-mismatch-warning.patch
+mm-memory_hotplug-make-__remove_pages-and-arch_remove_memory-never-fail.patch
+s390x-mm-implement-arch_remove_memory.patch
+mm-memory_hotplug-allow-arch_remove_memory-without-config_memory_hotremove.patch
+drivers-base-memory-pass-a-block_id-to-init_memory_block.patch
+mm-memory_hotplug-create-memory-block-devices-after-arch_add_memory.patch
+mm-memory_hotplug-remove-memory-block-devices-before-arch_remove_memory.patch
+mm-memory_hotplug-make-unregister_memory_block_under_nodes-never-fail.patch
+mm-memory_hotplug-remove-zone-parameter-from-sparse_remove_one_section.patch
+mm-hotplug-kill-is_dev_zone-usage-in-__remove_pages.patch
+drivers-base-node.c-simplify-unregister_memory_block_under_nodes.patch
+mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch
+mm-memory_hotplug-fix-try_offline_node.patch
+mm-memory_hotplug-shrink-zones-when-offlining-memory.patch