]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
mm/memory_hotplug: add missing mem_hotplug_lock
authorSumanth Korikkar <sumanthk@linux.ibm.com>
Mon, 20 Nov 2023 14:53:52 +0000 (15:53 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 10 Jan 2024 16:10:33 +0000 (17:10 +0100)
[ Upstream commit 001002e73712cdf6b8d9a103648cda3040ad7647 ]

From Documentation/core-api/memory-hotplug.rst:
When adding/removing/onlining/offlining memory or adding/removing
heterogeneous/device memory, we should always hold the mem_hotplug_lock
in write mode to serialise memory hotplug (e.g. access to global/zone
variables).

mhp_(de)init_memmap_on_memory() functions can change zone stats and
struct page content, but they are currently called w/o the
mem_hotplug_lock.

When memory block is being offlined and when kmemleak goes through each
populated zone, the following theoretical race conditions could occur:
CPU 0:      | CPU 1:
memory_offline()      |
-> offline_pages()      |
-> mem_hotplug_begin()      |
   ...      |
-> mem_hotplug_done()      |
     | kmemleak_scan()
     | -> get_online_mems()
     |    ...
-> mhp_deinit_memmap_on_memory()      |
  [not protected by mem_hotplug_begin/done()]|
  Marks memory section as offline,      |   Retrieves zone_start_pfn
  poisons vmemmap struct pages and updates   |   and struct page members.
  the zone related data      |
         |    ...
         | -> put_online_mems()

Fix this by ensuring mem_hotplug_lock is taken before performing
mhp_init_memmap_on_memory().  Also ensure that
mhp_deinit_memmap_on_memory() holds the lock.

online/offline_pages() are currently only called from
memory_block_online/offline(), so it is safe to move the locking there.

Link: https://lkml.kernel.org/r/20231120145354.308999-2-sumanthk@linux.ibm.com
Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range")
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: kernel test robot <lkp@intel.com>
Cc: <stable@vger.kernel.org> [5.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/base/memory.c
mm/memory_hotplug.c

index 9aa0da991cfb910955ece83be4be1970eced2749..5d39f3e374dae1fdf5986c58b2578d1f32296274 100644 (file)
@@ -175,6 +175,9 @@ int memory_notify(unsigned long val, void *v)
        return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+/*
+ * Must acquire mem_hotplug_lock in write mode.
+ */
 static int memory_block_online(struct memory_block *mem)
 {
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -193,10 +196,11 @@ static int memory_block_online(struct memory_block *mem)
         * stage helps to keep accounting easier to follow - e.g vmemmaps
         * belong to the same zone as the memory they backed.
         */
+       mem_hotplug_begin();
        if (nr_vmemmap_pages) {
                ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
        ret = online_pages(start_pfn + nr_vmemmap_pages,
@@ -204,7 +208,7 @@ static int memory_block_online(struct memory_block *mem)
        if (ret) {
                if (nr_vmemmap_pages)
                        mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
-               return ret;
+               goto out;
        }
 
        /*
@@ -216,9 +220,14 @@ static int memory_block_online(struct memory_block *mem)
                                          nr_vmemmap_pages);
 
        mem->zone = zone;
+out:
+       mem_hotplug_done();
        return ret;
 }
 
+/*
+ * Must acquire mem_hotplug_lock in write mode.
+ */
 static int memory_block_offline(struct memory_block *mem)
 {
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -233,6 +242,7 @@ static int memory_block_offline(struct memory_block *mem)
         * Unaccount before offlining, such that unpopulated zone and kthreads
         * can properly be torn down in offline_pages().
         */
+       mem_hotplug_begin();
        if (nr_vmemmap_pages)
                adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
                                          -nr_vmemmap_pages);
@@ -244,13 +254,15 @@ static int memory_block_offline(struct memory_block *mem)
                if (nr_vmemmap_pages)
                        adjust_present_page_count(pfn_to_page(start_pfn),
                                                  mem->group, nr_vmemmap_pages);
-               return ret;
+               goto out;
        }
 
        if (nr_vmemmap_pages)
                mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
 
        mem->zone = NULL;
+out:
+       mem_hotplug_done();
        return ret;
 }
 
index bd2570b4f9b7b412a84e4072ac667c1d86e54c66..d02722bbfcf33992c8c36835204a42cb1a1c41e2 100644 (file)
@@ -1069,6 +1069,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
        kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
                       struct zone *zone, struct memory_group *group)
 {
@@ -1089,7 +1092,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
                         !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
                return -EINVAL;
 
-       mem_hotplug_begin();
 
        /* associate pfn range with the zone */
        move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
@@ -1148,7 +1150,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
        writeback_set_ratelimit();
 
        memory_notify(MEM_ONLINE, &arg);
-       mem_hotplug_done();
        return 0;
 
 failed_addition:
@@ -1157,7 +1158,6 @@ failed_addition:
                 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
        memory_notify(MEM_CANCEL_ONLINE, &arg);
        remove_pfn_range_from_zone(zone, pfn, nr_pages);
-       mem_hotplug_done();
        return ret;
 }
 
@@ -1787,6 +1787,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
        return 0;
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
                        struct zone *zone, struct memory_group *group)
 {
@@ -1809,8 +1812,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
                         !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
                return -EINVAL;
 
-       mem_hotplug_begin();
-
        /*
         * Don't allow to offline memory blocks that contain holes.
         * Consequently, memory blocks with holes can never get onlined
@@ -1946,7 +1947,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 
        memory_notify(MEM_OFFLINE, &arg);
        remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
-       mem_hotplug_done();
        return 0;
 
 failed_removal_isolated:
@@ -1961,7 +1961,6 @@ failed_removal:
                 (unsigned long long) start_pfn << PAGE_SHIFT,
                 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
                 reason);
-       mem_hotplug_done();
        return ret;
 }