git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
7e24e4e6681fd78dd974eceda2e7b89bfd0b6807
[thirdparty/kernel/stable-queue.git] /
1 From 8dc4bb58a146655eb057247d7c9d19e73928715b Mon Sep 17 00:00:00 2001
2 From: David Hildenbrand <david@redhat.com>
3 Date: Thu, 12 Nov 2020 14:38:13 +0100
4 Subject: mm/memory_hotplug: extend offline_and_remove_memory() to handle more than one memory block
5
6 From: David Hildenbrand <david@redhat.com>
7
8 commit 8dc4bb58a146655eb057247d7c9d19e73928715b upstream.
9
10 virtio-mem soon wants to use offline_and_remove_memory() on memory that
11 exceeds a single Linux memory block (memory_block_size_bytes()). Let's
12 remove that restriction.
13
14 Let's remember the old state and try to restore that if anything goes
15 wrong. While re-onlining can, in general, fail, it's highly unlikely to
16 happen (usually only when a notifier fails to allocate memory, and these
17 are rather rare).
18
19 This will be used by virtio-mem to offline+remove memory ranges that are
20 bigger than a single memory block - for example, with a device block
21 size of 1 GiB (e.g., gigantic pages in the hypervisor) and a Linux memory
22 block size of 128MB.
23
24 While we could compress the state into 2 bits, using 8 bits is much
25 easier.
26
27 This handling is similar to, but different from, acpi_scan_try_to_offline():
28
29 a) We don't try to offline twice. I am not sure if this CONFIG_MEMCG
30 optimization is still relevant - it should only apply to ZONE_NORMAL
31 (where we have no guarantees). If relevant, we can always add it.
32
33 b) acpi_scan_try_to_offline() simply onlines all memory in case
34 something goes wrong. It doesn't restore the previous online type. Let's do
35 that, so we won't overwrite what, e.g., user space configured.
36
37 Reviewed-by: Wei Yang <richard.weiyang@linux.alibaba.com>
38 Cc: "Michael S. Tsirkin" <mst@redhat.com>
39 Cc: Jason Wang <jasowang@redhat.com>
40 Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
41 Cc: Michal Hocko <mhocko@kernel.org>
42 Cc: Oscar Salvador <osalvador@suse.de>
43 Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
44 Cc: Andrew Morton <akpm@linux-foundation.org>
45 Signed-off-by: David Hildenbrand <david@redhat.com>
46 Link: https://lore.kernel.org/r/20201112133815.13332-28-david@redhat.com
47 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
48 Acked-by: Andrew Morton <akpm@linux-foundation.org>
49 Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
50 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
51 ---
52 mm/memory_hotplug.c | 105 ++++++++++++++++++++++++++++++++++++++++++++--------
53 1 file changed, 89 insertions(+), 16 deletions(-)
54
55 --- a/mm/memory_hotplug.c
56 +++ b/mm/memory_hotplug.c
57 @@ -1788,39 +1788,112 @@ int remove_memory(int nid, u64 start, u6
58 }
59 EXPORT_SYMBOL_GPL(remove_memory);
60
61 +static int try_offline_memory_block(struct memory_block *mem, void *arg)
62 +{
63 + uint8_t online_type = MMOP_ONLINE_KERNEL;
64 + uint8_t **online_types = arg;
65 + struct page *page;
66 + int rc;
67 +
68 + /*
69 + * Sense the online_type via the zone of the memory block. Offlining
70 + * with multiple zones within one memory block will be rejected
71 + * by offlining code ... so we don't care about that.
72 + */
73 + page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
74 + if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
75 + online_type = MMOP_ONLINE_MOVABLE;
76 +
77 + rc = device_offline(&mem->dev);
78 + /*
79 + * Default is MMOP_OFFLINE - change it only if offlining succeeded,
80 + * so try_reonline_memory_block() can do the right thing.
81 + */
82 + if (!rc)
83 + **online_types = online_type;
84 +
85 + (*online_types)++;
86 + /* Ignore if already offline. */
87 + return rc < 0 ? rc : 0;
88 +}
89 +
90 +static int try_reonline_memory_block(struct memory_block *mem, void *arg)
91 +{
92 + uint8_t **online_types = arg;
93 + int rc;
94 +
95 + if (**online_types != MMOP_OFFLINE) {
96 + mem->online_type = **online_types;
97 + rc = device_online(&mem->dev);
98 + if (rc < 0)
99 + pr_warn("%s: Failed to re-online memory: %d",
100 + __func__, rc);
101 + }
102 +
103 + /* Continue processing all remaining memory blocks. */
104 + (*online_types)++;
105 + return 0;
106 +}
107 +
108 /*
109 - * Try to offline and remove a memory block. Might take a long time to
110 - * finish in case memory is still in use. Primarily useful for memory devices
111 - * that logically unplugged all memory (so it's no longer in use) and want to
112 - * offline + remove the memory block.
113 + * Try to offline and remove memory. Might take a long time to finish in case
114 + * memory is still in use. Primarily useful for memory devices that logically
115 + * unplugged all memory (so it's no longer in use) and want to offline + remove
116 + * that memory.
117 */
118 int offline_and_remove_memory(int nid, u64 start, u64 size)
119 {
120 - struct memory_block *mem;
121 - int rc = -EINVAL;
122 + const unsigned long mb_count = size / memory_block_size_bytes();
123 + uint8_t *online_types, *tmp;
124 + int rc;
125
126 if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
127 - size != memory_block_size_bytes())
128 - return rc;
129 + !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
130 + return -EINVAL;
131 +
132 + /*
133 + * We'll remember the old online type of each memory block, so we can
134 + * try to revert whatever we did when offlining one memory block fails
135 + * after offlining some others succeeded.
136 + */
137 + online_types = kmalloc_array(mb_count, sizeof(*online_types),
138 + GFP_KERNEL);
139 + if (!online_types)
140 + return -ENOMEM;
141 + /*
142 + * Initialize all states to MMOP_OFFLINE, so when we abort processing in
143 + * try_offline_memory_block(), we'll skip all unprocessed blocks in
144 + * try_reonline_memory_block().
145 + */
146 + memset(online_types, MMOP_OFFLINE, mb_count);
147
148 lock_device_hotplug();
149 - mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
150 - if (mem)
151 - rc = device_offline(&mem->dev);
152 - /* Ignore if the device is already offline. */
153 - if (rc > 0)
154 - rc = 0;
155 +
156 + tmp = online_types;
157 + rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
158
159 /*
160 - * In case we succeeded to offline the memory block, remove it.
161 + * In case we succeeded to offline all memory, remove it.
162 * This cannot fail as it cannot get onlined in the meantime.
163 */
164 if (!rc) {
165 rc = try_remove_memory(nid, start, size);
166 - WARN_ON_ONCE(rc);
167 + if (rc)
168 + pr_err("%s: Failed to remove memory: %d", __func__, rc);
169 + }
170 +
171 + /*
172 + * Rollback what we did. While memory onlining might theoretically fail
173 + * (nacked by a notifier), it barely ever happens.
174 + */
175 + if (rc) {
176 + tmp = online_types;
177 + walk_memory_blocks(start, size, &tmp,
178 + try_reonline_memory_block);
179 }
180 unlock_device_hotplug();
181
182 + kfree(online_types);
183 return rc;
184 }
185 EXPORT_SYMBOL_GPL(offline_and_remove_memory);