// SPDX-License-Identifier: GPL-2.0-only
/*
 * crash.c - kernel crash support code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
#include <linux/crash_core.h>
#include <linux/reboot.h>
#include <linux/btf.h>
#include <linux/objtool.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/sha1.h>

#include "kallsyms_internal.h"
#include "kexec_internal.h"

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

#ifdef CONFIG_CRASH_DUMP

int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
	struct page *vmcoreinfo_page;
	void *safecopy;

	if (!IS_ENABLED(CONFIG_CRASH_DUMP))
		return 0;
	if (image->type != KEXEC_TYPE_CRASH)
		return 0;

	/*
	 * For kdump, allocate one vmcoreinfo safe copy from the
	 * crash memory.  Since arch_kexec_protect_crashkres() runs
	 * after the kexec syscall, the copy is naturally protected
	 * from write (even read) access under the kernel direct
	 * mapping.  However, it must still be written at crash time
	 * to generate the vmcoreinfo note, hence the vmap alias
	 * established here.
	 */
	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
	if (!vmcoreinfo_page) {
		pr_warn("Could not allocate vmcoreinfo buffer\n");
		return -ENOMEM;
	}
	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
	if (!safecopy) {
		pr_warn("Could not vmap vmcoreinfo buffer\n");
		return -ENOMEM;
	}

	image->vmcoreinfo_data_copy = safecopy;
	crash_update_vmcoreinfo_safecopy(safecopy);

	return 0;
}
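
/*
 * A minimal sketch (illustrative, not code from this file) of the vmap
 * aliasing that kimage_crash_copy_vmcoreinfo() relies on: a page whose
 * direct-map access has been restricted can still be written through a
 * separate vmalloc-space mapping.
 *
 *	struct page *page = alloc_page(GFP_KERNEL);
 *	void *alias = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
 *
 *	if (alias) {
 *		memcpy(alias, data, len);	// reaches the page via the alias
 *		vunmap(alias);
 *	}
 */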
73 | ||
74 | ||
75 | ||
76 | int kexec_should_crash(struct task_struct *p) | |
77 | { | |
78 | /* | |
79 | * If crash_kexec_post_notifiers is enabled, don't run | |
80 | * crash_kexec() here yet, which must be run after panic | |
81 | * notifiers in panic(). | |
82 | */ | |
83 | if (crash_kexec_post_notifiers) | |
84 | return 0; | |
85 | /* | |
86 | * There are 4 panic() calls in make_task_dead() path, each of which | |
87 | * corresponds to each of these 4 conditions. | |
88 | */ | |
89 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | |
90 | return 1; | |
91 | return 0; | |
92 | } | |
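
/*
 * Example (illustrative): architecture die()/oops handlers typically use
 * the predicate above to decide whether an oops should enter kdump
 * directly:
 *
 *	if (regs && kexec_should_crash(current))
 *		crash_kexec(regs);
 */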
93 | ||
94 | int kexec_crash_loaded(void) | |
95 | { | |
96 | return !!kexec_crash_image; | |
97 | } | |
98 | EXPORT_SYMBOL_GPL(kexec_crash_loaded); | |
99 | ||
100 | /* | |
101 | * No panic_cpu check version of crash_kexec(). This function is called | |
102 | * only when panic_cpu holds the current CPU number; this is the only CPU | |
103 | * which processes crash_kexec routines. | |
104 | */ | |
105 | void __noclone __crash_kexec(struct pt_regs *regs) | |
106 | { | |
107 | /* Take the kexec_lock here to prevent sys_kexec_load | |
108 | * running on one cpu from replacing the crash kernel | |
109 | * we are using after a panic on a different cpu. | |
110 | * | |
111 | * If the crash kernel was not located in a fixed area | |
112 | * of memory the xchg(&kexec_crash_image) would be | |
113 | * sufficient. But since I reuse the memory... | |
114 | */ | |
115 | if (kexec_trylock()) { | |
116 | if (kexec_crash_image) { | |
117 | struct pt_regs fixed_regs; | |
118 | ||
119 | crash_setup_regs(&fixed_regs, regs); | |
120 | crash_save_vmcoreinfo(); | |
121 | machine_crash_shutdown(&fixed_regs); | |
122 | machine_kexec(kexec_crash_image); | |
123 | } | |
124 | kexec_unlock(); | |
125 | } | |
126 | } | |
127 | STACK_FRAME_NON_STANDARD(__crash_kexec); | |
128 | ||

__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}
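
/*
 * Illustrative sketch (hypothetical names, not code from this file) of the
 * "first CPU wins" pattern used by crash_kexec() above: whichever CPU moves
 * the atomic from its invalid value to its own id proceeds; every other CPU
 * sees atomic_try_cmpxchg() fail and simply returns.
 *
 *	static atomic_t owner = ATOMIC_INIT(-1);
 *
 *	static void enter_once(void)
 *	{
 *		int old = -1;
 *
 *		if (atomic_try_cmpxchg(&owner, &old, raw_smp_processor_id())) {
 *			do_exclusive_work();		// hypothetical helper
 *			atomic_set(&owner, -1);		// allow a later entry
 *		}
 *	}
 */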
152 | ||
153 | static inline resource_size_t crash_resource_size(const struct resource *res) | |
154 | { | |
155 | return !res->end ? 0 : resource_size(res); | |
156 | } | |
157 | ||
158 | ||
159 | ||
160 | ||

int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
				void **addr, unsigned long *sz)
{
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
	unsigned char *buf;
	unsigned int cpu, i;
	unsigned long long notes_addr;
	unsigned long mstart, mend;

	/* extra phdr for vmcoreinfo ELF note */
	nr_phdr = nr_cpus + 1;
	nr_phdr += mem->nr_ranges;

	/*
	 * kexec-tools creates an extra PT_LOAD phdr for the kernel text
	 * mapping area (for example, ffffffff80000000 - ffffffffa0000000
	 * on x86_64), which is required by tools like gdb.  The same
	 * physical memory is therefore mapped by two ELF headers: one
	 * containing kernel text virtual addresses and the other
	 * containing __va(physical) addresses.
	 */
	nr_phdr++;
	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);

	buf = vzalloc(elf_sz);
	if (!buf)
		return -ENOMEM;

	ehdr = (Elf64_Ehdr *)buf;
	phdr = (Elf64_Phdr *)(ehdr + 1);
	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = ELF_ARCH;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_phoff = sizeof(Elf64_Ehdr);
	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
	ehdr->e_phentsize = sizeof(Elf64_Phdr);

	/* Prepare one phdr of type PT_NOTE for each possible CPU */
	for_each_possible_cpu(cpu) {
		phdr->p_type = PT_NOTE;
		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
		phdr->p_offset = phdr->p_paddr = notes_addr;
		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
		(ehdr->e_phnum)++;
		phdr++;
	}

	/* Prepare one PT_NOTE header for vmcoreinfo */
	phdr->p_type = PT_NOTE;
	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
	(ehdr->e_phnum)++;
	phdr++;

	/* Prepare PT_LOAD type program header for kernel text region */
	if (need_kernel_map) {
		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_vaddr = (unsigned long) _text;
		phdr->p_filesz = phdr->p_memsz = _end - _text;
		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
		ehdr->e_phnum++;
		phdr++;
	}

	/* Go through all the ranges in mem->ranges[] and prepare phdr */
	for (i = 0; i < mem->nr_ranges; i++) {
		mstart = mem->ranges[i].start;
		mend = mem->ranges[i].end;

		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_offset = mstart;

		phdr->p_paddr = mstart;
		phdr->p_vaddr = (unsigned long) __va(mstart);
		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
		phdr->p_align = 0;
		ehdr->e_phnum++;
#ifdef CONFIG_KEXEC_FILE
		kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
			      phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
			      ehdr->e_phnum, phdr->p_offset);
#endif
		phdr++;
	}

	*addr = buf;
	*sz = elf_sz;
	return 0;
}
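
/*
 * Usage sketch (hypothetical caller, illustrative flow): an architecture's
 * kexec_file_load() path typically fills a struct crash_mem with the crash
 * memory ranges and then asks the helper above for the ELF core headers:
 *
 *	struct crash_mem *cmem;		// nr_ranges/ranges[] already filled
 *	void *elf_addr;
 *	unsigned long elf_sz;
 *	int ret;
 *
 *	ret = crash_prepare_elf64_headers(cmem, true, &elf_addr, &elf_sz);
 *	if (ret)
 *		return ret;
 *	// elf_addr/elf_sz now describe a vzalloc()'ed buffer that is loaded
 *	// as the elfcorehdr segment; the caller must vfree() it on error.
 */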
261 | ||
262 | int crash_exclude_mem_range(struct crash_mem *mem, | |
263 | unsigned long long mstart, unsigned long long mend) | |
264 | { | |
6dff3159 | 265 | int i; |
6f991cc3 | 266 | unsigned long long start, end, p_start, p_end; |
6f991cc3 ED |
267 | |
268 | for (i = 0; i < mem->nr_ranges; i++) { | |
269 | start = mem->ranges[i].start; | |
270 | end = mem->ranges[i].end; | |
271 | p_start = mstart; | |
272 | p_end = mend; | |
273 | ||
6dff3159 | 274 | if (p_start > end) |
6f991cc3 ED |
275 | continue; |
276 | ||
6dff3159 YW |
277 | /* |
278 | * Because the memory ranges in mem->ranges are stored in | |
279 | * ascending order, when we detect `p_end < start`, we can | |
280 | * immediately exit the for loop, as the subsequent memory | |
281 | * ranges will definitely be outside the range we are looking | |
282 | * for. | |
283 | */ | |
284 | if (p_end < start) | |
285 | break; | |
286 | ||
6f991cc3 | 287 | /* Truncate any area outside of range */ |
6dff3159 | 288 | if (p_start < start) |
6f991cc3 | 289 | p_start = start; |
6dff3159 | 290 | if (p_end > end) |
6f991cc3 ED |
291 | p_end = end; |
292 | ||
293 | /* Found completely overlapping range */ | |
294 | if (p_start == start && p_end == end) { | |
6dff3159 YW |
295 | memmove(&mem->ranges[i], &mem->ranges[i + 1], |
296 | (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); | |
297 | i--; | |
6f991cc3 | 298 | mem->nr_ranges--; |
6dff3159 | 299 | } else if (p_start > start && p_end < end) { |
6f991cc3 | 300 | /* Split original range */ |
6dff3159 YW |
301 | if (mem->nr_ranges >= mem->max_nr_ranges) |
302 | return -ENOMEM; | |
303 | ||
304 | memmove(&mem->ranges[i + 2], &mem->ranges[i + 1], | |
305 | (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); | |
306 | ||
6f991cc3 | 307 | mem->ranges[i].end = p_start - 1; |
6dff3159 YW |
308 | mem->ranges[i + 1].start = p_end + 1; |
309 | mem->ranges[i + 1].end = end; | |
310 | ||
311 | i++; | |
312 | mem->nr_ranges++; | |
6f991cc3 ED |
313 | } else if (p_start != start) |
314 | mem->ranges[i].end = p_start - 1; | |
315 | else | |
316 | mem->ranges[i].start = p_end + 1; | |
6f991cc3 ED |
317 | } |
318 | ||
6f991cc3 ED |
319 | return 0; |
320 | } | |
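
/*
 * Worked example (illustrative numbers): excluding [0x3000, 0x5fff] from a
 * table holding the single range [0x0000, 0xffff] splits that range in two
 * and consumes one extra slot:
 *
 *	before:	ranges[0] = { .start = 0x0000, .end = 0xffff }
 *	after:	ranges[0] = { .start = 0x0000, .end = 0x2fff }
 *		ranges[1] = { .start = 0x6000, .end = 0xffff }
 *
 * This is why callers must size max_nr_ranges with a possible split in
 * mind; otherwise the function returns -ENOMEM.
 */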
321 | ||
02aff848 BH |
322 | ssize_t crash_get_memory_size(void) |
323 | { | |
324 | ssize_t size = 0; | |
325 | ||
326 | if (!kexec_trylock()) | |
327 | return -EBUSY; | |
328 | ||
329 | size += crash_resource_size(&crashk_res); | |
330 | size += crash_resource_size(&crashk_low_res); | |
331 | ||
332 | kexec_unlock(); | |
333 | return size; | |
334 | } | |
335 | ||
336 | static int __crash_shrink_memory(struct resource *old_res, | |
337 | unsigned long new_size) | |
338 | { | |
339 | struct resource *ram_res; | |
340 | ||
341 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); | |
342 | if (!ram_res) | |
343 | return -ENOMEM; | |
344 | ||
345 | ram_res->start = old_res->start + new_size; | |
346 | ram_res->end = old_res->end; | |
347 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; | |
348 | ram_res->name = "System RAM"; | |
349 | ||
350 | if (!new_size) { | |
351 | release_resource(old_res); | |
352 | old_res->start = 0; | |
353 | old_res->end = 0; | |
354 | } else { | |
355 | crashk_res.end = ram_res->start - 1; | |
356 | } | |
357 | ||
358 | crash_free_reserved_phys_range(ram_res->start, ram_res->end); | |
359 | insert_resource(&iomem_resource, ram_res); | |
360 | ||
361 | return 0; | |
362 | } | |
363 | ||
364 | int crash_shrink_memory(unsigned long new_size) | |
365 | { | |
366 | int ret = 0; | |
367 | unsigned long old_size, low_size; | |
368 | ||
369 | if (!kexec_trylock()) | |
370 | return -EBUSY; | |
371 | ||
372 | if (kexec_crash_image) { | |
373 | ret = -ENOENT; | |
374 | goto unlock; | |
375 | } | |
376 | ||
377 | low_size = crash_resource_size(&crashk_low_res); | |
378 | old_size = crash_resource_size(&crashk_res) + low_size; | |
379 | new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN); | |
380 | if (new_size >= old_size) { | |
381 | ret = (new_size == old_size) ? 0 : -EINVAL; | |
382 | goto unlock; | |
383 | } | |
384 | ||
385 | /* | |
386 | * (low_size > new_size) implies that low_size is greater than zero. | |
387 | * This also means that if low_size is zero, the else branch is taken. | |
388 | * | |
389 | * If low_size is greater than 0, (low_size > new_size) indicates that | |
390 | * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res | |
391 | * needs to be shrunken. | |
392 | */ | |
393 | if (low_size > new_size) { | |
394 | ret = __crash_shrink_memory(&crashk_res, 0); | |
395 | if (ret) | |
396 | goto unlock; | |
397 | ||
398 | ret = __crash_shrink_memory(&crashk_low_res, new_size); | |
399 | } else { | |
400 | ret = __crash_shrink_memory(&crashk_res, new_size - low_size); | |
401 | } | |
402 | ||
403 | /* Swap crashk_res and crashk_low_res if needed */ | |
404 | if (!crashk_res.end && crashk_low_res.end) { | |
405 | crashk_res.start = crashk_low_res.start; | |
406 | crashk_res.end = crashk_low_res.end; | |
407 | release_resource(&crashk_low_res); | |
408 | crashk_low_res.start = 0; | |
409 | crashk_low_res.end = 0; | |
410 | insert_resource(&iomem_resource, &crashk_res); | |
411 | } | |
412 | ||
413 | unlock: | |
414 | kexec_unlock(); | |
415 | return ret; | |
416 | } | |
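
/*
 * Example (illustrative session): crash_shrink_memory() is reached from the
 * /sys/kernel/kexec_crash_size sysfs knob, so the reservation can be shrunk
 * at runtime before a crash kernel is loaded:
 *
 *	# cat /sys/kernel/kexec_crash_size
 *	536870912
 *	# echo 268435456 > /sys/kernel/kexec_crash_size
 *
 * Per the code above, the write fails with -ENOENT once a crash kernel
 * image is loaded, and with -EINVAL if the new size is larger than the
 * current reservation.
 */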
417 | ||
418 | void crash_save_cpu(struct pt_regs *regs, int cpu) | |
419 | { | |
420 | struct elf_prstatus prstatus; | |
421 | u32 *buf; | |
422 | ||
423 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | |
424 | return; | |
425 | ||
426 | /* Using ELF notes here is opportunistic. | |
427 | * I need a well defined structure format | |
428 | * for the data I pass, and I need tags | |
429 | * on the data to indicate what information I have | |
430 | * squirrelled away. ELF notes happen to provide | |
431 | * all of that, so there is no need to invent something new. | |
432 | */ | |
433 | buf = (u32 *)per_cpu_ptr(crash_notes, cpu); | |
434 | if (!buf) | |
435 | return; | |
436 | memset(&prstatus, 0, sizeof(prstatus)); | |
437 | prstatus.common.pr_pid = current->pid; | |
438 | elf_core_copy_regs(&prstatus.pr_reg, regs); | |
439 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | |
440 | &prstatus, sizeof(prstatus)); | |
441 | final_note(buf); | |
442 | } | |
443 | ||
444 | ||
445 | ||
6f991cc3 ED |
446 | static int __init crash_notes_memory_init(void) |
447 | { | |
448 | /* Allocate memory for saving cpu registers. */ | |
449 | size_t size, align; | |
450 | ||
451 | /* | |
452 | * crash_notes could be allocated across 2 vmalloc pages when percpu | |
453 | * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc | |
454 | * pages are also on 2 continuous physical pages. In this case the | |
455 | * 2nd part of crash_notes in 2nd page could be lost since only the | |
456 | * starting address and size of crash_notes are exported through sysfs. | |
457 | * Here round up the size of crash_notes to the nearest power of two | |
458 | * and pass it to __alloc_percpu as align value. This can make sure | |
459 | * crash_notes is allocated inside one physical page. | |
460 | */ | |
461 | size = sizeof(note_buf_t); | |
462 | align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); | |
463 | ||
464 | /* | |
465 | * Break compile if size is bigger than PAGE_SIZE since crash_notes | |
466 | * definitely will be in 2 pages with that. | |
467 | */ | |
468 | BUILD_BUG_ON(size > PAGE_SIZE); | |
469 | ||
470 | crash_notes = __alloc_percpu(size, align); | |
471 | if (!crash_notes) { | |
472 | pr_warn("Memory allocation for saving cpu register states failed\n"); | |
473 | return -ENOMEM; | |
474 | } | |
475 | return 0; | |
476 | } | |
477 | subsys_initcall(crash_notes_memory_init); | |
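
/*
 * Worked example (illustrative size): if sizeof(note_buf_t) were 424 bytes,
 * the align computed above would be roundup_pow_of_two(424) = 512.  Since
 * 512 divides PAGE_SIZE, a 424-byte object placed on a 512-byte boundary
 * can never straddle a page boundary, so the exported physical address and
 * size always describe one physically contiguous region.
 */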

#endif /* CONFIG_CRASH_DUMP */

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Unlike kexec/kdump loading/unloading/jumping/shrinking, which rarely
 * happens, many crash hotplug events can be notified during one short
 * period, e.g. when one memory board is hot added and its memory regions
 * are onlined.  The mutex __crash_hotplug_lock therefore serializes the
 * crash hotplug handling specifically.
 */
static DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)

/*
 * This routine is utilized when the crash_hotplug sysfs node is read.
 * It reflects the kernel's ability/permission to update the kdump
 * image directly.
 */
int crash_check_hotplug_support(void)
{
	int rc = 0;

	crash_hotplug_lock();
	/* Obtain lock while reading crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return 0;
	}
	if (kexec_crash_image)
		rc = kexec_crash_image->hotplug_support;
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();

	return rc;
}
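
/*
 * Example (illustrative): userspace, e.g. a udev rule shipped by a kdump
 * package, can consult the sysfs nodes backed by the helper above to decide
 * whether the kdump image must be reloaded after a hotplug event:
 *
 *	# cat /sys/devices/system/cpu/crash_hotplug
 *	1
 *	# cat /sys/devices/system/memory/crash_hotplug
 *	1
 *
 * A value of 1 means the kernel updates the kdump image itself, so no
 * userspace reload is required.
 */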
521 | ||
24726275 ED |
522 | /* |
523 | * To accurately reflect hot un/plug changes of cpu and memory resources | |
524 | * (including onling and offlining of those resources), the elfcorehdr | |
525 | * (which is passed to the crash kernel via the elfcorehdr= parameter) | |
526 | * must be updated with the new list of CPUs and memories. | |
527 | * | |
528 | * In order to make changes to elfcorehdr, two conditions are needed: | |
529 | * First, the segment containing the elfcorehdr must be large enough | |
530 | * to permit a growing number of resources; the elfcorehdr memory size | |
531 | * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. | |
532 | * Second, purgatory must explicitly exclude the elfcorehdr from the | |
533 | * list of segments it checks (since the elfcorehdr changes and thus | |
534 | * would require an update to purgatory itself to update the digest). | |
535 | */ | |
11800571 | 536 | static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg) |
24726275 ED |
537 | { |
538 | struct kimage *image; | |
539 | ||
e2a8f20d | 540 | crash_hotplug_lock(); |
24726275 ED |
541 | /* Obtain lock while changing crash information */ |
542 | if (!kexec_trylock()) { | |
543 | pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); | |
e2a8f20d | 544 | crash_hotplug_unlock(); |
24726275 ED |
545 | return; |
546 | } | |
547 | ||
548 | /* Check kdump is not loaded */ | |
549 | if (!kexec_crash_image) | |
550 | goto out; | |
551 | ||
552 | image = kexec_crash_image; | |
553 | ||
79365026 SJ |
554 | /* Check that kexec segments update is permitted */ |
555 | if (!image->hotplug_support) | |
a72bbec7 ED |
556 | goto out; |
557 | ||
24726275 ED |
558 | if (hp_action == KEXEC_CRASH_HP_ADD_CPU || |
559 | hp_action == KEXEC_CRASH_HP_REMOVE_CPU) | |
560 | pr_debug("hp_action %u, cpu %u\n", hp_action, cpu); | |
561 | else | |
562 | pr_debug("hp_action %u\n", hp_action); | |
563 | ||
564 | /* | |
565 | * The elfcorehdr_index is set to -1 when the struct kimage | |
566 | * is allocated. Find the segment containing the elfcorehdr, | |
567 | * if not already found. | |
568 | */ | |
569 | if (image->elfcorehdr_index < 0) { | |
570 | unsigned long mem; | |
571 | unsigned char *ptr; | |
572 | unsigned int n; | |
573 | ||
574 | for (n = 0; n < image->nr_segments; n++) { | |
575 | mem = image->segment[n].mem; | |
576 | ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); | |
577 | if (ptr) { | |
578 | /* The segment containing elfcorehdr */ | |
579 | if (memcmp(ptr, ELFMAG, SELFMAG) == 0) | |
580 | image->elfcorehdr_index = (int)n; | |
581 | kunmap_local(ptr); | |
582 | } | |
583 | } | |
584 | } | |
585 | ||
586 | if (image->elfcorehdr_index < 0) { | |
587 | pr_err("unable to locate elfcorehdr segment"); | |
588 | goto out; | |
589 | } | |
590 | ||
591 | /* Needed in order for the segments to be updated */ | |
592 | arch_kexec_unprotect_crashkres(); | |
593 | ||
594 | /* Differentiate between normal load and hotplug update */ | |
595 | image->hp_action = hp_action; | |
596 | ||
597 | /* Now invoke arch-specific update handler */ | |
11800571 | 598 | arch_crash_handle_hotplug_event(image, arg); |
24726275 ED |
599 | |
600 | /* No longer handling a hotplug event */ | |
601 | image->hp_action = KEXEC_CRASH_HP_NONE; | |
602 | image->elfcorehdr_updated = true; | |
603 | ||
604 | /* Change back to read-only */ | |
605 | arch_kexec_protect_crashkres(); | |
606 | ||
607 | /* Errors in the callback is not a reason to rollback state */ | |
608 | out: | |
609 | /* Release lock now that update complete */ | |
610 | kexec_unlock(); | |
e2a8f20d | 611 | crash_hotplug_unlock(); |
24726275 ED |
612 | } |
613 | ||
11800571 | 614 | static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg) |
24726275 ED |
615 | { |
616 | switch (val) { | |
617 | case MEM_ONLINE: | |
618 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, | |
11800571 | 619 | KEXEC_CRASH_HP_INVALID_CPU, arg); |
24726275 ED |
620 | break; |
621 | ||
622 | case MEM_OFFLINE: | |
623 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, | |
11800571 | 624 | KEXEC_CRASH_HP_INVALID_CPU, arg); |
24726275 ED |
625 | break; |
626 | } | |
627 | return NOTIFY_OK; | |
628 | } | |
629 | ||
630 | static struct notifier_block crash_memhp_nb = { | |
631 | .notifier_call = crash_memhp_notifier, | |
632 | .priority = 0 | |
633 | }; | |
634 | ||
635 | static int crash_cpuhp_online(unsigned int cpu) | |
636 | { | |
11800571 | 637 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL); |
24726275 ED |
638 | return 0; |
639 | } | |
640 | ||
641 | static int crash_cpuhp_offline(unsigned int cpu) | |
642 | { | |
11800571 | 643 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL); |
24726275 ED |
644 | return 0; |
645 | } | |
646 | ||
647 | static int __init crash_hotplug_init(void) | |
648 | { | |
649 | int result = 0; | |
650 | ||
651 | if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) | |
652 | register_memory_notifier(&crash_memhp_nb); | |
653 | ||
654 | if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { | |
655 | result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, | |
656 | "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline); | |
657 | } | |
658 | ||
659 | return result; | |
660 | } | |
661 | ||
662 | subsys_initcall(crash_hotplug_init); | |
663 | #endif |