]> git.ipfire.org Git - thirdparty/qemu.git/blame - dump/dump.c
dump: Add create_win_dump() stub for non-x86 targets
[thirdparty/qemu.git] / dump / dump.c
CommitLineData
783e9b48
WC
1/*
2 * QEMU dump
3 *
4 * Copyright Fujitsu, Corp. 2011, 2012
5 *
6 * Authors:
7 * Wen Congyang <wency@cn.fujitsu.com>
8 *
352666e2
SW
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
783e9b48
WC
11 *
12 */
13
d38ea87a 14#include "qemu/osdep.h"
f348b6d1 15#include "qemu/cutils.h"
783e9b48 16#include "elf.h"
ac978771 17#include "qemu/bswap.h"
c5d40b22 18#include "exec/target_page.h"
83c9089e 19#include "monitor/monitor.h"
9c17d615 20#include "sysemu/dump.h"
54d31236 21#include "sysemu/runstate.h"
1b3509ca 22#include "sysemu/cpus.h"
e688df6b 23#include "qapi/error.h"
d06b747b
MA
24#include "qapi/qapi-commands-dump.h"
25#include "qapi/qapi-events-dump.h"
cc7a8ea7 26#include "qapi/qmp/qerror.h"
db725815 27#include "qemu/main-loop.h"
903ef734 28#include "hw/misc/vmcoreinfo.h"
b7bc6b18 29#include "migration/blocker.h"
ac978771 30#include "hw/core/cpu.h"
2da91b54 31#include "win_dump.h"
2da91b54 32
d12f57ec
QN
33#include <zlib.h>
34#ifdef CONFIG_LZO
35#include <lzo/lzo1x.h>
36#endif
37#ifdef CONFIG_SNAPPY
38#include <snappy-c.h>
39#endif
4ab23a91
QN
40#ifndef ELF_MACHINE_UNAME
41#define ELF_MACHINE_UNAME "Unknown"
42#endif
d12f57ec 43
903ef734
MAL
44#define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
45
b7bc6b18
PX
46static Error *dump_migration_blocker;
47
903ef734
MAL
48#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \
49 ((DIV_ROUND_UP((hdr_size), 4) + \
50 DIV_ROUND_UP((name_size), 4) + \
51 DIV_ROUND_UP((desc_size), 4)) * 4)
52
05bbaa50
JF
53static inline bool dump_is_64bit(DumpState *s)
54{
55 return s->dump_info.d_class == ELFCLASS64;
56}
57
dddf725f
JF
58static inline bool dump_has_filter(DumpState *s)
59{
60 return s->filter_area_length > 0;
61}
62
acb0ef58 63uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
783e9b48 64{
acb0ef58 65 if (s->dump_info.d_endian == ELFDATA2LSB) {
783e9b48
WC
66 val = cpu_to_le16(val);
67 } else {
68 val = cpu_to_be16(val);
69 }
70
71 return val;
72}
73
acb0ef58 74uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
783e9b48 75{
acb0ef58 76 if (s->dump_info.d_endian == ELFDATA2LSB) {
783e9b48
WC
77 val = cpu_to_le32(val);
78 } else {
79 val = cpu_to_be32(val);
80 }
81
82 return val;
83}
84
acb0ef58 85uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
783e9b48 86{
acb0ef58 87 if (s->dump_info.d_endian == ELFDATA2LSB) {
783e9b48
WC
88 val = cpu_to_le64(val);
89 } else {
90 val = cpu_to_be64(val);
91 }
92
93 return val;
94}
95
783e9b48
WC
96static int dump_cleanup(DumpState *s)
97{
5ee163e8 98 guest_phys_blocks_free(&s->guest_phys_blocks);
783e9b48 99 memory_mapping_list_free(&s->list);
2928207a 100 close(s->fd);
903ef734 101 g_free(s->guest_note);
9b72224f 102 g_array_unref(s->string_table_buf);
903ef734 103 s->guest_note = NULL;
783e9b48 104 if (s->resume) {
6796b400
FZ
105 if (s->detached) {
106 qemu_mutex_lock_iothread();
107 }
783e9b48 108 vm_start();
6796b400
FZ
109 if (s->detached) {
110 qemu_mutex_unlock_iothread();
111 }
783e9b48 112 }
b7bc6b18 113 migrate_del_blocker(dump_migration_blocker);
783e9b48 114
2928207a 115 return 0;
783e9b48
WC
116}
117
b5ba1cc6 118static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
783e9b48
WC
119{
120 DumpState *s = opaque;
2f61652d
LC
121 size_t written_size;
122
123 written_size = qemu_write_full(s->fd, buf, size);
124 if (written_size != size) {
0c33659d 125 return -errno;
783e9b48
WC
126 }
127
128 return 0;
129}
130
670e7699 131static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
783e9b48 132{
046bc416
JF
133 /*
134 * phnum in the elf header is 16 bit, if we have more segments we
135 * set phnum to PN_XNUM and write the real number of segments to a
136 * special section.
137 */
138 uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
783e9b48 139
670e7699
JF
140 memset(elf_header, 0, sizeof(Elf64_Ehdr));
141 memcpy(elf_header, ELFMAG, SELFMAG);
142 elf_header->e_ident[EI_CLASS] = ELFCLASS64;
143 elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
144 elf_header->e_ident[EI_VERSION] = EV_CURRENT;
145 elf_header->e_type = cpu_to_dump16(s, ET_CORE);
146 elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
147 elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
148 elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
149 elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
150 elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
151 elf_header->e_phnum = cpu_to_dump16(s, phnum);
9b72224f
JF
152 elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
153 elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
154 elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
155 elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
783e9b48
WC
156}
157
670e7699 158static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
783e9b48 159{
046bc416
JF
160 /*
161 * phnum in the elf header is 16 bit, if we have more segments we
162 * set phnum to PN_XNUM and write the real number of segments to a
163 * special section.
164 */
165 uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
783e9b48 166
670e7699
JF
167 memset(elf_header, 0, sizeof(Elf32_Ehdr));
168 memcpy(elf_header, ELFMAG, SELFMAG);
169 elf_header->e_ident[EI_CLASS] = ELFCLASS32;
170 elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
171 elf_header->e_ident[EI_VERSION] = EV_CURRENT;
172 elf_header->e_type = cpu_to_dump16(s, ET_CORE);
173 elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
174 elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
175 elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
176 elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
177 elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
178 elf_header->e_phnum = cpu_to_dump16(s, phnum);
9b72224f
JF
179 elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
180 elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
181 elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
182 elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
670e7699 183}
783e9b48 184
670e7699
JF
185static void write_elf_header(DumpState *s, Error **errp)
186{
187 Elf32_Ehdr elf32_header;
188 Elf64_Ehdr elf64_header;
189 size_t header_size;
190 void *header_ptr;
191 int ret;
192
9b72224f
JF
193 /* The NULL header and the shstrtab are always defined */
194 assert(s->shdr_num >= 2);
670e7699
JF
195 if (dump_is_64bit(s)) {
196 prepare_elf64_header(s, &elf64_header);
197 header_size = sizeof(elf64_header);
198 header_ptr = &elf64_header;
199 } else {
200 prepare_elf32_header(s, &elf32_header);
201 header_size = sizeof(elf32_header);
202 header_ptr = &elf32_header;
203 }
204
205 ret = fd_write_vmcore(header_ptr, header_size, s);
783e9b48 206 if (ret < 0) {
0c33659d 207 error_setg_errno(errp, -ret, "dump: failed to write elf header");
783e9b48 208 }
783e9b48
WC
209}
210
4c7e251a
HZ
211static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
212 int phdr_index, hwaddr offset,
213 hwaddr filesz, Error **errp)
783e9b48
WC
214{
215 Elf64_Phdr phdr;
216 int ret;
783e9b48
WC
217
218 memset(&phdr, 0, sizeof(Elf64_Phdr));
acb0ef58
BR
219 phdr.p_type = cpu_to_dump32(s, PT_LOAD);
220 phdr.p_offset = cpu_to_dump64(s, offset);
221 phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
222 phdr.p_filesz = cpu_to_dump64(s, filesz);
223 phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
e17bebd0 224 phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr) ?: phdr.p_paddr;
783e9b48 225
2cac2607
LE
226 assert(memory_mapping->length >= filesz);
227
783e9b48
WC
228 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
229 if (ret < 0) {
0c33659d
YB
230 error_setg_errno(errp, -ret,
231 "dump: failed to write program header table");
783e9b48 232 }
783e9b48
WC
233}
234
4c7e251a
HZ
235static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
236 int phdr_index, hwaddr offset,
237 hwaddr filesz, Error **errp)
783e9b48
WC
238{
239 Elf32_Phdr phdr;
240 int ret;
783e9b48
WC
241
242 memset(&phdr, 0, sizeof(Elf32_Phdr));
acb0ef58
BR
243 phdr.p_type = cpu_to_dump32(s, PT_LOAD);
244 phdr.p_offset = cpu_to_dump32(s, offset);
245 phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
246 phdr.p_filesz = cpu_to_dump32(s, filesz);
247 phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
e17bebd0
JD
248 phdr.p_vaddr =
249 cpu_to_dump32(s, memory_mapping->virt_addr) ?: phdr.p_paddr;
783e9b48 250
2cac2607
LE
251 assert(memory_mapping->length >= filesz);
252
783e9b48
WC
253 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
254 if (ret < 0) {
0c33659d
YB
255 error_setg_errno(errp, -ret,
256 "dump: failed to write program header table");
783e9b48 257 }
783e9b48
WC
258}
259
2341a94d 260static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr)
783e9b48 261{
bc7d5580
JF
262 memset(phdr, 0, sizeof(*phdr));
263 phdr->p_type = cpu_to_dump32(s, PT_NOTE);
264 phdr->p_offset = cpu_to_dump64(s, s->note_offset);
265 phdr->p_paddr = 0;
266 phdr->p_filesz = cpu_to_dump64(s, s->note_size);
267 phdr->p_memsz = cpu_to_dump64(s, s->note_size);
268 phdr->p_vaddr = 0;
783e9b48
WC
269}
270
0bc3cd62
PB
271static inline int cpu_index(CPUState *cpu)
272{
273 return cpu->cpu_index + 1;
274}
275
903ef734
MAL
276static void write_guest_note(WriteCoreDumpFunction f, DumpState *s,
277 Error **errp)
278{
279 int ret;
280
281 if (s->guest_note) {
282 ret = f(s->guest_note, s->guest_note_size, s);
283 if (ret < 0) {
284 error_setg(errp, "dump: failed to write guest note");
285 }
286 }
287}
288
4c7e251a
HZ
289static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
290 Error **errp)
783e9b48 291{
0d34282f 292 CPUState *cpu;
783e9b48
WC
293 int ret;
294 int id;
295
bdc44640 296 CPU_FOREACH(cpu) {
0d34282f 297 id = cpu_index(cpu);
6a519918 298 ret = cpu_write_elf64_note(f, cpu, id, s);
783e9b48 299 if (ret < 0) {
e3517a52 300 error_setg(errp, "dump: failed to write elf notes");
4c7e251a 301 return;
783e9b48
WC
302 }
303 }
304
bdc44640 305 CPU_FOREACH(cpu) {
6a519918 306 ret = cpu_write_elf64_qemunote(f, cpu, s);
783e9b48 307 if (ret < 0) {
e3517a52 308 error_setg(errp, "dump: failed to write CPU status");
4c7e251a 309 return;
783e9b48
WC
310 }
311 }
903ef734
MAL
312
313 write_guest_note(f, s, errp);
783e9b48
WC
314}
315
2341a94d 316static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr)
783e9b48 317{
bc7d5580
JF
318 memset(phdr, 0, sizeof(*phdr));
319 phdr->p_type = cpu_to_dump32(s, PT_NOTE);
320 phdr->p_offset = cpu_to_dump32(s, s->note_offset);
321 phdr->p_paddr = 0;
322 phdr->p_filesz = cpu_to_dump32(s, s->note_size);
323 phdr->p_memsz = cpu_to_dump32(s, s->note_size);
324 phdr->p_vaddr = 0;
783e9b48
WC
325}
326
4c7e251a
HZ
327static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
328 Error **errp)
783e9b48 329{
0d34282f 330 CPUState *cpu;
783e9b48
WC
331 int ret;
332 int id;
333
bdc44640 334 CPU_FOREACH(cpu) {
0d34282f 335 id = cpu_index(cpu);
6a519918 336 ret = cpu_write_elf32_note(f, cpu, id, s);
783e9b48 337 if (ret < 0) {
e3517a52 338 error_setg(errp, "dump: failed to write elf notes");
4c7e251a 339 return;
783e9b48
WC
340 }
341 }
342
bdc44640 343 CPU_FOREACH(cpu) {
6a519918 344 ret = cpu_write_elf32_qemunote(f, cpu, s);
783e9b48 345 if (ret < 0) {
e3517a52 346 error_setg(errp, "dump: failed to write CPU status");
4c7e251a 347 return;
783e9b48
WC
348 }
349 }
903ef734
MAL
350
351 write_guest_note(f, s, errp);
783e9b48
WC
352}
353
bc7d5580
JF
354static void write_elf_phdr_note(DumpState *s, Error **errp)
355{
bc7d5580
JF
356 Elf32_Phdr phdr32;
357 Elf64_Phdr phdr64;
358 void *phdr;
359 size_t size;
360 int ret;
361
362 if (dump_is_64bit(s)) {
2341a94d 363 prepare_elf64_phdr_note(s, &phdr64);
bc7d5580
JF
364 size = sizeof(phdr64);
365 phdr = &phdr64;
366 } else {
2341a94d 367 prepare_elf32_phdr_note(s, &phdr32);
bc7d5580
JF
368 size = sizeof(phdr32);
369 phdr = &phdr32;
370 }
371
372 ret = fd_write_vmcore(phdr, size, s);
373 if (ret < 0) {
374 error_setg_errno(errp, -ret,
375 "dump: failed to write program header table");
376 }
377}
378
e41ed29b 379static void prepare_elf_section_hdr_zero(DumpState *s)
783e9b48 380{
e41ed29b
JF
381 if (dump_is_64bit(s)) {
382 Elf64_Shdr *shdr64 = s->elf_section_hdrs;
783e9b48 383
e41ed29b 384 shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
783e9b48 385 } else {
e41ed29b
JF
386 Elf32_Shdr *shdr32 = s->elf_section_hdrs;
387
388 shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
389 }
390}
391
9b72224f
JF
392static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
393{
394 uint64_t index = s->string_table_buf->len;
395 const char strtab[] = ".shstrtab";
396 Elf32_Shdr shdr32 = {};
397 Elf64_Shdr shdr64 = {};
398 int shdr_size;
399 void *shdr;
400
401 g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab));
402 if (dump_is_64bit(s)) {
403 shdr_size = sizeof(Elf64_Shdr);
404 shdr64.sh_type = SHT_STRTAB;
405 shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
406 shdr64.sh_name = index;
407 shdr64.sh_size = s->string_table_buf->len;
408 shdr = &shdr64;
409 } else {
410 shdr_size = sizeof(Elf32_Shdr);
411 shdr32.sh_type = SHT_STRTAB;
412 shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
413 shdr32.sh_name = index;
414 shdr32.sh_size = s->string_table_buf->len;
415 shdr = &shdr32;
416 }
417 memcpy(buff, shdr, shdr_size);
418}
419
420static bool prepare_elf_section_hdrs(DumpState *s, Error **errp)
e41ed29b
JF
421{
422 size_t len, sizeof_shdr;
9b72224f 423 void *buff_hdr;
e41ed29b
JF
424
425 /*
426 * Section ordering:
427 * - HDR zero
9b72224f
JF
428 * - Arch section hdrs
429 * - String table hdr
e41ed29b
JF
430 */
431 sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
432 len = sizeof_shdr * s->shdr_num;
433 s->elf_section_hdrs = g_malloc0(len);
9b72224f 434 buff_hdr = s->elf_section_hdrs;
e41ed29b
JF
435
436 /*
437 * The first section header is ALWAYS a special initial section
438 * header.
439 *
440 * The header should be 0 with one exception being that if
441 * phdr_num is PN_XNUM then the sh_info field contains the real
442 * number of segment entries.
443 *
444 * As we zero allocate the buffer we will only need to modify
445 * sh_info for the PN_XNUM case.
446 */
447 if (s->phdr_num >= PN_XNUM) {
448 prepare_elf_section_hdr_zero(s);
783e9b48 449 }
9b72224f
JF
450 buff_hdr += sizeof_shdr;
451
452 /* Add architecture defined section headers */
453 if (s->dump_info.arch_sections_write_hdr_fn
454 && s->shdr_num > 2) {
455 buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
456
457 if (s->shdr_num >= SHN_LORESERVE) {
458 error_setg_errno(errp, EINVAL,
459 "dump: too many architecture defined sections");
460 return false;
461 }
462 }
463
464 /*
465 * String table is the last section since strings are added via
466 * arch_sections_write_hdr().
467 */
468 prepare_elf_section_hdr_string(s, buff_hdr);
469 return true;
e41ed29b 470}
783e9b48 471
e41ed29b
JF
472static void write_elf_section_headers(DumpState *s, Error **errp)
473{
474 size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
475 int ret;
476
9b72224f
JF
477 if (!prepare_elf_section_hdrs(s, errp)) {
478 return;
479 }
e41ed29b
JF
480
481 ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
783e9b48 482 if (ret < 0) {
e41ed29b 483 error_setg_errno(errp, -ret, "dump: failed to write section headers");
783e9b48 484 }
e41ed29b
JF
485
486 g_free(s->elf_section_hdrs);
783e9b48
WC
487}
488
9b72224f
JF
489static void write_elf_sections(DumpState *s, Error **errp)
490{
491 int ret;
492
493 if (s->elf_section_data_size) {
494 /* Write architecture section data */
495 ret = fd_write_vmcore(s->elf_section_data,
496 s->elf_section_data_size, s);
497 if (ret < 0) {
498 error_setg_errno(errp, -ret,
499 "dump: failed to write architecture section data");
500 return;
501 }
502 }
503
504 /* Write string table */
505 ret = fd_write_vmcore(s->string_table_buf->data,
506 s->string_table_buf->len, s);
507 if (ret < 0) {
508 error_setg_errno(errp, -ret, "dump: failed to write string table data");
509 }
510}
511
4c7e251a 512static void write_data(DumpState *s, void *buf, int length, Error **errp)
783e9b48
WC
513{
514 int ret;
515
516 ret = fd_write_vmcore(buf, length, s);
517 if (ret < 0) {
0c33659d 518 error_setg_errno(errp, -ret, "dump: failed to save memory");
2264c2c9
PX
519 } else {
520 s->written_size += length;
783e9b48 521 }
783e9b48
WC
522}
523
4c7e251a
HZ
524/* write the memory to vmcore. 1 page per I/O. */
525static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
526 int64_t size, Error **errp)
783e9b48 527{
86a518bb 528 ERRP_GUARD();
783e9b48 529 int64_t i;
783e9b48 530
8161befd
AJ
531 for (i = 0; i < size / s->dump_info.page_size; i++) {
532 write_data(s, block->host_addr + start + i * s->dump_info.page_size,
86a518bb
JF
533 s->dump_info.page_size, errp);
534 if (*errp) {
4c7e251a 535 return;
783e9b48
WC
536 }
537 }
538
8161befd
AJ
539 if ((size % s->dump_info.page_size) != 0) {
540 write_data(s, block->host_addr + start + i * s->dump_info.page_size,
86a518bb
JF
541 size % s->dump_info.page_size, errp);
542 if (*errp) {
4c7e251a 543 return;
783e9b48
WC
544 }
545 }
783e9b48
WC
546}
547
2cac2607
LE
548/* get the memory's offset and size in the vmcore */
549static void get_offset_range(hwaddr phys_addr,
550 ram_addr_t mapping_length,
551 DumpState *s,
552 hwaddr *p_offset,
553 hwaddr *p_filesz)
783e9b48 554{
56c4bfb3 555 GuestPhysBlock *block;
a8170e5e 556 hwaddr offset = s->memory_offset;
783e9b48
WC
557 int64_t size_in_block, start;
558
2cac2607
LE
559 /* When the memory is not stored into vmcore, offset will be -1 */
560 *p_offset = -1;
561 *p_filesz = 0;
562
dddf725f
JF
563 if (dump_has_filter(s)) {
564 if (phys_addr < s->filter_area_begin ||
565 phys_addr >= s->filter_area_begin + s->filter_area_length) {
2cac2607 566 return;
783e9b48
WC
567 }
568 }
569
56c4bfb3 570 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
dddf725f
JF
571 if (dump_has_filter(s)) {
572 if (block->target_start >= s->filter_area_begin + s->filter_area_length ||
573 block->target_end <= s->filter_area_begin) {
783e9b48
WC
574 /* This block is out of the range */
575 continue;
576 }
577
dddf725f 578 if (s->filter_area_begin <= block->target_start) {
56c4bfb3 579 start = block->target_start;
783e9b48 580 } else {
dddf725f 581 start = s->filter_area_begin;
783e9b48
WC
582 }
583
56c4bfb3 584 size_in_block = block->target_end - start;
dddf725f
JF
585 if (s->filter_area_begin + s->filter_area_length < block->target_end) {
586 size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length);
783e9b48
WC
587 }
588 } else {
56c4bfb3
LE
589 start = block->target_start;
590 size_in_block = block->target_end - block->target_start;
783e9b48
WC
591 }
592
593 if (phys_addr >= start && phys_addr < start + size_in_block) {
2cac2607
LE
594 *p_offset = phys_addr - start + offset;
595
596 /* The offset range mapped from the vmcore file must not spill over
56c4bfb3 597 * the GuestPhysBlock, clamp it. The rest of the mapping will be
2cac2607
LE
598 * zero-filled in memory at load time; see
599 * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
600 */
601 *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
602 mapping_length :
603 size_in_block - (phys_addr - start);
604 return;
783e9b48
WC
605 }
606
607 offset += size_in_block;
608 }
783e9b48
WC
609}
610
afae6056 611static void write_elf_phdr_loads(DumpState *s, Error **errp)
783e9b48 612{
86a518bb 613 ERRP_GUARD();
2cac2607 614 hwaddr offset, filesz;
783e9b48
WC
615 MemoryMapping *memory_mapping;
616 uint32_t phdr_index = 1;
783e9b48
WC
617
618 QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
2cac2607
LE
619 get_offset_range(memory_mapping->phys_addr,
620 memory_mapping->length,
621 s, &offset, &filesz);
05bbaa50 622 if (dump_is_64bit(s)) {
4c7e251a 623 write_elf64_load(s, memory_mapping, phdr_index++, offset,
86a518bb 624 filesz, errp);
783e9b48 625 } else {
4c7e251a 626 write_elf32_load(s, memory_mapping, phdr_index++, offset,
86a518bb 627 filesz, errp);
783e9b48
WC
628 }
629
86a518bb 630 if (*errp) {
4c7e251a 631 return;
783e9b48
WC
632 }
633
046bc416 634 if (phdr_index >= s->phdr_num) {
783e9b48
WC
635 break;
636 }
637 }
783e9b48
WC
638}
639
c6812473
JF
640static void write_elf_notes(DumpState *s, Error **errp)
641{
642 if (dump_is_64bit(s)) {
643 write_elf64_notes(fd_write_vmcore, s, errp);
644 } else {
645 write_elf32_notes(fd_write_vmcore, s, errp);
646 }
647}
648
783e9b48 649/* write elf header, PT_NOTE and elf note to vmcore. */
4c7e251a 650static void dump_begin(DumpState *s, Error **errp)
783e9b48 651{
86a518bb 652 ERRP_GUARD();
783e9b48
WC
653
654 /*
655 * the vmcore's format is:
656 * --------------
657 * | elf header |
658 * --------------
cb415fd6
JF
659 * | sctn_hdr |
660 * --------------
783e9b48
WC
661 * | PT_NOTE |
662 * --------------
663 * | PT_LOAD |
664 * --------------
665 * | ...... |
666 * --------------
667 * | PT_LOAD |
668 * --------------
783e9b48
WC
669 * | elf note |
670 * --------------
671 * | memory |
672 * --------------
673 *
674 * we only know where the memory is saved after we write elf note into
675 * vmcore.
676 */
677
678 /* write elf header to vmcore */
670e7699 679 write_elf_header(s, errp);
86a518bb 680 if (*errp) {
4c7e251a 681 return;
783e9b48
WC
682 }
683
cb415fd6
JF
684 /* write section headers to vmcore */
685 write_elf_section_headers(s, errp);
bc7d5580
JF
686 if (*errp) {
687 return;
688 }
783e9b48 689
cb415fd6
JF
690 /* write PT_NOTE to vmcore */
691 write_elf_phdr_note(s, errp);
5ff2e5a3
JF
692 if (*errp) {
693 return;
694 }
695
cb415fd6
JF
696 /* write all PT_LOADs to vmcore */
697 write_elf_phdr_loads(s, errp);
e41ed29b
JF
698 if (*errp) {
699 return;
5ff2e5a3 700 }
783e9b48 701
c6812473
JF
702 /* write notes to vmcore */
703 write_elf_notes(s, errp);
783e9b48
WC
704}
705
113d8f4e
JF
706int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
707 int64_t filter_area_start,
708 int64_t filter_area_length)
783e9b48 709{
1e811303 710 int64_t size, left, right;
783e9b48 711
1e811303
JF
712 /* No filter, return full size */
713 if (!filter_area_length) {
714 return block->target_end - block->target_start;
715 }
783e9b48 716
1e811303
JF
717 /* calculate the overlapped region. */
718 left = MAX(filter_area_start, block->target_start);
719 right = MIN(filter_area_start + filter_area_length, block->target_end);
720 size = right - left;
721 size = size > 0 ? size : 0;
722
723 return size;
724}
725
113d8f4e
JF
726int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
727 int64_t filter_area_start,
728 int64_t filter_area_length)
1e811303
JF
729{
730 if (filter_area_length) {
731 /* return -1 if the block is not within filter area */
732 if (block->target_start >= filter_area_start + filter_area_length ||
733 block->target_end <= filter_area_start) {
734 return -1;
783e9b48
WC
735 }
736
1e811303
JF
737 if (filter_area_start > block->target_start) {
738 return filter_area_start - block->target_start;
739 }
783e9b48 740 }
1e811303
JF
741
742 return 0;
783e9b48
WC
743}
744
745/* write all memory to vmcore */
4c7e251a 746static void dump_iterate(DumpState *s, Error **errp)
783e9b48 747{
86a518bb 748 ERRP_GUARD();
56c4bfb3 749 GuestPhysBlock *block;
1e811303 750 int64_t memblock_size, memblock_start;
783e9b48 751
1e811303 752 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
dddf725f 753 memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length);
1e811303
JF
754 if (memblock_start == -1) {
755 continue;
783e9b48 756 }
1e811303 757
dddf725f 758 memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length);
1e811303
JF
759
760 /* Write the memory to file */
761 write_memory(s, block, memblock_start, memblock_size, errp);
86a518bb 762 if (*errp) {
4c7e251a 763 return;
783e9b48 764 }
1e811303 765 }
783e9b48
WC
766}
767
9b72224f
JF
768static void dump_end(DumpState *s, Error **errp)
769{
770 int rc;
9b72224f
JF
771
772 if (s->elf_section_data_size) {
773 s->elf_section_data = g_malloc0(s->elf_section_data_size);
774 }
775
776 /* Adds the architecture defined section data to s->elf_section_data */
777 if (s->dump_info.arch_sections_write_fn &&
778 s->elf_section_data_size) {
779 rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
780 if (rc) {
781 error_setg_errno(errp, rc,
782 "dump: failed to get arch section data");
783 g_free(s->elf_section_data);
784 return;
785 }
786 }
787
788 /* write sections to vmcore */
789 write_elf_sections(s, errp);
790}
791
4c7e251a 792static void create_vmcore(DumpState *s, Error **errp)
783e9b48 793{
86a518bb 794 ERRP_GUARD();
783e9b48 795
86a518bb
JF
796 dump_begin(s, errp);
797 if (*errp) {
4c7e251a 798 return;
783e9b48
WC
799 }
800
9b72224f 801 /* Iterate over memory and dump it to file */
4c7e251a 802 dump_iterate(s, errp);
9b72224f
JF
803 if (*errp) {
804 return;
805 }
806
807 /* Write the section data */
808 dump_end(s, errp);
783e9b48
WC
809}
810
fda05387
QN
811static int write_start_flat_header(int fd)
812{
92ba1401 813 MakedumpfileHeader *mh;
fda05387
QN
814 int ret = 0;
815
92ba1401
LE
816 QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
817 mh = g_malloc0(MAX_SIZE_MDF_HEADER);
fda05387 818
92ba1401
LE
819 memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
820 MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));
fda05387 821
92ba1401
LE
822 mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
823 mh->version = cpu_to_be64(VERSION_FLAT_HEADER);
fda05387
QN
824
825 size_t written_size;
92ba1401 826 written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
fda05387
QN
827 if (written_size != MAX_SIZE_MDF_HEADER) {
828 ret = -1;
829 }
830
92ba1401 831 g_free(mh);
fda05387
QN
832 return ret;
833}
834
835static int write_end_flat_header(int fd)
836{
837 MakedumpfileDataHeader mdh;
838
839 mdh.offset = END_FLAG_FLAT_HEADER;
840 mdh.buf_size = END_FLAG_FLAT_HEADER;
841
842 size_t written_size;
843 written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
844 if (written_size != sizeof(mdh)) {
845 return -1;
846 }
847
848 return 0;
849}
850
5d31babe
QN
851static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
852{
853 size_t written_size;
854 MakedumpfileDataHeader mdh;
855
856 mdh.offset = cpu_to_be64(offset);
857 mdh.buf_size = cpu_to_be64(size);
858
859 written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
860 if (written_size != sizeof(mdh)) {
861 return -1;
862 }
863
864 written_size = qemu_write_full(fd, buf, size);
865 if (written_size != size) {
866 return -1;
867 }
868
869 return 0;
870}
871
4835ef77
QN
872static int buf_write_note(const void *buf, size_t size, void *opaque)
873{
874 DumpState *s = opaque;
875
876 /* note_buf is not enough */
877 if (s->note_buf_offset + size > s->note_size) {
878 return -1;
879 }
880
881 memcpy(s->note_buf + s->note_buf_offset, buf, size);
882
883 s->note_buf_offset += size;
884
885 return 0;
886}
887
903ef734
MAL
888/*
889 * This function retrieves various sizes from an elf header.
890 *
891 * @note has to be a valid ELF note. The return sizes are unmodified
892 * (not padded or rounded up to be multiple of 4).
893 */
894static void get_note_sizes(DumpState *s, const void *note,
895 uint64_t *note_head_size,
896 uint64_t *name_size,
897 uint64_t *desc_size)
898{
899 uint64_t note_head_sz;
900 uint64_t name_sz;
901 uint64_t desc_sz;
902
05bbaa50 903 if (dump_is_64bit(s)) {
903ef734
MAL
904 const Elf64_Nhdr *hdr = note;
905 note_head_sz = sizeof(Elf64_Nhdr);
bb509d94
PMD
906 name_sz = cpu_to_dump64(s, hdr->n_namesz);
907 desc_sz = cpu_to_dump64(s, hdr->n_descsz);
903ef734
MAL
908 } else {
909 const Elf32_Nhdr *hdr = note;
910 note_head_sz = sizeof(Elf32_Nhdr);
bb509d94
PMD
911 name_sz = cpu_to_dump32(s, hdr->n_namesz);
912 desc_sz = cpu_to_dump32(s, hdr->n_descsz);
903ef734
MAL
913 }
914
915 if (note_head_size) {
916 *note_head_size = note_head_sz;
917 }
918 if (name_size) {
919 *name_size = name_sz;
920 }
921 if (desc_size) {
922 *desc_size = desc_sz;
923 }
924}
925
d9feb517
MAL
926static bool note_name_equal(DumpState *s,
927 const uint8_t *note, const char *name)
928{
929 int len = strlen(name) + 1;
930 uint64_t head_size, name_size;
931
932 get_note_sizes(s, note, &head_size, &name_size, NULL);
933 head_size = ROUND_UP(head_size, 4);
934
c983ca84 935 return name_size == len && memcmp(note + head_size, name, len) == 0;
d9feb517
MAL
936}
937
298f1168 938/* write common header, sub header and elf note to vmcore */
4c7e251a 939static void create_header32(DumpState *s, Error **errp)
298f1168 940{
86a518bb 941 ERRP_GUARD();
298f1168
QN
942 DiskDumpHeader32 *dh = NULL;
943 KdumpSubHeader32 *kh = NULL;
944 size_t size;
298f1168
QN
945 uint32_t block_size;
946 uint32_t sub_hdr_size;
947 uint32_t bitmap_blocks;
948 uint32_t status = 0;
949 uint64_t offset_note;
950
951 /* write common header, the version of kdump-compressed format is 6th */
952 size = sizeof(DiskDumpHeader32);
953 dh = g_malloc0(size);
954
84c868f6 955 memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
acb0ef58 956 dh->header_version = cpu_to_dump32(s, 6);
8161befd 957 block_size = s->dump_info.page_size;
acb0ef58 958 dh->block_size = cpu_to_dump32(s, block_size);
298f1168
QN
959 sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
960 sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
acb0ef58 961 dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
298f1168 962 /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
acb0ef58
BR
963 dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
964 dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
298f1168 965 bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
acb0ef58 966 dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
4ab23a91 967 strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
298f1168
QN
968
969 if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
970 status |= DUMP_DH_COMPRESSED_ZLIB;
971 }
972#ifdef CONFIG_LZO
973 if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
974 status |= DUMP_DH_COMPRESSED_LZO;
975 }
976#endif
977#ifdef CONFIG_SNAPPY
978 if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
979 status |= DUMP_DH_COMPRESSED_SNAPPY;
980 }
981#endif
acb0ef58 982 dh->status = cpu_to_dump32(s, status);
298f1168
QN
983
984 if (write_buffer(s->fd, 0, dh, size) < 0) {
e3517a52 985 error_setg(errp, "dump: failed to write disk dump header");
298f1168
QN
986 goto out;
987 }
988
989 /* write sub header */
990 size = sizeof(KdumpSubHeader32);
991 kh = g_malloc0(size);
992
993 /* 64bit max_mapnr_64 */
acb0ef58 994 kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
b6e05aa4 995 kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base);
acb0ef58 996 kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
298f1168
QN
997
998 offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
9ada575b
MAL
999 if (s->guest_note &&
1000 note_name_equal(s, s->guest_note, "VMCOREINFO")) {
1001 uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;
1002
1003 get_note_sizes(s, s->guest_note,
1004 &hsize, &name_size, &size_vmcoreinfo_desc);
1005 offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
1006 (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
1007 kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
1008 kh->size_vmcoreinfo = cpu_to_dump32(s, size_vmcoreinfo_desc);
1009 }
1010
acb0ef58
BR
1011 kh->offset_note = cpu_to_dump64(s, offset_note);
1012 kh->note_size = cpu_to_dump32(s, s->note_size);
298f1168
QN
1013
1014 if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
1015 block_size, kh, size) < 0) {
e3517a52 1016 error_setg(errp, "dump: failed to write kdump sub header");
298f1168
QN
1017 goto out;
1018 }
1019
1020 /* write note */
1021 s->note_buf = g_malloc0(s->note_size);
1022 s->note_buf_offset = 0;
1023
1024 /* use s->note_buf to store notes temporarily */
86a518bb
JF
1025 write_elf32_notes(buf_write_note, s, errp);
1026 if (*errp) {
298f1168
QN
1027 goto out;
1028 }
298f1168
QN
1029 if (write_buffer(s->fd, offset_note, s->note_buf,
1030 s->note_size) < 0) {
e3517a52 1031 error_setg(errp, "dump: failed to write notes");
298f1168
QN
1032 goto out;
1033 }
1034
1035 /* get offset of dump_bitmap */
1036 s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
1037 block_size;
1038
1039 /* get offset of page */
1040 s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
1041 block_size;
1042
1043out:
1044 g_free(dh);
1045 g_free(kh);
1046 g_free(s->note_buf);
298f1168
QN
1047}
1048
1049/* write common header, sub header and elf note to vmcore */
4c7e251a 1050static void create_header64(DumpState *s, Error **errp)
298f1168 1051{
86a518bb 1052 ERRP_GUARD();
298f1168
QN
1053 DiskDumpHeader64 *dh = NULL;
1054 KdumpSubHeader64 *kh = NULL;
1055 size_t size;
298f1168
QN
1056 uint32_t block_size;
1057 uint32_t sub_hdr_size;
1058 uint32_t bitmap_blocks;
1059 uint32_t status = 0;
1060 uint64_t offset_note;
1061
1062 /* write common header, the version of kdump-compressed format is 6th */
1063 size = sizeof(DiskDumpHeader64);
1064 dh = g_malloc0(size);
1065
84c868f6 1066 memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
acb0ef58 1067 dh->header_version = cpu_to_dump32(s, 6);
8161befd 1068 block_size = s->dump_info.page_size;
acb0ef58 1069 dh->block_size = cpu_to_dump32(s, block_size);
298f1168
QN
1070 sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
1071 sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
acb0ef58 1072 dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
298f1168 1073 /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
acb0ef58
BR
1074 dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
1075 dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
298f1168 1076 bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
acb0ef58 1077 dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
4ab23a91 1078 strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
298f1168
QN
1079
1080 if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
1081 status |= DUMP_DH_COMPRESSED_ZLIB;
1082 }
1083#ifdef CONFIG_LZO
1084 if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
1085 status |= DUMP_DH_COMPRESSED_LZO;
1086 }
1087#endif
1088#ifdef CONFIG_SNAPPY
1089 if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
1090 status |= DUMP_DH_COMPRESSED_SNAPPY;
1091 }
1092#endif
acb0ef58 1093 dh->status = cpu_to_dump32(s, status);
298f1168
QN
1094
1095 if (write_buffer(s->fd, 0, dh, size) < 0) {
e3517a52 1096 error_setg(errp, "dump: failed to write disk dump header");
298f1168
QN
1097 goto out;
1098 }
1099
1100 /* write sub header */
1101 size = sizeof(KdumpSubHeader64);
1102 kh = g_malloc0(size);
1103
1104 /* 64bit max_mapnr_64 */
acb0ef58 1105 kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
b6e05aa4 1106 kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base);
acb0ef58 1107 kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
298f1168
QN
1108
1109 offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
9ada575b
MAL
1110 if (s->guest_note &&
1111 note_name_equal(s, s->guest_note, "VMCOREINFO")) {
1112 uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;
1113
1114 get_note_sizes(s, s->guest_note,
1115 &hsize, &name_size, &size_vmcoreinfo_desc);
1116 offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
1117 (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
1118 kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
1119 kh->size_vmcoreinfo = cpu_to_dump64(s, size_vmcoreinfo_desc);
1120 }
1121
acb0ef58
BR
1122 kh->offset_note = cpu_to_dump64(s, offset_note);
1123 kh->note_size = cpu_to_dump64(s, s->note_size);
298f1168
QN
1124
1125 if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
1126 block_size, kh, size) < 0) {
e3517a52 1127 error_setg(errp, "dump: failed to write kdump sub header");
298f1168
QN
1128 goto out;
1129 }
1130
1131 /* write note */
1132 s->note_buf = g_malloc0(s->note_size);
1133 s->note_buf_offset = 0;
1134
1135 /* use s->note_buf to store notes temporarily */
86a518bb
JF
1136 write_elf64_notes(buf_write_note, s, errp);
1137 if (*errp) {
298f1168
QN
1138 goto out;
1139 }
1140
1141 if (write_buffer(s->fd, offset_note, s->note_buf,
1142 s->note_size) < 0) {
e3517a52 1143 error_setg(errp, "dump: failed to write notes");
298f1168
QN
1144 goto out;
1145 }
1146
1147 /* get offset of dump_bitmap */
1148 s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
1149 block_size;
1150
1151 /* get offset of page */
1152 s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
1153 block_size;
1154
1155out:
1156 g_free(dh);
1157 g_free(kh);
1158 g_free(s->note_buf);
298f1168
QN
1159}
1160
4c7e251a 1161static void write_dump_header(DumpState *s, Error **errp)
298f1168 1162{
05bbaa50 1163 if (dump_is_64bit(s)) {
992861fb 1164 create_header64(s, errp);
05bbaa50
JF
1165 } else {
1166 create_header32(s, errp);
4c7e251a 1167 }
298f1168
QN
1168}
1169
8161befd
AJ
1170static size_t dump_bitmap_get_bufsize(DumpState *s)
1171{
1172 return s->dump_info.page_size;
1173}
1174
d0686c72
QN
1175/*
1176 * set dump_bitmap sequencely. the bit before last_pfn is not allowed to be
1177 * rewritten, so if need to set the first bit, set last_pfn and pfn to 0.
1178 * set_dump_bitmap will always leave the recently set bit un-sync. And setting
1179 * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into
1180 * vmcore, ie. synchronizing un-sync bit into vmcore.
1181 */
1182static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
1183 uint8_t *buf, DumpState *s)
1184{
1185 off_t old_offset, new_offset;
1186 off_t offset_bitmap1, offset_bitmap2;
1187 uint32_t byte, bit;
8161befd
AJ
1188 size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
1189 size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;
d0686c72
QN
1190
1191 /* should not set the previous place */
1192 assert(last_pfn <= pfn);
1193
1194 /*
1195 * if the bit needed to be set is not cached in buf, flush the data in buf
1196 * to vmcore firstly.
1197 * making new_offset be bigger than old_offset can also sync remained data
1198 * into vmcore.
1199 */
8161befd
AJ
1200 old_offset = bitmap_bufsize * (last_pfn / bits_per_buf);
1201 new_offset = bitmap_bufsize * (pfn / bits_per_buf);
d0686c72
QN
1202
1203 while (old_offset < new_offset) {
1204 /* calculate the offset and write dump_bitmap */
1205 offset_bitmap1 = s->offset_dump_bitmap + old_offset;
1206 if (write_buffer(s->fd, offset_bitmap1, buf,
8161befd 1207 bitmap_bufsize) < 0) {
d0686c72
QN
1208 return -1;
1209 }
1210
1211 /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
1212 offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
1213 old_offset;
1214 if (write_buffer(s->fd, offset_bitmap2, buf,
8161befd 1215 bitmap_bufsize) < 0) {
d0686c72
QN
1216 return -1;
1217 }
1218
8161befd
AJ
1219 memset(buf, 0, bitmap_bufsize);
1220 old_offset += bitmap_bufsize;
d0686c72
QN
1221 }
1222
1223 /* get the exact place of the bit in the buf, and set it */
8161befd
AJ
1224 byte = (pfn % bits_per_buf) / CHAR_BIT;
1225 bit = (pfn % bits_per_buf) % CHAR_BIT;
d0686c72
QN
1226 if (value) {
1227 buf[byte] |= 1u << bit;
1228 } else {
1229 buf[byte] &= ~(1u << bit);
1230 }
1231
1232 return 0;
1233}
1234
8161befd
AJ
1235static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
1236{
1237 int target_page_shift = ctz32(s->dump_info.page_size);
1238
1239 return (addr >> target_page_shift) - ARCH_PFN_OFFSET;
1240}
1241
1242static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
1243{
1244 int target_page_shift = ctz32(s->dump_info.page_size);
1245
1246 return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
1247}
1248
d0686c72 1249/*
94d78840
MAL
1250 * Return the page frame number and the page content in *bufptr. bufptr can be
1251 * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated
1252 * memory. This is not necessarily the memory returned.
d0686c72
QN
1253 */
1254static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
1255 uint8_t **bufptr, DumpState *s)
1256{
1257 GuestPhysBlock *block = *blockptr;
94d78840
MAL
1258 uint32_t page_size = s->dump_info.page_size;
1259 uint8_t *buf = NULL, *hbuf;
1260 hwaddr addr;
d0686c72
QN
1261
1262 /* block == NULL means the start of the iteration */
1263 if (!block) {
1264 block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
1265 *blockptr = block;
08df3438 1266 addr = block->target_start;
94d78840 1267 *pfnptr = dump_paddr_to_pfn(s, addr);
08df3438 1268 } else {
94d78840
MAL
1269 *pfnptr += 1;
1270 addr = dump_pfn_to_paddr(s, *pfnptr);
d0686c72 1271 }
08df3438 1272 assert(block != NULL);
d0686c72 1273
94d78840
MAL
1274 while (1) {
1275 if (addr >= block->target_start && addr < block->target_end) {
1276 size_t n = MIN(block->target_end - addr, page_size - addr % page_size);
1277 hbuf = block->host_addr + (addr - block->target_start);
1278 if (!buf) {
1279 if (n == page_size) {
1280 /* this is a whole target page, go for it */
1281 assert(addr % page_size == 0);
1282 buf = hbuf;
1283 break;
1284 } else if (bufptr) {
1285 assert(*bufptr);
1286 buf = *bufptr;
1287 memset(buf, 0, page_size);
1288 } else {
1289 return true;
1290 }
1291 }
1292
1293 memcpy(buf + addr % page_size, hbuf, n);
1294 addr += n;
1295 if (addr % page_size == 0) {
1296 /* we filled up the page */
1297 break;
1298 }
1299 } else {
1300 /* the next page is in the next block */
1301 *blockptr = block = QTAILQ_NEXT(block, next);
1302 if (!block) {
1303 break;
1304 }
1305
1306 addr = block->target_start;
1307 /* are we still in the same page? */
1308 if (dump_paddr_to_pfn(s, addr) != *pfnptr) {
1309 if (buf) {
1310 /* no, but we already filled something earlier, return it */
1311 break;
1312 } else {
1313 /* else continue from there */
1314 *pfnptr = dump_paddr_to_pfn(s, addr);
1315 }
1316 }
d0686c72 1317 }
d0686c72
QN
1318 }
1319
1320 if (bufptr) {
1321 *bufptr = buf;
1322 }
1323
94d78840 1324 return buf != NULL;
d0686c72
QN
1325}
1326
4c7e251a 1327static void write_dump_bitmap(DumpState *s, Error **errp)
d0686c72
QN
1328{
1329 int ret = 0;
1330 uint64_t last_pfn, pfn;
1331 void *dump_bitmap_buf;
1332 size_t num_dumpable;
1333 GuestPhysBlock *block_iter = NULL;
8161befd
AJ
1334 size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
1335 size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;
d0686c72
QN
1336
1337 /* dump_bitmap_buf is used to store dump_bitmap temporarily */
8161befd 1338 dump_bitmap_buf = g_malloc0(bitmap_bufsize);
d0686c72
QN
1339
1340 num_dumpable = 0;
1341 last_pfn = 0;
1342
1343 /*
1344 * exam memory page by page, and set the bit in dump_bitmap corresponded
1345 * to the existing page.
1346 */
1347 while (get_next_page(&block_iter, &pfn, NULL, s)) {
1348 ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
1349 if (ret < 0) {
e3517a52 1350 error_setg(errp, "dump: failed to set dump_bitmap");
d0686c72
QN
1351 goto out;
1352 }
1353
1354 last_pfn = pfn;
1355 num_dumpable++;
1356 }
1357
1358 /*
1359 * set_dump_bitmap will always leave the recently set bit un-sync. Here we
8161befd
AJ
1360 * set the remaining bits from last_pfn to the end of the bitmap buffer to
1361 * 0. With those set, the un-sync bit will be synchronized into the vmcore.
d0686c72
QN
1362 */
1363 if (num_dumpable > 0) {
8161befd 1364 ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false,
d0686c72
QN
1365 dump_bitmap_buf, s);
1366 if (ret < 0) {
e3517a52 1367 error_setg(errp, "dump: failed to sync dump_bitmap");
d0686c72
QN
1368 goto out;
1369 }
1370 }
1371
1372 /* number of dumpable pages that will be dumped later */
1373 s->num_dumpable = num_dumpable;
1374
1375out:
1376 g_free(dump_bitmap_buf);
d0686c72
QN
1377}
1378
64cfba6a
QN
1379static void prepare_data_cache(DataCache *data_cache, DumpState *s,
1380 off_t offset)
1381{
1382 data_cache->fd = s->fd;
1383 data_cache->data_size = 0;
8161befd
AJ
1384 data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
1385 data_cache->buf = g_malloc0(data_cache->buf_size);
64cfba6a
QN
1386 data_cache->offset = offset;
1387}
1388
1389static int write_cache(DataCache *dc, const void *buf, size_t size,
1390 bool flag_sync)
1391{
1392 /*
1393 * dc->buf_size should not be less than size, otherwise dc will never be
1394 * enough
1395 */
1396 assert(size <= dc->buf_size);
1397
1398 /*
1399 * if flag_sync is set, synchronize data in dc->buf into vmcore.
1400 * otherwise check if the space is enough for caching data in buf, if not,
1401 * write the data in dc->buf to dc->fd and reset dc->buf
1402 */
1403 if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
1404 (flag_sync && dc->data_size > 0)) {
1405 if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
1406 return -1;
1407 }
1408
1409 dc->offset += dc->data_size;
1410 dc->data_size = 0;
1411 }
1412
1413 if (!flag_sync) {
1414 memcpy(dc->buf + dc->data_size, buf, size);
1415 dc->data_size += size;
1416 }
1417
1418 return 0;
1419}
1420
1421static void free_data_cache(DataCache *data_cache)
1422{
1423 g_free(data_cache->buf);
1424}
1425
d12f57ec
QN
1426static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
1427{
b87ef351
LE
1428 switch (flag_compress) {
1429 case DUMP_DH_COMPRESSED_ZLIB:
1430 return compressBound(page_size);
1431
1432 case DUMP_DH_COMPRESSED_LZO:
1433 /*
1434 * LZO will expand incompressible data by a little amount. Please check
1435 * the following URL to see the expansion calculation:
1436 * http://www.oberhumer.com/opensource/lzo/lzofaq.php
1437 */
1438 return page_size + page_size / 16 + 64 + 3;
d12f57ec
QN
1439
1440#ifdef CONFIG_SNAPPY
b87ef351
LE
1441 case DUMP_DH_COMPRESSED_SNAPPY:
1442 return snappy_max_compressed_length(page_size);
d12f57ec 1443#endif
b87ef351
LE
1444 }
1445 return 0;
d12f57ec
QN
1446}
1447
4c7e251a 1448static void write_dump_pages(DumpState *s, Error **errp)
d12f57ec
QN
1449{
1450 int ret = 0;
1451 DataCache page_desc, page_data;
1452 size_t len_buf_out, size_out;
1453#ifdef CONFIG_LZO
1454 lzo_bytep wrkmem = NULL;
1455#endif
1456 uint8_t *buf_out = NULL;
1457 off_t offset_desc, offset_data;
1458 PageDescriptor pd, pd_zero;
1459 uint8_t *buf;
d12f57ec
QN
1460 GuestPhysBlock *block_iter = NULL;
1461 uint64_t pfn_iter;
94d78840 1462 g_autofree uint8_t *page = NULL;
d12f57ec
QN
1463
1464 /* get offset of page_desc and page_data in dump file */
1465 offset_desc = s->offset_page;
1466 offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;
1467
1468 prepare_data_cache(&page_desc, s, offset_desc);
1469 prepare_data_cache(&page_data, s, offset_data);
1470
1471 /* prepare buffer to store compressed data */
8161befd 1472 len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
b87ef351 1473 assert(len_buf_out != 0);
d12f57ec
QN
1474
1475#ifdef CONFIG_LZO
1476 wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
1477#endif
1478
1479 buf_out = g_malloc(len_buf_out);
1480
1481 /*
1482 * init zero page's page_desc and page_data, because every zero page
1483 * uses the same page_data
1484 */
8161befd 1485 pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
acb0ef58
BR
1486 pd_zero.flags = cpu_to_dump32(s, 0);
1487 pd_zero.offset = cpu_to_dump64(s, offset_data);
1488 pd_zero.page_flags = cpu_to_dump64(s, 0);
8161befd
AJ
1489 buf = g_malloc0(s->dump_info.page_size);
1490 ret = write_cache(&page_data, buf, s->dump_info.page_size, false);
d12f57ec
QN
1491 g_free(buf);
1492 if (ret < 0) {
e3517a52 1493 error_setg(errp, "dump: failed to write page data (zero page)");
d12f57ec
QN
1494 goto out;
1495 }
1496
8161befd 1497 offset_data += s->dump_info.page_size;
94d78840 1498 page = g_malloc(s->dump_info.page_size);
d12f57ec
QN
1499
1500 /*
1501 * dump memory to vmcore page by page. zero page will all be resided in the
1502 * first page of page section
1503 */
94d78840 1504 for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) {
d12f57ec 1505 /* check zero page */
f13f22ba 1506 if (buffer_is_zero(buf, s->dump_info.page_size)) {
d12f57ec
QN
1507 ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
1508 false);
1509 if (ret < 0) {
e3517a52 1510 error_setg(errp, "dump: failed to write page desc");
d12f57ec
QN
1511 goto out;
1512 }
1513 } else {
1514 /*
1515 * not zero page, then:
1516 * 1. compress the page
1517 * 2. write the compressed page into the cache of page_data
1518 * 3. get page desc of the compressed page and write it into the
1519 * cache of page_desc
1520 *
1521 * only one compression format will be used here, for
1522 * s->flag_compress is set. But when compression fails to work,
1523 * we fall back to save in plaintext.
1524 */
1525 size_out = len_buf_out;
1526 if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
acb0ef58 1527 (compress2(buf_out, (uLongf *)&size_out, buf,
8161befd
AJ
1528 s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
1529 (size_out < s->dump_info.page_size)) {
acb0ef58
BR
1530 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
1531 pd.size = cpu_to_dump32(s, size_out);
d12f57ec
QN
1532
1533 ret = write_cache(&page_data, buf_out, size_out, false);
1534 if (ret < 0) {
e3517a52 1535 error_setg(errp, "dump: failed to write page data");
d12f57ec
QN
1536 goto out;
1537 }
1538#ifdef CONFIG_LZO
1539 } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
8161befd 1540 (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out,
d12f57ec 1541 (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
8161befd 1542 (size_out < s->dump_info.page_size)) {
acb0ef58
BR
1543 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
1544 pd.size = cpu_to_dump32(s, size_out);
d12f57ec
QN
1545
1546 ret = write_cache(&page_data, buf_out, size_out, false);
1547 if (ret < 0) {
e3517a52 1548 error_setg(errp, "dump: failed to write page data");
d12f57ec
QN
1549 goto out;
1550 }
1551#endif
1552#ifdef CONFIG_SNAPPY
1553 } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
8161befd 1554 (snappy_compress((char *)buf, s->dump_info.page_size,
d12f57ec 1555 (char *)buf_out, &size_out) == SNAPPY_OK) &&
8161befd 1556 (size_out < s->dump_info.page_size)) {
acb0ef58
BR
1557 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
1558 pd.size = cpu_to_dump32(s, size_out);
d12f57ec
QN
1559
1560 ret = write_cache(&page_data, buf_out, size_out, false);
1561 if (ret < 0) {
e3517a52 1562 error_setg(errp, "dump: failed to write page data");
d12f57ec
QN
1563 goto out;
1564 }
1565#endif
1566 } else {
1567 /*
1568 * fall back to save in plaintext, size_out should be
8161befd 1569 * assigned the target's page size
d12f57ec 1570 */
acb0ef58 1571 pd.flags = cpu_to_dump32(s, 0);
8161befd 1572 size_out = s->dump_info.page_size;
acb0ef58 1573 pd.size = cpu_to_dump32(s, size_out);
d12f57ec 1574
8161befd
AJ
1575 ret = write_cache(&page_data, buf,
1576 s->dump_info.page_size, false);
d12f57ec 1577 if (ret < 0) {
e3517a52 1578 error_setg(errp, "dump: failed to write page data");
d12f57ec
QN
1579 goto out;
1580 }
1581 }
1582
1583 /* get and write page desc here */
acb0ef58
BR
1584 pd.page_flags = cpu_to_dump64(s, 0);
1585 pd.offset = cpu_to_dump64(s, offset_data);
d12f57ec
QN
1586 offset_data += size_out;
1587
1588 ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
1589 if (ret < 0) {
e3517a52 1590 error_setg(errp, "dump: failed to write page desc");
d12f57ec
QN
1591 goto out;
1592 }
1593 }
2264c2c9 1594 s->written_size += s->dump_info.page_size;
d12f57ec
QN
1595 }
1596
1597 ret = write_cache(&page_desc, NULL, 0, true);
1598 if (ret < 0) {
e3517a52 1599 error_setg(errp, "dump: failed to sync cache for page_desc");
d12f57ec
QN
1600 goto out;
1601 }
1602 ret = write_cache(&page_data, NULL, 0, true);
1603 if (ret < 0) {
e3517a52 1604 error_setg(errp, "dump: failed to sync cache for page_data");
d12f57ec
QN
1605 goto out;
1606 }
1607
1608out:
1609 free_data_cache(&page_desc);
1610 free_data_cache(&page_data);
1611
1612#ifdef CONFIG_LZO
1613 g_free(wrkmem);
1614#endif
1615
1616 g_free(buf_out);
d12f57ec
QN
1617}
1618
4c7e251a 1619static void create_kdump_vmcore(DumpState *s, Error **errp)
b53ccc30 1620{
86a518bb 1621 ERRP_GUARD();
b53ccc30
QN
1622 int ret;
1623
1624 /*
1625 * the kdump-compressed format is:
1626 * File offset
1627 * +------------------------------------------+ 0x0
1628 * | main header (struct disk_dump_header) |
1629 * |------------------------------------------+ block 1
1630 * | sub header (struct kdump_sub_header) |
1631 * |------------------------------------------+ block 2
1632 * | 1st-dump_bitmap |
1633 * |------------------------------------------+ block 2 + X blocks
1634 * | 2nd-dump_bitmap | (aligned by block)
1635 * |------------------------------------------+ block 2 + 2 * X blocks
1636 * | page desc for pfn 0 (struct page_desc) | (aligned by block)
1637 * | page desc for pfn 1 (struct page_desc) |
1638 * | : |
1639 * |------------------------------------------| (not aligned by block)
1640 * | page data (pfn 0) |
1641 * | page data (pfn 1) |
1642 * | : |
1643 * +------------------------------------------+
1644 */
1645
1646 ret = write_start_flat_header(s->fd);
1647 if (ret < 0) {
e3517a52 1648 error_setg(errp, "dump: failed to write start flat header");
4c7e251a 1649 return;
b53ccc30
QN
1650 }
1651
86a518bb
JF
1652 write_dump_header(s, errp);
1653 if (*errp) {
4c7e251a 1654 return;
b53ccc30
QN
1655 }
1656
86a518bb
JF
1657 write_dump_bitmap(s, errp);
1658 if (*errp) {
4c7e251a 1659 return;
b53ccc30
QN
1660 }
1661
86a518bb
JF
1662 write_dump_pages(s, errp);
1663 if (*errp) {
4c7e251a 1664 return;
b53ccc30
QN
1665 }
1666
1667 ret = write_end_flat_header(s->fd);
1668 if (ret < 0) {
e3517a52 1669 error_setg(errp, "dump: failed to write end flat header");
4c7e251a 1670 return;
b53ccc30 1671 }
b53ccc30
QN
1672}
1673
0c2994ac 1674static int validate_start_block(DumpState *s)
783e9b48 1675{
56c4bfb3 1676 GuestPhysBlock *block;
783e9b48 1677
dddf725f 1678 if (!dump_has_filter(s)) {
783e9b48
WC
1679 return 0;
1680 }
1681
56c4bfb3 1682 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
0c2994ac 1683 /* This block is out of the range */
dddf725f
JF
1684 if (block->target_start >= s->filter_area_begin + s->filter_area_length ||
1685 block->target_end <= s->filter_area_begin) {
783e9b48
WC
1686 continue;
1687 }
0c2994ac
JF
1688 return 0;
1689 }
783e9b48
WC
1690
1691 return -1;
1692}
1693
7aad248d
QN
1694static void get_max_mapnr(DumpState *s)
1695{
1696 GuestPhysBlock *last_block;
1697
eae3eb3e 1698 last_block = QTAILQ_LAST(&s->guest_phys_blocks.head);
8161befd 1699 s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end);
7aad248d
QN
1700}
1701
baf28f57
PX
1702static DumpState dump_state_global = { .status = DUMP_STATUS_NONE };
1703
1704static void dump_state_prepare(DumpState *s)
1705{
1706 /* zero the struct, setting status to active */
1707 *s = (DumpState) { .status = DUMP_STATUS_ACTIVE };
1708}
1709
544803c7 1710bool qemu_system_dump_in_progress(void)
65d64f36
PX
1711{
1712 DumpState *state = &dump_state_global;
d73415a3 1713 return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
65d64f36
PX
1714}
1715
c370d530
JF
1716/*
1717 * calculate total size of memory to be dumped (taking filter into
1718 * account.)
1719 */
2264c2c9
PX
1720static int64_t dump_calculate_size(DumpState *s)
1721{
1722 GuestPhysBlock *block;
c370d530 1723 int64_t total = 0;
2264c2c9
PX
1724
1725 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
c370d530
JF
1726 total += dump_filtered_memblock_size(block,
1727 s->filter_area_begin,
1728 s->filter_area_length);
2264c2c9
PX
1729 }
1730
1731 return total;
1732}
1733
d9feb517
MAL
1734static void vmcoreinfo_update_phys_base(DumpState *s)
1735{
1736 uint64_t size, note_head_size, name_size, phys_base;
1737 char **lines;
1738 uint8_t *vmci;
1739 size_t i;
1740
1741 if (!note_name_equal(s, s->guest_note, "VMCOREINFO")) {
1742 return;
1743 }
1744
1745 get_note_sizes(s, s->guest_note, &note_head_size, &name_size, &size);
1746 note_head_size = ROUND_UP(note_head_size, 4);
1747
1748 vmci = s->guest_note + note_head_size + ROUND_UP(name_size, 4);
1749 *(vmci + size) = '\0';
1750
1751 lines = g_strsplit((char *)vmci, "\n", -1);
1752 for (i = 0; lines[i]; i++) {
68cbecfd
WH
1753 const char *prefix = NULL;
1754
1755 if (s->dump_info.d_machine == EM_X86_64) {
1756 prefix = "NUMBER(phys_base)=";
1757 } else if (s->dump_info.d_machine == EM_AARCH64) {
1758 prefix = "NUMBER(PHYS_OFFSET)=";
1759 }
1760
1761 if (prefix && g_str_has_prefix(lines[i], prefix)) {
1762 if (qemu_strtou64(lines[i] + strlen(prefix), NULL, 16,
d9feb517 1763 &phys_base) < 0) {
68cbecfd 1764 warn_report("Failed to read %s", prefix);
d9feb517
MAL
1765 } else {
1766 s->dump_info.phys_base = phys_base;
1767 }
1768 break;
1769 }
1770 }
1771
1772 g_strfreev(lines);
1773}
1774
4c7e251a
HZ
1775static void dump_init(DumpState *s, int fd, bool has_format,
1776 DumpGuestMemoryFormat format, bool paging, bool has_filter,
1777 int64_t begin, int64_t length, Error **errp)
783e9b48 1778{
86a518bb 1779 ERRP_GUARD();
903ef734 1780 VMCoreInfoState *vmci = vmcoreinfo_find();
182735ef 1781 CPUState *cpu;
783e9b48
WC
1782 int nr_cpus;
1783 int ret;
1784
ca1fc8c9
PX
1785 s->has_format = has_format;
1786 s->format = format;
2264c2c9 1787 s->written_size = 0;
ca1fc8c9 1788
b53ccc30
QN
1789 /* kdump-compressed is conflict with paging and filter */
1790 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
1791 assert(!paging && !has_filter);
1792 }
1793
783e9b48
WC
1794 if (runstate_is_running()) {
1795 vm_stop(RUN_STATE_SAVE_VM);
1796 s->resume = true;
1797 } else {
1798 s->resume = false;
1799 }
1800
5ee163e8
LE
1801 /* If we use KVM, we should synchronize the registers before we get dump
1802 * info or physmap info.
1803 */
1804 cpu_synchronize_all_states();
1805 nr_cpus = 0;
bdc44640 1806 CPU_FOREACH(cpu) {
5ee163e8
LE
1807 nr_cpus++;
1808 }
1809
783e9b48 1810 s->fd = fd;
dddf725f
JF
1811 if (has_filter && !length) {
1812 error_setg(errp, QERR_INVALID_PARAMETER, "length");
1813 goto cleanup;
1814 }
1815 s->filter_area_begin = begin;
1816 s->filter_area_length = length;
5ee163e8 1817
9b72224f
JF
1818 /* First index is 0, it's the special null name */
1819 s->string_table_buf = g_array_new(FALSE, TRUE, 1);
1820 /*
1821 * Allocate the null name, due to the clearing option set to true
1822 * it will be 0.
1823 */
1824 g_array_set_size(s->string_table_buf, 1);
1825
2928207a
CG
1826 memory_mapping_list_init(&s->list);
1827
5ee163e8 1828 guest_phys_blocks_init(&s->guest_phys_blocks);
c5d7f60f 1829 guest_phys_blocks_append(&s->guest_phys_blocks);
2264c2c9
PX
1830 s->total_size = dump_calculate_size(s);
1831#ifdef DEBUG_DUMP_GUEST_MEMORY
1832 fprintf(stderr, "DUMP: total memory to dump: %lu\n", s->total_size);
1833#endif
5ee163e8 1834
d1e6994a
CH
1835 /* it does not make sense to dump non-existent memory */
1836 if (!s->total_size) {
1837 error_setg(errp, "dump: no guest memory to dump");
1838 goto cleanup;
1839 }
1840
0c2994ac
JF
1841 /* Is the filter filtering everything? */
1842 if (validate_start_block(s) == -1) {
c6bd8c70 1843 error_setg(errp, QERR_INVALID_PARAMETER, "begin");
783e9b48
WC
1844 goto cleanup;
1845 }
1846
5ee163e8 1847 /* get dump info: endian, class and architecture.
783e9b48
WC
1848 * If the target architecture is not supported, cpu_get_dump_info() will
1849 * return -1.
783e9b48 1850 */
56c4bfb3 1851 ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
783e9b48 1852 if (ret < 0) {
f969c627
MA
1853 error_setg(errp,
1854 "dumping guest memory is not supported on this target");
783e9b48
WC
1855 goto cleanup;
1856 }
1857
8161befd 1858 if (!s->dump_info.page_size) {
c5d40b22 1859 s->dump_info.page_size = qemu_target_page_size();
8161befd
AJ
1860 }
1861
4720bd05
PB
1862 s->note_size = cpu_get_note_size(s->dump_info.d_class,
1863 s->dump_info.d_machine, nr_cpus);
f1a4697c 1864 assert(s->note_size >= 0);
4720bd05 1865
903ef734 1866 /*
d9feb517
MAL
1867 * The goal of this block is to (a) update the previously guessed
1868 * phys_base, (b) copy the guest note out of the guest.
1869 * Failure to do so is not fatal for dumping.
903ef734
MAL
1870 */
1871 if (vmci) {
1872 uint64_t addr, note_head_size, name_size, desc_size;
1873 uint32_t size;
1874 uint16_t format;
1875
05bbaa50
JF
1876 note_head_size = dump_is_64bit(s) ?
1877 sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr);
903ef734
MAL
1878
1879 format = le16_to_cpu(vmci->vmcoreinfo.guest_format);
1880 size = le32_to_cpu(vmci->vmcoreinfo.size);
1881 addr = le64_to_cpu(vmci->vmcoreinfo.paddr);
1882 if (!vmci->has_vmcoreinfo) {
1883 warn_report("guest note is not present");
1884 } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) {
1885 warn_report("guest note size is invalid: %" PRIu32, size);
5be5df72 1886 } else if (format != FW_CFG_VMCOREINFO_FORMAT_ELF) {
903ef734
MAL
1887 warn_report("guest note format is unsupported: %" PRIu16, format);
1888 } else {
1889 s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
1890 cpu_physical_memory_read(addr, s->guest_note, size);
1891
1892 get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size);
1893 s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size,
1894 desc_size);
1895 if (name_size > MAX_GUEST_NOTE_SIZE ||
1896 desc_size > MAX_GUEST_NOTE_SIZE ||
1897 s->guest_note_size > size) {
1898 warn_report("Invalid guest note header");
1899 g_free(s->guest_note);
1900 s->guest_note = NULL;
1901 } else {
d9feb517 1902 vmcoreinfo_update_phys_base(s);
903ef734
MAL
1903 s->note_size += s->guest_note_size;
1904 }
1905 }
1906 }
1907
783e9b48 1908 /* get memory mapping */
783e9b48 1909 if (paging) {
86a518bb
JF
1910 qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp);
1911 if (*errp) {
11ed09cf
AF
1912 goto cleanup;
1913 }
783e9b48 1914 } else {
56c4bfb3 1915 qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
783e9b48
WC
1916 }
1917
7aad248d 1918 s->nr_cpus = nr_cpus;
7aad248d
QN
1919
1920 get_max_mapnr(s);
1921
1922 uint64_t tmp;
8161befd
AJ
1923 tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
1924 s->dump_info.page_size);
1925 s->len_dump_bitmap = tmp * s->dump_info.page_size;
7aad248d 1926
b53ccc30
QN
1927 /* init for kdump-compressed format */
1928 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
1929 switch (format) {
1930 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
1931 s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
1932 break;
1933
1934 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
c998acb0
LE
1935#ifdef CONFIG_LZO
1936 if (lzo_init() != LZO_E_OK) {
1937 error_setg(errp, "failed to initialize the LZO library");
1938 goto cleanup;
1939 }
1940#endif
b53ccc30
QN
1941 s->flag_compress = DUMP_DH_COMPRESSED_LZO;
1942 break;
1943
1944 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
1945 s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
1946 break;
1947
1948 default:
1949 s->flag_compress = 0;
1950 }
1951
4c7e251a 1952 return;
b53ccc30
QN
1953 }
1954
dddf725f
JF
1955 if (dump_has_filter(s)) {
1956 memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length);
783e9b48
WC
1957 }
1958
1959 /*
9b72224f
JF
1960 * The first section header is always a special one in which most
1961 * fields are 0. The section header string table is also always
1962 * set.
1963 */
1964 s->shdr_num = 2;
1965
1966 /*
1967 * Adds the number of architecture sections to shdr_num and sets
1968 * elf_section_data_size so we know the offsets and sizes of all
1969 * parts.
1970 */
1971 if (s->dump_info.arch_sections_add_fn) {
1972 s->dump_info.arch_sections_add_fn(s);
1973 }
1974
1975 /*
1976 * calculate shdr_num so we know the offsets and sizes of all
1977 * parts.
1978 * Calculate phdr_num
783e9b48 1979 *
9b72224f
JF
1980 * The absolute maximum amount of phdrs is UINT32_MAX - 1 as
1981 * sh_info is 32 bit. There's special handling once we go over
1982 * UINT16_MAX - 1 but that is handled in the ehdr and section
1983 * code.
783e9b48 1984 */
9b72224f
JF
1985 s->phdr_num = 1; /* Reserve PT_NOTE */
1986 if (s->list.num <= UINT32_MAX - 1) {
783e9b48 1987 s->phdr_num += s->list.num;
783e9b48 1988 } else {
9b72224f 1989 s->phdr_num = UINT32_MAX;
783e9b48
WC
1990 }
1991
9b72224f
JF
1992 /*
1993 * Now that the number of section and program headers is known we
1994 * can calculate the offsets of the headers and data.
1995 */
05bbaa50 1996 if (dump_is_64bit(s)) {
cb415fd6
JF
1997 s->shdr_offset = sizeof(Elf64_Ehdr);
1998 s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
1999 s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
783e9b48 2000 } else {
cb415fd6
JF
2001 s->shdr_offset = sizeof(Elf32_Ehdr);
2002 s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
2003 s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
783e9b48 2004 }
13fd417d
JF
2005 s->memory_offset = s->note_offset + s->note_size;
2006 s->section_offset = s->memory_offset + s->total_size;
783e9b48 2007
4c7e251a 2008 return;
783e9b48
WC
2009
2010cleanup:
2928207a 2011 dump_cleanup(s);
783e9b48
WC
2012}
2013
ca1fc8c9
PX
2014/* this operation might be time consuming. */
2015static void dump_process(DumpState *s, Error **errp)
2016{
86a518bb 2017 ERRP_GUARD();
d42a0d14 2018 DumpQueryResult *result = NULL;
ca1fc8c9 2019
2da91b54 2020 if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
86a518bb 2021 create_win_dump(s, errp);
2da91b54 2022 } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
86a518bb 2023 create_kdump_vmcore(s, errp);
ca1fc8c9 2024 } else {
86a518bb 2025 create_vmcore(s, errp);
ca1fc8c9
PX
2026 }
2027
39ba2ea6
PX
2028 /* make sure status is written after written_size updates */
2029 smp_wmb();
d73415a3 2030 qatomic_set(&s->status,
86a518bb 2031 (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
ca1fc8c9 2032
d42a0d14
PX
2033 /* send DUMP_COMPLETED message (unconditionally) */
2034 result = qmp_query_dump(NULL);
2035 /* should never fail */
2036 assert(result);
d4f8bdc7
MA
2037 qapi_event_send_dump_completed(result,
2038 *errp ? error_get_pretty(*errp) : NULL);
d42a0d14
PX
2039 qapi_free_DumpQueryResult(result);
2040
ca1fc8c9
PX
2041 dump_cleanup(s);
2042}
2043
1fbeff72
PX
2044static void *dump_thread(void *data)
2045{
1fbeff72 2046 DumpState *s = (DumpState *)data;
c3a8fe33 2047 dump_process(s, NULL);
1fbeff72
PX
2048 return NULL;
2049}
2050
39ba2ea6
PX
2051DumpQueryResult *qmp_query_dump(Error **errp)
2052{
2053 DumpQueryResult *result = g_new(DumpQueryResult, 1);
2054 DumpState *state = &dump_state_global;
d73415a3 2055 result->status = qatomic_read(&state->status);
39ba2ea6
PX
2056 /* make sure we are reading status and written_size in order */
2057 smp_rmb();
2058 result->completed = state->written_size;
2059 result->total = state->total_size;
2060 return result;
2061}
2062
228de9cf
PX
2063void qmp_dump_guest_memory(bool paging, const char *file,
2064 bool has_detach, bool detach,
2065 bool has_begin, int64_t begin, bool has_length,
b53ccc30
QN
2066 int64_t length, bool has_format,
2067 DumpGuestMemoryFormat format, Error **errp)
783e9b48 2068{
86a518bb 2069 ERRP_GUARD();
783e9b48
WC
2070 const char *p;
2071 int fd = -1;
2072 DumpState *s;
1fbeff72 2073 bool detach_p = false;
783e9b48 2074
63e27f28
PX
2075 if (runstate_check(RUN_STATE_INMIGRATE)) {
2076 error_setg(errp, "Dump not allowed during incoming migration.");
2077 return;
2078 }
2079
65d64f36
PX
2080 /* if there is a dump in background, we should wait until the dump
2081 * finished */
544803c7 2082 if (qemu_system_dump_in_progress()) {
65d64f36
PX
2083 error_setg(errp, "There is a dump in process, please wait.");
2084 return;
2085 }
2086
b53ccc30
QN
2087 /*
2088 * kdump-compressed format need the whole memory dumped, so paging or
2089 * filter is not supported here.
2090 */
2091 if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
2092 (paging || has_begin || has_length)) {
2093 error_setg(errp, "kdump-compressed format doesn't support paging or "
2094 "filter");
2095 return;
2096 }
783e9b48 2097 if (has_begin && !has_length) {
c6bd8c70 2098 error_setg(errp, QERR_MISSING_PARAMETER, "length");
783e9b48
WC
2099 return;
2100 }
2101 if (!has_begin && has_length) {
c6bd8c70 2102 error_setg(errp, QERR_MISSING_PARAMETER, "begin");
783e9b48
WC
2103 return;
2104 }
1fbeff72
PX
2105 if (has_detach) {
2106 detach_p = detach;
2107 }
783e9b48 2108
b53ccc30
QN
2109 /* check whether lzo/snappy is supported */
2110#ifndef CONFIG_LZO
2111 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
2112 error_setg(errp, "kdump-lzo is not available now");
2113 return;
2114 }
2115#endif
2116
2117#ifndef CONFIG_SNAPPY
2118 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
2119 error_setg(errp, "kdump-snappy is not available now");
2120 return;
2121 }
2122#endif
2123
efc3146a
PMD
2124 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP
2125 && !win_dump_available(errp)) {
2da91b54
VP
2126 return;
2127 }
2da91b54 2128
783e9b48
WC
2129#if !defined(WIN32)
2130 if (strstart(file, "fd:", &p)) {
947e4744 2131 fd = monitor_get_fd(monitor_cur(), p, errp);
783e9b48 2132 if (fd == -1) {
783e9b48
WC
2133 return;
2134 }
2135 }
2136#endif
2137
2138 if (strstart(file, "file:", &p)) {
448058aa 2139 fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
783e9b48 2140 if (fd < 0) {
7581766b 2141 error_setg_file_open(errp, errno, p);
783e9b48
WC
2142 return;
2143 }
2144 }
2145
2146 if (fd == -1) {
c6bd8c70 2147 error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
783e9b48
WC
2148 return;
2149 }
2150
b7bc6b18
PX
2151 if (!dump_migration_blocker) {
2152 error_setg(&dump_migration_blocker,
2153 "Live migration disabled: dump-guest-memory in progress");
2154 }
2155
2156 /*
2157 * Allows even for -only-migratable, but forbid migration during the
2158 * process of dump guest memory.
2159 */
2160 if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
2161 /* Remember to release the fd before passing it over to dump state */
2162 close(fd);
2163 return;
2164 }
2165
baf28f57
PX
2166 s = &dump_state_global;
2167 dump_state_prepare(s);
783e9b48 2168
4c7e251a 2169 dump_init(s, fd, has_format, format, paging, has_begin,
86a518bb
JF
2170 begin, length, errp);
2171 if (*errp) {
d73415a3 2172 qatomic_set(&s->status, DUMP_STATUS_FAILED);
783e9b48
WC
2173 return;
2174 }
2175
1fbeff72
PX
2176 if (detach_p) {
2177 /* detached dump */
6796b400 2178 s->detached = true;
1fbeff72
PX
2179 qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
2180 s, QEMU_THREAD_DETACHED);
2181 } else {
2182 /* sync dump */
2183 dump_process(s, errp);
2184 }
783e9b48 2185}
7d6dc7f3
QN
2186
2187DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
2188{
7d6dc7f3 2189 DumpGuestMemoryCapability *cap =
b21e2380 2190 g_new0(DumpGuestMemoryCapability, 1);
95b3a8c8 2191 DumpGuestMemoryFormatList **tail = &cap->formats;
7d6dc7f3
QN
2192
2193 /* elf is always available */
95b3a8c8 2194 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_ELF);
7d6dc7f3
QN
2195
2196 /* kdump-zlib is always available */
95b3a8c8 2197 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB);
7d6dc7f3
QN
2198
2199 /* add new item if kdump-lzo is available */
2200#ifdef CONFIG_LZO
95b3a8c8 2201 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO);
7d6dc7f3
QN
2202#endif
2203
2204 /* add new item if kdump-snappy is available */
2205#ifdef CONFIG_SNAPPY
95b3a8c8 2206 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY);
7d6dc7f3
QN
2207#endif
2208
efc3146a
PMD
2209 if (win_dump_available(NULL)) {
2210 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_WIN_DMP);
2211 }
2da91b54 2212
7d6dc7f3
QN
2213 return cap;
2214}