]> git.ipfire.org Git - thirdparty/qemu.git/blame - migration/savevm.c
Include migration/vmstate.h less
[thirdparty/qemu.git] / migration / savevm.c
CommitLineData
a672b469
AL
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2009-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
a672b469
AL
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
a672b469 28
1393a485 29#include "qemu/osdep.h"
abfd9ce3 30#include "hw/boards.h"
88c16567 31#include "hw/xen/xen.h"
1422e32d 32#include "net/net.h"
6666c96a 33#include "migration.h"
5e22479a 34#include "migration/snapshot.h"
d6454270 35#include "migration/vmstate.h"
f8d806c9 36#include "migration/misc.h"
f2a8f0a6 37#include "migration/register.h"
84a899de 38#include "migration/global_state.h"
7b1e1a22 39#include "ram.h"
40014d81 40#include "qemu-file-channel.h"
08a0aee1 41#include "qemu-file.h"
20a519a0 42#include "savevm.h"
be07b0ac 43#include "postcopy-ram.h"
e688df6b 44#include "qapi/error.h"
112ed241
MA
45#include "qapi/qapi-commands-migration.h"
46#include "qapi/qapi-commands-misc.h"
cc7a8ea7 47#include "qapi/qmp/qerror.h"
d49b6836 48#include "qemu/error-report.h"
9c17d615 49#include "sysemu/cpus.h"
022c62cb 50#include "exec/memory.h"
51180423 51#include "exec/target_page.h"
517a13c9 52#include "trace.h"
28085f7b 53#include "qemu/iov.h"
de08c606 54#include "block/snapshot.h"
f348b6d1 55#include "qemu/cutils.h"
61b67d47 56#include "io/channel-buffer.h"
8925839f 57#include "io/channel-file.h"
377b21cc 58#include "sysemu/replay.h"
83ee768d 59#include "qjson.h"
aad555c2 60#include "migration/colo.h"
6cafc8e4 61#include "qemu/bitmap.h"
7659505c 62#include "net/announce.h"
18995b98 63
093e3c42
DDAG
64const unsigned int postcopy_ram_discard_version = 0;
65
20a519a0
JQ
/*
 * Subcommands carried inside a QEMU_VM_COMMAND element.
 * The enumerator values are written to the stream as the command id.
 */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,          /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,     /* Tell the dest to open the Return path */
    MIG_CMD_PING,                 /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,      /* Prior to any page transfers, just
                                     warn we might want to do PC */
    MIG_CMD_POSTCOPY_LISTEN,      /* Start listening for incoming
                                     pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,         /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
                                     were previously sent during
                                     precopy but are dirty. */
    MIG_CMD_PACKAGED,             /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,          /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,      /* Resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,          /* Request for recved bitmap on dst */
    MIG_CMD_MAX
};
87
ee555cdf 88#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
c76ca188
DDAG
89static struct mig_cmd_args {
90 ssize_t len; /* -1 = variable */
91 const char *name;
92} mig_cmd_args[] = {
93 [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
2e37701e
DDAG
94 [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
95 [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
58110f0a 96 [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
093e3c42
DDAG
97 [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
98 [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
99 [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
100 .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
3f5875ec 101 [MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" },
11cf1d98 102 [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
f25d4225 103 [MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
c76ca188
DDAG
104 [MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
105};
106
58110f0a
VSO
/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *   uint64_t host page size
 *   uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *   format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *   Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break the format's ability to describe itself. A good way is to
 * introduce some generic extendable format with an exception for the two old
 * entities.
 */
123
a672b469
AL
124/***********************************************************/
125/* savevm/loadvm support */
126
05fcc848
KW
127static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
128 int64_t pos)
129{
130 int ret;
131 QEMUIOVector qiov;
132
133 qemu_iovec_init_external(&qiov, iov, iovcnt);
134 ret = bdrv_writev_vmstate(opaque, &qiov, pos);
135 if (ret < 0) {
136 return ret;
137 }
138
139 return qiov.size;
140}
141
a202a4c0
DDAG
/*
 * QEMUFileOps.get_buffer callback for block-device backed files:
 * forwards the request to bdrv_load_vmstate() and returns its result.
 */
static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size)
{
    ssize_t got = bdrv_load_vmstate(opaque, buf, pos, size);

    return got;
}
147
/*
 * QEMUFileOps.close callback for block-device backed files:
 * flushes the device via bdrv_flush() and returns its result.
 */
static int bdrv_fclose(void *opaque)
{
    int rc = bdrv_flush(opaque);

    return rc;
}
152
9229bf3c
PB
153static const QEMUFileOps bdrv_read_ops = {
154 .get_buffer = block_get_buffer,
155 .close = bdrv_fclose
156};
157
158static const QEMUFileOps bdrv_write_ops = {
05fcc848
KW
159 .writev_buffer = block_writev_buffer,
160 .close = bdrv_fclose
9229bf3c
PB
161};
162
45566e9c 163static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
a672b469 164{
38ff78d3 165 if (is_writable) {
9229bf3c 166 return qemu_fopen_ops(bs, &bdrv_write_ops);
38ff78d3 167 }
9229bf3c 168 return qemu_fopen_ops(bs, &bdrv_read_ops);
a672b469
AL
169}
170
2ff68d07 171
bb1a6d8c
EH
172/* QEMUFile timer support.
173 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
174 */
2ff68d07 175
40daca54 176void timer_put(QEMUFile *f, QEMUTimer *ts)
2ff68d07
PB
177{
178 uint64_t expire_time;
179
e93379b0 180 expire_time = timer_expire_time_ns(ts);
2ff68d07
PB
181 qemu_put_be64(f, expire_time);
182}
183
40daca54 184void timer_get(QEMUFile *f, QEMUTimer *ts)
2ff68d07
PB
185{
186 uint64_t expire_time;
187
188 expire_time = qemu_get_be64(f);
189 if (expire_time != -1) {
bc72ad67 190 timer_mod_ns(ts, expire_time);
2ff68d07 191 } else {
bc72ad67 192 timer_del(ts);
2ff68d07
PB
193 }
194}
195
196
bb1a6d8c
EH
197/* VMState timer support.
198 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
199 */
dde0463b 200
03fee66f
MAL
201static int get_timer(QEMUFile *f, void *pv, size_t size,
202 const VMStateField *field)
dde0463b
JQ
203{
204 QEMUTimer *v = pv;
40daca54 205 timer_get(f, v);
dde0463b
JQ
206 return 0;
207}
208
03fee66f
MAL
209static int put_timer(QEMUFile *f, void *pv, size_t size,
210 const VMStateField *field, QJSON *vmdesc)
dde0463b 211{
84e2e3eb 212 QEMUTimer *v = pv;
40daca54 213 timer_put(f, v);
2c21ee76
JD
214
215 return 0;
dde0463b
JQ
216}
217
218const VMStateInfo vmstate_info_timer = {
219 .name = "timer",
220 .get = get_timer,
221 .put = put_timer,
222};
223
08e99e29 224
7685ee6a
AW
/*
 * Identification of an entry without the device-path prefix.  The register
 * functions fill this in when a device path is available: idstr holds the
 * bare name, instance_id the caller's id or one computed by
 * calculate_compat_instance_id().
 */
typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;
229
a672b469 230typedef struct SaveStateEntry {
72cf2d4f 231 QTAILQ_ENTRY(SaveStateEntry) entry;
a672b469
AL
232 char idstr[256];
233 int instance_id;
4d2ffa08 234 int alias_id;
a672b469 235 int version_id;
0f42f657
JQ
236 /* version id read from the stream */
237 int load_version_id;
a672b469 238 int section_id;
0f42f657
JQ
239 /* section id read from the stream */
240 int load_section_id;
de22ded0 241 const SaveVMHandlers *ops;
9ed7d6ae 242 const VMStateDescription *vmsd;
a672b469 243 void *opaque;
7685ee6a 244 CompatEntry *compat;
a7ae8355 245 int is_ram;
a672b469
AL
246} SaveStateEntry;
247
0163a2e0
JQ
248typedef struct SaveState {
249 QTAILQ_HEAD(, SaveStateEntry) handlers;
250 int global_section_id;
61964c23
JQ
251 uint32_t len;
252 const char *name;
59811a32 253 uint32_t target_page_bits;
6cafc8e4
YK
254 uint32_t caps_count;
255 MigrationCapability *capabilities;
0163a2e0
JQ
256} SaveState;
257
258static SaveState savevm_state = {
259 .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
260 .global_section_id = 0,
61964c23
JQ
261};
262
6cafc8e4
YK
263static bool should_validate_capability(int capability)
264{
265 assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
266 /* Validate only new capabilities to keep compatibility. */
267 switch (capability) {
268 case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
269 return true;
270 default:
271 return false;
272 }
273}
274
275static uint32_t get_validatable_capabilities_count(void)
276{
277 MigrationState *s = migrate_get_current();
278 uint32_t result = 0;
279 int i;
280 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
281 if (should_validate_capability(i) && s->enabled_capabilities[i]) {
282 result++;
283 }
284 }
285 return result;
286}
287
44b1ff31 288static int configuration_pre_save(void *opaque)
61964c23
JQ
289{
290 SaveState *state = opaque;
291 const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
6cafc8e4
YK
292 MigrationState *s = migrate_get_current();
293 int i, j;
61964c23
JQ
294
295 state->len = strlen(current_name);
296 state->name = current_name;
46d702b1 297 state->target_page_bits = qemu_target_page_bits();
44b1ff31 298
6cafc8e4
YK
299 state->caps_count = get_validatable_capabilities_count();
300 state->capabilities = g_renew(MigrationCapability, state->capabilities,
301 state->caps_count);
302 for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
303 if (should_validate_capability(i) && s->enabled_capabilities[i]) {
304 state->capabilities[j++] = i;
305 }
306 }
307
44b1ff31 308 return 0;
59811a32
PM
309}
310
311static int configuration_pre_load(void *opaque)
312{
313 SaveState *state = opaque;
314
315 /* If there is no target-page-bits subsection it means the source
316 * predates the variable-target-page-bits support and is using the
317 * minimum possible value for this CPU.
318 */
46d702b1 319 state->target_page_bits = qemu_target_page_bits_min();
59811a32 320 return 0;
61964c23
JQ
321}
322
6cafc8e4
YK
323static bool configuration_validate_capabilities(SaveState *state)
324{
325 bool ret = true;
326 MigrationState *s = migrate_get_current();
327 unsigned long *source_caps_bm;
328 int i;
329
330 source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
331 for (i = 0; i < state->caps_count; i++) {
332 MigrationCapability capability = state->capabilities[i];
333 set_bit(capability, source_caps_bm);
334 }
335
336 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
337 bool source_state, target_state;
338 if (!should_validate_capability(i)) {
339 continue;
340 }
341 source_state = test_bit(i, source_caps_bm);
342 target_state = s->enabled_capabilities[i];
343 if (source_state != target_state) {
344 error_report("Capability %s is %s, but received capability is %s",
345 MigrationCapability_str(i),
346 target_state ? "on" : "off",
347 source_state ? "on" : "off");
348 ret = false;
349 /* Don't break here to report all failed capabilities */
350 }
351 }
352
353 g_free(source_caps_bm);
354 return ret;
355}
356
61964c23
JQ
357static int configuration_post_load(void *opaque, int version_id)
358{
359 SaveState *state = opaque;
360 const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
361
362 if (strncmp(state->name, current_name, state->len) != 0) {
15d61692
GK
363 error_report("Machine type received is '%.*s' and local is '%s'",
364 (int) state->len, state->name, current_name);
61964c23
JQ
365 return -EINVAL;
366 }
59811a32 367
46d702b1 368 if (state->target_page_bits != qemu_target_page_bits()) {
59811a32 369 error_report("Received TARGET_PAGE_BITS is %d but local is %d",
46d702b1 370 state->target_page_bits, qemu_target_page_bits());
59811a32
PM
371 return -EINVAL;
372 }
373
6cafc8e4
YK
374 if (!configuration_validate_capabilities(state)) {
375 return -EINVAL;
376 }
377
61964c23
JQ
378 return 0;
379}
380
6cafc8e4
YK
381static int get_capability(QEMUFile *f, void *pv, size_t size,
382 const VMStateField *field)
383{
384 MigrationCapability *capability = pv;
385 char capability_str[UINT8_MAX + 1];
386 uint8_t len;
387 int i;
388
389 len = qemu_get_byte(f);
390 qemu_get_buffer(f, (uint8_t *)capability_str, len);
391 capability_str[len] = '\0';
392 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
393 if (!strcmp(MigrationCapability_str(i), capability_str)) {
394 *capability = i;
395 return 0;
396 }
397 }
398 error_report("Received unknown capability %s", capability_str);
399 return -EINVAL;
400}
401
402static int put_capability(QEMUFile *f, void *pv, size_t size,
403 const VMStateField *field, QJSON *vmdesc)
404{
405 MigrationCapability *capability = pv;
406 const char *capability_str = MigrationCapability_str(*capability);
407 size_t len = strlen(capability_str);
408 assert(len <= UINT8_MAX);
409
410 qemu_put_byte(f, len);
411 qemu_put_buffer(f, (uint8_t *)capability_str, len);
412 return 0;
413}
414
415static const VMStateInfo vmstate_info_capability = {
416 .name = "capability",
417 .get = get_capability,
418 .put = put_capability,
419};
420
59811a32
PM
/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (ie the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    return qemu_target_page_bits() > qemu_target_page_bits_min();
}
433
434static const VMStateDescription vmstate_target_page_bits = {
435 .name = "configuration/target-page-bits",
436 .version_id = 1,
437 .minimum_version_id = 1,
438 .needed = vmstate_target_page_bits_needed,
439 .fields = (VMStateField[]) {
440 VMSTATE_UINT32(target_page_bits, SaveState),
441 VMSTATE_END_OF_LIST()
442 }
443};
444
6cafc8e4
YK
/*
 * The capabilities subsection is sent only when at least one capability
 * that is subject to validation is enabled.
 */
static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() != 0;
}
449
450static const VMStateDescription vmstate_capabilites = {
451 .name = "configuration/capabilities",
452 .version_id = 1,
453 .minimum_version_id = 1,
454 .needed = vmstate_capabilites_needed,
455 .fields = (VMStateField[]) {
456 VMSTATE_UINT32_V(caps_count, SaveState, 1),
457 VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
458 vmstate_info_capability,
459 MigrationCapability),
460 VMSTATE_END_OF_LIST()
461 }
462};
463
61964c23
JQ
464static const VMStateDescription vmstate_configuration = {
465 .name = "configuration",
466 .version_id = 1,
59811a32 467 .pre_load = configuration_pre_load,
61964c23
JQ
468 .post_load = configuration_post_load,
469 .pre_save = configuration_pre_save,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(len, SaveState),
59046ec2 472 VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
61964c23
JQ
473 VMSTATE_END_OF_LIST()
474 },
59811a32
PM
475 .subsections = (const VMStateDescription*[]) {
476 &vmstate_target_page_bits,
6cafc8e4 477 &vmstate_capabilites,
59811a32
PM
478 NULL
479 }
0163a2e0 480};
a672b469 481
abfd9ce3
AS
482static void dump_vmstate_vmsd(FILE *out_file,
483 const VMStateDescription *vmsd, int indent,
484 bool is_subsection);
485
486static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
487 int indent)
488{
489 fprintf(out_file, "%*s{\n", indent, "");
490 indent += 2;
491 fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
492 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
493 field->version_id);
494 fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
495 field->field_exists ? "true" : "false");
496 fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
497 if (field->vmsd != NULL) {
498 fprintf(out_file, ",\n");
499 dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
500 }
501 fprintf(out_file, "\n%*s}", indent - 2, "");
502}
503
504static void dump_vmstate_vmss(FILE *out_file,
5cd8cada 505 const VMStateDescription **subsection,
abfd9ce3
AS
506 int indent)
507{
5cd8cada
JQ
508 if (*subsection != NULL) {
509 dump_vmstate_vmsd(out_file, *subsection, indent, true);
abfd9ce3
AS
510 }
511}
512
513static void dump_vmstate_vmsd(FILE *out_file,
514 const VMStateDescription *vmsd, int indent,
515 bool is_subsection)
516{
517 if (is_subsection) {
518 fprintf(out_file, "%*s{\n", indent, "");
519 } else {
520 fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
521 }
522 indent += 2;
523 fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
524 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
525 vmsd->version_id);
526 fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
527 vmsd->minimum_version_id);
528 if (vmsd->fields != NULL) {
529 const VMStateField *field = vmsd->fields;
530 bool first;
531
532 fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
533 first = true;
534 while (field->name != NULL) {
535 if (field->flags & VMS_MUST_EXIST) {
536 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
537 field++;
538 continue;
539 }
540 if (!first) {
541 fprintf(out_file, ",\n");
542 }
543 dump_vmstate_vmsf(out_file, field, indent + 2);
544 field++;
545 first = false;
546 }
547 fprintf(out_file, "\n%*s]", indent, "");
548 }
549 if (vmsd->subsections != NULL) {
5cd8cada 550 const VMStateDescription **subsection = vmsd->subsections;
abfd9ce3
AS
551 bool first;
552
553 fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
554 first = true;
5cd8cada 555 while (*subsection != NULL) {
abfd9ce3
AS
556 if (!first) {
557 fprintf(out_file, ",\n");
558 }
559 dump_vmstate_vmss(out_file, subsection, indent + 2);
560 subsection++;
561 first = false;
562 }
563 fprintf(out_file, "\n%*s]", indent, "");
564 }
565 fprintf(out_file, "\n%*s}", indent - 2, "");
566}
567
568static void dump_machine_type(FILE *out_file)
569{
570 MachineClass *mc;
571
572 mc = MACHINE_GET_CLASS(current_machine);
573
574 fprintf(out_file, " \"vmschkmachine\": {\n");
575 fprintf(out_file, " \"Name\": \"%s\"\n", mc->name);
576 fprintf(out_file, " },\n");
577}
578
579void dump_vmstate_json_to_file(FILE *out_file)
580{
581 GSList *list, *elt;
582 bool first;
583
584 fprintf(out_file, "{\n");
585 dump_machine_type(out_file);
586
587 first = true;
588 list = object_class_get_list(TYPE_DEVICE, true);
589 for (elt = list; elt; elt = elt->next) {
590 DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
591 TYPE_DEVICE);
592 const char *name;
593 int indent = 2;
594
595 if (!dc->vmsd) {
596 continue;
597 }
598
599 if (!first) {
600 fprintf(out_file, ",\n");
601 }
602 name = object_class_get_name(OBJECT_CLASS(dc));
603 fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
604 indent += 2;
605 fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
606 fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
607 dc->vmsd->version_id);
608 fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
609 dc->vmsd->minimum_version_id);
610
611 dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
612
613 fprintf(out_file, "\n%*s}", indent - 2, "");
614 first = false;
615 }
616 fprintf(out_file, "\n}\n");
617 fclose(out_file);
618}
619
8718e999
JQ
620static int calculate_new_instance_id(const char *idstr)
621{
622 SaveStateEntry *se;
623 int instance_id = 0;
624
0163a2e0 625 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
8718e999
JQ
626 if (strcmp(idstr, se->idstr) == 0
627 && instance_id <= se->instance_id) {
628 instance_id = se->instance_id + 1;
629 }
630 }
631 return instance_id;
632}
633
7685ee6a
AW
634static int calculate_compat_instance_id(const char *idstr)
635{
636 SaveStateEntry *se;
637 int instance_id = 0;
638
0163a2e0 639 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
38ff78d3 640 if (!se->compat) {
7685ee6a 641 continue;
38ff78d3 642 }
7685ee6a
AW
643
644 if (strcmp(idstr, se->compat->idstr) == 0
645 && instance_id <= se->compat->instance_id) {
646 instance_id = se->compat->instance_id + 1;
647 }
648 }
649 return instance_id;
650}
651
f37bc036
PX
652static inline MigrationPriority save_state_priority(SaveStateEntry *se)
653{
654 if (se->vmsd) {
655 return se->vmsd->priority;
656 }
657 return MIG_PRI_DEFAULT;
658}
659
660static void savevm_state_handler_insert(SaveStateEntry *nse)
661{
662 MigrationPriority priority = save_state_priority(nse);
663 SaveStateEntry *se;
664
665 assert(priority <= MIG_PRI_MAX);
666
667 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
668 if (save_state_priority(se) < priority) {
669 break;
670 }
671 }
672
673 if (se) {
674 QTAILQ_INSERT_BEFORE(se, nse, entry);
675 } else {
676 QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
677 }
678}
679
a672b469
AL
680/* TODO: Individual devices generally have very little idea about the rest
681 of the system, so instance_id should be removed/replaced.
682 Meanwhile pass -1 as instance_id if you do not already have a clearly
683 distinguishing id for all instances of your device class. */
0be71e32
AW
684int register_savevm_live(DeviceState *dev,
685 const char *idstr,
a672b469
AL
686 int instance_id,
687 int version_id,
de22ded0 688 const SaveVMHandlers *ops,
a672b469
AL
689 void *opaque)
690{
8718e999 691 SaveStateEntry *se;
a672b469 692
97f3ad35 693 se = g_new0(SaveStateEntry, 1);
a672b469 694 se->version_id = version_id;
0163a2e0 695 se->section_id = savevm_state.global_section_id++;
7908c78d 696 se->ops = ops;
a672b469 697 se->opaque = opaque;
9ed7d6ae 698 se->vmsd = NULL;
a7ae8355 699 /* if this is a live_savem then set is_ram */
9907e842 700 if (ops->save_setup != NULL) {
a7ae8355
SS
701 se->is_ram = 1;
702 }
a672b469 703
09e5ab63
AL
704 if (dev) {
705 char *id = qdev_get_dev_path(dev);
7685ee6a 706 if (id) {
581f08ba
DDAG
707 if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
708 sizeof(se->idstr)) {
709 error_report("Path too long for VMState (%s)", id);
710 g_free(id);
711 g_free(se);
712
713 return -1;
714 }
7267c094 715 g_free(id);
7685ee6a 716
97f3ad35 717 se->compat = g_new0(CompatEntry, 1);
7685ee6a
AW
718 pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
719 se->compat->instance_id = instance_id == -1 ?
720 calculate_compat_instance_id(idstr) : instance_id;
721 instance_id = -1;
722 }
723 }
724 pstrcat(se->idstr, sizeof(se->idstr), idstr);
725
8718e999 726 if (instance_id == -1) {
7685ee6a 727 se->instance_id = calculate_new_instance_id(se->idstr);
8718e999
JQ
728 } else {
729 se->instance_id = instance_id;
a672b469 730 }
7685ee6a 731 assert(!se->compat || se->instance_id == 0);
f37bc036 732 savevm_state_handler_insert(se);
a672b469
AL
733 return 0;
734}
735
0be71e32 736void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
41bd13af 737{
8718e999 738 SaveStateEntry *se, *new_se;
7685ee6a
AW
739 char id[256] = "";
740
09e5ab63
AL
741 if (dev) {
742 char *path = qdev_get_dev_path(dev);
7685ee6a
AW
743 if (path) {
744 pstrcpy(id, sizeof(id), path);
745 pstrcat(id, sizeof(id), "/");
7267c094 746 g_free(path);
7685ee6a
AW
747 }
748 }
749 pstrcat(id, sizeof(id), idstr);
41bd13af 750
0163a2e0 751 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
7685ee6a 752 if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
0163a2e0 753 QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
ef1e1e07 754 g_free(se->compat);
7267c094 755 g_free(se);
41bd13af 756 }
41bd13af
AL
757 }
758}
759
0be71e32 760int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
4d2ffa08
JK
761 const VMStateDescription *vmsd,
762 void *opaque, int alias_id,
bc5c4f21
DDAG
763 int required_for_version,
764 Error **errp)
9ed7d6ae 765{
8718e999 766 SaveStateEntry *se;
9ed7d6ae 767
4d2ffa08
JK
768 /* If this triggers, alias support can be dropped for the vmsd. */
769 assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
770
97f3ad35 771 se = g_new0(SaveStateEntry, 1);
9ed7d6ae 772 se->version_id = vmsd->version_id;
0163a2e0 773 se->section_id = savevm_state.global_section_id++;
9ed7d6ae
JQ
774 se->opaque = opaque;
775 se->vmsd = vmsd;
4d2ffa08 776 se->alias_id = alias_id;
9ed7d6ae 777
09e5ab63
AL
778 if (dev) {
779 char *id = qdev_get_dev_path(dev);
7685ee6a 780 if (id) {
581f08ba
DDAG
781 if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
782 sizeof(se->idstr)) {
783 error_setg(errp, "Path too long for VMState (%s)", id);
784 g_free(id);
785 g_free(se);
786
787 return -1;
788 }
128e4e10 789 g_free(id);
7685ee6a 790
97f3ad35 791 se->compat = g_new0(CompatEntry, 1);
7685ee6a
AW
792 pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
793 se->compat->instance_id = instance_id == -1 ?
794 calculate_compat_instance_id(vmsd->name) : instance_id;
795 instance_id = -1;
796 }
797 }
798 pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
799
8718e999 800 if (instance_id == -1) {
7685ee6a 801 se->instance_id = calculate_new_instance_id(se->idstr);
8718e999
JQ
802 } else {
803 se->instance_id = instance_id;
9ed7d6ae 804 }
7685ee6a 805 assert(!se->compat || se->instance_id == 0);
f37bc036 806 savevm_state_handler_insert(se);
9ed7d6ae
JQ
807 return 0;
808}
809
0be71e32
AW
810void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
811 void *opaque)
9ed7d6ae 812{
1eb7538b
JQ
813 SaveStateEntry *se, *new_se;
814
0163a2e0 815 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
1eb7538b 816 if (se->vmsd == vmsd && se->opaque == opaque) {
0163a2e0 817 QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
ef1e1e07 818 g_free(se->compat);
7267c094 819 g_free(se);
1eb7538b
JQ
820 }
821 }
9ed7d6ae
JQ
822}
823
3a011c26 824static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
4082be4d 825{
9013dca5 826 trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
9ed7d6ae 827 if (!se->vmsd) { /* Old style */
3a011c26 828 return se->ops->load_state(f, se->opaque, se->load_version_id);
9ed7d6ae 829 }
3a011c26 830 return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
4082be4d
JQ
831}
832
8118f095
AG
833static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
834{
835 int64_t old_offset, size;
836
837 old_offset = qemu_ftell_fast(f);
838 se->ops->save_state(f, se->opaque);
839 size = qemu_ftell_fast(f) - old_offset;
840
841 if (vmdesc) {
842 json_prop_int(vmdesc, "size", size);
843 json_start_array(vmdesc, "fields");
844 json_start_object(vmdesc, NULL);
845 json_prop_str(vmdesc, "name", "data");
846 json_prop_int(vmdesc, "size", size);
847 json_prop_str(vmdesc, "type", "buffer");
848 json_end_object(vmdesc);
849 json_end_array(vmdesc);
850 }
851}
852
687433f6 853static int vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
4082be4d 854{
9013dca5 855 trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
8118f095
AG
856 if (!se->vmsd) {
857 vmstate_save_old_style(f, se, vmdesc);
687433f6 858 return 0;
9ed7d6ae 859 }
687433f6 860 return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
4082be4d
JQ
861}
862
ce39bfc9
DDAG
863/*
864 * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
865 */
866static void save_section_header(QEMUFile *f, SaveStateEntry *se,
867 uint8_t section_type)
868{
869 qemu_put_byte(f, section_type);
870 qemu_put_be32(f, se->section_id);
871
872 if (section_type == QEMU_VM_SECTION_FULL ||
873 section_type == QEMU_VM_SECTION_START) {
874 /* ID string */
875 size_t len = strlen(se->idstr);
876 qemu_put_byte(f, len);
877 qemu_put_buffer(f, (uint8_t *)se->idstr, len);
878
879 qemu_put_be32(f, se->instance_id);
880 qemu_put_be32(f, se->version_id);
881 }
882}
883
f68945d4
DDAG
884/*
885 * Write a footer onto device sections that catches cases misformatted device
886 * sections.
887 */
888static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
889{
15c38503 890 if (migrate_get_current()->send_section_footer) {
f68945d4
DDAG
891 qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
892 qemu_put_be32(f, se->section_id);
893 }
894}
895
c76ca188
DDAG
896/**
897 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
898 * command and associated data.
899 *
900 * @f: File to send command on
901 * @command: Command type to send
902 * @len: Length of associated data
903 * @data: Data associated with command.
904 */
20a519a0
JQ
905static void qemu_savevm_command_send(QEMUFile *f,
906 enum qemu_vm_cmd command,
907 uint16_t len,
908 uint8_t *data)
c76ca188
DDAG
909{
910 trace_savevm_command_send(command, len);
911 qemu_put_byte(f, QEMU_VM_COMMAND);
912 qemu_put_be16(f, (uint16_t)command);
913 qemu_put_be16(f, len);
914 qemu_put_buffer(f, data, len);
915 qemu_fflush(f);
916}
917
aad555c2
ZC
918void qemu_savevm_send_colo_enable(QEMUFile *f)
919{
920 trace_savevm_send_colo_enable();
921 qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
922}
923
2e37701e
DDAG
924void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
925{
926 uint32_t buf;
927
928 trace_savevm_send_ping(value);
929 buf = cpu_to_be32(value);
930 qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
931}
932
933void qemu_savevm_send_open_return_path(QEMUFile *f)
934{
935 trace_savevm_send_open_return_path();
936 qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
937}
938
11cf1d98
DDAG
939/* We have a buffer of data to send; we don't want that all to be loaded
940 * by the command itself, so the command contains just the length of the
941 * extra buffer that we then send straight after it.
942 * TODO: Must be a better way to organise that
943 *
944 * Returns:
945 * 0 on success
946 * -ve on error
947 */
61b67d47 948int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
11cf1d98 949{
11cf1d98
DDAG
950 uint32_t tmp;
951
952 if (len > MAX_VM_CMD_PACKAGED_SIZE) {
953 error_report("%s: Unreasonably large packaged state: %zu",
954 __func__, len);
955 return -1;
956 }
957
958 tmp = cpu_to_be32(len);
959
960 trace_qemu_savevm_send_packaged();
961 qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
962
61b67d47 963 qemu_put_buffer(f, buf, len);
11cf1d98
DDAG
964
965 return 0;
966}
967
093e3c42
DDAG
968/* Send prior to any postcopy transfer */
969void qemu_savevm_send_postcopy_advise(QEMUFile *f)
970{
58110f0a
VSO
971 if (migrate_postcopy_ram()) {
972 uint64_t tmp[2];
973 tmp[0] = cpu_to_be64(ram_pagesize_summary());
974 tmp[1] = cpu_to_be64(qemu_target_page_size());
975
976 trace_qemu_savevm_send_postcopy_advise();
977 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
978 16, (uint8_t *)tmp);
979 } else {
980 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
981 }
093e3c42
DDAG
982}
983
984/* Sent prior to starting the destination running in postcopy, discard pages
985 * that have already been sent but redirtied on the source.
986 * CMD_POSTCOPY_RAM_DISCARD consist of:
987 * byte version (0)
988 * byte Length of name field (not including 0)
989 * n x byte RAM block name
990 * byte 0 terminator (just for safety)
991 * n x Byte ranges within the named RAMBlock
992 * be64 Start of the range
993 * be64 Length
994 *
995 * name: RAMBlock name that these entries are part of
996 * len: Number of page entries
997 * start_list: 'len' addresses
998 * length_list: 'len' addresses
999 *
1000 */
1001void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
1002 uint16_t len,
1003 uint64_t *start_list,
1004 uint64_t *length_list)
1005{
1006 uint8_t *buf;
1007 uint16_t tmplen;
1008 uint16_t t;
1009 size_t name_len = strlen(name);
1010
1011 trace_qemu_savevm_send_postcopy_ram_discard(name, len);
1012 assert(name_len < 256);
1013 buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
1014 buf[0] = postcopy_ram_discard_version;
1015 buf[1] = name_len;
1016 memcpy(buf + 2, name, name_len);
1017 tmplen = 2 + name_len;
1018 buf[tmplen++] = '\0';
1019
1020 for (t = 0; t < len; t++) {
4d885131 1021 stq_be_p(buf + tmplen, start_list[t]);
093e3c42 1022 tmplen += 8;
4d885131 1023 stq_be_p(buf + tmplen, length_list[t]);
093e3c42
DDAG
1024 tmplen += 8;
1025 }
1026 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
1027 g_free(buf);
1028}
1029
1030/* Get the destination into a state where it can receive postcopy data. */
1031void qemu_savevm_send_postcopy_listen(QEMUFile *f)
1032{
1033 trace_savevm_send_postcopy_listen();
1034 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
1035}
1036
1037/* Kick the destination into running */
1038void qemu_savevm_send_postcopy_run(QEMUFile *f)
1039{
1040 trace_savevm_send_postcopy_run();
1041 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
1042}
1043
3f5875ec
PX
1044void qemu_savevm_send_postcopy_resume(QEMUFile *f)
1045{
1046 trace_savevm_send_postcopy_resume();
1047 qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
1048}
1049
f25d4225
PX
1050void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
1051{
1052 size_t len;
1053 char buf[256];
1054
1055 trace_savevm_send_recv_bitmap(block_name);
1056
1057 buf[0] = len = strlen(block_name);
1058 memcpy(buf + 1, block_name, len);
1059
1060 qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
1061}
1062
e1c37d0e 1063bool qemu_savevm_state_blocked(Error **errp)
dc912121
AW
1064{
1065 SaveStateEntry *se;
1066
0163a2e0 1067 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
7d854c47 1068 if (se->vmsd && se->vmsd->unmigratable) {
f231b88d
CR
1069 error_setg(errp, "State blocked by non-migratable device '%s'",
1070 se->idstr);
dc912121
AW
1071 return true;
1072 }
1073 }
1074 return false;
1075}
1076
f796baa1
DDAG
1077void qemu_savevm_state_header(QEMUFile *f)
1078{
1079 trace_savevm_state_header();
1080 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1081 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
172dfd4f 1082
4ffdb337 1083 if (migrate_get_current()->send_configuration) {
172dfd4f
DDAG
1084 qemu_put_byte(f, QEMU_VM_CONFIGURATION);
1085 vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
1086 }
f796baa1
DDAG
1087}
1088
9907e842 1089void qemu_savevm_state_setup(QEMUFile *f)
a672b469
AL
1090{
1091 SaveStateEntry *se;
bd227060 1092 Error *local_err = NULL;
39346385 1093 int ret;
a672b469 1094
9907e842 1095 trace_savevm_state_setup();
0163a2e0 1096 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
9907e842 1097 if (!se->ops || !se->ops->save_setup) {
a672b469 1098 continue;
22ea40f4 1099 }
6bd68781
JQ
1100 if (se->ops && se->ops->is_active) {
1101 if (!se->ops->is_active(se->opaque)) {
1102 continue;
1103 }
1104 }
ce39bfc9 1105 save_section_header(f, se, QEMU_VM_SECTION_START);
a672b469 1106
9907e842 1107 ret = se->ops->save_setup(f, se->opaque);
f68945d4 1108 save_section_footer(f, se);
2975725f 1109 if (ret < 0) {
47c8c17a
PB
1110 qemu_file_set_error(f, ret);
1111 break;
2975725f 1112 }
a672b469 1113 }
bd227060
WW
1114
1115 if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
1116 error_report_err(local_err);
1117 }
a672b469
AL
1118}
1119
d1b8eadb
PX
1120int qemu_savevm_state_resume_prepare(MigrationState *s)
1121{
1122 SaveStateEntry *se;
1123 int ret;
1124
1125 trace_savevm_state_resume_prepare();
1126
1127 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1128 if (!se->ops || !se->ops->resume_prepare) {
1129 continue;
1130 }
1131 if (se->ops && se->ops->is_active) {
1132 if (!se->ops->is_active(se->opaque)) {
1133 continue;
1134 }
1135 }
1136 ret = se->ops->resume_prepare(s, se->opaque);
1137 if (ret < 0) {
1138 return ret;
1139 }
1140 }
1141
1142 return 0;
1143}
1144
39346385 1145/*
07f35073 1146 * this function has three return values:
39346385
JQ
1147 * negative: there was one error, and we have -errno.
1148 * 0 : We haven't finished, caller have to go again
1149 * 1 : We have finished, we can go to complete phase
1150 */
35ecd943 1151int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
a672b469
AL
1152{
1153 SaveStateEntry *se;
1154 int ret = 1;
1155
9013dca5 1156 trace_savevm_state_iterate();
0163a2e0 1157 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
16310a3c 1158 if (!se->ops || !se->ops->save_live_iterate) {
a672b469 1159 continue;
22ea40f4 1160 }
a94cd7b8
WY
1161 if (se->ops->is_active &&
1162 !se->ops->is_active(se->opaque)) {
1163 continue;
6bd68781 1164 }
a94cd7b8
WY
1165 if (se->ops->is_active_iterate &&
1166 !se->ops->is_active_iterate(se->opaque)) {
1167 continue;
c865d848 1168 }
35ecd943
DDAG
1169 /*
1170 * In the postcopy phase, any device that doesn't know how to
1171 * do postcopy should have saved it's state in the _complete
1172 * call that's already run, it might get confused if we call
1173 * iterate afterwards.
1174 */
c6467627
VSO
1175 if (postcopy &&
1176 !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
35ecd943
DDAG
1177 continue;
1178 }
aac844ed
JQ
1179 if (qemu_file_rate_limit(f)) {
1180 return 0;
1181 }
464400f6 1182 trace_savevm_section_start(se->idstr, se->section_id);
ce39bfc9
DDAG
1183
1184 save_section_header(f, se, QEMU_VM_SECTION_PART);
a672b469 1185
16310a3c 1186 ret = se->ops->save_live_iterate(f, se->opaque);
a5df2a02 1187 trace_savevm_section_end(se->idstr, se->section_id, ret);
f68945d4 1188 save_section_footer(f, se);
517a13c9 1189
47c8c17a
PB
1190 if (ret < 0) {
1191 qemu_file_set_error(f, ret);
1192 }
2975725f 1193 if (ret <= 0) {
90697be8
JK
1194 /* Do not proceed to the next vmstate before this one reported
1195 completion of the current stage. This serializes the migration
1196 and reduces the probability that a faster changing state is
1197 synchronized over and over again. */
1198 break;
1199 }
a672b469 1200 }
39346385 1201 return ret;
a672b469
AL
1202}
1203
9850c604
AG
1204static bool should_send_vmdesc(void)
1205{
1206 MachineState *machine = MACHINE(qdev_get_machine());
5727309d 1207 bool in_postcopy = migration_in_postcopy();
8421b205 1208 return !machine->suppress_vmdesc && !in_postcopy;
9850c604
AG
1209}
1210
763c906b
DDAG
1211/*
1212 * Calls the save_live_complete_postcopy methods
1213 * causing the last few pages to be sent immediately and doing any associated
1214 * cleanup.
1215 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
1216 * all the other devices, but that happens at the point we switch to postcopy.
1217 */
1218void qemu_savevm_state_complete_postcopy(QEMUFile *f)
1219{
1220 SaveStateEntry *se;
1221 int ret;
1222
1223 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1224 if (!se->ops || !se->ops->save_live_complete_postcopy) {
1225 continue;
1226 }
1227 if (se->ops && se->ops->is_active) {
1228 if (!se->ops->is_active(se->opaque)) {
1229 continue;
1230 }
1231 }
1232 trace_savevm_section_start(se->idstr, se->section_id);
1233 /* Section type */
1234 qemu_put_byte(f, QEMU_VM_SECTION_END);
1235 qemu_put_be32(f, se->section_id);
1236
1237 ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1238 trace_savevm_section_end(se->idstr, se->section_id, ret);
1239 save_section_footer(f, se);
1240 if (ret < 0) {
1241 qemu_file_set_error(f, ret);
1242 return;
1243 }
1244 }
1245
1246 qemu_put_byte(f, QEMU_VM_EOF);
1247 qemu_fflush(f);
1248}
1249
a1fbe750
FZ
1250int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
1251 bool inactivate_disks)
a672b469 1252{
8118f095
AG
1253 QJSON *vmdesc;
1254 int vmdesc_len;
a672b469 1255 SaveStateEntry *se;
2975725f 1256 int ret;
5727309d 1257 bool in_postcopy = migration_in_postcopy();
bd227060
WW
1258 Error *local_err = NULL;
1259
1260 if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
1261 error_report_err(local_err);
1262 }
a672b469 1263
a3e06c3d 1264 trace_savevm_state_complete_precopy();
9013dca5 1265
ea375f9a
JK
1266 cpu_synchronize_all_states();
1267
0163a2e0 1268 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
763c906b 1269 if (!se->ops ||
c6467627
VSO
1270 (in_postcopy && se->ops->has_postcopy &&
1271 se->ops->has_postcopy(se->opaque)) ||
1c0d249d 1272 (in_postcopy && !iterable_only) ||
763c906b 1273 !se->ops->save_live_complete_precopy) {
a672b469 1274 continue;
22ea40f4 1275 }
1c0d249d 1276
6bd68781
JQ
1277 if (se->ops && se->ops->is_active) {
1278 if (!se->ops->is_active(se->opaque)) {
1279 continue;
1280 }
1281 }
464400f6 1282 trace_savevm_section_start(se->idstr, se->section_id);
ce39bfc9
DDAG
1283
1284 save_section_header(f, se, QEMU_VM_SECTION_END);
a672b469 1285
a3e06c3d 1286 ret = se->ops->save_live_complete_precopy(f, se->opaque);
a5df2a02 1287 trace_savevm_section_end(se->idstr, se->section_id, ret);
f68945d4 1288 save_section_footer(f, se);
2975725f 1289 if (ret < 0) {
47c8c17a 1290 qemu_file_set_error(f, ret);
a1fbe750 1291 return -1;
2975725f 1292 }
a672b469
AL
1293 }
1294
1c0d249d 1295 if (iterable_only) {
a1fbe750 1296 return 0;
1c0d249d
DDAG
1297 }
1298
8118f095 1299 vmdesc = qjson_new();
46d702b1 1300 json_prop_int(vmdesc, "page_size", qemu_target_page_size());
8118f095 1301 json_start_array(vmdesc, "devices");
0163a2e0 1302 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
a672b469 1303
22ea40f4 1304 if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
5cecf414 1305 continue;
22ea40f4 1306 }
df896152
JQ
1307 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1308 trace_savevm_section_skip(se->idstr, se->section_id);
1309 continue;
1310 }
1311
464400f6 1312 trace_savevm_section_start(se->idstr, se->section_id);
8118f095
AG
1313
1314 json_start_object(vmdesc, NULL);
1315 json_prop_str(vmdesc, "name", se->idstr);
1316 json_prop_int(vmdesc, "instance_id", se->instance_id);
1317
ce39bfc9 1318 save_section_header(f, se, QEMU_VM_SECTION_FULL);
687433f6
DDAG
1319 ret = vmstate_save(f, se, vmdesc);
1320 if (ret) {
1321 qemu_file_set_error(f, ret);
1322 return ret;
1323 }
a5df2a02 1324 trace_savevm_section_end(se->idstr, se->section_id, 0);
f68945d4 1325 save_section_footer(f, se);
bdf46d64
WY
1326
1327 json_end_object(vmdesc);
a672b469
AL
1328 }
1329
a1fbe750
FZ
1330 if (inactivate_disks) {
1331 /* Inactivate before sending QEMU_VM_EOF so that the
1332 * bdrv_invalidate_cache_all() on the other end won't fail. */
1333 ret = bdrv_inactivate_all();
1334 if (ret) {
c232bf58
DDAG
1335 error_report("%s: bdrv_inactivate_all() failed (%d)",
1336 __func__, ret);
a1fbe750
FZ
1337 qemu_file_set_error(f, ret);
1338 return ret;
1339 }
1340 }
763c906b
DDAG
1341 if (!in_postcopy) {
1342 /* Postcopy stream will still be going */
1343 qemu_put_byte(f, QEMU_VM_EOF);
1344 }
8118f095
AG
1345
1346 json_end_array(vmdesc);
1347 qjson_finish(vmdesc);
1348 vmdesc_len = strlen(qjson_get_str(vmdesc));
1349
9850c604
AG
1350 if (should_send_vmdesc()) {
1351 qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1352 qemu_put_be32(f, vmdesc_len);
1353 qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1354 }
b72fe9e6 1355 qjson_destroy(vmdesc);
8118f095 1356
edaae611 1357 qemu_fflush(f);
a1fbe750 1358 return 0;
a672b469
AL
1359}
1360
c31b098f
DDAG
1361/* Give an estimate of the amount left to be transferred,
1362 * the result is split into the amount for units that can and
1363 * for units that can't do postcopy.
1364 */
faec066a 1365void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
47995026
VSO
1366 uint64_t *res_precopy_only,
1367 uint64_t *res_compatible,
1368 uint64_t *res_postcopy_only)
e4ed1541
JQ
1369{
1370 SaveStateEntry *se;
c31b098f 1371
47995026
VSO
1372 *res_precopy_only = 0;
1373 *res_compatible = 0;
1374 *res_postcopy_only = 0;
c31b098f 1375
e4ed1541 1376
0163a2e0 1377 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
e4ed1541
JQ
1378 if (!se->ops || !se->ops->save_live_pending) {
1379 continue;
1380 }
1381 if (se->ops && se->ops->is_active) {
1382 if (!se->ops->is_active(se->opaque)) {
1383 continue;
1384 }
1385 }
faec066a 1386 se->ops->save_live_pending(f, se->opaque, threshold_size,
47995026
VSO
1387 res_precopy_only, res_compatible,
1388 res_postcopy_only);
e4ed1541 1389 }
e4ed1541
JQ
1390}
1391
ea7415fa 1392void qemu_savevm_state_cleanup(void)
4ec7fcc7
JK
1393{
1394 SaveStateEntry *se;
bd227060
WW
1395 Error *local_err = NULL;
1396
1397 if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
1398 error_report_err(local_err);
1399 }
4ec7fcc7 1400
ea7415fa 1401 trace_savevm_state_cleanup();
0163a2e0 1402 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
70f794fc
JQ
1403 if (se->ops && se->ops->save_cleanup) {
1404 se->ops->save_cleanup(se->opaque);
4ec7fcc7
JK
1405 }
1406 }
1407}
1408
5d80448c 1409static int qemu_savevm_state(QEMUFile *f, Error **errp)
a672b469 1410{
a672b469 1411 int ret;
3e0c8050 1412 MigrationState *ms = migrate_get_current();
6dcf6668 1413 MigrationStatus status;
3e0c8050 1414
3d63da16
JL
1415 if (migration_is_setup_or_active(ms->state) ||
1416 ms->state == MIGRATION_STATUS_CANCELLING ||
1417 ms->state == MIGRATION_STATUS_COLO) {
1418 error_setg(errp, QERR_MIGRATION_ACTIVE);
1419 return -EINVAL;
1420 }
a672b469 1421
ce7c817c
JQ
1422 if (migrate_use_block()) {
1423 error_setg(errp, "Block migration and snapshots are incompatible");
3d63da16 1424 return -EINVAL;
ce7c817c
JQ
1425 }
1426
3d63da16
JL
1427 migrate_init(ms);
1428 ms->to_dst_file = f;
1429
9b095037 1430 qemu_mutex_unlock_iothread();
f796baa1 1431 qemu_savevm_state_header(f);
9907e842 1432 qemu_savevm_state_setup(f);
9b095037
PB
1433 qemu_mutex_lock_iothread();
1434
47c8c17a 1435 while (qemu_file_get_error(f) == 0) {
35ecd943 1436 if (qemu_savevm_state_iterate(f, false) > 0) {
47c8c17a
PB
1437 break;
1438 }
1439 }
a672b469 1440
47c8c17a 1441 ret = qemu_file_get_error(f);
39346385 1442 if (ret == 0) {
a1fbe750 1443 qemu_savevm_state_complete_precopy(f, false, false);
624b9cc2 1444 ret = qemu_file_get_error(f);
39346385 1445 }
15b3b8ea 1446 qemu_savevm_state_cleanup();
04943eba 1447 if (ret != 0) {
5d80448c 1448 error_setg_errno(errp, -ret, "Error while writing VM state");
04943eba 1449 }
6dcf6668 1450
6dcf6668
DL
1451 if (ret != 0) {
1452 status = MIGRATION_STATUS_FAILED;
1453 } else {
1454 status = MIGRATION_STATUS_COMPLETED;
1455 }
1456 migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
f9c8caa0
VSO
1457
1458 /* f is outer parameter, it should not stay in global migration state after
1459 * this function finished */
1460 ms->to_dst_file = NULL;
1461
a672b469
AL
1462 return ret;
1463}
1464
3f6df99d 1465void qemu_savevm_live_state(QEMUFile *f)
a7ae8355 1466{
3f6df99d
ZC
1467 /* save QEMU_VM_SECTION_END section */
1468 qemu_savevm_state_complete_precopy(f, true, false);
1469 qemu_put_byte(f, QEMU_VM_EOF);
1470}
a7ae8355 1471
3f6df99d
ZC
1472int qemu_save_device_state(QEMUFile *f)
1473{
1474 SaveStateEntry *se;
a7ae8355 1475
3f6df99d
ZC
1476 if (!migration_in_colo_state()) {
1477 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1478 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1479 }
a7ae8355
SS
1480 cpu_synchronize_all_states();
1481
0163a2e0 1482 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
687433f6
DDAG
1483 int ret;
1484
a7ae8355
SS
1485 if (se->is_ram) {
1486 continue;
1487 }
22ea40f4 1488 if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
a7ae8355
SS
1489 continue;
1490 }
df896152
JQ
1491 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1492 continue;
1493 }
a7ae8355 1494
ce39bfc9 1495 save_section_header(f, se, QEMU_VM_SECTION_FULL);
a7ae8355 1496
687433f6
DDAG
1497 ret = vmstate_save(f, se, NULL);
1498 if (ret) {
1499 return ret;
1500 }
f68945d4
DDAG
1501
1502 save_section_footer(f, se);
a7ae8355
SS
1503 }
1504
1505 qemu_put_byte(f, QEMU_VM_EOF);
1506
1507 return qemu_file_get_error(f);
1508}
1509
a672b469
AL
1510static SaveStateEntry *find_se(const char *idstr, int instance_id)
1511{
1512 SaveStateEntry *se;
1513
0163a2e0 1514 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
a672b469 1515 if (!strcmp(se->idstr, idstr) &&
4d2ffa08
JK
1516 (instance_id == se->instance_id ||
1517 instance_id == se->alias_id))
a672b469 1518 return se;
7685ee6a
AW
1519 /* Migrating from an older version? */
1520 if (strstr(se->idstr, idstr) && se->compat) {
1521 if (!strcmp(se->compat->idstr, idstr) &&
1522 (instance_id == se->compat->instance_id ||
1523 instance_id == se->alias_id))
1524 return se;
1525 }
a672b469
AL
1526 }
1527 return NULL;
1528}
1529
7b89bf27
DDAG
/* Positive (non-error) results a loadvm command handler may return. */
enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT     =  1,
};
1534
093e3c42
DDAG
1535/* ------ incoming postcopy messages ------ */
1536/* 'advise' arrives before any transfers just to tell us that a postcopy
1537 * *might* happen - it might be skipped if precopy transferred everything
1538 * quickly.
1539 */
875fcd01
GK
1540static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
1541 uint16_t len)
093e3c42
DDAG
1542{
1543 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
e8ca1db2 1544 uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
d3dff7a5 1545 Error *local_err = NULL;
093e3c42
DDAG
1546
1547 trace_loadvm_postcopy_handle_advise();
1548 if (ps != POSTCOPY_INCOMING_NONE) {
1549 error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1550 return -1;
1551 }
1552
875fcd01
GK
1553 switch (len) {
1554 case 0:
1555 if (migrate_postcopy_ram()) {
1556 error_report("RAM postcopy is enabled but have 0 byte advise");
1557 return -EINVAL;
1558 }
58110f0a 1559 return 0;
875fcd01
GK
1560 case 8 + 8:
1561 if (!migrate_postcopy_ram()) {
1562 error_report("RAM postcopy is disabled but have 16 byte advise");
1563 return -EINVAL;
1564 }
1565 break;
1566 default:
1567 error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
1568 return -EINVAL;
58110f0a
VSO
1569 }
1570
d7651f15 1571 if (!postcopy_ram_supported_by_host(mis)) {
328d4d85 1572 postcopy_state_set(POSTCOPY_INCOMING_NONE);
eb59db53
DDAG
1573 return -1;
1574 }
1575
e8ca1db2
DDAG
1576 remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
1577 local_pagesize_summary = ram_pagesize_summary();
1578
1579 if (remote_pagesize_summary != local_pagesize_summary) {
093e3c42 1580 /*
e8ca1db2
DDAG
1581 * This detects two potential causes of mismatch:
1582 * a) A mismatch in host page sizes
1583 * Some combinations of mismatch are probably possible but it gets
1584 * a bit more complicated. In particular we need to place whole
1585 * host pages on the dest at once, and we need to ensure that we
1586 * handle dirtying to make sure we never end up sending part of
1587 * a hostpage on it's own.
1588 * b) The use of different huge page sizes on source/destination
1589 * a more fine grain test is performed during RAM block migration
1590 * but this test here causes a nice early clear failure, and
1591 * also fails when passed to an older qemu that doesn't
1592 * do huge pages.
093e3c42 1593 */
e8ca1db2
DDAG
1594 error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
1595 " d=%" PRIx64 ")",
1596 remote_pagesize_summary, local_pagesize_summary);
093e3c42
DDAG
1597 return -1;
1598 }
1599
1600 remote_tps = qemu_get_be64(mis->from_src_file);
20afaed9 1601 if (remote_tps != qemu_target_page_size()) {
093e3c42
DDAG
1602 /*
1603 * Again, some differences could be dealt with, but for now keep it
1604 * simple.
1605 */
20afaed9
JQ
1606 error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
1607 (int)remote_tps, qemu_target_page_size());
093e3c42
DDAG
1608 return -1;
1609 }
1610
d3dff7a5
DDAG
1611 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
1612 error_report_err(local_err);
1613 return -1;
1614 }
1615
1caddf8a
DDAG
1616 if (ram_postcopy_incoming_init(mis)) {
1617 return -1;
1618 }
1619
1620 postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1621
093e3c42
DDAG
1622 return 0;
1623}
1624
1625/* After postcopy we will be told to throw some pages away since they're
1626 * dirty and will have to be demand fetched. Must happen before CPU is
1627 * started.
1628 * There can be 0..many of these messages, each encoding multiple pages.
1629 */
1630static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1631 uint16_t len)
1632{
1633 int tmp;
1634 char ramid[256];
1635 PostcopyState ps = postcopy_state_get();
1636
1637 trace_loadvm_postcopy_ram_handle_discard();
1638
1639 switch (ps) {
1640 case POSTCOPY_INCOMING_ADVISE:
1641 /* 1st discard */
f9527107 1642 tmp = postcopy_ram_prepare_discard(mis);
093e3c42
DDAG
1643 if (tmp) {
1644 return tmp;
1645 }
1646 break;
1647
1648 case POSTCOPY_INCOMING_DISCARD:
1649 /* Expected state */
1650 break;
1651
1652 default:
1653 error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1654 ps);
1655 return -1;
1656 }
1657 /* We're expecting a
1658 * Version (0)
1659 * a RAM ID string (length byte, name, 0 term)
1660 * then at least 1 16 byte chunk
1661 */
1662 if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1663 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1664 return -1;
1665 }
1666
1667 tmp = qemu_get_byte(mis->from_src_file);
1668 if (tmp != postcopy_ram_discard_version) {
1669 error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1670 return -1;
1671 }
1672
1673 if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1674 error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1675 return -1;
1676 }
1677 tmp = qemu_get_byte(mis->from_src_file);
1678 if (tmp != 0) {
1679 error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1680 return -1;
1681 }
1682
1683 len -= 3 + strlen(ramid);
1684 if (len % 16) {
1685 error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1686 return -1;
1687 }
1688 trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1689 while (len) {
093e3c42
DDAG
1690 uint64_t start_addr, block_length;
1691 start_addr = qemu_get_be64(mis->from_src_file);
1692 block_length = qemu_get_be64(mis->from_src_file);
1693
1694 len -= 16;
aaa2064c 1695 int ret = ram_discard_range(ramid, start_addr, block_length);
093e3c42
DDAG
1696 if (ret) {
1697 return ret;
1698 }
093e3c42
DDAG
1699 }
1700 trace_loadvm_postcopy_ram_handle_discard_end();
1701
1702 return 0;
1703}
1704
c76201ab
DDAG
1705/*
1706 * Triggered by a postcopy_listen command; this thread takes over reading
1707 * the input stream, leaving the main thread free to carry on loading the rest
1708 * of the device state (from RAM).
1709 * (TODO:This could do with being in a postcopy file - but there again it's
1710 * just another input loop, not that postcopy specific)
1711 */
1712static void *postcopy_ram_listen_thread(void *opaque)
1713{
c76201ab 1714 MigrationIncomingState *mis = migration_incoming_get_current();
b411b844 1715 QEMUFile *f = mis->from_src_file;
c76201ab
DDAG
1716 int load_res;
1717
6ba996bb
DDAG
1718 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1719 MIGRATION_STATUS_POSTCOPY_ACTIVE);
c76201ab
DDAG
1720 qemu_sem_post(&mis->listen_thread_sem);
1721 trace_postcopy_ram_listen_thread_start();
1722
74637e6f 1723 rcu_register_thread();
c76201ab
DDAG
1724 /*
1725 * Because we're a thread and not a coroutine we can't yield
1726 * in qemu_file, and thus we must be blocking now.
1727 */
1728 qemu_file_set_blocking(f, true);
1729 load_res = qemu_loadvm_state_main(f, mis);
b411b844
PX
1730
1731 /*
1732 * This is tricky, but, mis->from_src_file can change after it
1733 * returns, when postcopy recovery happened. In the future, we may
1734 * want a wrapper for the QEMUFile handle.
1735 */
1736 f = mis->from_src_file;
1737
c76201ab
DDAG
1738 /* And non-blocking again so we don't block in any cleanup */
1739 qemu_file_set_blocking(f, false);
1740
1741 trace_postcopy_ram_listen_thread_exit();
1742 if (load_res < 0) {
1743 error_report("%s: loadvm failed: %d", __func__, load_res);
1744 qemu_file_set_error(f, load_res);
6ba996bb
DDAG
1745 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1746 MIGRATION_STATUS_FAILED);
c76201ab
DDAG
1747 } else {
1748 /*
1749 * This looks good, but it's possible that the device loading in the
1750 * main thread hasn't finished yet, and so we might not be in 'RUN'
1751 * state yet; wait for the end of the main thread.
1752 */
1753 qemu_event_wait(&mis->main_thread_load_event);
1754 }
1755 postcopy_ram_incoming_cleanup(mis);
c76201ab
DDAG
1756
1757 if (load_res < 0) {
1758 /*
1759 * If something went wrong then we have a bad state so exit;
1760 * depending how far we got it might be possible at this point
1761 * to leave the guest running and fire MCEs for pages that never
1762 * arrived as a desperate recovery step.
1763 */
74637e6f 1764 rcu_unregister_thread();
c76201ab
DDAG
1765 exit(EXIT_FAILURE);
1766 }
1767
6ba996bb
DDAG
1768 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1769 MIGRATION_STATUS_COMPLETED);
1770 /*
1771 * If everything has worked fine, then the main thread has waited
1772 * for us to start, and we're the last use of the mis.
1773 * (If something broke then qemu will have to exit anyway since it's
1774 * got a bad migration state).
1775 */
1776 migration_incoming_state_destroy();
acb5ea86 1777 qemu_loadvm_state_cleanup();
6ba996bb 1778
74637e6f 1779 rcu_unregister_thread();
9cf4bb87 1780 mis->have_listen_thread = false;
c76201ab
DDAG
1781 return NULL;
1782}
1783
093e3c42
DDAG
1784/* After this message we must be able to immediately receive postcopy data */
1785static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1786{
1787 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1788 trace_loadvm_postcopy_handle_listen();
6864a7b5
DDAG
1789 Error *local_err = NULL;
1790
093e3c42
DDAG
1791 if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1792 error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1793 return -1;
1794 }
f9527107
DDAG
1795 if (ps == POSTCOPY_INCOMING_ADVISE) {
1796 /*
1797 * A rare case, we entered listen without having to do any discards,
1798 * so do the setup that's normally done at the time of the 1st discard.
1799 */
58110f0a
VSO
1800 if (migrate_postcopy_ram()) {
1801 postcopy_ram_prepare_discard(mis);
1802 }
f9527107 1803 }
093e3c42 1804
f0a227ad
DDAG
1805 /*
1806 * Sensitise RAM - can now generate requests for blocks that don't exist
1807 * However, at this point the CPU shouldn't be running, and the IO
1808 * shouldn't be doing anything yet so don't actually expect requests
1809 */
58110f0a
VSO
1810 if (migrate_postcopy_ram()) {
1811 if (postcopy_ram_enable_notify(mis)) {
91b02dc7 1812 postcopy_ram_incoming_cleanup(mis);
58110f0a
VSO
1813 return -1;
1814 }
f0a227ad
DDAG
1815 }
1816
6864a7b5
DDAG
1817 if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
1818 error_report_err(local_err);
1819 return -1;
1820 }
1821
c76201ab
DDAG
1822 if (mis->have_listen_thread) {
1823 error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1824 return -1;
1825 }
1826
1827 mis->have_listen_thread = true;
1828 /* Start up the listening thread and wait for it to signal ready */
1829 qemu_sem_init(&mis->listen_thread_sem, 0);
1830 qemu_thread_create(&mis->listen_thread, "postcopy/listen",
b411b844 1831 postcopy_ram_listen_thread, NULL,
a587a3fe 1832 QEMU_THREAD_DETACHED);
c76201ab
DDAG
1833 qemu_sem_wait(&mis->listen_thread_sem);
1834 qemu_sem_destroy(&mis->listen_thread_sem);
1835
093e3c42
DDAG
1836 return 0;
1837}
1838
86469922
DL
1839
1840typedef struct {
1841 QEMUBH *bh;
1842} HandleRunBhData;
1843
ea6a55bc 1844static void loadvm_postcopy_handle_run_bh(void *opaque)
093e3c42 1845{
27c6825b 1846 Error *local_err = NULL;
86469922 1847 HandleRunBhData *data = opaque;
7659505c 1848 MigrationIncomingState *mis = migration_incoming_get_current();
093e3c42 1849
27c6825b
DDAG
1850 /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1851 * in migration.c
1852 */
1853 cpu_synchronize_all_post_init();
1854
7659505c 1855 qemu_announce_self(&mis->announce_timer, migrate_announce_params());
27c6825b 1856
ace21a58
KW
1857 /* Make sure all file formats flush their mutable metadata.
1858 * If we get an error here, just don't restart the VM yet. */
27c6825b 1859 bdrv_invalidate_cache_all(&local_err);
0042fd36 1860 if (local_err) {
ace21a58 1861 error_report_err(local_err);
0042fd36
KW
1862 local_err = NULL;
1863 autostart = false;
1864 }
1865
27c6825b 1866 trace_loadvm_postcopy_handle_run_cpu_sync();
27c6825b
DDAG
1867
1868 trace_loadvm_postcopy_handle_run_vmstart();
1869
b35ebdf0
VSO
1870 dirty_bitmap_mig_before_vm_start();
1871
093e3c42
DDAG
1872 if (autostart) {
1873 /* Hold onto your hats, starting the CPU */
1874 vm_start();
1875 } else {
1876 /* leave it paused and let management decide when to start the CPU */
1877 runstate_set(RUN_STATE_PAUSED);
1878 }
1879
86469922
DL
1880 qemu_bh_delete(data->bh);
1881 g_free(data);
ea6a55bc
DL
1882}
1883
1884/* After all discards we can start running and asking for pages */
1885static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1886{
1887 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
86469922 1888 HandleRunBhData *data;
ea6a55bc
DL
1889
1890 trace_loadvm_postcopy_handle_run();
1891 if (ps != POSTCOPY_INCOMING_LISTENING) {
1892 error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1893 return -1;
1894 }
1895
86469922
DL
1896 data = g_new(HandleRunBhData, 1);
1897 data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data);
1898 qemu_bh_schedule(data->bh);
ea6a55bc 1899
27c6825b
DDAG
1900 /* We need to finish reading the stream from the package
1901 * and also stop reading anything more from the stream that loaded the
1902 * package (since it's now being read by the listener thread).
1903 * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1904 */
1905 return LOADVM_QUIT;
093e3c42
DDAG
1906}
1907
3f5875ec
PX
1908static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
1909{
1910 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
1911 error_report("%s: illegal resume received", __func__);
1912 /* Don't fail the load, only for this. */
1913 return 0;
1914 }
1915
1916 /*
1917 * This means source VM is ready to resume the postcopy migration.
1918 * It's time to switch state and release the fault thread to
1919 * continue service page faults.
1920 */
1921 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
1922 MIGRATION_STATUS_POSTCOPY_ACTIVE);
1923 qemu_sem_post(&mis->postcopy_pause_sem_fault);
1924
1925 trace_loadvm_postcopy_handle_resume();
1926
13955b89
PX
1927 /* Tell source that "we are ready" */
1928 migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
3f5875ec
PX
1929
1930 return 0;
1931}
1932
c76ca188 1933/**
11cf1d98
DDAG
1934 * Immediately following this command is a blob of data containing an embedded
1935 * chunk of migration stream; read it and load it.
1936 *
1937 * @mis: Incoming state
1938 * @length: Length of packaged data to read
c76ca188 1939 *
11cf1d98
DDAG
1940 * Returns: Negative values on error
1941 *
1942 */
1943static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1944{
1945 int ret;
61b67d47
DB
1946 size_t length;
1947 QIOChannelBuffer *bioc;
11cf1d98
DDAG
1948
1949 length = qemu_get_be32(mis->from_src_file);
1950 trace_loadvm_handle_cmd_packaged(length);
1951
1952 if (length > MAX_VM_CMD_PACKAGED_SIZE) {
61b67d47 1953 error_report("Unreasonably large packaged state: %zu", length);
11cf1d98
DDAG
1954 return -1;
1955 }
61b67d47
DB
1956
1957 bioc = qio_channel_buffer_new(length);
6f01f136 1958 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
61b67d47
DB
1959 ret = qemu_get_buffer(mis->from_src_file,
1960 bioc->data,
1961 length);
11cf1d98 1962 if (ret != length) {
61b67d47
DB
1963 object_unref(OBJECT(bioc));
1964 error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
9af9e0fe 1965 ret, length);
11cf1d98
DDAG
1966 return (ret < 0) ? ret : -EAGAIN;
1967 }
61b67d47 1968 bioc->usage += length;
11cf1d98
DDAG
1969 trace_loadvm_handle_cmd_packaged_received(ret);
1970
61b67d47 1971 QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
11cf1d98
DDAG
1972
1973 ret = qemu_loadvm_state_main(packf, mis);
1974 trace_loadvm_handle_cmd_packaged_main(ret);
1975 qemu_fclose(packf);
61b67d47 1976 object_unref(OBJECT(bioc));
11cf1d98
DDAG
1977
1978 return ret;
1979}
1980
f25d4225
PX
1981/*
1982 * Handle request that source requests for recved_bitmap on
1983 * destination. Payload format:
1984 *
1985 * len (1 byte) + ramblock_name (<255 bytes)
1986 */
1987static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
1988 uint16_t len)
1989{
1990 QEMUFile *file = mis->from_src_file;
1991 RAMBlock *rb;
1992 char block_name[256];
1993 size_t cnt;
1994
1995 cnt = qemu_get_counted_string(file, block_name);
1996 if (!cnt) {
1997 error_report("%s: failed to read block name", __func__);
1998 return -EINVAL;
1999 }
2000
2001 /* Validate before using the data */
2002 if (qemu_file_get_error(file)) {
2003 return qemu_file_get_error(file);
2004 }
2005
2006 if (len != cnt + 1) {
2007 error_report("%s: invalid payload length (%d)", __func__, len);
2008 return -EINVAL;
2009 }
2010
2011 rb = qemu_ram_block_by_name(block_name);
2012 if (!rb) {
2013 error_report("%s: block '%s' not found", __func__, block_name);
2014 return -EINVAL;
2015 }
2016
a335debb 2017 migrate_send_rp_recv_bitmap(mis, block_name);
f25d4225
PX
2018
2019 trace_loadvm_handle_recv_bitmap(block_name);
2020
2021 return 0;
2022}
2023
aad555c2
ZC
2024static int loadvm_process_enable_colo(MigrationIncomingState *mis)
2025{
2026 migration_incoming_enable_colo();
13af18f2 2027 return colo_init_ram_cache();
aad555c2
ZC
2028}
2029
11cf1d98
DDAG
2030/*
2031 * Process an incoming 'QEMU_VM_COMMAND'
2032 * 0 just a normal return
2033 * LOADVM_QUIT All good, but exit the loop
2034 * <0 Error
c76ca188
DDAG
2035 */
2036static int loadvm_process_command(QEMUFile *f)
2037{
2e37701e 2038 MigrationIncomingState *mis = migration_incoming_get_current();
c76ca188
DDAG
2039 uint16_t cmd;
2040 uint16_t len;
2e37701e 2041 uint32_t tmp32;
c76ca188
DDAG
2042
2043 cmd = qemu_get_be16(f);
2044 len = qemu_get_be16(f);
2045
7a9ddfbf
PX
2046 /* Check validity before continue processing of cmds */
2047 if (qemu_file_get_error(f)) {
2048 return qemu_file_get_error(f);
2049 }
2050
c76ca188
DDAG
2051 trace_loadvm_process_command(cmd, len);
2052 if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
2053 error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
2054 return -EINVAL;
2055 }
2056
2057 if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
2058 error_report("%s received with bad length - expecting %zu, got %d",
2059 mig_cmd_args[cmd].name,
2060 (size_t)mig_cmd_args[cmd].len, len);
2061 return -ERANGE;
2062 }
2063
2064 switch (cmd) {
2e37701e
DDAG
2065 case MIG_CMD_OPEN_RETURN_PATH:
2066 if (mis->to_src_file) {
2067 error_report("CMD_OPEN_RETURN_PATH called when RP already open");
2068 /* Not really a problem, so don't give up */
2069 return 0;
2070 }
2071 mis->to_src_file = qemu_file_get_return_path(f);
2072 if (!mis->to_src_file) {
2073 error_report("CMD_OPEN_RETURN_PATH failed");
2074 return -1;
2075 }
2076 break;
2077
2078 case MIG_CMD_PING:
2079 tmp32 = qemu_get_be32(f);
2080 trace_loadvm_process_command_ping(tmp32);
2081 if (!mis->to_src_file) {
2082 error_report("CMD_PING (0x%x) received with no return path",
2083 tmp32);
2084 return -1;
2085 }
6decec93 2086 migrate_send_rp_pong(mis, tmp32);
2e37701e 2087 break;
093e3c42 2088
11cf1d98
DDAG
2089 case MIG_CMD_PACKAGED:
2090 return loadvm_handle_cmd_packaged(mis);
2091
093e3c42 2092 case MIG_CMD_POSTCOPY_ADVISE:
875fcd01 2093 return loadvm_postcopy_handle_advise(mis, len);
093e3c42
DDAG
2094
2095 case MIG_CMD_POSTCOPY_LISTEN:
2096 return loadvm_postcopy_handle_listen(mis);
2097
2098 case MIG_CMD_POSTCOPY_RUN:
2099 return loadvm_postcopy_handle_run(mis);
2100
2101 case MIG_CMD_POSTCOPY_RAM_DISCARD:
2102 return loadvm_postcopy_ram_handle_discard(mis, len);
f25d4225 2103
3f5875ec
PX
2104 case MIG_CMD_POSTCOPY_RESUME:
2105 return loadvm_postcopy_handle_resume(mis);
2106
f25d4225
PX
2107 case MIG_CMD_RECV_BITMAP:
2108 return loadvm_handle_recv_bitmap(mis, len);
aad555c2
ZC
2109
2110 case MIG_CMD_ENABLE_COLO:
2111 return loadvm_process_enable_colo(mis);
c76ca188
DDAG
2112 }
2113
2114 return 0;
2115}
2116
59f39a47
DDAG
2117/*
2118 * Read a footer off the wire and check that it matches the expected section
2119 *
2120 * Returns: true if the footer was good
2121 * false if there is a problem (and calls error_report to say why)
2122 */
0f42f657 2123static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
59f39a47 2124{
7a9ddfbf 2125 int ret;
59f39a47
DDAG
2126 uint8_t read_mark;
2127 uint32_t read_section_id;
2128
15c38503 2129 if (!migrate_get_current()->send_section_footer) {
59f39a47
DDAG
2130 /* No footer to check */
2131 return true;
2132 }
2133
2134 read_mark = qemu_get_byte(f);
2135
7a9ddfbf
PX
2136 ret = qemu_file_get_error(f);
2137 if (ret) {
2138 error_report("%s: Read section footer failed: %d",
2139 __func__, ret);
2140 return false;
2141 }
2142
59f39a47 2143 if (read_mark != QEMU_VM_SECTION_FOOTER) {
0f42f657 2144 error_report("Missing section footer for %s", se->idstr);
59f39a47
DDAG
2145 return false;
2146 }
2147
2148 read_section_id = qemu_get_be32(f);
0f42f657 2149 if (read_section_id != se->load_section_id) {
59f39a47
DDAG
2150 error_report("Mismatched section id in footer for %s -"
2151 " read 0x%x expected 0x%x",
0f42f657 2152 se->idstr, read_section_id, se->load_section_id);
59f39a47
DDAG
2153 return false;
2154 }
2155
2156 /* All good */
2157 return true;
2158}
2159
fb3520a8
HZ
2160static int
2161qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
2162{
2163 uint32_t instance_id, version_id, section_id;
2164 SaveStateEntry *se;
fb3520a8
HZ
2165 char idstr[256];
2166 int ret;
2167
2168 /* Read section start */
2169 section_id = qemu_get_be32(f);
2170 if (!qemu_get_counted_string(f, idstr)) {
2171 error_report("Unable to read ID string for section %u",
2172 section_id);
2173 return -EINVAL;
2174 }
2175 instance_id = qemu_get_be32(f);
2176 version_id = qemu_get_be32(f);
2177
7a9ddfbf
PX
2178 ret = qemu_file_get_error(f);
2179 if (ret) {
2180 error_report("%s: Failed to read instance/version ID: %d",
2181 __func__, ret);
2182 return ret;
2183 }
2184
fb3520a8
HZ
2185 trace_qemu_loadvm_state_section_startfull(section_id, idstr,
2186 instance_id, version_id);
2187 /* Find savevm section */
2188 se = find_se(idstr, instance_id);
2189 if (se == NULL) {
827beacb
JRZ
2190 error_report("Unknown savevm section or instance '%s' %d. "
2191 "Make sure that your current VM setup matches your "
2192 "saved VM setup, including any hotplugged devices",
fb3520a8
HZ
2193 idstr, instance_id);
2194 return -EINVAL;
2195 }
2196
2197 /* Validate version */
2198 if (version_id > se->version_id) {
2199 error_report("savevm: unsupported version %d for '%s' v%d",
2200 version_id, idstr, se->version_id);
2201 return -EINVAL;
2202 }
0f42f657
JQ
2203 se->load_version_id = version_id;
2204 se->load_section_id = section_id;
fb3520a8 2205
88c16567
WC
2206 /* Validate if it is a device's state */
2207 if (xen_enabled() && se->is_ram) {
2208 error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
2209 return -EINVAL;
2210 }
2211
3a011c26 2212 ret = vmstate_load(f, se);
fb3520a8
HZ
2213 if (ret < 0) {
2214 error_report("error while loading state for instance 0x%x of"
2215 " device '%s'", instance_id, idstr);
2216 return ret;
2217 }
0f42f657 2218 if (!check_section_footer(f, se)) {
fb3520a8
HZ
2219 return -EINVAL;
2220 }
2221
2222 return 0;
2223}
2224
2225static int
2226qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
2227{
2228 uint32_t section_id;
0f42f657 2229 SaveStateEntry *se;
fb3520a8
HZ
2230 int ret;
2231
2232 section_id = qemu_get_be32(f);
2233
7a9ddfbf
PX
2234 ret = qemu_file_get_error(f);
2235 if (ret) {
2236 error_report("%s: Failed to read section ID: %d",
2237 __func__, ret);
2238 return ret;
2239 }
2240
fb3520a8 2241 trace_qemu_loadvm_state_section_partend(section_id);
0f42f657
JQ
2242 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2243 if (se->load_section_id == section_id) {
fb3520a8
HZ
2244 break;
2245 }
2246 }
0f42f657 2247 if (se == NULL) {
fb3520a8
HZ
2248 error_report("Unknown savevm section %d", section_id);
2249 return -EINVAL;
2250 }
2251
3a011c26 2252 ret = vmstate_load(f, se);
fb3520a8
HZ
2253 if (ret < 0) {
2254 error_report("error while loading state section id %d(%s)",
0f42f657 2255 section_id, se->idstr);
fb3520a8
HZ
2256 return ret;
2257 }
0f42f657 2258 if (!check_section_footer(f, se)) {
fb3520a8
HZ
2259 return -EINVAL;
2260 }
2261
2262 return 0;
2263}
2264
16015d32
WY
2265static int qemu_loadvm_state_header(QEMUFile *f)
2266{
2267 unsigned int v;
2268 int ret;
2269
2270 v = qemu_get_be32(f);
2271 if (v != QEMU_VM_FILE_MAGIC) {
2272 error_report("Not a migration stream");
2273 return -EINVAL;
2274 }
2275
2276 v = qemu_get_be32(f);
2277 if (v == QEMU_VM_FILE_VERSION_COMPAT) {
2278 error_report("SaveVM v2 format is obsolete and don't work anymore");
2279 return -ENOTSUP;
2280 }
2281 if (v != QEMU_VM_FILE_VERSION) {
2282 error_report("Unsupported migration stream version");
2283 return -ENOTSUP;
2284 }
2285
2286 if (migrate_get_current()->send_configuration) {
2287 if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
2288 error_report("Configuration section missing");
2289 qemu_loadvm_state_cleanup();
2290 return -EINVAL;
2291 }
2292 ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
2293
2294 if (ret) {
2295 qemu_loadvm_state_cleanup();
2296 return ret;
2297 }
2298 }
2299 return 0;
2300}
2301
acb5ea86
JQ
2302static int qemu_loadvm_state_setup(QEMUFile *f)
2303{
2304 SaveStateEntry *se;
2305 int ret;
2306
2307 trace_loadvm_state_setup();
2308 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2309 if (!se->ops || !se->ops->load_setup) {
2310 continue;
2311 }
2312 if (se->ops && se->ops->is_active) {
2313 if (!se->ops->is_active(se->opaque)) {
2314 continue;
2315 }
2316 }
2317
2318 ret = se->ops->load_setup(f, se->opaque);
2319 if (ret < 0) {
2320 qemu_file_set_error(f, ret);
2321 error_report("Load state of device %s failed", se->idstr);
2322 return ret;
2323 }
2324 }
2325 return 0;
2326}
2327
2328void qemu_loadvm_state_cleanup(void)
2329{
2330 SaveStateEntry *se;
2331
2332 trace_loadvm_state_cleanup();
2333 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
2334 if (se->ops && se->ops->load_cleanup) {
2335 se->ops->load_cleanup(se->opaque);
2336 }
2337 }
2338}
2339
b411b844
PX
2340/* Return true if we should continue the migration, or false. */
2341static bool postcopy_pause_incoming(MigrationIncomingState *mis)
2342{
2343 trace_postcopy_pause_incoming();
2344
02affd41
PX
2345 /* Clear the triggered bit to allow one recovery */
2346 mis->postcopy_recover_triggered = false;
2347
b411b844
PX
2348 assert(mis->from_src_file);
2349 qemu_file_shutdown(mis->from_src_file);
2350 qemu_fclose(mis->from_src_file);
2351 mis->from_src_file = NULL;
2352
2353 assert(mis->to_src_file);
2354 qemu_file_shutdown(mis->to_src_file);
2355 qemu_mutex_lock(&mis->rp_mutex);
2356 qemu_fclose(mis->to_src_file);
2357 mis->to_src_file = NULL;
2358 qemu_mutex_unlock(&mis->rp_mutex);
2359
eed1cc78
PX
2360 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2361 MIGRATION_STATUS_POSTCOPY_PAUSED);
2362
3a7804c3
PX
2363 /* Notify the fault thread for the invalidated file handle */
2364 postcopy_fault_thread_notify(mis);
2365
b411b844
PX
2366 error_report("Detected IO failure for postcopy. "
2367 "Migration paused.");
2368
2369 while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2370 qemu_sem_wait(&mis->postcopy_pause_sem_dst);
2371 }
2372
2373 trace_postcopy_pause_incoming_continued();
2374
2375 return true;
2376}
2377
3f6df99d 2378int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
1a8f46f8 2379{
a672b469 2380 uint8_t section_type;
ccb783c3 2381 int ret = 0;
61964c23 2382
b411b844 2383retry:
7a9ddfbf
PX
2384 while (true) {
2385 section_type = qemu_get_byte(f);
2386
2387 if (qemu_file_get_error(f)) {
2388 ret = qemu_file_get_error(f);
2389 break;
2390 }
2391
a5df2a02 2392 trace_qemu_loadvm_state_section(section_type);
a672b469
AL
2393 switch (section_type) {
2394 case QEMU_VM_SECTION_START:
2395 case QEMU_VM_SECTION_FULL:
fb3520a8 2396 ret = qemu_loadvm_section_start_full(f, mis);
b5a22e4a 2397 if (ret < 0) {
ccb783c3 2398 goto out;
b5a22e4a 2399 }
a672b469
AL
2400 break;
2401 case QEMU_VM_SECTION_PART:
2402 case QEMU_VM_SECTION_END:
fb3520a8 2403 ret = qemu_loadvm_section_part_end(f, mis);
b5a22e4a 2404 if (ret < 0) {
ccb783c3 2405 goto out;
b5a22e4a 2406 }
a672b469 2407 break;
c76ca188
DDAG
2408 case QEMU_VM_COMMAND:
2409 ret = loadvm_process_command(f);
7b89bf27
DDAG
2410 trace_qemu_loadvm_state_section_command(ret);
2411 if ((ret < 0) || (ret & LOADVM_QUIT)) {
ccb783c3 2412 goto out;
c76ca188
DDAG
2413 }
2414 break;
7a9ddfbf
PX
2415 case QEMU_VM_EOF:
2416 /* This is the end of migration */
2417 goto out;
a672b469 2418 default:
6a64b644 2419 error_report("Unknown savevm section type %d", section_type);
ccb783c3
DDAG
2420 ret = -EINVAL;
2421 goto out;
a672b469
AL
2422 }
2423 }
2424
ccb783c3
DDAG
2425out:
2426 if (ret < 0) {
2427 qemu_file_set_error(f, ret);
b411b844
PX
2428
2429 /*
fd037a65
PX
2430 * If we are during an active postcopy, then we pause instead
2431 * of bail out to at least keep the VM's dirty data. Note
2432 * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
2433 * during which we're still receiving device states and we
2434 * still haven't yet started the VM on destination.
b411b844
PX
2435 */
2436 if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
fd037a65 2437 postcopy_pause_incoming(mis)) {
b411b844
PX
2438 /* Reset f to point to the newly created channel */
2439 f = mis->from_src_file;
2440 goto retry;
2441 }
ccb783c3
DDAG
2442 }
2443 return ret;
7b89bf27
DDAG
2444}
2445
2446int qemu_loadvm_state(QEMUFile *f)
2447{
2448 MigrationIncomingState *mis = migration_incoming_get_current();
2449 Error *local_err = NULL;
7b89bf27
DDAG
2450 int ret;
2451
2452 if (qemu_savevm_state_blocked(&local_err)) {
2453 error_report_err(local_err);
2454 return -EINVAL;
2455 }
2456
16015d32
WY
2457 ret = qemu_loadvm_state_header(f);
2458 if (ret) {
2459 return ret;
7b89bf27
DDAG
2460 }
2461
9e14b849
WY
2462 if (qemu_loadvm_state_setup(f) != 0) {
2463 return -EINVAL;
2464 }
2465
75e972da
DG
2466 cpu_synchronize_all_pre_loadvm();
2467
7b89bf27
DDAG
2468 ret = qemu_loadvm_state_main(f, mis);
2469 qemu_event_set(&mis->main_thread_load_event);
2470
2471 trace_qemu_loadvm_state_post_main(ret);
2472
c76201ab
DDAG
2473 if (mis->have_listen_thread) {
2474 /* Listen thread still going, can't clean up yet */
2475 return ret;
2476 }
2477
7b89bf27
DDAG
2478 if (ret == 0) {
2479 ret = qemu_file_get_error(f);
2480 }
1925cebc
AG
2481
2482 /*
2483 * Try to read in the VMDESC section as well, so that dumping tools that
2484 * intercept our migration stream have the chance to see it.
2485 */
1aca9a5f
DDAG
2486
2487 /* We've got to be careful; if we don't read the data and just shut the fd
2488 * then the sender can error if we close while it's still sending.
2489 * We also mustn't read data that isn't there; some transports (RDMA)
2490 * will stall waiting for that data when the source has already closed.
2491 */
7b89bf27 2492 if (ret == 0 && should_send_vmdesc()) {
1aca9a5f
DDAG
2493 uint8_t *buf;
2494 uint32_t size;
7b89bf27 2495 uint8_t section_type = qemu_get_byte(f);
1aca9a5f
DDAG
2496
2497 if (section_type != QEMU_VM_VMDESCRIPTION) {
2498 error_report("Expected vmdescription section, but got %d",
2499 section_type);
2500 /*
2501 * It doesn't seem worth failing at this point since
2502 * we apparently have an otherwise valid VM state
2503 */
2504 } else {
2505 buf = g_malloc(0x1000);
2506 size = qemu_get_be32(f);
2507
2508 while (size > 0) {
2509 uint32_t read_chunk = MIN(size, 0x1000);
2510 qemu_get_buffer(f, buf, read_chunk);
2511 size -= read_chunk;
2512 }
2513 g_free(buf);
1925cebc 2514 }
1925cebc
AG
2515 }
2516
acb5ea86 2517 qemu_loadvm_state_cleanup();
ea375f9a
JK
2518 cpu_synchronize_all_post_init();
2519
a672b469
AL
2520 return ret;
2521}
2522
3f6df99d
ZC
2523int qemu_load_device_state(QEMUFile *f)
2524{
2525 MigrationIncomingState *mis = migration_incoming_get_current();
2526 int ret;
2527
2528 /* Load QEMU_VM_SECTION_FULL section */
2529 ret = qemu_loadvm_state_main(f, mis);
2530 if (ret < 0) {
2531 error_report("Failed to load device state: %d", ret);
2532 return ret;
2533 }
2534
2535 cpu_synchronize_all_post_init();
2536 return 0;
2537}
2538
5e22479a 2539int save_snapshot(const char *name, Error **errp)
a672b469
AL
2540{
2541 BlockDriverState *bs, *bs1;
2542 QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
ac8c19ba 2543 int ret = -1;
a672b469
AL
2544 QEMUFile *f;
2545 int saved_vm_running;
c2c9a466 2546 uint64_t vm_state_size;
68b891ec 2547 qemu_timeval tv;
7d631a11 2548 struct tm tm;
79b3c12a 2549 AioContext *aio_context;
a672b469 2550
5aaac467 2551 if (migration_is_blocked(errp)) {
aded9dfa 2552 return ret;
5aaac467
PB
2553 }
2554
377b21cc 2555 if (!replay_can_snapshot()) {
4dd32b3d
MA
2556 error_setg(errp, "Record/replay does not allow making snapshot "
2557 "right now. Try once more later.");
377b21cc
PD
2558 return ret;
2559 }
2560
e9ff957a 2561 if (!bdrv_all_can_snapshot(&bs)) {
927d6638
JQ
2562 error_setg(errp, "Device '%s' is writable but does not support "
2563 "snapshots", bdrv_get_device_name(bs));
ac8c19ba 2564 return ret;
feeee5ac
MDCF
2565 }
2566
0b461605 2567 /* Delete old snapshots of the same name */
ac8c19ba 2568 if (name) {
927d6638 2569 ret = bdrv_all_delete_snapshot(name, &bs1, errp);
ac8c19ba 2570 if (ret < 0) {
927d6638
JQ
2571 error_prepend(errp, "Error while deleting snapshot on device "
2572 "'%s': ", bdrv_get_device_name(bs1));
ac8c19ba
PD
2573 return ret;
2574 }
0b461605
DL
2575 }
2576
7cb14481
DL
2577 bs = bdrv_all_find_vmstate_bs();
2578 if (bs == NULL) {
927d6638 2579 error_setg(errp, "No block device can accept snapshots");
ac8c19ba 2580 return ret;
a672b469 2581 }
79b3c12a 2582 aio_context = bdrv_get_aio_context(bs);
a672b469 2583
1354869c 2584 saved_vm_running = runstate_is_running();
560d027b
JQ
2585
2586 ret = global_state_store();
2587 if (ret) {
927d6638 2588 error_setg(errp, "Error saving global state");
ac8c19ba 2589 return ret;
560d027b 2590 }
0461d5a6 2591 vm_stop(RUN_STATE_SAVE_VM);
a672b469 2592
8649f2f9
SH
2593 bdrv_drain_all_begin();
2594
79b3c12a
DL
2595 aio_context_acquire(aio_context);
2596
cb499fb2 2597 memset(sn, 0, sizeof(*sn));
a672b469
AL
2598
2599 /* fill auxiliary fields */
68b891ec 2600 qemu_gettimeofday(&tv);
a672b469
AL
2601 sn->date_sec = tv.tv_sec;
2602 sn->date_nsec = tv.tv_usec * 1000;
bc72ad67 2603 sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
a672b469 2604
7d631a11
MDCF
2605 if (name) {
2606 ret = bdrv_snapshot_find(bs, old_sn, name);
2607 if (ret >= 0) {
2608 pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2609 pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2610 } else {
2611 pstrcpy(sn->name, sizeof(sn->name), name);
2612 }
2613 } else {
d7d9b528
BS
2614 /* cast below needed for OpenBSD where tv_sec is still 'long' */
2615 localtime_r((const time_t *)&tv.tv_sec, &tm);
7d631a11 2616 strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
7d631a11
MDCF
2617 }
2618
a672b469 2619 /* save the VM state */
45566e9c 2620 f = qemu_fopen_bdrv(bs, 1);
a672b469 2621 if (!f) {
927d6638 2622 error_setg(errp, "Could not open VM state file");
a672b469
AL
2623 goto the_end;
2624 }
927d6638 2625 ret = qemu_savevm_state(f, errp);
2d22b18f 2626 vm_state_size = qemu_ftell(f);
a672b469
AL
2627 qemu_fclose(f);
2628 if (ret < 0) {
a672b469
AL
2629 goto the_end;
2630 }
2631
17e2a4a4
SH
2632 /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
2633 * for itself. BDRV_POLL_WHILE() does not support nested locking because
2634 * it only releases the lock once. Therefore synchronous I/O will deadlock
2635 * unless we release the AioContext before bdrv_all_create_snapshot().
2636 */
2637 aio_context_release(aio_context);
2638 aio_context = NULL;
2639
a9085f9b
DL
2640 ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2641 if (ret < 0) {
927d6638
JQ
2642 error_setg(errp, "Error while creating snapshot on '%s'",
2643 bdrv_get_device_name(bs));
ac8c19ba 2644 goto the_end;
a672b469
AL
2645 }
2646
ac8c19ba
PD
2647 ret = 0;
2648
a672b469 2649 the_end:
17e2a4a4
SH
2650 if (aio_context) {
2651 aio_context_release(aio_context);
2652 }
8649f2f9
SH
2653
2654 bdrv_drain_all_end();
2655
38ff78d3 2656 if (saved_vm_running) {
a672b469 2657 vm_start();
38ff78d3 2658 }
ac8c19ba
PD
2659 return ret;
2660}
2661
5d6c599f
AP
2662void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
2663 Error **errp)
a7ae8355
SS
2664{
2665 QEMUFile *f;
8925839f 2666 QIOChannelFile *ioc;
a7ae8355
SS
2667 int saved_vm_running;
2668 int ret;
2669
5d6c599f
AP
2670 if (!has_live) {
2671 /* live default to true so old version of Xen tool stack can have a
2672 * successfull live migration */
2673 live = true;
2674 }
2675
a7ae8355
SS
2676 saved_vm_running = runstate_is_running();
2677 vm_stop(RUN_STATE_SAVE_VM);
c69adea4 2678 global_state_store_running();
a7ae8355 2679
8925839f
DB
2680 ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
2681 if (!ioc) {
a7ae8355
SS
2682 goto the_end;
2683 }
6f01f136 2684 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
8925839f 2685 f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
032b79f7 2686 object_unref(OBJECT(ioc));
a7ae8355 2687 ret = qemu_save_device_state(f);
96994fd1 2688 if (ret < 0 || qemu_fclose(f) < 0) {
c6bd8c70 2689 error_setg(errp, QERR_IO_ERROR);
5d6c599f
AP
2690 } else {
2691 /* libxl calls the QMP command "stop" before calling
2692 * "xen-save-devices-state" and in case of migration failure, libxl
2693 * would call "cont".
2694 * So call bdrv_inactivate_all (release locks) here to let the other
2695 * side of the migration take controle of the images.
2696 */
2697 if (live && !saved_vm_running) {
2698 ret = bdrv_inactivate_all();
2699 if (ret) {
2700 error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
2701 __func__, ret);
2702 }
2703 }
a7ae8355
SS
2704 }
2705
2706 the_end:
38ff78d3 2707 if (saved_vm_running) {
a7ae8355 2708 vm_start();
38ff78d3 2709 }
a7ae8355
SS
2710}
2711
88c16567
WC
2712void qmp_xen_load_devices_state(const char *filename, Error **errp)
2713{
2714 QEMUFile *f;
2715 QIOChannelFile *ioc;
2716 int ret;
2717
2718 /* Guest must be paused before loading the device state; the RAM state
2719 * will already have been loaded by xc
2720 */
2721 if (runstate_is_running()) {
2722 error_setg(errp, "Cannot update device state while vm is running");
2723 return;
2724 }
2725 vm_stop(RUN_STATE_RESTORE_VM);
2726
2727 ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
2728 if (!ioc) {
2729 return;
2730 }
6f01f136 2731 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
88c16567 2732 f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
032b79f7 2733 object_unref(OBJECT(ioc));
88c16567 2734
88c16567
WC
2735 ret = qemu_loadvm_state(f);
2736 qemu_fclose(f);
2737 if (ret < 0) {
2738 error_setg(errp, QERR_IO_ERROR);
2739 }
2740 migration_incoming_state_destroy();
2741}
2742
5e22479a 2743int load_snapshot(const char *name, Error **errp)
a672b469 2744{
f0aa7a8b 2745 BlockDriverState *bs, *bs_vm_state;
2d22b18f 2746 QEMUSnapshotInfo sn;
a672b469 2747 QEMUFile *f;
751c6a17 2748 int ret;
79b3c12a 2749 AioContext *aio_context;
b4b076da 2750 MigrationIncomingState *mis = migration_incoming_get_current();
a672b469 2751
377b21cc 2752 if (!replay_can_snapshot()) {
4dd32b3d
MA
2753 error_setg(errp, "Record/replay does not allow loading snapshot "
2754 "right now. Try once more later.");
377b21cc
PD
2755 return -EINVAL;
2756 }
2757
849f96e2 2758 if (!bdrv_all_can_snapshot(&bs)) {
927d6638
JQ
2759 error_setg(errp,
2760 "Device '%s' is writable but does not support snapshots",
2761 bdrv_get_device_name(bs));
849f96e2
DL
2762 return -ENOTSUP;
2763 }
723ccda1
DL
2764 ret = bdrv_all_find_snapshot(name, &bs);
2765 if (ret < 0) {
927d6638
JQ
2766 error_setg(errp,
2767 "Device '%s' does not have the requested snapshot '%s'",
2768 bdrv_get_device_name(bs), name);
723ccda1
DL
2769 return ret;
2770 }
849f96e2 2771
7cb14481 2772 bs_vm_state = bdrv_all_find_vmstate_bs();
f0aa7a8b 2773 if (!bs_vm_state) {
927d6638 2774 error_setg(errp, "No block device supports snapshots");
f0aa7a8b
MDCF
2775 return -ENOTSUP;
2776 }
79b3c12a 2777 aio_context = bdrv_get_aio_context(bs_vm_state);
f0aa7a8b
MDCF
2778
2779 /* Don't even try to load empty VM states */
79b3c12a 2780 aio_context_acquire(aio_context);
f0aa7a8b 2781 ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
79b3c12a 2782 aio_context_release(aio_context);
f0aa7a8b
MDCF
2783 if (ret < 0) {
2784 return ret;
2785 } else if (sn.vm_state_size == 0) {
927d6638
JQ
2786 error_setg(errp, "This is a disk-only snapshot. Revert to it "
2787 " offline using qemu-img");
f0aa7a8b
MDCF
2788 return -EINVAL;
2789 }
2790
a672b469 2791 /* Flush all IO requests so they don't interfere with the new state. */
8649f2f9 2792 bdrv_drain_all_begin();
a672b469 2793
2b624fe0 2794 ret = bdrv_all_goto_snapshot(name, &bs, errp);
4c1cdbaa 2795 if (ret < 0) {
2b624fe0
KW
2796 error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
2797 name, bdrv_get_device_name(bs));
8649f2f9 2798 goto err_drain;
a672b469
AL
2799 }
2800
a672b469 2801 /* restore the VM state */
f0aa7a8b 2802 f = qemu_fopen_bdrv(bs_vm_state, 0);
a672b469 2803 if (!f) {
927d6638 2804 error_setg(errp, "Could not open VM state file");
8649f2f9
SH
2805 ret = -EINVAL;
2806 goto err_drain;
a672b469 2807 }
f0aa7a8b 2808
aedbe192 2809 qemu_system_reset(SHUTDOWN_CAUSE_NONE);
b4b076da 2810 mis->from_src_file = f;
f0aa7a8b 2811
79b3c12a
DL
2812 aio_context_acquire(aio_context);
2813 ret = qemu_loadvm_state(f);
1575829d 2814 migration_incoming_state_destroy();
79b3c12a
DL
2815 aio_context_release(aio_context);
2816
8649f2f9
SH
2817 bdrv_drain_all_end();
2818
a672b469 2819 if (ret < 0) {
927d6638 2820 error_setg(errp, "Error %d while loading VM state", ret);
05f2401e 2821 return ret;
a672b469 2822 }
f0aa7a8b 2823
05f2401e 2824 return 0;
8649f2f9
SH
2825
2826err_drain:
2827 bdrv_drain_all_end();
2828 return ret;
7b630349
JQ
2829}
2830
c5705a77
AK
2831void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2832{
fa53a0e5 2833 qemu_ram_set_idstr(mr->ram_block,
c5705a77 2834 memory_region_name(mr), dev);
b895de50 2835 qemu_ram_set_migratable(mr->ram_block);
c5705a77
AK
2836}
2837
2838void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2839{
fa53a0e5 2840 qemu_ram_unset_idstr(mr->ram_block);
b895de50 2841 qemu_ram_unset_migratable(mr->ram_block);
c5705a77
AK
2842}
2843
2844void vmstate_register_ram_global(MemoryRegion *mr)
2845{
2846 vmstate_register_ram(mr, NULL);
2847}
1bfe5f05
JQ
2848
2849bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
2850{
2851 /* check needed if --only-migratable is specified */
811f8652 2852 if (!only_migratable) {
1bfe5f05
JQ
2853 return true;
2854 }
2855
2856 return !(vmsd && vmsd->unmigratable);
2857}