]> git.ipfire.org Git - thirdparty/qemu.git/commitdiff
migration: Introduce POSTCOPY_DEVICE state
authorJuraj Marcin <jmarcin@redhat.com>
Mon, 3 Nov 2025 18:32:57 +0000 (19:32 +0100)
committerPeter Xu <peterx@redhat.com>
Mon, 3 Nov 2025 21:04:10 +0000 (16:04 -0500)
Currently, when postcopy starts, the source VM starts switchover and
sends a package containing the state of all non-postcopiable devices.
When the destination loads this package, the switchover is complete and
the destination VM starts. However, if the device state load fails or
the destination side crashes, the source side is already in
POSTCOPY_ACTIVE state and cannot be recovered, even when it has the most
up-to-date machine state as the destination has not yet started.

This patch introduces a new POSTCOPY_DEVICE state which is active while
the destination machine is loading the device state, is not yet running,
and the source side can be resumed in case of a migration failure.
Return-path is required for this state to function, otherwise it will be
skipped in favor of POSTCOPY_ACTIVE.

To transition from POSTCOPY_DEVICE to POSTCOPY_ACTIVE, the source
side uses a PONG message that is a response to a PING message processed
just before the POSTCOPY_RUN command that starts the destination VM.
Thus, this feature is effective even if the destination side does not
yet support this new state.

Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
Link: https://lore.kernel.org/r/20251103183301.3840862-9-jmarcin@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
migration/migration.c
migration/migration.h
migration/postcopy-ram.c
migration/savevm.c
migration/savevm.h
migration/trace-events
qapi/migration.json
tests/qemu-iotests/194
tests/qtest/migration/precopy-tests.c

index 637be71bfe8b1f22be076bba2578bfee08531411..c2daab6bdd679af764f7f351a15856ea872d2535 100644 (file)
@@ -1206,6 +1206,7 @@ bool migration_is_running(void)
 
     switch (s->state) {
     case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_DEVICE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
     case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
@@ -1227,6 +1228,7 @@ static bool migration_is_active(void)
     MigrationState *s = current_migration;
 
     return (s->state == MIGRATION_STATUS_ACTIVE ||
+            s->state == MIGRATION_STATUS_POSTCOPY_DEVICE ||
             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
 }
 
@@ -1349,6 +1351,7 @@ static void fill_source_migration_info(MigrationInfo *info)
         break;
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_CANCELLING:
+    case MIGRATION_STATUS_POSTCOPY_DEVICE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_PRE_SWITCHOVER:
     case MIGRATION_STATUS_DEVICE:
@@ -1402,6 +1405,7 @@ static void fill_destination_migration_info(MigrationInfo *info)
     case MIGRATION_STATUS_CANCELLING:
     case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_DEVICE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
     case MIGRATION_STATUS_POSTCOPY_RECOVER:
@@ -1732,6 +1736,7 @@ bool migration_in_postcopy(void)
     MigrationState *s = migrate_get_current();
 
     switch (s->state) {
+    case MIGRATION_STATUS_POSTCOPY_DEVICE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
     case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
@@ -1833,6 +1838,9 @@ int migrate_init(MigrationState *s, Error **errp)
     memset(&mig_stats, 0, sizeof(mig_stats));
     migration_reset_vfio_bytes_transferred();
 
+    s->postcopy_package_loaded = false;
+    qemu_event_reset(&s->postcopy_package_loaded_event);
+
     return 0;
 }
 
@@ -2568,6 +2576,11 @@ static void *source_return_path_thread(void *opaque)
             tmp32 = ldl_be_p(buf);
             trace_source_return_path_thread_pong(tmp32);
             qemu_sem_post(&ms->rp_state.rp_pong_acks);
+            if (tmp32 == QEMU_VM_PING_PACKAGED_LOADED) {
+                trace_source_return_path_thread_postcopy_package_loaded();
+                ms->postcopy_package_loaded = true;
+                qemu_event_set(&ms->postcopy_package_loaded_event);
+            }
             break;
 
         case MIG_RP_MSG_REQ_PAGES:
@@ -2813,6 +2826,15 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     if (migrate_postcopy_ram()) {
         qemu_savevm_send_ping(fb, 3);
     }
+    if (ms->rp_state.rp_thread_created) {
+        /*
+        * This ping will tell us that all non-postcopiable device state has been
+        * successfully loaded and the destination is about to start. When
+        * response is received, it will trigger transition from POSTCOPY_DEVICE
+        * to POSTCOPY_ACTIVE state.
+        */
+        qemu_savevm_send_ping(fb, QEMU_VM_PING_PACKAGED_LOADED);
+    }
 
     qemu_savevm_send_postcopy_run(fb);
 
@@ -2868,8 +2890,13 @@ static int postcopy_start(MigrationState *ms, Error **errp)
      */
     migration_rate_set(migrate_max_postcopy_bandwidth());
 
-    /* Now, switchover looks all fine, switching to postcopy-active */
+    /*
+     * Now, switchover looks all fine, switching to POSTCOPY_DEVICE, or
+     * directly to POSTCOPY_ACTIVE if there is no return path.
+     */
     migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
+                      ms->rp_state.rp_thread_created ?
+                      MIGRATION_STATUS_POSTCOPY_DEVICE :
                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
 
     bql_unlock();
@@ -3311,8 +3338,8 @@ static MigThrError migration_detect_error(MigrationState *s)
         return postcopy_pause(s);
     } else {
         /*
-         * For precopy (or postcopy with error outside IO), we fail
-         * with no time.
+         * For precopy (or postcopy with error outside IO, or before dest
+         * starts), we fail with no time.
          */
         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
         trace_migration_thread_file_err();
@@ -3447,7 +3474,8 @@ static MigIterateState migration_iteration_run(MigrationState *s)
 {
     uint64_t must_precopy, can_postcopy, pending_size;
     Error *local_err = NULL;
-    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
+    bool in_postcopy = (s->state == MIGRATION_STATUS_POSTCOPY_DEVICE ||
+                        s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
     bool can_switchover = migration_can_switchover(s);
     bool complete_ready;
 
@@ -3463,6 +3491,18 @@ static MigIterateState migration_iteration_run(MigrationState *s)
          * POSTCOPY_ACTIVE it means switchover already happened.
          */
         complete_ready = !pending_size;
+        if (s->state == MIGRATION_STATUS_POSTCOPY_DEVICE &&
+            (s->postcopy_package_loaded || complete_ready)) {
+            /*
+             * If package has been loaded, the event is set and we will
+             * immediatelly transition to POSTCOPY_ACTIVE. If we are ready for
+             * completion, we need to wait for destination to load the postcopy
+             * package before actually completing.
+             */
+            qemu_event_wait(&s->postcopy_package_loaded_event);
+            migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_DEVICE,
+                              MIGRATION_STATUS_POSTCOPY_ACTIVE);
+        }
     } else {
         /*
          * Exact pending reporting is only needed for precopy.  Taking RAM
@@ -4117,6 +4157,7 @@ static void migration_instance_finalize(Object *obj)
     qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
     error_free(ms->error);
+    qemu_event_destroy(&ms->postcopy_package_loaded_event);
 }
 
 static void migration_instance_init(Object *obj)
@@ -4138,6 +4179,7 @@ static void migration_instance_init(Object *obj)
     qemu_sem_init(&ms->wait_unplug_sem, 0);
     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
     qemu_mutex_init(&ms->qemu_file_lock);
+    qemu_event_init(&ms->postcopy_package_loaded_event, 0);
 }
 
 /*
index 4a37f7202c1d030629d7d9a45de37ba027eea98d..213b33fe6e55abb47b152dd9db5aee8ef56d54e8 100644 (file)
@@ -510,6 +510,9 @@ struct MigrationState {
     /* Is this a rdma migration */
     bool rdma_migration;
 
+    bool postcopy_package_loaded;
+    QemuEvent postcopy_package_loaded_event;
+
     GSource *hup_source;
 };
 
index 8405cce7b4575a3c8bc57e24b06862b572c1c691..3f98dcb6fd3b360e33cadddc4caab63bf4eed180 100644 (file)
@@ -2117,7 +2117,8 @@ static void *postcopy_listen_thread(void *opaque)
     object_ref(OBJECT(migr));
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                                   MIGRATION_STATUS_POSTCOPY_ACTIVE);
+                      mis->to_src_file ? MIGRATION_STATUS_POSTCOPY_DEVICE :
+                                         MIGRATION_STATUS_POSTCOPY_ACTIVE);
     qemu_event_set(&mis->thread_sync_event);
     trace_postcopy_ram_listen_thread_start();
 
@@ -2164,8 +2165,7 @@ static void *postcopy_listen_thread(void *opaque)
                           "loadvm failed during postcopy: %d: ", load_res);
             migrate_set_error(migr, local_err);
             g_clear_pointer(&local_err, error_report_err);
-            migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                                           MIGRATION_STATUS_FAILED);
+            migrate_set_state(&mis->state, mis->state, MIGRATION_STATUS_FAILED);
             goto out;
         }
     }
@@ -2176,6 +2176,10 @@ static void *postcopy_listen_thread(void *opaque)
      */
     qemu_event_wait(&mis->main_thread_load_event);
 
+    /*
+     * Device load in the main thread has finished, we should be in
+     * POSTCOPY_ACTIVE now.
+     */
     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                    MIGRATION_STATUS_COMPLETED);
 
index 01b5a8bfffce7563cde58e6718519609a3e88402..62cc2ce25cbf793aa37b5f3c9bc48c48a95e509e 100644 (file)
@@ -2170,6 +2170,11 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis, Error **errp)
         return -1;
     }
 
+    /* We might be already in POSTCOPY_ACTIVE if there is no return path */
+    if (mis->state == MIGRATION_STATUS_POSTCOPY_DEVICE) {
+        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_DEVICE,
+                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    }
     postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
     migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis);
 
index c337e3e3d111a7f28a57b90f61e8f70b71803d4e..125a2507b7279412bcb0745b95a774874c31c54f 100644 (file)
@@ -29,6 +29,8 @@
 #define QEMU_VM_COMMAND              0x08
 #define QEMU_VM_SECTION_FOOTER       0x7e
 
+#define QEMU_VM_PING_PACKAGED_LOADED 0x42
+
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
 int qemu_savevm_state_prepare(Error **errp);
index 772636f3ac72c4b70064df40cc4ed569c0bc3e2d..bf11b62b17796ffda384a43f6faef201885b0650 100644 (file)
@@ -191,6 +191,7 @@ source_return_path_thread_pong(uint32_t val) "0x%x"
 source_return_path_thread_shut(uint32_t val) "0x%x"
 source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32
 source_return_path_thread_switchover_acked(void) ""
+source_return_path_thread_postcopy_package_loaded(void) ""
 migration_thread_low_pending(uint64_t pending) "%" PRIu64
 migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidth, uint64_t avail_bw, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " switchover_bw %" PRIu64 " max_size %" PRId64
 process_incoming_migration_co_end(int ret) "ret=%d"
index c7a6737cc1fafa3f142773d8be2a6784b67fc0a8..93f71de3fe12365b85bd3842c6b694c945e0958a 100644 (file)
 # @postcopy-active: like active, but now in postcopy mode.
 #     (since 2.5)
 #
+# @postcopy-device: like postcopy-active, but the destination is still
+#     loading device state and is not running yet.  If migration fails
+#     during this state, the source side will resume.  If there is no
+#     return-path from destination to source, this state is skipped.
+#     (since 10.2)
+#
 # @postcopy-paused: during postcopy but paused.  (since 3.0)
 #
 # @postcopy-recover-setup: setup phase for a postcopy recovery
 ##
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
-            'active', 'postcopy-active', 'postcopy-paused',
-            'postcopy-recover-setup',
+            'active', 'postcopy-device', 'postcopy-active',
+            'postcopy-paused', 'postcopy-recover-setup',
             'postcopy-recover', 'completed', 'failed', 'colo',
             'pre-switchover', 'device', 'wait-unplug' ] }
 ##
index e114c0b2695d03342306e00b8aec095a4864a76b..806624394db37fee47f105fcdd2509e10d179a7d 100755 (executable)
@@ -76,7 +76,7 @@ with iotests.FilePath('source.img') as source_img_path, \
 
     while True:
         event1 = source_vm.event_wait('MIGRATION')
-        if event1['data']['status'] == 'postcopy-active':
+        if event1['data']['status'] in ('postcopy-device', 'postcopy-active'):
             # This event is racy, it depends do we really do postcopy or bitmap
             # was migrated during downtime (and no data to migrate in postcopy
             # phase). So, don't log it.
index bb38292550cb963463c3513b804be04849cb1770..57ca623de5df408e85652a544796079cf87f9972 100644 (file)
@@ -1316,13 +1316,14 @@ void migration_test_add_precopy(MigrationTestEnv *env)
     }
 
     /* ensure new status don't go unnoticed */
-    assert(MIGRATION_STATUS__MAX == 15);
+    assert(MIGRATION_STATUS__MAX == 16);
 
     for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
         switch (i) {
         case MIGRATION_STATUS_DEVICE: /* happens too fast */
         case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */
         case MIGRATION_STATUS_COLO: /* no support in tests */
+        case MIGRATION_STATUS_POSTCOPY_DEVICE: /* postcopy can't be cancelled */
         case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
         case MIGRATION_STATUS_POSTCOPY_PAUSED:
         case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: