]> git.ipfire.org Git - thirdparty/qemu.git/commitdiff
migration: Introduce stopcopy_bytes in save_query_pending()
author Peter Xu <peterx@redhat.com>
Tue, 21 Apr 2026 20:21:01 +0000 (16:21 -0400)
committer Peter Xu <peterx@redhat.com>
Tue, 5 May 2026 16:35:25 +0000 (12:35 -0400)
Allow modules to report data that can only be migrated after VM is stopped.

When this concept is introduced, we will need to account the stopcopy size
as part of pending_size, as before.

However, when there is data that can only be migrated in the stopcopy
phase, the old "pending_size" may not always drop low enough to kick off
the slow version of the query sync.

It used to be almost guaranteed to happen, as none of the prior iterative
modules had stopcopy-only data.  VFIO may change that fact by having some
data that must be copied during the stop phase.

So we need to make sure QEMU will kick off a synchronized version of query
pending when all precopy data is migrated.  This might be important for
VFIO to keep making progress even if the downtime cannot yet be satisfied.

So far, this patch should introduce no functional change, as no module
reports a stopcopy size yet.

This paves the way for VFIO to properly report its pending data sizes,
which will start to include stop-only data.

Reviewed-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Juraj Marcin <jmarcin@redhat.com>
Link: https://lore.kernel.org/r/20260421202110.306051-8-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
include/migration/register.h
migration/migration.c
migration/savevm.c
migration/trace-events

index e2117e8dd48b984d9306bc5608717303e670f3dc..5e5e0ee432e537781ce78e84b2ff2d81c33e8f37 100644 (file)
@@ -21,6 +21,13 @@ typedef struct MigPendingData {
     uint64_t precopy_bytes;
     /* Amount of pending bytes can be transferred in postcopy */
     uint64_t postcopy_bytes;
+    /* Amount of pending bytes can be transferred only in stopcopy */
+    uint64_t stopcopy_bytes;
+    /*
+     * Total pending data, modules do not need to update this field, it
+     * will be automatically calculated by migration core API.
+     */
+    uint64_t total_bytes;
 } MigPendingData;
 
 /**
index c75ad01b64b99ad2f117f75125b3e982c3260dab..049b69fbe70f155f72db982d26f3bed05774e605 100644 (file)
@@ -3202,6 +3202,54 @@ typedef enum {
     MIG_ITERATE_BREAK,          /* Break the loop */
 } MigIterateState;
 
+/* Are we ready to move to the next iteration phase? */
+static bool migration_iteration_next_ready(MigrationState *s,
+                                           MigPendingData *pending)
+{
+    /*
+     * If the estimated total already fits within the switchover
+     * threshold, mark this iteration finished: time to do a slow sync.
+     */
+    if (pending->total_bytes <= s->threshold_size) {
+        return true;
+    }
+
+    /*
+     * Since modules may report stop-only data, we also want to re-query
+     * in slow mode once all precopy data has been moved over.  This also
+     * marks the current iteration done.
+     *
+     * This can happen when e.g. a module (like VFIO) reports a stopcopy
+     * size so large that the check above cannot be satisfied with the
+     * current setup.  The slow re-query still helps here, because we
+     * keep trying our best to move whatever we have.
+     */
+    if (pending->precopy_bytes == 0) {
+        return true;
+    }
+
+    return false;
+}
+
+static void migration_iteration_go_next(MigPendingData *pending)
+{
+    /*
+     * Re-querying the pending data in exact (slow) mode achieves this.
+     * TODO: move RAM iteration code into the core layer.
+     */
+    qemu_savevm_query_pending(pending, true);
+}
+
+static bool postcopy_should_start(MigrationState *s, MigPendingData *pending)
+{
+    /* If postcopy's switchover would violate user-specified downtime, stop */
+    if (pending->precopy_bytes + pending->stopcopy_bytes > s->threshold_size) {
+        return false;
+    }
+
+    return qatomic_read(&s->start_postcopy);
+}
+
 /*
  * Return true if continue to the next iteration directly, false
  * otherwise.
@@ -3213,12 +3261,10 @@ static MigIterateState migration_iteration_run(MigrationState *s)
                         s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
     bool can_switchover = migration_can_switchover(s);
     MigPendingData pending = { };
-    uint64_t pending_size;
     bool complete_ready;
 
     /* Fast path - get the estimated amount of pending data */
     qemu_savevm_query_pending(&pending, false);
-    pending_size = pending.precopy_bytes + pending.postcopy_bytes;
 
     if (in_postcopy) {
         /*
@@ -3226,7 +3272,7 @@ static MigIterateState migration_iteration_run(MigrationState *s)
          * postcopy completion doesn't rely on can_switchover, because when
          * POSTCOPY_ACTIVE it means switchover already happened.
          */
-        complete_ready = !pending_size;
+        complete_ready = !pending.total_bytes;
         if (s->state == MIGRATION_STATUS_POSTCOPY_DEVICE &&
             (s->postcopy_package_loaded || complete_ready)) {
             /*
@@ -3258,14 +3304,12 @@ static MigIterateState migration_iteration_run(MigrationState *s)
          * postcopy started, so ESTIMATE should always match with EXACT
          * during postcopy phase.
          */
-        if (pending_size <= s->threshold_size) {
-            qemu_savevm_query_pending(&pending, true);
-            pending_size = pending.precopy_bytes + pending.postcopy_bytes;
+        if (migration_iteration_next_ready(s, &pending)) {
+            migration_iteration_go_next(&pending);
         }
 
         /* Should we switch to postcopy now? */
-        if (pending.precopy_bytes <= s->threshold_size &&
-            can_switchover && qatomic_read(&s->start_postcopy)) {
+        if (can_switchover && postcopy_should_start(s, &pending)) {
             if (postcopy_start(s, &local_err)) {
                 migrate_error_propagate(s, error_copy(local_err));
                 error_report_err(local_err);
@@ -3280,11 +3324,12 @@ static MigIterateState migration_iteration_run(MigrationState *s)
          * (2) Pending size is no more than the threshold specified
          *     (which was calculated from expected downtime)
          */
-        complete_ready = can_switchover && (pending_size <= s->threshold_size);
+        complete_ready = can_switchover &&
+            (pending.total_bytes <= s->threshold_size);
     }
 
     if (complete_ready) {
-        trace_migration_thread_low_pending(pending_size);
+        trace_migration_thread_low_pending(pending.total_bytes);
         migration_completion(s);
         return MIG_ITERATE_BREAK;
     }
index 72454e15ad570ca73cfa5f9b78d023aa02ad1208..39430470aaa05e69b3e500c3221c070f017356d2 100644 (file)
@@ -1800,8 +1800,7 @@ void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
 {
     SaveStateEntry *se;
 
-    pending->precopy_bytes = 0;
-    pending->postcopy_bytes = 0;
+    memset(pending, 0, sizeof(*pending));
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_query_pending) {
@@ -1813,8 +1812,13 @@ void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
         se->ops->save_query_pending(se->opaque, pending, exact);
     }
 
+    pending->total_bytes = pending->precopy_bytes +
+        pending->stopcopy_bytes + pending->postcopy_bytes;
+
     trace_qemu_savevm_query_pending(exact, pending->precopy_bytes,
-                                    pending->postcopy_bytes);
+                                    pending->stopcopy_bytes,
+                                    pending->postcopy_bytes,
+                                    pending->total_bytes);
 }
 
 void qemu_savevm_state_cleanup(void)
index ca7dfd4cb708de29bf482ed0780768dfb6fafda3..de99d976abc637e9361f8467daacf3cc8787976f 100644 (file)
@@ -7,7 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
 qemu_loadvm_state_post_main(int ret) "%d"
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
 qemu_savevm_send_packaged(void) ""
-qemu_savevm_query_pending(bool exact, uint64_t precopy, uint64_t postcopy) "exact=%d, precopy=%"PRIu64", postcopy=%"PRIu64
+qemu_savevm_query_pending(bool exact, uint64_t precopy, uint64_t stopcopy, uint64_t postcopy, uint64_t total) "exact=%d, precopy=%"PRIu64", stopcopy=%"PRIu64", postcopy=%"PRIu64", total=%"PRIu64
 loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u"
 loadvm_state_setup(void) ""
 loadvm_state_cleanup(void) ""