]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
qemu: Keep domain running on dst on failed post-copy migration
authorJiri Denemark <jdenemar@redhat.com>
Tue, 10 May 2022 13:20:25 +0000 (15:20 +0200)
committerJiri Denemark <jdenemar@redhat.com>
Tue, 7 Jun 2022 15:40:20 +0000 (17:40 +0200)
There's no need to artificially pause a domain when post-copy fails
from our point of view unless QEMU connection is broken too as migration
may still be progressing well.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
src/qemu/qemu_migration.c
src/qemu/qemu_migration.h
src/qemu/qemu_process.c

index f130ccbe2621f08d1d63ef8275176a715c48db2b..0859a4e4a7b50afa20fc8cb42e2b906fef227920 100644 (file)
@@ -1577,34 +1577,51 @@ qemuMigrationSrcIsSafe(virDomainDef *def,
 
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                               virDomainObj *vm)
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm)
 {
     virDomainState state;
     int reason;
 
     state = virDomainObjGetState(vm, &reason);
 
-    if (state != VIR_DOMAIN_PAUSED &&
-        state != VIR_DOMAIN_RUNNING)
-        return;
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
 
-    if (state == VIR_DOMAIN_PAUSED &&
+    if (state != VIR_DOMAIN_PAUSED ||
         reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
         return;
 
     VIR_WARN("Migration of domain %s failed during post-copy; "
              "leaving the domain paused", vm->def->name);
 
-    if (state == VIR_DOMAIN_RUNNING) {
-        if (qemuProcessStopCPUs(driver, vm,
-                                VIR_DOMAIN_PAUSED_POSTCOPY_FAILED,
-                                VIR_ASYNC_JOB_MIGRATION_IN) < 0)
-            VIR_WARN("Unable to pause guest CPUs for %s", vm->def->name);
-    } else {
-        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
-                             VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
-    }
+    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
+                         VIR_DOMAIN_PAUSED_POSTCOPY_FAILED);
+}
+
+
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm)
+{
+    virDomainState state;
+    int reason;
+
+    state = virDomainObjGetState(vm, &reason);
+
+    VIR_DEBUG("%s/%s",
+              virDomainStateTypeToString(state),
+              virDomainStateReasonToString(state, reason));
+
+    if (state != VIR_DOMAIN_RUNNING ||
+        reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED)
+        return;
+
+    VIR_WARN("Migration protocol failed during incoming migration of domain "
+             "%s, but QEMU keeps migrating; leaving the domain running, the "
+             "migration will be handled as unattended", vm->def->name);
+
+    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
+                         VIR_DOMAIN_RUNNING_POSTCOPY_FAILED);
 }
 
 
@@ -3453,7 +3470,7 @@ qemuMigrationSrcConfirmPhase(virQEMUDriver *driver,
 
         if (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED &&
             reason == VIR_DOMAIN_PAUSED_POSTCOPY)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         else
             qemuMigrationSrcRestoreDomainState(driver, vm);
 
@@ -5826,7 +5843,7 @@ qemuMigrationDstFinish(virQEMUDriver *driver,
                                 VIR_DOMAIN_EVENT_STOPPED_FAILED);
             virObjectEventStateQueue(driver->domainEventState, event);
         } else {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         }
     }
 
index a8afa6611932afd106e8426d9e74e70e8c22c64a..c4e4228282c86fc90f63be216cf7c56686f2e159 100644 (file)
@@ -251,8 +251,10 @@ qemuMigrationDstRun(virQEMUDriver *driver,
                     virDomainAsyncJob asyncJob);
 
 void
-qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver,
-                            virDomainObj *vm);
+qemuMigrationSrcPostcopyFailed(virDomainObj *vm);
+
+void
+qemuMigrationDstPostcopyFailed(virDomainObj *vm);
 
 int
 qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver,
index e8936cd6234d63e36db9bd37b8fd98748b63404a..0d39c67dfcebf9a0e92039b12195bfd771ddcd51 100644 (file)
@@ -3411,7 +3411,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
          * confirm success or failure yet; killing it seems safest unless
          * we already started guest CPUs or we were in post-copy mode */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationDstPostcopyFailed(vm);
         } else if (state != VIR_DOMAIN_RUNNING) {
             VIR_DEBUG("Killing migrated domain %s", vm->def->name);
             return -1;
@@ -3462,7 +3462,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Cancelling unfinished migration of domain %s",
                       vm->def->name);
@@ -3480,7 +3480,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
          */
         if (postcopy)
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         break;
 
     case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
@@ -3489,7 +3489,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
          * as broken in that case
          */
         if (postcopy) {
-            qemuMigrationAnyPostcopyFailed(driver, vm);
+            qemuMigrationSrcPostcopyFailed(vm);
         } else {
             VIR_DEBUG("Resuming domain %s after failed migration",
                       vm->def->name);