From c2f6a6a7262ede1bef8299e9dae95e68edbf06c7 Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Tue, 10 May 2022 15:20:25 +0200 Subject: [PATCH] qemu: Abort failed post-copy when we haven't called Finish yet When migration fails after it already switched to post-copy phase on the source, but early enough that we haven't called Finish on the destination yet, we know the vCPUs were not started on the destination and the source host still has a complete state of the domain. Thus we can just ignore the fact post-copy phase started and normally abort the migration and resume vCPUs on the source. Signed-off-by: Jiri Denemark Reviewed-by: Peter Krempa Reviewed-by: Pavel Hrdina --- src/qemu/qemu_migration.c | 8 ++++++++ src/qemu/qemu_process.c | 20 ++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9572ad2fa7..446b767739 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -4412,6 +4412,14 @@ qemuMigrationSrcRun(virQEMUDriver *driver, virErrorPreserveLast(&orig_err); if (virDomainObjIsActive(vm)) { + int reason; + virDomainState state = virDomainObjGetState(vm, &reason); + + if (state == VIR_DOMAIN_PAUSED && reason == VIR_DOMAIN_PAUSED_POSTCOPY) { + VIR_DEBUG("Aborting failed post-copy migration as the destination is not running yet"); + virDomainObjSetState(vm, state, VIR_DOMAIN_PAUSED_MIGRATION); + } + if (cancel && priv->job.current->status != VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED && qemuDomainObjEnterMonitorAsync(driver, vm, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index b1725ce4cf..f5a45c898d 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3483,20 +3483,16 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, case QEMU_MIGRATION_PHASE_PERFORM2: case QEMU_MIGRATION_PHASE_PERFORM3: /* migration is still in progress, let's cancel it and resume the - * domain; however we can only do that before migration enters - * post-copy mode + * domain; we can do so even in post-copy phase as the domain was not + * resumed on the destination host yet */ - if (postcopy) { - qemuMigrationSrcPostcopyFailed(vm); - } else { - VIR_DEBUG("Cancelling unfinished migration of domain %s", - vm->def->name); - if (qemuMigrationSrcCancel(driver, vm) < 0) { - VIR_WARN("Could not cancel ongoing migration of domain %s", - vm->def->name); - } - resume = true; + VIR_DEBUG("Cancelling unfinished migration of domain %s", + vm->def->name); + if (qemuMigrationSrcCancel(driver, vm) < 0) { + VIR_WARN("Could not cancel ongoing migration of domain %s", + vm->def->name); } + resume = true; break; case QEMU_MIGRATION_PHASE_PERFORM3_DONE: -- 2.47.2