git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
qemu: Recover from interrupted jobs
author Jiri Denemark <jdenemar@redhat.com>
Mon, 4 Jul 2011 21:33:39 +0000 (23:33 +0200)
committer Jiri Denemark <jdenemar@redhat.com>
Tue, 12 Jul 2011 23:53:32 +0000 (01:53 +0200)
Detect and react to situations in which libvirtd was restarted or killed
while a job was active.

src/qemu/qemu_domain.c
src/qemu/qemu_domain.h
src/qemu/qemu_process.c

index 062ecc776ff04be78c8dbd5df0508c9437129bbb..b26308e2ae8e454975563b75a3d36807511f0408 100644 (file)
@@ -142,6 +142,20 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
     memset(&job->signalsData, 0, sizeof(job->signalsData));
 }
 
+void /* Copy obj's current job identifiers into *job, then reset the job state kept on obj. */
+qemuDomainObjRestoreJob(virDomainObjPtr obj,
+                        struct qemuDomainJobObj *job) /* out: overwritten completely */
+{
+    qemuDomainObjPrivatePtr priv = obj->privateData;
+
+    memset(job, 0, sizeof(*job));       /* every field not copied below stays zero */
+    job->active = priv->job.active;     /* only the two job identifiers are carried over */
+    job->asyncJob = priv->job.asyncJob;
+
+    qemuDomainObjResetJob(priv);        /* from here on only *job remembers the old job */
+    qemuDomainObjResetAsyncJob(priv);
+}
+
 static void
 qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
 {
index 8766393baf472d95b1b0533ce834fdc7b81b67b0..d721e247ef94b08a4f11f7dc7798328c7c881f8a 100644 (file)
@@ -184,6 +184,8 @@ void qemuDomainObjEndNestedJob(struct qemud_driver *driver,
 void qemuDomainObjSaveJob(struct qemud_driver *driver, virDomainObjPtr obj);
 void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
                                   unsigned long long allowedJobs);
+void qemuDomainObjRestoreJob(virDomainObjPtr obj,
+                             struct qemuDomainJobObj *job);
 void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
                                   virDomainObjPtr obj);
 
index e2e13881248a36253531c538bca80c8ab81f8e75..52a73b83772b891992a935d10c4eb685eb64858f 100644 (file)
@@ -2231,6 +2231,80 @@ qemuProcessUpdateState(struct qemud_driver *driver, virDomainObjPtr vm)
     return 0;
 }
 
+static int /* React to a job that was still recorded for vm: 0 to carry on, -1 if vm is inactive or was being destroyed. */
+qemuProcessRecoverJob(struct qemud_driver *driver,
+                      virDomainObjPtr vm,
+                      virConnectPtr conn,
+                      const struct qemuDomainJobObj *job) /* the old job; only read, never modified */
+{
+    virDomainState state;
+    int reason;
+
+    state = virDomainObjGetState(vm, &reason); /* reason is filled in alongside the state */
+
+    switch (job->asyncJob) { /* first: the interrupted async job, if any */
+    case QEMU_ASYNC_JOB_MIGRATION_OUT:
+    case QEMU_ASYNC_JOB_MIGRATION_IN:
+        /* we don't know what to do yet */
+        break;
+
+    case QEMU_ASYNC_JOB_SAVE:
+    case QEMU_ASYNC_JOB_DUMP:
+        /* TODO cancel possibly running migrate operation */
+        /* resume the domain but only if it was paused as a result of
+         * running save/dump operation */
+        if (state == VIR_DOMAIN_PAUSED &&
+            ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
+              reason == VIR_DOMAIN_PAUSED_DUMP) ||
+             (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
+              reason == VIR_DOMAIN_PAUSED_SAVE) ||
+             reason == VIR_DOMAIN_PAUSED_UNKNOWN)) { /* an unknown pause reason also qualifies */
+            if (qemuProcessStartCPUs(driver, vm, conn,
+                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) { /* failure is only logged, not returned */
+                VIR_WARN("Could not resume domain %s after", vm->def->name); /* NOTE(review): message ends abruptly at "after" - looks truncated */
+            }
+        }
+        break;
+
+    case QEMU_ASYNC_JOB_NONE:
+    case QEMU_ASYNC_JOB_LAST:
+        break;
+    }
+
+    if (!virDomainObjIsActive(vm)) /* an inactive domain is reported as failure */
+        return -1;
+
+    switch (job->active) { /* then: the interrupted regular job */
+    case QEMU_JOB_QUERY:
+        /* harmless */
+        break;
+
+    case QEMU_JOB_DESTROY:
+        VIR_DEBUG("Domain %s should have already been destroyed",
+                  vm->def->name);
+        return -1; /* the visible caller jumps to its error path on a negative result */
+
+    case QEMU_JOB_SUSPEND:
+        /* mostly harmless */
+        break;
+
+    case QEMU_JOB_MODIFY:
+        /* XXX depending on the command we may be in an inconsistent state and
+         * we should probably fall back to "monitor error" state and refuse to
+         * ... (NOTE(review): the sentence above is cut off in the original) */
+        break;
+
+    case QEMU_JOB_ASYNC:
+    case QEMU_JOB_ASYNC_NESTED:
+        /* async job was already handled above; shares the break below */
+    case QEMU_JOB_NONE:
+    case QEMU_JOB_LAST:
+        break;
+    }
+
+    return 0;
+}
+
 struct qemuProcessReconnectData {
     virConnectPtr conn;
     struct qemud_driver *driver;
@@ -2247,9 +2321,12 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
     struct qemud_driver *driver = data->driver;
     qemuDomainObjPrivatePtr priv;
     virConnectPtr conn = data->conn;
+    struct qemuDomainJobObj oldjob;
 
     virDomainObjLock(obj);
 
+    qemuDomainObjRestoreJob(obj, &oldjob);
+
     VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);
 
     priv = obj->privateData;
@@ -2295,6 +2372,9 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
     if (qemuProcessFiltersInstantiate(conn, obj->def))
         goto error;
 
+    if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
+        goto error;
+
     priv->job.active = QEMU_JOB_NONE;
 
     /* update domain state XML with possibly updated state in virDomainObj */