Destination daemon should not rely on the client or source daemon
(depending on the type of migration) to call Finish when migration
fails, because the client may crash before it can do so. The domain
prepared for incoming migration is set to be destroyed (and migration
job cleaned up) when connection with the client closes but this is not
enough. If the associated qemu process crashes after the Prepare step and
the domain is cleaned up before the connection gets closed, autodestroy
is not called for the domain and the migration job remains set. In case the
domain is defined on destination host (i.e., it is not completely
removed once destroyed) we keep the job set forever. To fix this, we
register a cleanup callback which is responsible for cleaning the migration-in
job when a domain dies anywhere between the Prepare and Finish steps. Note
that we can't blindly clean any job when spotting EOF on the monitor since
normally an API is running at that time.
---
src/qemu/qemu_domain.c | 2 --
src/qemu/qemu_domain.h | 2 ++
src/qemu/qemu_migration.c | 22 ++++++++++++++++++++++
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index a9469cf..41ffd6a 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -51,7 +51,6 @@
(VIR_DOMAIN_XML_SECURE | \
VIR_DOMAIN_XML_UPDATE_CPU)
-VIR_ENUM_DECL(qemuDomainJob)
VIR_ENUM_IMPL(qemuDomainJob, QEMU_JOB_LAST,
"none",
"query",
@@ -64,7 +63,6 @@ VIR_ENUM_IMPL(qemuDomainJob, QEMU_JOB_LAST,
"async nested",
);
-VIR_ENUM_DECL(qemuDomainAsyncJob)
VIR_ENUM_IMPL(qemuDomainAsyncJob, QEMU_ASYNC_JOB_LAST,
"none",
"migration out",
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index af83c0e..d79ff1d 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -64,6 +64,7 @@ enum qemuDomainJob {
QEMU_JOB_LAST
};
+VIR_ENUM_DECL(qemuDomainJob)
/* Async job consists of a series of jobs that may change state. Independent
* jobs that do not change state (and possibly others if explicitly allowed by
@@ -78,6 +79,7 @@ enum qemuDomainAsyncJob {
QEMU_ASYNC_JOB_LAST
};
+VIR_ENUM_DECL(qemuDomainAsyncJob)
struct qemuDomainJobObj {
virCond cond; /* Use to coordinate jobs */
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 81b2d5b..4eb3bf4 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1107,6 +1107,23 @@ cleanup:
/* Prepare is the first step, and it runs on the destination host.
*/
+static void /* domain cleanup callback: discards a leftover incoming-migration async job */
+qemuMigrationPrepareCleanup(struct qemud_driver *driver,
+ virDomainObjPtr vm)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+
+ VIR_DEBUG("driver=%p, vm=%s, job=%s, asyncJob=%s",
+ driver,
+ vm->def->name,
+ qemuDomainJobTypeToString(priv->job.active),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+ if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_IN)) /* nothing to do unless a migration-in async job is reported active */
+ return;
+ qemuDomainObjDiscardAsyncJob(driver, vm); /* drop the async job so it does not stay set on the domain */
+}
+
static int
qemuMigrationPrepareAny(struct qemud_driver *driver,
virConnectPtr dconn,
@@ -1264,6 +1281,9 @@ qemuMigrationPrepareAny(struct qemud_driver *driver,
VIR_WARN("Unable to encode migration cookie");
}
+ if (qemuDomainCleanupAdd(vm, qemuMigrationPrepareCleanup) < 0)
+ goto endjob;
+
virDomainAuditStart(vm, "migrated", true);
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STARTED,
@@ -2703,6 +2723,8 @@ qemuMigrationFinish(struct qemud_driver *driver,
v3proto ? QEMU_MIGRATION_PHASE_FINISH3
: QEMU_MIGRATION_PHASE_FINISH2);
+ qemuDomainCleanupRemove(vm, qemuMigrationPrepareCleanup);
+
if (flags & VIR_MIGRATE_PERSIST_DEST)
cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT;
--
1.7.8.5