Migration is a multi-step process
1. Begin(src)
2. Prepare(dst)
3. Perform(src)
4. Finish(dst)
5. Confirm(src)
At step 2, a QEMU process is launched on the destination to
accept the incoming migration. Occasionally the process
that is controlling the migration workflow aborts, and fails
to call step 4, Finish. This leaves a QEMU process running
on the target (albeit with paused CPUs). Unfortunately because
step 2 activates a job on the QEMU process, it is unkillable by
normal means.
By registering the VM for autokill against the src virConnectPtr
in step 2, we can ensure that the guest is forcefully killed off
if the connection is closed without step 4 being invoked.
* src/qemu/qemu_migration.c: Register autokill in PrepareDirect
and PrepareTunnel. Unregister autokill on successful run
of Finish
* src/qemu/qemu_process.c: Unregister autokill when stopping a
process
---
src/qemu/qemu_migration.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index fbee653..42091a0 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1120,7 +1120,7 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver,
* -incoming stdio (which qemu_command might convert to exec:cat or fd:n)
*/
internalret = qemuProcessStart(dconn, driver, vm, "stdio", true,
- false, dataFD[0], NULL,
+ true, dataFD[0], NULL,
VIR_VM_OP_MIGRATE_IN_START);
if (internalret < 0) {
qemuAuditDomainStart(vm, "migrated", false);
@@ -1348,7 +1348,7 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver,
* -incoming tcp:0.0.0.0:port
*/
snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port);
- if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, false,
+ if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, true,
-1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) {
qemuAuditDomainStart(vm, "migrated", false);
/* Note that we don't set an error here because qemuProcessStart
@@ -2549,6 +2549,9 @@ qemuMigrationFinish(struct qemud_driver *driver,
VIR_WARN("Failed to save status on vm %s", vm->def->name);
goto endjob;
}
+
+ /* Guest is successfully running, so cancel previous autokill */
+ qemuProcessAutokillRemove(driver, vm);
} else {
qemuProcessStop(driver, vm, 1, VIR_DOMAIN_SHUTOFF_FAILED);
qemuAuditDomainStop(vm, "failed");
--
1.7.4.4