QEMU identified a race condition between the device state serialization
and the end of storage migration. Both QEMU and libvirt need to be
updated to fix this.
Our migration workflow is modified so that after starting the migration
we wait for QEMU to enter "pre-switchover", "postcopy-active", or
"completed" state. Once there, we cancel all block jobs as usual. But if
QEMU is in "pre-switchover", we need to resume the migration afterwards
and wait again for the real end (either "postcopy-active" or
"completed" state).
Old QEMU will just enter either "postcopy-active" or "completed"
directly, which is still correctly handled even by new libvirt. The
"pre-switchover" state will only be entered if QEMU supports it and the
pause-before-switchover capability was enabled. Thus all combinations of
libvirt and QEMU will work, but only new QEMU with new libvirt will
avoid the race condition.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
src/qemu/qemu_migration.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 63 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 4b356002f..af744661f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1525,6 +1525,16 @@ qemuMigrationCompleted(virQEMUDriverPtr driver,
goto error;
}
+ /* Migration was paused before serializing device state, let's return to
+ * the caller so that it can finish all block jobs, resume migration, and
+ * wait again for the real end of the migration.
+ */
+ if (flags & QEMU_MIGRATION_COMPLETED_PRE_SWITCHOVER &&
+ jobInfo->status == QEMU_DOMAIN_JOB_STATUS_PAUSED) {
+ VIR_DEBUG("Migration paused before switchover");
+ return 1;
+ }
+
/* In case of postcopy the source considers migration completed at the
* moment it switched from active to postcopy-active state. The destination
* will continue waiting until the migrate state changes to completed.
@@ -3600,6 +3610,28 @@ qemuMigrationConnect(virQEMUDriverPtr driver,
return ret;
}
+
+static int
+qemuMigrationContinue(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ qemuMonitorMigrationStatus status,
+ qemuDomainAsyncJob asyncJob)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ int ret;
+
+ if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
+ return -1;
+
+ ret = qemuMonitorMigrateContinue(priv->mon, status);
+
+ if (qemuDomainObjExitMonitor(driver, vm) < 0)
+ ret = -1;
+
+ return ret;
+}
+
+
static int
qemuMigrationRun(virQEMUDriverPtr driver,
virDomainObjPtr vm,
@@ -3769,6 +3801,12 @@ qemuMigrationRun(virQEMUDriverPtr driver,
QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto error;
+ if (qemuMigrationCapsGet(vm, QEMU_MONITOR_MIGRATION_CAPS_PAUSE_BEFORE_SWITCHOVER)
&&
+ qemuMigrationSetOption(driver, vm,
+ QEMU_MONITOR_MIGRATION_CAPS_PAUSE_BEFORE_SWITCHOVER,
+ true, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto error;
+
if (qemuMigrationSetParams(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT,
migParams) < 0)
goto error;
@@ -3847,7 +3885,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
fd = -1;
}
- waitFlags = 0;
+ waitFlags = QEMU_MIGRATION_COMPLETED_PRE_SWITCHOVER;
if (abort_on_error)
waitFlags |= QEMU_MIGRATION_COMPLETED_ABORT_ON_ERROR;
if (mig->nbd)
@@ -3889,6 +3927,30 @@ qemuMigrationRun(virQEMUDriverPtr driver,
dconn) < 0)
goto error;
+ /* When migration was paused before serializing device state we need to
+ * resume it now once we finished all block jobs and wait for the real
+ * end of the migration.
+ */
+ if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_PAUSED) {
+ if (qemuMigrationContinue(driver, vm,
+ QEMU_MONITOR_MIGRATION_STATUS_PRE_SWITCHOVER,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto error;
+
+ waitFlags ^= QEMU_MIGRATION_COMPLETED_PRE_SWITCHOVER;
+
+ rc = qemuMigrationWaitForCompletion(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT,
+ dconn, waitFlags);
+ if (rc == -2) {
+ goto error;
+ } else if (rc == -1) {
+ /* QEMU reported failed migration, nothing to cancel anymore */
+ cancel = false;
+ goto error;
+ }
+ }
+
if (iothread) {
qemuMigrationIOThreadPtr io;
--
2.14.2