So far migration could only be completed while a migration API was
running and waiting for the migration to finish. In case such API could
not be called (the connection that initiated the migration is broken)
the migration would just be aborted or left in a "don't know what to do"
state. But this will change soon and we will be able to successfully
complete such migration once we get the corresponding event from QEMU.
This is specific to post-copy migration when vCPUs are already running
on the destination and we're only waiting for all memory pages to be
transferred. Such post-copy migration (which no one is actively
watching) is called unattended migration.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Pavel Hrdina <phrdina@redhat.com>
---
Notes:
Version 2:
- no change
src/qemu/qemu_domain.c | 1 +
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_driver.c | 5 +++++
src/qemu/qemu_migration.c | 43 +++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_migration.h | 6 ++++++
src/qemu/qemu_process.c | 12 ++++++++++-
6 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 5dee9c6f26..d04ec6cd0c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -11114,6 +11114,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
qemuMonitorMemoryDeviceSizeChangeFree(event->data);
break;
case QEMU_PROCESS_EVENT_PR_DISCONNECT:
+ case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION:
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index ce2dba499c..153dfe3a23 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -426,6 +426,7 @@ typedef enum {
QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED,
QEMU_PROCESS_EVENT_GUEST_CRASHLOADED,
QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE,
+ QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
QEMU_PROCESS_EVENT_LAST
} qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 28cb454ab7..4edf5635c0 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4307,6 +4307,11 @@ static void qemuProcessEventHandler(void *data, void *opaque)
case QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE:
processMemoryDeviceSizeChange(driver, vm, processEvent->data);
break;
+ case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION:
+ qemuMigrationProcessUnattended(driver, vm,
+ processEvent->action,
+ processEvent->status);
+ break;
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 95b69108dc..d427840d14 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -5811,8 +5811,11 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
qemuDomainSaveStatus(vm);
- /* Guest is successfully running, so cancel previous auto destroy */
- qemuProcessAutoDestroyRemove(driver, vm);
+ /* Guest is successfully running, so cancel previous auto destroy. There's
+ * nothing to remove when we are resuming post-copy migration.
+ */
+ if (!virDomainObjIsFailedPostcopy(vm))
+ qemuProcessAutoDestroyRemove(driver, vm);
/* Remove completed stats for post-copy, everything but timing fields
* is obsolete anyway.
@@ -6179,6 +6182,42 @@ qemuMigrationDstFinish(virQEMUDriver *driver,
}
+void
+qemuMigrationProcessUnattended(virQEMUDriver *driver,
+ virDomainObj *vm,
+ virDomainAsyncJob job,
+ qemuMonitorMigrationStatus status)
+{
+ qemuDomainObjPrivate *priv = vm->privateData;
+ qemuMigrationJobPhase phase;
+
+ if (!qemuMigrationJobIsActive(vm, job) ||
+ status != QEMU_MONITOR_MIGRATION_STATUS_COMPLETED)
+ return;
+
+ VIR_DEBUG("Unattended %s migration of domain %s successfully finished",
+ job == VIR_ASYNC_JOB_MIGRATION_IN ? "incoming" : "outgoing",
+ vm->def->name);
+
+ if (job == VIR_ASYNC_JOB_MIGRATION_IN)
+ phase = QEMU_MIGRATION_PHASE_FINISH3;
+ else
+ phase = QEMU_MIGRATION_PHASE_CONFIRM3;
+
+ qemuMigrationJobStartPhase(vm, phase);
+
+ if (job == VIR_ASYNC_JOB_MIGRATION_IN)
+ qemuMigrationDstComplete(driver, vm, true, job, &priv->job);
+ else
+ qemuMigrationSrcComplete(driver, vm, job);
+
+ qemuMigrationJobFinish(vm);
+
+ if (!virDomainObjIsActive(vm))
+ qemuDomainRemoveInactive(driver, vm);
+}
+
+
/* Helper function called while vm is active. */
int
qemuMigrationSrcToFile(virQEMUDriver *driver, virDomainObj *vm,
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index c099cf99cf..eeb69a52bf 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -211,6 +211,12 @@ qemuMigrationSrcComplete(virQEMUDriver *driver,
virDomainObj *vm,
virDomainAsyncJob asyncJob);
+void
+qemuMigrationProcessUnattended(virQEMUDriver *driver,
+ virDomainObj *vm,
+ virDomainAsyncJob job,
+ qemuMonitorMigrationStatus status);
+
bool
qemuMigrationSrcIsAllowed(virQEMUDriver *driver,
virDomainObj *vm,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index d3769de496..97d84893be 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1549,12 +1549,22 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
}
break;
+ case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED:
+ /* A post-copy migration marked as failed when reconnecting to a domain
+ * with running migration may actually still be running, but we're not
+ * watching it in any thread. Let's make sure the migration is properly
+ * finished in case we get a "completed" event.
+ */
+ if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0)
+ qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
+ priv->job.asyncJob, status, NULL);
+ break;
+
case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
case QEMU_MONITOR_MIGRATION_STATUS_SETUP:
case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE:
case QEMU_MONITOR_MIGRATION_STATUS_PRE_SWITCHOVER:
case QEMU_MONITOR_MIGRATION_STATUS_DEVICE:
- case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED:
case QEMU_MONITOR_MIGRATION_STATUS_ERROR:
case QEMU_MONITOR_MIGRATION_STATUS_CANCELLING:
case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED:
--
2.35.1