On Tue, May 10, 2022 at 17:21:15 +0200, Jiri Denemark wrote:
Mostly we just need to check whether the domain is in a failed
post-copy
migration that can be resumed.
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
src/qemu/qemu_migration.c | 143 ++++++++++++++++++++++++++++++++++++++
1 file changed, 143 insertions(+)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index c111dd8686..99b1d4b88b 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2717,6 +2717,143 @@ qemuMigrationSrcBeginPhase(virQEMUDriver *driver,
flags);
}
+
+static bool
+qemuMigrationAnyCanResume(virDomainObj *vm,
+ virDomainAsyncJob job,
+ unsigned long flags,
+ qemuMigrationJobPhase expectedPhase)
+{
+ qemuDomainObjPrivate *priv = vm->privateData;
+
+ VIR_DEBUG("vm=%p, job=%s, flags=0x%lx, expectedPhase=%s",
+ vm, virDomainAsyncJobTypeToString(job), flags,
+ qemuDomainAsyncJobPhaseToString(VIR_ASYNC_JOB_MIGRATION_OUT,
+ expectedPhase));
+
+ if (!(flags & VIR_MIGRATE_POSTCOPY)) {
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("resuming failed post-copy migration requires "
+ "post-copy to be enabled"));
Error messages shall not have line breaks.
Also ... requiring this flag seems to be a bit pointless and ...
cosmetic at best. Since the previous migration would need to be started
with the post-copy flag already which is checked below.
+ return false;
+ }
+
+ /* This should never happen since POSTCOPY_RESUME is newer than
+ * CHANGE_PROTECTION, but let's check it anyway in case we're talking to
+ * a weired client.
+ */
+ if (job == VIR_ASYNC_JOB_MIGRATION_OUT &&
+ expectedPhase < QEMU_MIGRATION_PHASE_PERFORM_RESUME &&
+ !(flags & VIR_MIGRATE_CHANGE_PROTECTION)) {
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("resuming failed post-copy migration requires "
+ "change protection"));
Same here.
+ return NULL;
+ }
[...]
> +static char *
> +qemuMigrationSrcBeginResume(virQEMUDriver *driver,
> + virDomainObj *vm,
> + const char *xmlin,
> + char **cookieout,
> + int *cookieoutlen,
> + unsigned long flags)
> +{
> + virDomainJobStatus status;
> +
> + if (qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_MIGRATION_OUT,
> + &status) < 0)
> + return NULL;
> +
> + if (status != VIR_DOMAIN_JOB_STATUS_POSTCOPY_PAUSED) {
> + virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> + _("QEMU reports migration is still running"));
+ return NULL;
+ }
> +
> + return qemuMigrationSrcBeginXML(driver, vm, xmlin,
> + cookieout, cookieoutlen, 0, NULL, 0, flags);
Certain steps here around the non-shared-storage migration are redundant
at this point and IMO should be skipped.
+}
+
+
+static char *
+qemuMigrationSrcBeginResumePhase(virConnectPtr conn,
+ virQEMUDriver *driver,
+ virDomainObj *vm,
+ const char *xmlin,
+ char **cookieout,
+ int *cookieoutlen,
+ unsigned long flags)
+{
+ g_autofree char *xml = NULL;
+
+ VIR_DEBUG("vm=%p", vm);
+
+ if (!qemuMigrationAnyCanResume(vm, VIR_ASYNC_JOB_MIGRATION_OUT, flags,
+ QEMU_MIGRATION_PHASE_POSTCOPY_FAILED))
+ return NULL;
+
+ if (qemuMigrationJobStartPhase(vm, QEMU_MIGRATION_PHASE_BEGIN_RESUME) < 0)
+ return NULL;
+
+ virCloseCallbacksUnset(driver->closeCallbacks, vm,
+ qemuMigrationSrcCleanup);
+ qemuDomainCleanupRemove(vm, qemuProcessCleanupMigrationJob);
Could you please add an explanation why we are removing the callbacks
...
+
+ xml = qemuMigrationSrcBeginResume(driver, vm, xmlin, cookieout, cookieoutlen,
flags);
+
+ if (virCloseCallbacksSet(driver->closeCallbacks, vm, conn,
+ qemuMigrationSrcCleanup) < 0)
+ g_clear_pointer(&xml, g_free);
+
+ if (!xml)
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
+
+ qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
Just to add them here?
+ qemuMigrationJobContinue(vm);
+ return g_steal_pointer(&xml);
+}