
On Tue, May 10, 2022 at 17:21:15 +0200, Jiri Denemark wrote:
Mostly we just need to check whether the domain is in a failed post-copy migration that can be resumed.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 143 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index c111dd8686..99b1d4b88b 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2717,6 +2717,143 @@ qemuMigrationSrcBeginPhase(virQEMUDriver *driver, flags); }
+ +static bool +qemuMigrationAnyCanResume(virDomainObj *vm, + virDomainAsyncJob job, + unsigned long flags, + qemuMigrationJobPhase expectedPhase) +{ + qemuDomainObjPrivate *priv = vm->privateData; + + VIR_DEBUG("vm=%p, job=%s, flags=0x%lx, expectedPhase=%s", + vm, virDomainAsyncJobTypeToString(job), flags, + qemuDomainAsyncJobPhaseToString(VIR_ASYNC_JOB_MIGRATION_OUT, + expectedPhase)); + + if (!(flags & VIR_MIGRATE_POSTCOPY)) { + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("resuming failed post-copy migration requires " + "post-copy to be enabled"));
Error messages shall not have line breaks. Also ... requiring this flag seems to be a bit pointless and ... cosmetic at best. Since the previous migration would need to be started with the post-copy flag already which is checked below.
+ return false; + } + + /* This should never happen since POSTCOPY_RESUME is newer than + * CHANGE_PROTECTION, but let's check it anyway in case we're talking to + * a weired client. + */ + if (job == VIR_ASYNC_JOB_MIGRATION_OUT && + expectedPhase < QEMU_MIGRATION_PHASE_PERFORM_RESUME && + !(flags & VIR_MIGRATE_CHANGE_PROTECTION)) { + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("resuming failed post-copy migration requires " + "change protection"));
Same here.
+ return NULL; + }
[...]
+static char * +qemuMigrationSrcBeginResume(virQEMUDriver *driver, + virDomainObj *vm, + const char *xmlin, + char **cookieout, + int *cookieoutlen, + unsigned long flags) +{ + virDomainJobStatus status; + + if (qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_MIGRATION_OUT, + &status) < 0) + return NULL; + + if (status != VIR_DOMAIN_JOB_STATUS_POSTCOPY_PAUSED) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("QEMU reports migration is still running")); + return NULL; + } + + return qemuMigrationSrcBeginXML(driver, vm, xmlin, + cookieout, cookieoutlen, 0, NULL, 0, flags);
Certain steps here around the non-shared-storage migration are redundant at this point and IMO should be skipped.
+} + + +static char * +qemuMigrationSrcBeginResumePhase(virConnectPtr conn, + virQEMUDriver *driver, + virDomainObj *vm, + const char *xmlin, + char **cookieout, + int *cookieoutlen, + unsigned long flags) +{ + g_autofree char *xml = NULL; + + VIR_DEBUG("vm=%p", vm); + + if (!qemuMigrationAnyCanResume(vm, VIR_ASYNC_JOB_MIGRATION_OUT, flags, + QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)) + return NULL; + + if (qemuMigrationJobStartPhase(vm, QEMU_MIGRATION_PHASE_BEGIN_RESUME) < 0) + return NULL; + + virCloseCallbacksUnset(driver->closeCallbacks, vm, + qemuMigrationSrcCleanup); + qemuDomainCleanupRemove(vm, qemuProcessCleanupMigrationJob);
Could you please add an explanation why we are removing the callbacks ...
+ + xml = qemuMigrationSrcBeginResume(driver, vm, xmlin, cookieout, cookieoutlen, flags); + + if (virCloseCallbacksSet(driver->closeCallbacks, vm, conn, + qemuMigrationSrcCleanup) < 0) + g_clear_pointer(&xml, g_free); + + if (!xml) + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); + + qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
Just to add them here?
+ qemuMigrationJobContinue(vm); + return g_steal_pointer(&xml); +}