
On Tue, May 10, 2022 at 17:21:08 +0200, Jiri Denemark wrote:
The check can reveal a serious bug in our migration code and we should not silently ignore it.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 58 ++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 22 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 5b6073b963..1c5dd9b391 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -147,9 +147,10 @@ qemuMigrationCheckPhase(virDomainObj *vm,
if (phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED && phase < priv->job.phase) { - VIR_ERROR(_("migration protocol going backwards %s => %s"), - qemuMigrationJobPhaseTypeToString(priv->job.phase), - qemuMigrationJobPhaseTypeToString(phase)); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("migration protocol going backwards %s => %s"), + qemuMigrationJobPhaseTypeToString(priv->job.phase), + qemuMigrationJobPhaseTypeToString(phase));
This bit seems to belongs to the previous commit actually, but since it's not used anywhere else ...
return -1; }
[...]
@@ -4920,7 +4928,7 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriver *driver, * until the migration is complete. */ VIR_DEBUG("Perform %p", sconn); - qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM2); + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM2)); if (flags & VIR_MIGRATE_TUNNELLED) ret = qemuMigrationSrcPerformTunnel(driver, vm, st, NULL, NULL, 0, NULL, NULL, @@ -5164,7 +5172,7 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriver *driver, * confirm migration completion. */ VIR_DEBUG("Perform3 %p uri=%s", sconn, NULLSTR(uri)); - qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3); + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3)); VIR_FREE(cookiein); cookiein = g_steal_pointer(&cookieout); cookieinlen = cookieoutlen;
Any reason why you want to ignore this before the migration was performed?
@@ -5189,7 +5197,7 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriver *driver, if (ret < 0) { virErrorPreserveLast(&orig_err); } else { - qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE); + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE)); }
/* If Perform returns < 0, then we need to cancel the VM
I could somehwat understand it here after the migration is done, but a bug could be also in this code.
@@ -5657,7 +5667,9 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver, return ret; }
- qemuMigrationJobStartPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3); + if (qemuMigrationJobStartPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3) < 0) + goto endjob; + virCloseCallbacksUnset(driver->closeCallbacks, vm, qemuMigrationSrcCleanup);
@@ -5671,7 +5683,7 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver, goto endjob; }
- qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE); + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE));
if (virCloseCallbacksSet(driver->closeCallbacks, vm, conn, qemuMigrationSrcCleanup) < 0)
Same here.