This phase marks a migration protocol as broken in a post-copy phase.
Libvirt is no longer actively watching the migration in this phase
because the migration API that started the migration has failed.
This may happen either when post-copy migration really fails (QEMU
enters postcopy-paused migration state) or when the migration still
progresses between both QEMU processes, but libvirt lost control of it
because the connection between libvirt daemons (in p2p migration) or a
daemon and client (non-p2p migration) was closed, for example when one
of the daemons was restarted.
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
Reviewed-by: Peter Krempa <pkrempa(a)redhat.com>
Reviewed-by: Pavel Hrdina <phrdina(a)redhat.com>
---
Notes:
Version 2:
- moved most of the last hunk to a separate patch
src/qemu/qemu_migration.c | 15 +++++++++++----
src/qemu/qemu_process.c | 11 ++++++++---
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 88702c94e4..302589b63c 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2341,6 +2341,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
vm->def->name);
if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@@ -2352,8 +2353,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
}
break;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@@ -2374,7 +2377,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
case QEMU_MIGRATION_PHASE_PERFORM2:
/* single phase outgoing migration; unreachable */
case QEMU_MIGRATION_PHASE_NONE:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_LAST:
/* unreachable */
;
@@ -3744,6 +3746,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver,
flags, cancelled);
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@@ -5572,6 +5575,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver,
virErrorPreserveLast(&orig_err);
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@@ -5664,6 +5668,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver,
jobPriv->migParams, priv->job.apiFlags);
qemuMigrationJobFinish(vm);
} else {
+ if (ret < 0)
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
}
@@ -5903,7 +5909,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
/* Guest is successfully running, so cancel previous auto destroy. There's
* nothing to remove when we are resuming post-copy migration.
*/
- if (!virDomainObjIsFailedPostcopy(vm))
+ if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)
qemuProcessAutoDestroyRemove(driver, vm);
/* Remove completed stats for post-copy, everything but timing fields
@@ -6170,6 +6176,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver,
}
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuProcessAutoDestroyRemove(driver, vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
*finishJob = false;
@@ -6290,9 +6297,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver,
vm->def->name);
if (job == VIR_ASYNC_JOB_MIGRATION_IN)
- phase = QEMU_MIGRATION_PHASE_FINISH3;
+ phase = QEMU_MIGRATION_PHASE_FINISH_RESUME;
else
- phase = QEMU_MIGRATION_PHASE_CONFIRM3;
+ phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME;
if (qemuMigrationJobStartPhase(vm, phase) < 0)
return;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index f752668b2f..8a98c03395 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1555,9 +1555,12 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
* watching it in any thread. Let's make sure the migration is properly
* finished in case we get a "completed" event.
*/
- if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0)
+ if (virDomainObjIsPostcopy(vm, priv->job.current->operation) &&
+ priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
+ priv->job.asyncOwner == 0) {
qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
priv->job.asyncJob, status, NULL);
+ }
break;
case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
@@ -3507,7 +3510,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
case QEMU_MIGRATION_PHASE_CONFIRM3:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
@@ -3545,6 +3547,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
}
break;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
return 1;
@@ -3581,7 +3584,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PREPARE:
case QEMU_MIGRATION_PHASE_FINISH2:
case QEMU_MIGRATION_PHASE_FINISH3:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
case QEMU_MIGRATION_PHASE_LAST:
@@ -3643,6 +3645,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
}
return 1;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
return 1;
@@ -3694,6 +3697,8 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
return -1;
if (rc > 0) {
+ job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
+
if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
VIR_DEBUG("Post-copy migration of domain %s still running, it will be
handled as unattended",
vm->def->name);
--
2.35.1