This phase marks a migration as broken in its post-copy stage.
Libvirt is no longer actively watching the migration in this phase,
because the migration API that started the migration has failed.
This may either happen when post-copy migration really fails (QEMU
enters postcopy-paused migration state) or when the migration still
progresses between both QEMU processes, but libvirt lost control of it
because the connection between libvirt daemons (in p2p migration) or
between a daemon and a client (non-p2p migration) was closed — for
example, when one of the daemons was restarted.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
src/qemu/qemu_migration.c | 15 +++++++++++----
src/qemu/qemu_process.c | 16 +++++++++++++---
2 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 3f6921b4b2..c111dd8686 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -2369,6 +2369,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
vm->def->name);
if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@@ -2380,8 +2381,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
}
break;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuMigrationSrcPostcopyFailed(vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
@@ -2402,7 +2405,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
case QEMU_MIGRATION_PHASE_PERFORM2:
/* single phase outgoing migration; unreachable */
case QEMU_MIGRATION_PHASE_NONE:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_LAST:
/* unreachable */
;
@@ -3774,6 +3776,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver,
flags, cancelled);
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@@ -5607,6 +5610,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver,
virErrorPreserveLast(&orig_err);
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
} else {
@@ -5699,6 +5703,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver,
jobPriv->migParams, priv->job.apiFlags);
qemuMigrationJobFinish(vm);
} else {
+ if (ret < 0)
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
qemuMigrationJobContinue(vm);
}
@@ -5938,7 +5944,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver,
/* Guest is successfully running, so cancel previous auto destroy. There's
* nothing to remove when we are resuming post-copy migration.
*/
- if (!virDomainObjIsFailedPostcopy(vm))
+ if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)
qemuProcessAutoDestroyRemove(driver, vm);
/* Remove completed stats for post-copy, everything but timing fields
@@ -6205,6 +6211,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver,
}
if (virDomainObjIsFailedPostcopy(vm)) {
+ ignore_value(qemuMigrationJobSetPhase(vm,
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED));
qemuProcessAutoDestroyRemove(driver, vm);
qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
*finishJob = false;
@@ -6327,9 +6334,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver,
vm->def->name);
if (job == VIR_ASYNC_JOB_MIGRATION_IN)
- phase = QEMU_MIGRATION_PHASE_FINISH3;
+ phase = QEMU_MIGRATION_PHASE_FINISH_RESUME;
else
- phase = QEMU_MIGRATION_PHASE_CONFIRM3;
+ phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME;
if (qemuMigrationJobStartPhase(vm, phase) < 0)
return;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index c7ed0a5c56..f42c9a3018 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1624,7 +1624,8 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED,
* watching it in any thread. Let's make sure the migration is properly
* finished in case we get a "completed" event.
*/
- if (virDomainObjIsFailedPostcopy(vm) &&
+ if (virDomainObjIsPostcopy(vm, priv->job.current->operation) &&
+ priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
priv->job.asyncOwner == 0 &&
status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) {
struct qemuProcessEvent *proc = g_new0(struct qemuProcessEvent, 1);
@@ -3566,7 +3567,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
case QEMU_MIGRATION_PHASE_CONFIRM3:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
@@ -3604,6 +3604,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
}
break;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
return 1;
@@ -3639,7 +3640,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PREPARE:
case QEMU_MIGRATION_PHASE_FINISH2:
case QEMU_MIGRATION_PHASE_FINISH3:
- case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
case QEMU_MIGRATION_PHASE_FINISH_RESUME:
case QEMU_MIGRATION_PHASE_LAST:
@@ -3700,6 +3700,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
}
return 1;
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
return 1;
@@ -3751,9 +3752,18 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
return -1;
if (rc > 0) {
+ job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED;
+
if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
VIR_DEBUG("Post-copy migration of domain %s still running, it "
"will be handled as unattended", vm->def->name);
+
+ if (state == VIR_DOMAIN_RUNNING)
+ reason = VIR_DOMAIN_RUNNING_POSTCOPY;
+ else
+ reason = VIR_DOMAIN_PAUSED_POSTCOPY;
+
+ virDomainObjSetState(vm, state, reason);
qemuProcessRestoreMigrationJob(vm, job);
return 0;
}
--
2.35.1