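When recovering from a failed post-copy migration, we need to go through
all migration phases again, but we don't need to repeat all the steps in
each phase. Let's create a new set of migration phases dedicated to
post-copy recovery so that we can easily distinguish between normal and
recovery code. The new phases are placed after a
QEMU_MIGRATION_PHASE_POSTCOPY_FAILED marker, and the "migration protocol
going backwards" check in qemuMigrationJobSetPhase is now applied only
to phases before that marker.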
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
src/qemu/qemu_migration.c | 20 +++++++++++++++++++-
src/qemu/qemu_migration.h | 6 ++++++
src/qemu/qemu_process.c | 25 ++++++++++++++++++++++++-
3 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 7299bb6a0b..301d9db1d2 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -79,6 +79,12 @@ VIR_ENUM_IMPL(qemuMigrationJobPhase,
"prepare",
"finish2",
"finish3",
+ "postcopy_failed",
+ "begin_resume",
+ "perform_resume",
+ "confirm_resume",
+ "prepare_resume",
+ "finish_resume",
);
@@ -139,7 +145,8 @@ qemuMigrationJobSetPhase(virDomainObj *vm,
{
qemuDomainObjPrivate *priv = vm->privateData;
- if (phase < priv->job.phase) {
+ if (phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED &&
+ phase < priv->job.phase) {
VIR_ERROR(_("migration protocol going backwards %s => %s"),
qemuMigrationJobPhaseTypeToString(priv->job.phase),
qemuMigrationJobPhaseTypeToString(phase));
@@ -2356,18 +2363,29 @@ qemuMigrationSrcCleanup(virDomainObj *vm,
}
break;
+ case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+ case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+ qemuMigrationSrcPostcopyFailed(vm);
+ qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob);
+ qemuMigrationJobContinue(vm);
+ break;
+
case QEMU_MIGRATION_PHASE_PERFORM3:
/* cannot be seen without an active migration API; unreachable */
case QEMU_MIGRATION_PHASE_CONFIRM3:
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
+ case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
/* all done; unreachable */
case QEMU_MIGRATION_PHASE_PREPARE:
case QEMU_MIGRATION_PHASE_FINISH2:
case QEMU_MIGRATION_PHASE_FINISH3:
+ case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+ case QEMU_MIGRATION_PHASE_FINISH_RESUME:
/* incoming migration; unreachable */
case QEMU_MIGRATION_PHASE_PERFORM2:
/* single phase outgoing migration; unreachable */
case QEMU_MIGRATION_PHASE_NONE:
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
case QEMU_MIGRATION_PHASE_LAST:
/* unreachable */
;
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index 9351d6ac51..7eb0d4fe02 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -100,6 +100,12 @@ typedef enum {
QEMU_MIGRATION_PHASE_PREPARE,
QEMU_MIGRATION_PHASE_FINISH2,
QEMU_MIGRATION_PHASE_FINISH3,
+ QEMU_MIGRATION_PHASE_POSTCOPY_FAILED, /* marker for resume phases */
+ QEMU_MIGRATION_PHASE_BEGIN_RESUME,
+ QEMU_MIGRATION_PHASE_PERFORM_RESUME,
+ QEMU_MIGRATION_PHASE_CONFIRM_RESUME,
+ QEMU_MIGRATION_PHASE_PREPARE_RESUME,
+ QEMU_MIGRATION_PHASE_FINISH_RESUME,
QEMU_MIGRATION_PHASE_LAST
} qemuMigrationJobPhase;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 1cb00af6f1..c7ed0a5c56 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3566,6 +3566,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
case QEMU_MIGRATION_PHASE_CONFIRM3:
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
+ case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+ case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+ case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
case QEMU_MIGRATION_PHASE_LAST:
/* N/A for incoming migration */
break;
@@ -3599,6 +3603,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver,
return -1;
}
break;
+
+ case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+ case QEMU_MIGRATION_PHASE_FINISH_RESUME:
+ return 1;
}
return 0;
@@ -3615,6 +3623,7 @@ static int
qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
virDomainObj *vm,
qemuDomainJobObj *job,
+ virDomainJobStatus migStatus,
virDomainState state,
int reason,
unsigned int *stopFlags)
@@ -3630,6 +3639,9 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
case QEMU_MIGRATION_PHASE_PREPARE:
case QEMU_MIGRATION_PHASE_FINISH2:
case QEMU_MIGRATION_PHASE_FINISH3:
+ case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED:
+ case QEMU_MIGRATION_PHASE_PREPARE_RESUME:
+ case QEMU_MIGRATION_PHASE_FINISH_RESUME:
case QEMU_MIGRATION_PHASE_LAST:
/* N/A for outgoing migration */
break;
@@ -3680,6 +3692,17 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver,
/* migration completed, we need to kill the domain here */
*stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
return -1;
+
+ case QEMU_MIGRATION_PHASE_CONFIRM_RESUME:
+ if (migStatus == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) {
+ *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
+ return -1;
+ }
+ return 1;
+
+ case QEMU_MIGRATION_PHASE_BEGIN_RESUME:
+ case QEMU_MIGRATION_PHASE_PERFORM_RESUME:
+ return 1;
}
if (resume) {
@@ -3718,7 +3741,7 @@ qemuProcessRecoverMigration(virQEMUDriver *driver,
qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_NONE, &migStatus);
if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) {
- rc = qemuProcessRecoverMigrationOut(driver, vm, job,
+ rc = qemuProcessRecoverMigrationOut(driver, vm, job, migStatus,
state, reason, stopFlags);
} else {
rc = qemuProcessRecoverMigrationIn(driver, vm, job, state);
--
2.35.1