[libvirt] [PATCH v2 0/3] qemu: don't duplicate suspended events and state changes

Patches 1 and 2 are already Reviewed-by: John. Patch 3 needs Peter comments. Diff from v1: ============ - minor rebase changes - minor changes according to review [1] PATCH v1 : https://www.redhat.com/archives/libvir-list/2018-October/msg00591.html Nikolay Shirokovskiy (3): qemu: Pass stop reason from qemuProcessStopCPUs to stop handler qemu: Map suspended state reason to suspended event detail qemu: Don't duplicate suspend events and state changes src/qemu/qemu_domain.c | 34 ++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 7 +++++++ src/qemu/qemu_driver.c | 26 +++----------------------- src/qemu/qemu_migration.c | 42 ++++++------------------------------------ src/qemu/qemu_migration.h | 4 ---- src/qemu/qemu_process.c | 38 +++++++++++++++++++++++++------------- 6 files changed, 75 insertions(+), 76 deletions(-) -- 1.8.3.1

Similar to commit [1] which saves and passes the running reason to the RESUME event handler, during qemuProcessStopCPUs let's save and pass the pause reason in the domain private data so that the STOP event handler can use it. [1] 5dab984ed : qemu: Pass running reason to RESUME event handler Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_domain.h | 4 ++++ src/qemu/qemu_process.c | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index fe47417..230c1e1 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -369,6 +369,10 @@ struct _qemuDomainObjPrivate { * RESUME event handler to use it */ virDomainRunningReason runningReason; + /* qemuProcessStopCPUs stores the reason for pausing vCPUs here for the + * STOP event handler to use it */ + virDomainPausedReason pausedReason; + /* true if libvirt remembers the original owner for files */ bool rememberOwner; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0583eb0..6f21962 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -641,14 +641,17 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, { virQEMUDriverPtr driver = opaque; virObjectEventPtr event = NULL; - virDomainPausedReason reason = VIR_DOMAIN_PAUSED_UNKNOWN; + virDomainPausedReason reason; virDomainEventSuspendedDetailType detail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + qemuDomainObjPrivatePtr priv = vm->privateData; virObjectLock(vm); + + reason = priv->pausedReason; + priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN; + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { - qemuDomainObjPrivatePtr priv = vm->privateData; - if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) { if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { @@ -3231,6 +3234,8 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, VIR_FREE(priv->lockState); + priv->pausedReason = reason; + if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) goto cleanup; @@ -3253,6 +3258,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); cleanup: + if (ret < 0) + priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN; + return ret; } @@ -6099,6 +6107,7 @@ qemuProcessPrepareDomain(virQEMUDriverPtr driver, priv->monError = false; priv->monStart = 0; priv->runningReason = VIR_DOMAIN_RUNNING_UNKNOWN; + priv->pausedReason = VIR_DOMAIN_PAUSED_UNKNOWN; VIR_DEBUG("Updating guest CPU definition"); if (qemuProcessUpdateGuestCPU(vm->def, priv->qemuCaps, caps, flags) < 0) -- 1.8.3.1

Map is based on existing cases in code where we send suspended event after changing domain state to paused. Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_domain.c | 34 ++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 3 +++ src/qemu/qemu_process.c | 17 ++++++++--------- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index b6c1a0e..bc4dc5b 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -13926,3 +13926,37 @@ qemuDomainDiskIsMissingLocalOptional(virDomainDiskDefPtr disk) virStorageSourceIsLocalStorage(disk->src) && disk->src->path && !virFileExists(disk->src->path); } + + +virDomainEventSuspendedDetailType +qemuDomainPausedReasonToSuspendedEvent(virDomainPausedReason reason) +{ + switch (reason) { + case VIR_DOMAIN_PAUSED_MIGRATION: + return VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED; + + case VIR_DOMAIN_PAUSED_FROM_SNAPSHOT: + return VIR_DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT; + + case VIR_DOMAIN_PAUSED_POSTCOPY_FAILED: + return VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED; + + case VIR_DOMAIN_PAUSED_POSTCOPY: + return VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY; + + case VIR_DOMAIN_PAUSED_UNKNOWN: + case VIR_DOMAIN_PAUSED_USER: + case VIR_DOMAIN_PAUSED_SAVE: + case VIR_DOMAIN_PAUSED_DUMP: + case VIR_DOMAIN_PAUSED_IOERROR: + case VIR_DOMAIN_PAUSED_WATCHDOG: + case VIR_DOMAIN_PAUSED_SHUTTING_DOWN: + case VIR_DOMAIN_PAUSED_SNAPSHOT: + case VIR_DOMAIN_PAUSED_CRASHED: + case VIR_DOMAIN_PAUSED_STARTING_UP: + case VIR_DOMAIN_PAUSED_LAST: + break; + } + + return VIR_DOMAIN_EVENT_SUSPENDED_PAUSED; +} diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 230c1e1..78abc14 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -1113,4 +1113,7 @@ qemuDomainIsUsingNoShutdown(qemuDomainObjPrivatePtr priv); bool qemuDomainDiskIsMissingLocalOptional(virDomainDiskDefPtr disk); +virDomainEventSuspendedDetailType +qemuDomainPausedReasonToSuspendedEvent(virDomainPausedReason reason); + #endif /* LIBVIRT_QEMU_DOMAIN_H */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 6f21962..e0d5320 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -642,7 +642,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virQEMUDriverPtr driver = opaque; virObjectEventPtr event = NULL; virDomainPausedReason reason; - virDomainEventSuspendedDetailType detail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED; + virDomainEventSuspendedDetailType detail; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); qemuDomainObjPrivatePtr priv = vm->privateData; @@ -653,18 +653,17 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) { - if (priv->job.current->status == - QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) reason = VIR_DOMAIN_PAUSED_POSTCOPY; - detail = VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY; - } else { + else reason = VIR_DOMAIN_PAUSED_MIGRATION; - detail = VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED; - } } - VIR_DEBUG("Transitioned guest %s to paused state, reason %s", - vm->def->name, virDomainPausedReasonTypeToString(reason)); + detail = qemuDomainPausedReasonToSuspendedEvent(reason); + VIR_DEBUG("Transitioned guest %s to paused state, " + "reason %s, event detail %d", + vm->def->name, virDomainPausedReasonTypeToString(reason), + detail); if (priv->job.current) ignore_value(virTimeMillisNow(&priv->job.current->stopped)); -- 1.8.3.1

Since the STOP event handler can use the pausedReason as sent to qemuProcessStopCPUs, we no longer need to send duplicate suspended lifecycle events because we know what caused the stop along with extra details. This processing allows us to also remove the duplicated state change from qemuProcessStopCPUs. Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_driver.c | 26 +++----------------------- src/qemu/qemu_migration.c | 42 ++++++------------------------------------ src/qemu/qemu_migration.h | 4 ---- src/qemu/qemu_process.c | 6 +++++- 4 files changed, 14 insertions(+), 64 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 427c1d0..78079ef 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1790,10 +1790,8 @@ static int qemuDomainSuspend(virDomainPtr dom) virQEMUDriverPtr driver = dom->conn->privateData; virDomainObjPtr vm; int ret = -1; - virObjectEventPtr event = NULL; qemuDomainObjPrivatePtr priv; virDomainPausedReason reason; - int eventDetail; int state; virQEMUDriverConfigPtr cfg = NULL; @@ -1812,16 +1810,12 @@ static int qemuDomainSuspend(virDomainPtr dom) if (virDomainObjCheckActive(vm) < 0) goto endjob; - if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) { + if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) reason = VIR_DOMAIN_PAUSED_MIGRATION; - eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED; - } else if (priv->job.asyncJob == QEMU_ASYNC_JOB_SNAPSHOT) { + else if (priv->job.asyncJob == QEMU_ASYNC_JOB_SNAPSHOT) reason = VIR_DOMAIN_PAUSED_SNAPSHOT; - eventDetail = -1; /* don't create lifecycle events when doing snapshot */ - } else { + else reason = VIR_DOMAIN_PAUSED_USER; - eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED; - } state = virDomainObjGetState(vm, NULL); if (state == VIR_DOMAIN_PMSUSPENDED) { @@ -1831,12 +1825,6 @@ static int qemuDomainSuspend(virDomainPtr dom) } else if (state != VIR_DOMAIN_PAUSED) { if (qemuProcessStopCPUs(driver, vm, reason, QEMU_ASYNC_JOB_NONE) < 0) goto endjob; - - if (eventDetail >= 0) { - event = virDomainEventLifecycleNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - eventDetail); - } } if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) goto endjob; @@ -1848,7 +1836,6 @@ static int qemuDomainSuspend(virDomainPtr dom) cleanup: virDomainObjEndAPI(&vm); - virObjectEventStateQueue(driver->domainEventState, event); virObjectUnref(cfg); return ret; } @@ -16467,13 +16454,6 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT, QEMU_ASYNC_JOB_START) < 0) goto endjob; - /* Create an event now in case the restore fails, so - * that user will be alerted that they are now paused. - * If restore later succeeds, we might replace this. */ - detail = VIR_DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT; - event = virDomainEventLifecycleNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - detail); if (!virDomainObjIsActive(vm)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 1433b2c..2e7e356 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1269,29 +1269,6 @@ qemuMigrationSrcIsSafe(virDomainDefPtr def, return true; } -/** qemuMigrationSrcSetOffline - * Pause domain for non-live migration. - */ -int -qemuMigrationSrcSetOffline(virQEMUDriverPtr driver, - virDomainObjPtr vm) -{ - int ret; - VIR_DEBUG("driver=%p vm=%p", driver, vm); - ret = qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_MIGRATION, - QEMU_ASYNC_JOB_MIGRATION_OUT); - if (ret == 0) { - virObjectEventPtr event; - - event = virDomainEventLifecycleNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED); - virObjectEventStateQueue(driver->domainEventState, event); - } - - return ret; -} - void qemuMigrationAnyPostcopyFailed(virQEMUDriverPtr driver, @@ -1314,19 +1291,10 @@ qemuMigrationAnyPostcopyFailed(virQEMUDriverPtr driver, "leaving the domain paused", vm->def->name); if (state == VIR_DOMAIN_RUNNING) { - virObjectEventPtr event; - if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_POSTCOPY_FAILED, - QEMU_ASYNC_JOB_MIGRATION_IN) < 0) { + QEMU_ASYNC_JOB_MIGRATION_IN) < 0) VIR_WARN("Unable to pause guest CPUs for %s", vm->def->name); - return; - } - - event = virDomainEventLifecycleNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED); - virObjectEventStateQueue(driver->domainEventState, event); } else { virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_POSTCOPY_FAILED); @@ -3544,10 +3512,11 @@ qemuMigrationSrcRun(virQEMUDriverPtr driver, } } - /* Before EnterMonitor, since qemuMigrationSetOffline already does that */ + /* Before EnterMonitor, since already qemuProcessStopCPUs does that */ if (!(flags & VIR_MIGRATE_LIVE) && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { - if (qemuMigrationSrcSetOffline(driver, vm) < 0) + if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_MIGRATION, + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) goto error; } @@ -3657,7 +3626,8 @@ qemuMigrationSrcRun(virQEMUDriverPtr driver, goto error; } } else if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING && - qemuMigrationSrcSetOffline(driver, vm) < 0) { + qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_MIGRATION, + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) { goto error; } diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 976a723..1439ccf 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -98,10 +98,6 @@ typedef enum { } qemuMigrationJobPhase; VIR_ENUM_DECL(qemuMigrationJobPhase); -int -qemuMigrationSrcSetOffline(virQEMUDriverPtr driver, - virDomainObjPtr vm); - char * qemuMigrationSrcBegin(virConnectPtr conn, virDomainObjPtr vm, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index e0d5320..a11f071 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3251,7 +3251,11 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, if (priv->job.current) ignore_value(virTimeMillisNow(&priv->job.current->stopped)); - virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); + /* The STOP event handler will change the domain state with the reason + * saved in priv->pausedReason and it will also emit corresponding domain + * lifecycle event. + */ + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) VIR_WARN("Unable to release lease on %s", vm->def->name); VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); -- 1.8.3.1

On 2/8/19 2:52 AM, Nikolay Shirokovskiy wrote:
Patches 1 and 2 are already Reviewed-by: John. Patch 3 needs Peter comments.
Right - feel free to add my : Reviewed-by: John Ferlan <jferlan@redhat.com> to the first 2 patches for sure. To help push this along, Peter is again CC'd and of importance is the v1 review of patch3: https://www.redhat.com/archives/libvir-list/2018-October/msg00831.html where I noted a specific commit and case to be considered. John
Diff from v1: ============ - minor rebase changes - minor changes according to review
[1] PATCH v1 : https://www.redhat.com/archives/libvir-list/2018-October/msg00591.html
Nikolay Shirokovskiy (3): qemu: Pass stop reason from qemuProcessStopCPUs to stop handler qemu: Map suspended state reason to suspended event detail qemu: Don't duplicate suspend events and state changes
src/qemu/qemu_domain.c | 34 ++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 7 +++++++ src/qemu/qemu_driver.c | 26 +++----------------------- src/qemu/qemu_migration.c | 42 ++++++------------------------------------ src/qemu/qemu_migration.h | 4 ---- src/qemu/qemu_process.c | 38 +++++++++++++++++++++++++------------- 6 files changed, 75 insertions(+), 76 deletions(-)

ping On 13.02.2019 19:55, John Ferlan wrote:
On 2/8/19 2:52 AM, Nikolay Shirokovskiy wrote:
Patches 1 and 2 are already Reviewed-by: John. Patch 3 needs Peter comments.
Right - feel free to add my :
Reviewed-by: John Ferlan <jferlan@redhat.com>
to the first 2 patches for sure.
To help push this along, Peter is again CC'd and of importance is the v1 review of patch3:
https://www.redhat.com/archives/libvir-list/2018-October/msg00831.html
where I noted a specific commit and case to be considered.
John
Diff from v1: ============ - minor rebase changes - minor changes according to review
[1] PATCH v1 : https://www.redhat.com/archives/libvir-list/2018-October/msg00591.html
Nikolay Shirokovskiy (3): qemu: Pass stop reason from qemuProcessStopCPUs to stop handler qemu: Map suspended state reason to suspended event detail qemu: Don't duplicate suspend events and state changes
src/qemu/qemu_domain.c | 34 ++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 7 +++++++ src/qemu/qemu_driver.c | 26 +++----------------------- src/qemu/qemu_migration.c | 42 ++++++------------------------------------ src/qemu/qemu_migration.h | 4 ---- src/qemu/qemu_process.c | 38 +++++++++++++++++++++++++------------- 6 files changed, 75 insertions(+), 76 deletions(-)

On 3/27/19 7:53 AM, Nikolay Shirokovskiy wrote:
ping
On 13.02.2019 19:55, John Ferlan wrote:
On 2/8/19 2:52 AM, Nikolay Shirokovskiy wrote:
Patches 1 and 2 are already Reviewed-by: John. Patch 3 needs Peter comments.
Right - feel free to add my :
Reviewed-by: John Ferlan <jferlan@redhat.com>
to the first 2 patches for sure.
To help push this along, Peter is again CC'd and of importance is the v1 review of patch3:
https://www.redhat.com/archives/libvir-list/2018-October/msg00831.html
where I noted a specific commit and case to be considered.
To dredge up more context, John is talking about Peter's commit: commit f569b87f5193a69c50ca714734013cc4f82250f1 Author: Peter Krempa <pkrempa@redhat.com> Date: Mon Oct 8 19:38:44 2012 +0200 snapshot: qemu: Add support for external checkpoints Which added this chunk:
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 9358ad99ed..128b98ee22 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1673,6 +1673,9 @@ static int qemudDomainSuspend(virDomainPtr dom) { if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) { reason = VIR_DOMAIN_PAUSED_MIGRATION; eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED; + } else if (priv->job.asyncJob == QEMU_ASYNC_JOB_SNAPSHOT) { + reason = VIR_DOMAIN_PAUSED_SNAPSHOT; + eventDetail = -1; /* don't create lifecycle events when doing snapshot */ } else { reason = VIR_DOMAIN_PAUSED_USER; eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED; @@ -1687,9 +1690,12 @@ static int qemudDomainSuspend(virDomainPtr dom) { if (qemuProcessStopCPUs(driver, vm, reason, QEMU_ASYNC_JOB_NONE) < 0) { goto endjob; } - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - eventDetail); + + if (eventDetail >= 0) { + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + eventDetail); + } } if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) goto endjob;
Which disables sending a VIR_DOMAIN_EVENT_SUSPENDED event when we suspend the VM as an implementation detail of creating an external snapshot checkpoint. Nikolay's series removes that case, so AFAICT checkpoints will now send SUSPENDED events. Current git doesn't perform the same event skippage in the managed save case (where SUSPENDED event is emitted), or the internal snapshot case (where SUSPEND+RESUME are emitted when taking a live snapshot). I can't think of any particular reason why the checkpoint case needs to be special here. Maybe that's an argument for fixing all of these cases, though virt-manager which uses internal snapshots has had no problem with SUSPEND/RESUME events in this area so there's some proof apps aren't going to choke on a similar case IMO patches have been reviewed for almost 2 months, no one has objected, so push em :) Thanks, Cole
participants (3)
-
Cole Robinson
-
John Ferlan
-
Nikolay Shirokovskiy