[libvirt PATCH 0/3] Introduce and use VIR_DOMAIN_PAUSED_API_ERROR

See patch 2/3 for details. Jiri Denemark (3): Clarify VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR semantics Introduce VIR_DOMAIN_PAUSED_API_ERROR qemu_migration: Use VIR_DOMAIN_PAUSED_API_ERROR include/libvirt/libvirt-domain.h | 3 ++- src/conf/domain_conf.c | 1 + src/qemu/qemu_domain.c | 3 +++ src/qemu/qemu_driver.c | 8 ++++++++ src/qemu/qemu_migration.c | 10 ++++++++++ src/qemu/qemu_snapshot.c | 8 ++++++++ tools/virsh-domain-monitor.c | 1 + 7 files changed, 33 insertions(+), 1 deletion(-) -- 2.39.2

Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/libvirt-domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 5152ed4551..53cab6bd4c 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -3823,7 +3823,7 @@ typedef enum { VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG = 3, /* Suspended due to a watchdog firing (Since: 0.8.0) */ VIR_DOMAIN_EVENT_SUSPENDED_RESTORED = 4, /* Restored from paused state file (Since: 0.9.5) */ VIR_DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT = 5, /* Restored from paused snapshot (Since: 0.9.5) */ - VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR = 6, /* suspended after failure during libvirt API call (Since: 1.0.1) */ + VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR = 6, /* Some APIs (e.g., migration, snapshot) internally need to suspend a domain. This event detail is used when resume operation at the end of such API fails. (Since: 1.0.1) */ VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7, /* suspended for post-copy migration (Since: 1.3.3) */ VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED = 8, /* suspended after failed post-copy (Since: 1.3.3) */ -- 2.39.2

Some APIs (migration, save/restore, snapshot, ...) require a domain to be suspended temporarily. In case resuming the domain fails, the domain will be unexpectedly left paused when the API finishes. This situation is reported via VIR_DOMAIN_EVENT_SUSPENDED event with VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR detail. But we do not have a corresponding reason for VIR_DOMAIN_PAUSED state and the reason would remain set to the value used when the domain was paused. So the state reason would suggest the operation is still running. This patch changes the state reason to a new VIR_DOMAIN_PAUSED_API_ERROR to make it clear the API that paused the domain already finished, but failed to resume the domain. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/libvirt-domain.h | 1 + src/conf/domain_conf.c | 1 + src/qemu/qemu_domain.c | 3 +++ src/qemu/qemu_driver.c | 8 ++++++++ src/qemu/qemu_snapshot.c | 8 ++++++++ tools/virsh-domain-monitor.c | 1 + 6 files changed, 22 insertions(+) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 53cab6bd4c..3ebb2c6642 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -148,6 +148,7 @@ typedef enum { VIR_DOMAIN_PAUSED_STARTING_UP = 11, /* the domain is being started (Since: 1.2.14) */ VIR_DOMAIN_PAUSED_POSTCOPY = 12, /* paused for post-copy migration (Since: 1.3.3) */ VIR_DOMAIN_PAUSED_POSTCOPY_FAILED = 13, /* paused after failed post-copy (Since: 1.3.3) */ + VIR_DOMAIN_PAUSED_API_ERROR = 14, /* Some APIs (e.g., migration, snapshot) internally need to suspend a domain. This paused state reason is used when resume operation at the end of such API fails. (Since: 9.2.0) */ # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_PAUSED_LAST /* (Since: 0.9.10) */ diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 34d38f9958..f9b28acc76 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -1163,6 +1163,7 @@ VIR_ENUM_IMPL(virDomainPausedReason, "starting up", "post-copy", "post-copy failed", + "api error", ); VIR_ENUM_IMPL(virDomainShutdownReason, diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index f5fd140c85..fb269d0cd1 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -11372,6 +11372,9 @@ qemuDomainPausedReasonToSuspendedEvent(virDomainPausedReason reason) case VIR_DOMAIN_PAUSED_POSTCOPY: return VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY; + case VIR_DOMAIN_PAUSED_API_ERROR: + return VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR; + case VIR_DOMAIN_PAUSED_UNKNOWN: case VIR_DOMAIN_PAUSED_USER: case VIR_DOMAIN_PAUSED_SAVE: diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 3f353eadfa..7256c75c47 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2727,6 +2727,10 @@ qemuDomainSaveInternal(virQEMUDriver *driver, virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR)); + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, + VIR_DOMAIN_PAUSED_API_ERROR); + } } virErrorRestore(&save_err); } @@ -3254,6 +3258,10 @@ qemuDomainCoreDumpWithFormat(virDomainPtr dom, event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR); + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, + VIR_DOMAIN_PAUSED_API_ERROR); + } if (virGetLastErrorCode() == VIR_ERR_OK) virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("resuming after dump failed")); diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c index cfa531edef..e101b43dd3 100644 --- a/src/qemu/qemu_snapshot.c +++ b/src/qemu/qemu_snapshot.c @@ -326,6 +326,10 @@ qemuSnapshotCreateActiveInternal(virQEMUDriver *driver, event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR); + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, + VIR_DOMAIN_PAUSED_API_ERROR); + } if (virGetLastErrorCode() == VIR_ERR_OK) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("resuming after snapshot failed")); @@ -1398,6 +1402,10 @@ qemuSnapshotCreateActiveExternal(virQEMUDriver *driver, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR); virObjectEventStateQueue(driver->domainEventState, event); + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, + VIR_DOMAIN_PAUSED_API_ERROR); + } if (virGetLastErrorCode() == VIR_ERR_OK) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("resuming after snapshot failed")); diff --git a/tools/virsh-domain-monitor.c b/tools/virsh-domain-monitor.c index c2134faba1..bdade8f251 100644 --- a/tools/virsh-domain-monitor.c +++ b/tools/virsh-domain-monitor.c @@ -192,6 +192,7 @@ VIR_ENUM_IMPL(virshDomainPausedReason, N_("starting up"), N_("post-copy"), N_("post-copy failed"), + N_("api error"), ); VIR_ENUM_DECL(virshDomainShutdownReason); -- 2.39.2

Other APIs that internally use QEMU migration and need to temporarily suspend a domain already report failure to resume vCPUs by setting VIR_DOMAIN_PAUSED_API_ERROR state reason and emitting VIR_DOMAIN_EVENT_SUSPENDED event with VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR. Let's do the same in qemuMigrationSrcRestoreDomainState for consistent behavior. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 2720f0b083..efec1b3be6 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -251,6 +251,16 @@ qemuMigrationSrcRestoreDomainState(virQEMUDriver *driver, virDomainObj *vm) * overwrite the previous error, though, so we just throw something * to the logs and hope for the best */ VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name); + if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + virObjectEvent *event; + + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, + VIR_DOMAIN_PAUSED_API_ERROR); + event = virDomainEventLifecycleNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR); + virObjectEventStateQueue(driver->domainEventState, event); + } goto cleanup; } ret = true; -- 2.39.2

On a Friday in 2023, Jiri Denemark wrote:
See patch 2/3 for details.
Jiri Denemark (3): Clarify VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR semantics Introduce VIR_DOMAIN_PAUSED_API_ERROR qemu_migration: Use VIR_DOMAIN_PAUSED_API_ERROR
include/libvirt/libvirt-domain.h | 3 ++- src/conf/domain_conf.c | 1 + src/qemu/qemu_domain.c | 3 +++ src/qemu/qemu_driver.c | 8 ++++++++ src/qemu/qemu_migration.c | 10 ++++++++++ src/qemu/qemu_snapshot.c | 8 ++++++++ tools/virsh-domain-monitor.c | 1 + 7 files changed, 33 insertions(+), 1 deletion(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano
participants (2)
-
Jiri Denemark
-
Ján Tomko