On 7/22/24 10:55 AM, Michal Prívozník wrote:
On 7/19/24 17:44, Boris Fiuczynski wrote:
In cases when a QEMU process takes longer than the time sigterm and sigkill are issued to kill the process do not simply fail and leave the VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up an fd on /proc/$pid and get notified when the QEMU process finally has terminated to cleanup the VM state.
Resolves: https://issues.redhat.com/browse/RHEL-28819 Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> --- src/qemu/qemu_domain.c | 8 +++ src/qemu/qemu_domain.h | 2 + src/qemu/qemu_driver.c | 18 ++++++ src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_process.h | 1 + 5 files changed, 148 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 2134b11038..8147ff02fd 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
virChrdevFree(priv->devs);
+ if (priv->pidMonitored >= 0) { + virEventRemoveHandle(priv->pidMonitored); + priv->pidMonitored = -1; + } + /* This should never be non-NULL if we get here, but just in case... */ if (priv->mon) { VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) priv->blockjobs = virHashNew(virObjectUnref); priv->fds = virHashNew(g_object_unref);
+ priv->pidMonitored = -1; + /* agent commands block by default, user can choose different behavior */ priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) case QEMU_PROCESS_EVENT_RESET: case QEMU_PROCESS_EVENT_NBDKIT_EXITED: case QEMU_PROCESS_EVENT_MONITOR_EOF: + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: case QEMU_PROCESS_EVENT_LAST: break; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index d777559119..a5092dd7f0 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
bool beingDestroyed; char *pidfile; + int pidMonitored;
virDomainPCIAddressSet *pciaddrs; virDomainUSBAddressSet *usbaddrs; @@ -469,6 +470,7 @@ typedef enum { QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, QEMU_PROCESS_EVENT_RESET, QEMU_PROCESS_EVENT_NBDKIT_EXITED, + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
QEMU_PROCESS_EVENT_LAST } qemuProcessEventType; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 9f3013e231..6b1e4084f6 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, }
+static void +processShutdownCompletedEvent(virQEMUDriver *driver, + virDomainObj *vm) +{ + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) + return;
Shouldn't this be:
if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) return;
Otherwise looking good. No need to resend, I can fix that before pushing.
Michal
I was looking at qemuDomainSutdownFlagsMonitor but VIR_JOB_DESTROY seems to be the better match as it is also used in processMonitorEOFEvent and the should have been shutdown already. -- Mit freundlichen Grüßen/Kind regards Boris Fiuczynski IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Wolfgang Wendt Geschäftsführung: David Faller Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294