On 7/19/24 17:44, Boris Fiuczynski wrote:
> In cases when a QEMU process takes longer than the time sigterm and
> sigkill are issued to kill the process do not simply fail and leave the
> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
> an fd on /proc/$pid and get notified when the QEMU process finally has
> terminated to cleanup the VM state.
>
> Resolves:
https://issues.redhat.com/browse/RHEL-28819
> Signed-off-by: Boris Fiuczynski <fiuczy(a)linux.ibm.com>
> ---
> src/qemu/qemu_domain.c | 8 +++
> src/qemu/qemu_domain.h | 2 +
> src/qemu/qemu_driver.c | 18 ++++++
> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
> src/qemu/qemu_process.h | 1 +
> 5 files changed, 148 insertions(+), 5 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 2134b11038..8147ff02fd 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>
> virChrdevFree(priv->devs);
>
> + if (priv->pidMonitored >= 0) {
> + virEventRemoveHandle(priv->pidMonitored);
> + priv->pidMonitored = -1;
> + }
> +
> /* This should never be non-NULL if we get here, but just in case... */
> if (priv->mon) {
> VIR_ERROR(_("Unexpected QEMU monitor still active during domain
deletion"));
> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
> priv->blockjobs = virHashNew(virObjectUnref);
> priv->fds = virHashNew(g_object_unref);
>
> + priv->pidMonitored = -1;
> +
> /* agent commands block by default, user can choose different behavior */
> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> case QEMU_PROCESS_EVENT_RESET:
> case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
> case QEMU_PROCESS_EVENT_MONITOR_EOF:
> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
> case QEMU_PROCESS_EVENT_LAST:
> break;
> }
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index d777559119..a5092dd7f0 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>
> bool beingDestroyed;
> char *pidfile;
> + int pidMonitored;
>
> virDomainPCIAddressSet *pciaddrs;
> virDomainUSBAddressSet *usbaddrs;
> @@ -469,6 +470,7 @@ typedef enum {
> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
> QEMU_PROCESS_EVENT_RESET,
> QEMU_PROCESS_EVENT_NBDKIT_EXITED,
> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>
> QEMU_PROCESS_EVENT_LAST
> } qemuProcessEventType;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 9f3013e231..6b1e4084f6 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
> }
>
>
> +static void
> +processShutdownCompletedEvent(virQEMUDriver *driver,
> + virDomainObj *vm)
> +{
> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> + return;
Shouldn't this be:
if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
return;
Otherwise looking good. No need to resend, I can fix that before pushing.
And followed by qemuProcessEndStopJob after calling qemuProcessStop.
Jirka