In cases when a QEMU process takes longer than the time sigterm and
sigkill are issued to kill the process do not simply fail and leave the
VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
an fd on /proc/$pid and get notified when the QEMU process finally has
terminated to cleanup the VM state.
Resolves:
https://issues.redhat.com/browse/RHEL-28819
Signed-off-by: Boris Fiuczynski <fiuczy(a)linux.ibm.com>
---
src/qemu/qemu_domain.c | 8 +++
src/qemu/qemu_domain.h | 2 +
src/qemu/qemu_driver.c | 18 ++++++
src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_process.h | 1 +
5 files changed, 148 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2134b11038..8147ff02fd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
virChrdevFree(priv->devs);
+ if (priv->pidMonitored >= 0) {
+ virEventRemoveHandle(priv->pidMonitored);
+ priv->pidMonitored = -1;
+ }
+
/* This should never be non-NULL if we get here, but just in case... */
if (priv->mon) {
VIR_ERROR(_("Unexpected QEMU monitor still active during domain
deletion"));
@@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
priv->blockjobs = virHashNew(virObjectUnref);
priv->fds = virHashNew(g_object_unref);
+ priv->pidMonitored = -1;
+
/* agent commands block by default, user can choose different behavior */
priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
@@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
case QEMU_PROCESS_EVENT_RESET:
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
case QEMU_PROCESS_EVENT_MONITOR_EOF:
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index d777559119..a5092dd7f0 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
bool beingDestroyed;
char *pidfile;
+ int pidMonitored;
virDomainPCIAddressSet *pciaddrs;
virDomainUSBAddressSet *usbaddrs;
@@ -469,6 +470,7 @@ typedef enum {
QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
QEMU_PROCESS_EVENT_RESET,
QEMU_PROCESS_EVENT_NBDKIT_EXITED,
+ QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
QEMU_PROCESS_EVENT_LAST
} qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9f3013e231..6b1e4084f6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
}
+static void
+processShutdownCompletedEvent(virQEMUDriver *driver,
+ virDomainObj *vm)
+{
+ if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+ return;
+
+ if (virDomainObjIsActive(vm))
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_UNKNOWN,
+ VIR_ASYNC_JOB_NONE, 0);
+
+ virDomainObjEndJob(vm);
+}
+
+
static void qemuProcessEventHandler(void *data, void *opaque)
{
struct qemuProcessEvent *processEvent = data;
@@ -4101,6 +4116,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
processNbdkitExitedEvent(vm, processEvent->data);
break;
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
+ processShutdownCompletedEvent(driver, vm);
+ break;
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..c6f7d34168 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,9 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#if WITH_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
@@ -8387,9 +8390,114 @@ qemuProcessCreatePretendCmdBuild(virDomainObj *vm,
}
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+typedef struct {
+ virDomainObj *vm;
+ int pidfd;
+} qemuProcessInShutdownEventData;
+
+
+static qemuProcessInShutdownEventData*
+qemuProcessInShutdownEventDataNew(virDomainObj *vm, int pidfd)
+{
+ qemuProcessInShutdownEventData *d = g_new(qemuProcessInShutdownEventData, 1);
+ d->vm = virObjectRef(vm);
+ d->pidfd = pidfd;
+ return d;
+}
+
+
+static void
+qemuProcessInShutdownEventDataFree(qemuProcessInShutdownEventData *d)
+{
+ virObjectUnref(d->vm);
+ VIR_FORCE_CLOSE(d->pidfd);
+ g_free(d);
+}
+
+
+static void
+qemuProcessInShutdownPidfdCb(int watch,
+ int fd,
+ int events G_GNUC_UNUSED,
+ void *opaque)
+{
+ qemuProcessInShutdownEventData *data = opaque;
+ virDomainObj *vm = data->vm;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld fd=%d",
+ vm, vm->def->name, (long long)vm->pid, fd);
+
+ virEventRemoveHandle(watch);
+
+ virObjectLock(vm);
+
+ VIR_INFO("QEMU process %lld finally completed termination",
+ (long long)vm->pid);
+
+ QEMU_DOMAIN_PRIVATE(vm)->pidMonitored = -1;
+ qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
+ 0, 0, NULL);
+
+ virObjectUnlock(vm);
+}
+#endif /* WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open) */
+
+
+static int
+qemuProcessInShutdownStartMonitor(virDomainObj *vm)
+{
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+ qemuDomainObjPrivate *priv = vm->privateData;
+ qemuProcessInShutdownEventData *data;
+ int pidfd;
+ int ret = -1;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld pidMonitored=%d",
+ vm, vm->def->name, (long long)vm->pid,
+ priv->pidMonitored);
+
+ if (priv->pidMonitored >= 0) {
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i already set up",
vm->pid);
+ goto cleanup;
+ }
+
+ pidfd = syscall(SYS_pidfd_open, vm->pid, 0);
+ if (pidfd < 0) {
+ if (errno == ESRCH) /* process has already terminated */
+ ret = 1;
+ goto cleanup;
+ }
+
+ data = qemuProcessInShutdownEventDataNew(vm, pidfd);
+ if ((priv->pidMonitored = virEventAddHandle(pidfd,
+ VIR_EVENT_HANDLE_READABLE,
+ qemuProcessInShutdownPidfdCb,
+ data,
+
(virFreeCallback)qemuProcessInShutdownEventDataFree)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to monitor qemu in-shutdown process %1$i"),
+ vm->pid);
+ qemuProcessInShutdownEventDataFree(data);
+ goto cleanup;
+ }
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i for termination",
vm->pid);
+ ret = 0;
+
+ cleanup:
+ return ret;
+#else /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+ VIR_DEBUG("Monitoring qemu process %i not implemented", vm->pid);
+ return -1;
+#endif /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+}
+
+
int
qemuProcessKill(virDomainObj *vm, unsigned int flags)
{
+ int ret = -1;
+
VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
vm, vm->def->name,
(long long)vm->pid, flags);
@@ -8410,10 +8518,16 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags)
/* Request an extra delay of two seconds per current nhostdevs
* to be safe against stalls by the kernel freeing up the resources */
- return virProcessKillPainfullyDelay(vm->pid,
- !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
- vm->def->nhostdevs * 2,
- false);
+ ret = virProcessKillPainfullyDelay(vm->pid,
+ !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
+ vm->def->nhostdevs * 2,
+ false);
+
+ if (ret < 0 && (flags & VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR))
+ if (qemuProcessInShutdownStartMonitor(vm) == 1)
+ ret = 0; /* process termination detected */
+
+ return ret;
}
@@ -8438,7 +8552,7 @@ qemuProcessBeginStopJob(virDomainObj *vm,
* cleared inside qemuProcessStop */
priv->beingDestroyed = true;
- if (qemuProcessKill(vm, killFlags) < 0)
+ if (qemuProcessKill(vm, killFlags|VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR) < 0)
goto error;
/* Wake up anything waiting on domain condition */
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index cb67bfcd2d..2324aeb7bd 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -180,6 +180,7 @@ typedef enum {
VIR_QEMU_PROCESS_KILL_FORCE = 1 << 0,
VIR_QEMU_PROCESS_KILL_NOWAIT = 1 << 1,
VIR_QEMU_PROCESS_KILL_NOCHECK = 1 << 2, /* bypass the running vm check */
+ VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR = 1 << 3, /* on error enable process
monitor */
} virQemuProcessKillMode;
int qemuProcessKill(virDomainObj *vm, unsigned int flags);
--
2.45.0