On Mon, Aug 28, 2023 at 16:44:59 -0500, Jonathon Jongsma wrote:
Adds the ability to monitor the nbdkit process so that we can take
action in case the child exits unexpectedly.
When the nbdkit process exits, we pause the vm, restart nbdkit, and then
resume the vm. This allows the vm to continue working in the event of a
nbdkit failure.
Eventually we may want to generalize this functionality since we may
need something similar for e.g. qemu-storage-daemon, etc.
The process is monitored with the pidfd_open() syscall if it exists
(since linux 5.3). Otherwise it resorts to checking whether the process
is alive once a second. The one-second time period was chosen somewhat
arbitrarily.
Signed-off-by: Jonathon Jongsma <jjongsma(a)redhat.com>
---
meson.build | 7 ++
src/qemu/qemu_domain.c | 1 +
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_driver.c | 18 ++++++
src/qemu/qemu_nbdkit.c | 137 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_nbdkit.h | 8 ++-
src/qemu/qemu_process.c | 13 +++-
src/qemu/qemu_process.h | 3 +
8 files changed, 180 insertions(+), 8 deletions(-)
[...]
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ad8428948b..46e089fe0f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4040,6 +4040,21 @@ processResetEvent(virQEMUDriver *driver,
}
+
+
Too much whitespace before function.
+static void
+processNbdkitExitedEvent(virDomainObj *vm,
+ qemuNbdkitProcess *nbdkit)
+{
+ if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+ return;
+
+ qemuNbdkitProcessRestart(nbdkit, vm);
+
+ virDomainObjEndJob(vm);
+}
+
+
static void qemuProcessEventHandler(void *data, void *opaque)
{
struct qemuProcessEvent *processEvent = data;
[...]
diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
index df638e99c0..c3fa349922 100644
--- a/src/qemu/qemu_nbdkit.c
+++ b/src/qemu/qemu_nbdkit.c
[...]
@@ -611,6 +613,106 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
}
+void
+qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+ qemuDomainObjPrivate *vmpriv = vm->privateData;
+ virQEMUDriver *driver = vmpriv->driver;
+
+ /* clean up resources associated with process */
+ qemuNbdkitProcessStop(proc);
+
+ if (qemuNbdkitProcessStart(proc, vm, driver) < 0)
+ VIR_WARN("Unable to restart nbkdit process");
As noted before VIR_WARN is not acceptable. Move patch 25 before this
one and do the taint right away. And ditch the VIR_WARN.
+}
+
+
+typedef struct {
+ qemuNbdkitProcess *proc;
+ virDomainObj *vm;
+} qemuNbdkitProcessEventData;
+
+
+static qemuNbdkitProcessEventData*
+qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+ qemuNbdkitProcessEventData *d = g_new(qemuNbdkitProcessEventData, 1);
+ d->proc = proc;
+ d->vm = virObjectRef(vm);
+ return d;
+}
+
+
+static void
+qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d)
+{
+ virObjectUnref(d->vm);
+ g_free(d);
+}
+
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+static void
+qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
+ int fd,
+ int events G_GNUC_UNUSED,
+ void *opaque)
+{
+ qemuNbdkitProcessEventData *d = opaque;
+
+ VIR_FORCE_CLOSE(fd);
+ VIR_DEBUG("nbdkit process %i died", d->proc->pid);
I presume that the 'qemuNbdkitProcess' struct is protected by belonging
into the virDomainObj and thus using it's lock. In such chase this
violates locking as 'vm' is not locked at this point, but 'proc' is
accessed.
+ /* submit an event so that it is handled in the per-vm event
thread */
+ qemuProcessHandleNbdkitExit(d->proc, d->vm);
+}
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+
+
+static int
+qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+#if WITH_DECL_SYS_PIDFD_OPEN
+ int pidfd;
+
+ pidfd = syscall(SYS_pidfd_open, proc->pid, 0);
+ if (pidfd < 0) {
+ virReportSystemError(errno, _("pidfd_open failed for %1$i"),
proc->pid);
+ return -1;
+ }
+
+ proc->eventwatch = virEventAddHandle(pidfd,
+ VIR_EVENT_HANDLE_READABLE,
+ qemuNbdkitProcessPidfdCb,
+ qemuNbdkitProcessEventDataNew(proc, vm),
+
(virFreeCallback)qemuNbdkitProcessEventDataFree);
'virEventAddHandle' can fail and return -1, if the corresponding
callback is not registered. All other callers check the return value and
also prevent the memory leak from the opaque data blob passed to it in
such case.
+
+ VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid);
[...]
static void
-qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
+qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
+ virDomainObj *vm)
{
qemuDomainStorageSourcePrivate *srcpriv =
QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source);
+ qemuDomainObjPrivate *vmpriv = vm->privateData;
qemuNbdkitProcess *proc;
if (!srcpriv)
@@ -671,6 +775,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
if (!proc)
return;
+ if (!proc->caps)
+ proc->caps = qemuGetNbdkitCaps(vmpriv->driver);
+
if (proc->pid <= 0) {
if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0) {
VIR_WARN("Unable to read pidfile '%s'",
proc->pidfile);
@@ -678,8 +785,14 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
}
}
- if (virProcessKill(proc->pid, 0) < 0)
+ if (virProcessKill(proc->pid, 0) < 0) {
VIR_WARN("nbdkit process %i is not alive", proc->pid);
Same comment about VIR_WARN not being correct here.
+ qemuNbdkitProcessRestart(proc, vm);
+ return;
+ }
+
+ if (qemuNbdkitProcessStartMonitor(proc, vm) < 0)
+ VIR_WARN("unable monitor nbdkit process");
aand same here.
Both need to either taint the VM or be demoted to DEBUG.
}
/**
[...]
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index d90990d8a5..1020c1a161 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
[...]
return 0;
}
+
+
+void qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
+ virDomainObj *vm)
Wrong header formatting.
+{
+ virObjectLock(vm);
+ qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
+ virObjectUnlock(vm);
+}
Reviewed-by: Peter Krempa <pkrempa(a)redhat.com>