On Thu, Jul 20, 2023 at 17:19:52 -0500, Jonathon Jongsma wrote:
Adds the ability to monitor the nbdkit process so that we can take
action in case the child exits unexpectedly.
When the nbdkit process exits, we pause the vm, restart nbdkit, and then
resume the vm. This allows the vm to continue working in the event of a
nbdkit failure.
Eventually we may want to generalize this functionality since we may
need something similar for e.g. qemu-storage-daemon, etc.
The process is monitored with the pidfd_open() syscall if it exists
(since linux 5.3). Otherwise it resorts to checking whether the process
is alive once a second. The one-second time period was chosen somewhat
arbitrarily.
Signed-off-by: Jonathon Jongsma <jjongsma(a)redhat.com>
---
meson.build | 7 +++
src/qemu/qemu_nbdkit.c | 136 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_nbdkit.h | 4 +-
src/qemu/qemu_process.c | 4 +-
4 files changed, 143 insertions(+), 8 deletions(-)
diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
index c3b43ff3c0..1199acd501 100644
--- a/src/qemu/qemu_nbdkit.c
+++ b/src/qemu/qemu_nbdkit.c
[...]
@@ -613,6 +616,104 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
}
+static void
+qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+ qemuDomainObjPrivate *vmpriv = vm->privateData;
+ virQEMUDriver *driver = vmpriv->driver;
+
+ /* clean up resources associated with process */
+ qemuNbdkitProcessStop(proc);
+
+ if (qemuNbdkitProcessStart(proc, vm, driver) < 0)
+ VIR_WARN("Unable to restart nbkdit process");
It'd be great to get this message to the user in other way than the
logs.
The 'virDomainGetMessages' API would be a great fit IMO, but at this
point it doesn't have the possibility to do driver specific messages.
In this instance we could taint the VM, as taints are propagated
via the virDomainGetMessages API, when nbdkit fails to start,
which'd be then propagated to the user
+}
+
+
+typedef struct {
+ qemuNbdkitProcess *proc;
+ virDomainObj *vm;
+} qemuNbdkitProcessEventData;
+
+
+static qemuNbdkitProcessEventData*
+qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+ qemuNbdkitProcessEventData *d = g_new(qemuNbdkitProcessEventData, 1);
+ d->proc = proc;
+ d->vm = virObjectRef(vm);
+ return d;
+}
+
+
+static void
+qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d)
+{
+ virObjectUnref(d->vm);
+ g_free(d);
+}
+
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+static void
+qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
+ int fd,
+ int events G_GNUC_UNUSED,
+ void *opaque)
+{
+ qemuNbdkitProcessEventData *d = opaque;
+
+ VIR_FORCE_CLOSE(fd);
+ VIR_DEBUG("nbdkit process %i died", d->proc->pid);
+ qemuNbdkitProcessRestart(d->proc, d->vm);
This seems to access the 'vm' object unlocked. Since this callback seems
to be triggerrable at an arbitrary point, access to 'vm' locked by
another thread is possible.
Consider defering the reaction to this event to the per-VM event
handling thread so that the startup of the VM and possible locking of
the 'vm' object is not done in the event loop.
+}
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+
+
+static int
+qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc,
+ virDomainObj *vm)
+{
+#if WITH_DECL_SYS_PIDFD_OPEN
+ int pidfd;
+
+ pidfd = syscall(SYS_pidfd_open, proc->pid, 0);
+ if (pidfd < 0) {
+ virReportSystemError(errno, _("pidfd_open failed for %1$i"),
proc->pid);
+ return -1;
+ }
+
+ proc->eventwatch = virEventAddHandle(pidfd,
+ VIR_EVENT_HANDLE_READABLE,
+ qemuNbdkitProcessPidfdCb,
+ qemuNbdkitProcessEventDataNew(proc, vm),
+
(virFreeCallback)qemuNbdkitProcessEventDataFree);
+
+ VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid);
+
+ return 0;
+#else
+ virReportError(VIR_ERR_NO_SUPPORT, "%s",
+ _("pidfd_open system call required for nbdkit support"));
+ return -1;
This seems to be unreachable. Please add a comment.
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+}
+
+
+static void
+qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc)
+{
+#if WITH_DECL_SYS_PIDFD_OPEN
+ if (proc->eventwatch > 0) {
+ virEventRemoveHandle(proc->eventwatch);
+ proc->eventwatch = 0;
+ }
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+}
+
+
static qemuNbdkitProcess *
qemuNbdkitProcessNew(virStorageSource *source,
const char *pidfile,
@@ -660,9 +761,11 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source,
static void
-qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
+qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
+ virDomainObj *vm)
{
qemuDomainStorageSourcePrivate *srcpriv =
QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source);
+ qemuDomainObjPrivate *vmpriv = vm->privateData;
qemuNbdkitProcess *proc;
if (!srcpriv)
@@ -673,6 +776,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
if (!proc)
return;
+ if (!proc->caps)
+ proc->caps = qemuGetNbdkitCaps(vmpriv->driver);
+
if (proc->pid <= 0) {
if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0) {
VIR_WARN("Unable to read pidfile '%s'",
proc->pidfile);
@@ -680,8 +786,14 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
}
}
- if (virProcessKill(proc->pid, 0) < 0)
+ if (virProcessKill(proc->pid, 0) < 0) {
VIR_WARN("nbdkit process %i is not alive", proc->pid);
+ qemuNbdkitProcessRestart(proc, vm);
+ return;
+ }
+
+ if (qemuNbdkitProcessStartMonitor(proc, vm) < 0)
+ VIR_WARN("unable monitor nbdkit process");
}
/**
@@ -694,11 +806,12 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
* disk and is attempting to re-connect to active domains.
*/
void
-qemuNbdkitStorageSourceManageProcess(virStorageSource *source)
+qemuNbdkitStorageSourceManageProcess(virStorageSource *source,
+ virDomainObj *vm)
{
virStorageSource *backing;
for (backing = source; backing != NULL; backing = backing->backingStore)
- qemuNbdkitStorageSourceManageProcessOne(backing);
+ qemuNbdkitStorageSourceManageProcessOne(backing, vm);
}
@@ -710,6 +823,7 @@ qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps,
uid_t user,
gid_t group)
{
+#if WITH_DECL_SYS_PIDFD_OPEN
qemuDomainStorageSourcePrivate *srcPriv =
qemuDomainStorageSourcePrivateFetch(source);
g_autofree char *pidname = g_strdup_printf("nbdkit-%s.pid", alias);
g_autofree char *socketname = g_strdup_printf("nbdkit-%s.socket", alias);
@@ -753,6 +867,11 @@ qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps,
srcPriv->nbdkitProcess = proc;
return true;
+#else
+ /* we need pidfd_open in order to monitor the process, so don't construct
+ * the object in this case so we'll fall back to qemu storage drivers */
+ return false;
Please reverse the order of the blocks so it's more clear to the readers
that all of the function can be skipped.
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
}
If you defer the handling of the nbdkit restart to the per-vm thread:
Reviewed-by: Peter Krempa <pkrempa(a)redhat.com>