Adds the ability to monitor the nbdkit process so that we can take
action in case the child exits unexpectedly.
When the nbdkit process exits, we pause the vm, restart nbdkit, and then
resume the vm. This allows the vm to continue working in the event of an
nbdkit failure.
Eventually we may want to generalize this functionality since we may
need something similar for e.g. qemu-storage-daemon, etc.
The process is monitored with the pidfd_open() syscall if it exists
(since Linux 5.3). Otherwise it resorts to checking whether the process
is alive once a second. The one-second time period was chosen somewhat
arbitrarily.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
---
meson.build | 7 ++
src/qemu/qemu_nbdkit.c | 166 +++++++++++++++++++++++++++++++++++++++-
src/qemu/qemu_nbdkit.h | 7 +-
src/qemu/qemu_process.c | 4 +-
4 files changed, 177 insertions(+), 7 deletions(-)
diff --git a/meson.build b/meson.build
index e498b49be4..048b15ff71 100644
--- a/meson.build
+++ b/meson.build
@@ -645,6 +645,13 @@ symbols = [
[ 'sched.h', 'cpu_set_t' ],
]
+if host_machine.system() == 'linux'
+ symbols += [
+ # process management
+ [ 'sys/syscall.h', 'SYS_pidfd_open' ],
+ ]
+endif
+
foreach symbol : symbols
if cc.has_header_symbol(symbol[0], symbol[1], args: '-D_GNU_SOURCE', prefix:
symbol.get(2, ''))
conf.set('WITH_DECL_@0@'.format(symbol[1].to_upper()), 1)
diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
index 5848710dc2..934970e68c 100644
--- a/src/qemu/qemu_nbdkit.c
+++ b/src/qemu/qemu_nbdkit.c
@@ -19,9 +19,11 @@
#include <config.h>
#include <glib.h>
+#include <sys/syscall.h>
#include "vircommand.h"
#include "virerror.h"
+#include "virevent.h"
#include "virlog.h"
#include "virpidfile.h"
#include "virtime.h"
@@ -34,6 +36,7 @@
#include "qemu_nbdkit.h"
#define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW
#include "qemu_nbdkitpriv.h"
+#include "qemu_process.h"
#include "qemu_security.h"
#include <fcntl.h>
@@ -69,6 +72,12 @@ struct _qemuNbdkitCaps {
G_DEFINE_TYPE(qemuNbdkitCaps, qemu_nbdkit_caps, G_TYPE_OBJECT);
+struct _qemuNbdkitProcessPrivate { /* monitoring state attached to a qemuNbdkitProcess */
+ int pidfdwatch; /* id returned by virEventAddHandle()/virEventAddTimeout(); reset to 0 when the monitor is stopped */
+ virDomainObj *vm; /* domain the process belongs to; a reference is held while it is stored here */
+};
+
+
enum {
PIPE_FD_READ = 0,
PIPE_FD_WRITE = 1
@@ -618,6 +627,137 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
}
+static int
+qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc,
+ virDomainObj *vm);
+
+
+/**
+ * qemuNbdkitProcessHandleExit:
+ * @proc: nbdkit process that was detected as no longer running
+ *
+ * Called from the monitor callbacks once the nbdkit child is gone.
+ * Stops the stale process object via qemuNbdkitProcessStop() and then
+ * starts a replacement for the domain stored in the private data.
+ * Failures are reported through virReportError(); nothing is returned.
+ */
+static void
+qemuNbdkitProcessHandleExit(qemuNbdkitProcess *proc)
+{
+    qemuNbdkitProcessPrivate *priv = proc->priv;
+    qemuDomainObjPrivate *vmpriv = NULL;
+
+    VIR_DEBUG("nbdkit process %i died", proc->pid);
+
+    /* clean up resources associated with process */
+    qemuNbdkitProcessStop(proc);
+
+    /* The domain must be validated before its private data is touched:
+     * the driver needed for the restart is only reachable through it. */
+    if (!priv->vm) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Cannot restart nbdkit process without an associated domain"));
+        return;
+    }
+
+    vmpriv = priv->vm->privateData;
+
+    /* qemuNbdkitProcessStart() starts the exit monitor itself before it
+     * returns success, so registering it again here would add a second
+     * watch and lose track of the first one. */
+    if (qemuNbdkitProcessStart(proc, priv->vm, vmpriv->driver) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Unable to restart nbdkit process"));
+    }
+}
+
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+/* Event-loop callback for the pidfd watch registered by
+ * qemuNbdkitProcessStartMonitor(). @opaque is the qemuNbdkitProcess
+ * being watched. The pidfd passed in by the event loop is closed and
+ * the exit handling for the process is run. */
+static void
+qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
+                         int fd,
+                         int events G_GNUC_UNUSED,
+                         void *opaque)
+{
+    qemuNbdkitProcess *nbdkit = opaque;
+
+    VIR_FORCE_CLOSE(fd);
+
+    qemuNbdkitProcessHandleExit(nbdkit);
+}
+#else
+/* Timer callback used when pidfd_open() is not available at build
+ * time. @opaque is the qemuNbdkitProcess being watched. The process is
+ * probed with signal 0; a failing probe is treated as the process
+ * having exited. */
+static void
+qemuNbdkitProcessTimeoutCb(int timer G_GNUC_UNUSED,
+                           void *opaque)
+{
+    qemuNbdkitProcess *nbdkit = opaque;
+
+    /* probe succeeded: nothing to do until the next tick */
+    if (virProcessKill(nbdkit->pid, 0) >= 0)
+        return;
+
+    qemuNbdkitProcessHandleExit(nbdkit);
+}
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+
+
+/**
+ * qemuNbdkitProcessStartMonitor:
+ * @proc: nbdkit process to watch; proc->pid must identify the child
+ * @vm: domain to associate with @proc, or NULL to keep the domain that
+ *      was stored by an earlier call
+ *
+ * Registers an event-loop watch that fires when the nbdkit process
+ * exits: a pidfd handle when SYS_pidfd_open is available at build time,
+ * otherwise a timer that polls once a second. The resulting id is
+ * stored in the private data, overwriting any previous value, so the
+ * caller is expected to have stopped an earlier monitor first.
+ *
+ * Returns 0 on success, -1 with an error reported on failure.
+ */
+static int
+qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc,
+                              virDomainObj *vm)
+{
+    qemuNbdkitProcessPrivate *priv = proc->priv;
+#if WITH_DECL_SYS_PIDFD_OPEN
+    int pidfd;
+#endif
+
+    if (vm) {
+        /* take the new reference before dropping the old one so that
+         * re-registering the same domain never releases it in between */
+        virObjectRef(vm);
+
+        if (priv->vm)
+            virObjectUnref(priv->vm);
+
+        priv->vm = vm;
+    }
+
+    if (!priv->vm) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Cannot monitor nbdkit process without an associated domain"));
+        return -1;
+    }
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+    pidfd = syscall(SYS_pidfd_open, proc->pid, 0);
+    if (pidfd < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unable to open a pidfd for nbdkit process %i"),
+                       proc->pid);
+        return -1;
+    }
+
+    priv->pidfdwatch = virEventAddHandle(pidfd,
+                                         VIR_EVENT_HANDLE_READABLE,
+                                         qemuNbdkitProcessPidfdCb,
+                                         proc, NULL);
+
+    /* the callback is the only place that closes the descriptor, and it
+     * will never run if registration failed */
+    if (priv->pidfdwatch < 0)
+        VIR_FORCE_CLOSE(pidfd);
+#else
+    /* fall back to checking once a second */
+    priv->pidfdwatch = virEventAddTimeout(1000,
+                                          qemuNbdkitProcessTimeoutCb,
+                                          proc, NULL);
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+
+    if (priv->pidfdwatch < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unable to monitor nbdkit process %i"),
+                       proc->pid);
+        return -1;
+    }
+
+    VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid);
+
+    return 0;
+}
+
+
+/* Removes the exit watch registered by qemuNbdkitProcessStartMonitor(),
+ * if one is active, and marks the monitor as stopped. Calling it with
+ * no active watch is a no-op.
+ *
+ * NOTE(review): only the event-loop registration is removed here; the
+ * pidfd is closed in the pidfd callback. Confirm the descriptor is not
+ * left open when the monitor is stopped before the process exits. */
+static void
+qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc)
+{
+    qemuNbdkitProcessPrivate *state = proc->priv;
+
+    /* ids <= 0 mean nothing is registered */
+    if (state->pidfdwatch <= 0)
+        return;
+
+#if WITH_DECL_SYS_PIDFD_OPEN
+    virEventRemoveHandle(state->pidfdwatch);
+#else
+    virEventRemoveTimeout(state->pidfdwatch);
+#endif /* WITH_DECL_SYS_PIDFD_OPEN */
+
+    state->pidfdwatch = 0;
+}
+
+
+/* Releases the private monitoring data of a qemuNbdkitProcess: drops
+ * the domain reference stored in it and frees the structure itself.
+ * A NULL @priv is accepted and ignored, matching g_free(). */
+static void
+qemuNbdkitProcessPrivateFree(qemuNbdkitProcessPrivate *priv)
+{
+    if (!priv)
+        return;
+
+    virObjectUnref(priv->vm);
+    g_free(priv);
+}
+
+
static qemuNbdkitProcess *
qemuNbdkitProcessNew(virStorageSource *source,
const char *pidfile,
@@ -631,6 +771,7 @@ qemuNbdkitProcessNew(virStorageSource *source,
nbdkit->pid = -1;
nbdkit->pidfile = g_strdup(pidfile);
nbdkit->socketfile = g_strdup(socketfile);
+ nbdkit->priv = g_new0(qemuNbdkitProcessPrivate, 1);
return nbdkit;
}
@@ -665,9 +806,11 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source,
static int
-qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
+qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
+ virDomainObj *vm)
{
qemuDomainStorageSourcePrivate *srcpriv =
QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source);
+ qemuDomainObjPrivate *vmpriv = vm->privateData;
qemuNbdkitProcess *proc;
if (!srcpriv)
@@ -676,6 +819,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
proc = srcpriv->nbdkitProcess;
if (proc) {
+ if (!proc->caps)
+ proc->caps = qemuGetNbdkitCaps(vmpriv->driver);
+
if (proc->pid <= 0) {
if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0)
return -1;
@@ -686,6 +832,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
_("nbdkit process %i is not alive"), proc->pid);
return -1;
}
+
+ if (qemuNbdkitProcessStartMonitor(proc, vm) < 0)
+ return -1;
}
return 0;
@@ -701,15 +850,16 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
* disk and is attempting to re-connect to active domains.
*/
int
-qemuNbdkitStorageSourceManageProcess(virStorageSource *source)
+qemuNbdkitStorageSourceManageProcess(virStorageSource *source,
+ virDomainObj *vm)
{
virStorageSource *backing;
for (backing = source->backingStore; backing != NULL; backing =
backing->backingStore) {
- if (qemuNbdkitStorageSourceManageProcessOne(backing) < 0)
+ if (qemuNbdkitStorageSourceManageProcessOne(backing, vm) < 0)
return -1;
}
- return qemuNbdkitStorageSourceManageProcessOne(source);
+ return qemuNbdkitStorageSourceManageProcessOne(source, vm);
}
@@ -1005,9 +1155,12 @@ qemuNbdkitProcessBuildCommand(qemuNbdkitProcess *proc)
void
qemuNbdkitProcessFree(qemuNbdkitProcess *proc)
{
+ qemuNbdkitProcessStopMonitor(proc); /* drop the exit watch first: it was registered with proc as its opaque data */
+
g_clear_pointer(&proc->pidfile, g_free);
g_clear_pointer(&proc->socketfile, g_free);
g_clear_object(&proc->caps);
+ g_clear_pointer(&proc->priv, qemuNbdkitProcessPrivateFree); /* frees the private data and unrefs the domain stored in it */
g_free(proc);
}
@@ -1087,6 +1240,9 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
goto error;
}
+ if (qemuNbdkitProcessStartMonitor(proc, vm) < 0)
+ goto error;
+
return 0;
error:
@@ -1107,6 +1263,8 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
int
qemuNbdkitProcessStop(qemuNbdkitProcess *proc)
{
+ qemuNbdkitProcessStopMonitor(proc);
+
if (proc->pid < 0)
return 0;
diff --git a/src/qemu/qemu_nbdkit.h b/src/qemu/qemu_nbdkit.h
index 8191ace522..df45f409c0 100644
--- a/src/qemu/qemu_nbdkit.h
+++ b/src/qemu/qemu_nbdkit.h
@@ -69,7 +69,8 @@ void
qemuNbdkitStopStorageSource(virStorageSource *src);
int
-qemuNbdkitStorageSourceManageProcess(virStorageSource *src);
+qemuNbdkitStorageSourceManageProcess(virStorageSource *src,
+ virDomainObj *vm);
bool
qemuNbdkitCapsGet(qemuNbdkitCaps *nbdkitCaps,
@@ -82,6 +83,8 @@ qemuNbdkitCapsSet(qemuNbdkitCaps *nbdkitCaps,
#define QEMU_TYPE_NBDKIT_CAPS qemu_nbdkit_caps_get_type()
G_DECLARE_FINAL_TYPE(qemuNbdkitCaps, qemu_nbdkit_caps, QEMU, NBDKIT_CAPS, GObject);
+typedef struct _qemuNbdkitProcessPrivate qemuNbdkitProcessPrivate;
+
struct _qemuNbdkitProcess {
qemuNbdkitCaps *caps;
virStorageSource *source;
@@ -91,6 +94,8 @@ struct _qemuNbdkitProcess {
uid_t user;
gid_t group;
pid_t pid;
+
+ qemuNbdkitProcessPrivate *priv;
};
int
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 7ec31ef6ac..54fd44fb40 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -9049,12 +9049,12 @@ qemuProcessReconnect(void *opaque)
for (i = 0; i < obj->def->ndisks; i++) {
virDomainDiskDef *disk = obj->def->disks[i];
- if (qemuNbdkitStorageSourceManageProcess(disk->src) < 0)
+ if (qemuNbdkitStorageSourceManageProcess(disk->src, obj) < 0)
goto error;
}
if (obj->def->os.loader && obj->def->os.loader->nvram) {
- if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram)
< 0)
+ if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram,
obj) < 0)
goto error;
}
--
2.39.0