On Thu, Dec 24, 2020 at 08:14:35AM -0600, Jonathon Jongsma wrote:
We need to periodically query mdevctl for changes to device
definitions
since an administrator can define new devices with mdevctl outside of
libvirt.
In the future, mdevctl may add a way to signal device add/remove via
events, but for now we resort to a bit of a workaround: monitoring the
mdevctl config directories for changes to files. When a change is
detected, we query mdevctl and update our device list. The mdevctl
querying is handled by the existing udev thread.
Signed-off-by: Jonathon Jongsma <jjongsma(a)redhat.com>
---
src/node_device/node_device_udev.c | 204 ++++++++++++++++++++++++-----
1 file changed, 171 insertions(+), 33 deletions(-)
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index d7f7ab4370..5729fea264 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -41,6 +41,7 @@
#include "virnetdev.h"
#include "virmdev.h"
#include "virutil.h"
+#include <gio/gio.h>
System header includes should come before local header includes.
#include "configmake.h"
@@ -66,6 +67,10 @@ struct _udevEventData {
virCond threadCond;
bool threadQuit;
bool udevReady;
+ bool mdevReady;
+
+ GList *mdevctlMonitors;
+ int mdevctlTimeout;
};
static virClassPtr udevEventDataClass;
@@ -85,6 +90,7 @@ udevEventDataDispose(void *obj)
udev = udev_monitor_get_udev(priv->udev_monitor);
udev_monitor_unref(priv->udev_monitor);
udev_unref(udev);
+ g_list_free_full(priv->mdevctlMonitors, g_object_unref);
virCondDestroy(&priv->threadCond);
}
@@ -1821,7 +1827,7 @@ udevEventHandleThread(void *opaque G_GNUC_UNUSED)
/* continue rather than break from the loop on non-fatal errors */
while (1) {
virObjectLock(priv);
- while (!priv->udevReady && !priv->threadQuit) {
+ while (!priv->udevReady && !priv->mdevReady &&
!priv->threadQuit) {
if (virCondWait(&priv->threadCond, &priv->parent.lock)) {
virReportSystemError(errno, "%s",
_("handler failed to wait on
condition"));
@@ -1835,46 +1841,58 @@ udevEventHandleThread(void *opaque G_GNUC_UNUSED)
return;
}
- errno = 0;
- device = udev_monitor_receive_device(priv->udev_monitor);
- virObjectUnlock(priv);
+ if (priv->udevReady) {
+ errno = 0;
+ device = udev_monitor_receive_device(priv->udev_monitor);
+ virObjectUnlock(priv);
- if (!device) {
- if (errno == 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("failed to receive device from udev
monitor"));
- return;
- }
+ if (!device) {
+ if (errno == 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to receive device from udev
monitor"));
+ return;
+ }
+
+ /* POSIX allows both EAGAIN and EWOULDBLOCK to be used
+ * interchangeably when the read would block or timeout was fired
+ */
+ VIR_WARNINGS_NO_WLOGICALOP_EQUAL_EXPR
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ VIR_WARNINGS_RESET
+ virReportSystemError(errno, "%s",
+ _("failed to receive device from udev
"
+ "monitor"));
+ return;
+ }
+
+ /* Trying to move the reset of the @priv->udevReady flag to
+ * after the udev_monitor_receive_device wouldn't help much
+ * due to event mgmt and scheduler timing. */
+ virObjectLock(priv);
+ priv->udevReady = false;
+ virObjectUnlock(priv);
- /* POSIX allows both EAGAIN and EWOULDBLOCK to be used
- * interchangeably when the read would block or timeout was fired
- */
- VIR_WARNINGS_NO_WLOGICALOP_EQUAL_EXPR
- if (errno != EAGAIN && errno != EWOULDBLOCK) {
- VIR_WARNINGS_RESET
- virReportSystemError(errno, "%s",
- _("failed to receive device from udev "
- "monitor"));
- return;
+ continue;
}
- /* Trying to move the reset of the @priv->udevReady flag to
- * after the udev_monitor_receive_device wouldn't help much
- * due to event mgmt and scheduler timing. */
- virObjectLock(priv);
- priv->udevReady = false;
- virObjectUnlock(priv);
+ udevHandleOneDevice(device);
+ udev_device_unref(device);
- continue;
+ /* Instead of waiting for the next event after processing @device
+ * data, let's keep reading from the udev monitor and only wait
+ * for the next event once either a EAGAIN or a EWOULDBLOCK error
+ * is encountered. */
}
Hmm, we'll lose some responsiveness if we do it within a single thread,
on systems which regularly spawns and destroys virtual functions and mdevs it
may take some time until we actually get to processing mdevctl stuff, but it
may not be that significant, although it doesn't feel quite right to me to do
it in a single thread dedicated to udev. The event ordering discussed in v2
should be a matter of a single shared lock right? The first thing the
throw-away thread does is to acquire a lock to minimize the window for a
context switch and then process the event.
- udevHandleOneDevice(device);
- udev_device_unref(device);
+ if (priv->mdevReady) {
+ virObjectUnlock(priv);
+ if (nodeDeviceUpdateMediatedDevices() < 0)
+ VIR_WARN("mdevctl failed to updated mediated devices");
- /* Instead of waiting for the next event after processing @device
- * data, let's keep reading from the udev monitor and only wait
- * for the next event once either a EAGAIN or a EWOULDBLOCK error
- * is encountered. */
+ virObjectLock(priv);
+ priv->mdevReady = false;
+ virObjectUnlock(priv);
+ }
}
}
@@ -1899,6 +1917,117 @@ udevEventHandleCallback(int watch G_GNUC_UNUSED,
}
+static void
+scheduleMdevctlHandler(int timer G_GNUC_UNUSED, void *opaque)
+{
+ udevEventDataPtr priv = opaque;
+
+ if (priv->mdevctlTimeout > 0) {
+ virEventRemoveTimeout(priv->mdevctlTimeout);
+ priv->mdevctlTimeout = -1;
+ }
+
+ virObjectLock(priv);
+ priv->mdevReady = true;
+ virCondSignal(&priv->threadCond);
+ virObjectUnlock(priv);
+}
+
+
+static void
+mdevctlEventHandleCallback(GFileMonitor *monitor,
+ GFile *file,
+ GFile *other_file,
+ GFileMonitorEvent event_type,
+ gpointer user_data);
+
+
+/* Recursively monitors a directory and its subdirectories for file changes and
+ * returns a GList of GFileMonitor objects */
+static GList*
+monitorFileRecursively(udevEventDataPtr udev,
+ GFile *file)
+{
+ GList *monitors = NULL;
+ g_autoptr(GError) error = NULL;
+ g_autoptr(GFileEnumerator) children = NULL;
+ GFileMonitor *mon;
+
+ if (!(children = g_file_enumerate_children(file, "standard::*",
+ G_FILE_QUERY_INFO_NONE, NULL,
&error))) {
+ if (error->code == G_IO_ERROR_NOT_DIRECTORY)
We're not reporting an error in this code path, I think we should not leave
it like that and report errors on all error code paths.
+ return NULL;
+ goto error;
+ }
+
+ if (!(mon = g_file_monitor(file, G_FILE_MONITOR_NONE, NULL, &error)))
+ goto error;
+
+ g_signal_connect(mon, "changed",
+ G_CALLBACK(mdevctlEventHandleCallback), udev);
+ monitors = g_list_append(monitors, mon);
+
+ while (true) {
+ GFileInfo *info;
+ GFile *child;
+ GList *child_monitors = NULL;
+
+ if (!g_file_enumerator_iterate(children, &info, &child, NULL,
&error))
+ goto error;
+ if (!info)
+ break;
+
+ child_monitors = monitorFileRecursively(udev, child);
+ if (child_monitors)
+ monitors = g_list_concat(monitors, child_monitors);
+ }
+
+ return monitors;
+
+ error:
+ g_list_free_full(monitors, g_object_unref);
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unable to monitor directory: %s"), error->message);
+ return NULL;
+}
+
+
+static void
+mdevctlEventHandleCallback(GFileMonitor *monitor G_GNUC_UNUSED,
+ GFile *file,
+ GFile *other_file G_GNUC_UNUSED,
+ GFileMonitorEvent event_type,
+ gpointer user_data)
+{
+ udevEventDataPtr priv = user_data;
+
+ /* if a new directory appears, monitor that directory for changes */
+ if (event_type == G_FILE_MONITOR_EVENT_CREATED &&
+ g_file_query_file_type(file, G_FILE_QUERY_INFO_NONE, NULL) ==
+ G_FILE_TYPE_DIRECTORY) {
+ GList *newmonitors = monitorFileRecursively(priv, file);
+ priv->mdevctlMonitors = g_list_concat(priv->mdevctlMonitors,
newmonitors);
+ }
+
+ /* When mdevctl creates a device, it can result in multiple notify events
+ * emitted for a single logical change (e.g. several CHANGED events, or a
+ * CREATED and CHANGED event followed by CHANGES_DONE_HINT). To avoid
+ * spawning a mdevctl thread multiple times for a single logical
+ * configuration change, try to coalesce these changes by waiting for the
+ * CHANGES_DONE_HINT event. As a fallback, add a timeout to trigger the
+ * signal if that event never comes */
If the CHANGES_DONE_HINT event doesn't come, then the ordering is still broken
even if we have a timer in place, isn't it? I'm wondering whether it actually
happened to you that it didn't arrive. I mean, we should be able to rely on an
event to be delivered properly, it's not like we're trying to wait for an event
that kernel will never send when the whole filesystem tree is created for an
mdev that was just created.
Feel free to point out facts I'm not seeing on why we can't adopt the
throw-away threads for the mdevctl events with a single shared lock.
Regards,
Erik
+ if (event_type != G_FILE_MONITOR_EVENT_CHANGES_DONE_HINT) {
+ if (priv->mdevctlTimeout > 0)
+ virEventRemoveTimeout(priv->mdevctlTimeout);
+ priv->mdevctlTimeout = virEventAddTimeout(100, scheduleMdevctlHandler,
+ priv, NULL);
+ return;
+ }
+
+ scheduleMdevctlHandler(-1, priv);
+}
+
+
/* DMI is intel-compatible specific */
#if defined(__x86_64__) || defined(__i386__) || defined(__amd64__)
static void
@@ -2052,6 +2181,7 @@ nodeStateInitialize(bool privileged,
udevEventDataPtr priv = NULL;
struct udev *udev = NULL;
virThread enumThread;
+ g_autoptr(GFile) mdevctlConfigDir =
g_file_new_for_path("/etc/mdevctl.d");
if (root != NULL) {
virReportError(VIR_ERR_INVALID_ARG, "%s",
@@ -2153,6 +2283,14 @@ nodeStateInitialize(bool privileged,
if (priv->watch == -1)
goto unlock;
+ /* mdevctl may add notification events in the future:
+ *
https://github.com/mdevctl/mdevctl/issues/27. For now, fall back to
+ * monitoring the mdevctl configuration directory for changes.
+ * mdevctl configuration is stored in a directory tree within
+ * /etc/mdevctl.d/. There is a directory for each parent device, which
+ * contains a file defining each mediated device */
+ priv->mdevctlMonitors = monitorFileRecursively(priv, mdevctlConfigDir);
+
virObjectUnlock(priv);
/* Create a fictional 'computer' device to root the device tree. */
--
2.26.2