On Mon, Nov 06, 2023 at 02:39:00 -0500, Laine Stump wrote:
Rather than always binding to the vfio-pci driver, use the new
function virPCIDeviceFindBestVFIOVariant() to see if the running
kernel has a VFIO variant driver available that is a better match for
the device, and if one is found, use that instead.
virPCIDeviceFindBestVFIOVariant() function reads the modalias file for
the given device from sysfs, then looks through
/lib/modules/${kernel_release}/modules.alias for the vfio_pci alias
that matches with the least number of wildcard ('*') fields.
The appropriate "VFIO variant" driver for a device will be the PCI
driver implemented by the discovered module - these drivers are
compatible with (and provide the entire API of) the standard vfio-pci
driver, but have additional device-specific APIs that can be useful
for, e.g., saving/restoring state for migration.
If a specific driver is named, that will still be used rather than
searching modules.alias; this makes it possible to force binding of
vfio-pci if there is an issue with the auto-selected variant driver.
Signed-off-by: Laine Stump <laine(a)redhat.com>
---
src/util/virpci.c | 242 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 242 insertions(+)
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 885958eb99..40831a5294 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -30,6 +30,10 @@
#include <sys/stat.h>
#include <unistd.h>
+#ifndef WIN32
+# include <sys/utsname.h>
+#endif
+
#include "virlog.h"
#include "virerror.h"
#include "virfile.h"
@@ -1321,6 +1325,225 @@ virPCIDeviceFindDriver(virPCIDevice *dev)
}
+#ifdef __linux__
+typedef struct {
+ /* this is the decomposed version of a string like:
+ *
+ * vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN
+ *
+ * (followed by a space or newline). The "NNNN" are always of the
+ * length in the example unless replaced with a wildcard ("*"),
+ * but we make no assumptions about length.
+ *
+ * Rather than name each field, we just put them
+ * all in an array of 6 elements, so that we
^^^
+ * can write a simple loop to compare them
+ */
+ char *fields[7]; /* v, d, sv, sd, bc, sc, i */
^^^ ...
+} virPCIDeviceAliasInfo;
+
+
+/* NULL in last position makes parsing loop simpler */
+static const char *fieldnames[] = { "v", "d", "sv",
"sd", "bc", "sc", "i", NULL };
+
+
+static void
+virPCIDeviceAliasInfoFree(virPCIDeviceAliasInfo *info)
+{
+ if (info) {
+ size_t i;
+
+ for (i = 0; i < G_N_ELEMENTS(info->fields); i++)
+ g_free(info->fields[i]);
+
+ g_free(info);
+ }
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAliasInfo, virPCIDeviceAliasInfoFree);
+
+
+static virPCIDeviceAliasInfo *
+virPCIDeviceAliasInfoNew(const char *str)
+{
+ const char *field = str;
+
+ size_t i;
+ g_autoptr(virPCIDeviceAliasInfo) ret = g_new0(virPCIDeviceAliasInfo, 1);
+
+ if (!str)
+ return g_steal_pointer(&ret);
So here you return an empty array as "success" if the input string is
empty, but note that neither virPCIDeviceAliasInfoMatch nor
virPCIDeviceAliasInfoPrint properly handle the fields being NULL.
None of the callers use this scenario so it should be removed.
+
+ /* initialize from str */
+ for (i = 0; i < G_N_ELEMENTS(ret->fields); i++) {
+ int len = strlen(fieldnames[i]);
+ const char *next;
+
+ if (strncmp(field, fieldnames[i], len))
+ return NULL;
+
+ field += len;
+ if (fieldnames[i + 1]) {
+ if (!(next = strstr(field, fieldnames[i + 1])))
+ return NULL;
+ } else {
+ next = field;
+ virSkipToSpace(&next);
+ }
+
+ ret->fields[i] = g_strndup(field, next - field);
+ field = next;
+ }
+
+ return g_steal_pointer(&ret);
+}
+
+
+static void
+virPCIDeviceAliasInfoPrint(virPCIDeviceAliasInfo *info)
+{
+ size_t i;
+
+ for (i = 0; i < G_N_ELEMENTS(info->fields); i++)
+ VIR_DEBUG("%s: '%s'", fieldnames[i], info->fields[i]);
+}
+
+
+static bool
+virPCIDeviceAliasInfoMatch(virPCIDeviceAliasInfo *orig,
+ virPCIDeviceAliasInfo *match,
+ int *wildCardCt)
unsigned, negative values don't seem to make sense.
+{
+ size_t i;
+
+ *wildCardCt = 0;
+
+ for (i = 0; i < G_N_ELEMENTS(orig->fields); i++) {
+ if (STREQ(match->fields[i], "*"))
+ (*wildCardCt)++;
+ else if (STRNEQ(orig->fields[i], match->fields[i]))
+ return false;
+ }
+ return true;
+}
+
+
+/* virPCIDeviceFindBestVFIOVariant:
+ *
+ * Find the "best" match of all vfio_pci aliases for @dev in the host
+ * modules.alias file. This uses the algorithm of finding every
+ * modules.alias line that begins with "vfio_pci:", then picking the
+ * one that matches the device's own modalias value (from the file of
+ * that name in the device's sysfs directory) with the fewest
+ * "wildcards" (* character, meaning "match any value for this
+ * attribute").
+ */
+static int
+virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev,
+ char **moduleName)
+{
+ g_autofree char *devModAliasPath = NULL;
+ g_autofree char *devModAliasContent = NULL;
+ const char *devModAlias;
+ g_autoptr(virPCIDeviceAliasInfo) devModAliasInfo = NULL;
+ struct utsname unameInfo;
+ g_autofree char *modulesAliasPath = NULL;
+ g_autofree char *modulesAliasContent = NULL;
+ const char *line;
+ int currentBestWildcardCt = INT_MAX;
Unsigned.
+
+ *moduleName = NULL;
+
+ /* get the modalias values for the device from sysfs */
+ devModAliasPath = virPCIFile(dev->name, "modalias");
If you expect this file to be missing ...
+ if (virFileReadAll(devModAliasPath, 100,
&devModAliasContent) < 0)
... then you need to check whether it exists, as this will spam logs.
And also we most likely should reset the local instance of error in this
code path as virFileReadAll sets it.
+ return 0;
+
+ VIR_DEBUG("modalias path: '%s' contents: '%s'",
+ devModAliasPath, devModAliasContent);
+
+ /* "pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN\n" */
+ if ((devModAlias = STRSKIP(devModAliasContent, "pci:")) == NULL ||
+ !(devModAliasInfo = virPCIDeviceAliasInfoNew(devModAlias))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("device modalias file %1$s content has improper
format"),
+ devModAliasPath);
+ return -1;
+ }
+
+ virPCIDeviceAliasInfoPrint(devModAliasInfo);
Since this logs 7 lines into the debug log I don't think it's a great
idea to have it in production. If you need to be sure that the parser
processes teh modalias correctly, then please add unit tests instead.
Logging the raw string, which happens above should be sufficient for
debugging.
+
+ uname(&unameInfo);
+ modulesAliasPath = g_strdup_printf("/lib/modules/%s/modules.alias",
+ unameInfo.release);
+ if (virFileReadAll(modulesAliasPath,
+ 4 * 1024 * 1024, &modulesAliasContent) < 0) {
Note that the file currently has 1.3MiB already, I'm not sure what the
potenital for growth is though, but this leaves less than an order of
magnitude in reserve.
Additionally the code below only currently looks at 2 of the 25k lines
in the file. Maybe it's worth caching the relevant lines?
+ return -1;
+ }
+
+ /* Look for all lines that are aliases for vfio_pci drivers.
+ * (The first line is always a comment, so we can be sure "alias"
+ * is preceded by a newline)
+ */
+ line = modulesAliasContent;
+
+ while ((line = strstr(line, "\nalias vfio_pci:"))) {
+ g_autoptr(virPCIDeviceAliasInfo) fileModAliasInfo = NULL;
+ int wildCardCt;
+
+ /* "alias vfio_pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN
XXXX\n" */
+ line += strlen("\nalias vfio_pci:");
+ if (!(fileModAliasInfo = virPCIDeviceAliasInfoNew(line)))
+ continue;
+
+ virPCIDeviceAliasInfoPrint(fileModAliasInfo);
Logging the matched line should be sufficient. Please remove the overly
verbose virPCIDeviceAliasInfoPrint.
+
+ if (virPCIDeviceAliasInfoMatch(devModAliasInfo,
+ fileModAliasInfo, &wildCardCt)) {
+
+ const char *aliasStart = strchr(line, ' ');
+ const char *aliasEnd = NULL;
+ g_autofree char *aliasName = NULL;
+
+ if (!aliasStart) {
+ VIR_WARN("malformed modules.alias vfio_pci: line");
+ continue;
+ }
+
+ aliasStart++;
+ line = aliasEnd = strchrnul(aliasStart, '\n');
+ aliasName = g_strndup(aliasStart, aliasEnd - aliasStart);
+
+ VIR_DEBUG("matching alias '%s' found, %d wildcards",
+ aliasName, wildCardCt);
Possibly worth logging currentBestWildcardCt too?
+
+ if (wildCardCt < currentBestWildcardCt) {
+
+ /* this is a better match than previous */
+ currentBestWildcardCt = wildCardCt;
+ g_free(*moduleName);
+ *moduleName = g_steal_pointer(&aliasName);
+ }
+ }
+ }
+ return 0;
+}
+
+
+#else /* __linux__ */
+
+
+static int
+virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev G_GNUC_UNUSED,
+ char **moduleName G_GNUC_UNUSED)
+{
+ virReportSystemError(ENOSYS, "%s",
+ _("VFIO device assignment is not available on this
platform"));
+ return -1;
+}
+#endif /* __linux__ */
+
+
int
virPCIDeviceUnbind(virPCIDevice *dev)
{
@@ -1431,6 +1654,22 @@ virPCIDeviceBindToStub(virPCIDevice *dev)
return -1;
}
+ if (dev->stubDriverType == VIR_PCI_STUB_DRIVER_VFIO
+ && !dev->stubDriverName) {
Logic operators are commonly placed at the end of the line before.
+ g_autofree char *autodetectModuleName = NULL;
+
+ /* automatically use a VFIO variant driver if available for
+ * this device.
+ */
+
+ if (virPCIDeviceFindBestVFIOVariant(dev, &autodetectModuleName) < 0)
+ return -1;
+
+ g_free(dev->stubDriverName);
+ dev->stubDriverName = g_steal_pointer(&autodetectModuleName);
+ }
+
+ /* if a driver name hasn't been decided by now, use default for this type */
if (!dev->stubDriverName) {
const char *stubDriverName = NULL;
@@ -1538,6 +1777,9 @@ virPCIDeviceReattach(virPCIDevice *dev,
return 0;
}
+
+
+
Spurious whitespace change.
static char *
virPCIDeviceReadID(virPCIDevice *dev, const char *id_name)
{
Reviewed-by: Peter Krempa <pkrempa(a)redhat.com>