The parent device needs to report generic information about the mediated
device types it supports, such as the device API, the number of available
instances and the type name. Therefore this patch introduces a new nested
capability element of type 'mdev'; the resulting XML has the following format:
<device>
...
<capability type='pci'>
...
<capability type='mdev'>
<type id='vendor-supplied-id'>
<name>optional, vendor-supplied type name</name>
<deviceAPI>vfio-pci</deviceAPI>
<availableInstances>NUM</availableInstances>
</type>
...
<type>
...
</type>
</capability>
</capability>
...
</device>
Signed-off-by: Erik Skultety <eskultet(a)redhat.com>
---
docs/schemas/nodedev.rng | 24 ++++
src/conf/node_device_conf.c | 103 +++++++++++++++++
src/conf/node_device_conf.h | 17 +++
src/libvirt_private.syms | 1 +
src/node_device/node_device_udev.c | 133 ++++++++++++++++++++++
tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml | 27 +++++
6 files changed, 305 insertions(+)
create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
index 0f90a73c8..4b5dca777 100644
--- a/docs/schemas/nodedev.rng
+++ b/docs/schemas/nodedev.rng
@@ -205,6 +205,30 @@
</optional>
<optional>
+      <element name='capability'>
+        <attribute name='type'>
+          <value>mdev</value>
+        </attribute>
+        <!-- a parent device may advertise several mediated device types -->
+        <oneOrMore>
+          <element name='type'>
+            <attribute name='id'>
+              <data type='string'/>
+            </attribute>
+            <optional>
+              <element name='name'><text/></element>
+            </optional>
+            <element name='deviceAPI'>
+              <choice>
+                <value>vfio-pci</value>
+              </choice>
+            </element>
+            <element name='availableInstances'>
+              <ref name='unsignedInt'/>
+            </element>
+          </element>
+        </oneOrMore>
+      </element>
+    </optional>
+
+ <optional>
<element name='iommuGroup'>
<attribute name='number'>
<ref name='unsignedInt'/>
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index fdddc97eb..fe4f1bc60 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -87,6 +87,26 @@ virNodeDevCapsDefParseString(const char *xpath,
}
+/* Release the device_api, name and type strings held by @mdev with
+ * VIR_FREE(); the structure itself is not released here. */
+static void
+virNodeDevCapMdevClear(virNodeDevCapMdevPtr mdev)
+{
+    VIR_FREE(mdev->device_api);
+    VIR_FREE(mdev->name);
+    VIR_FREE(mdev->type);
+}
+
+
+/* Dispose of @mdev: clear its members, then release the structure.
+ * A NULL @mdev is accepted and ignored. */
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev)
+{
+    if (mdev) {
+        virNodeDevCapMdevClear(mdev);
+        VIR_FREE(mdev);
+    }
+}
+
+
void
virNodeDeviceDefFree(virNodeDeviceDefPtr def)
{
@@ -264,6 +284,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
virBufferAsprintf(buf, "<capability type='%s'/>\n",
virPCIHeaderTypeToString(data->pci_dev.hdrType));
}
+    /* Nested <capability type='mdev'>: one <type> child per entry of
+     * pci_dev.mdevs, emitted only when the device carries the MDEV flag. */
+    if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
+        virBufferAddLit(buf, "<capability type='mdev'>\n");
+        virBufferAdjustIndent(buf, 2);
+        for (i = 0; i < data->pci_dev.nmdevs; i++) {
+            virNodeDevCapMdevPtr mdev = data->pci_dev.mdevs[i];
+
+            virBufferEscapeString(buf, "<type id='%s'>\n", mdev->type);
+            virBufferAdjustIndent(buf, 2);
+            /* name and device_api are free-form strings (the udev backend
+             * reads them from sysfs attributes), so they have to be
+             * XML-escaped in the same way as the type id above */
+            if (mdev->name)
+                virBufferEscapeString(buf, "<name>%s</name>\n",
+                                      mdev->name);
+            virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n",
+                                  mdev->device_api);
+            virBufferAsprintf(buf,
+                              "<availableInstances>%u</availableInstances>\n",
+                              mdev->available_instances);
+            virBufferAdjustIndent(buf, -2);
+            virBufferAddLit(buf, "</type>\n");
+        }
+        virBufferAdjustIndent(buf, -2);
+        virBufferAddLit(buf, "</capability>\n");
+    }
if (data->pci_dev.nIommuGroupDevices) {
virBufferAsprintf(buf, "<iommuGroup number='%d'>\n",
data->pci_dev.iommuGroupNumber);
@@ -1358,6 +1399,62 @@ virNodeDevPCICapSRIOVParseXML(xmlXPathContextPtr ctxt,
+/*
+ * Parse the <type> elements found under the current node of @ctxt, creating
+ * one virNodeDevCapMdev per element and appending it to @pci_dev->mdevs.
+ * Every <type> must provide an 'id' attribute, a <deviceAPI> element and an
+ * <availableInstances> element; <name> is optional.
+ *
+ * On success VIR_NODE_DEV_CAP_FLAG_PCI_MDEV is set in @pci_dev->flags. The
+ * context node of @ctxt is restored before returning.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
static int
+virNodeDevPCICapMediatedDevParseXML(xmlXPathContextPtr ctxt,
+                                    virNodeDevCapPCIDevPtr pci_dev)
+{
+    int ret = -1;
+    xmlNodePtr orignode = ctxt->node;
+    xmlNodePtr *nodes = NULL;
+    int nmdevs = -1;
+    virNodeDevCapMdevPtr mdev = NULL;
+    size_t i;
+
+    /* A negative count must be rejected here: compared against the size_t
+     * loop index it would turn into a huge bound and the loop would index
+     * into a NULL @nodes array. */
+    if ((nmdevs = virXPathNodeSet("./type", ctxt, &nodes)) < 0)
+        goto cleanup;
+
+    for (i = 0; i < nmdevs; i++) {
+        ctxt->node = nodes[i];
+
+        if (VIR_ALLOC(mdev) < 0)
+            goto cleanup;
+
+        if (!(mdev->type = virXPathString("string(./@id[1])", ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR, "%s",
+                           _("missing 'id' attribute for mediated device's "
+                             "<type> element"));
+            goto cleanup;
+        }
+
+        if (!(mdev->device_api = virXPathString("string(./deviceAPI[1])",
+                                                ctxt))) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing device API for mediated device type '%s'"),
+                           mdev->type);
+            goto cleanup;
+        }
+
+        if (virXPathUInt("number(./availableInstances)", ctxt,
+                         &mdev->available_instances) < 0) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("missing number of available instances for "
+                             "mediated device type '%s'"),
+                           mdev->type);
+            goto cleanup;
+        }
+
+        /* optional element, NULL simply means "not provided" */
+        mdev->name = virXPathString("string(./name)", ctxt);
+
+        /* NOTE(review): relies on VIR_APPEND_ELEMENT taking ownership of
+         * @mdev and resetting it, otherwise the cleanup path below would
+         * free an element that is already stored in the array */
+        if (VIR_APPEND_ELEMENT(pci_dev->mdevs, pci_dev->nmdevs, mdev) < 0)
+            goto cleanup;
+    }
+
+    pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+    ret = 0;
+ cleanup:
+    /* the node array filled in by virXPathNodeSet belongs to us */
+    VIR_FREE(nodes);
+    virNodeDevCapMdevFree(mdev);
+    ctxt->node = orignode;
+    return ret;
+}
+
+
+static int
virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
xmlNodePtr node,
virNodeDevCapPCIDevPtr pci_dev)
@@ -1382,6 +1479,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
if (sriov_cap &&
virNodeDevPCICapSRIOVParseXML(ctxt, node, pci_dev, sriov_cap) < 0) {
goto cleanup;
+ } if (STREQ(type, "mdev") &&
+ virNodeDevPCICapMediatedDevParseXML(ctxt, pci_dev)) {
+ goto cleanup;
} else {
int hdrType = virPCIHeaderTypeFromString(type);
@@ -1894,6 +1994,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
VIR_FREE(data->pci_dev.iommuGroupDevices);
virPCIEDeviceInfoFree(data->pci_dev.pci_express);
+ for (i = 0; i < data->pci_dev.nmdevs; i++)
+ virNodeDevCapMdevFree(data->pci_dev.mdevs[i]);
+ VIR_FREE(data->pci_dev.mdevs);
break;
case VIR_NODE_DEV_CAP_USB_DEV:
VIR_FREE(data->usb_dev.product_name);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index 375f97256..883fa017e 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -94,6 +94,7 @@ typedef enum {
VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0),
VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1),
VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2),
+ VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3),
} virNodeDevPCICapFlags;
typedef enum {
@@ -132,6 +133,16 @@ struct _virNodeDevCapSystem {
virNodeDevCapSystemFirmware firmware;
};
+/* One mediated device type; instances are stored per PCI device in
+ * virNodeDevCapPCIDev.mdevs and formatted as a <type> element. */
+typedef struct _virNodeDevCapMdev virNodeDevCapMdev;
+typedef virNodeDevCapMdev *virNodeDevCapMdevPtr;
+struct _virNodeDevCapMdev {
+ char *type; /* type identifier, the 'id' attribute of <type> */
+ char *name; /* optional name, the <name> element; may be NULL */
+ char *device_api; /* content of the <deviceAPI> element */
+ unsigned int available_instances; /* content of <availableInstances> */
+ unsigned int iommuGroupNumber; /* NOTE(review): not read or written by the code visible in this patch */
+};
+
typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
struct _virNodeDevCapPCIDev {
@@ -155,6 +166,8 @@ struct _virNodeDevCapPCIDev {
int numa_node;
virPCIEDeviceInfoPtr pci_express;
int hdrType; /* enum virPCIHeaderType or -1 */
+ virNodeDevCapMdevPtr *mdevs;
+ size_t nmdevs;
};
typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
@@ -263,6 +276,7 @@ struct _virNodeDevCapData {
virNodeDevCapStorage storage;
virNodeDevCapSCSIGeneric sg;
virNodeDevCapDRM drm;
+ virNodeDevCapMdev mdev;
};
};
@@ -339,6 +353,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
void
virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
+/* Frees @mdev together with its type, name and device_api strings;
+ * passing NULL is a no-op. */
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev);
+
# define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
(VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \
VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 181e17875..d1e872e60 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -665,6 +665,7 @@ virNetDevIPRouteParseXML;
# conf/node_device_conf.h
+virNodeDevCapMdevFree;
virNodeDevCapsDefFree;
virNodeDevCapTypeFromString;
virNodeDevCapTypeToString;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 95c1aee29..79f1537d9 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -314,6 +314,133 @@ static int udevTranslatePCIIds(unsigned int vendor,
}
+/*
+ * Fill @mdev with the description of a single mediated device type.
+ *
+ * @device:    udev device the attributes are queried through
+ * @sysfspath: path of the mdev type directory (or a symlink to it); it is
+ *             expected to start with the syspath udev reports for @device
+ * @mdev:      structure receiving type, name, device_api and
+ *             available_instances
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+udevGetMdevCaps(struct udev_device *device,
+                const char *sysfspath,
+                virNodeDevCapMdevPtr mdev)
+{
+    int ret = -1;
+    char *attrpath = NULL;        /* attribute path relative to the device */
+    char *resolved = NULL;        /* @sysfspath with symlinks resolved */
+    const char *basepath = NULL;  /* syspath of @device as reported by udev */
+    const char *subpath = NULL;   /* part of @sysfspath following @basepath */
+
+    /* Build "<dir>/<attr_name>" in attrpath, hand it to @cb along with the
+     * remaining arguments and release it again; any failure jumps to the
+     * cleanup label. */
+#define MDEV_READ_ATTR(attr_name, dir, cb, ...)                            \
+    do {                                                                   \
+        if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0 ||        \
+            cb(device, attrpath, __VA_ARGS__) < 0)                         \
+            goto cleanup;                                                  \
+                                                                           \
+        VIR_FREE(attrpath);                                                \
+    } while (0)
+
+    /* udev does not list attributes living in subdirectories, yet it can
+     * query them when given a path relative to the device's own syspath.
+     * For a device at /sys/devices/../0000:00:01.0/ the attributes of
+     * interest sit in /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/,
+     * so skip the shared prefix and pass only the remainder to udev.
+     */
+    basepath = udev_device_get_syspath(device);
+    subpath = sysfspath + strlen(basepath);
+
+    /* For an mdev child device @sysfspath is a symbolic link to the actual
+     * type directory rather than a physical path; resolve it first so that
+     * the last path component really is the type's name.
+     */
+    if (virFileResolveLink(sysfspath, &resolved) < 0 ||
+        VIR_STRDUP(mdev->type, last_component(resolved)) < 0)
+        goto cleanup;
+
+    MDEV_READ_ATTR(name, subpath,
+                   udevGetStringSysfsAttr, &mdev->name);
+    MDEV_READ_ATTR(device_api, subpath,
+                   udevGetStringSysfsAttr, &mdev->device_api);
+    MDEV_READ_ATTR(available_instances, subpath,
+                   udevGetUintSysfsAttr, &mdev->available_instances, 10);
+
+#undef MDEV_READ_ATTR
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(resolved);
+    VIR_FREE(attrpath);
+    return ret;
+}
+
+
+/*
+ * Collect the mediated device types a PCI device supports.
+ *
+ * Every entry of "<syspath of @device>/mdev_supported_types" is turned into
+ * a virNodeDevCapMdev by udevGetMdevCaps(). On success the resulting array
+ * is handed over to @pcidata (mdevs/nmdevs) and the device is flagged with
+ * VIR_NODE_DEV_CAP_FLAG_PCI_MDEV. When virDirOpenIfExists() returns 0
+ * (presumably: the directory does not exist) @pcidata is left untouched and
+ * the call still succeeds.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+udevPCIGetMdevCaps(struct udev_device *device,
+                   virNodeDevCapPCIDevPtr pcidata)
+{
+    int ret = -1;
+    int direrr = -1;
+    DIR *dir = NULL;
+    struct dirent *entry;
+    char *path = NULL;
+    char *tmppath = NULL;
+    virNodeDevCapMdevPtr mdev = NULL;
+    virNodeDevCapMdevPtr *mdevs = NULL;
+    size_t nmdevs = 0;
+    size_t i;
+
+    if (virAsprintf(&path, "%s/mdev_supported_types",
+                    udev_device_get_syspath(device)) < 0)
+        return -1;
+
+    if ((direrr = virDirOpenIfExists(&dir, path)) < 0)
+        goto cleanup;
+
+    /* nothing to scan, the device is not treated as mdev capable */
+    if (direrr == 0) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    if (VIR_ALLOC(mdevs) < 0)
+        goto cleanup;
+
+    /* since udev doesn't provide means to list other than top-level
+     * attributes, we need to scan the subdirectories ourselves
+     */
+    while ((direrr = virDirRead(dir, &entry, path)) > 0) {
+        if (VIR_ALLOC(mdev) < 0)
+            goto cleanup;
+
+        if (virAsprintf(&tmppath, "%s/%s", path, entry->d_name) < 0)
+            goto cleanup;
+
+        if (udevGetMdevCaps(device, tmppath, mdev) < 0)
+            goto cleanup;
+
+        if (VIR_APPEND_ELEMENT(mdevs, nmdevs, mdev) < 0)
+            goto cleanup;
+
+        VIR_FREE(tmppath);
+    }
+
+    if (direrr < 0)
+        goto cleanup;
+
+    VIR_STEAL_PTR(pcidata->mdevs, mdevs);
+    pcidata->nmdevs = nmdevs;
+    /* The XML formatter emits <capability type='mdev'> only when this flag
+     * is set, so without it the types collected above were never reported. */
+    pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+    /* ownership moved to @pcidata; keep the cleanup loop from freeing it */
+    nmdevs = 0;
+    ret = 0;
+ cleanup:
+    virNodeDevCapMdevFree(mdev);
+    for (i = 0; i < nmdevs; i++)
+        virNodeDevCapMdevFree(mdevs[i]);
+    VIR_FREE(mdevs);
+    VIR_FREE(path);
+    VIR_FREE(tmppath);
+    VIR_DIR_CLOSE(dir);
+    return ret;
+}
+
+
static int udevProcessPCI(struct udev_device *device,
virNodeDeviceDefPtr def)
{
@@ -400,6 +527,12 @@ static int udevProcessPCI(struct udev_device *device,
}
}
+ /* check whether the device is mediated devices framework capable, if so,
+ * process it
+ */
+ if (udevPCIGetMdevCaps(device, pci_dev) < 0)
+ goto cleanup;
+
ret = 0;
cleanup:
diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
new file mode 100644
index 000000000..b745686d3
--- /dev/null
+++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
@@ -0,0 +1,27 @@
+<device>
+ <name>pci_0000_02_10_7</name>
+ <parent>pci_0000_00_04_0</parent>
+ <capability type='pci'>
+ <domain>0</domain>
+ <bus>2</bus>
+ <slot>16</slot>
+ <function>7</function>
+ <product id='0x10ca'>82576 Virtual Function</product>
+ <vendor id='0x8086'>Intel Corporation</vendor>
+ <capability type='mdev'>
+ <type id='foo'>
+ <name>bar</name>
+ <deviceAPI>vfio-pci</deviceAPI>
+ <availableInstances>1</availableInstances>
+ </type>
+ </capability>
+ <iommuGroup number='31'>
+ <address domain='0x0000' bus='0x02' slot='0x10'
function='0x7'/>
+ </iommuGroup>
+ <numa node='0'/>
+ <pci-express>
+ <link validity='cap' port='0' speed='2.5'
width='4'/>
+ <link validity='sta' width='0'/>
+ </pci-express>
+ </capability>
+</device>
--
2.12.2