On Thu, Apr 20, 2017 at 03:05:57PM +0200, Erik Skultety wrote:
The parent device needs to report the generic stuff about the
supported
mediated devices types, like device API, available instances, type name,
etc. Therefore this patch introduces a new nested capability element of
type 'mdev' with the resulting XML of the following format:
<device>
...
<capability type='pci'>
...
<capability type='mdev'>
<type id='vendor-supplied-id'>
<description>optional, raw, unstructured resource allocation data
</description>
<deviceAPI>vfio-pci</deviceAPI>
<availableInstances>NUM</availableInstances>
</type>
...
<type>
...
</type>
</capability>
</capability>
...
</device>
You've mentioned it in the cover letter and asked me privately to share
my opinion about the mdev capability. I agree that we should split the
capability into one for parent device and one for child device. Having
one capability for two different devices where the capability means
something else based on the device doesn't seem to be a good idea especially
if we have an API that allows you list devices whit some specific capability.
The internal representation of the capability is one structure where both
parent and child shares only *type*. The suggested names mdev-parent and
mdev-child seems as good names if the "parent" and "child" naming is
generally
used for mdev devices.
Signed-off-by: Erik Skultety <eskultet(a)redhat.com>
---
docs/schemas/nodedev.rng | 24 ++++
src/conf/node_device_conf.c | 103 +++++++++++++++++
src/conf/node_device_conf.h | 17 +++
src/libvirt_private.syms | 1 +
src/node_device/node_device_udev.c | 133 ++++++++++++++++++++++
tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml | 27 +++++
6 files changed, 305 insertions(+)
create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
index 0f90a73c8..4b5dca777 100644
--- a/docs/schemas/nodedev.rng
+++ b/docs/schemas/nodedev.rng
@@ -205,6 +205,30 @@
</optional>
<optional>
+ <element name='capability'>
+ <attribute name='type'>
+ <value>mdev</value>
+ </attribute>
+ <element name='type'>
+ <attribute name='id'>
+ <data type='string'/>
+ </attribute>
+ <optional>
+ <element name='name'><text/></element>
+ </optional>
+ <element name='deviceAPI'>
+ <choice>
+ <value>vfio-pci</value>
+ </choice>
+ </element>
+ <element name='availableInstances'>
+ <ref name='unsignedInt'/>
+ </element>
+ </element>
+ </element>
+ </optional>
+
+ <optional>
<element name='iommuGroup'>
<attribute name='number'>
<ref name='unsignedInt'/>
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index fdddc97eb..fe4f1bc60 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -87,6 +87,26 @@ virNodeDevCapsDefParseString(const char *xpath,
}
+static void
+virNodeDevCapMdevClear(virNodeDevCapMdevPtr mdev)
+{
+ VIR_FREE(mdev->type);
+ VIR_FREE(mdev->name);
+ VIR_FREE(mdev->device_api);
+}
This won't be required if we split the mdev capability.
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev)
+{
+ if (!mdev)
+ return;
+
+ virNodeDevCapMdevClear(mdev);
+ VIR_FREE(mdev);
+}
+
+
void
virNodeDeviceDefFree(virNodeDeviceDefPtr def)
{
@@ -264,6 +284,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
virBufferAsprintf(buf, "<capability type='%s'/>\n",
virPCIHeaderTypeToString(data->pci_dev.hdrType));
}
+ if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
+ virBufferAddLit(buf, "<capability type='mdev'>\n");
+ virBufferAdjustIndent(buf, 2);
+ for (i = 0; i < data->pci_dev.nmdevs; i++) {
+ virNodeDevCapMdevPtr mdev = data->pci_dev.mdevs[i];
+ virBufferEscapeString(buf, "<type id='%s'>\n",
mdev->type);
+ virBufferAdjustIndent(buf, 2);
+ if (mdev->name)
+ virBufferAsprintf(buf, "<name>%s</name>\n",
+ mdev->name);
This will fit on one line.
+ virBufferAsprintf(buf,
"<deviceAPI>%s</deviceAPI>\n",
+ mdev->device_api);
+ virBufferAsprintf(buf,
+
"<availableInstances>%u</availableInstances>\n",
+ mdev->available_instances);
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</type>\n");
+ }
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</capability>\n");
+ }
if (data->pci_dev.nIommuGroupDevices) {
virBufferAsprintf(buf, "<iommuGroup number='%d'>\n",
data->pci_dev.iommuGroupNumber);
@@ -1358,6 +1399,62 @@ virNodeDevPCICapSRIOVParseXML(xmlXPathContextPtr ctxt,
static int
+virNodeDevPCICapMediatedDevParseXML(xmlXPathContextPtr ctxt,
+ virNodeDevCapPCIDevPtr pci_dev)
+{
+ int ret = -1;
+ xmlNodePtr orignode = NULL;
+ xmlNodePtr *nodes = NULL;
+ int nmdevs = virXPathNodeSet("./type", ctxt, &nodes);
+ virNodeDevCapMdevPtr mdev = NULL;
+ size_t i;
+
+ orignode = ctxt->node;
+ for (i = 0; i < nmdevs; i++) {
+ ctxt->node = nodes[i];
+
+ if (VIR_ALLOC(mdev) < 0)
+ goto cleanup;
+
+ if (!(mdev->type = virXPathString("string(./@id[1])", ctxt))) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("missing 'id' attribute for mediated
device's "
+ "<type> element"));
+ goto cleanup;
+ }
+
+ if (!(mdev->device_api = virXPathString("string(./deviceAPI[1])",
ctxt))) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("missing device API for mediated device type
'%s'"),
+ mdev->type);
+ goto cleanup;
+ }
+
+ if (virXPathUInt("number(./availableInstances)", ctxt,
+ &mdev->available_instances) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("missing number of available instances for "
+ "mediated device type '%s'"),
+ mdev->type);
+ goto cleanup;
+ }
+
+ mdev->name = virXPathString("string(./name)", ctxt);
+
+ if (VIR_APPEND_ELEMENT(pci_dev->mdevs, pci_dev->nmdevs, mdev) < 0)
+ goto cleanup;
+ }
+
+ pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
+ ret = 0;
+ cleanup:
+ virNodeDevCapMdevFree(mdev);
+ ctxt->node = orignode;
+ return ret;
+}
+
+
+static int
virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
xmlNodePtr node,
virNodeDevCapPCIDevPtr pci_dev)
@@ -1382,6 +1479,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
if (sriov_cap &&
virNodeDevPCICapSRIOVParseXML(ctxt, node, pci_dev, sriov_cap) < 0) {
goto cleanup;
+ } if (STREQ(type, "mdev") &&
+ virNodeDevPCICapMediatedDevParseXML(ctxt, pci_dev)) {
+ goto cleanup;
} else {
int hdrType = virPCIHeaderTypeFromString(type);
@@ -1894,6 +1994,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
VIR_FREE(data->pci_dev.iommuGroupDevices);
virPCIEDeviceInfoFree(data->pci_dev.pci_express);
+ for (i = 0; i < data->pci_dev.nmdevs; i++)
+ virNodeDevCapMdevFree(data->pci_dev.mdevs[i]);
+ VIR_FREE(data->pci_dev.mdevs);
break;
case VIR_NODE_DEV_CAP_USB_DEV:
VIR_FREE(data->usb_dev.product_name);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index 375f97256..883fa017e 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -94,6 +94,7 @@ typedef enum {
VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0),
VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1),
VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2),
+ VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3),
} virNodeDevPCICapFlags;
typedef enum {
@@ -132,6 +133,16 @@ struct _virNodeDevCapSystem {
virNodeDevCapSystemFirmware firmware;
};
+typedef struct _virNodeDevCapMdev virNodeDevCapMdev;
+typedef virNodeDevCapMdev *virNodeDevCapMdevPtr;
+struct _virNodeDevCapMdev {
+ char *type;
+ char *name;
+ char *device_api;
+ unsigned int available_instances;
+ unsigned int iommuGroupNumber;
+};
+
typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
struct _virNodeDevCapPCIDev {
@@ -155,6 +166,8 @@ struct _virNodeDevCapPCIDev {
int numa_node;
virPCIEDeviceInfoPtr pci_express;
int hdrType; /* enum virPCIHeaderType or -1 */
+ virNodeDevCapMdevPtr *mdevs;
+ size_t nmdevs;
};
typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
@@ -263,6 +276,7 @@ struct _virNodeDevCapData {
virNodeDevCapStorage storage;
virNodeDevCapSCSIGeneric sg;
virNodeDevCapDRM drm;
+ virNodeDevCapMdev mdev;
This should be part of the next patch.
};
};
@@ -339,6 +353,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
void
virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
+void
+virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev);
+
# define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
(VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \
VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 181e17875..d1e872e60 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -665,6 +665,7 @@ virNetDevIPRouteParseXML;
# conf/node_device_conf.h
+virNodeDevCapMdevFree;
virNodeDevCapsDefFree;
virNodeDevCapTypeFromString;
virNodeDevCapTypeToString;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 95c1aee29..79f1537d9 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -314,6 +314,133 @@ static int udevTranslatePCIIds(unsigned int vendor,
}
+static int
+udevGetMdevCaps(struct udev_device *device,
+ const char *sysfspath,
+ virNodeDevCapMdevPtr mdev)
+{
+ int ret = -1;
+ char *attrpath = NULL; /* relative path to the actual sysfs attribute */
+ const char *devpath = NULL; /* base sysfs path as reported by udev */
+ const char *relpath = NULL; /* diff between @sysfspath and @devpath */
+ char *tmp = NULL;
+
+#define MDEV_GET_SYSFS_ATTR(attr_name, dir, cb, ...) \
+ do { \
+ if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0)
\
+ goto cleanup; \
+ \
+ if (cb(device, attrpath, __VA_ARGS__) < 0) \
+ goto cleanup; \
+ \
+ VIR_FREE(attrpath); \
+ } while (0) \
+
+ /* UDEV doesn't report attributes under subdirectories by default but is
+ * able to query them if the path to the attribute is relative to the
+ * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's
+ * base path as udev reports it, but we're interested in attributes under
+ * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, let's
+ * strip the common part of the path and let udev chew the relative bit.
+ */
+ devpath = udev_device_get_syspath(device);
+ relpath = sysfspath + strlen(devpath);
+
+ /* When calling from the mdev child device, @sysfspath is a symbolic link
+ * to the actual mdev type (rather than a physical path), so we need to
+ * resolve it in order to get the type's name.
+ */
+ if (virFileResolveLink(sysfspath, &tmp) < 0)
+ goto cleanup;
+
+ if (VIR_STRDUP(mdev->type, last_component(tmp)) < 0)
+ goto cleanup;
+
+ MDEV_GET_SYSFS_ATTR(name, relpath,
+ udevGetStringSysfsAttr, &mdev->name);
+ MDEV_GET_SYSFS_ATTR(device_api, relpath,
+ udevGetStringSysfsAttr, &mdev->device_api);
+ MDEV_GET_SYSFS_ATTR(available_instances, relpath,
+ udevGetUintSysfsAttr, &mdev->available_instances, 10);
+
+#undef MDEV_GET_SYSFS_ATTR
+
+ ret = 0;
+ cleanup:
+ VIR_FREE(attrpath);
+ VIR_FREE(tmp);
+ return ret;
+}
+
+
+static int
+udevPCIGetMdevCaps(struct udev_device *device,
+ virNodeDevCapPCIDevPtr pcidata)
+{
+ int ret = -1;
+ int direrr = -1;
+ DIR *dir = NULL;
+ struct dirent *entry;
+ char *path = NULL;
+ char *tmppath = NULL;
+ virNodeDevCapMdevPtr mdev = NULL;
+ virNodeDevCapMdevPtr *mdevs = NULL;
+ size_t nmdevs = 0;
+ size_t i;
+
+ if (virAsprintf(&path, "%s/mdev_supported_types",
+ udev_device_get_syspath(device)) < 0)
+ return -1;
+
+ if ((direrr = virDirOpenIfExists(&dir, path)) < 0)
I would probably name it as dirret, it doesn't indicate only whether there
was an error.
+ goto cleanup;
+
+ if (direrr == 0) {
+ ret = 0;
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC(mdevs) < 0)
+ goto cleanup;
+
+ /* since udev doesn't provide means to list other than top-level
+ * attributes, we need to scan the subdirectories ourselves
+ */
+ while ((direrr = virDirRead(dir, &entry, path)) > 0) {
+ if (VIR_ALLOC(mdev) < 0)
+ goto cleanup;
+
+ if (virAsprintf(&tmppath, "%s/%s", path, entry->d_name) <
0)
+ goto cleanup;
+
+ if (udevGetMdevCaps(device, tmppath, mdev) < 0)
+ goto cleanup;
+
+ if (VIR_APPEND_ELEMENT(mdevs, nmdevs, mdev) < 0)
+ goto cleanup;
+
+ VIR_FREE(tmppath);
+ }
+
+ if (direrr < 0)
+ goto cleanup;
+
+ VIR_STEAL_PTR(pcidata->mdevs, mdevs);
+ pcidata->nmdevs = nmdevs;
+ nmdevs = 0;
+ ret = 0;
+ cleanup:
+ virNodeDevCapMdevFree(mdev);
+ for (i = 0; i < nmdevs; i++)
+ virNodeDevCapMdevFree(mdevs[i]);
+ VIR_FREE(mdevs);
+ VIR_FREE(path);
+ VIR_FREE(tmppath);
+ VIR_DIR_CLOSE(dir);
+ return ret;
+}
+
+
static int udevProcessPCI(struct udev_device *device,
virNodeDeviceDefPtr def)
{
@@ -400,6 +527,12 @@ static int udevProcessPCI(struct udev_device *device,
}
}
+ /* check whether the device is mediated devices framework capable, if so,
+ * process it
+ */
+ if (udevPCIGetMdevCaps(device, pci_dev) < 0)
+ goto cleanup;
+
ret = 0;
cleanup:
diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
new file mode 100644
index 000000000..b745686d3
--- /dev/null
+++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml
@@ -0,0 +1,27 @@
+<device>
+ <name>pci_0000_02_10_7</name>
+ <parent>pci_0000_00_04_0</parent>
+ <capability type='pci'>
+ <domain>0</domain>
+ <bus>2</bus>
+ <slot>16</slot>
+ <function>7</function>
+ <product id='0x10ca'>82576 Virtual Function</product>
+ <vendor id='0x8086'>Intel Corporation</vendor>
+ <capability type='mdev'>
+ <type id='foo'>
+ <name>bar</name>
+ <deviceAPI>vfio-pci</deviceAPI>
+ <availableInstances>1</availableInstances>
+ </type>
+ </capability>
+ <iommuGroup number='31'>
+ <address domain='0x0000' bus='0x02' slot='0x10'
function='0x7'/>
+ </iommuGroup>
+ <numa node='0'/>
+ <pci-express>
+ <link validity='cap' port='0' speed='2.5'
width='4'/>
+ <link validity='sta' width='0'/>
+ </pci-express>
+ </capability>
+</device>
--
2.12.2
--
libvir-list mailing list
libvir-list(a)redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list