[libvirt] [PATCH v4 0/6] Add mdev reporting capability to the nodedev driver

since v1: - dropped the <description> element from the parent device nested capability - added missing RNG schema and tests - updated the documentation to describe the MDEV elements in both the parent and the child since v2: - I further split our PCI sub-capability parser into more blocks as suggested - instead of one capability 'mdev' for both mdev device and physical parent I introduced 2, so we can do virsh nodedev-list --cap 'mdev_types' | 'mdev' to see either parent devices or the mediated devices themselves - other minor adjustments pointed out during review. since v3: - fixed nits - updated virsh man page to include mdev and mdev_types within the list of supported capabilities Erik Skultety (6): mdev: Pass a uuidstr rather than an mdev object to some util functions nodedev: conf: Split PCI sub-capability parsing to separate methods nodedev: Introduce new mdev_types and mdev nodedev capabilities nodedev: Introduce the mdev capability to a PCI parent device nodedev: Introduce mdev capability for mediated devices docs: Document the mediated devices within the nodedev driver docs/drvnodedev.html.in | 168 +++++++++++- docs/schemas/nodedev.rng | 43 +++ include/libvirt/libvirt-nodedev.h | 2 + src/conf/node_device_conf.c | 290 ++++++++++++++++----- src/conf/node_device_conf.h | 29 ++- src/conf/virnodedeviceobj.c | 11 +- src/libvirt-nodedev.c | 2 + src/libvirt_private.syms | 1 + src/node_device/node_device_driver.c | 2 + src/node_device/node_device_udev.c | 165 +++++++++++- src/qemu/qemu_domain.c | 8 +- src/security/security_apparmor.c | 10 +- src/security/security_dac.c | 20 +- src/security/security_selinux.c | 20 +- src/util/virmdev.c | 21 +- src/util/virmdev.h | 4 +- .../mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml | 8 + .../pci_0000_02_10_7_mdev_types.xml | 32 +++ tests/nodedevxml2xmltest.c | 2 + tools/virsh-nodedev.c | 6 + tools/virsh.pod | 7 +- 21 files changed, 722 insertions(+), 129 deletions(-) create mode 100644 tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml -- 2.13.0

Namely, this patch is about virMediatedDeviceGetIOMMUGroup{Dev,Num} functions. There's no compelling reason why these functions should take an object, on the contrary, having to create an object every time one needs to query the IOMMU group number, discarding the object afterwards, seems odd. Signed-off-by: Erik Skultety <eskultet@redhat.com> --- src/qemu/qemu_domain.c | 8 +------- src/security/security_apparmor.c | 10 +--------- src/security/security_dac.c | 20 ++------------------ src/security/security_selinux.c | 20 ++------------------ src/util/virmdev.c | 21 +++++++++++++-------- src/util/virmdev.h | 4 ++-- 6 files changed, 21 insertions(+), 62 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index cc02c801e..76acb1c3f 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7181,7 +7181,6 @@ qemuDomainGetHostdevPath(virDomainDefPtr def, virUSBDevicePtr usb = NULL; virSCSIDevicePtr scsi = NULL; virSCSIVHostDevicePtr host = NULL; - virMediatedDevicePtr mdev = NULL; char *tmpPath = NULL; bool freeTmpPath = false; bool includeVFIO = false; @@ -7282,11 +7281,7 @@ qemuDomainGetHostdevPath(virDomainDefPtr def, } case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: - if (!(mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model))) - goto cleanup; - - if (!(tmpPath = virMediatedDeviceGetIOMMUGroupDev(mdev))) + if (!(tmpPath = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto cleanup; freeTmpPath = true; @@ -7342,7 +7337,6 @@ qemuDomainGetHostdevPath(virDomainDefPtr def, virUSBDeviceFree(usb); virSCSIDeviceFree(scsi); virSCSIVHostDeviceFree(host); - virMediatedDeviceFree(mdev); if (freeTmpPath) VIR_FREE(tmpPath); return ret; diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index fc5581526..62672b0af 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -905,21 +905,13 @@ AppArmorSetSecurityHostdevLabel(virSecurityManagerPtr mgr, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: { char *vfiodev = NULL; - virMediatedDevicePtr mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model); - if (!mdev) + if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto done; - if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdev))) { - virMediatedDeviceFree(mdev); - goto done; - } - ret = AppArmorSetSecurityHostdevLabelHelper(vfiodev, ptr); VIR_FREE(vfiodev); - virMediatedDeviceFree(mdev); break; } diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 922e48494..7dcf4c15f 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -968,21 +968,13 @@ virSecurityDACSetHostdevLabel(virSecurityManagerPtr mgr, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: { char *vfiodev = NULL; - virMediatedDevicePtr mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model); - if (!mdev) + if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto done; - if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdev))) { - virMediatedDeviceFree(mdev); - goto done; - } - ret = virSecurityDACSetHostdevLabelHelper(vfiodev, &cbdata); VIR_FREE(vfiodev); - virMediatedDeviceFree(mdev); break; } @@ -1144,21 +1136,13 @@ virSecurityDACRestoreHostdevLabel(virSecurityManagerPtr mgr, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: { char *vfiodev = NULL; - virMediatedDevicePtr mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model); - if (!mdev) + if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto done; - if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdev))) { - virMediatedDeviceFree(mdev); - goto done; - } - ret = virSecurityDACRestoreFileLabel(virSecurityManagerGetPrivateData(mgr), vfiodev); VIR_FREE(vfiodev); - virMediatedDeviceFree(mdev); break; } diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index df7c96833..c7a2dfe98 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1843,21 +1843,13 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManagerPtr mgr, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: { char *vfiodev = NULL; - virMediatedDevicePtr mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model); - if (!mdev) + if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto done; - if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdev))) { - virMediatedDeviceFree(mdev); - goto done; - } - ret = virSecuritySELinuxSetHostdevLabelHelper(vfiodev, &data); VIR_FREE(vfiodev); - virMediatedDeviceFree(mdev); break; } @@ -2092,21 +2084,13 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManagerPtr mgr, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: { char *vfiodev = NULL; - virMediatedDevicePtr mdev = virMediatedDeviceNew(mdevsrc->uuidstr, - mdevsrc->model); - if (!mdev) + if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdevsrc->uuidstr))) goto done; - if (!(vfiodev = virMediatedDeviceGetIOMMUGroupDev(mdev))) { - virMediatedDeviceFree(mdev); - goto done; - } - ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiodev); VIR_FREE(vfiodev); - virMediatedDeviceFree(mdev); break; } diff --git a/src/util/virmdev.c b/src/util/virmdev.c index bd8e3f8de..a5f52d10f 100644 --- a/src/util/virmdev.c +++ b/src/util/virmdev.c @@ -209,43 +209,48 @@ virMediatedDeviceGetPath(virMediatedDevicePtr dev) * for freeing the result. */ char * -virMediatedDeviceGetIOMMUGroupDev(virMediatedDevicePtr dev) +virMediatedDeviceGetIOMMUGroupDev(const char *uuidstr) { - char *resultpath = NULL; + char *result_path = NULL; char *iommu_path = NULL; char *vfio_path = NULL; + char *dev_path = virMediatedDeviceGetSysfsPath(uuidstr); - if (virAsprintf(&iommu_path, "%s/iommu_group", dev->path) < 0) + if (!dev_path) return NULL; + if (virAsprintf(&iommu_path, "%s/iommu_group", dev_path) < 0) + goto cleanup; + if (!virFileExists(iommu_path)) { virReportSystemError(errno, _("failed to access '%s'"), iommu_path); goto cleanup; } - if (virFileResolveLink(iommu_path, &resultpath) < 0) { + if (virFileResolveLink(iommu_path, &result_path) < 0) { virReportSystemError(errno, _("failed to resolve '%s'"), iommu_path); goto cleanup; } - if (virAsprintf(&vfio_path, "/dev/vfio/%s", last_component(resultpath)) < 0) + if (virAsprintf(&vfio_path, "/dev/vfio/%s", last_component(result_path)) < 0) goto cleanup; cleanup: - VIR_FREE(resultpath); + VIR_FREE(result_path); VIR_FREE(iommu_path); + VIR_FREE(dev_path); return vfio_path; } int -virMediatedDeviceGetIOMMUGroupNum(virMediatedDevicePtr dev) +virMediatedDeviceGetIOMMUGroupNum(const char *uuidstr) { char *vfio_path = NULL; char *group_num_str = NULL; unsigned int group_num = -1; - if (!(vfio_path = virMediatedDeviceGetIOMMUGroupDev(dev))) + if (!(vfio_path = virMediatedDeviceGetIOMMUGroupDev(uuidstr))) return -1; group_num_str = last_component(vfio_path); diff --git a/src/util/virmdev.h b/src/util/virmdev.h index 8bb46b9c5..0b8e830f4 100644 --- a/src/util/virmdev.h +++ b/src/util/virmdev.h @@ -65,10 +65,10 @@ virMediatedDeviceSetUsedBy(virMediatedDevicePtr dev, const char *domname); char * -virMediatedDeviceGetIOMMUGroupDev(virMediatedDevicePtr dev); +virMediatedDeviceGetIOMMUGroupDev(const char *uuidstr); int -virMediatedDeviceGetIOMMUGroupNum(virMediatedDevicePtr dev); +virMediatedDeviceGetIOMMUGroupNum(const char *uuidstr); char * virMediatedDeviceGetSysfsPath(const char *uuidstr); -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
Namely, this patch is about virMediatedDeviceGetIOMMUGroup{Dev,Num} functions. There's no compelling reason why these functions should take an object, on the contrary, having to create an object every time one needs to query the IOMMU group number, discarding the object afterwards, seems odd.
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- src/qemu/qemu_domain.c | 8 +------- src/security/security_apparmor.c | 10 +--------- src/security/security_dac.c | 20 ++------------------ src/security/security_selinux.c | 20 ++------------------ src/util/virmdev.c | 21 +++++++++++++-------- src/util/virmdev.h | 4 ++-- 6 files changed, 21 insertions(+), 62 deletions(-)
Reviewed-by: John Ferlan <jferlan@redhat.com> John

Since there's at least SRIOV and MDEV sub-capabilities to be parsed, let's make the code more readable by splitting it to several logical blocks. Signed-off-by: Erik Skultety <eskultet@redhat.com> --- src/conf/node_device_conf.c | 142 ++++++++++++++++++++++++++------------------ 1 file changed, 83 insertions(+), 59 deletions(-) diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 85cfd8396..7aab2e03c 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -1286,76 +1286,102 @@ virPCIEDeviceInfoParseXML(xmlXPathContextPtr ctxt, static int +virNodeDevPCICapSRIOVPhysicalParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + xmlNodePtr address = virXPathNode("./address[1]", ctxt); + + if (VIR_ALLOC(pci_dev->physical_function) < 0) + return -1; + + if (!address) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing address in 'phys_function' capability")); + return -1; + } + + if (virPCIDeviceAddressParseXML(address, + pci_dev->physical_function) < 0) + return -1; + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION; + + return 0; +} + + +static int +virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + int ret = -1; + xmlNodePtr *addresses = NULL; + int naddresses = virXPathNodeSet("./address", ctxt, &addresses); + char *maxFuncsStr = virXPathString("string(./@maxCount)", ctxt); + size_t i; + + if (naddresses < 0) + goto cleanup; + + if (maxFuncsStr && + virStrToLong_uip(maxFuncsStr, NULL, 10, + &pci_dev->max_virtual_functions) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Malformed 'maxCount' parameter")); + goto cleanup; + } + + if (VIR_ALLOC_N(pci_dev->virtual_functions, naddresses) < 0) + goto cleanup; + + for (i = 0; i < naddresses; i++) { + virPCIDeviceAddressPtr addr = NULL; + + if (VIR_ALLOC(addr) < 0) + goto cleanup; + + if (virPCIDeviceAddressParseXML(addresses[i], addr) < 0) { + VIR_FREE(addr); + goto cleanup; + } + + if (VIR_APPEND_ELEMENT(pci_dev->virtual_functions, + pci_dev->num_virtual_functions, + addr) < 0) + goto cleanup; + } + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION; + ret = 0; + cleanup: + VIR_FREE(addresses); + VIR_FREE(maxFuncsStr); + return ret; +} + + +static int virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, virNodeDevCapPCIDevPtr pci_dev) { - char *maxFuncsStr = virXMLPropString(node, "maxCount"); char *type = virXMLPropString(node, "type"); - xmlNodePtr *addresses = NULL; xmlNodePtr orignode = ctxt->node; int ret = -1; - size_t i = 0; ctxt->node = node; if (!type) { virReportError(VIR_ERR_XML_ERROR, "%s", _("Missing capability type")); - goto out; + goto cleanup; } - if (STREQ(type, "phys_function")) { - xmlNodePtr address = virXPathNode("./address[1]", ctxt); - - if (VIR_ALLOC(pci_dev->physical_function) < 0) - goto out; - - if (!address) { - virReportError(VIR_ERR_XML_ERROR, "%s", - _("Missing address in 'phys_function' capability")); - goto out; - } - - if (virPCIDeviceAddressParseXML(address, - pci_dev->physical_function) < 0) - goto out; - - pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION; - } else if (STREQ(type, "virt_functions")) { - int naddresses; - - if ((naddresses = virXPathNodeSet("./address", ctxt, &addresses)) < 0) - goto out; - - if (maxFuncsStr && - virStrToLong_uip(maxFuncsStr, NULL, 10, - &pci_dev->max_virtual_functions) < 0) { - virReportError(VIR_ERR_XML_ERROR, "%s", - _("Malformed 'maxCount' parameter")); - goto out; - } - - if (VIR_ALLOC_N(pci_dev->virtual_functions, naddresses) < 0) - goto out; - - for (i = 0; i < naddresses; i++) { - virPCIDeviceAddressPtr addr = NULL; - - if (VIR_ALLOC(addr) < 0) - goto out; - - if (virPCIDeviceAddressParseXML(addresses[i], addr) < 0) { - VIR_FREE(addr); - goto out; - } - - if (VIR_APPEND_ELEMENT(pci_dev->virtual_functions, - pci_dev->num_virtual_functions, - addr) < 0) - goto out; - } - - pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION; + if (STREQ(type, "phys_function") && + virNodeDevPCICapSRIOVPhysicalParseXML(ctxt, pci_dev) < 0) { + goto cleanup; + } else if (STREQ(type, "virt_functions") && + virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) { + goto cleanup; } else { int hdrType = virPCIHeaderTypeFromString(type); @@ -1364,9 +1390,7 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, } ret = 0; - out: - VIR_FREE(addresses); - VIR_FREE(maxFuncsStr); + cleanup: VIR_FREE(type); ctxt->node = orignode; return ret; -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
Since there's at least SRIOV and MDEV sub-capabilities to be parsed, let's make the code more readable by splitting it to several logical blocks.
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- src/conf/node_device_conf.c | 142 ++++++++++++++++++++++++++------------------ 1 file changed, 83 insertions(+), 59 deletions(-)
Reviewed-by: John Ferlan <jferlan@redhat.com> John

The reason for introducing two capabilities, one for the device itself (cap 'mdev') and one for the parent device listing the available types ('mdev_types'), is that we should be able to do 'virsh nodedev-list --cap' not only for existing mdev devices but also for devices that support creation of mdev devices, since one day libvirt might be actually able to create the mdev devices in an automated way (just like we do for NPIV/vHBA). Signed-off-by: Erik Skultety <eskultet@redhat.com> --- include/libvirt/libvirt-nodedev.h | 2 ++ src/conf/node_device_conf.c | 10 +++++++++- src/conf/node_device_conf.h | 6 +++++- src/conf/virnodedeviceobj.c | 4 +++- src/libvirt-nodedev.c | 2 ++ src/node_device/node_device_driver.c | 2 ++ src/node_device/node_device_udev.c | 3 +++ tools/virsh-nodedev.c | 6 ++++++ 8 files changed, 32 insertions(+), 3 deletions(-) diff --git a/include/libvirt/libvirt-nodedev.h b/include/libvirt/libvirt-nodedev.h index 85003903d..1e3043787 100644 --- a/include/libvirt/libvirt-nodedev.h +++ b/include/libvirt/libvirt-nodedev.h @@ -79,6 +79,8 @@ typedef enum { VIR_CONNECT_LIST_NODE_DEVICES_CAP_VPORTS = 1 << 10, /* Capable of vport */ VIR_CONNECT_LIST_NODE_DEVICES_CAP_SCSI_GENERIC = 1 << 11, /* Capable of scsi_generic */ VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM = 1 << 12, /* DRM device */ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV_TYPES = 1 << 13, /* Capable of mediated devices */ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV = 1 << 14, /* Mediated device */ } virConnectListAllNodeDeviceFlags; int virConnectListAllNodeDevices (virConnectPtr conn, diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 7aab2e03c..40d71f277 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -60,7 +60,9 @@ VIR_ENUM_IMPL(virNodeDevCap, VIR_NODE_DEV_CAP_LAST, "fc_host", "vports", "scsi_generic", - "drm") + "drm", + "mdev_types", + "mdev") VIR_ENUM_IMPL(virNodeDevNetCap, VIR_NODE_DEV_CAP_NET_LAST, "80203", @@ -540,6 +542,8 @@ virNodeDeviceDefFormat(const virNodeDeviceDef *def) case VIR_NODE_DEV_CAP_DRM: virBufferEscapeString(&buf, "<type>%s</type>\n", virNodeDevDRMTypeToString(data->drm.type)); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: case VIR_NODE_DEV_CAP_LAST: @@ -1612,6 +1616,8 @@ virNodeDevCapsDefParseXML(xmlXPathContextPtr ctxt, case VIR_NODE_DEV_CAP_DRM: ret = virNodeDevCapDRMParseXML(ctxt, def, node, &caps->data.drm); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: case VIR_NODE_DEV_CAP_SCSI_GENERIC: @@ -1930,6 +1936,8 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) case VIR_NODE_DEV_CAP_SCSI_GENERIC: VIR_FREE(data->sg.path); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_DRM: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index a5d5cdd2a..273d49f76 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -64,6 +64,8 @@ typedef enum { VIR_NODE_DEV_CAP_VPORTS, /* HBA which is capable of vports */ VIR_NODE_DEV_CAP_SCSI_GENERIC, /* SCSI generic device */ VIR_NODE_DEV_CAP_DRM, /* DRM device */ + VIR_NODE_DEV_CAP_MDEV_TYPES, /* Device capable of mediated devices */ + VIR_NODE_DEV_CAP_MDEV, /* Mediated device */ VIR_NODE_DEV_CAP_LAST } virNodeDevCapType; @@ -351,7 +353,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps); VIR_CONNECT_LIST_NODE_DEVICES_CAP_FC_HOST | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_VPORTS | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_SCSI_GENERIC | \ - VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM) + VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM | \ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV_TYPES | \ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV) char * virNodeDeviceGetParentName(virConnectPtr conn, diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c index 4f47b4e41..181d2efe1 100644 --- a/src/conf/virnodedeviceobj.c +++ b/src/conf/virnodedeviceobj.c @@ -550,7 +550,9 @@ virNodeDeviceMatch(virNodeDeviceObjPtr devobj, MATCH(FC_HOST) || MATCH(VPORTS) || MATCH(SCSI_GENERIC) || - MATCH(DRM))) + MATCH(DRM) || + MATCH(MDEV_TYPES) || + MATCH(MDEV))) return false; } diff --git a/src/libvirt-nodedev.c b/src/libvirt-nodedev.c index 83376b0d9..44e2b4efd 100644 --- a/src/libvirt-nodedev.c +++ b/src/libvirt-nodedev.c @@ -98,6 +98,8 @@ virNodeNumOfDevices(virConnectPtr conn, const char *cap, unsigned int flags) * VIR_CONNECT_LIST_NODE_DEVICES_CAP_VPORTS * VIR_CONNECT_LIST_NODE_DEVICES_CAP_SCSI_GENERIC * VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM + * VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV_TYPES + * VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV * * Returns the number of node devices found or -1 and sets @devices to NULL in * case of error. On success, the array stored into @devices is guaranteed to diff --git a/src/node_device/node_device_driver.c b/src/node_device/node_device_driver.c index c3997c922..ba3da6288 100644 --- a/src/node_device/node_device_driver.c +++ b/src/node_device/node_device_driver.c @@ -82,6 +82,8 @@ static int update_caps(virNodeDeviceObjPtr dev) case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: case VIR_NODE_DEV_CAP_SCSI_GENERIC: + case VIR_NODE_DEV_CAP_MDEV_TYPES: + case VIR_NODE_DEV_CAP_MDEV: case VIR_NODE_DEV_CAP_LAST: break; } diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 6e706a10b..1ddb55c80 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -43,6 +43,7 @@ #include "virpci.h" #include "virstring.h" #include "virnetdev.h" +#include "virmdev.h" #define VIR_FROM_THIS VIR_FROM_NODEDEV @@ -1060,6 +1061,8 @@ static int udevGetDeviceDetails(struct udev_device *device, return udevProcessSCSIGeneric(device, def); case VIR_NODE_DEV_CAP_DRM: return udevProcessDRMDevice(device, def); + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_SYSTEM: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: diff --git a/tools/virsh-nodedev.c b/tools/virsh-nodedev.c index c69144021..ad96dda1f 100644 --- a/tools/virsh-nodedev.c +++ b/tools/virsh-nodedev.c @@ -454,6 +454,12 @@ cmdNodeListDevices(vshControl *ctl, const vshCmd *cmd ATTRIBUTE_UNUSED) case VIR_NODE_DEV_CAP_DRM: flags |= VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM; break; + case VIR_NODE_DEV_CAP_MDEV_TYPES: + flags |= VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV_TYPES; + break; + case VIR_NODE_DEV_CAP_MDEV: + flags |= VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV; + break; case VIR_NODE_DEV_CAP_LAST: break; } -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
The reason for introducing two capabilities, one for the device itself (cap 'mdev') and one for the parent device listing the available types ('mdev_types'), is that we should be able to do 'virsh nodedev-list --cap' not only for existing mdev devices but also for devices that support creation of mdev devices, since one day libvirt might be actually able to create the mdev devices in an automated way (just like we do for NPIV/vHBA).
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- include/libvirt/libvirt-nodedev.h | 2 ++ src/conf/node_device_conf.c | 10 +++++++++- src/conf/node_device_conf.h | 6 +++++- src/conf/virnodedeviceobj.c | 4 +++- src/libvirt-nodedev.c | 2 ++ src/node_device/node_device_driver.c | 2 ++ src/node_device/node_device_udev.c | 3 +++ tools/virsh-nodedev.c | 6 ++++++ 8 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/include/libvirt/libvirt-nodedev.h b/include/libvirt/libvirt-nodedev.h index 85003903d..1e3043787 100644 --- a/include/libvirt/libvirt-nodedev.h +++ b/include/libvirt/libvirt-nodedev.h @@ -79,6 +79,8 @@ typedef enum { VIR_CONNECT_LIST_NODE_DEVICES_CAP_VPORTS = 1 << 10, /* Capable of vport */ VIR_CONNECT_LIST_NODE_DEVICES_CAP_SCSI_GENERIC = 1 << 11, /* Capable of scsi_generic */ VIR_CONNECT_LIST_NODE_DEVICES_CAP_DRM = 1 << 12, /* DRM device */ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV_TYPES = 1 << 13, /* Capable of mediated devices */ + VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV = 1 << 14, /* Mediated device */ } virConnectListAllNodeDeviceFlags;
int virConnectListAllNodeDevices (virConnectPtr conn, diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 7aab2e03c..40d71f277 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -60,7 +60,9 @@ VIR_ENUM_IMPL(virNodeDevCap, VIR_NODE_DEV_CAP_LAST, "fc_host", "vports", "scsi_generic", - "drm") + "drm", + "mdev_types", + "mdev")
Should I mention this is doing two things at one time ;-), nah, nevermind ;-)
VIR_ENUM_IMPL(virNodeDevNetCap, VIR_NODE_DEV_CAP_NET_LAST, "80203", @@ -540,6 +542,8 @@ virNodeDeviceDefFormat(const virNodeDeviceDef *def) case VIR_NODE_DEV_CAP_DRM: virBufferEscapeString(&buf, "<type>%s</type>\n", virNodeDevDRMTypeToString(data->drm.type)); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: case VIR_NODE_DEV_CAP_LAST: @@ -1612,6 +1616,8 @@ virNodeDevCapsDefParseXML(xmlXPathContextPtr ctxt, case VIR_NODE_DEV_CAP_DRM: ret = virNodeDevCapDRMParseXML(ctxt, def, node, &caps->data.drm); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: case VIR_NODE_DEV_CAP_SCSI_GENERIC: @@ -1930,6 +1936,8 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) case VIR_NODE_DEV_CAP_SCSI_GENERIC: VIR_FREE(data->sg.path); break; + case VIR_NODE_DEV_CAP_MDEV: + case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_DRM: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index a5d5cdd2a..273d49f76 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -64,6 +64,8 @@ typedef enum { VIR_NODE_DEV_CAP_VPORTS, /* HBA which is capable of vports */ VIR_NODE_DEV_CAP_SCSI_GENERIC, /* SCSI generic device */ VIR_NODE_DEV_CAP_DRM, /* DRM device */ + VIR_NODE_DEV_CAP_MDEV_TYPES, /* Device capable of mediated devices */ + VIR_NODE_DEV_CAP_MDEV, /* Mediated device */
Since the comments are aligned vertically, you should follow that too. When I look at these w/ vim or via gitk, it's obvious that the comments above are a mix of spaces and tabs to align them (sigh). Reviewed-by: John Ferlan <jferlan@redhat.com> John
VIR_NODE_DEV_CAP_LAST } virNodeDevCapType;
[...]

The parent device needs to report the generic stuff about the supported mediated devices types, like device API, available instances, type name, etc. Therefore this patch introduces a new nested capability element of type 'mdev_types' with the resulting XML of the following format: <device> ... <capability type='pci'> ... <capability type='mdev_types'> <type id='vendor_supplied_id'> <name>optional_vendor_supplied_codename</name> <deviceAPI>vfio-pci</deviceAPI> <availableInstances>NUM</availableInstances> </type> ... <type> ... </type> </capability> </capability> ... </device> Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 97 +++++++++++++++++ src/conf/node_device_conf.h | 15 +++ src/conf/virnodedeviceobj.c | 7 ++ src/libvirt_private.syms | 1 + src/node_device/node_device_udev.c | 119 +++++++++++++++++++++ .../pci_0000_02_10_7_mdev_types.xml | 32 ++++++ tests/nodedevxml2xmltest.c | 1 + 8 files changed, 298 insertions(+) create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 0f90a73c8..e0a2c5032 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -205,6 +205,32 @@ </optional> <optional> + <element name='capability'> + <attribute name='type'> + <value>mdev_types</value> + </attribute> + <oneOrMore> + <element name='type'> + <attribute name='id'> + <data type='string'/> + </attribute> + <optional> + <element name='name'><text/></element> + </optional> + <element name='deviceAPI'> + <choice> + <value>vfio-pci</value> + </choice> + </element> + <element name='availableInstances'> + <ref name='unsignedInt'/> + </element> + </element> + </oneOrMore> + </element> + </optional> + + <optional> <element name='iommuGroup'> <attribute name='number'> <ref name='unsignedInt'/> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 40d71f277..f26b1ffc7 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath, void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type) +{ + if (!type) + return; + + VIR_FREE(type->id); + VIR_FREE(type->name); + VIR_FREE(type->device_api); + VIR_FREE(type); +} + + +void virNodeDeviceDefFree(virNodeDeviceDefPtr def) { virNodeDevCapsDefPtr caps; @@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<capability type='%s'/>\n", virPCIHeaderTypeToString(data->pci_dev.hdrType)); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) { + virBufferAddLit(buf, "<capability type='mdev_types'>\n"); + virBufferAdjustIndent(buf, 2); + for (i = 0; i < data->pci_dev.nmdev_types; i++) { + virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i]; + virBufferEscapeString(buf, "<type id='%s'>\n", type->id); + virBufferAdjustIndent(buf, 2); + if (type->name) + virBufferEscapeString(buf, "<name>%s</name>\n", + type->name); + virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n", + type->device_api); + virBufferAsprintf(buf, + "<availableInstances>%u</availableInstances>\n", + type->available_instances); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</type>\n"); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</capability>\n"); + } if (data->pci_dev.nIommuGroupDevices) { virBufferAsprintf(buf, "<iommuGroup number='%d'>\n", data->pci_dev.iommuGroupNumber); @@ -1365,6 +1399,63 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt, static int +virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + int ret = -1; + xmlNodePtr orignode = NULL; + xmlNodePtr *nodes = NULL; + int nmdev_types = virXPathNodeSet("./type", ctxt, &nodes); + virNodeDevCapMdevTypePtr type = NULL; + size_t i; + + orignode = ctxt->node; + for (i = 0; i < nmdev_types; i++) { + ctxt->node = nodes[i]; + + if (VIR_ALLOC(type) < 0) + goto cleanup; + + if (!(type->id = virXPathString("string(./@id[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing 'id' attribute for mediated device's " + "<type> element")); + goto cleanup; + } + + if (!(type->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, + _("missing device API for mediated device type '%s'"), + type->id); + goto cleanup; + } + + if (virXPathUInt("number(./availableInstances)", ctxt, + &type->available_instances) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("missing number of available instances for " + "mediated device type '%s'"), + type->id); + goto cleanup; + } + + type->name = virXPathString("string(./name)", ctxt); + + if (VIR_APPEND_ELEMENT(pci_dev->mdev_types, + pci_dev->nmdev_types, type) < 0) + goto cleanup; + } + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ret = 0; + cleanup: + virNodeDevCapMdevTypeFree(type); + ctxt->node = orignode; + return ret; +} + + +static int virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, virNodeDevCapPCIDevPtr pci_dev) @@ -1386,6 +1477,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, } else if (STREQ(type, "virt_functions") && virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) { goto cleanup; + } else if (STREQ(type, "mdev_types") && + virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) { + goto cleanup; } else { int hdrType = virPCIHeaderTypeFromString(type); @@ -1899,6 +1993,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) VIR_FREE(data->pci_dev.iommuGroupDevices[i]); VIR_FREE(data->pci_dev.iommuGroupDevices); virPCIEDeviceInfoFree(data->pci_dev.pci_express); + for (i = 0; i < data->pci_dev.nmdev_types; i++) + virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]); + VIR_FREE(data->pci_dev.mdev_types); break; case VIR_NODE_DEV_CAP_USB_DEV: VIR_FREE(data->usb_dev.product_name); diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 273d49f76..629f732e6 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -95,6 +95,7 @@ typedef enum { VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2), + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3), } virNodeDevPCICapFlags; typedef enum { @@ -133,6 +134,15 @@ struct _virNodeDevCapSystem { virNodeDevCapSystemFirmware firmware; }; +typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType; +typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr; +struct _virNodeDevCapMdevType { + char *id; + char *name; + char *device_api; + unsigned int available_instances; +}; + typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev; typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr; struct _virNodeDevCapPCIDev { @@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev { int numa_node; virPCIEDeviceInfoPtr pci_express; int hdrType; /* enum virPCIHeaderType or -1 */ + virNodeDevCapMdevTypePtr *mdev_types; + size_t nmdev_types; }; typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev; @@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def); void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps); +void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type); + # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \ (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \ diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c index 181d2efe1..fc5f3708f 100644 --- a/src/conf/virnodedeviceobj.c +++ b/src/conf/virnodedeviceobj.c @@ -468,6 +468,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj, VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS)) return true; } + + if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) { + if (type == VIR_NODE_DEV_CAP_MDEV_TYPES && + (cap->data.pci_dev.flags & + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV)) + return true; + } } return false; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bbe283529..a2a30bd58 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -668,6 +668,7 @@ virNetDevIPRouteParseXML; # conf/node_device_conf.h +virNodeDevCapMdevTypeFree; virNodeDevCapsDefFree; virNodeDevCapTypeFromString; virNodeDevCapTypeToString; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 1ddb55c80..4c75dec2b 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor, } +static int +udevFillMdevType(struct udev_device *device, + const char *dir, + virNodeDevCapMdevTypePtr type) +{ + int ret = -1; + char *attrpath = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + if (VIR_STRDUP(type->id, last_component(dir)) < 0) + goto cleanup; + + /* query udev for the attributes under subdirectories using the relative + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>' + */ + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name); + MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr, + &type->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + return ret; +} + + +static int +udevPCIGetMdevTypesCap(struct udev_device *device, + virNodeDevCapPCIDevPtr pcidata) +{ + int ret = -1; + int dirret = -1; + DIR *dir = NULL; + struct dirent *entry; + char *path = NULL; + char *tmppath = NULL; + virNodeDevCapMdevTypePtr type = NULL; + virNodeDevCapMdevTypePtr *types = NULL; + size_t ntypes = 0; + size_t i; + + if (virAsprintf(&path, "%s/mdev_supported_types", + udev_device_get_syspath(device)) < 0) + return -1; + + if ((dirret = virDirOpenIfExists(&dir, path)) < 0) + goto cleanup; + + if (dirret == 0) { + ret = 0; + goto cleanup; + } + + if (VIR_ALLOC(types) < 0) + goto cleanup; + + /* UDEV doesn't report attributes under subdirectories by default but is + * able to query them if the path to the attribute is relative to the + * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's + * base path as udev reports it, but we're interested in attributes under + * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need to + * scan the subdirectories ourselves. + */ + while ((dirret = virDirRead(dir, &entry, path)) > 0) { + if (VIR_ALLOC(type) < 0) + goto cleanup; + + /* construct the relative mdev type path bit for udev */ + if (virAsprintf(&tmppath, "mdev_supported_types/%s", entry->d_name) < 0) + goto cleanup; + + if (udevFillMdevType(device, tmppath, type) < 0) + goto cleanup; + + if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0) + goto cleanup; + + VIR_FREE(tmppath); + } + + if (dirret < 0) + goto cleanup; + + VIR_STEAL_PTR(pcidata->mdev_types, types); + pcidata->nmdev_types = ntypes; + pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ntypes = 0; + ret = 0; + cleanup: + virNodeDevCapMdevTypeFree(type); + for (i = 0; i < ntypes; i++) + virNodeDevCapMdevTypeFree(types[i]); + VIR_FREE(types); + VIR_FREE(path); + VIR_FREE(tmppath); + VIR_DIR_CLOSE(dir); + return ret; +} + + static int udevProcessPCI(struct udev_device *device, virNodeDeviceDefPtr def) { @@ -400,6 +513,12 @@ static int udevProcessPCI(struct udev_device *device, } } + /* check whether the device is mediated devices framework capable, if so, + * process it + */ + if (udevPCIGetMdevTypesCap(device, pci_dev) < 0) + goto cleanup; + ret = 0; cleanup: diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml new file mode 100644 index 000000000..a2d57569a --- /dev/null +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml @@ -0,0 +1,32 @@ +<device> + <name>pci_0000_02_10_7</name> + <parent>pci_0000_00_04_0</parent> + <capability type='pci'> + <domain>0</domain> + <bus>2</bus> + <slot>16</slot> + <function>7</function> + <product id='0x10ca'>82576 Virtual Function</product> + <vendor id='0x8086'>Intel Corporation</vendor> + <capability type='mdev_types'> + <type id='foo1'> + <name>bar1</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>1</availableInstances> + </type> + <type id='foo2'> + <name>bar2</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>2</availableInstances> + </type> + </capability> + <iommuGroup number='31'> + <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/> + </iommuGroup> + <numa node='0'/> + <pci-express> + <link validity='cap' port='0' speed='2.5' width='4'/> + <link validity='sta' width='0'/> + </pci-express> + </capability> +</device> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c index f023d8a13..e3a77646c 100644 --- a/tests/nodedevxml2xmltest.c +++ b/tests/nodedevxml2xmltest.c @@ -101,6 +101,7 @@ mymain(void) DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all"); DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type"); DO_TEST("drm_renderD129"); + DO_TEST("pci_0000_02_10_7_mdev_types"); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
The parent device needs to report the generic stuff about the supported mediated devices types, like device API, available instances, type name, etc. Therefore this patch introduces a new nested capability element of type 'mdev_types' with the resulting XML of the following format:
<device> ... <capability type='pci'> ... <capability type='mdev_types'> <type id='vendor_supplied_id'> <name>optional_vendor_supplied_codename</name> <deviceAPI>vfio-pci</deviceAPI> <availableInstances>NUM</availableInstances> </type> ... <type> ... </type> </capability> </capability> ... </device>
I haven't followed all the discussions that closely - so a comment from the peanut gallery... Since mdev_types is a sub-capability of type='pci', does the "vfio-pci" feel redundant? Ok just the -pci part...
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 97 +++++++++++++++++ src/conf/node_device_conf.h | 15 +++ src/conf/virnodedeviceobj.c | 7 ++ src/libvirt_private.syms | 1 + src/node_device/node_device_udev.c | 119 +++++++++++++++++++++ .../pci_0000_02_10_7_mdev_types.xml | 32 ++++++ tests/nodedevxml2xmltest.c | 1 + 8 files changed, 298 insertions(+) create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 0f90a73c8..e0a2c5032 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -205,6 +205,32 @@ </optional>
<optional> + <element name='capability'> + <attribute name='type'> + <value>mdev_types</value> + </attribute> + <oneOrMore> + <element name='type'> + <attribute name='id'> + <data type='string'/> + </attribute> + <optional> + <element name='name'><text/></element> + </optional> + <element name='deviceAPI'> + <choice> + <value>vfio-pci</value> + </choice> + </element> + <element name='availableInstances'> + <ref name='unsignedInt'/> + </element> + </element> + </oneOrMore> + </element> + </optional> + + <optional> <element name='iommuGroup'> <attribute name='number'> <ref name='unsignedInt'/> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 40d71f277..f26b1ffc7 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath,
void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type) +{ + if (!type) + return; + + VIR_FREE(type->id); + VIR_FREE(type->name); + VIR_FREE(type->device_api); + VIR_FREE(type); +} + + +void virNodeDeviceDefFree(virNodeDeviceDefPtr def) { virNodeDevCapsDefPtr caps; @@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<capability type='%s'/>\n", virPCIHeaderTypeToString(data->pci_dev.hdrType)); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) { + virBufferAddLit(buf, "<capability type='mdev_types'>\n"); + virBufferAdjustIndent(buf, 2); + for (i = 0; i < data->pci_dev.nmdev_types; i++) { + virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i]; + virBufferEscapeString(buf, "<type id='%s'>\n", type->id); + virBufferAdjustIndent(buf, 2); + if (type->name) + virBufferEscapeString(buf, "<name>%s</name>\n", + type->name); + virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n", + type->device_api); + virBufferAsprintf(buf, + "<availableInstances>%u</availableInstances>\n", + type->available_instances); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</type>\n"); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</capability>\n"); + } if (data->pci_dev.nIommuGroupDevices) { virBufferAsprintf(buf, "<iommuGroup number='%d'>\n", data->pci_dev.iommuGroupNumber); @@ -1365,6 +1399,63 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt,
static int +virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + int ret = -1; + xmlNodePtr orignode = NULL; + xmlNodePtr *nodes = NULL; + int nmdev_types = virXPathNodeSet("./type", ctxt, &nodes);
Coverity lets me know that virXPathNodeSet allocates memory into @nodes, but we never clean it up before leaving. Coverity also is proud to announce that nmdev_types could be < 0 which would really mess up the following for loop!
+ virNodeDevCapMdevTypePtr type = NULL; + size_t i;
I would just go with: if ((nmdev_types = virXPathNodeSet("./type", ctxt, &nodes)) < 0) goto cleanup;
+ + orignode = ctxt->node; + for (i = 0; i < nmdev_types; i++) { + ctxt->node = nodes[i]; + + if (VIR_ALLOC(type) < 0) + goto cleanup; + + if (!(type->id = virXPathString("string(./@id[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing 'id' attribute for mediated device's " + "<type> element")); + goto cleanup; + } + + if (!(type->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, + _("missing device API for mediated device type '%s'"), + type->id); + goto cleanup; + } + + if (virXPathUInt("number(./availableInstances)", ctxt, + &type->available_instances) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("missing number of available instances for " + "mediated device type '%s'"), + type->id); + goto cleanup; + } + + type->name = virXPathString("string(./name)", ctxt); + + if (VIR_APPEND_ELEMENT(pci_dev->mdev_types, + pci_dev->nmdev_types, type) < 0) + goto cleanup; + } + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ret = 0; + cleanup:
VIR_FREE(nodes);
+ virNodeDevCapMdevTypeFree(type); + ctxt->node = orignode; + return ret; +} + + +static int virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, virNodeDevCapPCIDevPtr pci_dev) @@ -1386,6 +1477,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, } else if (STREQ(type, "virt_functions") && virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) { goto cleanup; + } else if (STREQ(type, "mdev_types") && + virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) { + goto cleanup; } else { int hdrType = virPCIHeaderTypeFromString(type);
@@ -1899,6 +1993,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) VIR_FREE(data->pci_dev.iommuGroupDevices[i]); VIR_FREE(data->pci_dev.iommuGroupDevices); virPCIEDeviceInfoFree(data->pci_dev.pci_express); + for (i = 0; i < data->pci_dev.nmdev_types; i++) + virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]); + VIR_FREE(data->pci_dev.mdev_types); break; case VIR_NODE_DEV_CAP_USB_DEV: VIR_FREE(data->usb_dev.product_name); diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 273d49f76..629f732e6 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -95,6 +95,7 @@ typedef enum { VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2), + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3), } virNodeDevPCICapFlags;
typedef enum { @@ -133,6 +134,15 @@ struct _virNodeDevCapSystem { virNodeDevCapSystemFirmware firmware; };
+typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType; +typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr; +struct _virNodeDevCapMdevType { + char *id; + char *name; + char *device_api; + unsigned int available_instances; +}; + typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev; typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr; struct _virNodeDevCapPCIDev { @@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev { int numa_node; virPCIEDeviceInfoPtr pci_express; int hdrType; /* enum virPCIHeaderType or -1 */ + virNodeDevCapMdevTypePtr *mdev_types; + size_t nmdev_types; };
typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev; @@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def); void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
+void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type); + # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \ (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \ diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c index 181d2efe1..fc5f3708f 100644 --- a/src/conf/virnodedeviceobj.c +++ b/src/conf/virnodedeviceobj.c @@ -468,6 +468,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj, VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS)) return true; } + + if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) { + if (type == VIR_NODE_DEV_CAP_MDEV_TYPES && + (cap->data.pci_dev.flags & + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV)) + return true; + } }
return false;
Something that I had to do with VPORTS in the past tells me you may need to also modify virNodeDeviceObjHasCap with similar code. This only matches for the virNodeDeviceObjListExport API... Aha! found it, see commit id 'e8fcac8ec'
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bbe283529..a2a30bd58 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -668,6 +668,7 @@ virNetDevIPRouteParseXML;
# conf/node_device_conf.h +virNodeDevCapMdevTypeFree; virNodeDevCapsDefFree; virNodeDevCapTypeFromString; virNodeDevCapTypeToString; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 1ddb55c80..4c75dec2b 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor, }
+static int +udevFillMdevType(struct udev_device *device, + const char *dir, + virNodeDevCapMdevTypePtr type) +{ + int ret = -1; + char *attrpath = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + if (VIR_STRDUP(type->id, last_component(dir)) < 0) + goto cleanup; + + /* query udev for the attributes under subdirectories using the relative + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>' + */ + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
Does this imply that name isn't necessarily optional as defined in RNG? Can '@name' not exist and if it is optional how does that adjust the macro?
+ MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr, + &type->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + return ret; +} +
With changes to 1. fix the Coverity issues 2. determine whether the virNodeDeviceObjHasCap change is needed 3. address the optional name You have my: Reviewed-by: John Ferlan <jferlan@redhat.com> While I'm not a fan of 'deviceAPI' John
+ +static int +udevPCIGetMdevTypesCap(struct udev_device *device, + virNodeDevCapPCIDevPtr pcidata) +{ + int ret = -1; + int dirret = -1; + DIR *dir = NULL; + struct dirent *entry; + char *path = NULL; + char *tmppath = NULL; + virNodeDevCapMdevTypePtr type = NULL; + virNodeDevCapMdevTypePtr *types = NULL; + size_t ntypes = 0; + size_t i; + + if (virAsprintf(&path, "%s/mdev_supported_types", + udev_device_get_syspath(device)) < 0) + return -1; + + if ((dirret = virDirOpenIfExists(&dir, path)) < 0) + goto cleanup; + + if (dirret == 0) { + ret = 0; + goto cleanup; + } + + if (VIR_ALLOC(types) < 0) + goto cleanup; + + /* UDEV doesn't report attributes under subdirectories by default but is + * able to query them if the path to the attribute is relative to the + * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's + * base path as udev reports it, but we're interested in attributes under + * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need to + * scan the subdirectories ourselves. + */ + while ((dirret = virDirRead(dir, &entry, path)) > 0) { + if (VIR_ALLOC(type) < 0) + goto cleanup; + + /* construct the relative mdev type path bit for udev */ + if (virAsprintf(&tmppath, "mdev_supported_types/%s", entry->d_name) < 0) + goto cleanup; + + if (udevFillMdevType(device, tmppath, type) < 0) + goto cleanup; + + if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0) + goto cleanup; + + VIR_FREE(tmppath); + } + + if (dirret < 0) + goto cleanup; + + VIR_STEAL_PTR(pcidata->mdev_types, types); + pcidata->nmdev_types = ntypes; + pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ntypes = 0; + ret = 0; + cleanup: + virNodeDevCapMdevTypeFree(type); + for (i = 0; i < ntypes; i++) + virNodeDevCapMdevTypeFree(types[i]); + VIR_FREE(types); + VIR_FREE(path); + VIR_FREE(tmppath); + VIR_DIR_CLOSE(dir); + return ret; +} + + static int udevProcessPCI(struct udev_device *device, virNodeDeviceDefPtr def) { @@ -400,6 +513,12 @@ static int udevProcessPCI(struct udev_device *device, } }
+ /* check whether the device is mediated devices framework capable, if so, + * process it + */ + if (udevPCIGetMdevTypesCap(device, pci_dev) < 0) + goto cleanup; + ret = 0;
cleanup: diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml new file mode 100644 index 000000000..a2d57569a --- /dev/null +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml @@ -0,0 +1,32 @@ +<device> + <name>pci_0000_02_10_7</name> + <parent>pci_0000_00_04_0</parent> + <capability type='pci'> + <domain>0</domain> + <bus>2</bus> + <slot>16</slot> + <function>7</function> + <product id='0x10ca'>82576 Virtual Function</product> + <vendor id='0x8086'>Intel Corporation</vendor> + <capability type='mdev_types'> + <type id='foo1'> + <name>bar1</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>1</availableInstances> + </type> + <type id='foo2'> + <name>bar2</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>2</availableInstances> + </type> + </capability> + <iommuGroup number='31'> + <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/> + </iommuGroup> + <numa node='0'/> + <pci-express> + <link validity='cap' port='0' speed='2.5' width='4'/> + <link validity='sta' width='0'/> + </pci-express> + </capability> +</device> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c index f023d8a13..e3a77646c 100644 --- a/tests/nodedevxml2xmltest.c +++ b/tests/nodedevxml2xmltest.c @@ -101,6 +101,7 @@ mymain(void) DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all"); DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type"); DO_TEST("drm_renderD129"); + DO_TEST("pci_0000_02_10_7_mdev_types");
return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; }

On Wed, May 17, 2017 at 05:22:45PM -0400, John Ferlan wrote:
On 05/15/2017 08:10 AM, Erik Skultety wrote:
The parent device needs to report the generic stuff about the supported mediated devices types, like device API, available instances, type name, etc. Therefore this patch introduces a new nested capability element of type 'mdev_types' with the resulting XML of the following format:
<device> ... <capability type='pci'> ... <capability type='mdev_types'> <type id='vendor_supplied_id'> <name>optional_vendor_supplied_codename</name> <deviceAPI>vfio-pci</deviceAPI> <availableInstances>NUM</availableInstances> </type> ... <type> ... </type> </capability> </capability> ... </device>
I haven't followed all the discussions that closely - so a comment from the peanut gallery... Since mdev_types is a sub-capability of type='pci', does the "vfio-pci" feel redundant? Ok just the -pci part...
Well, that would be mangling of the value we query from the sysfs and that would again bring us to the point where people would rely on it. IMHO we should expose the values verbatim as we find it under sysfs, do we actually do adjust the attribute values after querying them from sysfs?? Anyhow, during one of the VFIO meetings we've also discussed a possibility of someone coming up with vfio-pci v2 backend (though unlikely), in that aspect, you really want to know what is the exact device API supported by that mdev type (I have another unlikely and extremely 'visionary' scenario to support my argument, but I'd rather keep it for myself :)).
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 97 +++++++++++++++++ src/conf/node_device_conf.h | 15 +++ src/conf/virnodedeviceobj.c | 7 ++ src/libvirt_private.syms | 1 + src/node_device/node_device_udev.c | 119 +++++++++++++++++++++ .../pci_0000_02_10_7_mdev_types.xml | 32 ++++++ tests/nodedevxml2xmltest.c | 1 + 8 files changed, 298 insertions(+) create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 0f90a73c8..e0a2c5032 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -205,6 +205,32 @@ </optional>
<optional> + <element name='capability'> + <attribute name='type'> + <value>mdev_types</value> + </attribute> + <oneOrMore> + <element name='type'> + <attribute name='id'> + <data type='string'/> + </attribute> + <optional> + <element name='name'><text/></element> + </optional> + <element name='deviceAPI'> + <choice> + <value>vfio-pci</value> + </choice> + </element> + <element name='availableInstances'> + <ref name='unsignedInt'/> + </element> + </element> + </oneOrMore> + </element> + </optional> + + <optional> <element name='iommuGroup'> <attribute name='number'> <ref name='unsignedInt'/> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index 40d71f277..f26b1ffc7 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath,
void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type) +{ + if (!type) + return; + + VIR_FREE(type->id); + VIR_FREE(type->name); + VIR_FREE(type->device_api); + VIR_FREE(type); +} + + +void virNodeDeviceDefFree(virNodeDeviceDefPtr def) { virNodeDevCapsDefPtr caps; @@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<capability type='%s'/>\n", virPCIHeaderTypeToString(data->pci_dev.hdrType)); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) { + virBufferAddLit(buf, "<capability type='mdev_types'>\n"); + virBufferAdjustIndent(buf, 2); + for (i = 0; i < data->pci_dev.nmdev_types; i++) { + virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i]; + virBufferEscapeString(buf, "<type id='%s'>\n", type->id); + virBufferAdjustIndent(buf, 2); + if (type->name) + virBufferEscapeString(buf, "<name>%s</name>\n", + type->name); + virBufferEscapeString(buf, "<deviceAPI>%s</deviceAPI>\n", + type->device_api); + virBufferAsprintf(buf, + "<availableInstances>%u</availableInstances>\n", + type->available_instances); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</type>\n"); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</capability>\n"); + } if (data->pci_dev.nIommuGroupDevices) { virBufferAsprintf(buf, "<iommuGroup number='%d'>\n", data->pci_dev.iommuGroupNumber); @@ -1365,6 +1399,63 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr ctxt,
static int +virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + int ret = -1; + xmlNodePtr orignode = NULL; + xmlNodePtr *nodes = NULL; + int nmdev_types = virXPathNodeSet("./type", ctxt, &nodes);
Coverity lets me know that virXPathNodeSet allocates memory into @nodes, but we never clean it up before leaving.
Coverity also is proud to announce that nmdev_types could be < 0 which would really mess up the following for loop!
Oops, didn't notice that one.
+ virNodeDevCapMdevTypePtr type = NULL; + size_t i;
I would just go with:
if ((nmdev_types = virXPathNodeSet("./type", ctxt, &nodes)) < 0) goto cleanup;
+ + orignode = ctxt->node; + for (i = 0; i < nmdev_types; i++) { + ctxt->node = nodes[i]; + + if (VIR_ALLOC(type) < 0) + goto cleanup; + + if (!(type->id = virXPathString("string(./@id[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing 'id' attribute for mediated device's " + "<type> element")); + goto cleanup; + } + + if (!(type->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, + _("missing device API for mediated device type '%s'"), + type->id); + goto cleanup; + } + + if (virXPathUInt("number(./availableInstances)", ctxt, + &type->available_instances) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("missing number of available instances for " + "mediated device type '%s'"), + type->id); + goto cleanup; + } + + type->name = virXPathString("string(./name)", ctxt); + + if (VIR_APPEND_ELEMENT(pci_dev->mdev_types, + pci_dev->nmdev_types, type) < 0) + goto cleanup; + } + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ret = 0; + cleanup:
VIR_FREE(nodes);
+ virNodeDevCapMdevTypeFree(type); + ctxt->node = orignode; + return ret; +} + + +static int virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, virNodeDevCapPCIDevPtr pci_dev) @@ -1386,6 +1477,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, } else if (STREQ(type, "virt_functions") && virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) { goto cleanup; + } else if (STREQ(type, "mdev_types") && + virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) { + goto cleanup; } else { int hdrType = virPCIHeaderTypeFromString(type);
@@ -1899,6 +1993,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) VIR_FREE(data->pci_dev.iommuGroupDevices[i]); VIR_FREE(data->pci_dev.iommuGroupDevices); virPCIEDeviceInfoFree(data->pci_dev.pci_express); + for (i = 0; i < data->pci_dev.nmdev_types; i++) + virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]); + VIR_FREE(data->pci_dev.mdev_types); break; case VIR_NODE_DEV_CAP_USB_DEV: VIR_FREE(data->usb_dev.product_name); diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 273d49f76..629f732e6 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -95,6 +95,7 @@ typedef enum { VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2), + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3), } virNodeDevPCICapFlags;
typedef enum { @@ -133,6 +134,15 @@ struct _virNodeDevCapSystem { virNodeDevCapSystemFirmware firmware; };
+typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType; +typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr; +struct _virNodeDevCapMdevType { + char *id; + char *name; + char *device_api; + unsigned int available_instances; +}; + typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev; typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr; struct _virNodeDevCapPCIDev { @@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev { int numa_node; virPCIEDeviceInfoPtr pci_express; int hdrType; /* enum virPCIHeaderType or -1 */ + virNodeDevCapMdevTypePtr *mdev_types; + size_t nmdev_types; };
typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev; @@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def); void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
+void +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type); + # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \ (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \ diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c index 181d2efe1..fc5f3708f 100644 --- a/src/conf/virnodedeviceobj.c +++ b/src/conf/virnodedeviceobj.c @@ -468,6 +468,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj, VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS)) return true; } + + if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) { + if (type == VIR_NODE_DEV_CAP_MDEV_TYPES && + (cap->data.pci_dev.flags & + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV)) + return true; + } }
return false;
Something that I had to do with VPORTS in the past tells me you may need to also modify virNodeDeviceObjHasCap with similar code.
This only matches for the virNodeDeviceObjListExport API...
Aha! found it, see commit id 'e8fcac8ec'
I'll adjust that, again one of those places where we handle the very same thing on 2 places with almost identical code which makes it quite hard to track. I'll send a follow-up patch to condense this to one place.
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bbe283529..a2a30bd58 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -668,6 +668,7 @@ virNetDevIPRouteParseXML;
# conf/node_device_conf.h +virNodeDevCapMdevTypeFree; virNodeDevCapsDefFree; virNodeDevCapTypeFromString; virNodeDevCapTypeToString; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 1ddb55c80..4c75dec2b 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor, }
+static int +udevFillMdevType(struct udev_device *device, + const char *dir, + virNodeDevCapMdevTypePtr type) +{ + int ret = -1; + char *attrpath = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + if (VIR_STRDUP(type->id, last_component(dir)) < 0) + goto cleanup; + + /* query udev for the attributes under subdirectories using the relative + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>' + */ + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
Does this imply that name isn't necessarily optional as defined in RNG? Can '@name' not exist and if it is optional how does that adjust the macro?
There's no need for the macro to be changed - type->name == NULL in that case which means it won't be formatted to the XML, there's no issue in that the NULL's going to be handled gracefully all the way down from udevGetStringSysfsAttr.
+ MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr, + &type->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + return ret; +} +
With changes to
1. fix the Coverity issues 2. determine whether the virNodeDeviceObjHasCap change is needed 3. address the optional name
^ see my comment above
You have my:
Reviewed-by: John Ferlan <jferlan@redhat.com>
While I'm not a fan of 'deviceAPI'
Why so? I'm open to any suggestions, choosing the right name for basically anything is a gift I unfortunately do not possess, but truly desire to. Erik
John
+ +static int +udevPCIGetMdevTypesCap(struct udev_device *device, + virNodeDevCapPCIDevPtr pcidata) +{ + int ret = -1; + int dirret = -1; + DIR *dir = NULL; + struct dirent *entry; + char *path = NULL; + char *tmppath = NULL; + virNodeDevCapMdevTypePtr type = NULL; + virNodeDevCapMdevTypePtr *types = NULL; + size_t ntypes = 0; + size_t i; + + if (virAsprintf(&path, "%s/mdev_supported_types", + udev_device_get_syspath(device)) < 0) + return -1; + + if ((dirret = virDirOpenIfExists(&dir, path)) < 0) + goto cleanup; + + if (dirret == 0) { + ret = 0; + goto cleanup; + } + + if (VIR_ALLOC(types) < 0) + goto cleanup; + + /* UDEV doesn't report attributes under subdirectories by default but is + * able to query them if the path to the attribute is relative to the + * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's + * base path as udev reports it, but we're interested in attributes under + * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need to + * scan the subdirectories ourselves. + */ + while ((dirret = virDirRead(dir, &entry, path)) > 0) { + if (VIR_ALLOC(type) < 0) + goto cleanup; + + /* construct the relative mdev type path bit for udev */ + if (virAsprintf(&tmppath, "mdev_supported_types/%s", entry->d_name) < 0) + goto cleanup; + + if (udevFillMdevType(device, tmppath, type) < 0) + goto cleanup; + + if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0) + goto cleanup; + + VIR_FREE(tmppath); + } + + if (dirret < 0) + goto cleanup; + + VIR_STEAL_PTR(pcidata->mdev_types, types); + pcidata->nmdev_types = ntypes; + pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ntypes = 0; + ret = 0; + cleanup: + virNodeDevCapMdevTypeFree(type); + for (i = 0; i < ntypes; i++) + virNodeDevCapMdevTypeFree(types[i]); + VIR_FREE(types); + VIR_FREE(path); + VIR_FREE(tmppath); + VIR_DIR_CLOSE(dir); + return ret; +} + + static int udevProcessPCI(struct udev_device *device, virNodeDeviceDefPtr def) { @@ -400,6 +513,12 @@ static int udevProcessPCI(struct udev_device *device, } }
+ /* check whether the device is mediated devices framework capable, if so, + * process it + */ + if (udevPCIGetMdevTypesCap(device, pci_dev) < 0) + goto cleanup; + ret = 0;
cleanup: diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml new file mode 100644 index 000000000..a2d57569a --- /dev/null +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml @@ -0,0 +1,32 @@ +<device> + <name>pci_0000_02_10_7</name> + <parent>pci_0000_00_04_0</parent> + <capability type='pci'> + <domain>0</domain> + <bus>2</bus> + <slot>16</slot> + <function>7</function> + <product id='0x10ca'>82576 Virtual Function</product> + <vendor id='0x8086'>Intel Corporation</vendor> + <capability type='mdev_types'> + <type id='foo1'> + <name>bar1</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>1</availableInstances> + </type> + <type id='foo2'> + <name>bar2</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>2</availableInstances> + </type> + </capability> + <iommuGroup number='31'> + <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/> + </iommuGroup> + <numa node='0'/> + <pci-express> + <link validity='cap' port='0' speed='2.5' width='4'/> + <link validity='sta' width='0'/> + </pci-express> + </capability> +</device> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c index f023d8a13..e3a77646c 100644 --- a/tests/nodedevxml2xmltest.c +++ b/tests/nodedevxml2xmltest.c @@ -101,6 +101,7 @@ mymain(void) DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all"); DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type"); DO_TEST("drm_renderD129"); + DO_TEST("pci_0000_02_10_7_mdev_types");
return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; }

[...]
+static int +udevFillMdevType(struct udev_device *device, + const char *dir, + virNodeDevCapMdevTypePtr type) +{ + int ret = -1; + char *attrpath = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + if (VIR_STRDUP(type->id, last_component(dir)) < 0) + goto cleanup; + + /* query udev for the attributes under subdirectories using the relative + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>' + */ + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
Does this imply that name isn't necessarily optional as defined in RNG? Can '@name' not exist and if it is optional how does that adjust the macro?
There's no need for the macro to be changed - type->name == NULL in that case which means it won't be formatted to the XML, there's no issue in that the NULL's going to be handled gracefully all the way down from udevGetStringSysfsAttr.
O
+ MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr, + &type->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + return ret; +} +
With changes to
1. fix the Coverity issues 2. determine whether the virNodeDeviceObjHasCap change is needed 3. address the optional name
^ see my comment above
You have my:
Reviewed-by: John Ferlan <jferlan@redhat.com>
While I'm not a fan of 'deviceAPI'
Why so? I'm open to any suggestions, choosing the right name for basically anything is a gift I unfortunately do not possess, but truly desire to.
Hmm... Looks like I got distracted whilst writing and didn't finish my though.... I too have the gift of choosing names that cause angst for reviewers. Anyway - it's just a strange name for something that I think ends up being (what I call) the adapter or controller, e.g. in an .args file: -device vfio-pci,id=hostdev0,\ sysfsdev=/sys/bus/mdev/devices/53764d0e-85a0-42b4-af5c-2046b460b1dc,bus=pci.0,\ addr=0x3 the "vfio-pci" is less a "deviceAPI" to me and more the "device mechanism or name" to handle the traffic. But I see that "device_api" is what the file name ends up being and that I assume was agreed upon by the consortium of those who have been arguing about the vGPU/mdev sysfs layout for far longer than I wanted to pay close attention, so we just get to deal with it. Now as for availableInstances - thankfully there's cut-n-paste to deal with that long name. Ironically the name is far longer than the value as opposed to something like uuid/wwnn/wwpn where the name is far shorter than what the value turns out to be. Glad I don't have to type a uuid/wwnn/wwpn value too often and even happier I have cut-n-paste John

On Thu, May 18, 2017 at 06:48:48AM -0400, John Ferlan wrote:
[...]
+static int +udevFillMdevType(struct udev_device *device, + const char *dir, + virNodeDevCapMdevTypePtr type) +{ + int ret = -1; + char *attrpath = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + if (VIR_STRDUP(type->id, last_component(dir)) < 0) + goto cleanup; + + /* query udev for the attributes under subdirectories using the relative + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>' + */ + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
Does this imply that name isn't necessarily optional as defined in RNG? Can '@name' not exist and if it is optional how does that adjust the macro?
There's no need for the macro to be changed - type->name == NULL in that case which means it won't be formatted to the XML, there's no issue in that the NULL's going to be handled gracefully all the way down from udevGetStringSysfsAttr.
O
+ MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr, &type->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr, + &type->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + return ret; +} +
With changes to
1. fix the Coverity issues 2. determine whether the virNodeDeviceObjHasCap change is needed 3. address the optional name
^ see my comment above
You have my:
Reviewed-by: John Ferlan <jferlan@redhat.com>
While I'm not a fan of 'deviceAPI'
Why so? I'm open to any suggestions, choosing the right name for basically anything is a gift I unfortunately do not possess, but truly desire to.
Hmm... Looks like I got distracted whilst writing and didn't finish my though.... I too have the gift of choosing names that cause angst for reviewers. Anyway - it's just a strange name for something that I think ends up being (what I call) the adapter or controller, e.g. in an .args file:
-device vfio-pci,id=hostdev0,\ sysfsdev=/sys/bus/mdev/devices/53764d0e-85a0-42b4-af5c-2046b460b1dc,bus=pci.0,\ addr=0x3
the "vfio-pci" is less a "deviceAPI" to me and more the "device mechanism or name" to handle the traffic. But I see that "device_api" is
You're right, but this way, we at least stay consistent with arguably poor naming under sysfs - I know, might have been better off with letting the fantasy off the leash, but hopefully this one's going to bite us in the back less than usual....yeah, right....
what the file name ends up being and that I assume was agreed upon by the consortium of those who have been arguing about the vGPU/mdev sysfs layout for far longer than I wanted to pay close attention, so we just get to deal with it.
Now as for availableInstances - thankfully there's cut-n-paste to deal with that long name. Ironically the name is far longer than the value as opposed to something like uuid/wwnn/wwpn where the name is far shorter than what the value turns out to be. Glad I don't have to type a uuid/wwnn/wwpn value too often and even happier I have cut-n-paste
Thanks for review, I'm going to push the series in a moment. I also created BZ 1452072 to track the feature and pasted the URL to patches 3-6. Erik
John

Start discovering the mediated devices on the host system and format the attributes for the mediated device into the XML. Compared to the parent device which reports generic information about the abstract mediated devices types, a child device only reports the type name it has been instantiated from and the IOMMU group number, since that's device specific compared to the rest of the info that can be gathered about mediated devices at the moment. This patch introduces both the formatting and parsing routines, updates nodedev.rng schema, adding a testcase as well. The resulting mdev child device XML: <device> <name>mdev_4b20d080_1b54_4048_85b3_a6a62d165c01</name> <path>/sys/devices/.../4b20d080-1b54-4048-85b3-a6a62d165c01</path> <parent>pci_0000_06_00_0</parent> <driver> <name>vfio_mdev</name> </driver> <capability type='mdev'> <type id='vendor_supplied_type_id'/> <iommuGroup number='NUM'/> <capability/> <device/> Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/schemas/nodedev.rng | 17 +++++++++ src/conf/node_device_conf.c | 41 +++++++++++++++++++++ src/conf/node_device_conf.h | 8 ++++ src/node_device/node_device_udev.c | 43 +++++++++++++++++++++- .../mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml | 8 ++++ tests/nodedevxml2xmltest.c | 1 + 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index e0a2c5032..924f73861 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -83,6 +83,7 @@ <ref name="capscsi"/> <ref name="capstorage"/> <ref name="capdrm"/> + <ref name="capmdev"/> </choice> </element> </define> @@ -580,6 +581,22 @@ </element> </define> + <define name='capmdev'> + <attribute name='type'> + <value>mdev</value> + </attribute> + <element name='type'> + <attribute name='id'> + <data type='string'/> + </attribute> + </element> + <element name='iommuGroup'> + <attribute name='number'> + <ref name='unsignedInt'/> + </attribute> + </element> + </define> + <define name='address'> <element name='address'> <attribute name='domain'><ref name='hexuint'/></attribute> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index f26b1ffc7..bdb6c9cf7 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -577,6 +577,10 @@ virNodeDeviceDefFormat(const virNodeDeviceDef *def) virBufferEscapeString(&buf, "<type>%s</type>\n", virNodeDevDRMTypeToString(data->drm.type)); break; case VIR_NODE_DEV_CAP_MDEV: + virBufferEscapeString(&buf, "<type id='%s'/>\n", data->mdev.type); + virBufferAsprintf(&buf, "<iommuGroup number='%u'/>\n", + data->mdev.iommuGroupNumber); + break; case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: @@ -1643,6 +1647,39 @@ virNodeDevCapSystemParseXML(xmlXPathContextPtr ctxt, } +static int +virNodeDevCapMdevParseXML(xmlXPathContextPtr ctxt, + virNodeDeviceDefPtr def, + xmlNodePtr node, + virNodeDevCapMdevPtr mdev) +{ + xmlNodePtr orignode; + int ret = -1; + + orignode = ctxt->node; + ctxt->node = node; + + if (!(mdev->type = virXPathString("string(./type[1]/@id)", ctxt))) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("missing type id attribute for '%s'"), def->name); + goto out; + } + + if (virNodeDevCapsDefParseULong("number(./iommuGroup[1]/@number)", ctxt, + &mdev->iommuGroupNumber, def, + _("missing iommuGroup number atribute for " + "'%s'"), + _("invalid iommuGroup number attribute for " + "'%s'")) < 0) + goto out; + + ret = 0; + out: + ctxt->node = orignode; + return ret; +} + + static virNodeDevCapsDefPtr virNodeDevCapsDefParseXML(xmlXPathContextPtr ctxt, virNodeDeviceDefPtr def, @@ -1711,6 +1748,8 @@ virNodeDevCapsDefParseXML(xmlXPathContextPtr ctxt, ret = virNodeDevCapDRMParseXML(ctxt, def, node, &caps->data.drm); break; case VIR_NODE_DEV_CAP_MDEV: + ret = virNodeDevCapMdevParseXML(ctxt, def, node, &caps->data.mdev); + break; case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: @@ -2034,6 +2073,8 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) VIR_FREE(data->sg.path); break; case VIR_NODE_DEV_CAP_MDEV: + VIR_FREE(data->mdev.type); + break; case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_DRM: case VIR_NODE_DEV_CAP_FC_HOST: diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 629f732e6..cea9247d2 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -143,6 +143,13 @@ struct _virNodeDevCapMdevType { unsigned int available_instances; }; +typedef struct _virNodeDevCapMdev virNodeDevCapMdev; +typedef virNodeDevCapMdev *virNodeDevCapMdevPtr; +struct _virNodeDevCapMdev { + char *type; + unsigned int iommuGroupNumber; +}; + typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev; typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr; struct _virNodeDevCapPCIDev { @@ -276,6 +283,7 @@ struct _virNodeDevCapData { virNodeDevCapStorage storage; virNodeDevCapSCSIGeneric sg; virNodeDevCapDRM drm; + virNodeDevCapMdev mdev; }; }; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 4c75dec2b..8288be1cb 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -1069,6 +1069,42 @@ udevProcessSCSIGeneric(struct udev_device *dev, } static int +udevProcessMediatedDevice(struct udev_device *dev, + virNodeDeviceDefPtr def) +{ + int ret = -1; + const char *uuidstr = NULL; + int iommugrp = -1; + char *linkpath = NULL; + char *realpath = NULL; + virNodeDevCapMdevPtr data = &def->caps->data.mdev; + + if (virAsprintf(&linkpath, "%s/mdev_type", udev_device_get_syspath(dev)) < 0) + goto cleanup; + + if (virFileResolveLink(linkpath, &realpath) < 0) + goto cleanup; + + if (VIR_STRDUP(data->type, last_component(realpath)) < 0) + goto cleanup; + + uuidstr = udev_device_get_sysname(dev); + if ((iommugrp = virMediatedDeviceGetIOMMUGroupNum(uuidstr)) < 0) + goto cleanup; + + if (udevGenerateDeviceName(dev, def, NULL) != 0) + goto cleanup; + + data->iommuGroupNumber = iommugrp; + + ret = 0; + cleanup: + VIR_FREE(linkpath); + VIR_FREE(realpath); + return ret; +} + +static int udevGetDeviceNodes(struct udev_device *device, virNodeDeviceDefPtr def) { @@ -1136,12 +1172,16 @@ udevGetDeviceType(struct udev_device *device, if (udevHasDeviceProperty(device, "INTERFACE")) *type = VIR_NODE_DEV_CAP_NET; - /* SCSI generic device doesn't set DEVTYPE property */ + /* Neither SCSI generic devices nor mediated devices set DEVTYPE + * property, therefore we need to rely on the SUBSYSTEM property */ if (udevGetStringProperty(device, "SUBSYSTEM", &subsystem) < 0) return -1; if (STREQ_NULLABLE(subsystem, "scsi_generic")) *type = VIR_NODE_DEV_CAP_SCSI_GENERIC; + else if (STREQ_NULLABLE(subsystem, "mdev")) + *type = VIR_NODE_DEV_CAP_MDEV; + VIR_FREE(subsystem); } @@ -1181,6 +1221,7 @@ static int udevGetDeviceDetails(struct udev_device *device, case VIR_NODE_DEV_CAP_DRM: return udevProcessDRMDevice(device, def); case VIR_NODE_DEV_CAP_MDEV: + return udevProcessMediatedDevice(device, def); case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_SYSTEM: case VIR_NODE_DEV_CAP_FC_HOST: diff --git a/tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml b/tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml new file mode 100644 index 000000000..470e5917e --- /dev/null +++ b/tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml @@ -0,0 +1,8 @@ +<device> + <name>mdev_3627463d_b7f0_4fea_b468_f1da537d301b</name> + <parent>computer</parent> + <capability type='mdev'> + <type id='mtty-1'/> + <iommuGroup number='12'/> + </capability> +</device> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c index e3a77646c..a2aad518d 100644 --- a/tests/nodedevxml2xmltest.c +++ b/tests/nodedevxml2xmltest.c @@ -102,6 +102,7 @@ mymain(void) DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type"); DO_TEST("drm_renderD129"); DO_TEST("pci_0000_02_10_7_mdev_types"); + DO_TEST("mdev_3627463d_b7f0_4fea_b468_f1da537d301b"); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
Start discovering the mediated devices on the host system and format the attributes for the mediated device into the XML. Compared to the parent device which reports generic information about the abstract mediated devices types, a child device only reports the type name it has been instantiated from and the IOMMU group number, since that's device specific compared to the rest of the info that can be gathered about mediated devices at the moment. This patch introduces both the formatting and parsing routines, updates nodedev.rng schema, adding a testcase as well.
The resulting mdev child device XML: <device> <name>mdev_4b20d080_1b54_4048_85b3_a6a62d165c01</name> <path>/sys/devices/.../4b20d080-1b54-4048-85b3-a6a62d165c01</path> <parent>pci_0000_06_00_0</parent> <driver> <name>vfio_mdev</name> </driver> <capability type='mdev'> <type id='vendor_supplied_type_id'/> <iommuGroup number='NUM'/> <capability/> <device/>
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/schemas/nodedev.rng | 17 +++++++++ src/conf/node_device_conf.c | 41 +++++++++++++++++++++ src/conf/node_device_conf.h | 8 ++++ src/node_device/node_device_udev.c | 43 +++++++++++++++++++++- .../mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml | 8 ++++ tests/nodedevxml2xmltest.c | 1 + 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tests/nodedevschemadata/mdev_3627463d_b7f0_4fea_b468_f1da537d301b.xml
[...]
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index f26b1ffc7..bdb6c9cf7 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -577,6 +577,10 @@ virNodeDeviceDefFormat(const virNodeDeviceDef *def) virBufferEscapeString(&buf, "<type>%s</type>\n", virNodeDevDRMTypeToString(data->drm.type)); break; case VIR_NODE_DEV_CAP_MDEV: + virBufferEscapeString(&buf, "<type id='%s'/>\n", data->mdev.type); + virBufferAsprintf(&buf, "<iommuGroup number='%u'/>\n", + data->mdev.iommuGroupNumber); + break; case VIR_NODE_DEV_CAP_MDEV_TYPES: case VIR_NODE_DEV_CAP_FC_HOST: case VIR_NODE_DEV_CAP_VPORTS: @@ -1643,6 +1647,39 @@ virNodeDevCapSystemParseXML(xmlXPathContextPtr ctxt, }
+static int +virNodeDevCapMdevParseXML(xmlXPathContextPtr ctxt, + virNodeDeviceDefPtr def, + xmlNodePtr node, + virNodeDevCapMdevPtr mdev) +{ + xmlNodePtr orignode; + int ret = -1; + + orignode = ctxt->node; + ctxt->node = node; + + if (!(mdev->type = virXPathString("string(./type[1]/@id)", ctxt))) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("missing type id attribute for '%s'"), def->name); + goto out; + } + + if (virNodeDevCapsDefParseULong("number(./iommuGroup[1]/@number)", ctxt, + &mdev->iommuGroupNumber, def, + _("missing iommuGroup number atribute for "
s/atribute/attribute/ Reviewed-by: John Ferlan <jferlan@redhat.com> John
+ "'%s'"), + _("invalid iommuGroup number attribute for " + "'%s'")) < 0) + goto out; + + ret = 0; + out: + ctxt->node = orignode; + return ret; +} + +
[...]

Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/drvnodedev.html.in | 168 +++++++++++++++++++++++++++++++++++++++++++++++- tools/virsh.pod | 7 +- 2 files changed, 171 insertions(+), 4 deletions(-) diff --git a/docs/drvnodedev.html.in b/docs/drvnodedev.html.in index 0a3870343..26c52dd0d 100644 --- a/docs/drvnodedev.html.in +++ b/docs/drvnodedev.html.in @@ -9,7 +9,7 @@ (historically also referred to as node devices) like USB, PCI, SCSI, and network devices. This also includes various virtualization capabilities which the aforementioned devices provide for utilization, for example - SR-IOV, NPIV, DRM, etc. + SR-IOV, NPIV, MDEV, DRM, etc. </p> <p> @@ -75,6 +75,7 @@ <code>storage</code> (<span class="since">Since 1.0.4</span>), <code>scsi_generic</code> (<span class="since">Since 1.0.7</span>), <code>drm</code> (<span class="since">Since 3.1.0</span>), and + <code>mdev</code> (<span class="since">Since 3.4.0</span>). This element can be nested in which case it further specifies a device's capability. Refer to specific device types to see more values for the <code>type</code> attribute which are exclusive. @@ -185,5 +186,170 @@ ... <device></pre> + <h3><a name="MDEVCap">MDEV capability</a></h3> + <p> + A PCI device capable of creating mediated devices will include a nested + capability <code>mdev_types</code> which enumerates all supported mdev + types on the physical device, along with the type attributes available + through sysfs: + </p> + + <dl> + <dt><code>type</code></dt> + <dd> + This element describes a mediated device type which acts as an + abstract template defining a resource allocation for instances of this + device type. The element has one attribute <code>id</code> which holds + an official vendor-supplied identifier for the type. + <span class="since">Since 3.4.0</span> + </dd> + + <dt><code>name</code></dt> + <dd> + The <code>name</code> element holds a vendor-supplied code name for + the given mediated device type. This is an optional element. + <span class="since">Since 3.4.0</span> + </dd> + + <dt><code>deviceAPI</code></dt> + <dd> + The value of this element describes how an instance of the given type + will be presented to the guest by the VFIO framework. + <span class="since">Since 3.4.0</span> + </dd> + + <dt><code>availableInstances</code></dt> + <dd> + This element reports the current state of resource allocation. In other + words, how many instances of the given type can still be successfully + created on the physical device. + <span class="since">Since 3.4.0</span> + </dd> + </dl> + + <p> + For a more info about mediated devices, refer to the + <a href="#MDEV">paragraph below</a>. + </p> + +<pre> +<device> +... + <driver> + <name>nvidia</name> + </driver> + <capability type='pci'> +... + <capability type='mdev_types'> + <type id='nvidia-11'> + <name>GRID M60-0B</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>16</availableInstances> + </type> + <!-- Here would come the rest of the available mdev types --> + </capability> +... + </capability> +</device></pre> + + <h2><a name="MDEV">Mediated devices (MDEVs)</a></h2> + <p> + Mediated devices (<span class="since">Since 3.2.0</span>) are software + devices defining resource allocation on the backing physical device which + in turn allows the parent physical device's resources to be divided into + several mediated devices, thus sharing the physical device's performance + among multiple guests. Unlike SR-IOV however, where a PCIe device appears + as multiple separate PCIe devices on the host's PCI bus, mediated devices + only appear on the mdev virtual bus. Therefore, no detach/reattach + procedure from/to the host driver procedure is involved even though + mediated devices are used in a direct device assignment manner. + </p> + + <p> + The following sub-elements and attributes are exposed within the + <code>capability</code> element: + </p> + + <dl> + <dt><code>type</code></dt> + <dd> + This element describes a mediated device type which acts as an + abstract template defining a resource allocation for instances of this + device type. The element has one attribute <code>id</code> which holds + an official vendor-supplied identifier for the type. + <span class="since">Since 3.4.0</span> + </dd> + + <dt><code>iommuGroup</code></dt> + <dd> + This element supports a single attribute <code>number</code> which holds + the IOMMU group number the mediated device belongs to. + <span class="since">Since 3.4.0</span> + </dd> + </dl> + + <h3>Example of a mediated device</h3> + <pre> +<device> + <name>mdev_4b20d080_1b54_4048_85b3_a6a62d165c01</name> + <path>/sys/devices/pci0000:00/0000:00:02.0/4b20d080-1b54-4048-85b3-a6a62d165c01</path> + <parent>pci_0000_06_00_0</parent> + <driver> + <name>vfio_mdev</name> + </driver> + <capability type='mdev'> + <type id='nvidia-11'/> + <iommuGroup number='12'/> + <capability/> +<device/></pre> + + <p> + The support of mediated device's framework in libvirt's node device driver + covers the following features: + </p> + + <ul> + <li> + list available mediated devices on the host + (<span class="since">Since 3.4.0</span>) + </li> + <li> + display device details + (<span class="since">Since 3.4.0</span>) + </li> + </ul> + + <p> + Because mediated devices are instantiated from vendor specific templates, + simply called 'types', information describing these types is contained + within the parent device's capabilities + (see the example in <a href="#PCI">PCI host devices</a>). + </p> + + <p> + To see the supported mediated device types on a specific physical device + use the following: + </p> + + <pre> +$ ls /sys/class/mdev_bus/<device>/mdev_supported_types</pre> + + <p> + To manually instantiate a mediated device, use one of the following as a + reference: + </p> + + <pre> +$ uuidgen > /sys/class/mdev_bus/<device>/mdev_supported_types/<type>/create +... +$ echo <UUID> > /sys/class/mdev_bus/<device>/mdev_supported_types/<type>/create</pre> + + <p> + Manual removal of a mediated device is then performed as follows: + </p> + + <pre> +$ echo 1 > /sys/bus/mdev/devices/<uuid>/remove</pre> + </body> </html> diff --git a/tools/virsh.pod b/tools/virsh.pod index 727acf6e6..d69c8d87f 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -3177,10 +3177,11 @@ for HBA). List all of the devices available on the node that are known by libvirt. I<cap> is used to filter the list by capability types, the types must be -separated by comma, e.g. --cap pci,scsi, valid capability types include +separated by comma, e.g. --cap pci,scsi. Valid capability types include 'system', 'pci', 'usb_device', 'usb', 'net', 'scsi_host', 'scsi_target', -'scsi', 'storage', 'fc_host', 'vports', 'scsi_generic', 'drm'.If I<--tree> -is used, the output is formatted in a tree representing parents of each +'scsi', 'storage', 'fc_host', 'vports', 'scsi_generic', 'drm', 'mdev', +'mdev_types'. +If I<--tree> is used, the output is formatted in a tree representing parents of each node. I<cap> and I<--tree> are mutually exclusive. =item B<nodedev-reattach> I<nodedev> -- 2.13.0

On 05/15/2017 08:10 AM, Erik Skultety wrote:
Signed-off-by: Erik Skultety <eskultet@redhat.com> --- docs/drvnodedev.html.in | 168 +++++++++++++++++++++++++++++++++++++++++++++++- tools/virsh.pod | 7 +- 2 files changed, 171 insertions(+), 4 deletions(-)
Reviewed-by: John Ferlan <jferlan@redhat.com> John
participants (2)
-
Erik Skultety
-
John Ferlan