On Wed, May 17, 2017 at 05:22:45PM -0400, John Ferlan wrote:
On 05/15/2017 08:10 AM, Erik Skultety wrote:
> The parent device needs to report the generic stuff about the supported
> mediated devices types, like device API, available instances, type name,
> etc. Therefore this patch introduces a new nested capability element of
> type 'mdev_types' with the resulting XML of the following format:
>
> <device>
> ...
> <capability type='pci'>
> ...
> <capability type='mdev_types'>
> <type id='vendor_supplied_id'>
> <name>optional_vendor_supplied_codename</name>
> <deviceAPI>vfio-pci</deviceAPI>
> <availableInstances>NUM</availableInstances>
> </type>
> ...
> <type>
> ...
> </type>
> </capability>
> </capability>
> ...
> </device>
I haven't followed all the discussions that closely - so a comment from
the peanut gallery... Since mdev_types is a sub-capability of
type='pci', does the "vfio-pci" feel redundant? Ok just the -pci
part...
Well, that would be mangling of the value we query from the sysfs and that
would again bring us to the point where people would rely on it. IMHO we should
expose the values verbatim as we find it under sysfs, do we actually do adjust
the attribute values after querying them from sysfs??
Anyhow, during one of the VFIO meetings we've also discussed a possibility of
someone coming up with vfio-pci v2 backend (though unlikely), in that aspect,
you really want to know what is the exact device API supported by that mdev
type (I have another unlikely and extremely 'visionary' scenario to support my
argument, but I'd rather keep it for myself :)).
>
> Signed-off-by: Erik Skultety <eskultet(a)redhat.com>
> ---
> docs/schemas/nodedev.rng | 26 +++++
> src/conf/node_device_conf.c | 97 +++++++++++++++++
> src/conf/node_device_conf.h | 15 +++
> src/conf/virnodedeviceobj.c | 7 ++
> src/libvirt_private.syms | 1 +
> src/node_device/node_device_udev.c | 119 +++++++++++++++++++++
> .../pci_0000_02_10_7_mdev_types.xml | 32 ++++++
> tests/nodedevxml2xmltest.c | 1 +
> 8 files changed, 298 insertions(+)
> create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
>
> diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng
> index 0f90a73c8..e0a2c5032 100644
> --- a/docs/schemas/nodedev.rng
> +++ b/docs/schemas/nodedev.rng
> @@ -205,6 +205,32 @@
> </optional>
>
> <optional>
> + <element name='capability'>
> + <attribute name='type'>
> + <value>mdev_types</value>
> + </attribute>
> + <oneOrMore>
> + <element name='type'>
> + <attribute name='id'>
> + <data type='string'/>
> + </attribute>
> + <optional>
> + <element name='name'><text/></element>
> + </optional>
> + <element name='deviceAPI'>
> + <choice>
> + <value>vfio-pci</value>
> + </choice>
> + </element>
> + <element name='availableInstances'>
> + <ref name='unsignedInt'/>
> + </element>
> + </element>
> + </oneOrMore>
> + </element>
> + </optional>
> +
> + <optional>
> <element name='iommuGroup'>
> <attribute name='number'>
> <ref name='unsignedInt'/>
> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
> index 40d71f277..f26b1ffc7 100644
> --- a/src/conf/node_device_conf.c
> +++ b/src/conf/node_device_conf.c
> @@ -89,6 +89,19 @@ virNodeDevCapsDefParseString(const char *xpath,
>
>
> void
> +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type)
> +{
> + if (!type)
> + return;
> +
> + VIR_FREE(type->id);
> + VIR_FREE(type->name);
> + VIR_FREE(type->device_api);
> + VIR_FREE(type);
> +}
> +
> +
> +void
> virNodeDeviceDefFree(virNodeDeviceDefPtr def)
> {
> virNodeDevCapsDefPtr caps;
> @@ -265,6 +278,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf,
> virBufferAsprintf(buf, "<capability
type='%s'/>\n",
> virPCIHeaderTypeToString(data->pci_dev.hdrType));
> }
> + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) {
> + virBufferAddLit(buf, "<capability
type='mdev_types'>\n");
> + virBufferAdjustIndent(buf, 2);
> + for (i = 0; i < data->pci_dev.nmdev_types; i++) {
> + virNodeDevCapMdevTypePtr type = data->pci_dev.mdev_types[i];
> + virBufferEscapeString(buf, "<type id='%s'>\n",
type->id);
> + virBufferAdjustIndent(buf, 2);
> + if (type->name)
> + virBufferEscapeString(buf,
"<name>%s</name>\n",
> + type->name);
> + virBufferEscapeString(buf,
"<deviceAPI>%s</deviceAPI>\n",
> + type->device_api);
> + virBufferAsprintf(buf,
> +
"<availableInstances>%u</availableInstances>\n",
> + type->available_instances);
> + virBufferAdjustIndent(buf, -2);
> + virBufferAddLit(buf, "</type>\n");
> + }
> + virBufferAdjustIndent(buf, -2);
> + virBufferAddLit(buf, "</capability>\n");
> + }
> if (data->pci_dev.nIommuGroupDevices) {
> virBufferAsprintf(buf, "<iommuGroup
number='%d'>\n",
> data->pci_dev.iommuGroupNumber);
> @@ -1365,6 +1399,63 @@ virNodeDevPCICapSRIOVVirtualParseXML(xmlXPathContextPtr
ctxt,
>
>
> static int
> +virNodeDevPCICapMdevTypesParseXML(xmlXPathContextPtr ctxt,
> + virNodeDevCapPCIDevPtr pci_dev)
> +{
> + int ret = -1;
> + xmlNodePtr orignode = NULL;
> + xmlNodePtr *nodes = NULL;
> + int nmdev_types = virXPathNodeSet("./type", ctxt, &nodes);
Coverity lets me know that virXPathNodeSet allocates memory into @nodes,
but we never clean it up before leaving.
Coverity also is proud to announce that nmdev_types could be < 0 which
would really mess up the following for loop!
Oops, didn't notice that one.
> + virNodeDevCapMdevTypePtr type = NULL;
> + size_t i;
I would just go with:
if ((nmdev_types = virXPathNodeSet("./type", ctxt, &nodes)) < 0)
goto cleanup;
> +
> + orignode = ctxt->node;
> + for (i = 0; i < nmdev_types; i++) {
> + ctxt->node = nodes[i];
> +
> + if (VIR_ALLOC(type) < 0)
> + goto cleanup;
> +
> + if (!(type->id = virXPathString("string(./@id[1])", ctxt))) {
> + virReportError(VIR_ERR_XML_ERROR, "%s",
> + _("missing 'id' attribute for mediated
device's "
> + "<type> element"));
> + goto cleanup;
> + }
> +
> + if (!(type->device_api =
virXPathString("string(./deviceAPI[1])", ctxt))) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("missing device API for mediated device type
'%s'"),
> + type->id);
> + goto cleanup;
> + }
> +
> + if (virXPathUInt("number(./availableInstances)", ctxt,
> + &type->available_instances) < 0) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("missing number of available instances for
"
> + "mediated device type '%s'"),
> + type->id);
> + goto cleanup;
> + }
> +
> + type->name = virXPathString("string(./name)", ctxt);
> +
> + if (VIR_APPEND_ELEMENT(pci_dev->mdev_types,
> + pci_dev->nmdev_types, type) < 0)
> + goto cleanup;
> + }
> +
> + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
> + ret = 0;
> + cleanup:
VIR_FREE(nodes);
> + virNodeDevCapMdevTypeFree(type);
> + ctxt->node = orignode;
> + return ret;
> +}
> +
> +
> +static int
> virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
> xmlNodePtr node,
> virNodeDevCapPCIDevPtr pci_dev)
> @@ -1386,6 +1477,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt,
> } else if (STREQ(type, "virt_functions") &&
> virNodeDevPCICapSRIOVVirtualParseXML(ctxt, pci_dev) < 0) {
> goto cleanup;
> + } else if (STREQ(type, "mdev_types") &&
> + virNodeDevPCICapMdevTypesParseXML(ctxt, pci_dev) < 0) {
> + goto cleanup;
> } else {
> int hdrType = virPCIHeaderTypeFromString(type);
>
> @@ -1899,6 +1993,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
> VIR_FREE(data->pci_dev.iommuGroupDevices[i]);
> VIR_FREE(data->pci_dev.iommuGroupDevices);
> virPCIEDeviceInfoFree(data->pci_dev.pci_express);
> + for (i = 0; i < data->pci_dev.nmdev_types; i++)
> + virNodeDevCapMdevTypeFree(data->pci_dev.mdev_types[i]);
> + VIR_FREE(data->pci_dev.mdev_types);
> break;
> case VIR_NODE_DEV_CAP_USB_DEV:
> VIR_FREE(data->usb_dev.product_name);
> diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
> index 273d49f76..629f732e6 100644
> --- a/src/conf/node_device_conf.h
> +++ b/src/conf/node_device_conf.h
> @@ -95,6 +95,7 @@ typedef enum {
> VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0),
> VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1),
> VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2),
> + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3),
> } virNodeDevPCICapFlags;
>
> typedef enum {
> @@ -133,6 +134,15 @@ struct _virNodeDevCapSystem {
> virNodeDevCapSystemFirmware firmware;
> };
>
> +typedef struct _virNodeDevCapMdevType virNodeDevCapMdevType;
> +typedef virNodeDevCapMdevType *virNodeDevCapMdevTypePtr;
> +struct _virNodeDevCapMdevType {
> + char *id;
> + char *name;
> + char *device_api;
> + unsigned int available_instances;
> +};
> +
> typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev;
> typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr;
> struct _virNodeDevCapPCIDev {
> @@ -156,6 +166,8 @@ struct _virNodeDevCapPCIDev {
> int numa_node;
> virPCIEDeviceInfoPtr pci_express;
> int hdrType; /* enum virPCIHeaderType or -1 */
> + virNodeDevCapMdevTypePtr *mdev_types;
> + size_t nmdev_types;
> };
>
> typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev;
> @@ -340,6 +352,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def);
> void
> virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps);
>
> +void
> +virNodeDevCapMdevTypeFree(virNodeDevCapMdevTypePtr type);
> +
> # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \
> (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \
> VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \
> diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c
> index 181d2efe1..fc5f3708f 100644
> --- a/src/conf/virnodedeviceobj.c
> +++ b/src/conf/virnodedeviceobj.c
> @@ -468,6 +468,13 @@ virNodeDeviceCapMatch(virNodeDeviceObjPtr devobj,
> VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS))
> return true;
> }
> +
> + if (cap->data.type == VIR_NODE_DEV_CAP_PCI_DEV) {
> + if (type == VIR_NODE_DEV_CAP_MDEV_TYPES &&
> + (cap->data.pci_dev.flags &
> + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV))
> + return true;
> + }
> }
>
> return false;
Something that I had to do with VPORTS in the past tells me you may need
to also modify virNodeDeviceObjHasCap with similar code.
This only matches for the virNodeDeviceObjListExport API...
Aha! found it, see commit id 'e8fcac8ec'
I'll adjust that, again one of those places where we handle the very same thing
on 2 places with almost identical code which makes it quite hard to track.
I'll send a follow-up patch to condense this to one place.
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index bbe283529..a2a30bd58 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -668,6 +668,7 @@ virNetDevIPRouteParseXML;
>
>
> # conf/node_device_conf.h
> +virNodeDevCapMdevTypeFree;
> virNodeDevCapsDefFree;
> virNodeDevCapTypeFromString;
> virNodeDevCapTypeToString;
> diff --git a/src/node_device/node_device_udev.c
b/src/node_device/node_device_udev.c
> index 1ddb55c80..4c75dec2b 100644
> --- a/src/node_device/node_device_udev.c
> +++ b/src/node_device/node_device_udev.c
> @@ -314,6 +314,119 @@ static int udevTranslatePCIIds(unsigned int vendor,
> }
>
>
> +static int
> +udevFillMdevType(struct udev_device *device,
> + const char *dir,
> + virNodeDevCapMdevTypePtr type)
> +{
> + int ret = -1;
> + char *attrpath = NULL;
> +
> +#define MDEV_GET_SYSFS_ATTR(attr_name, cb, ...) \
> + do { \
> + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0)
\
> + goto cleanup; \
> + \
> + if (cb(device, attrpath, __VA_ARGS__) < 0) \
> + goto cleanup; \
> + \
> + VIR_FREE(attrpath); \
> + } while (0) \
> +
> + if (VIR_STRDUP(type->id, last_component(dir)) < 0)
> + goto cleanup;
> +
> + /* query udev for the attributes under subdirectories using the relative
> + * path stored in @dir, i.e. 'mdev_supported_types/<type_id>'
> + */
> + MDEV_GET_SYSFS_ATTR(name, udevGetStringSysfsAttr, &type->name);
Does this imply that name isn't necessarily optional as defined in RNG?
Can '@name' not exist and if it is optional how does that adjust the macro?
There's no need for the macro to be changed - type->name == NULL in that case
which means it won't be formatted to the XML, there's no issue in that the
NULL's going to be handled gracefully all the way down from
udevGetStringSysfsAttr.
> + MDEV_GET_SYSFS_ATTR(device_api, udevGetStringSysfsAttr,
&type->device_api);
> + MDEV_GET_SYSFS_ATTR(available_instances, udevGetUintSysfsAttr,
> + &type->available_instances, 10);
> +
> +#undef MDEV_GET_SYSFS_ATTR
> +
> + ret = 0;
> + cleanup:
> + VIR_FREE(attrpath);
> + return ret;
> +}
> +
With changes to
1. fix the Coverity issues
2. determine whether the virNodeDeviceObjHasCap change is needed
3. address the optional name
^ see my comment above
You have my:
Reviewed-by: John Ferlan <jferlan(a)redhat.com>
While I'm not a fan of 'deviceAPI'
Why so? I'm open to any suggestions, choosing the right name for basically
anything is a gift I unfortunately do not possess, but truly desire to.
Erik
John
> +
> +static int
> +udevPCIGetMdevTypesCap(struct udev_device *device,
> + virNodeDevCapPCIDevPtr pcidata)
> +{
> + int ret = -1;
> + int dirret = -1;
> + DIR *dir = NULL;
> + struct dirent *entry;
> + char *path = NULL;
> + char *tmppath = NULL;
> + virNodeDevCapMdevTypePtr type = NULL;
> + virNodeDevCapMdevTypePtr *types = NULL;
> + size_t ntypes = 0;
> + size_t i;
> +
> + if (virAsprintf(&path, "%s/mdev_supported_types",
> + udev_device_get_syspath(device)) < 0)
> + return -1;
> +
> + if ((dirret = virDirOpenIfExists(&dir, path)) < 0)
> + goto cleanup;
> +
> + if (dirret == 0) {
> + ret = 0;
> + goto cleanup;
> + }
> +
> + if (VIR_ALLOC(types) < 0)
> + goto cleanup;
> +
> + /* UDEV doesn't report attributes under subdirectories by default but is
> + * able to query them if the path to the attribute is relative to the
> + * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the
device's
> + * base path as udev reports it, but we're interested in attributes under
> + * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, we need
to
> + * scan the subdirectories ourselves.
> + */
> + while ((dirret = virDirRead(dir, &entry, path)) > 0) {
> + if (VIR_ALLOC(type) < 0)
> + goto cleanup;
> +
> + /* construct the relative mdev type path bit for udev */
> + if (virAsprintf(&tmppath, "mdev_supported_types/%s",
entry->d_name) < 0)
> + goto cleanup;
> +
> + if (udevFillMdevType(device, tmppath, type) < 0)
> + goto cleanup;
> +
> + if (VIR_APPEND_ELEMENT(types, ntypes, type) < 0)
> + goto cleanup;
> +
> + VIR_FREE(tmppath);
> + }
> +
> + if (dirret < 0)
> + goto cleanup;
> +
> + VIR_STEAL_PTR(pcidata->mdev_types, types);
> + pcidata->nmdev_types = ntypes;
> + pcidata->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV;
> + ntypes = 0;
> + ret = 0;
> + cleanup:
> + virNodeDevCapMdevTypeFree(type);
> + for (i = 0; i < ntypes; i++)
> + virNodeDevCapMdevTypeFree(types[i]);
> + VIR_FREE(types);
> + VIR_FREE(path);
> + VIR_FREE(tmppath);
> + VIR_DIR_CLOSE(dir);
> + return ret;
> +}
> +
> +
> static int udevProcessPCI(struct udev_device *device,
> virNodeDeviceDefPtr def)
> {
> @@ -400,6 +513,12 @@ static int udevProcessPCI(struct udev_device *device,
> }
> }
>
> + /* check whether the device is mediated devices framework capable, if so,
> + * process it
> + */
> + if (udevPCIGetMdevTypesCap(device, pci_dev) < 0)
> + goto cleanup;
> +
> ret = 0;
>
> cleanup:
> diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
> new file mode 100644
> index 000000000..a2d57569a
> --- /dev/null
> +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev_types.xml
> @@ -0,0 +1,32 @@
> +<device>
> + <name>pci_0000_02_10_7</name>
> + <parent>pci_0000_00_04_0</parent>
> + <capability type='pci'>
> + <domain>0</domain>
> + <bus>2</bus>
> + <slot>16</slot>
> + <function>7</function>
> + <product id='0x10ca'>82576 Virtual Function</product>
> + <vendor id='0x8086'>Intel Corporation</vendor>
> + <capability type='mdev_types'>
> + <type id='foo1'>
> + <name>bar1</name>
> + <deviceAPI>vfio-pci</deviceAPI>
> + <availableInstances>1</availableInstances>
> + </type>
> + <type id='foo2'>
> + <name>bar2</name>
> + <deviceAPI>vfio-pci</deviceAPI>
> + <availableInstances>2</availableInstances>
> + </type>
> + </capability>
> + <iommuGroup number='31'>
> + <address domain='0x0000' bus='0x02' slot='0x10'
function='0x7'/>
> + </iommuGroup>
> + <numa node='0'/>
> + <pci-express>
> + <link validity='cap' port='0' speed='2.5'
width='4'/>
> + <link validity='sta' width='0'/>
> + </pci-express>
> + </capability>
> +</device>
> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c
> index f023d8a13..e3a77646c 100644
> --- a/tests/nodedevxml2xmltest.c
> +++ b/tests/nodedevxml2xmltest.c
> @@ -101,6 +101,7 @@ mymain(void)
> DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all");
> DO_TEST("pci_0000_02_10_7_sriov_pf_vfs_all_header_type");
> DO_TEST("drm_renderD129");
> + DO_TEST("pci_0000_02_10_7_mdev_types");
>
> return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
> }
>