[libvirt] [PATCH 0/2] Expose PCI Express capabilities

*** BLURB HERE *** Michal Privoznik (2): virpci: Introduce virPCIDeviceIsPCIExpress and friends nodedev: Introduce <pci-express/> to PCI devices docs/formatnode.html.in | 19 ++++ docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 123 ++++++++++++++++++++- src/conf/node_device_conf.h | 31 +++++- src/libvirt_private.syms | 3 + src/node_device/node_device_udev.c | 31 ++++++ src/util/virpci.c | 81 +++++++++++++- src/util/virpci.h | 8 ++ .../pci_8086_4238_pcie_wireless.xml | 26 +++++ tests/nodedevxml2xmltest.c | 1 + 10 files changed, 345 insertions(+), 4 deletions(-) create mode 100644 tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml -- 2.0.0

These functions will handle PCIe devices and their link capabilities to query some info about it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 3 ++ src/util/virpci.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++- src/util/virpci.h | 8 +++++ 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d73a9f5..f72a3ad 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1690,6 +1690,8 @@ virPCIDeviceFree; virPCIDeviceGetDriverPathAndName; virPCIDeviceGetIOMMUGroupDev; virPCIDeviceGetIOMMUGroupList; +virPCIDeviceGetLinkCap; +virPCIDeviceGetLinkSta; virPCIDeviceGetManaged; virPCIDeviceGetName; virPCIDeviceGetRemoveSlot; @@ -1698,6 +1700,7 @@ virPCIDeviceGetStubDriver; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; virPCIDeviceIsAssignable; +virPCIDeviceIsPCIExpress; virPCIDeviceListAdd; virPCIDeviceListAddCopy; virPCIDeviceListCount; diff --git a/src/util/virpci.c b/src/util/virpci.c index e0f2344..8ad28b8 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -130,7 +130,13 @@ struct _virPCIDeviceList { /* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ #define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP 0xc /* Link Capabilities */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ +#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP_SPEED 0x0000f /* Maximum Link Speed */ +#define PCI_EXP_LNKCAP_WIDTH 0x003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKSTA_SPEED 0x000f /* Negotiated Link Speed */ +#define PCI_EXP_LNKSTA_WIDTH 0x03f0 /* Negotiated Link Width */ /* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */ #define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */ @@ -2750,3 +2756,76 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED, return -1; } #endif /* __linux__ */ + +int +virPCIDeviceIsPCIExpress(virPCIDevicePtr dev) +{ + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + ret = dev->pcie_cap_pos != 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkCap(virPCIDevicePtr dev, + int *port, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP); + + *port = t >> 0x18; + *speed = t & PCI_EXP_LNKCAP_SPEED; + *width = (t & PCI_EXP_LNKCAP_WIDTH) >> 4; + ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkSta(virPCIDevicePtr dev, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA); + + *speed = t & PCI_EXP_LNKSTA_SPEED; + *width = (t & PCI_EXP_LNKSTA_WIDTH) >> 4; + ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} diff --git a/src/util/virpci.h b/src/util/virpci.h index 20ffe54..0a608c2 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -176,4 +176,12 @@ int virPCIDeviceGetDriverPathAndName(virPCIDevicePtr dev, char **path, char **name); +int virPCIDeviceIsPCIExpress(virPCIDevicePtr dev); +int virPCIDeviceGetLinkCap(virPCIDevicePtr dev, + int *port, + unsigned int *speed, + unsigned int *width); +int virPCIDeviceGetLinkSta(virPCIDevicePtr dev, + unsigned int *speed, + unsigned int *width); #endif /* __VIR_PCI_H__ */ -- 2.0.0

On Fri, Jun 06, 2014 at 12:54:17PM +0200, Michal Privoznik wrote:
These functions will handle PCIe devices and their link capabilities to query some info about it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 3 ++ src/util/virpci.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++- src/util/virpci.h | 8 +++++ 3 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d73a9f5..f72a3ad 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1690,6 +1690,8 @@ virPCIDeviceFree; virPCIDeviceGetDriverPathAndName; virPCIDeviceGetIOMMUGroupDev; virPCIDeviceGetIOMMUGroupList; +virPCIDeviceGetLinkCap; +virPCIDeviceGetLinkSta; virPCIDeviceGetManaged; virPCIDeviceGetName; virPCIDeviceGetRemoveSlot; @@ -1698,6 +1700,7 @@ virPCIDeviceGetStubDriver; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; virPCIDeviceIsAssignable; +virPCIDeviceIsPCIExpress; virPCIDeviceListAdd; virPCIDeviceListAddCopy; virPCIDeviceListCount; diff --git a/src/util/virpci.c b/src/util/virpci.c index e0f2344..8ad28b8 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -130,7 +130,13 @@ struct _virPCIDeviceList {
/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ #define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP 0xc /* Link Capabilities */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ +#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP_SPEED 0x0000f /* Maximum Link Speed */ +#define PCI_EXP_LNKCAP_WIDTH 0x003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKSTA_SPEED 0x000f /* Negotiated Link Speed */ +#define PCI_EXP_LNKSTA_WIDTH 0x03f0 /* Negotiated Link Width */
These two sets are essentially the same, just keep it as e.g. PCI_EXP_LINK_(SPEED|WIDTH) with 0x(f|3f) respectively. And keep the order of the names alphabetical.
/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */ #define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */ @@ -2750,3 +2756,76 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED, return -1; } #endif /* __linux__ */ + +int +virPCIDeviceIsPCIExpress(virPCIDevicePtr dev) +{ + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + ret = dev->pcie_cap_pos != 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkCap(virPCIDevicePtr dev, + int *port, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP); + + *port = t >> 0x18;
Took me second to figure out you just want the highest byte. s/0x18/24/ would make that much more clear, at least for me.
+ *speed = t & PCI_EXP_LNKCAP_SPEED; + *width = (t & PCI_EXP_LNKCAP_WIDTH) >> 4;
And if I wanted to be *really* digging deep into this and nitpicking a lot, I'd say you can do: port = Read8 speed = Read16 & SPEED width = Read8
+ ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkSta(virPCIDevicePtr dev, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA); + + *speed = t & PCI_EXP_LNKSTA_SPEED; + *width = (t & PCI_EXP_LNKSTA_WIDTH) >> 4; + ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +}
Both these functions do the same thing with 2 micro differences, either merge it to one, so you avoid double open, init, etc. Or create one that at least takes a parameter saying whether the caller wants CAPability or STAtus data. Other than that it looks good. Martin

On 12.06.2014 10:56, Martin Kletzander wrote:
On Fri, Jun 06, 2014 at 12:54:17PM +0200, Michal Privoznik wrote:
These functions will handle PCIe devices and their link capabilities to query some info about it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 3 ++ src/util/virpci.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++- src/util/virpci.h | 8 +++++ 3 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d73a9f5..f72a3ad 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1690,6 +1690,8 @@ virPCIDeviceFree; virPCIDeviceGetDriverPathAndName; virPCIDeviceGetIOMMUGroupDev; virPCIDeviceGetIOMMUGroupList; +virPCIDeviceGetLinkCap; +virPCIDeviceGetLinkSta; virPCIDeviceGetManaged; virPCIDeviceGetName; virPCIDeviceGetRemoveSlot; @@ -1698,6 +1700,7 @@ virPCIDeviceGetStubDriver; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; virPCIDeviceIsAssignable; +virPCIDeviceIsPCIExpress; virPCIDeviceListAdd; virPCIDeviceListAddCopy; virPCIDeviceListCount; diff --git a/src/util/virpci.c b/src/util/virpci.c index e0f2344..8ad28b8 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -130,7 +130,13 @@ struct _virPCIDeviceList {
/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ #define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP 0xc /* Link Capabilities */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ +#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP_SPEED 0x0000f /* Maximum Link Speed */ +#define PCI_EXP_LNKCAP_WIDTH 0x003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKSTA_SPEED 0x000f /* Negotiated Link Speed */ +#define PCI_EXP_LNKSTA_WIDTH 0x03f0 /* Negotiated Link Width */
These two sets are essentially the same, just keep it as e.g. PCI_EXP_LINK_(SPEED|WIDTH) with 0x(f|3f) respectively. And keep the order of the names alphabetical.
The values are copied from /usr/include/pci/header.h. One day we could drop all of these copies and include the file directly. For that, I'd rather keep it just as is in the foreign file. Until the time we do that, I'm sorting these alphabetically, okay.
/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */ #define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */ @@ -2750,3 +2756,76 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED, return -1; } #endif /* __linux__ */ + +int +virPCIDeviceIsPCIExpress(virPCIDevicePtr dev) +{ + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true)) < 0) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + ret = dev->pcie_cap_pos != 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkCap(virPCIDevicePtr dev, + int *port, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKCAP); + + *port = t >> 0x18;
Took me second to figure out you just want the highest byte. s/0x18/24/ would make that much more clear, at least for me.
+ *speed = t & PCI_EXP_LNKCAP_SPEED; + *width = (t & PCI_EXP_LNKCAP_WIDTH) >> 4;
And if I wanted to be *really* digging deep into this and nitpicking a lot, I'd say you can do:
port = Read8 speed = Read16 & SPEED width = Read8
The truth is, I've took my inspiration from lscpi sources. But your suggestion is good also.
+ ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +} + +int +virPCIDeviceGetLinkSta(virPCIDevicePtr dev, + unsigned int *speed, + unsigned int *width) +{ + uint32_t t; + int fd; + int ret = -1; + + if ((fd = virPCIDeviceConfigOpen(dev, true) < 0)) + return ret; + + if (virPCIDeviceInit(dev, fd) < 0) + goto cleanup; + + t = virPCIDeviceRead32(dev, fd, dev->pcie_cap_pos + PCI_EXP_LNKSTA); + + *speed = t & PCI_EXP_LNKSTA_SPEED; + *width = (t & PCI_EXP_LNKSTA_WIDTH) >> 4; + ret = 0; + + cleanup: + virPCIDeviceConfigClose(dev, fd); + return ret; +}
Both these functions do the same thing with 2 micro differences, either merge it to one, so you avoid double open, init, etc. Or create one that at least takes a parameter saying whether the caller wants CAPability or STAtus data.
I'll merge those two.
Other than that it looks good.
Martin

On Thu, Jun 12, 2014 at 12:08:08PM +0200, Michal Privoznik wrote:
On 12.06.2014 10:56, Martin Kletzander wrote:
On Fri, Jun 06, 2014 at 12:54:17PM +0200, Michal Privoznik wrote:
These functions will handle PCIe devices and their link capabilities to query some info about it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 3 ++ src/util/virpci.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++- src/util/virpci.h | 8 +++++ 3 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d73a9f5..f72a3ad 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1690,6 +1690,8 @@ virPCIDeviceFree; virPCIDeviceGetDriverPathAndName; virPCIDeviceGetIOMMUGroupDev; virPCIDeviceGetIOMMUGroupList; +virPCIDeviceGetLinkCap; +virPCIDeviceGetLinkSta; virPCIDeviceGetManaged; virPCIDeviceGetName; virPCIDeviceGetRemoveSlot; @@ -1698,6 +1700,7 @@ virPCIDeviceGetStubDriver; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; virPCIDeviceIsAssignable; +virPCIDeviceIsPCIExpress; virPCIDeviceListAdd; virPCIDeviceListAddCopy; virPCIDeviceListCount; diff --git a/src/util/virpci.c b/src/util/virpci.c index e0f2344..8ad28b8 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -130,7 +130,13 @@ struct _virPCIDeviceList {
/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */ #define PCI_EXP_DEVCAP 0x4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP 0xc /* Link Capabilities */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ +#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */ +#define PCI_EXP_LNKCAP_SPEED 0x0000f /* Maximum Link Speed */ +#define PCI_EXP_LNKCAP_WIDTH 0x003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKSTA_SPEED 0x000f /* Negotiated Link Speed */ +#define PCI_EXP_LNKSTA_WIDTH 0x03f0 /* Negotiated Link Width */
These two sets are essentially the same, just keep it as e.g. PCI_EXP_LINK_(SPEED|WIDTH) with 0x(f|3f) respectively. And keep the order of the names alphabetical.
The values are copied from /usr/include/pci/header.h. One day we could drop all of these copies and include the file directly. For that, I'd rather keep it just as is in the foreign file. Until the time we do that, I'm sorting these alphabetically, okay.
Well, the only ones I was concerned with were simple masks for 8 bits and 13 bits, there's nothing special about those. We even use: "(1 << x) - 1" for mask of x bits somewhere, I think. Including them fromt he system is probably the best option (for applicable systems, of course), so that would be preferred. Be sure we depend on pciutils-devel or what's the package if we don't already. Martin

This new element is there to represent PCI-Express capabilities of a PCI devices, like link speed, number of lanes, etc. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatnode.html.in | 19 ++++ docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 123 ++++++++++++++++++++- src/conf/node_device_conf.h | 31 +++++- src/node_device/node_device_udev.c | 31 ++++++ .../pci_8086_4238_pcie_wireless.xml | 26 +++++ tests/nodedevxml2xmltest.c | 1 + 7 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index b424c96..46ec2bc 100644 --- a/docs/formatnode.html.in +++ b/docs/formatnode.html.in @@ -110,6 +110,21 @@ have a list of <code>address</code> subelements, one for each VF on this PF. </dd> + <dt><code>pci-express</code></dt> + <dd> + This optional element contains information on PCI Express part of + the device. For example, it can contain a child element + <code>link</code> which addresses the PCI Express device's link. + While a device has it's own capabilities + (<code>validity='cap'</code>), the actual run time capabilities + are negotiated on the device initialization + (<code>validity='sta'</code>). The <code>link</code> element then + contains three attributes: <code>port</code> which says in which + port is the device plugged in, <code>speed</code> (in + GigaTransfers per second) and <code>width</code> for the number + of lanes used. Since the port can't be negotiated, it's not + exposed in <code>./pci-express/link/[@validity='sta']</code>. + </dd> </dl> </dd> <dt><code>usb_device</code></dt> @@ -291,6 +306,10 @@ <address domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> <address domain='0x0000' bus='0x02' slot='0x00' function='0x1'/> </iommuGroup> + <pci-express> + <link validity='cap' port='1' speed='2.5' width='1'/> + <link validity='sta' speed='2.5' width='1'/> + </pci-express> </capability> </device> </pre> diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 81ab4d4..79e8fd2 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -158,6 +158,32 @@ </element> </optional> + <optional> + <element name='pci-express'> + <zeroOrMore> + <element name='link'> + <attribute name='validity'> + <choice> + <value>cap</value> + <value>sta</value> + </choice> + </attribute> + <optional> + <attribute name='port'> + <ref name='unsignedInt'/> + </attribute> + </optional> + <optional> + <attribute name='speed'> + </attribute> + </optional> + <attribute name='width'> + <ref name='unsignedInt'/> + </attribute> + </element> + </zeroOrMore> + </element> + </optional> </define> <define name='capusbdev'> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index e65b5e4..70634cc 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -58,6 +58,9 @@ VIR_ENUM_IMPL(virNodeDevNetCap, VIR_NODE_DEV_CAP_NET_LAST, "80203", "80211") +VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST, + "", "2.5", "5", "8") + static int virNodeDevCapsDefParseString(const char *xpath, xmlXPathContextPtr ctxt, @@ -218,6 +221,35 @@ void virNodeDeviceObjRemove(virNodeDeviceObjListPtr devs, } } +static void +virPCIELinkFormat(virBufferPtr buf, + virPCIELinkPtr lnk, + const char *attrib) +{ + virBufferAsprintf(buf, "<link validity='%s'", attrib); + if (lnk->port >= 0) + virBufferAsprintf(buf, " port='%d'", lnk->port); + if (lnk->speed) + virBufferAsprintf(buf, " speed='%s'", + virPCIELinkSpeedTypeToString(lnk->speed)); + virBufferAsprintf(buf, " width='%d'", lnk->width); + virBufferAddLit(buf, "/>\n"); +} + +static void +virPCIEDeviceInfoFormat(virBufferPtr buf, + virPCIEDeviceInfoPtr info) +{ + virBufferAddLit(buf, "<pci-express>\n"); + virBufferAdjustIndent(buf, 2); + + virPCIELinkFormat(buf, &info->link_cap, "cap"); + virPCIELinkFormat(buf, &info->link_sta, "sta"); + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</pci-express>\n"); +} + char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) { virBuffer buf = VIR_BUFFER_INITIALIZER; @@ -346,6 +378,8 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "</iommuGroup>\n"); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCIE) + virPCIEDeviceInfoFormat(&buf, data->pci_dev.pci_express); break; case VIR_NODE_DEV_CAP_USB_DEV: virBufferAsprintf(&buf, "<bus>%d</bus>\n", data->usb_dev.bus); @@ -1043,6 +1077,87 @@ virNodeDevCapPCIDevIommuGroupParseXML(xmlXPathContextPtr ctxt, return ret; } +static int +virNodeDevCapPCIDevPCIExpressParseLinkXML(xmlXPathContextPtr ctxt, + xmlNodePtr linkNode, + virPCIELinkPtr lnk) +{ + xmlNodePtr origNode = ctxt->node; + int ret = -1, speed; + char *speedStr = NULL, *portStr = NULL; + + ctxt->node = linkNode; + + if (virXPathUInt("number(./@width)", ctxt, &lnk->width) < 0) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("mandatory attribute 'width' is missing or malformed")); + goto cleanup; + } + + if ((speedStr = virXPathString("string(./@speed)", ctxt))) { + if ((speed = virPCIELinkSpeedTypeFromString(speedStr)) < 0) { + virReportError(VIR_ERR_XML_DETAIL, + _("malformed 'speed' attribute: %s"), + speedStr); + goto cleanup; + } + lnk->speed = speed; + } + + if ((portStr = virXPathString("string(./@port)", ctxt))) { + if (virStrToLong_i(portStr, NULL, 10, &lnk->port) < 0) { + virReportError(VIR_ERR_XML_DETAIL, + _("malformed 'port' attribute: %s"), + portStr); + goto cleanup; + } + } else { + lnk->port = -1; + } + + ret = 0; + cleanup: + VIR_FREE(portStr); + VIR_FREE(speedStr); + ctxt->node = origNode; + return ret; +} + +static int +virNodeDevCapPCIDevPCIExpressParseXML(xmlXPathContextPtr ctxt, + xmlNodePtr pciExpressNode, + union _virNodeDevCapData *data) +{ + xmlNodePtr lnk, origNode = ctxt->node; + int ret = -1; + virPCIEDeviceInfoPtr pci_express = NULL; + + ctxt->node = pciExpressNode; + + if (VIR_ALLOC(pci_express) < 0) + goto cleanup; + + if ((lnk = virXPathNode("./link[@validity='cap']", ctxt)) && + virNodeDevCapPCIDevPCIExpressParseLinkXML(ctxt, lnk, + &pci_express->link_cap) < 0) + goto cleanup; + + if ((lnk = virXPathNode("./link[@validity='sta']", ctxt)) && + virNodeDevCapPCIDevPCIExpressParseLinkXML(ctxt, lnk, + &pci_express->link_sta) < 0) + goto cleanup; + + + data->pci_dev.pci_express = pci_express; + pci_express = NULL; + data->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCIE; + ret = 0; + cleanup: + VIR_FREE(pci_express); + ctxt->node = origNode; + return ret; +} + static int virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt, @@ -1050,7 +1165,7 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, union _virNodeDevCapData *data) { - xmlNodePtr orignode, iommuGroupNode; + xmlNodePtr orignode, iommuGroupNode, pciExpress; int ret = -1; orignode = ctxt->node; @@ -1101,6 +1216,12 @@ virNodeDevCapPCIDevParseXML(xmlXPathContextPtr ctxt, goto out; } } + + if ((pciExpress = virXPathNode("./pci-express[1]", ctxt))) { + if (virNodeDevCapPCIDevPCIExpressParseXML(ctxt, pciExpress, data) < 0) + goto out; + } + ret = 0; out: ctxt->node = orignode; diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 50e6805..563bf6a 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -75,10 +75,36 @@ typedef enum { } virNodeDevSCSIHostCapFlags; typedef enum { - VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), - VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), + VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), + VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), + VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2), } virNodeDevPCICapFlags; +typedef enum { + VIR_PCIE_LINK_SPEED_NA = 0, + VIR_PCIE_LINK_SPEED_25, + VIR_PCIE_LINK_SPEED_5, + VIR_PCIE_LINK_SPEED_8, + VIR_PCIE_LINK_SPEED_LAST +} virPCIELinkSpeed; + +VIR_ENUM_DECL(virPCIELinkSpeed) + +typedef struct _virPCIELink virPCIELink; +typedef virPCIELink *virPCIELinkPtr; +struct _virPCIELink { + int port; + virPCIELinkSpeed speed; + unsigned int width; +}; + +typedef struct _virPCIEDeviceInfo virPCIEDeviceInfo; +typedef virPCIEDeviceInfo *virPCIEDeviceInfoPtr; +struct _virPCIEDeviceInfo { + virPCIELink link_cap; /* PCIe device link capabilities */ + virPCIELink link_sta; /* Actually negotiated capabilities */ +}; + typedef struct _virNodeDevCapsDef virNodeDevCapsDef; typedef virNodeDevCapsDef *virNodeDevCapsDefPtr; struct _virNodeDevCapsDef { @@ -115,6 +141,7 @@ struct _virNodeDevCapsDef { virPCIDeviceAddressPtr *iommuGroupDevices; size_t nIommuGroupDevices; unsigned int iommuGroupNumber; + virPCIEDeviceInfoPtr pci_express; } pci_dev; struct { unsigned int bus; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 9a951d9..9ed869b 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -425,6 +425,8 @@ static int udevProcessPCI(struct udev_device *device, const char *syspath = NULL; union _virNodeDevCapData *data = &def->caps->data; virPCIDeviceAddress addr; + virPCIEDeviceInfoPtr pci_express = NULL; + virPCIDevicePtr pciDev = NULL; int tmpGroup, ret = -1; char *p; int rc; @@ -522,9 +524,38 @@ static int udevProcessPCI(struct udev_device *device, data->pci_dev.iommuGroupNumber = tmpGroup; } + if (!(pciDev = virPCIDeviceNew(data->pci_dev.domain, + data->pci_dev.bus, + data->pci_dev.slot, + data->pci_dev.function))) + goto out; + + if (virPCIDeviceIsPCIExpress(pciDev) > 0) { + if (VIR_ALLOC(pci_express) < 0) + goto out; + + if (virPCIDeviceGetLinkCap(pciDev, + &pci_express->link_cap.port, + &pci_express->link_cap.speed, + &pci_express->link_cap.width) < 0) + goto out; + + if (virPCIDeviceGetLinkSta(pciDev, + &pci_express->link_sta.speed, + &pci_express->link_sta.width) < 0) + goto out; + + pci_express->link_sta.port = -1; /* PCIe can't negotiate port. Yet :) */ + data->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCIE; + data->pci_dev.pci_express = pci_express; + pci_express = NULL; + } + ret = 0; out: + virPCIDeviceFree(pciDev); + VIR_FREE(pci_express); return ret; } diff --git a/tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml b/tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml new file mode 100644 index 0000000..18172e9 --- /dev/null +++ b/tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml @@ -0,0 +1,26 @@ +<device> + <name>pci_0000_03_00_0</name> + <parent>pci_0000_00_1c_1</parent> + <capability type='pci'> + <domain>0</domain> + <bus>3</bus> + <slot>0</slot> + <function>0</function> + <product id='0x4238'>Centrino Ultimate-N 6300</product> + <vendor id='0x8086'>Intel Corporation</vendor> + <iommuGroup number='8'> + <address domain='0x0000' bus='0x00' slot='0x1c' function='0x0'/> + <address domain='0x0000' bus='0x00' slot='0x1c' function='0x1'/> + <address domain='0x0000' bus='0x00' slot='0x1c' function='0x3'/> + <address domain='0x0000' bus='0x00' slot='0x1c' function='0x4'/> + <address domain='0x0000' bus='0x03' slot='0x00' function='0x0'/> + <address domain='0x0000' bus='0x0d' slot='0x00' function='0x0'/> + <address domain='0x0000' bus='0x0d' slot='0x00' function='0x1'/> + <address domain='0x0000' bus='0x0d' slot='0x00' function='0x3'/> + </iommuGroup> + <pci-express> + <link validity='cap' port='1' speed='2.5' width='1'/> + <link validity='sta' speed='2.5' width='1'/> + </pci-express> + </capability> +</device> diff --git a/tests/nodedevxml2xmltest.c b/tests/nodedevxml2xmltest.c index 9390bf5..6d57318 100644 --- a/tests/nodedevxml2xmltest.c +++ b/tests/nodedevxml2xmltest.c @@ -88,6 +88,7 @@ mymain(void) DO_TEST("storage_serial_SATA_HTS721010G9SA00_MPCZ12Y0GNGWSE"); DO_TEST("usb_device_1d6b_1_0000_00_1d_0_if0"); DO_TEST("usb_device_1d6b_1_0000_00_1d_0"); + DO_TEST("pci_8086_4238_pcie_wireless"); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.0.0

On Fri, Jun 06, 2014 at 12:54:18PM +0200, Michal Privoznik wrote:
This new element is there to represent PCI-Express capabilities of a PCI devices, like link speed, number of lanes, etc.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatnode.html.in | 19 ++++ docs/schemas/nodedev.rng | 26 +++++ src/conf/node_device_conf.c | 123 ++++++++++++++++++++- src/conf/node_device_conf.h | 31 +++++- src/node_device/node_device_udev.c | 31 ++++++ .../pci_8086_4238_pcie_wireless.xml | 26 +++++ tests/nodedevxml2xmltest.c | 1 + 7 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 tests/nodedevschemadata/pci_8086_4238_pcie_wireless.xml
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index b424c96..46ec2bc 100644 --- a/docs/formatnode.html.in +++ b/docs/formatnode.html.in @@ -110,6 +110,21 @@ have a list of <code>address</code> subelements, one for each VF on this PF. </dd> + <dt><code>pci-express</code></dt> + <dd> + This optional element contains information on PCI Express part of + the device. For example, it can contain a child element + <code>link</code> which addresses the PCI Express device's link. + While a device has it's own capabilities + (<code>validity='cap'</code>), the actual run time capabilities + are negotiated on the device initialization + (<code>validity='sta'</code>). The <code>link</code> element then + contains three attributes: <code>port</code> which says in which + port is the device plugged in, <code>speed</code> (in + GigaTransfers per second) and <code>width</code> for the number + of lanes used. Since the port can't be negotiated, it's not + exposed in <code>./pci-express/link/[@validity='sta']</code>. + </dd> </dl> </dd> <dt><code>usb_device</code></dt> @@ -291,6 +306,10 @@ <address domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> <address domain='0x0000' bus='0x02' slot='0x00' function='0x1'/> </iommuGroup> + <pci-express> + <link validity='cap' port='1' speed='2.5' width='1'/> + <link validity='sta' speed='2.5' width='1'/> + </pci-express> </capability> </device> </pre> diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 81ab4d4..79e8fd2 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -158,6 +158,32 @@ </element> </optional>
+ <optional> + <element name='pci-express'> + <zeroOrMore> + <element name='link'> + <attribute name='validity'> + <choice> + <value>cap</value> + <value>sta</value> + </choice> + </attribute> + <optional> + <attribute name='port'> + <ref name='unsignedInt'/> + </attribute> + </optional> + <optional> + <attribute name='speed'>
If you don't want to name the values here, there should be at least <text/> or <data type="string"/>. Maybe the best would be to do (not tested): <data type="string"> <param name="pattern">[0-9]+(.[0-9]+)?</param> </data>
+ </attribute> + </optional> + <attribute name='width'> + <ref name='unsignedInt'/> + </attribute> + </element> + </zeroOrMore> + </element> + </optional> </define>
<define name='capusbdev'> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index e65b5e4..70634cc 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -58,6 +58,9 @@ VIR_ENUM_IMPL(virNodeDevNetCap, VIR_NODE_DEV_CAP_NET_LAST, "80203", "80211")
+VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST, + "", "2.5", "5", "8") + static int virNodeDevCapsDefParseString(const char *xpath, xmlXPathContextPtr ctxt, @@ -218,6 +221,35 @@ void virNodeDeviceObjRemove(virNodeDeviceObjListPtr devs, } }
+static void +virPCIELinkFormat(virBufferPtr buf, + virPCIELinkPtr lnk, + const char *attrib) +{ + virBufferAsprintf(buf, "<link validity='%s'", attrib); + if (lnk->port >= 0) + virBufferAsprintf(buf, " port='%d'", lnk->port); + if (lnk->speed) + virBufferAsprintf(buf, " speed='%s'", + virPCIELinkSpeedTypeToString(lnk->speed)); + virBufferAsprintf(buf, " width='%d'", lnk->width); + virBufferAddLit(buf, "/>\n"); +} + +static void +virPCIEDeviceInfoFormat(virBufferPtr buf, + virPCIEDeviceInfoPtr info) +{ + virBufferAddLit(buf, "<pci-express>\n"); + virBufferAdjustIndent(buf, 2); + + virPCIELinkFormat(buf, &info->link_cap, "cap"); + virPCIELinkFormat(buf, &info->link_sta, "sta"); + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</pci-express>\n"); +} + char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) { virBuffer buf = VIR_BUFFER_INITIALIZER; @@ -346,6 +378,8 @@ char *virNodeDeviceDefFormat(const virNodeDeviceDef *def) virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "</iommuGroup>\n"); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCIE) + virPCIEDeviceInfoFormat(&buf, data->pci_dev.pci_express); break; case VIR_NODE_DEV_CAP_USB_DEV: virBufferAsprintf(&buf, "<bus>%d</bus>\n", data->usb_dev.bus); @@ -1043,6 +1077,87 @@ virNodeDevCapPCIDevIommuGroupParseXML(xmlXPathContextPtr ctxt, return ret; }
+static int +virNodeDevCapPCIDevPCIExpressParseLinkXML(xmlXPathContextPtr ctxt, + xmlNodePtr linkNode, + virPCIELinkPtr lnk) +{ + xmlNodePtr origNode = ctxt->node; + int ret = -1, speed; + char *speedStr = NULL, *portStr = NULL; + + ctxt->node = linkNode; + + if (virXPathUInt("number(./@width)", ctxt, &lnk->width) < 0) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("mandatory attribute 'width' is missing or malformed")); + goto cleanup; + } + + if ((speedStr = virXPathString("string(./@speed)", ctxt))) { + if ((speed = virPCIELinkSpeedTypeFromString(speedStr)) < 0) { + virReportError(VIR_ERR_XML_DETAIL, + _("malformed 'speed' attribute: %s"), + speedStr); + goto cleanup; + } + lnk->speed = speed; + }
It would be nice (maybe for some future code) if you reset lnk->speed to 0 if the provided structure is not clean. Even though you allocate it currently with VIR_ALLOC.
+ + if ((portStr = virXPathString("string(./@port)", ctxt))) { + if (virStrToLong_i(portStr, NULL, 10, &lnk->port) < 0) { + virReportError(VIR_ERR_XML_DETAIL, + _("malformed 'port' attribute: %s"), + portStr); + goto cleanup; + } + } else { + lnk->port = -1; + }
The same way you do it here with port.
+ + ret = 0; + cleanup: + VIR_FREE(portStr); + VIR_FREE(speedStr); + ctxt->node = origNode; + return ret; +} + +static int +virNodeDevCapPCIDevPCIExpressParseXML(xmlXPathContextPtr ctxt,
How about virPCIEDeviceInfoParse()? And s/Parse/LinkParse/ for the one above. Martin
participants (2)
-
Martin Kletzander
-
Michal Privoznik