[libvirt] [PATCH v2 0/4] Interface pools and passthrough mode

Interface Pools and Passthrough mode: Current Method: The passthrough mode uses a macvtap direct connection to connect each guest to the network. The physical interface to be used is picked from among those listed in <interface> sub elements of the <forward> element. The current specification for <forward> extends to allow 0 or more <interface> sub-elements: Example: <forward mode='passthrough' dev='eth10'/> <interface dev='eth10'/> <interface dev='eth12'/> <interface dev='eth18'/> <interface dev='eth20'/> </forward> However with an ethernet card with 64 VF's or more, the above method gets tedious on the system. On the other hand, just parameterizing a string (eth%d) is inadequate, eg, when there are multiple non-contiguous ranges. Proposed Method: The 4 patches provided: i) Introduce a new element 'pf' to implicitly create an interface pool of all the Virtual Functions attached to the specified Physical Function. ii) Modify the networkAllocateActualDevice, networkNotifyActualDevice and networkReleaseActualDevice API to use the above mentioned interface pool in the passthrough mode. Hence Libvirt will now support both the methods mentioned below: * Explicit interface list. App inputs: <forward mode='passthrough'> <interface dev='eth10'/> <interface dev='eth11'/> <interface dev='eth12'/> <interface dev='eth13'/> </forward> libvirt does not change XML * Automatically interface list from PF. App inputs: <forward mode='passthrough'> <pf dev='eth0'/> </forward> libvirt expands XML to be <forward mode='passthrough'> <pf dev='eth0'/> <interface dev='eth10'/> <interface dev='eth11'/> <interface dev='eth12'/> <interface dev='eth13'/> </forward> In the above case we need to differentiate between the implicit and explicit interface pool, which can be done by comparing the dumpxml from active and inactive domains. This will need the addition of the flag VIR_NETWORK_XML_INACTIVE to virNetworkGetXMLDesc() as mentioned in a previous suggestion. I will post this as a separate patch series. Shradha Shah (4): Added function pciSysfsFile to enable access to the PCI SYSFS files. Added Function virNetDevGetVirtualFunctions Adding the element pf to network xml. Functionality to implicitly get interface pool from SR-IOV PF. docs/schemas/network.rng | 7 +++ src/conf/network_conf.c | 69 ++++++++++++++++++++++++++++++-- src/conf/network_conf.h | 3 + src/network/bridge_driver.c | 93 +++++++++++++++++++++++++++++++++--------- src/util/pci.c | 39 ++++++++++++++++++ src/util/pci.h | 7 +++ src/util/virnetdev.c | 83 ++++++++++++++++++++++++++++++++++++++ src/util/virnetdev.h | 6 +++ 8 files changed, 283 insertions(+), 24 deletions(-) -- 1.7.4.4

--- src/util/pci.c | 16 ++++++++++++++++ src/util/pci.h | 2 ++ 2 files changed, 18 insertions(+), 0 deletions(-) diff --git a/src/util/pci.c b/src/util/pci.c index cd82b43..d0ba4c5 100644 --- a/src/util/pci.c +++ b/src/util/pci.c @@ -2027,6 +2027,22 @@ out: } /* + * Returns a path to the PCI sysfs file given the BDF of the PCI function + */ + +int +pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link) +{ + if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s", + pciDeviceName) < 0) { + virReportOOMError(); + return -1; + } + + return 0; +} + +/* * Returns the network device name of a pci device */ int diff --git a/src/util/pci.h b/src/util/pci.h index 76e37e3..f98e745 100644 --- a/src/util/pci.h +++ b/src/util/pci.h @@ -109,4 +109,6 @@ int pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link, int pciDeviceNetName(char *device_link_sysfs_path, char **netname); +int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link); + #endif /* __VIR_PCI_H__ */ -- 1.7.4.4

This functions enables us to get the Virtual Functions attached to a Physical function given the name of a SR-IOV physical functio. In order to accomplish the task, added a getter function pciGetDeviceAddrString to get the BDF of the Virtual Function in a char array. --- src/util/pci.c | 23 ++++++++++++++ src/util/pci.h | 5 +++ src/util/virnetdev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnetdev.h | 6 +++ 4 files changed, 117 insertions(+), 0 deletions(-) diff --git a/src/util/pci.c b/src/util/pci.c index d0ba4c5..ca05e8f 100644 --- a/src/util/pci.c +++ b/src/util/pci.c @@ -1293,6 +1293,29 @@ pciReadDeviceID(pciDevice *dev, const char *id_name) return id_str; } +int +pciGetDeviceAddrString(unsigned domain, + unsigned bus, + unsigned slot, + unsigned function, + char **pciConfigAddr) +{ + pciDevice *dev = NULL; + int ret = -1; + + dev = pciGetDevice(domain, + bus, + slot, + function); + if (dev != NULL) { + *pciConfigAddr = strdup(dev->name); + ret = 0; + } + + pciFreeDevice(dev); + return ret; +} + pciDevice * pciGetDevice(unsigned domain, unsigned bus, diff --git a/src/util/pci.h b/src/util/pci.h index f98e745..b7723b1 100644 --- a/src/util/pci.h +++ b/src/util/pci.h @@ -111,4 +111,9 @@ int pciDeviceNetName(char *device_link_sysfs_path, char **netname); int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link); +int pciGetDeviceAddrString(unsigned domain, + unsigned bus, + unsigned slot, + unsigned function, + char **pciConfigAddr); #endif /* __VIR_PCI_H__ */ diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index fee87ef..3bbb76e 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -970,6 +970,78 @@ virNetDevSysfsDeviceFile(char **pf_sysfs_device_link, const char *ifname, } /** + * virNetDevGetVirtualFunctions: + * + * @pfname : name of the physical function interface name + * @vfname: array that will hold the interface names of the virtual_functions + * @n_vfname: pointer to the number of virtual functions + * + * Returns 0 on success and -1 on failure + */ + +int +virNetDevGetVirtualFunctions(const char *pfname, + char ***vfname, + unsigned int *n_vfname) +{ + int ret = -1, i; + char *pf_sysfs_device_link = NULL; + char *pci_sysfs_device_link = NULL; + struct pci_config_address **virt_fns; + char *pciConfigAddr; + + if (virNetDevSysfsFile(&pf_sysfs_device_link, pfname, "device") < 0) + return ret; + + if (pciGetVirtualFunctions(pf_sysfs_device_link, &virt_fns, + n_vfname) < 0) { + virReportSystemError(ENOSYS, "%s", + _("Failed to get virtual functions")); + goto out; + } + + if (VIR_ALLOC_N(*vfname, *n_vfname) < 0) { + virReportOOMError(); + goto out; + } + + for (i = 0; i < *n_vfname; i++) + { + if (pciGetDeviceAddrString(virt_fns[i]->domain, + virt_fns[i]->bus, + virt_fns[i]->slot, + virt_fns[i]->function, + &pciConfigAddr) < 0) { + virReportSystemError(ENOSYS, "%s", + _("Failed to get PCI Config Address String")); + goto out; + } + if (pciSysfsFile(pciConfigAddr, &pci_sysfs_device_link) < 0) { + virReportSystemError(ENOSYS, "%s", + _("Failed to get PCI SYSFS file")); + goto out; + } + + if (pciDeviceNetName(pci_sysfs_device_link, &((*vfname)[i])) < 0) { + virReportSystemError(ENOSYS, "%s", + _("Failed to get interface name of the VF")); + goto out; + } + } + + ret = 0; + +out: + for (i = 0; i < *n_vfname; i++) + VIR_FREE(virt_fns[i]); + VIR_FREE(virt_fns); + VIR_FREE(pf_sysfs_device_link); + VIR_FREE(pci_sysfs_device_link); + VIR_FREE(pciConfigAddr); + return ret; +} + +/** * virNetDevIsVirtualFunction: * @ifname : name of the interface * @@ -1056,6 +1128,17 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname) return ret; } #else /* !__linux__ */ + +int +virNetDevGetVirtualFunctions(const char *pfname ATTRIBUTE_UNUSED, + char ***vfname ATTRIBUTE_UNUSED, + unsigned int *n_vfname ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get virtual functions on this platfornm")); + return -1; +} + int virNetDevIsVirtualFunction(const char *ifname ATTRIBUTE_UNUSED) { diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index 13ba1da..0a885d5 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -99,4 +99,10 @@ int virNetDevGetVirtualFunctionIndex(const char *pfname, const char *vfname, int virNetDevGetPhysicalFunction(const char *ifname, char **pfname) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevGetVirtualFunctions(const char *pfname, + char ***vfname, + unsigned int *n_vfname) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3) + ATTRIBUTE_RETURN_CHECK; + #endif /* __VIR_NETDEV_H__ */ -- 1.7.4.4

This element will help the user to just specify the SR-IOV physical function in order to access all the Virtual functions attached to it. --- docs/schemas/network.rng | 7 ++++ src/conf/network_conf.c | 69 +++++++++++++++++++++++++++++++++++++++++++-- src/conf/network_conf.h | 3 ++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/docs/schemas/network.rng b/docs/schemas/network.rng index 937e180..64711ba 100644 --- a/docs/schemas/network.rng +++ b/docs/schemas/network.rng @@ -92,6 +92,13 @@ </attribute> </element> </zeroOrMore> + <optional> + <element name='pf'> + <attribute name='dev'> + <ref name='deviceName'/> + </attribute> + </element> + </optional> </element> </optional> diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c index 1058b07..9557e29 100644 --- a/src/conf/network_conf.c +++ b/src/conf/network_conf.c @@ -153,6 +153,11 @@ void virNetworkDefFree(virNetworkDefPtr def) VIR_FREE(def->bridge); VIR_FREE(def->domain); + for (ii = 0 ; ii < def->nForwardPfs && def->forwardPfs ; ii++) { + virNetworkForwardIfDefClear(&def->forwardPfs[ii]); + } + VIR_FREE(def->forwardPfs); + for (ii = 0 ; ii < def->nForwardIfs && def->forwardIfs ; ii++) { virNetworkForwardIfDefClear(&def->forwardIfs[ii]); } @@ -820,10 +825,11 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) xmlNodePtr *ipNodes = NULL; xmlNodePtr *portGroupNodes = NULL; xmlNodePtr *forwardIfNodes = NULL; + xmlNodePtr *forwardPfNodes = NULL; xmlNodePtr dnsNode = NULL; xmlNodePtr virtPortNode = NULL; xmlNodePtr forwardNode = NULL; - int nIps, nPortGroups, nForwardIfs; + int nIps, nPortGroups, nForwardIfs, nForwardPfs; char *forwardDev = NULL; xmlNodePtr save = ctxt->node; xmlNodePtr bandwidthNode = NULL; @@ -965,10 +971,52 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) /* all of these modes can use a pool of physical interfaces */ nForwardIfs = virXPathNodeSet("./interface", ctxt, &forwardIfNodes); - if (nForwardIfs < 0) + if (nForwardIfs <= 0) { + virNetworkReportError(VIR_ERR_XML_ERROR, + _("No interface pool given, checking for SRIOV pf")); + nForwardPfs = virXPathNodeSet("./pf", ctxt, &forwardPfNodes); + + if (nForwardPfs <= 0) { + virNetworkReportError(VIR_ERR_XML_ERROR, + _("No interface pool or SRIOV physical device given")); goto error; + } + } - if ((nForwardIfs > 0) || forwardDev) { + if (nForwardPfs == 1) { + + if (VIR_ALLOC_N(def->forwardPfs, nForwardPfs) < 0) { + virReportOOMError(); + goto error; + } + + if (forwardDev) { + virNetworkReportError(VIR_ERR_XML_ERROR, + _("A forward Dev should not be used when using a SRIOV PF")); + goto error; + } + + forwardDev = virXMLPropString(*forwardPfNodes, "dev"); + if (!forwardDev) { + virNetworkReportError(VIR_ERR_XML_ERROR, + _("Missing required dev attribute in network '%s' pf element"), + def->name); + goto error; + } + + def->forwardPfs->usageCount = 0; + def->forwardPfs->dev = forwardDev; + forwardDev = NULL; + def->nForwardPfs++; + } + + else if (nForwardPfs > 1) { + virNetworkReportError(VIR_ERR_XML_ERROR, + _("Use of more than one physical interface is not allowed")); + goto error; + } + + else if ((nForwardIfs > 0) || forwardDev) { int ii; /* allocate array to hold all the portgroups */ @@ -1014,6 +1062,8 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) def->nForwardIfs++; } } + VIR_FREE(forwardDev); + VIR_FREE(forwardPfNodes); VIR_FREE(forwardIfNodes); switch (def->forwardType) { @@ -1069,6 +1119,7 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) VIR_FREE(ipNodes); VIR_FREE(portGroupNodes); VIR_FREE(forwardIfNodes); + VIR_FREE(forwardPfNodes); VIR_FREE(forwardDev); ctxt->node = save; return NULL; @@ -1290,7 +1341,9 @@ char *virNetworkDefFormat(const virNetworkDefPtr def) virBufferAsprintf(&buf, " <uuid>%s</uuid>\n", uuidstr); if (def->forwardType != VIR_NETWORK_FORWARD_NONE) { - const char *dev = virNetworkDefForwardIf(def, 0); + char *dev = NULL; + if (def->nForwardPfs < 1) + dev = (char *)virNetworkDefForwardIf(def, 0); const char *mode = virNetworkForwardTypeToString(def->forwardType); if (!mode) { @@ -1305,6 +1358,14 @@ char *virNetworkDefFormat(const virNetworkDefPtr def) virBufferAsprintf(&buf, " mode='%s'%s>\n", mode, def->nForwardIfs ? "" : "/"); + + if (def->nForwardPfs) { + if (def->forwardPfs->dev) { + virBufferEscapeString(&buf, " <pf dev='%s'/>\n", + def->forwardPfs->dev); + } + } + if (def->nForwardIfs) { for (ii = 0; ii < def->nForwardIfs; ii++) { if (def->forwardIfs[ii].dev) { diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h index 1be20f8..e25f8d3 100644 --- a/src/conf/network_conf.h +++ b/src/conf/network_conf.h @@ -146,6 +146,9 @@ struct _virNetworkDef { /* If there are multiple forward devices (i.e. a pool of * interfaces), they will be listed here. */ + size_t nForwardPfs; + virNetworkForwardIfDefPtr forwardPfs; + size_t nForwardIfs; virNetworkForwardIfDefPtr forwardIfs; -- 1.7.4.4

If a system has 64 or more VF's, it is quite tedious to mention each VF in the interface pool. The following modification will implicitly create an interface pool from the SR-IOV PF. --- src/network/bridge_driver.c | 93 +++++++++++++++++++++++++++++++++--------- 1 files changed, 73 insertions(+), 20 deletions(-) diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 63338a2..9ae9c50 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -2817,15 +2817,19 @@ networkAllocateActualDevice(virDomainNetDefPtr iface) /* If there is only a single device, just return it (caller will detect * any error if exclusive use is required but could not be acquired). */ - if (netdef->nForwardIfs == 0) { + if ((netdef->nForwardIfs <= 0) && (netdef->nForwardPfs <= 0)) { networkReportError(VIR_ERR_INTERNAL_ERROR, _("network '%s' uses a direct mode, but has no forward dev and no interface pool"), netdef->name); goto cleanup; - } else { + } + + else { int ii; virNetworkForwardIfDefPtr dev = NULL; - + unsigned int num_virt_fns = 0; + char **vfname = NULL; + /* pick an interface from the pool */ /* PASSTHROUGH mode, and PRIVATE Mode + 802.1Qbh both require @@ -2833,20 +2837,67 @@ networkAllocateActualDevice(virDomainNetDefPtr iface) * 0. Other modes can share, so just search for the one with * the lowest usageCount. */ - if ((netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH) || - ((netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) && - iface->data.network.actual->data.direct.virtPortProfile && - (iface->data.network.actual->data.direct.virtPortProfile->virtPortType - == VIR_NETDEV_VPORT_PROFILE_8021QBH))) { + if (netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH) { + if ((netdef->nForwardPfs > 0) && (netdef->nForwardIfs <= 0)) { + if ((virNetDevGetVirtualFunctions(netdef->forwardPfs->dev, + &vfname, &num_virt_fns)) < 0){ + networkReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not get Virtual functions on %s"), + netdef->forwardPfs->dev); + goto out; + } + + if (num_virt_fns == 0) { + networkReportError(VIR_ERR_INTERNAL_ERROR, + _("No Vf's present on SRIOV PF %s"), + netdef->forwardPfs->dev); + goto out; + } + + if ((VIR_ALLOC_N(netdef->forwardIfs, num_virt_fns)) < 0) { + virReportOOMError(); + goto out; + } + + netdef->nForwardIfs = num_virt_fns; + + for (ii = 0; ii < netdef->nForwardIfs; ii++) { + netdef->forwardIfs[ii].dev = strdup(vfname[ii]); + netdef->forwardIfs[ii].usageCount = 0; + } + + out: + for (ii = 0; ii < num_virt_fns; ii++) { + VIR_FREE(vfname[ii]); + } + VIR_FREE(vfname); + } + /* pick first dev with 0 usageCount */ - + for (ii = 0; ii < netdef->nForwardIfs; ii++) { if (netdef->forwardIfs[ii].usageCount == 0) { dev = &netdef->forwardIfs[ii]; break; } } - } else { + } + else if ((netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) && + iface->data.network.actual->data.direct.virtPortProfile && + (iface->data.network.actual->data.direct.virtPortProfile->virtPortType + == VIR_NETDEV_VPORT_PROFILE_8021QBH)) { + + + /* pick first dev with 0 usageCount */ + + for (ii = 0; ii < netdef->nForwardIfs; ii++) { + if (netdef->forwardIfs[ii].usageCount == 0) { + dev = &netdef->forwardIfs[ii]; + break; + } + } + } + else { /* pick least used dev */ dev = &netdef->forwardIfs[0]; for (ii = 1; ii < netdef->nForwardIfs; ii++) { @@ -2858,7 +2909,7 @@ networkAllocateActualDevice(virDomainNetDefPtr iface) if (!dev) { networkReportError(VIR_ERR_INTERNAL_ERROR, _("network '%s' requires exclusive access to interfaces, but none are available"), - netdef->name); + netdef->name); goto cleanup; } iface->data.network.actual->data.direct.linkdev = strdup(dev->dev); @@ -2901,6 +2952,7 @@ networkNotifyActualDevice(virDomainNetDefPtr iface) virNetworkObjPtr network; virNetworkDefPtr netdef; const char *actualDev; + virNetworkForwardIfDefPtr dev = NULL; int ret = -1; if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK) @@ -2935,12 +2987,12 @@ networkNotifyActualDevice(virDomainNetDefPtr iface) _("network '%s' uses a direct mode, but has no forward dev and no interface pool"), netdef->name); goto cleanup; - } else { + } + else { int ii; - virNetworkForwardIfDefPtr dev = NULL; - + /* find the matching interface in the pool and increment its usageCount */ - + for (ii = 0; ii < netdef->nForwardIfs; ii++) { if (STREQ(actualDev, netdef->forwardIfs[ii].dev)) { dev = &netdef->forwardIfs[ii]; @@ -2954,7 +3006,7 @@ networkNotifyActualDevice(virDomainNetDefPtr iface) netdef->name, actualDev); goto cleanup; } - + /* PASSTHROUGH mode, and PRIVATE Mode + 802.1Qbh both require * exclusive access to a device, so current usageCount must be * 0 in those cases. @@ -3001,6 +3053,7 @@ networkReleaseActualDevice(virDomainNetDefPtr iface) virNetworkObjPtr network = NULL; virNetworkDefPtr netdef; const char *actualDev; + virNetworkForwardIfDefPtr dev = NULL; int ret = -1; if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK) @@ -3036,10 +3089,10 @@ networkReleaseActualDevice(virDomainNetDefPtr iface) _("network '%s' uses a direct mode, but has no forward dev and no interface pool"), netdef->name); goto cleanup; - } else { + } + else { int ii; - virNetworkForwardIfDefPtr dev = NULL; - + for (ii = 0; ii < netdef->nForwardIfs; ii++) { if (STREQ(actualDev, netdef->forwardIfs[ii].dev)) { dev = &netdef->forwardIfs[ii]; @@ -3053,7 +3106,7 @@ networkReleaseActualDevice(virDomainNetDefPtr iface) netdef->name, actualDev); goto cleanup; } - + dev->usageCount--; VIR_DEBUG("Releasing physical device %s, usageCount %d", dev->dev, dev->usageCount); -- 1.7.4.4
participants (1)
-
Shradha Shah