Daniel P. Berrange wrote:
On Tue, Dec 01, 2009 at 12:03:49PM -0500, Dave Allan wrote:
> Daniel P. Berrange wrote:
>> On Tue, Dec 01, 2009 at 10:33:08AM -0500, Dave Allan wrote:
>>> Attached is a patch that exposes the relationships between physical and
>>> virtual functions on SR IOV capable devices.
>>>
>>
>>> + if (data->pci_dev.physical_function) {
>>> + virBufferEscapeString(&buf, "
>>> <physical_function>%s</physical_function>\n",
>>> + data->pci_dev.physical_function);
>>> + }
>>> + if (data->pci_dev.num_virtual_functions > 0) {
>>> + for (i = 0 ; i < data->pci_dev.num_virtual_functions ;
>>> i++) {
>>> + virBufferEscapeString(&buf, "
>>> <virtual_function>%s</virtual_function>\n",
>>> +
>>> data->pci_dev.virtual_functions[i]);
>>> + }
>>> + }
>>
>> What is the actual content of those two new elements ? Are they
>> the names of the corresponding device, suitble for a
>> virNodeDevLooupByName() ?
> They are the PCI device BDF or config address, e.g.:
>
> <virtual_function>0000:01:10.0</virtual_function>
>
> so, no, they aren't suitable for lookup by name. Using the names of the
> corresponding devices was my first attempt, but there is a dependency
> problem there. At the time that we process one device, the others
> aren't necessarily created in libvirt, so it's not possible to look them
> up. If we wanted to go that route, we'd have to do additional work at
> the time of dumping the xml, which I think is a little kludgy. I'd
> rather we created an API to lookup a libvirt device by its BDF.
I don't much like including the raw BDF format, because it is effectively
adding a 3rd way of specifying PCI addresses in libvirt XML. We already
have 2 different ways, which is 1 too many
Node dev XML
<capability type='pci'>
<domain>0</domain>
<bus>0</bus>
<slot>31</slot>
<function>1</function>
</capability>
Domain XML for host devices & guest addresses
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x5'/>
Yes, my bad for screwing up the nodedev XML format when I designed
it, stupidly choosing decimal instead of hex :-(
I think we should make the virtual/physical function follow the domain
XML style, with an <address/> element. Perhaps also use nested capabilities
in the way we did for NPIV host/vport stuff
<capability type='physical_function'>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x1'/>
</capability>
<capability type='virtual_functions'>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x2'/>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x3'/>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x4'/>
</capability>
I'd even be inclined to retro-fit the existing <capability type='pci'>
XML to duplicate the address info in this saner format eg
<capability type='pci'>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x1' />
<domain>0</domain>
<bus>0</bus>
<slot>31</slot>
<function>1</function>
</capability>
So, a PCI card with SR-IOV would end up looking like
<capability type='pci'>
<domain>0</domain>
<bus>0</bus>
<slot>31</slot>
<function>1</function>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x1' />
<capability type='virtual_functions'>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x2'/>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x3'/>
<address domain='0x0000' bus='0x00' slot='0x1f'
function='0x4'/>
</capability>
</capability>
The nice thing about this kind of nesting is that it would let us show
that a card is capable of exposing virtual functions, even if it does
not currently have any enabled
Regards,
Daniel
Here's an updated patch reflecting the above suggestions.
Dave
From d11d00d93c4bfbfd1125560ce80abfdbf88de94b Mon Sep 17 00:00:00 2001
From: David Allan <dallan(a)redhat.com>
Date: Mon, 30 Nov 2009 15:58:47 -0500
Subject: [PATCH 1/1] Add SR IOV physical and virtual function relationships
---
src/conf/node_device_conf.c | 30 +++++
src/conf/node_device_conf.h | 16 +++
src/node_device/node_device_driver.h | 14 ++
src/node_device/node_device_hal.c | 6 +
src/node_device/node_device_linux_sysfs.c | 200 ++++++++++++++++++++++++++++-
5 files changed, 265 insertions(+), 1 deletions(-)
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index 6003ab1..a8ecfb0 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -246,6 +246,7 @@ char *virNodeDeviceDefFormat(virConnectPtr conn,
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
virNodeDevCapsDefPtr caps;
+ unsigned int i = 0;
char *tmp;
virBufferAddLit(&buf, "<device>\n");
@@ -318,6 +319,30 @@ char *virNodeDeviceDefFormat(virConnectPtr conn,
data->pci_dev.vendor_name);
else
virBufferAddLit(&buf, " />\n");
+ if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION)
{
+ virBufferAddLit(&buf, " <capability type='physical
function'>\n");
+ virBufferVSprintf(&buf,
+ " <address domain='0x%.4x'
bus='0x%.2x' "
+ "slot='0x%.2x'
function='0x%.1x'/>\n",
+ data->pci_dev.physical_function->domain,
+ data->pci_dev.physical_function->bus,
+ data->pci_dev.physical_function->slot,
+ data->pci_dev.physical_function->function);
+ virBufferAddLit(&buf, " </capability>\n");
+ }
+ if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION)
{
+ virBufferAddLit(&buf, " <capability type='virtual
functions'>\n");
+ for (i = 0 ; i < data->pci_dev.num_virtual_functions ; i++) {
+ virBufferVSprintf(&buf,
+ " <address domain='0x%.4x'
bus='0x%.2x' "
+ "slot='0x%.2x'
function='0x%.1x'/>\n",
+ data->pci_dev.virtual_functions[i]->domain,
+ data->pci_dev.virtual_functions[i]->bus,
+ data->pci_dev.virtual_functions[i]->slot,
+
data->pci_dev.virtual_functions[i]->function);
+ }
+ virBufferAddLit(&buf, " </capability>\n");
+ }
break;
case VIR_NODE_DEV_CAP_USB_DEV:
virBufferVSprintf(&buf, " <bus>%d</bus>\n",
data->usb_dev.bus);
@@ -1387,6 +1412,7 @@ out:
void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
{
+ int i = 0;
union _virNodeDevCapData *data = &caps->data;
switch (caps->type) {
@@ -1402,6 +1428,10 @@ void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps)
case VIR_NODE_DEV_CAP_PCI_DEV:
VIR_FREE(data->pci_dev.product_name);
VIR_FREE(data->pci_dev.vendor_name);
+ VIR_FREE(data->pci_dev.physical_function);
+ for (i = 0 ; i < data->pci_dev.num_virtual_functions ; i++) {
+ VIR_FREE(data->pci_dev.virtual_functions[i]);
+ }
break;
case VIR_NODE_DEV_CAP_USB_DEV:
VIR_FREE(data->usb_dev.product_name);
diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h
index 7a20bd6..4bfac90 100644
--- a/src/conf/node_device_conf.h
+++ b/src/conf/node_device_conf.h
@@ -76,6 +76,18 @@ enum virNodeDevScsiHostCapFlags {
VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS = (1 << 1),
};
+enum virNodeDevPCICapFlags {
+ VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0),
+ VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1),
+};
+
+struct pci_config_address {
+ unsigned domain;
+ unsigned bus;
+ unsigned slot;
+ unsigned function;
+};
+
typedef struct _virNodeDevCapsDef virNodeDevCapsDef;
typedef virNodeDevCapsDef *virNodeDevCapsDefPtr;
struct _virNodeDevCapsDef {
@@ -105,6 +117,10 @@ struct _virNodeDevCapsDef {
unsigned class;
char *product_name;
char *vendor_name;
+ struct pci_config_address *physical_function;
+ struct pci_config_address **virtual_functions;
+ unsigned num_virtual_functions;
+ unsigned flags;
} pci_dev;
struct {
unsigned bus;
diff --git a/src/node_device/node_device_driver.h b/src/node_device/node_device_driver.h
index 4f0822c..7b68f41 100644
--- a/src/node_device/node_device_driver.h
+++ b/src/node_device/node_device_driver.h
@@ -37,6 +37,10 @@
#define LINUX_SYSFS_VPORT_CREATE_POSTFIX "/vport_create"
#define LINUX_SYSFS_VPORT_DELETE_POSTFIX "/vport_delete"
+#define SRIOV_FOUND 0
+#define SRIOV_NOT_FOUND 1
+#define SRIOV_ERROR -1
+
#define LINUX_NEW_DEVICE_WAIT_TIME 60
#ifdef HAVE_HAL
@@ -61,6 +65,14 @@ int check_fc_host_linux(union _virNodeDevCapData *d);
#define check_vport_capable(d) check_vport_capable_linux(d)
int check_vport_capable_linux(union _virNodeDevCapData *d);
+#define get_physical_function(s,d) get_physical_function_linux(s,d)
+int get_physical_function_linux(const char *sysfs_path,
+ union _virNodeDevCapData *d);
+
+#define get_virtual_functions(s,d) get_virtual_functions_linux(s,d)
+int get_virtual_functions_linux(const char *sysfs_path,
+ union _virNodeDevCapData *d);
+
#define read_wwn(host, file, wwn) read_wwn_linux(host, file, wwn)
int read_wwn_linux(int host, const char *file, char **wwn);
@@ -68,6 +80,8 @@ int read_wwn_linux(int host, const char *file, char **wwn);
#define check_fc_host(d)
#define check_vport_capable(d)
+#define get_physical_function(sysfs_path, d)
+#define get_virtual_functions(sysfs_path, d)
#define read_wwn(host, file, wwn)
#endif /* __linux__ */
diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c
index 6de7e3d..4496301 100644
--- a/src/node_device/node_device_hal.c
+++ b/src/node_device/node_device_hal.c
@@ -145,14 +145,20 @@ static int gather_pci_cap(LibHalContext *ctx, const char *udi,
(void)virStrToLong_ui(p+1, &p, 16, &d->pci_dev.slot);
(void)virStrToLong_ui(p+1, &p, 16, &d->pci_dev.function);
}
+
+ get_physical_function(sysfs_path, d);
+ get_virtual_functions(sysfs_path, d);
+
VIR_FREE(sysfs_path);
}
+
(void)get_int_prop(ctx, udi, "pci.vendor_id", (int
*)&d->pci_dev.vendor);
if (get_str_prop(ctx, udi, "pci.vendor", &d->pci_dev.vendor_name) !=
0)
(void)get_str_prop(ctx, udi, "info.vendor",
&d->pci_dev.vendor_name);
(void)get_int_prop(ctx, udi, "pci.product_id", (int
*)&d->pci_dev.product);
if (get_str_prop(ctx, udi, "pci.product", &d->pci_dev.product_name)
!= 0)
(void)get_str_prop(ctx, udi, "info.product",
&d->pci_dev.product_name);
+
return 0;
}
diff --git a/src/node_device/node_device_linux_sysfs.c
b/src/node_device/node_device_linux_sysfs.c
index b7cf782..d2b10dd 100644
--- a/src/node_device/node_device_linux_sysfs.c
+++ b/src/node_device/node_device_linux_sysfs.c
@@ -29,6 +29,7 @@
#include "virterror_internal.h"
#include "memory.h"
#include "logging.h"
+#include <dirent.h>
#define VIR_FROM_THIS VIR_FROM_NODEDEV
@@ -70,7 +71,7 @@ int read_wwn_linux(int host, const char *file, char **wwn)
char buf[64];
if (open_wwn_file(LINUX_SYSFS_FC_HOST_PREFIX, host, file, &fd) < 0) {
- goto out;
+ goto out;
}
memset(buf, 0, sizeof(buf));
@@ -184,4 +185,201 @@ out:
return retval;
}
+
+static int logStrToLong_ui(char const *s,
+ char **end_ptr,
+ int base,
+ unsigned int *result)
+{
+ int ret = 0;
+
+ ret = virStrToLong_ui(s, end_ptr, base, result);
+ if (ret != 0) {
+ VIR_ERROR("Failed to convert '%s' to unsigned int\n", s);
+ } else {
+ VIR_DEBUG("Converted '%s' to unsigned int %u\n", s, *result);
+ }
+
+ return ret;
+}
+
+
+static int parse_pci_config_address(char *address, struct pci_config_address *bdf)
+{
+ char *p = NULL;
+ int ret = -1;
+
+ if ((address == NULL) || (logStrToLong_ui(address,
+ &p,
+ 16,
+ &bdf->domain) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1,
+ &p,
+ 16,
+ &bdf->bus) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1,
+ &p,
+ 16,
+ &bdf->slot) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1,
+ &p,
+ 16,
+ &bdf->function) == -1)) {
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+
+
+static int get_sriov_function(const char *device_link,
+ struct pci_config_address **bdf)
+{
+ char *device_path = NULL, *config_address = NULL;
+ char errbuf[64];
+ int ret = SRIOV_ERROR;
+
+ VIR_DEBUG("Attempting to resolve device path from device link
'%s'\n",
+ device_link);
+
+ if (!virFileExists(device_link)) {
+
+ VIR_DEBUG("SR IOV function link '%s' does not exist\n",
device_link);
+ /* Not an SR IOV device, not an error, either. */
+ ret = SRIOV_NOT_FOUND;
+
+ goto out;
+
+ }
+
+ device_path = realpath(device_link, device_path);
+ if (device_path == NULL) {
+ memset(errbuf, '\0', sizeof(errbuf));
+ VIR_ERROR("Failed to resolve device link '%s': '%s'\n",
device_link,
+ strerror_r(errno, errbuf, sizeof(errbuf)));
+ goto out;
+ }
+
+ VIR_DEBUG("SR IOV device path is '%s'\n", device_path);
+ config_address = basename(device_path);
+ if (VIR_ALLOC(*bdf) != 0) {
+ VIR_ERROR0("Failed to allocate memory for PCI device name\n");
+ goto out;
+ }
+
+ if (parse_pci_config_address(config_address, *bdf) != 0) {
+ VIR_ERROR("Failed to parse PCI config address '%s'\n",
config_address);
+ goto out;
+ }
+
+ VIR_DEBUG("SR IOV function %.4x:%.2x:%.2x.%.1x/>\n",
+ (*bdf)->domain,
+ (*bdf)->bus,
+ (*bdf)->slot,
+ (*bdf)->function);
+
+ ret = SRIOV_FOUND;
+
+out:
+ VIR_FREE(device_path);
+ return ret;
+}
+
+
+int get_physical_function_linux(const char *sysfs_path,
+ union _virNodeDevCapData *d ATTRIBUTE_UNUSED)
+{
+ int ret = -1;
+ char *device_link = NULL;
+
+ VIR_DEBUG("Attempting to get SR IOV physical function for device "
+ "with sysfs path '%s'\n", sysfs_path);
+
+ if (virBuildPath(&device_link, sysfs_path, "physfn") == -1) {
+ virReportOOMError(NULL);
+ } else {
+ ret = get_sriov_function(device_link, &d->pci_dev.physical_function);
+ if (ret == SRIOV_FOUND) {
+ d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION;
+ }
+ }
+
+ VIR_FREE(device_link);
+ return ret;
+}
+
+
+int get_virtual_functions_linux(const char *sysfs_path,
+ union _virNodeDevCapData *d)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char *device_link = NULL;
+
+ VIR_DEBUG("Attempting to get SR IOV virtual functions for device"
+ "with sysfs path '%s'\n", sysfs_path);
+
+ dir = opendir(sysfs_path);
+ if (dir == NULL) {
+ goto out;
+ }
+
+ while ((entry = readdir(dir))) {
+ if (STRPREFIX(entry->d_name, "virtfn")) {
+ /* This local is just to avoid lines of code much > 80 col. */
+ unsigned int *num_funcs = &d->pci_dev.num_virtual_functions;
+
+ if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) {
+ virReportOOMError(NULL);
+ goto out;
+ }
+
+ VIR_DEBUG("Number of virtual functions: %d\n", *num_funcs);
+ if (VIR_REALLOC_N(d->pci_dev.virtual_functions, (*num_funcs) + 1) != 0) {
+ virReportOOMError(NULL);
+ goto out;
+ }
+
+ if (get_sriov_function(device_link,
+ &d->pci_dev.virtual_functions[*num_funcs]) !=
+ SRIOV_FOUND) {
+
+ /* We should not get back SRIOV_NOT_FOUND in this
+ * case, so if we do, it's an error. */
+ VIR_ERROR("Failed to get SR IOV function from device link
'%s'\n",
+ device_link);
+ goto out;
+ } else {
+ (*num_funcs)++;
+ d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION;
+ }
+
+ VIR_FREE(device_link);
+ }
+ }
+
+ closedir(dir);
+
+ ret = 0;
+
+out:
+ VIR_FREE(device_link);
+ return 0;
+}
+
#endif /* __linux__ */
--
1.6.5.2