From: "Daniel P. Berrange" <berrange(a)redhat.com>
---
po/POTFILES.in | 2 +-
src/Makefile.am | 2 +-
src/conf/node_device_conf.c | 2 +-
src/network/bridge_driver.c | 2 +-
src/node_device/node_device_hal.c | 2 +-
src/node_device/node_device_udev.c | 2 +-
src/qemu/qemu_conf.h | 2 +-
src/qemu/qemu_driver.c | 2 +-
src/qemu/qemu_hostdev.c | 2 +-
src/qemu/qemu_hotplug.c | 2 +-
src/security/security_apparmor.c | 2 +-
src/security/security_dac.c | 2 +-
src/security/security_selinux.c | 2 +-
src/security/virt-aa-helper.c | 2 +-
src/util/pci.c | 2285 ------------------------------------
src/util/pci.h | 136 ---
src/util/virnetdev.c | 2 +-
src/util/virnetdev.h | 2 +-
src/util/virpci.c | 2285 ++++++++++++++++++++++++++++++++++++
src/util/virpci.h | 136 +++
src/xen/xen_driver.c | 2 +-
21 files changed, 2438 insertions(+), 2438 deletions(-)
delete mode 100644 src/util/pci.c
delete mode 100644 src/util/pci.h
create mode 100644 src/util/virpci.c
create mode 100644 src/util/virpci.h
diff --git a/po/POTFILES.in b/po/POTFILES.in
index a9dfcf4..18db09b 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -138,7 +138,6 @@ src/test/test_driver.c
src/uml/uml_conf.c
src/uml/uml_driver.c
src/util/iohelper.c
-src/util/pci.c
src/util/processinfo.c
src/util/sexpr.c
src/util/stats_linux.c
@@ -171,6 +170,7 @@ src/util/virnetdevtap.c
src/util/virnetdevvportprofile.c
src/util/virnetlink.c
src/util/virnodesuspend.c
+src/util/virpci.c
src/util/virpidfile.c
src/util/virprocess.c
src/util/virrandom.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 64f117d..d9f621f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -53,7 +53,6 @@ augeastest_DATA =
# These files are not related to driver APIs. Simply generic
# helper APIs for various purposes
UTIL_SOURCES = \
- util/pci.c util/pci.h \
util/processinfo.c util/processinfo.h \
util/sexpr.c util/sexpr.h \
util/stats_linux.c util/stats_linux.h \
@@ -85,6 +84,7 @@ UTIL_SOURCES = \
util/virjson.c util/virjson.h \
util/virlog.c util/virlog.h \
util/virobject.c util/virobject.h \
+ util/virpci.c util/virpci.h \
util/virpidfile.c util/virpidfile.h \
util/virprocess.c util/virprocess.h \
util/virtypedparam.c util/virtypedparam.h \
diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c
index 85de50f..045f05d 100644
--- a/src/conf/node_device_conf.c
+++ b/src/conf/node_device_conf.c
@@ -36,7 +36,7 @@
#include "util.h"
#include "virbuffer.h"
#include "uuid.h"
-#include "pci.h"
+#include "virpci.h"
#include "virrandom.h"
#define VIR_FROM_THIS VIR_FROM_NODEDEV
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
index 52b36a1..2db9197 100644
--- a/src/network/bridge_driver.c
+++ b/src/network/bridge_driver.c
@@ -61,7 +61,7 @@
#include "virdnsmasq.h"
#include "configmake.h"
#include "virnetdev.h"
-#include "pci.h"
+#include "virpci.h"
#include "virnetdevbridge.h"
#include "virnetdevtap.h"
#include "virnetdevvportprofile.h"
diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c
index 6f89f16..257a363 100644
--- a/src/node_device/node_device_hal.c
+++ b/src/node_device/node_device_hal.c
@@ -35,7 +35,7 @@
#include "datatypes.h"
#include "viralloc.h"
#include "uuid.h"
-#include "pci.h"
+#include "virpci.h"
#include "virlog.h"
#include "node_device_driver.h"
#include "virdbus.h"
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 81a11d7..7289a72 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -37,7 +37,7 @@
#include "uuid.h"
#include "util.h"
#include "virbuffer.h"
-#include "pci.h"
+#include "virpci.h"
#define VIR_FROM_THIS VIR_FROM_NODEDEV
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 283251a..f928c29 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -35,7 +35,7 @@
# include "threads.h"
# include "security/security_manager.h"
# include "vircgroup.h"
-# include "pci.h"
+# include "virpci.h"
# include "virusb.h"
# include "cpu_conf.h"
# include "driver.h"
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 65254b6..14a5e44 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -72,7 +72,7 @@
#include "domain_conf.h"
#include "domain_audit.h"
#include "node_device_conf.h"
-#include "pci.h"
+#include "virpci.h"
#include "virusb.h"
#include "processinfo.h"
#include "libvirt_internal.h"
diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c
index 754f8d0..5c23ccb 100644
--- a/src/qemu/qemu_hostdev.c
+++ b/src/qemu/qemu_hostdev.c
@@ -27,7 +27,7 @@
#include "virlog.h"
#include "virterror_internal.h"
#include "viralloc.h"
-#include "pci.h"
+#include "virpci.h"
#include "virusb.h"
#include "virnetdev.h"
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index c432a32..e120988 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -36,7 +36,7 @@
#include "datatypes.h"
#include "virterror_internal.h"
#include "viralloc.h"
-#include "pci.h"
+#include "virpci.h"
#include "virfile.h"
#include "qemu_cgroup.h"
#include "locking/domain_lock.h"
diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c
index b65384e..d28189f 100644
--- a/src/security/security_apparmor.c
+++ b/src/security/security_apparmor.c
@@ -43,7 +43,7 @@
#include "virterror_internal.h"
#include "datatypes.h"
#include "uuid.h"
-#include "pci.h"
+#include "virpci.h"
#include "virusb.h"
#include "virfile.h"
#include "configmake.h"
diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index bdb29c7..e4f016a 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -28,7 +28,7 @@
#include "util.h"
#include "viralloc.h"
#include "virlog.h"
-#include "pci.h"
+#include "virpci.h"
#include "virusb.h"
#include "storage_file.h"
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c
index 3a1548d..2adf5c9 100644
--- a/src/security/security_selinux.c
+++ b/src/security/security_selinux.c
@@ -37,7 +37,7 @@
#include "util.h"
#include "viralloc.h"
#include "virlog.h"
-#include "pci.h"
+#include "virpci.h"
#include "virusb.h"
#include "storage_file.h"
#include "virfile.h"
diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c
index c9c222a..4945f7c 100644
--- a/src/security/virt-aa-helper.c
+++ b/src/security/virt-aa-helper.c
@@ -51,7 +51,7 @@
#include "xml.h"
#include "uuid.h"
#include "virusb.h"
-#include "pci.h"
+#include "virpci.h"
#include "virfile.h"
#include "configmake.h"
#include "virrandom.h"
diff --git a/src/util/pci.c b/src/util/pci.c
deleted file mode 100644
index e4009fa..0000000
--- a/src/util/pci.c
+++ /dev/null
@@ -1,2285 +0,0 @@
-/*
- * Copyright (C) 2009-2012 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see
- * <
http://www.gnu.org/licenses/>.
- *
- * Authors:
- * Mark McLoughlin <markmc(a)redhat.com>
- */
-
-#include <config.h>
-
-#include "pci.h"
-
-#include <dirent.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdlib.h>
-
-#include "virlog.h"
-#include "viralloc.h"
-#include "vircommand.h"
-#include "virterror_internal.h"
-#include "virfile.h"
-
-#define PCI_SYSFS "/sys/bus/pci/"
-#define PCI_ID_LEN 10 /* "XXXX XXXX" */
-#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */
-
-#define SRIOV_FOUND 0
-#define SRIOV_NOT_FOUND 1
-#define SRIOV_ERROR -1
-
-struct _pciDevice {
- unsigned domain;
- unsigned bus;
- unsigned slot;
- unsigned function;
-
- char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */
- char id[PCI_ID_LEN]; /* product vendor */
- char *path;
- const char *used_by; /* The domain which uses the device */
-
- unsigned pcie_cap_pos;
- unsigned pci_pm_cap_pos;
- unsigned has_flr : 1;
- unsigned has_pm_reset : 1;
- unsigned managed : 1;
-
- /* used by reattach function */
- unsigned unbind_from_stub : 1;
- unsigned remove_slot : 1;
- unsigned reprobe : 1;
-};
-
-struct _pciDeviceList {
- unsigned count;
- pciDevice **devs;
-};
-
-
-/* For virReportOOMError() and virReportSystemError() */
-#define VIR_FROM_THIS VIR_FROM_NONE
-
-/* Specifications referenced in comments:
- * PCI30 - PCI Local Bus Specification 3.0
- * PCIe20 - PCI Express Base Specification 2.0
- * BR12 - PCI-to-PCI Bridge Architecture Specification 1.2
- * PM12 - PCI Bus Power Management Interface Specification 1.2
- * ECN_AF - Advanced Capabilities for Conventional PCI ECN
- */
-
-/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization
*/
-#define PCI_CONF_LEN 0x100
-#define PCI_CONF_HEADER_LEN 0x40
-
-/* PCI30 6.2.1 */
-#define PCI_HEADER_TYPE 0x0e /* Header type */
-#define PCI_HEADER_TYPE_BRIDGE 0x1
-#define PCI_HEADER_TYPE_MASK 0x7f
-#define PCI_HEADER_TYPE_MULTI 0x80
-
-/* PCI30 6.2.1 Device Identification */
-#define PCI_CLASS_DEVICE 0x0a /* Device class */
-
-/* Class Code for bridge; PCI30 D.7 Base Class 06h */
-#define PCI_CLASS_BRIDGE_PCI 0x0604
-
-/* PCI30 6.2.3 Device Status */
-#define PCI_STATUS 0x06 /* 16 bits */
-#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
-
-/* PCI30 6.7 Capabilities List */
-#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
-
-/* PM12 3.2.1 Capability Identifier */
-#define PCI_CAP_ID_PM 0x01 /* Power Management */
-/* PCI30 H Capability IDs */
-#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
-/* ECN_AF 6.x.1.1 Capability ID for AF */
-#define PCI_CAP_ID_AF 0x13 /* Advanced Features */
-
-/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */
-#define PCI_EXP_DEVCAP 0x4 /* Device capabilities */
-#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */
-
-/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
-#define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */
-#define PCI_SECONDARY_BUS 0x19 /* BR12 3.2.5.3 Secondary bus number */
-#define PCI_SUBORDINATE_BUS 0x1a /* BR12 3.2.5.4 Highest bus number behind the
bridge */
-#define PCI_BRIDGE_CONTROL 0x3e
-/* BR12 3.2.5.18 Bridge Control Register */
-#define PCI_BRIDGE_CTL_RESET 0x40 /* Secondary bus reset */
-
-/* PM12 3.2.4 Power Management Control/Status (Offset = 4) */
-#define PCI_PM_CTRL 4 /* PM control and status register */
-#define PCI_PM_CTRL_STATE_MASK 0x3 /* Current power state (D0 to D3) */
-#define PCI_PM_CTRL_STATE_D0 0x0 /* D0 state */
-#define PCI_PM_CTRL_STATE_D3hot 0x3 /* D3 state */
-#define PCI_PM_CTRL_NO_SOFT_RESET 0x8 /* No reset for D3hot->D0 */
-
-/* ECN_AF 6.x.1 Advanced Features Capability Structure */
-#define PCI_AF_CAP 0x3 /* Advanced features capabilities */
-#define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */
-
-#define PCI_EXP_FLAGS 0x2
-#define PCI_EXP_FLAGS_TYPE 0x00f0
-#define PCI_EXP_TYPE_DOWNSTREAM 0x6
-
-#define PCI_EXT_CAP_BASE 0x100
-#define PCI_EXT_CAP_LIMIT 0x1000
-#define PCI_EXT_CAP_ID_MASK 0x0000ffff
-#define PCI_EXT_CAP_OFFSET_SHIFT 20
-#define PCI_EXT_CAP_OFFSET_MASK 0x00000ffc
-
-#define PCI_EXT_CAP_ID_ACS 0x000d
-#define PCI_EXT_ACS_CTRL 0x06
-
-#define PCI_EXT_CAP_ACS_SV 0x01
-#define PCI_EXT_CAP_ACS_RR 0x04
-#define PCI_EXT_CAP_ACS_CR 0x08
-#define PCI_EXT_CAP_ACS_UF 0x10
-#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
- PCI_EXT_CAP_ACS_RR | \
- PCI_EXT_CAP_ACS_CR | \
- PCI_EXT_CAP_ACS_UF)
-
-static int
-pciConfigOpen(pciDevice *dev, bool fatal)
-{
- int fd;
-
- fd = open(dev->path, O_RDWR);
-
- if (fd < 0) {
- if (fatal) {
- virReportSystemError(errno,
- _("Failed to open config space file
'%s'"),
- dev->path);
- } else {
- char ebuf[1024];
- VIR_WARN("Failed to open config space file '%s': %s",
- dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
- }
- return -1;
- }
-
- VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
- return fd;
-}
-
-static void
-pciConfigClose(pciDevice *dev, int cfgfd)
-{
- if (VIR_CLOSE(cfgfd) < 0) {
- char ebuf[1024];
- VIR_WARN("Failed to close config space file '%s': %s",
- dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
- }
-}
-
-
-static int
-pciRead(pciDevice *dev,
- int cfgfd,
- unsigned pos,
- uint8_t *buf,
- unsigned buflen)
-{
- memset(buf, 0, buflen);
-
- if (lseek(cfgfd, pos, SEEK_SET) != pos ||
- saferead(cfgfd, buf, buflen) != buflen) {
- char ebuf[1024];
- VIR_WARN("Failed to read from '%s' : %s", dev->path,
- virStrerror(errno, ebuf, sizeof(ebuf)));
- return -1;
- }
- return 0;
-}
-
-static uint8_t
-pciRead8(pciDevice *dev, int cfgfd, unsigned pos)
-{
- uint8_t buf;
- pciRead(dev, cfgfd, pos, &buf, sizeof(buf));
- return buf;
-}
-
-static uint16_t
-pciRead16(pciDevice *dev, int cfgfd, unsigned pos)
-{
- uint8_t buf[2];
- pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
- return (buf[0] << 0) | (buf[1] << 8);
-}
-
-static uint32_t
-pciRead32(pciDevice *dev, int cfgfd, unsigned pos)
-{
- uint8_t buf[4];
- pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
- return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3]
<< 24);
-}
-
-static int
-pciWrite(pciDevice *dev,
- int cfgfd,
- unsigned pos,
- uint8_t *buf,
- unsigned buflen)
-{
- if (lseek(cfgfd, pos, SEEK_SET) != pos ||
- safewrite(cfgfd, buf, buflen) != buflen) {
- char ebuf[1024];
- VIR_WARN("Failed to write to '%s' : %s", dev->path,
- virStrerror(errno, ebuf, sizeof(ebuf)));
- return -1;
- }
- return 0;
-}
-
-static void
-pciWrite16(pciDevice *dev, int cfgfd, unsigned pos, uint16_t val)
-{
- uint8_t buf[2] = { (val >> 0), (val >> 8) };
- pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
-}
-
-static void
-pciWrite32(pciDevice *dev, int cfgfd, unsigned pos, uint32_t val)
-{
- uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val
>> 24) };
- pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
-}
-
-typedef int (*pciIterPredicate)(pciDevice *, pciDevice *, void *);
-
-/* Iterate over available PCI devices calling @predicate
- * to compare each one to @dev.
- * Return -1 on error since we don't want to assume it is
- * safe to reset if there is an error.
- */
-static int
-pciIterDevices(pciIterPredicate predicate,
- pciDevice *dev,
- pciDevice **matched,
- void *data)
-{
- DIR *dir;
- struct dirent *entry;
- int ret = 0;
- int rc;
-
- *matched = NULL;
-
- VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices",
dev->id, dev->name);
-
- dir = opendir(PCI_SYSFS "devices");
- if (!dir) {
- VIR_WARN("Failed to open " PCI_SYSFS "devices");
- return -1;
- }
-
- while ((entry = readdir(dir))) {
- unsigned int domain, bus, slot, function;
- pciDevice *check;
- char *tmp;
-
- /* Ignore '.' and '..' */
- if (entry->d_name[0] == '.')
- continue;
-
- /* expected format: <domain>:<bus>:<slot>.<function> */
- if (/* domain */
- virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp
!= ':' ||
- /* bus */
- virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp !=
':' ||
- /* slot */
- virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp !=
'.' ||
- /* function */
- virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
- VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s",
entry->d_name);
- continue;
- }
-
- check = pciGetDevice(domain, bus, slot, function);
- if (!check) {
- ret = -1;
- break;
- }
-
- rc = predicate(dev, check, data);
- if (rc < 0) {
- /* the predicate returned an error, bail */
- pciFreeDevice(check);
- ret = -1;
- break;
- }
- else if (rc == 1) {
- VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name,
check->name);
- *matched = check;
- ret = 1;
- break;
- }
-
- pciFreeDevice(check);
- }
- closedir(dir);
- return ret;
-}
-
-static uint8_t
-pciFindCapabilityOffset(pciDevice *dev, int cfgfd, unsigned capability)
-{
- uint16_t status;
- uint8_t pos;
-
- status = pciRead16(dev, cfgfd, PCI_STATUS);
- if (!(status & PCI_STATUS_CAP_LIST))
- return 0;
-
- pos = pciRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
-
- /* Zero indicates last capability, capabilities can't
- * be in the config space header and 0xff is returned
- * by the kernel if we don't have access to this region
- *
- * Note: we're not handling loops or extended
- * capabilities here.
- */
- while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
- uint8_t capid = pciRead8(dev, cfgfd, pos);
- if (capid == capability) {
- VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
- dev->id, dev->name, capability, pos);
- return pos;
- }
-
- pos = pciRead8(dev, cfgfd, pos + 1);
- }
-
- VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name,
capability);
-
- return 0;
-}
-
-static unsigned int
-pciFindExtendedCapabilityOffset(pciDevice *dev,
- int cfgfd,
- unsigned capability)
-{
- int ttl;
- unsigned int pos;
- uint32_t header;
-
- /* minimum 8 bytes per capability */
- ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
- pos = PCI_EXT_CAP_BASE;
-
- while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
- header = pciRead32(dev, cfgfd, pos);
-
- if ((header & PCI_EXT_CAP_ID_MASK) == capability)
- return pos;
-
- pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
- ttl--;
- }
-
- return 0;
-}
-
-/* detects whether this device has FLR. Returns 0 if the device does
- * not have FLR, 1 if it does, and -1 on error
- */
-static int
-pciDetectFunctionLevelReset(pciDevice *dev, int cfgfd)
-{
- uint32_t caps;
- uint8_t pos;
- char *path;
- int found;
-
- /* The PCIe Function Level Reset capability allows
- * individual device functions to be reset without
- * affecting any other functions on the device or
- * any other devices on the bus. This is only common
- * on SR-IOV NICs at the moment.
- */
- if (dev->pcie_cap_pos) {
- caps = pciRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
- if (caps & PCI_EXP_DEVCAP_FLR) {
- VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id,
dev->name);
- return 1;
- }
- }
-
- /* The PCI AF Function Level Reset capability is
- * the same thing, except for conventional PCI
- * devices. This is not common yet.
- */
- pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
- if (pos) {
- caps = pciRead16(dev, cfgfd, pos + PCI_AF_CAP);
- if (caps & PCI_AF_CAP_FLR) {
- VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id,
dev->name);
- return 1;
- }
- }
-
- /* there are some buggy devices that do support FLR, but forget to
- * advertise that fact in their capabilities. However, FLR is *required*
- * to be present for virtual functions (VFs), so if we see that this
- * device is a VF, we just assume FLR works
- */
-
- if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name)
< 0) {
- virReportOOMError();
- return -1;
- }
-
- found = virFileExists(path);
- VIR_FREE(path);
- if (found) {
- VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF;
forcing flr on",
- dev->id, dev->name);
- return 1;
- }
-
- VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);
-
- return 0;
-}
-
-/* Require the device has the PCI Power Management capability
- * and that a D3hot->D0 transition will results in a full
- * internal reset, not just a soft reset.
- */
-static unsigned
-pciDetectPowerManagementReset(pciDevice *dev, int cfgfd)
-{
- if (dev->pci_pm_cap_pos) {
- uint32_t ctl;
-
- /* require the NO_SOFT_RESET bit is clear */
- ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
- if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
- VIR_DEBUG("%s %s: detected PM reset capability", dev->id,
dev->name);
- return 1;
- }
- }
-
- VIR_DEBUG("%s %s: no PM reset capability found", dev->id,
dev->name);
-
- return 0;
-}
-
-/* Any active devices on the same domain/bus ? */
-static int
-pciSharesBusWithActive(pciDevice *dev, pciDevice *check, void *data)
-{
- pciDeviceList *inactiveDevs = data;
-
- /* Different domain, different bus, or simply identical device */
- if (dev->domain != check->domain ||
- dev->bus != check->bus ||
- (dev->slot == check->slot &&
- dev->function == check->function))
- return 0;
-
- /* same bus, but inactive, i.e. about to be assigned to guest */
- if (inactiveDevs && pciDeviceListFind(inactiveDevs, check))
- return 0;
-
- return 1;
-}
-
-static pciDevice *
-pciBusContainsActiveDevices(pciDevice *dev,
- pciDeviceList *inactiveDevs)
-{
- pciDevice *active = NULL;
- if (pciIterDevices(pciSharesBusWithActive,
- dev, &active, inactiveDevs) < 0)
- return NULL;
- return active;
-}
-
-/* Is @check the parent of @dev ? */
-static int
-pciIsParent(pciDevice *dev, pciDevice *check, void *data)
-{
- uint16_t device_class;
- uint8_t header_type, secondary, subordinate;
- pciDevice **best = data;
- int ret = 0;
- int fd;
-
- if (dev->domain != check->domain)
- return 0;
-
- if ((fd = pciConfigOpen(check, false)) < 0)
- return 0;
-
- /* Is it a bridge? */
- device_class = pciRead16(check, fd, PCI_CLASS_DEVICE);
- if (device_class != PCI_CLASS_BRIDGE_PCI)
- goto cleanup;
-
- /* Is it a plane? */
- header_type = pciRead8(check, fd, PCI_HEADER_TYPE);
- if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
- goto cleanup;
-
- secondary = pciRead8(check, fd, PCI_SECONDARY_BUS);
- subordinate = pciRead8(check, fd, PCI_SUBORDINATE_BUS);
-
- VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name,
check->name);
-
- /* if the secondary bus exactly equals the device's bus, then we found
- * the direct parent. No further work is necessary
- */
- if (dev->bus == secondary) {
- ret = 1;
- goto cleanup;
- }
-
- /* otherwise, SRIOV allows VFs to be on different busses then their PFs.
- * In this case, what we need to do is look for the "best" match; i.e.
- * the most restrictive match that still satisfies all of the conditions.
- */
- if (dev->bus > secondary && dev->bus <= subordinate) {
- if (*best == NULL) {
- *best = pciGetDevice(check->domain, check->bus, check->slot,
- check->function);
- if (*best == NULL) {
- ret = -1;
- goto cleanup;
- }
- } else {
- /* OK, we had already recorded a previous "best" match for the
- * parent. See if the current device is more restrictive than the
- * best, and if so, make it the new best
- */
- int bestfd;
- uint8_t best_secondary;
-
- if ((bestfd = pciConfigOpen(*best, false)) < 0)
- goto cleanup;
- best_secondary = pciRead8(*best, bestfd, PCI_SECONDARY_BUS);
- pciConfigClose(*best, bestfd);
-
- if (secondary > best_secondary) {
- pciFreeDevice(*best);
- *best = pciGetDevice(check->domain, check->bus, check->slot,
- check->function);
- if (*best == NULL) {
- ret = -1;
- goto cleanup;
- }
- }
- }
- }
-
-cleanup:
- pciConfigClose(check, fd);
- return ret;
-}
-
-static int
-pciGetParentDevice(pciDevice *dev, pciDevice **parent)
-{
- pciDevice *best = NULL;
- int ret;
-
- *parent = NULL;
- ret = pciIterDevices(pciIsParent, dev, parent, &best);
- if (ret == 1)
- pciFreeDevice(best);
- else if (ret == 0)
- *parent = best;
- return ret;
-}
-
-/* Secondary Bus Reset is our sledgehammer - it resets all
- * devices behind a bus.
- */
-static int
-pciTrySecondaryBusReset(pciDevice *dev,
- int cfgfd,
- pciDeviceList *inactiveDevs)
-{
- pciDevice *parent, *conflict;
- uint8_t config_space[PCI_CONF_LEN];
- uint16_t ctl;
- int ret = -1;
- int parentfd;
-
- /* Refuse to do a secondary bus reset if there are other
- * devices/functions behind the bus are used by the host
- * or other guests.
- */
- if ((conflict = pciBusContainsActiveDevices(dev, inactiveDevs))) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Active %s devices on bus with %s, not doing bus
reset"),
- conflict->name, dev->name);
- return -1;
- }
-
- /* Find the parent bus */
- if (pciGetParentDevice(dev, &parent) < 0)
- return -1;
- if (!parent) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to find parent device for %s"),
- dev->name);
- return -1;
- }
- if ((parentfd = pciConfigOpen(parent, true)) < 0)
- goto out;
-
- VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);
-
- /* Save and restore the device's config space; we only do this
- * for the supplied device since we refuse to do a reset if there
- * are multiple devices/functions
- */
- if (pciRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to read PCI config space for %s"),
- dev->name);
- goto out;
- }
-
- /* Read the control register, set the reset flag, wait 200ms,
- * unset the reset flag and wait 200ms.
- */
- ctl = pciRead16(dev, cfgfd, PCI_BRIDGE_CONTROL);
-
- pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
- ctl | PCI_BRIDGE_CTL_RESET);
-
- usleep(200 * 1000); /* sleep 200ms */
-
- pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
-
- usleep(200 * 1000); /* sleep 200ms */
-
- if (pciWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to restore PCI config space for %s"),
- dev->name);
- goto out;
- }
- ret = 0;
-
-out:
- pciConfigClose(parent, parentfd);
- pciFreeDevice(parent);
- return ret;
-}
-
-/* Power management reset attempts to reset a device using a
- * D-state transition from D3hot to D0. Note, in detect_pm_reset()
- * above we require the device supports a full internal reset.
- */
-static int
-pciTryPowerManagementReset(pciDevice *dev, int cfgfd)
-{
- uint8_t config_space[PCI_CONF_LEN];
- uint32_t ctl;
-
- if (!dev->pci_pm_cap_pos)
- return -1;
-
- /* Save and restore the device's config space. */
- if (pciRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to read PCI config space for %s"),
- dev->name);
- return -1;
- }
-
- VIR_DEBUG("%s %s: doing a power management reset", dev->id,
dev->name);
-
- ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
- ctl &= ~PCI_PM_CTRL_STATE_MASK;
-
- pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
- ctl | PCI_PM_CTRL_STATE_D3hot);
-
- usleep(10 * 1000); /* sleep 10ms */
-
- pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
- ctl | PCI_PM_CTRL_STATE_D0);
-
- usleep(10 * 1000); /* sleep 10ms */
-
- if (pciWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to restore PCI config space for %s"),
- dev->name);
- return -1;
- }
-
- return 0;
-}
-
-static int
-pciInitDevice(pciDevice *dev, int cfgfd)
-{
- int flr;
-
- dev->pcie_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
- dev->pci_pm_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
- flr = pciDetectFunctionLevelReset(dev, cfgfd);
- if (flr < 0)
- return flr;
- dev->has_flr = flr;
- dev->has_pm_reset = pciDetectPowerManagementReset(dev, cfgfd);
-
- return 0;
-}
-
-int
-pciResetDevice(pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs)
-{
- int ret = -1;
- int fd;
-
- if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Not resetting active device %s"), dev->name);
- return -1;
- }
-
- if ((fd = pciConfigOpen(dev, true)) < 0)
- return -1;
-
- if (pciInitDevice(dev, fd) < 0)
- goto cleanup;
-
- /* KVM will perform FLR when starting and stopping
- * a guest, so there is no need for us to do it here.
- */
- if (dev->has_flr) {
- ret = 0;
- goto cleanup;
- }
-
- /* If the device supports PCI power management reset,
- * that's the next best thing because it only resets
- * the function, not the whole device.
- */
- if (dev->has_pm_reset)
- ret = pciTryPowerManagementReset(dev, fd);
-
- /* Bus reset is not an option with the root bus */
- if (ret < 0 && dev->bus != 0)
- ret = pciTrySecondaryBusReset(dev, fd, inactiveDevs);
-
- if (ret < 0) {
- virErrorPtr err = virGetLastError();
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Unable to reset PCI device %s: %s"),
- dev->name,
- err ? err->message : _("no FLR, PM reset or bus reset
available"));
- }
-
-cleanup:
- pciConfigClose(dev, fd);
- return ret;
-}
-
-
-static int
-pciDriverDir(char **buffer, const char *driver)
-{
- VIR_FREE(*buffer);
-
- if (virAsprintf(buffer, PCI_SYSFS "drivers/%s", driver) < 0) {
- virReportOOMError();
- return -1;
- }
-
- return 0;
-}
-
-static int
-pciDriverFile(char **buffer, const char *driver, const char *file)
-{
- VIR_FREE(*buffer);
-
- if (virAsprintf(buffer, PCI_SYSFS "drivers/%s/%s", driver, file) < 0) {
- virReportOOMError();
- return -1;
- }
-
- return 0;
-}
-
-static int
-pciDeviceFile(char **buffer, const char *device, const char *file)
-{
- VIR_FREE(*buffer);
-
- if (virAsprintf(buffer, PCI_SYSFS "devices/%s/%s", device, file) < 0) {
- virReportOOMError();
- return -1;
- }
-
- return 0;
-}
-
-
-static const char *
-pciFindStubDriver(void)
-{
- char *drvpath = NULL;
- int probed = 0;
-
-recheck:
- if (pciDriverDir(&drvpath, "pci-stub") < 0) {
- return NULL;
- }
-
- if (virFileExists(drvpath)) {
- VIR_FREE(drvpath);
- return "pci-stub";
- }
-
- if (pciDriverDir(&drvpath, "pciback") < 0) {
- return NULL;
- }
-
- if (virFileExists(drvpath)) {
- VIR_FREE(drvpath);
- return "pciback";
- }
-
- VIR_FREE(drvpath);
-
- if (!probed) {
- const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL };
- const char *const backprobe[] = { MODPROBE, "pciback", NULL };
-
- probed = 1;
- /*
- * Probing for pci-stub will succeed regardless of whether
- * on native or Xen kernels.
- * On Xen though, we want to prefer pciback, so probe
- * for that first, because that will only work on Xen
- */
- if (virRun(backprobe, NULL) < 0 &&
- virRun(stubprobe, NULL) < 0) {
- char ebuf[1024];
- VIR_WARN("failed to load pci-stub or pciback drivers: %s",
- virStrerror(errno, ebuf, sizeof(ebuf)));
- return NULL;
- }
-
- goto recheck;
- }
-
- return NULL;
-}
-
-static int
-pciUnbindDeviceFromStub(pciDevice *dev, const char *driver)
-{
- int result = -1;
- char *drvdir = NULL;
- char *path = NULL;
-
- if (pciDriverDir(&drvdir, driver) < 0)
- goto cleanup;
-
- if (!dev->unbind_from_stub)
- goto remove_slot;
-
- /* If the device is bound to stub, unbind it.
- */
- if (pciDeviceFile(&path, dev->name, "driver") < 0)
- goto cleanup;
-
- if (virFileExists(drvdir) && virFileLinkPointsTo(path, drvdir)) {
- if (pciDriverFile(&path, driver, "unbind") < 0) {
- goto cleanup;
- }
-
- if (virFileWriteStr(path, dev->name, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to unbind PCI device '%s' from
%s"),
- dev->name, driver);
- goto cleanup;
- }
- }
- dev->unbind_from_stub = 0;
-
-remove_slot:
- if (!dev->remove_slot)
- goto reprobe;
-
- /* Xen's pciback.ko wants you to use remove_slot on the specific device */
- if (pciDriverFile(&path, driver, "remove_slot") < 0) {
- goto cleanup;
- }
-
- if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to remove slot for PCI device '%s'
from %s"),
- dev->name, driver);
- goto cleanup;
- }
- dev->remove_slot = 0;
-
-reprobe:
- if (!dev->reprobe) {
- result = 0;
- goto cleanup;
- }
-
- /* Trigger a re-probe of the device is not in the stub's dynamic
- * ID table. If the stub is available, but 'remove_id' isn't
- * available, then re-probing would just cause the device to be
- * re-bound to the stub.
- */
- if (pciDriverFile(&path, driver, "remove_id") < 0) {
- goto cleanup;
- }
-
- if (!virFileExists(drvdir) || virFileExists(path)) {
- if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0)
{
- virReportSystemError(errno,
- _("Failed to trigger a re-probe for PCI device
'%s'"),
- dev->name);
- goto cleanup;
- }
- }
-
- result = 0;
-
-cleanup:
- /* do not do it again */
- dev->unbind_from_stub = 0;
- dev->remove_slot = 0;
- dev->reprobe = 0;
-
- VIR_FREE(drvdir);
- VIR_FREE(path);
-
- return result;
-}
-
-
-static int
-pciBindDeviceToStub(pciDevice *dev, const char *driver)
-{
- int result = -1;
- char *drvdir = NULL;
- char *path = NULL;
- int reprobe = 0;
-
- /* check whether the device is already bound to a driver */
- if (pciDriverDir(&drvdir, driver) < 0 ||
- pciDeviceFile(&path, dev->name, "driver") < 0) {
- goto cleanup;
- }
-
- if (virFileExists(path)) {
- if (virFileLinkPointsTo(path, drvdir)) {
- /* The device is already bound to pci-stub */
- result = 0;
- goto cleanup;
- }
- reprobe = 1;
- }
-
- /* Add the PCI device ID to the stub's dynamic ID table;
- * this is needed to allow us to bind the device to the stub.
- * Note: if the device is not currently bound to any driver,
- * stub will immediately be bound to the device. Also, note
- * that if a new device with this ID is hotplugged, or if a probe
- * is triggered for such a device, it will also be immediately
- * bound by the stub.
- */
- if (pciDriverFile(&path, driver, "new_id") < 0) {
- goto cleanup;
- }
-
- if (virFileWriteStr(path, dev->id, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to add PCI device ID '%s' to
%s"),
- dev->id, driver);
- goto cleanup;
- }
-
- /* check whether the device is bound to pci-stub when we write dev->id to
- * new_id.
- */
- if (pciDriverDir(&drvdir, driver) < 0 ||
- pciDeviceFile(&path, dev->name, "driver") < 0) {
- goto remove_id;
- }
-
- if (virFileLinkPointsTo(path, drvdir)) {
- dev->unbind_from_stub = 1;
- dev->remove_slot = 1;
- goto remove_id;
- }
-
- /* If the device is already bound to a driver, unbind it.
- * Note, this will have rather unpleasant side effects if this
- * PCI device happens to be IDE controller for the disk hosting
- * your root filesystem.
- */
- if (pciDeviceFile(&path, dev->name, "driver/unbind") < 0) {
- goto cleanup;
- }
-
- if (virFileExists(path)) {
- if (virFileWriteStr(path, dev->name, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to unbind PCI device
'%s'"),
- dev->name);
- goto cleanup;
- }
- dev->reprobe = reprobe;
- }
-
- /* If the device isn't already bound to pci-stub, try binding it now.
- */
- if (pciDriverDir(&drvdir, driver) < 0 ||
- pciDeviceFile(&path, dev->name, "driver") < 0) {
- goto remove_id;
- }
-
- if (!virFileLinkPointsTo(path, drvdir)) {
- /* Xen's pciback.ko wants you to use new_slot first */
- if (pciDriverFile(&path, driver, "new_slot") < 0) {
- goto remove_id;
- }
-
- if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0)
{
- virReportSystemError(errno,
- _("Failed to add slot for PCI device '%s'
to %s"),
- dev->name, driver);
- goto remove_id;
- }
- dev->remove_slot = 1;
-
- if (pciDriverFile(&path, driver, "bind") < 0) {
- goto remove_id;
- }
-
- if (virFileWriteStr(path, dev->name, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to bind PCI device '%s' to
%s"),
- dev->name, driver);
- goto remove_id;
- }
- dev->unbind_from_stub = 1;
- }
-
-remove_id:
- /* If 'remove_id' exists, remove the device id from pci-stub's dynamic
- * ID table so that 'drivers_probe' works below.
- */
- if (pciDriverFile(&path, driver, "remove_id") < 0) {
- /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */
- if (dev->reprobe) {
- VIR_WARN("Could not remove PCI ID '%s' from %s, and the device
"
- "cannot be probed again.", dev->id, driver);
- }
- dev->reprobe = 0;
- goto cleanup;
- }
-
- if (virFileExists(path) && virFileWriteStr(path, dev->id, 0) < 0) {
- virReportSystemError(errno,
- _("Failed to remove PCI ID '%s' from
%s"),
- dev->id, driver);
-
- /* remove PCI ID from pci-stub failed, and we cannot reprobe it */
- if (dev->reprobe) {
- VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device
"
- "cannot be probed again.", dev->id, driver);
- }
- dev->reprobe = 0;
- goto cleanup;
- }
-
- result = 0;
-
-cleanup:
- VIR_FREE(drvdir);
- VIR_FREE(path);
-
- if (result < 0) {
- pciUnbindDeviceFromStub(dev, driver);
- }
-
- return result;
-}
-
-int
-pciDettachDevice(pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs)
-{
- const char *driver = pciFindStubDriver();
- if (!driver) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("cannot find any PCI stub module"));
- return -1;
- }
-
- if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Not detaching active device %s"), dev->name);
- return -1;
- }
-
- if (pciBindDeviceToStub(dev, driver) < 0)
- return -1;
-
- /* Add the dev into list inactiveDevs */
- if (inactiveDevs && !pciDeviceListFind(inactiveDevs, dev)) {
- if (pciDeviceListAdd(inactiveDevs, dev) < 0)
- return -1;
- }
-
- return 0;
-}
-
-int
-pciReAttachDevice(pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs)
-{
- const char *driver = pciFindStubDriver();
- if (!driver) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("cannot find any PCI stub module"));
- return -1;
- }
-
- if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Not reattaching active device %s"), dev->name);
- return -1;
- }
-
- if (pciUnbindDeviceFromStub(dev, driver) < 0)
- return -1;
-
- /* Steal the dev from list inactiveDevs */
- if (inactiveDevs)
- pciDeviceListSteal(inactiveDevs, dev);
-
- return 0;
-}
-
-/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on
- * the host when doing device passthrough. This can lead to a race
- * condition where the hypervisor is still cleaning up the device while
- * libvirt is trying to re-attach it to the host device driver. To avoid
- * this situation, we look through /proc/iomem, and if the hypervisor is
- * still holding onto the bar (denoted by the string in the matcher variable),
- * then we can wait around a bit for that to clear up.
- *
- * A typical /proc/iomem looks like this (snipped for brevity):
- * 00010000-0008efff : System RAM
- * 0008f000-0008ffff : reserved
- * ...
- * 00100000-cc9fcfff : System RAM
- * 00200000-00483d3b : Kernel code
- * 00483d3c-005c88df : Kernel data
- * cc9fd000-ccc71fff : ACPI Non-volatile Storage
- * ...
- * d0200000-d02fffff : PCI Bus #05
- * d0200000-d021ffff : 0000:05:00.0
- * d0200000-d021ffff : e1000e
- * d0220000-d023ffff : 0000:05:00.0
- * d0220000-d023ffff : e1000e
- * ...
- * f0000000-f0003fff : 0000:00:1b.0
- * f0000000-f0003fff : kvm_assigned_device
- *
- * Returns 0 if we are clear to continue, and 1 if the hypervisor is still
- * holding onto the resource.
- */
-int
-pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher)
-{
- FILE *fp;
- char line[160];
- char *tmp;
- unsigned long long start, end;
- unsigned int domain, bus, slot, function;
- int in_matching_device;
- int ret;
- size_t match_depth;
-
- fp = fopen("/proc/iomem", "r");
- if (!fp) {
- /* If we failed to open iomem, we just basically ignore the error. The
- * unbind might succeed anyway, and besides, it's very likely we have
- * no way to report the error
- */
- VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway");
- return 0;
- }
-
- ret = 0;
- in_matching_device = 0;
- match_depth = 0;
- while (fgets(line, sizeof(line), fp) != 0) {
- /* the logic here is a bit confusing. For each line, we look to
- * see if it matches the domain:bus:slot.function we were given.
- * If this line matches the DBSF, then any subsequent lines indented
- * by 2 spaces are the PCI regions for this device. It's also
- * possible that none of the PCI regions are currently mapped, in
- * which case we have no indented regions. This code handles all
- * of these situations
- */
- if (in_matching_device && (strspn(line, " ") == (match_depth +
2))) {
- /* expected format: <start>-<end> : <suffix> */
- if (/* start */
- virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp !=
'-' ||
- /* end */
- virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
- (tmp = STRSKIP(tmp, " : ")) == NULL)
- continue;
-
- if (STRPREFIX(tmp, matcher)) {
- ret = 1;
- break;
- }
- }
- else {
- in_matching_device = 0;
-
- /* expected format: <start>-<end> :
<domain>:<bus>:<slot>.<function> */
- if (/* start */
- virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp !=
'-' ||
- /* end */
- virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
- (tmp = STRSKIP(tmp, " : ")) == NULL ||
- /* domain */
- virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp !=
':' ||
- /* bus */
- virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp !=
':' ||
- /* slot */
- virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp !=
'.' ||
- /* function */
- virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp !=
'\n')
- continue;
-
- if (domain != dev->domain || bus != dev->bus || slot != dev->slot
||
- function != dev->function)
- continue;
- in_matching_device = 1;
- match_depth = strspn(line, " ");
- }
- }
-
- VIR_FORCE_FCLOSE(fp);
-
- return ret;
-}
-
-static char *
-pciReadDeviceID(pciDevice *dev, const char *id_name)
-{
- char *path = NULL;
- char *id_str;
-
- if (pciDeviceFile(&path, dev->name, id_name) < 0) {
- return NULL;
- }
-
- /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
- if (virFileReadAll(path, 7, &id_str) < 0) {
- VIR_FREE(path);
- return NULL;
- }
-
- VIR_FREE(path);
-
- /* Check for 0x suffix */
- if (id_str[0] != '0' || id_str[1] != 'x') {
- VIR_FREE(id_str);
- return NULL;
- }
-
- /* Chop off the newline; we know the string is 7 bytes */
- id_str[6] = '\0';
-
- return id_str;
-}
-
-int
-pciGetDeviceAddrString(unsigned domain,
- unsigned bus,
- unsigned slot,
- unsigned function,
- char **pciConfigAddr)
-{
- pciDevice *dev = NULL;
- int ret = -1;
-
- dev = pciGetDevice(domain, bus, slot, function);
- if (dev != NULL) {
- if ((*pciConfigAddr = strdup(dev->name)) == NULL) {
- virReportOOMError();
- goto cleanup;
- }
- ret = 0;
- }
-
-cleanup:
- pciFreeDevice(dev);
- return ret;
-}
-
-pciDevice *
-pciGetDevice(unsigned domain,
- unsigned bus,
- unsigned slot,
- unsigned function)
-{
- pciDevice *dev;
- char *vendor = NULL;
- char *product = NULL;
-
- if (VIR_ALLOC(dev) < 0) {
- virReportOOMError();
- return NULL;
- }
-
- dev->domain = domain;
- dev->bus = bus;
- dev->slot = slot;
- dev->function = function;
-
- if (snprintf(dev->name, sizeof(dev->name), "%.4x:%.2x:%.2x.%.1x",
- dev->domain, dev->bus, dev->slot,
- dev->function) >= sizeof(dev->name)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("dev->name buffer overflow: %.4x:%.2x:%.2x.%.1x"),
- dev->domain, dev->bus, dev->slot, dev->function);
- goto error;
- }
- if (virAsprintf(&dev->path, PCI_SYSFS "devices/%s/config",
- dev->name) < 0) {
- virReportOOMError();
- goto error;
- }
-
- if (access(dev->path, F_OK) != 0) {
- virReportSystemError(errno,
- _("Device %s not found: could not access %s"),
- dev->name, dev->path);
- goto error;
- }
-
- vendor = pciReadDeviceID(dev, "vendor");
- product = pciReadDeviceID(dev, "device");
-
- if (!vendor || !product) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to read product/vendor ID for %s"),
- dev->name);
- goto error;
- }
-
- /* strings contain '0x' prefix */
- if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
- &product[2]) >= sizeof(dev->id)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("dev->id buffer overflow: %s %s"),
- &vendor[2], &product[2]);
- goto error;
- }
-
- VIR_DEBUG("%s %s: initialized", dev->id, dev->name);
-
-cleanup:
- VIR_FREE(product);
- VIR_FREE(vendor);
- return dev;
-
-error:
- pciFreeDevice(dev);
- dev = NULL;
- goto cleanup;
-}
-
-void
-pciFreeDevice(pciDevice *dev)
-{
- if (!dev)
- return;
- VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
- VIR_FREE(dev->path);
- VIR_FREE(dev);
-}
-
-const char *
-pciDeviceGetName(pciDevice *dev)
-{
- return dev->name;
-}
-
-void pciDeviceSetManaged(pciDevice *dev, unsigned managed)
-{
- dev->managed = !!managed;
-}
-
-unsigned pciDeviceGetManaged(pciDevice *dev)
-{
- return dev->managed;
-}
-
-unsigned
-pciDeviceGetUnbindFromStub(pciDevice *dev)
-{
- return dev->unbind_from_stub;
-}
-
-void
-pciDeviceSetUnbindFromStub(pciDevice *dev, unsigned unbind)
-{
- dev->unbind_from_stub = !!unbind;
-}
-
-unsigned
-pciDeviceGetRemoveSlot(pciDevice *dev)
-{
- return dev->remove_slot;
-}
-
-void
-pciDeviceSetRemoveSlot(pciDevice *dev, unsigned remove_slot)
-{
- dev->remove_slot = !!remove_slot;
-}
-
-unsigned
-pciDeviceGetReprobe(pciDevice *dev)
-{
- return dev->reprobe;
-}
-
-void
-pciDeviceSetReprobe(pciDevice *dev, unsigned reprobe)
-{
- dev->reprobe = !!reprobe;
-}
-
-void
-pciDeviceSetUsedBy(pciDevice *dev, const char *name)
-{
- dev->used_by = name;
-}
-
-const char *
-pciDeviceGetUsedBy(pciDevice *dev)
-{
- return dev->used_by;
-}
-
-void pciDeviceReAttachInit(pciDevice *pci)
-{
- pci->unbind_from_stub = 1;
- pci->remove_slot = 1;
- pci->reprobe = 1;
-}
-
-
-pciDeviceList *
-pciDeviceListNew(void)
-{
- pciDeviceList *list;
-
- if (VIR_ALLOC(list) < 0) {
- virReportOOMError();
- return NULL;
- }
-
- return list;
-}
-
-void
-pciDeviceListFree(pciDeviceList *list)
-{
- int i;
-
- if (!list)
- return;
-
- for (i = 0; i < list->count; i++) {
- pciFreeDevice(list->devs[i]);
- list->devs[i] = NULL;
- }
-
- list->count = 0;
- VIR_FREE(list->devs);
- VIR_FREE(list);
-}
-
-int
-pciDeviceListAdd(pciDeviceList *list,
- pciDevice *dev)
-{
- if (pciDeviceListFind(list, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Device %s is already in use"), dev->name);
- return -1;
- }
-
- if (VIR_REALLOC_N(list->devs, list->count+1) < 0) {
- virReportOOMError();
- return -1;
- }
-
- list->devs[list->count++] = dev;
-
- return 0;
-}
-
-pciDevice *
-pciDeviceListGet(pciDeviceList *list,
- int idx)
-{
- if (idx >= list->count)
- return NULL;
- if (idx < 0)
- return NULL;
-
- return list->devs[idx];
-}
-
-int
-pciDeviceListCount(pciDeviceList *list)
-{
- return list->count;
-}
-
-pciDevice *
-pciDeviceListStealIndex(pciDeviceList *list,
- int idx)
-{
- pciDevice *ret;
-
- if (idx < 0 || idx >= list->count)
- return NULL;
-
- ret = list->devs[idx];
-
- if (idx != --list->count) {
- memmove(&list->devs[idx],
- &list->devs[idx + 1],
- sizeof(*list->devs) * (list->count - idx));
- }
-
- if (VIR_REALLOC_N(list->devs, list->count) < 0) {
- ; /* not fatal */
- }
-
- return ret;
-}
-
-pciDevice *
-pciDeviceListSteal(pciDeviceList *list,
- pciDevice *dev)
-{
- return pciDeviceListStealIndex(list, pciDeviceListFindIndex(list, dev));
-}
-
-void
-pciDeviceListDel(pciDeviceList *list,
- pciDevice *dev)
-{
- pciDevice *ret = pciDeviceListSteal(list, dev);
- if (ret)
- pciFreeDevice(ret);
-}
-
-int
-pciDeviceListFindIndex(pciDeviceList *list, pciDevice *dev)
-{
- int i;
-
- for (i = 0; i < list->count; i++)
- if (list->devs[i]->domain == dev->domain &&
- list->devs[i]->bus == dev->bus &&
- list->devs[i]->slot == dev->slot &&
- list->devs[i]->function == dev->function)
- return i;
- return -1;
-}
-
-pciDevice *
-pciDeviceListFind(pciDeviceList *list, pciDevice *dev)
-{
- int i;
-
- if ((i = pciDeviceListFindIndex(list, dev)) >= 0)
- return list->devs[i];
- else
- return NULL;
-}
-
-
-int pciDeviceFileIterate(pciDevice *dev,
- pciDeviceFileActor actor,
- void *opaque)
-{
- char *pcidir = NULL;
- char *file = NULL;
- DIR *dir = NULL;
- int ret = -1;
- struct dirent *ent;
-
- if (virAsprintf(&pcidir, "/sys/bus/pci/devices/%04x:%02x:%02x.%x",
- dev->domain, dev->bus, dev->slot, dev->function) < 0)
{
- virReportOOMError();
- goto cleanup;
- }
-
- if (!(dir = opendir(pcidir))) {
- virReportSystemError(errno,
- _("cannot open %s"), pcidir);
- goto cleanup;
- }
-
- while ((ent = readdir(dir)) != NULL) {
- /* Device assignment requires:
- * $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
- * $PCIDIR/rom, $PCIDIR/reset
- */
- if (STREQ(ent->d_name, "config") ||
- STRPREFIX(ent->d_name, "resource") ||
- STREQ(ent->d_name, "rom") ||
- STREQ(ent->d_name, "reset")) {
- if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0)
{
- virReportOOMError();
- goto cleanup;
- }
- if ((actor)(dev, file, opaque) < 0)
- goto cleanup;
-
- VIR_FREE(file);
- }
- }
-
- ret = 0;
-
-cleanup:
- if (dir)
- closedir(dir);
- VIR_FREE(file);
- VIR_FREE(pcidir);
- return ret;
-}
-
-static int
-pciDeviceDownstreamLacksACS(pciDevice *dev)
-{
- uint16_t flags;
- uint16_t ctrl;
- unsigned int pos;
- int fd;
- int ret = 0;
-
- if ((fd = pciConfigOpen(dev, true)) < 0)
- return -1;
-
- if (pciInitDevice(dev, fd) < 0) {
- ret = -1;
- goto cleanup;
- }
-
- pos = dev->pcie_cap_pos;
- if (!pos || pciRead16(dev, fd, PCI_CLASS_DEVICE) != PCI_CLASS_BRIDGE_PCI)
- goto cleanup;
-
- flags = pciRead16(dev, fd, pos + PCI_EXP_FLAGS);
- if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
- goto cleanup;
-
- pos = pciFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
- if (!pos) {
- VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id,
dev->name);
- ret = 1;
- goto cleanup;
- }
-
- ctrl = pciRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
- if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
- VIR_DEBUG("%s %s: downstream port has ACS disabled",
- dev->id, dev->name);
- ret = 1;
- goto cleanup;
- }
-
-cleanup:
- pciConfigClose(dev, fd);
- return ret;
-}
-
-static int
-pciDeviceIsBehindSwitchLackingACS(pciDevice *dev)
-{
- pciDevice *parent;
-
- if (pciGetParentDevice(dev, &parent) < 0)
- return -1;
- if (!parent) {
- /* if we have no parent, and this is the root bus, ACS doesn't come
- * into play since devices on the root bus can't P2P without going
- * through the root IOMMU.
- */
- if (dev->bus == 0)
- return 0;
- else {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to find parent device for %s"),
- dev->name);
- return -1;
- }
- }
-
- /* XXX we should rather fail when we can't find device's parent and
- * stop the loop when we get to root instead of just stopping when no
- * parent can be found
- */
- do {
- pciDevice *tmp;
- int acs;
- int ret;
-
- acs = pciDeviceDownstreamLacksACS(parent);
-
- if (acs) {
- pciFreeDevice(parent);
- if (acs < 0)
- return -1;
- else
- return 1;
- }
-
- tmp = parent;
- ret = pciGetParentDevice(parent, &parent);
- pciFreeDevice(tmp);
- if (ret < 0)
- return -1;
- } while (parent);
-
- return 0;
-}
-
-int pciDeviceIsAssignable(pciDevice *dev,
- int strict_acs_check)
-{
- int ret;
-
- /* XXX This could be a great place to actually check that a non-managed
- * device isn't in use, e.g. by checking that device is either un-bound
- * or bound to a stub driver.
- */
-
- ret = pciDeviceIsBehindSwitchLackingACS(dev);
- if (ret < 0)
- return 0;
-
- if (ret) {
- if (!strict_acs_check) {
- VIR_DEBUG("%s %s: strict ACS check disabled; device assignment
allowed",
- dev->id, dev->name);
- } else {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Device %s is behind a switch lacking ACS and "
- "cannot be assigned"),
- dev->name);
- return 0;
- }
- }
-
- return 1;
-}
-
-#ifdef __linux__
-
-/*
- * returns true if equal
- */
-static bool
-pciConfigAddressEqual(struct pci_config_address *bdf1,
- struct pci_config_address *bdf2)
-{
- return ((bdf1->domain == bdf2->domain) &&
- (bdf1->bus == bdf2->bus) &&
- (bdf1->slot == bdf2->slot) &&
- (bdf1->function == bdf2->function));
-}
-
-static int
-logStrToLong_ui(char const *s,
- char **end_ptr,
- int base,
- unsigned int *result)
-{
- int ret = 0;
-
- ret = virStrToLong_ui(s, end_ptr, base, result);
- if (ret != 0) {
- VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);
- } else {
- VIR_DEBUG("Converted '%s' to unsigned int %u", s, *result);
- }
-
- return ret;
-}
-
-static int
-pciParsePciConfigAddress(char *address,
- struct pci_config_address *bdf)
-{
- char *p = NULL;
- int ret = -1;
-
- if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
- &bdf->domain) == -1)) {
- goto out;
- }
-
- if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
- &bdf->bus) == -1)) {
- goto out;
- }
-
- if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
- &bdf->slot) == -1)) {
- goto out;
- }
-
- if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
- &bdf->function) == -1)) {
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-pciGetPciConfigAddressFromSysfsDeviceLink(const char *device_link,
- struct pci_config_address **bdf)
-{
- char *config_address = NULL;
- char *device_path = NULL;
- char errbuf[64];
- int ret = -1;
-
- VIR_DEBUG("Attempting to resolve device path from device link
'%s'",
- device_link);
-
- if (!virFileExists(device_link)) {
- VIR_DEBUG("sysfs_path '%s' does not exist", device_link);
- return ret;
- }
-
- device_path = canonicalize_file_name(device_link);
- if (device_path == NULL) {
- memset(errbuf, '\0', sizeof(errbuf));
- virReportSystemError(errno,
- _("Failed to resolve device link '%s'"),
- device_link);
- return ret;
- }
-
- config_address = basename(device_path);
- if (VIR_ALLOC(*bdf) != 0) {
- virReportOOMError();
- goto out;
- }
-
- if (pciParsePciConfigAddress(config_address, *bdf) != 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to parse PCI config address '%s'"),
- config_address);
- VIR_FREE(*bdf);
- goto out;
- }
-
- VIR_DEBUG("pci_config_address %.4x:%.2x:%.2x.%.1x",
- (*bdf)->domain,
- (*bdf)->bus,
- (*bdf)->slot,
- (*bdf)->function);
-
- ret = 0;
-
-out:
- VIR_FREE(device_path);
-
- return ret;
-}
-
-/*
- * Returns Physical function given a virtual function
- */
-int
-pciGetPhysicalFunction(const char *vf_sysfs_path,
- struct pci_config_address **physical_function)
-{
- int ret = -1;
- char *device_link = NULL;
-
- VIR_DEBUG("Attempting to get SR IOV physical function for device "
- "with sysfs path '%s'", vf_sysfs_path);
-
- if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
- virReportOOMError();
- return ret;
- } else {
- ret = pciGetPciConfigAddressFromSysfsDeviceLink(device_link,
- physical_function);
- }
-
- VIR_FREE(device_link);
-
- return ret;
-}
-
-/*
- * Returns virtual functions of a physical function
- */
-int
-pciGetVirtualFunctions(const char *sysfs_path,
- struct pci_config_address ***virtual_functions,
- unsigned int *num_virtual_functions)
-{
- int ret = -1;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char *device_link = NULL;
- char errbuf[64];
-
- VIR_DEBUG("Attempting to get SR IOV virtual functions for device"
- "with sysfs path '%s'", sysfs_path);
-
- dir = opendir(sysfs_path);
- if (dir == NULL) {
- memset(errbuf, '\0', sizeof(errbuf));
- virReportSystemError(errno,
- _("Failed to open dir '%s'"),
- sysfs_path);
- return ret;
- }
-
- *virtual_functions = NULL;
- *num_virtual_functions = 0;
- while ((entry = readdir(dir))) {
- if (STRPREFIX(entry->d_name, "virtfn")) {
-
- if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) {
- virReportOOMError();
- goto out;
- }
-
- VIR_DEBUG("Number of virtual functions: %d",
- *num_virtual_functions);
- if (VIR_REALLOC_N(*virtual_functions,
- (*num_virtual_functions) + 1) != 0) {
- virReportOOMError();
- VIR_FREE(device_link);
- goto out;
- }
-
- if (pciGetPciConfigAddressFromSysfsDeviceLink(device_link,
- &((*virtual_functions)[*num_virtual_functions])) !=
- SRIOV_FOUND) {
- /* We should not get back SRIOV_NOT_FOUND in this
- * case, so if we do, it's an error. */
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to get SR IOV function from device "
- "link '%s'"), device_link);
- VIR_FREE(device_link);
- goto out;
- } else {
- (*num_virtual_functions)++;
- }
- VIR_FREE(device_link);
- }
- }
-
- ret = 0;
-
-out:
- if (dir)
- closedir(dir);
-
- return ret;
-}
-
-/*
- * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
- */
-int
-pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link)
-{
- char *vf_sysfs_physfn_link = NULL;
- int ret = -1;
-
- if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn",
- vf_sysfs_device_link) < 0) {
- virReportOOMError();
- return ret;
- }
-
- ret = virFileExists(vf_sysfs_physfn_link);
-
- VIR_FREE(vf_sysfs_physfn_link);
-
- return ret;
-}
-
-/*
- * Returns the sriov virtual function index of vf given its pf
- */
-int
-pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
- const char *vf_sysfs_device_link,
- int *vf_index)
-{
- int ret = -1, i;
- unsigned int num_virt_fns = 0;
- struct pci_config_address *vf_bdf = NULL;
- struct pci_config_address **virt_fns = NULL;
-
- if (pciGetPciConfigAddressFromSysfsDeviceLink(vf_sysfs_device_link,
- &vf_bdf) < 0)
- return ret;
-
- if (pciGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
- &num_virt_fns) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Error getting physical function's '%s' "
- "virtual_functions"), pf_sysfs_device_link);
- goto out;
- }
-
- for (i = 0; i < num_virt_fns; i++) {
- if (pciConfigAddressEqual(vf_bdf, virt_fns[i])) {
- *vf_index = i;
- ret = 0;
- break;
- }
- }
-
-out:
-
- /* free virtual functions */
- for (i = 0; i < num_virt_fns; i++)
- VIR_FREE(virt_fns[i]);
-
- VIR_FREE(virt_fns);
- VIR_FREE(vf_bdf);
-
- return ret;
-}
-
-/*
- * Returns a path to the PCI sysfs file given the BDF of the PCI function
- */
-
-int
-pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link)
-{
- if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s",
- pciDeviceName) < 0) {
- virReportOOMError();
- return -1;
- }
-
- return 0;
-}
-
-int
-pciConfigAddressToSysfsFile(struct pci_config_address *dev,
- char **pci_sysfs_device_link)
-{
- if (virAsprintf(pci_sysfs_device_link,
- PCI_SYSFS "devices/%04x:%02x:%02x.%x", dev->domain,
- dev->bus, dev->slot, dev->function) < 0) {
- virReportOOMError();
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Returns the network device name of a pci device
- */
-int
-pciDeviceNetName(char *device_link_sysfs_path, char **netname)
-{
- char *pcidev_sysfs_net_path = NULL;
- int ret = -1;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
-
- if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
- "net") == -1) {
- virReportOOMError();
- return -1;
- }
-
- dir = opendir(pcidev_sysfs_net_path);
- if (dir == NULL)
- goto out;
-
- while ((entry = readdir(dir))) {
- if (STREQ(entry->d_name, ".") ||
- STREQ(entry->d_name, ".."))
- continue;
-
- /* Assume a single directory entry */
- *netname = strdup(entry->d_name);
- if (!*netname)
- virReportOOMError();
- else
- ret = 0;
- break;
- }
-
- closedir(dir);
-
-out:
- VIR_FREE(pcidev_sysfs_net_path);
-
- return ret;
-}
-
-int
-pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
- char **pfname, int *vf_index)
-{
- struct pci_config_address *pf_config_address = NULL;
- char *pf_sysfs_device_path = NULL;
- int ret = -1;
-
- if (pciGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
- return ret;
-
- if (pciConfigAddressToSysfsFile(pf_config_address,
- &pf_sysfs_device_path) < 0) {
-
- VIR_FREE(pf_config_address);
- return ret;
- }
-
- if (pciGetVirtualFunctionIndex(pf_sysfs_device_path, vf_sysfs_device_path,
- vf_index) < 0)
- goto cleanup;
-
- ret = pciDeviceNetName(pf_sysfs_device_path, pfname);
-
-cleanup:
- VIR_FREE(pf_config_address);
- VIR_FREE(pf_sysfs_device_path);
-
- return ret;
-}
-
-#else
-static const char *unsupported = N_("not supported on non-linux platforms");
-
-int
-pciGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED,
- struct pci_config_address **physical_function ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-
-int
-pciGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED,
- struct pci_config_address ***virtual_functions ATTRIBUTE_UNUSED,
- unsigned int *num_virtual_functions ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-
-int
-pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-
-int
-pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED,
- const char *vf_sysfs_device_link ATTRIBUTE_UNUSED,
- int *vf_index ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-
-}
-
-int
-pciConfigAddressToSysfsFile(struct pci_config_address *dev ATTRIBUTE_UNUSED,
- char **pci_sysfs_device_link ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-
-int
-pciDeviceNetName(char *device_link_sysfs_path ATTRIBUTE_UNUSED,
- char **netname ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-
-int
-pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
- char **pfname ATTRIBUTE_UNUSED,
- int *vf_index ATTRIBUTE_UNUSED)
-{
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
- return -1;
-}
-#endif /* __linux__ */
diff --git a/src/util/pci.h b/src/util/pci.h
deleted file mode 100644
index 814c24e..0000000
--- a/src/util/pci.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) 2009, 2011-2012 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see
- * <
http://www.gnu.org/licenses/>.
- *
- * Authors:
- * Mark McLoughlin <markmc(a)redhat.com>
- */
-
-#ifndef __VIR_PCI_H__
-# define __VIR_PCI_H__
-
-# include "internal.h"
-
-typedef struct _pciDevice pciDevice;
-typedef struct _pciDeviceList pciDeviceList;
-
-struct pci_config_address {
- unsigned int domain;
- unsigned int bus;
- unsigned int slot;
- unsigned int function;
-};
-
-pciDevice *pciGetDevice (unsigned domain,
- unsigned bus,
- unsigned slot,
- unsigned function);
-void pciFreeDevice (pciDevice *dev);
-const char *pciDeviceGetName (pciDevice *dev);
-int pciDettachDevice (pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs);
-int pciReAttachDevice (pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs);
-int pciResetDevice (pciDevice *dev,
- pciDeviceList *activeDevs,
- pciDeviceList *inactiveDevs);
-void pciDeviceSetManaged(pciDevice *dev,
- unsigned managed);
-unsigned pciDeviceGetManaged(pciDevice *dev);
-void pciDeviceSetUsedBy(pciDevice *dev,
- const char *used_by);
-const char *pciDeviceGetUsedBy(pciDevice *dev);
-unsigned pciDeviceGetUnbindFromStub(pciDevice *dev);
-void pciDeviceSetUnbindFromStub(pciDevice *dev,
- unsigned unbind);
-unsigned pciDeviceGetRemoveSlot(pciDevice *dev);
-void pciDeviceSetRemoveSlot(pciDevice *dev,
- unsigned remove_slot);
-unsigned pciDeviceGetReprobe(pciDevice *dev);
-void pciDeviceSetReprobe(pciDevice *dev,
- unsigned reprobe);
-void pciDeviceReAttachInit(pciDevice *dev);
-
-pciDeviceList *pciDeviceListNew (void);
-void pciDeviceListFree (pciDeviceList *list);
-int pciDeviceListAdd (pciDeviceList *list,
- pciDevice *dev);
-pciDevice * pciDeviceListGet (pciDeviceList *list,
- int idx);
-int pciDeviceListCount (pciDeviceList *list);
-pciDevice * pciDeviceListSteal (pciDeviceList *list,
- pciDevice *dev);
-pciDevice * pciDeviceListStealIndex(pciDeviceList *list,
- int idx);
-void pciDeviceListDel (pciDeviceList *list,
- pciDevice *dev);
-pciDevice * pciDeviceListFind (pciDeviceList *list,
- pciDevice *dev);
-int pciDeviceListFindIndex(pciDeviceList *list,
- pciDevice *dev);
-
-/*
- * Callback that will be invoked once for each file
- * associated with / used for PCI host device access.
- *
- * Should return 0 if successfully processed, or
- * -1 to indicate error and abort iteration
- */
-typedef int (*pciDeviceFileActor)(pciDevice *dev,
- const char *path, void *opaque);
-
-int pciDeviceFileIterate(pciDevice *dev,
- pciDeviceFileActor actor,
- void *opaque);
-
-int pciDeviceIsAssignable(pciDevice *dev,
- int strict_acs_check);
-int pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher);
-
-int pciGetPhysicalFunction(const char *sysfs_path,
- struct pci_config_address **phys_fn);
-
-int pciGetVirtualFunctions(const char *sysfs_path,
- struct pci_config_address ***virtual_functions,
- unsigned int *num_virtual_functions);
-
-int pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link);
-
-int pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
- const char *vf_sysfs_device_link,
- int *vf_index);
-
-int pciConfigAddressToSysfsFile(struct pci_config_address *dev,
- char **pci_sysfs_device_link);
-
-int pciDeviceNetName(char *device_link_sysfs_path, char **netname);
-
-int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link)
- ATTRIBUTE_RETURN_CHECK;
-
-int pciGetDeviceAddrString(unsigned domain,
- unsigned bus,
- unsigned slot,
- unsigned function,
- char **pciConfigAddr)
- ATTRIBUTE_NONNULL(5) ATTRIBUTE_RETURN_CHECK;
-
-int pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
- char **pfname, int *vf_index);
-
-#endif /* __VIR_PCI_H__ */
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 88a2e4b..3ea0e39 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -28,7 +28,7 @@
#include "virterror_internal.h"
#include "vircommand.h"
#include "viralloc.h"
-#include "pci.h"
+#include "virpci.h"
#include "virlog.h"
#include <sys/ioctl.h>
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 123667c..d588e89 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -26,7 +26,7 @@
# include "virsocketaddr.h"
# include "virnetlink.h"
# include "virmacaddr.h"
-# include "pci.h"
+# include "virpci.h"
int virNetDevExists(const char *brname)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
diff --git a/src/util/virpci.c b/src/util/virpci.c
new file mode 100644
index 0000000..8875aa6
--- /dev/null
+++ b/src/util/virpci.c
@@ -0,0 +1,2285 @@
+/*
+ * Copyright (C) 2009-2012 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <
http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ * Mark McLoughlin <markmc(a)redhat.com>
+ */
+
+#include <config.h>
+
+#include "virpci.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "virlog.h"
+#include "viralloc.h"
+#include "vircommand.h"
+#include "virterror_internal.h"
+#include "virfile.h"
+
+#define PCI_SYSFS "/sys/bus/pci/"
+#define PCI_ID_LEN 10 /* "XXXX XXXX" */
+#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */
+
+#define SRIOV_FOUND 0
+#define SRIOV_NOT_FOUND 1
+#define SRIOV_ERROR -1
+
+struct _pciDevice {
+ unsigned domain;
+ unsigned bus;
+ unsigned slot;
+ unsigned function;
+
+ char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */
+ char id[PCI_ID_LEN]; /* product vendor */
+ char *path;
+ const char *used_by; /* The domain which uses the device */
+
+ unsigned pcie_cap_pos;
+ unsigned pci_pm_cap_pos;
+ unsigned has_flr : 1;
+ unsigned has_pm_reset : 1;
+ unsigned managed : 1;
+
+ /* used by reattach function */
+ unsigned unbind_from_stub : 1;
+ unsigned remove_slot : 1;
+ unsigned reprobe : 1;
+};
+
+struct _pciDeviceList {
+ unsigned count;
+ pciDevice **devs;
+};
+
+
+/* For virReportOOMError() and virReportSystemError() */
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+/* Specifications referenced in comments:
+ * PCI30 - PCI Local Bus Specification 3.0
+ * PCIe20 - PCI Express Base Specification 2.0
+ * BR12 - PCI-to-PCI Bridge Architecture Specification 1.2
+ * PM12 - PCI Bus Power Management Interface Specification 1.2
+ * ECN_AF - Advanced Capabilities for Conventional PCI ECN
+ */
+
+/* Type 0 config space header length; PCI30 Section 6.1 Configuration Space Organization
*/
+#define PCI_CONF_LEN 0x100
+#define PCI_CONF_HEADER_LEN 0x40
+
+/* PCI30 6.2.1 */
+#define PCI_HEADER_TYPE 0x0e /* Header type */
+#define PCI_HEADER_TYPE_BRIDGE 0x1
+#define PCI_HEADER_TYPE_MASK 0x7f
+#define PCI_HEADER_TYPE_MULTI 0x80
+
+/* PCI30 6.2.1 Device Identification */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+
+/* Class Code for bridge; PCI30 D.7 Base Class 06h */
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+
+/* PCI30 6.2.3 Device Status */
+#define PCI_STATUS 0x06 /* 16 bits */
+#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+
+/* PCI30 6.7 Capabilities List */
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
+
+/* PM12 3.2.1 Capability Identifier */
+#define PCI_CAP_ID_PM 0x01 /* Power Management */
+/* PCI30 H Capability IDs */
+#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
+/* ECN_AF 6.x.1.1 Capability ID for AF */
+#define PCI_CAP_ID_AF 0x13 /* Advanced Features */
+
+/* PCIe20 7.8.3 Device Capabilities Register (Offset 04h) */
+#define PCI_EXP_DEVCAP 0x4 /* Device capabilities */
+#define PCI_EXP_DEVCAP_FLR (1<<28) /* Function Level Reset */
+
+/* Header type 1 BR12 3.2 PCI-to-PCI Bridge Configuration Space Header Format */
+#define PCI_PRIMARY_BUS 0x18 /* BR12 3.2.5.2 Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* BR12 3.2.5.3 Secondary bus number */
+#define PCI_SUBORDINATE_BUS 0x1a /* BR12 3.2.5.4 Highest bus number behind the
bridge */
+#define PCI_BRIDGE_CONTROL 0x3e
+/* BR12 3.2.5.18 Bridge Control Register */
+#define PCI_BRIDGE_CTL_RESET 0x40 /* Secondary bus reset */
+
+/* PM12 3.2.4 Power Management Control/Status (Offset = 4) */
+#define PCI_PM_CTRL 4 /* PM control and status register */
+#define PCI_PM_CTRL_STATE_MASK 0x3 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_STATE_D0 0x0 /* D0 state */
+#define PCI_PM_CTRL_STATE_D3hot 0x3 /* D3 state */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x8 /* No reset for D3hot->D0 */
+
+/* ECN_AF 6.x.1 Advanced Features Capability Structure */
+#define PCI_AF_CAP 0x3 /* Advanced features capabilities */
+#define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */
+
+#define PCI_EXP_FLAGS 0x2
+#define PCI_EXP_FLAGS_TYPE 0x00f0
+#define PCI_EXP_TYPE_DOWNSTREAM 0x6
+
+#define PCI_EXT_CAP_BASE 0x100
+#define PCI_EXT_CAP_LIMIT 0x1000
+#define PCI_EXT_CAP_ID_MASK 0x0000ffff
+#define PCI_EXT_CAP_OFFSET_SHIFT 20
+#define PCI_EXT_CAP_OFFSET_MASK 0x00000ffc
+
+#define PCI_EXT_CAP_ID_ACS 0x000d
+#define PCI_EXT_ACS_CTRL 0x06
+
+#define PCI_EXT_CAP_ACS_SV 0x01
+#define PCI_EXT_CAP_ACS_RR 0x04
+#define PCI_EXT_CAP_ACS_CR 0x08
+#define PCI_EXT_CAP_ACS_UF 0x10
+#define PCI_EXT_CAP_ACS_ENABLED (PCI_EXT_CAP_ACS_SV | \
+ PCI_EXT_CAP_ACS_RR | \
+ PCI_EXT_CAP_ACS_CR | \
+ PCI_EXT_CAP_ACS_UF)
+
+static int
+pciConfigOpen(pciDevice *dev, bool fatal)
+{
+ int fd;
+
+ fd = open(dev->path, O_RDWR);
+
+ if (fd < 0) {
+ if (fatal) {
+ virReportSystemError(errno,
+ _("Failed to open config space file
'%s'"),
+ dev->path);
+ } else {
+ char ebuf[1024];
+ VIR_WARN("Failed to open config space file '%s': %s",
+ dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
+ }
+ return -1;
+ }
+
+ VIR_DEBUG("%s %s: opened %s", dev->id, dev->name, dev->path);
+ return fd;
+}
+
+static void
+pciConfigClose(pciDevice *dev, int cfgfd)
+{
+ if (VIR_CLOSE(cfgfd) < 0) {
+ char ebuf[1024];
+ VIR_WARN("Failed to close config space file '%s': %s",
+ dev->path, virStrerror(errno, ebuf, sizeof(ebuf)));
+ }
+}
+
+
+static int
+pciRead(pciDevice *dev,
+ int cfgfd,
+ unsigned pos,
+ uint8_t *buf,
+ unsigned buflen)
+{
+ memset(buf, 0, buflen);
+
+ if (lseek(cfgfd, pos, SEEK_SET) != pos ||
+ saferead(cfgfd, buf, buflen) != buflen) {
+ char ebuf[1024];
+ VIR_WARN("Failed to read from '%s' : %s", dev->path,
+ virStrerror(errno, ebuf, sizeof(ebuf)));
+ return -1;
+ }
+ return 0;
+}
+
+static uint8_t
+pciRead8(pciDevice *dev, int cfgfd, unsigned pos)
+{
+ uint8_t buf;
+ pciRead(dev, cfgfd, pos, &buf, sizeof(buf));
+ return buf;
+}
+
+static uint16_t
+pciRead16(pciDevice *dev, int cfgfd, unsigned pos)
+{
+ uint8_t buf[2];
+ pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
+ return (buf[0] << 0) | (buf[1] << 8);
+}
+
+static uint32_t
+pciRead32(pciDevice *dev, int cfgfd, unsigned pos)
+{
+ uint8_t buf[4];
+ pciRead(dev, cfgfd, pos, &buf[0], sizeof(buf));
+ return (buf[0] << 0) | (buf[1] << 8) | (buf[2] << 16) | (buf[3]
<< 24);
+}
+
+static int
+pciWrite(pciDevice *dev,
+ int cfgfd,
+ unsigned pos,
+ uint8_t *buf,
+ unsigned buflen)
+{
+ if (lseek(cfgfd, pos, SEEK_SET) != pos ||
+ safewrite(cfgfd, buf, buflen) != buflen) {
+ char ebuf[1024];
+ VIR_WARN("Failed to write to '%s' : %s", dev->path,
+ virStrerror(errno, ebuf, sizeof(ebuf)));
+ return -1;
+ }
+ return 0;
+}
+
+static void
+pciWrite16(pciDevice *dev, int cfgfd, unsigned pos, uint16_t val)
+{
+ uint8_t buf[2] = { (val >> 0), (val >> 8) };
+ pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
+}
+
+static void
+pciWrite32(pciDevice *dev, int cfgfd, unsigned pos, uint32_t val)
+{
+ uint8_t buf[4] = { (val >> 0), (val >> 8), (val >> 16), (val
>> 24) };
+ pciWrite(dev, cfgfd, pos, &buf[0], sizeof(buf));
+}
+
+typedef int (*pciIterPredicate)(pciDevice *, pciDevice *, void *);
+
+/* Iterate over available PCI devices calling @predicate
+ * to compare each one to @dev.
+ * Return -1 on error since we don't want to assume it is
+ * safe to reset if there is an error.
+ */
+static int
+pciIterDevices(pciIterPredicate predicate,
+ pciDevice *dev,
+ pciDevice **matched,
+ void *data)
+{
+ DIR *dir;
+ struct dirent *entry;
+ int ret = 0;
+ int rc;
+
+ *matched = NULL;
+
+ VIR_DEBUG("%s %s: iterating over " PCI_SYSFS "devices",
dev->id, dev->name);
+
+ dir = opendir(PCI_SYSFS "devices");
+ if (!dir) {
+ VIR_WARN("Failed to open " PCI_SYSFS "devices");
+ return -1;
+ }
+
+ while ((entry = readdir(dir))) {
+ unsigned int domain, bus, slot, function;
+ pciDevice *check;
+ char *tmp;
+
+ /* Ignore '.' and '..' */
+ if (entry->d_name[0] == '.')
+ continue;
+
+ /* expected format: <domain>:<bus>:<slot>.<function> */
+ if (/* domain */
+ virStrToLong_ui(entry->d_name, &tmp, 16, &domain) < 0 || *tmp
!= ':' ||
+ /* bus */
+ virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp !=
':' ||
+ /* slot */
+ virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp !=
'.' ||
+ /* function */
+ virStrToLong_ui(tmp + 1, NULL, 16, &function) < 0) {
+ VIR_WARN("Unusual entry in " PCI_SYSFS "devices: %s",
entry->d_name);
+ continue;
+ }
+
+ check = pciGetDevice(domain, bus, slot, function);
+ if (!check) {
+ ret = -1;
+ break;
+ }
+
+ rc = predicate(dev, check, data);
+ if (rc < 0) {
+ /* the predicate returned an error, bail */
+ pciFreeDevice(check);
+ ret = -1;
+ break;
+ }
+ else if (rc == 1) {
+ VIR_DEBUG("%s %s: iter matched on %s", dev->id, dev->name,
check->name);
+ *matched = check;
+ ret = 1;
+ break;
+ }
+
+ pciFreeDevice(check);
+ }
+ closedir(dir);
+ return ret;
+}
+
+static uint8_t
+pciFindCapabilityOffset(pciDevice *dev, int cfgfd, unsigned capability)
+{
+ uint16_t status;
+ uint8_t pos;
+
+ status = pciRead16(dev, cfgfd, PCI_STATUS);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ pos = pciRead8(dev, cfgfd, PCI_CAPABILITY_LIST);
+
+ /* Zero indicates last capability, capabilities can't
+ * be in the config space header and 0xff is returned
+ * by the kernel if we don't have access to this region
+ *
+ * Note: we're not handling loops or extended
+ * capabilities here.
+ */
+ while (pos >= PCI_CONF_HEADER_LEN && pos != 0xff) {
+ uint8_t capid = pciRead8(dev, cfgfd, pos);
+ if (capid == capability) {
+ VIR_DEBUG("%s %s: found cap 0x%.2x at 0x%.2x",
+ dev->id, dev->name, capability, pos);
+ return pos;
+ }
+
+ pos = pciRead8(dev, cfgfd, pos + 1);
+ }
+
+ VIR_DEBUG("%s %s: failed to find cap 0x%.2x", dev->id, dev->name,
capability);
+
+ return 0;
+}
+
+static unsigned int
+pciFindExtendedCapabilityOffset(pciDevice *dev,
+ int cfgfd,
+ unsigned capability)
+{
+ int ttl;
+ unsigned int pos;
+ uint32_t header;
+
+ /* minimum 8 bytes per capability */
+ ttl = (PCI_EXT_CAP_LIMIT - PCI_EXT_CAP_BASE) / 8;
+ pos = PCI_EXT_CAP_BASE;
+
+ while (ttl > 0 && pos >= PCI_EXT_CAP_BASE) {
+ header = pciRead32(dev, cfgfd, pos);
+
+ if ((header & PCI_EXT_CAP_ID_MASK) == capability)
+ return pos;
+
+ pos = (header >> PCI_EXT_CAP_OFFSET_SHIFT) & PCI_EXT_CAP_OFFSET_MASK;
+ ttl--;
+ }
+
+ return 0;
+}
+
+/* detects whether this device has FLR. Returns 0 if the device does
+ * not have FLR, 1 if it does, and -1 on error
+ */
+static int
+pciDetectFunctionLevelReset(pciDevice *dev, int cfgfd)
+{
+ uint32_t caps;
+ uint8_t pos;
+ char *path;
+ int found;
+
+ /* The PCIe Function Level Reset capability allows
+ * individual device functions to be reset without
+ * affecting any other functions on the device or
+ * any other devices on the bus. This is only common
+ * on SR-IOV NICs at the moment.
+ */
+ if (dev->pcie_cap_pos) {
+ caps = pciRead32(dev, cfgfd, dev->pcie_cap_pos + PCI_EXP_DEVCAP);
+ if (caps & PCI_EXP_DEVCAP_FLR) {
+ VIR_DEBUG("%s %s: detected PCIe FLR capability", dev->id,
dev->name);
+ return 1;
+ }
+ }
+
+ /* The PCI AF Function Level Reset capability is
+ * the same thing, except for conventional PCI
+ * devices. This is not common yet.
+ */
+ pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_AF);
+ if (pos) {
+ caps = pciRead16(dev, cfgfd, pos + PCI_AF_CAP);
+ if (caps & PCI_AF_CAP_FLR) {
+ VIR_DEBUG("%s %s: detected PCI FLR capability", dev->id,
dev->name);
+ return 1;
+ }
+ }
+
+ /* there are some buggy devices that do support FLR, but forget to
+ * advertise that fact in their capabilities. However, FLR is *required*
+ * to be present for virtual functions (VFs), so if we see that this
+ * device is a VF, we just assume FLR works
+ */
+
+ if (virAsprintf(&path, PCI_SYSFS "devices/%s/physfn", dev->name)
< 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ found = virFileExists(path);
+ VIR_FREE(path);
+ if (found) {
+ VIR_DEBUG("%s %s: buggy device didn't advertise FLR, but is a VF;
forcing flr on",
+ dev->id, dev->name);
+ return 1;
+ }
+
+ VIR_DEBUG("%s %s: no FLR capability found", dev->id, dev->name);
+
+ return 0;
+}
+
+/* Require the device has the PCI Power Management capability
+ * and that a D3hot->D0 transition will results in a full
+ * internal reset, not just a soft reset.
+ */
+static unsigned
+pciDetectPowerManagementReset(pciDevice *dev, int cfgfd)
+{
+ if (dev->pci_pm_cap_pos) {
+ uint32_t ctl;
+
+ /* require the NO_SOFT_RESET bit is clear */
+ ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
+ if (!(ctl & PCI_PM_CTRL_NO_SOFT_RESET)) {
+ VIR_DEBUG("%s %s: detected PM reset capability", dev->id,
dev->name);
+ return 1;
+ }
+ }
+
+ VIR_DEBUG("%s %s: no PM reset capability found", dev->id,
dev->name);
+
+ return 0;
+}
+
+/* Any active devices on the same domain/bus ? */
+static int
+pciSharesBusWithActive(pciDevice *dev, pciDevice *check, void *data)
+{
+ pciDeviceList *inactiveDevs = data;
+
+ /* Different domain, different bus, or simply identical device */
+ if (dev->domain != check->domain ||
+ dev->bus != check->bus ||
+ (dev->slot == check->slot &&
+ dev->function == check->function))
+ return 0;
+
+ /* same bus, but inactive, i.e. about to be assigned to guest */
+ if (inactiveDevs && pciDeviceListFind(inactiveDevs, check))
+ return 0;
+
+ return 1;
+}
+
+static pciDevice *
+pciBusContainsActiveDevices(pciDevice *dev,
+ pciDeviceList *inactiveDevs)
+{
+ pciDevice *active = NULL;
+ if (pciIterDevices(pciSharesBusWithActive,
+ dev, &active, inactiveDevs) < 0)
+ return NULL;
+ return active;
+}
+
+/* Is @check the parent of @dev ? */
+static int
+pciIsParent(pciDevice *dev, pciDevice *check, void *data)
+{
+ uint16_t device_class;
+ uint8_t header_type, secondary, subordinate;
+ pciDevice **best = data;
+ int ret = 0;
+ int fd;
+
+ if (dev->domain != check->domain)
+ return 0;
+
+ if ((fd = pciConfigOpen(check, false)) < 0)
+ return 0;
+
+ /* Is it a bridge? */
+ device_class = pciRead16(check, fd, PCI_CLASS_DEVICE);
+ if (device_class != PCI_CLASS_BRIDGE_PCI)
+ goto cleanup;
+
+ /* Is it a plane? */
+ header_type = pciRead8(check, fd, PCI_HEADER_TYPE);
+ if ((header_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
+ goto cleanup;
+
+ secondary = pciRead8(check, fd, PCI_SECONDARY_BUS);
+ subordinate = pciRead8(check, fd, PCI_SUBORDINATE_BUS);
+
+ VIR_DEBUG("%s %s: found parent device %s", dev->id, dev->name,
check->name);
+
+ /* if the secondary bus exactly equals the device's bus, then we found
+ * the direct parent. No further work is necessary
+ */
+ if (dev->bus == secondary) {
+ ret = 1;
+ goto cleanup;
+ }
+
+ /* otherwise, SRIOV allows VFs to be on different busses then their PFs.
+ * In this case, what we need to do is look for the "best" match; i.e.
+ * the most restrictive match that still satisfies all of the conditions.
+ */
+ if (dev->bus > secondary && dev->bus <= subordinate) {
+ if (*best == NULL) {
+ *best = pciGetDevice(check->domain, check->bus, check->slot,
+ check->function);
+ if (*best == NULL) {
+ ret = -1;
+ goto cleanup;
+ }
+ } else {
+ /* OK, we had already recorded a previous "best" match for the
+ * parent. See if the current device is more restrictive than the
+ * best, and if so, make it the new best
+ */
+ int bestfd;
+ uint8_t best_secondary;
+
+ if ((bestfd = pciConfigOpen(*best, false)) < 0)
+ goto cleanup;
+ best_secondary = pciRead8(*best, bestfd, PCI_SECONDARY_BUS);
+ pciConfigClose(*best, bestfd);
+
+ if (secondary > best_secondary) {
+ pciFreeDevice(*best);
+ *best = pciGetDevice(check->domain, check->bus, check->slot,
+ check->function);
+ if (*best == NULL) {
+ ret = -1;
+ goto cleanup;
+ }
+ }
+ }
+ }
+
+cleanup:
+ pciConfigClose(check, fd);
+ return ret;
+}
+
+static int
+pciGetParentDevice(pciDevice *dev, pciDevice **parent)
+{
+ pciDevice *best = NULL;
+ int ret;
+
+ *parent = NULL;
+ ret = pciIterDevices(pciIsParent, dev, parent, &best);
+ if (ret == 1)
+ pciFreeDevice(best);
+ else if (ret == 0)
+ *parent = best;
+ return ret;
+}
+
+/* Secondary Bus Reset is our sledgehammer - it resets all
+ * devices behind a bus.
+ */
+static int
+pciTrySecondaryBusReset(pciDevice *dev,
+ int cfgfd,
+ pciDeviceList *inactiveDevs)
+{
+ pciDevice *parent, *conflict;
+ uint8_t config_space[PCI_CONF_LEN];
+ uint16_t ctl;
+ int ret = -1;
+ int parentfd;
+
+ /* Refuse to do a secondary bus reset if there are other
+ * devices/functions behind the bus are used by the host
+ * or other guests.
+ */
+ if ((conflict = pciBusContainsActiveDevices(dev, inactiveDevs))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Active %s devices on bus with %s, not doing bus
reset"),
+ conflict->name, dev->name);
+ return -1;
+ }
+
+ /* Find the parent bus */
+ if (pciGetParentDevice(dev, &parent) < 0)
+ return -1;
+ if (!parent) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to find parent device for %s"),
+ dev->name);
+ return -1;
+ }
+ if ((parentfd = pciConfigOpen(parent, true)) < 0)
+ goto out;
+
+ VIR_DEBUG("%s %s: doing a secondary bus reset", dev->id, dev->name);
+
+ /* Save and restore the device's config space; we only do this
+ * for the supplied device since we refuse to do a reset if there
+ * are multiple devices/functions
+ */
+ if (pciRead(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to read PCI config space for %s"),
+ dev->name);
+ goto out;
+ }
+
+ /* Read the control register, set the reset flag, wait 200ms,
+ * unset the reset flag and wait 200ms.
+ */
+ ctl = pciRead16(dev, cfgfd, PCI_BRIDGE_CONTROL);
+
+ pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL,
+ ctl | PCI_BRIDGE_CTL_RESET);
+
+ usleep(200 * 1000); /* sleep 200ms */
+
+ pciWrite16(parent, parentfd, PCI_BRIDGE_CONTROL, ctl);
+
+ usleep(200 * 1000); /* sleep 200ms */
+
+ if (pciWrite(dev, cfgfd, 0, config_space, PCI_CONF_LEN) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to restore PCI config space for %s"),
+ dev->name);
+ goto out;
+ }
+ ret = 0;
+
+out:
+ pciConfigClose(parent, parentfd);
+ pciFreeDevice(parent);
+ return ret;
+}
+
+/* Power management reset attempts to reset a device using a
+ * D-state transition from D3hot to D0. Note, in detect_pm_reset()
+ * above we require the device supports a full internal reset.
+ */
+static int
+pciTryPowerManagementReset(pciDevice *dev, int cfgfd)
+{
+ uint8_t config_space[PCI_CONF_LEN];
+ uint32_t ctl;
+
+ if (!dev->pci_pm_cap_pos)
+ return -1;
+
+ /* Save and restore the device's config space. */
+ if (pciRead(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to read PCI config space for %s"),
+ dev->name);
+ return -1;
+ }
+
+ VIR_DEBUG("%s %s: doing a power management reset", dev->id,
dev->name);
+
+ ctl = pciRead32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL);
+ ctl &= ~PCI_PM_CTRL_STATE_MASK;
+
+ pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
+ ctl | PCI_PM_CTRL_STATE_D3hot);
+
+ usleep(10 * 1000); /* sleep 10ms */
+
+ pciWrite32(dev, cfgfd, dev->pci_pm_cap_pos + PCI_PM_CTRL,
+ ctl | PCI_PM_CTRL_STATE_D0);
+
+ usleep(10 * 1000); /* sleep 10ms */
+
+ if (pciWrite(dev, cfgfd, 0, &config_space[0], PCI_CONF_LEN) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to restore PCI config space for %s"),
+ dev->name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+pciInitDevice(pciDevice *dev, int cfgfd)
+{
+ int flr;
+
+ dev->pcie_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_EXP);
+ dev->pci_pm_cap_pos = pciFindCapabilityOffset(dev, cfgfd, PCI_CAP_ID_PM);
+ flr = pciDetectFunctionLevelReset(dev, cfgfd);
+ if (flr < 0)
+ return flr;
+ dev->has_flr = flr;
+ dev->has_pm_reset = pciDetectPowerManagementReset(dev, cfgfd);
+
+ return 0;
+}
+
+int
+pciResetDevice(pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs)
+{
+ int ret = -1;
+ int fd;
+
+ if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Not resetting active device %s"), dev->name);
+ return -1;
+ }
+
+ if ((fd = pciConfigOpen(dev, true)) < 0)
+ return -1;
+
+ if (pciInitDevice(dev, fd) < 0)
+ goto cleanup;
+
+ /* KVM will perform FLR when starting and stopping
+ * a guest, so there is no need for us to do it here.
+ */
+ if (dev->has_flr) {
+ ret = 0;
+ goto cleanup;
+ }
+
+ /* If the device supports PCI power management reset,
+ * that's the next best thing because it only resets
+ * the function, not the whole device.
+ */
+ if (dev->has_pm_reset)
+ ret = pciTryPowerManagementReset(dev, fd);
+
+ /* Bus reset is not an option with the root bus */
+ if (ret < 0 && dev->bus != 0)
+ ret = pciTrySecondaryBusReset(dev, fd, inactiveDevs);
+
+ if (ret < 0) {
+ virErrorPtr err = virGetLastError();
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unable to reset PCI device %s: %s"),
+ dev->name,
+ err ? err->message : _("no FLR, PM reset or bus reset
available"));
+ }
+
+cleanup:
+ pciConfigClose(dev, fd);
+ return ret;
+}
+
+
+static int
+pciDriverDir(char **buffer, const char *driver)
+{
+ VIR_FREE(*buffer);
+
+ if (virAsprintf(buffer, PCI_SYSFS "drivers/%s", driver) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+pciDriverFile(char **buffer, const char *driver, const char *file)
+{
+ VIR_FREE(*buffer);
+
+ if (virAsprintf(buffer, PCI_SYSFS "drivers/%s/%s", driver, file) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+pciDeviceFile(char **buffer, const char *device, const char *file)
+{
+ VIR_FREE(*buffer);
+
+ if (virAsprintf(buffer, PCI_SYSFS "devices/%s/%s", device, file) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ return 0;
+}
+
+
+static const char *
+pciFindStubDriver(void)
+{
+ char *drvpath = NULL;
+ int probed = 0;
+
+recheck:
+ if (pciDriverDir(&drvpath, "pci-stub") < 0) {
+ return NULL;
+ }
+
+ if (virFileExists(drvpath)) {
+ VIR_FREE(drvpath);
+ return "pci-stub";
+ }
+
+ if (pciDriverDir(&drvpath, "pciback") < 0) {
+ return NULL;
+ }
+
+ if (virFileExists(drvpath)) {
+ VIR_FREE(drvpath);
+ return "pciback";
+ }
+
+ VIR_FREE(drvpath);
+
+ if (!probed) {
+ const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL };
+ const char *const backprobe[] = { MODPROBE, "pciback", NULL };
+
+ probed = 1;
+ /*
+ * Probing for pci-stub will succeed regardless of whether
+ * on native or Xen kernels.
+ * On Xen though, we want to prefer pciback, so probe
+ * for that first, because that will only work on Xen
+ */
+ if (virRun(backprobe, NULL) < 0 &&
+ virRun(stubprobe, NULL) < 0) {
+ char ebuf[1024];
+ VIR_WARN("failed to load pci-stub or pciback drivers: %s",
+ virStrerror(errno, ebuf, sizeof(ebuf)));
+ return NULL;
+ }
+
+ goto recheck;
+ }
+
+ return NULL;
+}
+
+static int
+pciUnbindDeviceFromStub(pciDevice *dev, const char *driver)
+{
+ int result = -1;
+ char *drvdir = NULL;
+ char *path = NULL;
+
+ if (pciDriverDir(&drvdir, driver) < 0)
+ goto cleanup;
+
+ if (!dev->unbind_from_stub)
+ goto remove_slot;
+
+ /* If the device is bound to stub, unbind it.
+ */
+ if (pciDeviceFile(&path, dev->name, "driver") < 0)
+ goto cleanup;
+
+ if (virFileExists(drvdir) && virFileLinkPointsTo(path, drvdir)) {
+ if (pciDriverFile(&path, driver, "unbind") < 0) {
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(path, dev->name, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to unbind PCI device '%s' from
%s"),
+ dev->name, driver);
+ goto cleanup;
+ }
+ }
+ dev->unbind_from_stub = 0;
+
+remove_slot:
+ if (!dev->remove_slot)
+ goto reprobe;
+
+ /* Xen's pciback.ko wants you to use remove_slot on the specific device */
+ if (pciDriverFile(&path, driver, "remove_slot") < 0) {
+ goto cleanup;
+ }
+
+ if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to remove slot for PCI device '%s'
from %s"),
+ dev->name, driver);
+ goto cleanup;
+ }
+ dev->remove_slot = 0;
+
+reprobe:
+ if (!dev->reprobe) {
+ result = 0;
+ goto cleanup;
+ }
+
+ /* Trigger a re-probe of the device is not in the stub's dynamic
+ * ID table. If the stub is available, but 'remove_id' isn't
+ * available, then re-probing would just cause the device to be
+ * re-bound to the stub.
+ */
+ if (pciDriverFile(&path, driver, "remove_id") < 0) {
+ goto cleanup;
+ }
+
+ if (!virFileExists(drvdir) || virFileExists(path)) {
+ if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name, 0) < 0)
{
+ virReportSystemError(errno,
+ _("Failed to trigger a re-probe for PCI device
'%s'"),
+ dev->name);
+ goto cleanup;
+ }
+ }
+
+ result = 0;
+
+cleanup:
+ /* do not do it again */
+ dev->unbind_from_stub = 0;
+ dev->remove_slot = 0;
+ dev->reprobe = 0;
+
+ VIR_FREE(drvdir);
+ VIR_FREE(path);
+
+ return result;
+}
+
+
+static int
+pciBindDeviceToStub(pciDevice *dev, const char *driver)
+{
+ int result = -1;
+ char *drvdir = NULL;
+ char *path = NULL;
+ int reprobe = 0;
+
+ /* check whether the device is already bound to a driver */
+ if (pciDriverDir(&drvdir, driver) < 0 ||
+ pciDeviceFile(&path, dev->name, "driver") < 0) {
+ goto cleanup;
+ }
+
+ if (virFileExists(path)) {
+ if (virFileLinkPointsTo(path, drvdir)) {
+ /* The device is already bound to pci-stub */
+ result = 0;
+ goto cleanup;
+ }
+ reprobe = 1;
+ }
+
+ /* Add the PCI device ID to the stub's dynamic ID table;
+ * this is needed to allow us to bind the device to the stub.
+ * Note: if the device is not currently bound to any driver,
+ * stub will immediately be bound to the device. Also, note
+ * that if a new device with this ID is hotplugged, or if a probe
+ * is triggered for such a device, it will also be immediately
+ * bound by the stub.
+ */
+ if (pciDriverFile(&path, driver, "new_id") < 0) {
+ goto cleanup;
+ }
+
+ if (virFileWriteStr(path, dev->id, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to add PCI device ID '%s' to
%s"),
+ dev->id, driver);
+ goto cleanup;
+ }
+
+ /* check whether the device is bound to pci-stub when we write dev->id to
+ * new_id.
+ */
+ if (pciDriverDir(&drvdir, driver) < 0 ||
+ pciDeviceFile(&path, dev->name, "driver") < 0) {
+ goto remove_id;
+ }
+
+ if (virFileLinkPointsTo(path, drvdir)) {
+ dev->unbind_from_stub = 1;
+ dev->remove_slot = 1;
+ goto remove_id;
+ }
+
+ /* If the device is already bound to a driver, unbind it.
+ * Note, this will have rather unpleasant side effects if this
+ * PCI device happens to be IDE controller for the disk hosting
+ * your root filesystem.
+ */
+ if (pciDeviceFile(&path, dev->name, "driver/unbind") < 0) {
+ goto cleanup;
+ }
+
+ if (virFileExists(path)) {
+ if (virFileWriteStr(path, dev->name, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to unbind PCI device
'%s'"),
+ dev->name);
+ goto cleanup;
+ }
+ dev->reprobe = reprobe;
+ }
+
+ /* If the device isn't already bound to pci-stub, try binding it now.
+ */
+ if (pciDriverDir(&drvdir, driver) < 0 ||
+ pciDeviceFile(&path, dev->name, "driver") < 0) {
+ goto remove_id;
+ }
+
+ if (!virFileLinkPointsTo(path, drvdir)) {
+ /* Xen's pciback.ko wants you to use new_slot first */
+ if (pciDriverFile(&path, driver, "new_slot") < 0) {
+ goto remove_id;
+ }
+
+ if (virFileExists(path) && virFileWriteStr(path, dev->name, 0) < 0)
{
+ virReportSystemError(errno,
+ _("Failed to add slot for PCI device '%s'
to %s"),
+ dev->name, driver);
+ goto remove_id;
+ }
+ dev->remove_slot = 1;
+
+ if (pciDriverFile(&path, driver, "bind") < 0) {
+ goto remove_id;
+ }
+
+ if (virFileWriteStr(path, dev->name, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to bind PCI device '%s' to
%s"),
+ dev->name, driver);
+ goto remove_id;
+ }
+ dev->unbind_from_stub = 1;
+ }
+
+remove_id:
+ /* If 'remove_id' exists, remove the device id from pci-stub's dynamic
+ * ID table so that 'drivers_probe' works below.
+ */
+ if (pciDriverFile(&path, driver, "remove_id") < 0) {
+ /* We do not remove PCI ID from pci-stub, and we cannot reprobe it */
+ if (dev->reprobe) {
+ VIR_WARN("Could not remove PCI ID '%s' from %s, and the device
"
+ "cannot be probed again.", dev->id, driver);
+ }
+ dev->reprobe = 0;
+ goto cleanup;
+ }
+
+ if (virFileExists(path) && virFileWriteStr(path, dev->id, 0) < 0) {
+ virReportSystemError(errno,
+ _("Failed to remove PCI ID '%s' from
%s"),
+ dev->id, driver);
+
+ /* remove PCI ID from pci-stub failed, and we cannot reprobe it */
+ if (dev->reprobe) {
+ VIR_WARN("Failed to remove PCI ID '%s' from %s, and the device
"
+ "cannot be probed again.", dev->id, driver);
+ }
+ dev->reprobe = 0;
+ goto cleanup;
+ }
+
+ result = 0;
+
+cleanup:
+ VIR_FREE(drvdir);
+ VIR_FREE(path);
+
+ if (result < 0) {
+ pciUnbindDeviceFromStub(dev, driver);
+ }
+
+ return result;
+}
+
+int
+pciDettachDevice(pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs)
+{
+ const char *driver = pciFindStubDriver();
+ if (!driver) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot find any PCI stub module"));
+ return -1;
+ }
+
+ if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Not detaching active device %s"), dev->name);
+ return -1;
+ }
+
+ if (pciBindDeviceToStub(dev, driver) < 0)
+ return -1;
+
+ /* Add the dev into list inactiveDevs */
+ if (inactiveDevs && !pciDeviceListFind(inactiveDevs, dev)) {
+ if (pciDeviceListAdd(inactiveDevs, dev) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+pciReAttachDevice(pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs)
+{
+ const char *driver = pciFindStubDriver();
+ if (!driver) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot find any PCI stub module"));
+ return -1;
+ }
+
+ if (activeDevs && pciDeviceListFind(activeDevs, dev)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Not reattaching active device %s"), dev->name);
+ return -1;
+ }
+
+ if (pciUnbindDeviceFromStub(dev, driver) < 0)
+ return -1;
+
+ /* Steal the dev from list inactiveDevs */
+ if (inactiveDevs)
+ pciDeviceListSteal(inactiveDevs, dev);
+
+ return 0;
+}
+
+/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on
+ * the host when doing device passthrough. This can lead to a race
+ * condition where the hypervisor is still cleaning up the device while
+ * libvirt is trying to re-attach it to the host device driver. To avoid
+ * this situation, we look through /proc/iomem, and if the hypervisor is
+ * still holding onto the bar (denoted by the string in the matcher variable),
+ * then we can wait around a bit for that to clear up.
+ *
+ * A typical /proc/iomem looks like this (snipped for brevity):
+ * 00010000-0008efff : System RAM
+ * 0008f000-0008ffff : reserved
+ * ...
+ * 00100000-cc9fcfff : System RAM
+ * 00200000-00483d3b : Kernel code
+ * 00483d3c-005c88df : Kernel data
+ * cc9fd000-ccc71fff : ACPI Non-volatile Storage
+ * ...
+ * d0200000-d02fffff : PCI Bus #05
+ * d0200000-d021ffff : 0000:05:00.0
+ * d0200000-d021ffff : e1000e
+ * d0220000-d023ffff : 0000:05:00.0
+ * d0220000-d023ffff : e1000e
+ * ...
+ * f0000000-f0003fff : 0000:00:1b.0
+ * f0000000-f0003fff : kvm_assigned_device
+ *
+ * Returns 0 if we are clear to continue, and 1 if the hypervisor is still
+ * holding onto the resource.
+ */
+int
+pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher)
+{
+ FILE *fp;
+ char line[160];
+ char *tmp;
+ unsigned long long start, end;
+ unsigned int domain, bus, slot, function;
+ int in_matching_device;
+ int ret;
+ size_t match_depth;
+
+ fp = fopen("/proc/iomem", "r");
+ if (!fp) {
+ /* If we failed to open iomem, we just basically ignore the error. The
+ * unbind might succeed anyway, and besides, it's very likely we have
+ * no way to report the error
+ */
+ VIR_DEBUG("Failed to open /proc/iomem, trying to continue anyway");
+ return 0;
+ }
+
+ ret = 0;
+ in_matching_device = 0;
+ match_depth = 0;
+ while (fgets(line, sizeof(line), fp) != 0) {
+ /* the logic here is a bit confusing. For each line, we look to
+ * see if it matches the domain:bus:slot.function we were given.
+ * If this line matches the DBSF, then any subsequent lines indented
+ * by 2 spaces are the PCI regions for this device. It's also
+ * possible that none of the PCI regions are currently mapped, in
+ * which case we have no indented regions. This code handles all
+ * of these situations
+ */
+ if (in_matching_device && (strspn(line, " ") == (match_depth +
2))) {
+ /* expected format: <start>-<end> : <suffix> */
+ if (/* start */
+ virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp !=
'-' ||
+ /* end */
+ virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
+ (tmp = STRSKIP(tmp, " : ")) == NULL)
+ continue;
+
+ if (STRPREFIX(tmp, matcher)) {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ in_matching_device = 0;
+
+ /* expected format: <start>-<end> :
<domain>:<bus>:<slot>.<function> */
+ if (/* start */
+ virStrToLong_ull(line, &tmp, 16, &start) < 0 || *tmp !=
'-' ||
+ /* end */
+ virStrToLong_ull(tmp + 1, &tmp, 16, &end) < 0 ||
+ (tmp = STRSKIP(tmp, " : ")) == NULL ||
+ /* domain */
+ virStrToLong_ui(tmp, &tmp, 16, &domain) < 0 || *tmp !=
':' ||
+ /* bus */
+ virStrToLong_ui(tmp + 1, &tmp, 16, &bus) < 0 || *tmp !=
':' ||
+ /* slot */
+ virStrToLong_ui(tmp + 1, &tmp, 16, &slot) < 0 || *tmp !=
'.' ||
+ /* function */
+ virStrToLong_ui(tmp + 1, &tmp, 16, &function) < 0 || *tmp !=
'\n')
+ continue;
+
+ if (domain != dev->domain || bus != dev->bus || slot != dev->slot
||
+ function != dev->function)
+ continue;
+ in_matching_device = 1;
+ match_depth = strspn(line, " ");
+ }
+ }
+
+ VIR_FORCE_FCLOSE(fp);
+
+ return ret;
+}
+
+static char *
+pciReadDeviceID(pciDevice *dev, const char *id_name)
+{
+ char *path = NULL;
+ char *id_str;
+
+ if (pciDeviceFile(&path, dev->name, id_name) < 0) {
+ return NULL;
+ }
+
+ /* ID string is '0xNNNN\n' ... i.e. 7 bytes */
+ if (virFileReadAll(path, 7, &id_str) < 0) {
+ VIR_FREE(path);
+ return NULL;
+ }
+
+ VIR_FREE(path);
+
+ /* Check for 0x suffix */
+ if (id_str[0] != '0' || id_str[1] != 'x') {
+ VIR_FREE(id_str);
+ return NULL;
+ }
+
+ /* Chop off the newline; we know the string is 7 bytes */
+ id_str[6] = '\0';
+
+ return id_str;
+}
+
+int
+pciGetDeviceAddrString(unsigned domain,
+ unsigned bus,
+ unsigned slot,
+ unsigned function,
+ char **pciConfigAddr)
+{
+ pciDevice *dev = NULL;
+ int ret = -1;
+
+ dev = pciGetDevice(domain, bus, slot, function);
+ if (dev != NULL) {
+ if ((*pciConfigAddr = strdup(dev->name)) == NULL) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ ret = 0;
+ }
+
+cleanup:
+ pciFreeDevice(dev);
+ return ret;
+}
+
+pciDevice *
+pciGetDevice(unsigned domain,
+ unsigned bus,
+ unsigned slot,
+ unsigned function)
+{
+ pciDevice *dev;
+ char *vendor = NULL;
+ char *product = NULL;
+
+ if (VIR_ALLOC(dev) < 0) {
+ virReportOOMError();
+ return NULL;
+ }
+
+ dev->domain = domain;
+ dev->bus = bus;
+ dev->slot = slot;
+ dev->function = function;
+
+ if (snprintf(dev->name, sizeof(dev->name), "%.4x:%.2x:%.2x.%.1x",
+ dev->domain, dev->bus, dev->slot,
+ dev->function) >= sizeof(dev->name)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("dev->name buffer overflow: %.4x:%.2x:%.2x.%.1x"),
+ dev->domain, dev->bus, dev->slot, dev->function);
+ goto error;
+ }
+ if (virAsprintf(&dev->path, PCI_SYSFS "devices/%s/config",
+ dev->name) < 0) {
+ virReportOOMError();
+ goto error;
+ }
+
+ if (access(dev->path, F_OK) != 0) {
+ virReportSystemError(errno,
+ _("Device %s not found: could not access %s"),
+ dev->name, dev->path);
+ goto error;
+ }
+
+ vendor = pciReadDeviceID(dev, "vendor");
+ product = pciReadDeviceID(dev, "device");
+
+ if (!vendor || !product) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to read product/vendor ID for %s"),
+ dev->name);
+ goto error;
+ }
+
+ /* strings contain '0x' prefix */
+ if (snprintf(dev->id, sizeof(dev->id), "%s %s", &vendor[2],
+ &product[2]) >= sizeof(dev->id)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("dev->id buffer overflow: %s %s"),
+ &vendor[2], &product[2]);
+ goto error;
+ }
+
+ VIR_DEBUG("%s %s: initialized", dev->id, dev->name);
+
+cleanup:
+ VIR_FREE(product);
+ VIR_FREE(vendor);
+ return dev;
+
+error:
+ pciFreeDevice(dev);
+ dev = NULL;
+ goto cleanup;
+}
+
+void
+pciFreeDevice(pciDevice *dev)
+{
+ if (!dev)
+ return;
+ VIR_DEBUG("%s %s: freeing", dev->id, dev->name);
+ VIR_FREE(dev->path);
+ VIR_FREE(dev);
+}
+
+const char *
+pciDeviceGetName(pciDevice *dev)
+{
+ return dev->name;
+}
+
+void pciDeviceSetManaged(pciDevice *dev, unsigned managed)
+{
+ dev->managed = !!managed;
+}
+
+unsigned pciDeviceGetManaged(pciDevice *dev)
+{
+ return dev->managed;
+}
+
+unsigned
+pciDeviceGetUnbindFromStub(pciDevice *dev)
+{
+ return dev->unbind_from_stub;
+}
+
+void
+pciDeviceSetUnbindFromStub(pciDevice *dev, unsigned unbind)
+{
+ dev->unbind_from_stub = !!unbind;
+}
+
+unsigned
+pciDeviceGetRemoveSlot(pciDevice *dev)
+{
+ return dev->remove_slot;
+}
+
+void
+pciDeviceSetRemoveSlot(pciDevice *dev, unsigned remove_slot)
+{
+ dev->remove_slot = !!remove_slot;
+}
+
+unsigned
+pciDeviceGetReprobe(pciDevice *dev)
+{
+ return dev->reprobe;
+}
+
+void
+pciDeviceSetReprobe(pciDevice *dev, unsigned reprobe)
+{
+ dev->reprobe = !!reprobe;
+}
+
+void
+pciDeviceSetUsedBy(pciDevice *dev, const char *name)
+{
+ dev->used_by = name;
+}
+
+const char *
+pciDeviceGetUsedBy(pciDevice *dev)
+{
+ return dev->used_by;
+}
+
+void pciDeviceReAttachInit(pciDevice *pci)
+{
+ pci->unbind_from_stub = 1;
+ pci->remove_slot = 1;
+ pci->reprobe = 1;
+}
+
+
+pciDeviceList *
+pciDeviceListNew(void)
+{
+ pciDeviceList *list;
+
+ if (VIR_ALLOC(list) < 0) {
+ virReportOOMError();
+ return NULL;
+ }
+
+ return list;
+}
+
+void
+pciDeviceListFree(pciDeviceList *list)
+{
+ int i;
+
+ if (!list)
+ return;
+
+ for (i = 0; i < list->count; i++) {
+ pciFreeDevice(list->devs[i]);
+ list->devs[i] = NULL;
+ }
+
+ list->count = 0;
+ VIR_FREE(list->devs);
+ VIR_FREE(list);
+}
+
+int
+pciDeviceListAdd(pciDeviceList *list,
+ pciDevice *dev)
+{
+ if (pciDeviceListFind(list, dev)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Device %s is already in use"), dev->name);
+ return -1;
+ }
+
+ if (VIR_REALLOC_N(list->devs, list->count+1) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ list->devs[list->count++] = dev;
+
+ return 0;
+}
+
+pciDevice *
+pciDeviceListGet(pciDeviceList *list,
+ int idx)
+{
+ if (idx >= list->count)
+ return NULL;
+ if (idx < 0)
+ return NULL;
+
+ return list->devs[idx];
+}
+
+int
+pciDeviceListCount(pciDeviceList *list)
+{
+ return list->count;
+}
+
+pciDevice *
+pciDeviceListStealIndex(pciDeviceList *list,
+ int idx)
+{
+ pciDevice *ret;
+
+ if (idx < 0 || idx >= list->count)
+ return NULL;
+
+ ret = list->devs[idx];
+
+ if (idx != --list->count) {
+ memmove(&list->devs[idx],
+ &list->devs[idx + 1],
+ sizeof(*list->devs) * (list->count - idx));
+ }
+
+ if (VIR_REALLOC_N(list->devs, list->count) < 0) {
+ ; /* not fatal */
+ }
+
+ return ret;
+}
+
+pciDevice *
+pciDeviceListSteal(pciDeviceList *list,
+ pciDevice *dev)
+{
+ return pciDeviceListStealIndex(list, pciDeviceListFindIndex(list, dev));
+}
+
+void
+pciDeviceListDel(pciDeviceList *list,
+ pciDevice *dev)
+{
+ pciDevice *ret = pciDeviceListSteal(list, dev);
+ if (ret)
+ pciFreeDevice(ret);
+}
+
+int
+pciDeviceListFindIndex(pciDeviceList *list, pciDevice *dev)
+{
+ int i;
+
+ for (i = 0; i < list->count; i++)
+ if (list->devs[i]->domain == dev->domain &&
+ list->devs[i]->bus == dev->bus &&
+ list->devs[i]->slot == dev->slot &&
+ list->devs[i]->function == dev->function)
+ return i;
+ return -1;
+}
+
+pciDevice *
+pciDeviceListFind(pciDeviceList *list, pciDevice *dev)
+{
+ int i;
+
+ if ((i = pciDeviceListFindIndex(list, dev)) >= 0)
+ return list->devs[i];
+ else
+ return NULL;
+}
+
+
+int pciDeviceFileIterate(pciDevice *dev,
+ pciDeviceFileActor actor,
+ void *opaque)
+{
+ char *pcidir = NULL;
+ char *file = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ struct dirent *ent;
+
+ if (virAsprintf(&pcidir, "/sys/bus/pci/devices/%04x:%02x:%02x.%x",
+ dev->domain, dev->bus, dev->slot, dev->function) < 0)
{
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ if (!(dir = opendir(pcidir))) {
+ virReportSystemError(errno,
+ _("cannot open %s"), pcidir);
+ goto cleanup;
+ }
+
+ while ((ent = readdir(dir)) != NULL) {
+ /* Device assignment requires:
+ * $PCIDIR/config, $PCIDIR/resource, $PCIDIR/resourceNNN,
+ * $PCIDIR/rom, $PCIDIR/reset
+ */
+ if (STREQ(ent->d_name, "config") ||
+ STRPREFIX(ent->d_name, "resource") ||
+ STREQ(ent->d_name, "rom") ||
+ STREQ(ent->d_name, "reset")) {
+ if (virAsprintf(&file, "%s/%s", pcidir, ent->d_name) < 0)
{
+ virReportOOMError();
+ goto cleanup;
+ }
+ if ((actor)(dev, file, opaque) < 0)
+ goto cleanup;
+
+ VIR_FREE(file);
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ if (dir)
+ closedir(dir);
+ VIR_FREE(file);
+ VIR_FREE(pcidir);
+ return ret;
+}
+
+static int
+pciDeviceDownstreamLacksACS(pciDevice *dev)
+{
+ uint16_t flags;
+ uint16_t ctrl;
+ unsigned int pos;
+ int fd;
+ int ret = 0;
+
+ if ((fd = pciConfigOpen(dev, true)) < 0)
+ return -1;
+
+ if (pciInitDevice(dev, fd) < 0) {
+ ret = -1;
+ goto cleanup;
+ }
+
+ pos = dev->pcie_cap_pos;
+ if (!pos || pciRead16(dev, fd, PCI_CLASS_DEVICE) != PCI_CLASS_BRIDGE_PCI)
+ goto cleanup;
+
+ flags = pciRead16(dev, fd, pos + PCI_EXP_FLAGS);
+ if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM)
+ goto cleanup;
+
+ pos = pciFindExtendedCapabilityOffset(dev, fd, PCI_EXT_CAP_ID_ACS);
+ if (!pos) {
+ VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id,
dev->name);
+ ret = 1;
+ goto cleanup;
+ }
+
+ ctrl = pciRead16(dev, fd, pos + PCI_EXT_ACS_CTRL);
+ if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) {
+ VIR_DEBUG("%s %s: downstream port has ACS disabled",
+ dev->id, dev->name);
+ ret = 1;
+ goto cleanup;
+ }
+
+cleanup:
+ pciConfigClose(dev, fd);
+ return ret;
+}
+
+static int
+pciDeviceIsBehindSwitchLackingACS(pciDevice *dev)
+{
+ pciDevice *parent;
+
+ if (pciGetParentDevice(dev, &parent) < 0)
+ return -1;
+ if (!parent) {
+ /* if we have no parent, and this is the root bus, ACS doesn't come
+ * into play since devices on the root bus can't P2P without going
+ * through the root IOMMU.
+ */
+ if (dev->bus == 0)
+ return 0;
+ else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to find parent device for %s"),
+ dev->name);
+ return -1;
+ }
+ }
+
+ /* XXX we should rather fail when we can't find device's parent and
+ * stop the loop when we get to root instead of just stopping when no
+ * parent can be found
+ */
+ do {
+ pciDevice *tmp;
+ int acs;
+ int ret;
+
+ acs = pciDeviceDownstreamLacksACS(parent);
+
+ if (acs) {
+ pciFreeDevice(parent);
+ if (acs < 0)
+ return -1;
+ else
+ return 1;
+ }
+
+ tmp = parent;
+ ret = pciGetParentDevice(parent, &parent);
+ pciFreeDevice(tmp);
+ if (ret < 0)
+ return -1;
+ } while (parent);
+
+ return 0;
+}
+
+int pciDeviceIsAssignable(pciDevice *dev,
+ int strict_acs_check)
+{
+ int ret;
+
+ /* XXX This could be a great place to actually check that a non-managed
+ * device isn't in use, e.g. by checking that device is either un-bound
+ * or bound to a stub driver.
+ */
+
+ ret = pciDeviceIsBehindSwitchLackingACS(dev);
+ if (ret < 0)
+ return 0;
+
+ if (ret) {
+ if (!strict_acs_check) {
+ VIR_DEBUG("%s %s: strict ACS check disabled; device assignment
allowed",
+ dev->id, dev->name);
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Device %s is behind a switch lacking ACS and "
+ "cannot be assigned"),
+ dev->name);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+#ifdef __linux__
+
+/*
+ * returns true if equal
+ */
+static bool
+pciConfigAddressEqual(struct pci_config_address *bdf1,
+ struct pci_config_address *bdf2)
+{
+ return ((bdf1->domain == bdf2->domain) &&
+ (bdf1->bus == bdf2->bus) &&
+ (bdf1->slot == bdf2->slot) &&
+ (bdf1->function == bdf2->function));
+}
+
+static int
+logStrToLong_ui(char const *s,
+ char **end_ptr,
+ int base,
+ unsigned int *result)
+{
+ int ret = 0;
+
+ ret = virStrToLong_ui(s, end_ptr, base, result);
+ if (ret != 0) {
+ VIR_ERROR(_("Failed to convert '%s' to unsigned int"), s);
+ } else {
+ VIR_DEBUG("Converted '%s' to unsigned int %u", s, *result);
+ }
+
+ return ret;
+}
+
+static int
+pciParsePciConfigAddress(char *address,
+ struct pci_config_address *bdf)
+{
+ char *p = NULL;
+ int ret = -1;
+
+ if ((address == NULL) || (logStrToLong_ui(address, &p, 16,
+ &bdf->domain) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
+ &bdf->bus) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
+ &bdf->slot) == -1)) {
+ goto out;
+ }
+
+ if ((p == NULL) || (logStrToLong_ui(p+1, &p, 16,
+ &bdf->function) == -1)) {
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+pciGetPciConfigAddressFromSysfsDeviceLink(const char *device_link,
+ struct pci_config_address **bdf)
+{
+ char *config_address = NULL;
+ char *device_path = NULL;
+ char errbuf[64];
+ int ret = -1;
+
+ VIR_DEBUG("Attempting to resolve device path from device link
'%s'",
+ device_link);
+
+ if (!virFileExists(device_link)) {
+ VIR_DEBUG("sysfs_path '%s' does not exist", device_link);
+ return ret;
+ }
+
+ device_path = canonicalize_file_name(device_link);
+ if (device_path == NULL) {
+ memset(errbuf, '\0', sizeof(errbuf));
+ virReportSystemError(errno,
+ _("Failed to resolve device link '%s'"),
+ device_link);
+ return ret;
+ }
+
+ config_address = basename(device_path);
+ if (VIR_ALLOC(*bdf) != 0) {
+ virReportOOMError();
+ goto out;
+ }
+
+ if (pciParsePciConfigAddress(config_address, *bdf) != 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to parse PCI config address '%s'"),
+ config_address);
+ VIR_FREE(*bdf);
+ goto out;
+ }
+
+ VIR_DEBUG("pci_config_address %.4x:%.2x:%.2x.%.1x",
+ (*bdf)->domain,
+ (*bdf)->bus,
+ (*bdf)->slot,
+ (*bdf)->function);
+
+ ret = 0;
+
+out:
+ VIR_FREE(device_path);
+
+ return ret;
+}
+
+/*
+ * Returns Physical function given a virtual function
+ */
+int
+pciGetPhysicalFunction(const char *vf_sysfs_path,
+ struct pci_config_address **physical_function)
+{
+ int ret = -1;
+ char *device_link = NULL;
+
+ VIR_DEBUG("Attempting to get SR IOV physical function for device "
+ "with sysfs path '%s'", vf_sysfs_path);
+
+ if (virBuildPath(&device_link, vf_sysfs_path, "physfn") == -1) {
+ virReportOOMError();
+ return ret;
+ } else {
+ ret = pciGetPciConfigAddressFromSysfsDeviceLink(device_link,
+ physical_function);
+ }
+
+ VIR_FREE(device_link);
+
+ return ret;
+}
+
+/*
+ * Returns virtual functions of a physical function
+ */
+int
+pciGetVirtualFunctions(const char *sysfs_path,
+ struct pci_config_address ***virtual_functions,
+ unsigned int *num_virtual_functions)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char *device_link = NULL;
+ char errbuf[64];
+
+ VIR_DEBUG("Attempting to get SR IOV virtual functions for device"
+ "with sysfs path '%s'", sysfs_path);
+
+ dir = opendir(sysfs_path);
+ if (dir == NULL) {
+ memset(errbuf, '\0', sizeof(errbuf));
+ virReportSystemError(errno,
+ _("Failed to open dir '%s'"),
+ sysfs_path);
+ return ret;
+ }
+
+ *virtual_functions = NULL;
+ *num_virtual_functions = 0;
+ while ((entry = readdir(dir))) {
+ if (STRPREFIX(entry->d_name, "virtfn")) {
+
+ if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) {
+ virReportOOMError();
+ goto out;
+ }
+
+ VIR_DEBUG("Number of virtual functions: %d",
+ *num_virtual_functions);
+ if (VIR_REALLOC_N(*virtual_functions,
+ (*num_virtual_functions) + 1) != 0) {
+ virReportOOMError();
+ VIR_FREE(device_link);
+ goto out;
+ }
+
+ if (pciGetPciConfigAddressFromSysfsDeviceLink(device_link,
+ &((*virtual_functions)[*num_virtual_functions])) !=
+ SRIOV_FOUND) {
+ /* We should not get back SRIOV_NOT_FOUND in this
+ * case, so if we do, it's an error. */
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to get SR IOV function from device "
+ "link '%s'"), device_link);
+ VIR_FREE(device_link);
+ goto out;
+ } else {
+ (*num_virtual_functions)++;
+ }
+ VIR_FREE(device_link);
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
+
+/*
+ * Returns 1 if vf device is a virtual function, 0 if not, -1 on error
+ */
+int
+pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link)
+{
+ char *vf_sysfs_physfn_link = NULL;
+ int ret = -1;
+
+ if (virAsprintf(&vf_sysfs_physfn_link, "%s/physfn",
+ vf_sysfs_device_link) < 0) {
+ virReportOOMError();
+ return ret;
+ }
+
+ ret = virFileExists(vf_sysfs_physfn_link);
+
+ VIR_FREE(vf_sysfs_physfn_link);
+
+ return ret;
+}
+
+/*
+ * Returns the sriov virtual function index of vf given its pf
+ */
+int
+pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
+ const char *vf_sysfs_device_link,
+ int *vf_index)
+{
+ int ret = -1, i;
+ unsigned int num_virt_fns = 0;
+ struct pci_config_address *vf_bdf = NULL;
+ struct pci_config_address **virt_fns = NULL;
+
+ if (pciGetPciConfigAddressFromSysfsDeviceLink(vf_sysfs_device_link,
+ &vf_bdf) < 0)
+ return ret;
+
+ if (pciGetVirtualFunctions(pf_sysfs_device_link, &virt_fns,
+ &num_virt_fns) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Error getting physical function's '%s' "
+ "virtual_functions"), pf_sysfs_device_link);
+ goto out;
+ }
+
+ for (i = 0; i < num_virt_fns; i++) {
+ if (pciConfigAddressEqual(vf_bdf, virt_fns[i])) {
+ *vf_index = i;
+ ret = 0;
+ break;
+ }
+ }
+
+out:
+
+ /* free virtual functions */
+ for (i = 0; i < num_virt_fns; i++)
+ VIR_FREE(virt_fns[i]);
+
+ VIR_FREE(virt_fns);
+ VIR_FREE(vf_bdf);
+
+ return ret;
+}
+
+/*
+ * Returns a path to the PCI sysfs file given the BDF of the PCI function
+ */
+
+int
+pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link)
+{
+ if (virAsprintf(pci_sysfs_device_link, PCI_SYSFS "devices/%s",
+ pciDeviceName) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+pciConfigAddressToSysfsFile(struct pci_config_address *dev,
+ char **pci_sysfs_device_link)
+{
+ if (virAsprintf(pci_sysfs_device_link,
+ PCI_SYSFS "devices/%04x:%02x:%02x.%x", dev->domain,
+ dev->bus, dev->slot, dev->function) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Returns the network device name of a pci device
+ */
+int
+pciDeviceNetName(char *device_link_sysfs_path, char **netname)
+{
+ char *pcidev_sysfs_net_path = NULL;
+ int ret = -1;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+
+ if (virBuildPath(&pcidev_sysfs_net_path, device_link_sysfs_path,
+ "net") == -1) {
+ virReportOOMError();
+ return -1;
+ }
+
+ dir = opendir(pcidev_sysfs_net_path);
+ if (dir == NULL)
+ goto out;
+
+ while ((entry = readdir(dir))) {
+ if (STREQ(entry->d_name, ".") ||
+ STREQ(entry->d_name, ".."))
+ continue;
+
+ /* Assume a single directory entry */
+ *netname = strdup(entry->d_name);
+ if (!*netname)
+ virReportOOMError();
+ else
+ ret = 0;
+ break;
+ }
+
+ closedir(dir);
+
+out:
+ VIR_FREE(pcidev_sysfs_net_path);
+
+ return ret;
+}
+
+int
+pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
+ char **pfname, int *vf_index)
+{
+ struct pci_config_address *pf_config_address = NULL;
+ char *pf_sysfs_device_path = NULL;
+ int ret = -1;
+
+ if (pciGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0)
+ return ret;
+
+ if (pciConfigAddressToSysfsFile(pf_config_address,
+ &pf_sysfs_device_path) < 0) {
+
+ VIR_FREE(pf_config_address);
+ return ret;
+ }
+
+ if (pciGetVirtualFunctionIndex(pf_sysfs_device_path, vf_sysfs_device_path,
+ vf_index) < 0)
+ goto cleanup;
+
+ ret = pciDeviceNetName(pf_sysfs_device_path, pfname);
+
+cleanup:
+ VIR_FREE(pf_config_address);
+ VIR_FREE(pf_sysfs_device_path);
+
+ return ret;
+}
+
+#else
+static const char *unsupported = N_("not supported on non-linux platforms");
+
+int
+pciGetPhysicalFunction(const char *vf_sysfs_path ATTRIBUTE_UNUSED,
+ struct pci_config_address **physical_function ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+
+int
+pciGetVirtualFunctions(const char *sysfs_path ATTRIBUTE_UNUSED,
+ struct pci_config_address ***virtual_functions ATTRIBUTE_UNUSED,
+ unsigned int *num_virtual_functions ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+
+int
+pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+
+int
+pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link ATTRIBUTE_UNUSED,
+ const char *vf_sysfs_device_link ATTRIBUTE_UNUSED,
+ int *vf_index ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+
+}
+
+int
+pciConfigAddressToSysfsFile(struct pci_config_address *dev ATTRIBUTE_UNUSED,
+ char **pci_sysfs_device_link ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+
+int
+pciDeviceNetName(char *device_link_sysfs_path ATTRIBUTE_UNUSED,
+ char **netname ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+
+int
+pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
+ char **pfname ATTRIBUTE_UNUSED,
+ int *vf_index ATTRIBUTE_UNUSED)
+{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
+ return -1;
+}
+#endif /* __linux__ */
diff --git a/src/util/virpci.h b/src/util/virpci.h
new file mode 100644
index 0000000..814c24e
--- /dev/null
+++ b/src/util/virpci.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2009, 2011-2012 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <
http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ * Mark McLoughlin <markmc(a)redhat.com>
+ */
+
+#ifndef __VIR_PCI_H__
+# define __VIR_PCI_H__
+
+# include "internal.h"
+
+typedef struct _pciDevice pciDevice;
+typedef struct _pciDeviceList pciDeviceList;
+
+struct pci_config_address {
+ unsigned int domain;
+ unsigned int bus;
+ unsigned int slot;
+ unsigned int function;
+};
+
+pciDevice *pciGetDevice (unsigned domain,
+ unsigned bus,
+ unsigned slot,
+ unsigned function);
+void pciFreeDevice (pciDevice *dev);
+const char *pciDeviceGetName (pciDevice *dev);
+int pciDettachDevice (pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs);
+int pciReAttachDevice (pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs);
+int pciResetDevice (pciDevice *dev,
+ pciDeviceList *activeDevs,
+ pciDeviceList *inactiveDevs);
+void pciDeviceSetManaged(pciDevice *dev,
+ unsigned managed);
+unsigned pciDeviceGetManaged(pciDevice *dev);
+void pciDeviceSetUsedBy(pciDevice *dev,
+ const char *used_by);
+const char *pciDeviceGetUsedBy(pciDevice *dev);
+unsigned pciDeviceGetUnbindFromStub(pciDevice *dev);
+void pciDeviceSetUnbindFromStub(pciDevice *dev,
+ unsigned unbind);
+unsigned pciDeviceGetRemoveSlot(pciDevice *dev);
+void pciDeviceSetRemoveSlot(pciDevice *dev,
+ unsigned remove_slot);
+unsigned pciDeviceGetReprobe(pciDevice *dev);
+void pciDeviceSetReprobe(pciDevice *dev,
+ unsigned reprobe);
+void pciDeviceReAttachInit(pciDevice *dev);
+
+pciDeviceList *pciDeviceListNew (void);
+void pciDeviceListFree (pciDeviceList *list);
+int pciDeviceListAdd (pciDeviceList *list,
+ pciDevice *dev);
+pciDevice * pciDeviceListGet (pciDeviceList *list,
+ int idx);
+int pciDeviceListCount (pciDeviceList *list);
+pciDevice * pciDeviceListSteal (pciDeviceList *list,
+ pciDevice *dev);
+pciDevice * pciDeviceListStealIndex(pciDeviceList *list,
+ int idx);
+void pciDeviceListDel (pciDeviceList *list,
+ pciDevice *dev);
+pciDevice * pciDeviceListFind (pciDeviceList *list,
+ pciDevice *dev);
+int pciDeviceListFindIndex(pciDeviceList *list,
+ pciDevice *dev);
+
+/*
+ * Callback that will be invoked once for each file
+ * associated with / used for PCI host device access.
+ *
+ * Should return 0 if successfully processed, or
+ * -1 to indicate error and abort iteration
+ */
+typedef int (*pciDeviceFileActor)(pciDevice *dev,
+ const char *path, void *opaque);
+
+int pciDeviceFileIterate(pciDevice *dev,
+ pciDeviceFileActor actor,
+ void *opaque);
+
+int pciDeviceIsAssignable(pciDevice *dev,
+ int strict_acs_check);
+int pciWaitForDeviceCleanup(pciDevice *dev, const char *matcher);
+
+int pciGetPhysicalFunction(const char *sysfs_path,
+ struct pci_config_address **phys_fn);
+
+int pciGetVirtualFunctions(const char *sysfs_path,
+ struct pci_config_address ***virtual_functions,
+ unsigned int *num_virtual_functions);
+
+int pciDeviceIsVirtualFunction(const char *vf_sysfs_device_link);
+
+int pciGetVirtualFunctionIndex(const char *pf_sysfs_device_link,
+ const char *vf_sysfs_device_link,
+ int *vf_index);
+
+int pciConfigAddressToSysfsFile(struct pci_config_address *dev,
+ char **pci_sysfs_device_link);
+
+int pciDeviceNetName(char *device_link_sysfs_path, char **netname);
+
+int pciSysfsFile(char *pciDeviceName, char **pci_sysfs_device_link)
+ ATTRIBUTE_RETURN_CHECK;
+
+int pciGetDeviceAddrString(unsigned domain,
+ unsigned bus,
+ unsigned slot,
+ unsigned function,
+ char **pciConfigAddr)
+ ATTRIBUTE_NONNULL(5) ATTRIBUTE_RETURN_CHECK;
+
+int pciDeviceGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
+ char **pfname, int *vf_index);
+
+#endif /* __VIR_PCI_H__ */
diff --git a/src/xen/xen_driver.c b/src/xen/xen_driver.c
index 94df116..9b2fcf3 100644
--- a/src/xen/xen_driver.c
+++ b/src/xen/xen_driver.c
@@ -57,7 +57,7 @@
#include "util.h"
#include "viralloc.h"
#include "node_device_conf.h"
-#include "pci.h"
+#include "virpci.h"
#include "uuid.h"
#include "fdstream.h"
#include "virfile.h"
--
1.7.11.7