It will sometimes be desirable for a "managed" hostdev that is in a
VFIO group with several other devices to cause *all* of the devices in
the group to be detached/reattached. This patch gives that ability to
virPCIDeviceDetach and virPCIDeviceReattach.
To cause this, you would need to have previously called the new
virPCIDeviceSetAttachGroup() function, which isn't yet called by
anyone. (The "attach_group" setting must be set in the virPCIDevice
object rather than passed as an argument to
virPCIDevice(De|Reat)tach() because there is at least one case where
the necessary information is available when the virPCIDevice is
created, but is *not* available at a later time when the attach
function is called.)
NB: the call to virPCIDeviceReattachInit() was moved from
qemu_driver.c:qemuNodeDeviceReAttach() to
virpci.c:virPCIDeviceReattach() because it must be done for each
device that is being re-attached, and the qemu function only knows
about a single device.
It also appears that moving the call to virPCIDeviceReattachInit()
coincidentally fixes a bug for the xen driver, since no reattach would
ever take place without the setting of the flags done by
virPCIDeviceReattachInit(), and the xen driver's implementation of
xenNodeDeviceReAttach() was never calling it - since it is now called
by virPCIDeviceReattach(), re-attach should work properly for the xen
driver.
---
src/libvirt_private.syms | 2 +
src/qemu/qemu_driver.c | 4 +-
src/qemu/qemu_hostdev.c | 4 +-
src/util/virpci.c | 254 ++++++++++++++++++++++++++++++++++++++++++-----
src/util/virpci.h | 6 +-
src/xen/xen_driver.c | 2 +-
6 files changed, 241 insertions(+), 31 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index dd3a138..9a877ff 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1652,6 +1652,7 @@ virPCIDeviceCopy;
virPCIDeviceDetach;
virPCIDeviceFileIterate;
virPCIDeviceFree;
+virPCIDeviceGetAttachGroup;
virPCIDeviceGetIOMMUGroupDev;
virPCIDeviceGetIOMMUGroupList;
virPCIDeviceGetManaged;
@@ -1676,6 +1677,7 @@ virPCIDeviceNew;
virPCIDeviceReattach;
virPCIDeviceReattachInit;
virPCIDeviceReset;
+virPCIDeviceSetAttachGroup;
virPCIDeviceSetManaged;
virPCIDeviceSetRemoveSlot;
virPCIDeviceSetReprobe;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 20b127c..8f498e6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -10351,10 +10351,8 @@ qemuNodeDeviceReAttach(virNodeDevicePtr dev)
goto out;
}
- virPCIDeviceReattachInit(pci);
-
if (virPCIDeviceReattach(pci, driver->activePciHostdevs,
- driver->inactivePciHostdevs) < 0)
+ driver->inactivePciHostdevs, true) < 0)
goto out;
ret = 0;
diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c
index dfe39c6..1abad59 100644
--- a/src/qemu/qemu_hostdev.c
+++ b/src/qemu/qemu_hostdev.c
@@ -661,7 +661,7 @@ reattachdevs:
/* NB: This doesn't actually re-bind to original driver, just
* unbinds from the stub driver
*/
- virPCIDeviceReattach(dev, driver->activePciHostdevs, NULL);
+ virPCIDeviceReattach(dev, driver->activePciHostdevs, NULL, false);
}
cleanup:
@@ -1042,7 +1042,7 @@ void qemuReattachPciDevice(virPCIDevicePtr dev, virQEMUDriverPtr
driver)
}
if (virPCIDeviceReattach(dev, driver->activePciHostdevs,
- driver->inactivePciHostdevs) < 0) {
+ driver->inactivePciHostdevs, false) < 0) {
virErrorPtr err = virGetLastError();
VIR_ERROR(_("Failed to re-attach PCI device: %s"),
err ? err->message : _("unknown error"));
diff --git a/src/util/virpci.c b/src/util/virpci.c
index cbade1b..80256f9 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -39,6 +39,7 @@
#include "dirname.h"
#include "virlog.h"
#include "viralloc.h"
+#include "virbitmap.h"
#include "vircommand.h"
#include "virerror.h"
#include "virfile.h"
@@ -70,6 +71,7 @@ struct _virPCIDevice {
bool has_pm_reset;
bool managed;
char *stubDriver;
+ bool attach_group; /* true if okay to attach/detach entire group */
/* used by reattach function */
bool unbind_from_stub;
@@ -967,7 +969,7 @@ static const char *virPCIKnownStubs[] = {
};
static int
-virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
+virPCIDeviceUnbindFromStub(virPCIDevicePtr dev, const char *stubDriver)
{
int result = -1;
char *drvdir = NULL;
@@ -985,6 +987,13 @@ virPCIDeviceUnbindFromStub(virPCIDevicePtr dev)
if (!dev->unbind_from_stub)
goto remove_slot;
+ /* if the caller only wants us to unbind if the device is
+ * currently bound to one specific stub driver, that driver name
+ * will be in @stubDriver.
+ */
+ if (stubDriver && STRNEQ(driver, stubDriver))
+ goto remove_slot;
+
/* If the device isn't bound to a known stub, skip the unbind. */
for (stubTest = virPCIKnownStubs; *stubTest != NULL; stubTest++) {
if (STREQ(driver, *stubTest)) {
@@ -1067,9 +1076,19 @@ cleanup:
}
+/* these drivers are acceptable substitutes for vfio-pci
+ * in virPCIDeviceBindToStub(), unless "force" is true.
+ */
+static const char *virPCIVFIOSubstitutes[] = {
+ "pci-stub",
+ "pcieport",
+ NULL
+};
+
static int
virPCIDeviceBindToStub(virPCIDevicePtr dev,
- const char *stubDriverName)
+ const char *stubDriverName,
+ bool force, bool *bound)
{
int result = -1;
int reprobe = false;
@@ -1081,6 +1100,9 @@ virPCIDeviceBindToStub(virPCIDevicePtr dev,
char *path = NULL; /* reused for different purposes */
const char *newDriverName = NULL;
+ /* this will be set if we change the binding */
+ *bound = false;
+
if (virPCIDriverDir(&stubDriverPath, stubDriverName) < 0 ||
virPCIFile(&driverLink, dev->name, "driver") < 0 ||
virPCIDeviceGetDriverPathAndName(dev, &oldDriverPath,
@@ -1088,6 +1110,7 @@ virPCIDeviceBindToStub(virPCIDevicePtr dev,
goto cleanup;
}
+ /* check whether the device is already bound to an acceptable driver */
if (virFileExists(driverLink)) {
if (virFileLinkPointsTo(driverLink, stubDriverPath)) {
/* The device is already bound to the correct driver */
@@ -1097,6 +1120,28 @@ virPCIDeviceBindToStub(virPCIDevicePtr dev,
result = 0;
goto cleanup;
}
+ /* If force == false, this device is just a secondary driver
+ * in the same VFIO group as the primary driver we want bound
+ * to the stub (rather than the primary device that is the
+ * real target of the bind). In that case, it's okay for it to
+ * instead be bound to one of the acceptable "substitute"
+ * drivers (which are deemed by VFIO to be safe for isolation,
+ * and are on an internal "whitelist" in the vfio driver).
+ */
+ if (!force && oldDriverName && STREQ(stubDriverName,
"vfio-pci")) {
+ const char **stubTest;
+ for (stubTest = virPCIVFIOSubstitutes;
+ *stubTest != NULL; stubTest++) {
+ if (STREQ(oldDriverName, *stubTest)) {
+ VIR_DEBUG("Device %s is bound to acceptable "
+ "substitute driver %s",
+ dev->name, *stubTest);
+ newDriverName = *stubTest;
+ result = 0;
+ goto cleanup;
+ }
+ }
+ }
reprobe = true;
}
@@ -1119,6 +1164,8 @@ virPCIDeviceBindToStub(virPCIDevicePtr dev,
goto cleanup;
}
+ *bound = true;
+
/* check whether the device is bound to pci-stub when we write dev->id to
* ${stubDriver}/new_id.
*/
@@ -1221,7 +1268,7 @@ cleanup:
result = VIR_STRDUP(dev->stubDriver, newDriverName);
}
if (result < 0)
- virPCIDeviceUnbindFromStub(dev);
+ virPCIDeviceUnbindFromStub(dev, stubDriverName);
return result;
}
@@ -1249,54 +1296,201 @@ virPCIDeviceDetach(virPCIDevicePtr dev,
virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs)
{
+ /* grouplist contains all devices in the group, even dev itself. */
+ virPCIDeviceList *groupList = NULL;
+ virPCIDevicePtr current, copy = NULL;
+ virBitmapPtr newlyDetached = NULL;
+ bool bound;
+ int ii, ret = -1;
+
if (virPCIProbeStubDriver(dev->stubDriver) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Failed to load PCI stub module %s"),
dev->stubDriver);
- return -1;
+ goto cleanup;
}
if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
+ virReportError(VIR_ERR_OPERATION_INVALID,
_("Not detaching active device %s"), dev->name);
- return -1;
+ goto cleanup;
}
- if (virPCIDeviceBindToStub(dev, dev->stubDriver) < 0)
- return -1;
+ if (dev->attach_group && STREQ(dev->stubDriver, "vfio-pci"))
{
+ /* Get a list of all devices in the same vfio group as this one */
+ if (!(groupList = virPCIDeviceGetIOMMUGroupList(dev)))
+ goto cleanup;
+ } else {
+ /* simple case - just detach this one device */
+ if (!(groupList = virPCIDeviceListNew()) ||
+ !(copy = virPCIDeviceCopy(dev)) ||
+ virPCIDeviceListAdd(groupList, copy) < 0) {
+ goto cleanup;
+ }
+ copy = NULL;
+ }
- /* Add *a copy of* the dev into list inactiveDevs, if
- * it's not already there.
+ if (!(newlyDetached = virBitmapNew(virPCIDeviceListCount(groupList))))
+ goto cleanup;
+
+ /* * groupList contains a copy of all devices that should be
+ * detached (bound to the stub)
+ *
+ * * dev is the one that *must* be detached.
+ *
+ * * detachedDevs is a bitmap that has the bit set for each item
+ * in groupList that has been newly detached (and will need to
+ * be reattach in case of failure)
*/
- if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, dev)) {
- virPCIDevicePtr copy = virPCIDeviceCopy(dev);
- if ((!copy) || virPCIDeviceListAdd(inactiveDevs, copy) < 0)
- return -1;
+ for (ii = 0; (current = virPCIDeviceListGet(groupList, ii)); ii++) {
+ /* If any device other than the "primary" (the one originally
+ * requested) is on the active list (already assigned to a
+ * domain), just skip detach - we can't tell right now if it's
+ * in use by the correct domain, but that will all come out
+ * when we get to the point of doing the assign. (NB: We've
+ * already determined that the primary *isn't* on the active
+ * list)
+ */
+ if (virPCIDeviceListFind(activeDevs, current))
+ continue;
+
+ /* bind to stub (force if this is the original device) */
+ if (virPCIDeviceBindToStub(current, dev->stubDriver,
+ STREQ(current->name, dev->name),
+ &bound) < 0) {
+ goto cleanup;
+ }
+
+ /* remember if we detached it */
+ if (bound)
+ ignore_value(virBitmapSetBit(newlyDetached, ii));
+
+ /* add to inactive */
+ if (inactiveDevs && !virPCIDeviceListFind(inactiveDevs, current)
&&
+ (!(copy = virPCIDeviceCopy(current)) ||
+ virPCIDeviceListAdd(inactiveDevs, copy) < 0)) {
+ goto cleanup;
+ }
+ copy = NULL; /* it's on the list now */
}
- return 0;
+ ret = 0;
+cleanup:
+
+ if (ret < 0) {
+ /* try to cleanup what was halfway done if we fail */
+ if (groupList && newlyDetached) {
+ for (ii = 0; (current = virPCIDeviceListGet(groupList, ii)); ii++) {
+ ignore_value(virBitmapGetBit(newlyDetached, ii, &bound));
+ if (bound)
+ virPCIDeviceUnbindFromStub(current, dev->stubDriver);
+ }
+ }
+ }
+
+ virObjectUnref(groupList);
+ virPCIDeviceFree(copy);
+ virBitmapFree(newlyDetached);
+ return ret;
}
+/* virPCIDeviceReattach:
+ *
+ * Re-attach this device (and all devices in the same group, if
+ * dev->attach_group is true) to its host driver.
+ *
+ * if @force is true, return failure if the given device couldn't be
+ * re-attached due to another device in the group being buse. If
+ * @force is false, ignore this circumstance (i.e., don't re-attach
+ * the device to the host, but return success).
+ *
+ * Return 0 on success, -1 on failure.
+ */
int
virPCIDeviceReattach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
- virPCIDeviceListPtr inactiveDevs)
+ virPCIDeviceListPtr inactiveDevs,
+ bool force)
{
+ virPCIDeviceList *groupList = NULL;
+ virPCIDevicePtr current, copy = NULL;
+ char *drvPath = NULL, *drvName = NULL;
+ int ii, ret = -1;
+
if (activeDevs && virPCIDeviceListFind(activeDevs, dev)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
+ virReportError(VIR_ERR_OPERATION_INVALID,
_("Not reattaching active device %s"), dev->name);
- return -1;
+ goto cleanup;
}
- if (virPCIDeviceUnbindFromStub(dev) < 0)
- return -1;
+ if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
+ goto cleanup;
- /* Steal the dev from list inactiveDevs */
- if (inactiveDevs)
- virPCIDeviceListDel(inactiveDevs, dev);
+ if (dev->attach_group && STREQ_NULLABLE(drvName, "vfio-pci")) {
+ /* Get a list of all devices in the same vfio group as this one */
+ if (!(groupList = virPCIDeviceGetIOMMUGroupList(dev)))
+ goto cleanup;
+ } else {
+ /* simple case - just reattach this one device */
+ if (!(groupList = virPCIDeviceListNew()) ||
+ !(copy = virPCIDeviceCopy(dev)) ||
+ virPCIDeviceListAdd(groupList, copy) < 0) {
+ goto cleanup;
+ }
+ copy = NULL;
- return 0;
+ if (activeDevs) {
+ /* If any device in the list is still in use by a
+ * guest, don't reattach any of them to the host.
+ */
+ for (ii = 0; (current = virPCIDeviceListGet(groupList, ii)); ii++) {
+ if (virPCIDeviceListFind(activeDevs, current)) {
+ if (force) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("Not reattaching device %s "
+ "because it is in the same group "
+ "as %s which is still in use by a "
+ "domain."),
+ dev->name, current->name);
+ } else {
+ /* if force is false, this isn't an error */
+ VIR_DEBUG("Not reattaching device %s "
+ "because it is in the same group "
+ "as %s which is still in use by a domain.",
+ dev->name, current->name);
+ ret = 0;
+ }
+ goto cleanup;
+ }
+ }
+ }
+ }
+
+ /* if we get this far, all devices in this group are free to be
+ * reattached to the host driver
+ */
+ for (ii = 0; (current = virPCIDeviceListGet(groupList, ii)); ii++) {
+ /* reattach to the host *IFF* this device is currently bound
+ * to exactly the same stub driver as the requested device is
+ * bound to. */
+
+ virPCIDeviceReattachInit(current); /* won't reattach w/o these set */
+ if (virPCIDeviceUnbindFromStub(current, drvName) < 0)
+ goto cleanup;
+ /* Remove reattached device from inactiveDevs */
+ if (inactiveDevs)
+ virPCIDeviceListDel(inactiveDevs, current);
+ }
+
+ ret = 0;
+cleanup:
+
+ VIR_FREE(drvPath);
+ VIR_FREE(drvName);
+ virObjectUnref(groupList);
+ virPCIDeviceFree(copy);
+ return ret;
}
/* Certain hypervisors (like qemu/kvm) map the PCI bar(s) on
@@ -1600,6 +1794,18 @@ virPCIDeviceGetStubDriver(virPCIDevicePtr dev)
return dev->stubDriver;
}
+void
+virPCIDeviceSetAttachGroup(virPCIDevicePtr dev, bool allow)
+{
+ dev->attach_group = allow;
+}
+
+bool
+virPCIDeviceGetAttachGroup(virPCIDevicePtr dev)
+{
+ return dev->attach_group;
+}
+
unsigned int
virPCIDeviceGetUnbindFromStub(virPCIDevicePtr dev)
{
diff --git a/src/util/virpci.h b/src/util/virpci.h
index 972f86b..c598456 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -54,7 +54,8 @@ int virPCIDeviceDetach(virPCIDevicePtr dev,
virPCIDeviceListPtr inactiveDevs);
int virPCIDeviceReattach(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
- virPCIDeviceListPtr inactiveDevs);
+ virPCIDeviceListPtr inactiveDevs,
+ bool force);
int virPCIDeviceReset(virPCIDevicePtr dev,
virPCIDeviceListPtr activeDevs,
virPCIDeviceListPtr inactiveDevs);
@@ -65,6 +66,9 @@ unsigned int virPCIDeviceGetManaged(virPCIDevice *dev);
int virPCIDeviceSetStubDriver(virPCIDevicePtr dev,
const char *driver);
const char *virPCIDeviceGetStubDriver(virPCIDevicePtr dev);
+void virPCIDeviceSetAttachGroup(virPCIDevicePtr dev,
+ bool allow);
+bool virPCIDeviceGetAttachGroup(virPCIDevicePtr dev);
void virPCIDeviceSetUsedBy(virPCIDevice *dev,
const char *used_by);
const char *virPCIDeviceGetUsedBy(virPCIDevice *dev);
diff --git a/src/xen/xen_driver.c b/src/xen/xen_driver.c
index 6724a36..c9e4db8 100644
--- a/src/xen/xen_driver.c
+++ b/src/xen/xen_driver.c
@@ -2341,7 +2341,7 @@ xenUnifiedNodeDeviceReAttach(virNodeDevicePtr dev)
goto out;
}
- if (virPCIDeviceReattach(pci, NULL, NULL) < 0)
+ if (virPCIDeviceReattach(pci, NULL, NULL, false) < 0)
goto out;
ret = 0;
--
1.7.11.7