When reattaching VFIO devices to the host, we have to be careful not to
end up in a situation where ownership of the IOMMU group is shared
between the hosts and a domain, as doing so will result in host crashes.
If a PCI device is not safe for reattach, as detected by
virHostdevIsPCIDeviceSafeForReattach(), we mark as inactive and delay
actually reattaching it to the host to a later time when doing so will
not result in shared IOMMU group ownership.
Note that, with this commit, VFIO devices will no longer be reattached
to the host, even when it would be safe to do so. A later commit will
restore the missing functionality.
Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=1272300
---
src/util/virhostdev.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c
index 9174525..0f2916b 100644
--- a/src/util/virhostdev.c
+++ b/src/util/virhostdev.c
@@ -155,6 +155,38 @@ virHostdevIsPCIDeviceSafeForDetach(virPCIDeviceAddressPtr addr,
return -1;
}
+/**
+ * virHostdevIsPCIDeviceSafeForReattach:
+ * @addr: PCI device address
+ * @opaque: user data (virHostdevIsPCIDeviceSafeData*)
+ *
+ * Checks whether a PCI device can be safely reattached to the host.
+ *
+ * It is meant to be called from virPCIDeviceAddressIOMMUGroupIterate() or
+ * virPCIDeviceIOMMUGroupIterate(), and the checks it performs only make
+ * sense when using VFIO device assignment.
+ *
+ * Returns: 0 if the device can be safely reattached to the host, <0 otherwise
+ */
+static int
+virHostdevIsPCIDeviceSafeForReattach(virPCIDeviceAddressPtr addr,
+ void *opaque)
+{
+ virHostdevIsPCIDeviceSafeData *data = opaque;
+
+ /* Active devices are not safe: IOMMU groups can't be shared between
+ * the host and its domains */
+ if (data->activeDevices &&
+ virPCIDeviceListFindByIDs(data->activeDevices,
+ addr->domain, addr->bus,
+ addr->slot, addr->function))
+ return -1;
+
+ /* All other situations are either safe (pending device, inactive device)
+ * or can't happen (device in use by the host) */
+ return 0;
+}
+
static int virHostdevManagerOnceInit(void)
{
if (!(virHostdevManagerClass = virClassNew(virClassForObject(),
@@ -900,6 +932,41 @@ virHostdevReAttachPCIDevices(virHostdevManagerPtr hostdev_mgr,
}
}
+ /* Loop 3: delay handling of devices that are not yet safe for reattach */
+ i = 0;
+ while (i < virPCIDeviceListCount(pcidevs)) {
+ virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
+ virHostdevIsPCIDeviceSafeData data = { dom_name,
+ hostdev_mgr->activePCIHostdevs,
+ hostdev_mgr->inactivePCIHostdevs,
+ pcidevs };
+ bool usesVFIO = (virPCIDeviceGetStubDriver(dev) == VIR_PCI_STUB_DRIVER_VFIO);
+
+ if (usesVFIO &&
+ virPCIDeviceIOMMUGroupIterate(dev,
+ virHostdevIsPCIDeviceSafeForReattach,
+ &data) < 0) {
+ /* Don't process the device further. Later on, when the last
+ * device in this IOMMU group will be unassigned from its domain,
+ * we will process all the devices at once, including this one */
+ VIR_DEBUG("Delaying reattaching of PCI device %s",
+ virPCIDeviceGetName(dev));
+ dev = virPCIDeviceListSteal(pcidevs, dev);
+ } else {
+ /* This device is safe, keep it in the list and skip ahead */
+ VIR_DEBUG("PCI device %s can be safely reattached",
+ virPCIDeviceGetName(dev));
+ i++;
+ }
+
+ /* Mark the device as inactive */
+ virPCIDeviceListAddCopy(hostdev_mgr->inactivePCIHostdevs, dev);
+ }
+
+ /* At this point, all devices we were asked to reattach have been removed
+ * from activePCIHostdevs and have been added to inactivePCIHostdevs;
+ * pcidevs contains the devices that we can safely process right now */
+
/* Loop 3: perform a PCI Reset on all devices */
for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
--
2.5.0