Author: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
There can be a delay of 1 or 2 seconds between the vfio-pci driver
being unbound and the device file /dev/vfio/<iommu> actually being
removed. If the host driver probes the device while that file still
exists, it can lead to a crash. So wait for the file to disappear
before re-probing. The timeout is set to 15 seconds for now.
Signed-off-by: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
---
src/util/virpci.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 425c1a7..6bf640d 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -1097,6 +1097,42 @@ static bool virPCIIsAKnownStub(char *driver)
return ret;
}
+#define VFIO_UNBIND_TIMEOUT 15 /* seconds */
+
+/* Poll once per second, for at most VFIO_UNBIND_TIMEOUT seconds, until the
+ * path returned by virPCIDeviceGetIOMMUGroupDev() for @dev no longer exists.
+ * It is not safe to initiate a host driver probe while the vfio driver has
+ * not completely unbound the device.
+ * Returns 0 once the path is gone; -1 on timeout (an error is reported,
+ * naming the configured timeout) or if the path could not be obtained.
+ */
+static int virPCIWaitForVfioUnbindCompletion(virPCIDevicePtr dev)
+{
+    int retry = 0;
+    int ret = -1;
+    char *path = NULL;
+
+    if (!(path = virPCIDeviceGetIOMMUGroupDev(dev)))
+        goto cleanup;
+
+    while (virFileExists(path)) {
+        if (retry++ >= VFIO_UNBIND_TIMEOUT) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("VFIO unbind did not complete within %d seconds for device %.4x:%.2x:%.2x.%.1x"),
+                           VFIO_UNBIND_TIMEOUT, dev->domain, dev->bus,
+                           dev->slot, dev->function);
+            goto cleanup;
+        }
+        sleep(1);
+    }
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(path);
+    return ret;
+}
+
+
static int virPCIDeviceReprobeHostDriver(virPCIDevicePtr dev, char *driver, char
*drvdir)
{
char *path = NULL;
@@ -1203,6 +1239,9 @@ virPCIDeviceUnbindFromStub(virPCIDevicePtr dev,
goto cleanup;
}
+ if (virPCIWaitForVfioUnbindCompletion(dev) < 0)
+ goto cleanup;
+
while (inactiveDevs && (i < virPCIDeviceListCount(inactiveDevs))) {
virPCIDevicePtr pcidev = virPCIDeviceListGet(inactiveDevs, i);
if (dev->iommuGroup == pcidev->iommuGroup) {