If the timing is "just right", there is a possibility that the
udev nodeStateInitialize conflicts with another systemd thread
running an lspci command leaving both waiting for "something",
but resulting in a hung libvirtd (and hung lspci thread) from
which the only recovery is a reboot because killing either thread
is impossible and results in a defunct libvirtd process if a
SIGKILL is performed.
In order to avoid this let's move where the PCI initialization
is done to be where it's actually needed. Ensure we only perform
the initialization once via a driver bool. Likewise, during
cleanup ensure we only call udevPCITranslateDeinit once the
initialization is successful.
At least a failure for this driver won't hang out the rest of the
the libvirt event loop. May not make certain things usable though.
Still a libvirtd restart is far easier than a host reboot.
Signed-off-by: John Ferlan <jferlan(a)redhat.com>
---
src/conf/virnodedeviceobj.h | 1 +
src/node_device/node_device_udev.c | 16 ++++++++++++----
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/src/conf/virnodedeviceobj.h b/src/conf/virnodedeviceobj.h
index 87f908369..b5f96f206 100644
--- a/src/conf/virnodedeviceobj.h
+++ b/src/conf/virnodedeviceobj.h
@@ -41,6 +41,7 @@ struct _virNodeDeviceDriverState {
virNodeDeviceObjListPtr devs; /* currently-known devices */
void *privateData; /* driver-specific private data */
bool privileged; /* whether we run in privileged mode */
+ bool initPCI; /* Set when PCI thread completed */
/* Immutable pointer, self-locking APIs */
virObjectEventStatePtr nodeDeviceEventState;
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index ca5b47767..9cbba8562 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -551,12 +551,21 @@ udevProcessPCI(struct udev_device *device,
virPCIEDeviceInfoPtr pci_express = NULL;
virPCIDevicePtr pciDev = NULL;
int ret = -1;
+ int rc = 0;
char *p;
bool privileged;
nodeDeviceLock();
privileged = driver->privileged;
+
+ if (!driver->initPCI) {
+ rc = udevPCITranslateInit(driver->privileged);
+ driver->initPCI = true;
+ }
+
nodeDeviceUnlock();
+ if (rc < 0)
+ goto cleanup;
if (udevGetUintProperty(device, "PCI_CLASS", &pci_dev->class, 16)
< 0)
goto cleanup;
@@ -1681,6 +1690,9 @@ nodeStateCleanup(void)
virThreadJoin(&priv->th);
}
+ if (driver->initPCI)
+ udevPCITranslateDeinit();
+
virObjectUnref(priv);
virObjectUnref(driver->nodeDeviceEventState);
@@ -1688,7 +1700,6 @@ nodeStateCleanup(void)
virMutexDestroy(&driver->lock);
VIR_FREE(driver);
- udevPCITranslateDeinit();
return 0;
}
@@ -1962,9 +1973,6 @@ nodeStateInitialize(bool privileged,
driver->privateData = priv;
driver->nodeDeviceEventState = virObjectEventStateNew();
- if (udevPCITranslateInit(privileged) < 0)
- goto cleanup;
-
udev = udev_new();
if (!udev) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
--
2.13.6