We're seeing bugs apparently resulting from thread unsafety of
libpciaccess, such as
https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/726099
To prevent those, as suggested by danpb on irc, move the
nodeDeviceLock(driverState) higher into the callers. In
particular:
udevDeviceMonitorStartup should hold the lock while calling
udevEnumerateDevices(), and udevEventHandleCallback should hold it
over its entire execution.
It's not clear to me whether it is ok to hold the
nodeDeviceLock while taking the virNodeDeviceObjLock(dev) on a
device. If not, then the lock will need to be dropped around
the calling of udevSetupSystemDev(), and udevAddOneDevice()
may not actually be safe to call from higher layers with the
driverstate lock held.
libvirt 0.8.8 with this patch on it seems to work fine for me.
Assuming it looks ok and I haven't done anything obviously dumb,
I'll ask the bug submitters to try this patch.
Signed-off-by: Serge Hallyn <serge.hallyn(a)ubuntu.com>
---
src/node_device/node_device_udev.c | 21 +++++++++------------
1 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 372f1d1..2139ef3 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -1202,7 +1202,6 @@ static int udevRemoveOneDevice(struct udev_device *device)
int ret = 0;
name = udev_device_get_syspath(device);
- nodeDeviceLock(driverState);
dev = virNodeDeviceFindBySysfsPath(&driverState->devs, name);
if (dev != NULL) {
@@ -1214,7 +1213,6 @@ static int udevRemoveOneDevice(struct udev_device *device)
name);
ret = -1;
}
- nodeDeviceUnlock(driverState);
return ret;
}
@@ -1316,9 +1314,7 @@ static int udevAddOneDevice(struct udev_device *device)
/* If this is a device change, the old definition will be freed
* and the current definition will take its place. */
- nodeDeviceLock(driverState);
dev = virNodeDeviceAssignDef(&driverState->devs, def);
- nodeDeviceUnlock(driverState);
if (dev == NULL) {
VIR_ERROR(_("Failed to create device for '%s'"),
def->name);
@@ -1442,6 +1438,7 @@ static void udevEventHandleCallback(int watch ATTRIBUTE_UNUSED,
const char *action = NULL;
int udev_fd = -1;
+ nodeDeviceLock(driverState);
udev_fd = udev_monitor_get_fd(udev_monitor);
if (fd != udev_fd) {
VIR_ERROR(_("File descriptor returned by udev %d does not "
@@ -1470,6 +1467,7 @@ static void udevEventHandleCallback(int watch ATTRIBUTE_UNUSED,
out:
udev_device_unref(device);
+ nodeDeviceUnlock(driverState);
return;
}
@@ -1647,10 +1645,9 @@ static int udevDeviceMonitorStartup(int privileged)
priv->udev_monitor = udev_monitor_new_from_netlink(udev, "udev");
if (priv->udev_monitor == NULL) {
VIR_FREE(priv);
- nodeDeviceUnlock(driverState);
VIR_ERROR0(_("udev_monitor_new_from_netlink returned NULL"));
ret = -1;
- goto out;
+ goto out_unlock;
}
udev_monitor_enable_receiving(priv->udev_monitor);
@@ -1670,26 +1667,26 @@ static int udevDeviceMonitorStartup(int privileged)
VIR_EVENT_HANDLE_READABLE,
udevEventHandleCallback, NULL, NULL);
if (priv->watch == -1) {
- nodeDeviceUnlock(driverState);
ret = -1;
- goto out;
+ goto out_unlock;
}
- nodeDeviceUnlock(driverState);
-
/* Create a fictional 'computer' device to root the device tree. */
if (udevSetupSystemDev() != 0) {
ret = -1;
- goto out;
+ goto out_unlock;
}
/* Populate with known devices */
if (udevEnumerateDevices(udev) != 0) {
ret = -1;
- goto out;
+ goto out_unlock;
}
+out_unlock:
+ nodeDeviceUnlock(driverState);
+
out:
if (ret == -1) {
udevDeviceMonitorShutdown();
ACK, it looks good to me - serializing all access to the udev
and libpciaccess APIs is a good conservative approach.
Daniel
--
|: