It is possible to deadlock libvirtd when concurrently starting a domain
and restarting the daemon. Threads involved in the deadlock are
Thread 4 (Thread 0x7fc13b53e700 (LWP 64084)):
#0 0x00007fc13fba10bf in pthread_cond_wait@(a)GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00007fc14310213c in virCondWait (c=0x7fc110017fa8, m=0x7fc110017f80)
at util/virthread.c:154
#2 0x00007fc1280244e9 in qemuMonitorSend (mon=0x7fc110017f70, msg=0x7fc13b53d240)
at qemu/qemu_monitor.c:1083
#3 0x00007fc12803bf5a in qemuMonitorJSONCommandWithFd (mon=0x7fc110017f70,
cmd=0x7fc110017700, scm_fd=-1, reply=0x7fc13b53d318) at
qemu/qemu_monitor_json.c:305
#4 0x00007fc12803c09c in qemuMonitorJSONCommand (mon=0x7fc110017f70,
cmd=0x7fc110017700,
reply=0x7fc13b53d318) at qemu/qemu_monitor_json.c:335
#5 0x00007fc12803f116 in qemuMonitorJSONSetCapabilities (mon=0x7fc110017f70)
at qemu/qemu_monitor_json.c:1298
#6 0x00007fc128026e14 in qemuMonitorSetCapabilities (mon=0x7fc110017f70)
at qemu/qemu_monitor.c:1697
#7 0x00007fc127ffe250 in qemuProcessInitMonitor (driver=0x7fc12004e1e0,
vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START) at qemu/qemu_process.c:1763
#8 0x00007fc127ffe564 in qemuConnectMonitor (driver=0x7fc12004e1e0,
vm=0x7fc110003d00,
asyncJob=6, logCtxt=0x7fc1100089c0) at qemu/qemu_process.c:1835
#9 0x00007fc127fff386 in qemuProcessWaitForMonitor (driver=0x7fc12004e1e0,
vm=0x7fc110003d00, asyncJob=6, logCtxt=0x7fc1100089c0) at
qemu/qemu_process.c:2180
#10 0x00007fc128009269 in qemuProcessLaunch (conn=0x7fc1100009a0,
driver=0x7fc12004e1e0,
vm=0x7fc110003d00, asyncJob=QEMU_ASYNC_JOB_START, incoming=0x0, snapshot=0x0,
vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE, flags=17) at qemu/qemu_process.c:6111
#11 0x00007fc128009e85 in qemuProcessStart (conn=0x7fc1100009a0,
driver=0x7fc12004e1e0,
vm=0x7fc110003d00, updatedCPU=0x0, asyncJob=QEMU_ASYNC_JOB_START,
migrateFrom=0x0,
migrateFd=-1, migratePath=0x0, snapshot=0x0,
vmop=VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
flags=17) at qemu/qemu_process.c:6334
#12 0x00007fc1280552f1 in qemuDomainCreateXML (conn=0x7fc1100009a0,
xml=0x7fc110000ed0 "<!--\nWARNING: THIS IS AN AUTO-GENERATED FILE.
CHANGES TO IT ARE LIKELY TO BE\nOVERWRITTEN AND LOST. Changes to this xml
configuration should be made using:\n virsh edit testvv\nor other
applicati"..., flags=0) at qemu/qemu_driver.c:1776
...
Thread 1 (Thread 0x7fc143c66880 (LWP 64081)):
#0 0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from
/lib64/libpthread.so.0
#1 0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
at util/virthread.c:122
#2 0x00007fc1431a394f in virNWFilterWriteLockFilterUpdates () at
conf/nwfilter_conf.c:159
#3 0x00007fc12a5230a0 in nwfilterStateReload () at nwfilter/nwfilter_driver.c:299
#4 0x00007fc1433170c2 in virStateReload () at libvirt.c:829
#5 0x0000558c522d5686 in daemonReloadHandler (dmn=0x558c5328b230,
sig=0x7ffe0a831e30,
opaque=0x0) at remote/remote_daemon.c:724
#6 0x00007fc14321e3c7 in virNetDaemonSignalEvent (watch=2, fd=9, events=1,
opaque=0x558c5328b230) at rpc/virnetdaemon.c:654
#7 0x00007fc143085cc7 in virEventPollDispatchHandles (nfds=11, fds=0x558c532cd930)
at util/vireventpoll.c:508
#8 0x00007fc143086586 in virEventPollRunOnce () at util/vireventpoll.c:657
#9 0x00007fc143084312 in virEventRunDefaultImpl () at util/virevent.c:327
#10 0x00007fc14321ecb8 in virNetDaemonRun (dmn=0x558c5328b230) at
rpc/virnetdaemon.c:858
#11 0x0000558c522d7add in main (argc=3, argv=0x7ffe0a832758) at
remote/remote_daemon.c:1496
(gdb) thr 1
[Switching to thread 1 (Thread 0x7fc143c66880 (LWP 64081))]
#0 0x00007fc13fb9aac8 in __pthread_rwlock_wrlock_slow () from
/lib64/libpthread.so.0
(gdb) f 1
#1 0x00007fc143101ffa in virRWLockWrite (m=0x7fc143678cc0 <updateLock>)
at util/virthread.c:122
122 pthread_rwlock_wrlock(&m->lock);
(gdb) p updateLock
$1 = {lock = {__data = {__lock = 0, __nr_readers = 1, __readers_wakeup = 0,
__writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 1,
__writer = 0,
__shared = 0, __rwelision = 0 '\000', __pad1 =
"\000\000\000\000\000\000",
__pad2 = 0, __flags = 0},
__size = "\000\000\000\000\001", '\000' <repeats 15 times>,
"\001",
'\000' <repeats 34 times>, __align = 4294967296}}
Reloading of the nwfilter driver is stuck waiting for a write lock, which
already has a reader (from qemuDomainCreateXML) in the critical section.
Since the reload occurs in the context of the main event loop thread,
libvirtd becomes deadlocked. The deadlock can be avoided by offloading
the reload work to a thread.
Signed-off-by: Jim Fehlig <jfehlig(a)suse.com>
---
Fix suggested by danpb
https://www.redhat.com/archives/libvir-list/2018-March/msg00382.html
src/remote/remote_daemon.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
index f8082f62f..2146655b0 100644
--- a/src/remote/remote_daemon.c
+++ b/src/remote/remote_daemon.c
@@ -709,20 +709,32 @@ static void daemonShutdownHandler(virNetDaemonPtr dmn,
virNetDaemonQuit(dmn);
}
+static void daemonReloadHandlerThread(void *opague ATTRIBUTE_UNUSED)
+{
+ VIR_INFO("Reloading configuration on SIGHUP");
+ virHookCall(VIR_HOOK_DRIVER_DAEMON, "-",
+ VIR_HOOK_DAEMON_OP_RELOAD, SIGHUP, "SIGHUP", NULL, NULL);
+ if (virStateReload() < 0)
+ VIR_WARN("Error while reloading drivers");
+}
+
static void daemonReloadHandler(virNetDaemonPtr dmn ATTRIBUTE_UNUSED,
siginfo_t *sig ATTRIBUTE_UNUSED,
void *opaque ATTRIBUTE_UNUSED)
{
+ virThread thr;
+
if (!driversInitialized) {
VIR_WARN("Drivers are not initialized, reload ignored");
return;
}
- VIR_INFO("Reloading configuration on SIGHUP");
- virHookCall(VIR_HOOK_DRIVER_DAEMON, "-",
- VIR_HOOK_DAEMON_OP_RELOAD, SIGHUP, "SIGHUP", NULL, NULL);
- if (virStateReload() < 0)
- VIR_WARN("Error while reloading drivers");
+ if (virThreadCreate(&thr, false, daemonReloadHandlerThread, NULL) < 0) {
+ /*
+ * Not much we can do on error here except log it.
+ */
+ VIR_ERROR(_("Failed to create thread to handle daemon restart"));
+ }
}
static int daemonSetupSignals(virNetDaemonPtr dmn)
--
2.16.2