When I use the newest libvirt to save a domain, libvirtd deadlocks.
Here is the output of gdb:
(gdb) thread 3
[Switching to thread 3 (Thread 0x7f972a1fc710 (LWP 30265))]#0 0x000000351fe0e034 in
__lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0 0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x000000351fe09345 in _L_lock_870 () from /lib64/libpthread.so.0
#2 0x000000351fe09217 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f9737b0e663 in virMutexLock (m=0x6fdd60) at util/threads-pthread.c:80
#4 0x000000000041853d in virEventUpdateTimeoutImpl (timer=12, frequency=0) at
event.c:247
#5 0x00007f9737af8ac2 in virEventUpdateTimeout (timer=12, timeout=0) at util/event.c:70
#6 0x000000000046654e in qemuDomainEventQueue (driver=0x19c5440, event=0x7f9724005f80) at
qemu/qemu_domain.c:97
#7 0x000000000044184a in qemudDomainSaveFlag (driver=0x19c5440, dom=0x7f9724000970,
vm=0x1a63ac0, path=0x7f9724000940 "/var/lib/libvirt/images/memory.save",
compressed=0)
at qemu/qemu_driver.c:2074
#8 0x0000000000441b30 in qemudDomainSave (dom=0x7f9724000970, path=0x7f9724000940
"/var/lib/libvirt/images/memory.save") at qemu/qemu_driver.c:2137
#9 0x00007f9737b66d7d in virDomainSave (domain=0x7f9724000970, to=0x7f9724000940
"/var/lib/libvirt/images/memory.save") at libvirt.c:2280
#10 0x00000000004273e6 in remoteDispatchDomainSave (server=0x19ac120,
client=0x7f972c081740, conn=0x7f9720000b40, hdr=0x7f972c041250, rerr=0x7f972a1fbb40,
args=0x7f972a1fbc40,
ret=0x7f972a1fbbe0) at remote.c:2273
#11 0x0000000000431b6f in remoteDispatchClientCall (server=0x19ac120,
client=0x7f972c081740, msg=0x7f972c001240, qemu_protocol=false) at dispatch.c:529
#12 0x00000000004316fe in remoteDispatchClientRequest (server=0x19ac120,
client=0x7f972c081740, msg=0x7f972c001240) at dispatch.c:407
#13 0x000000000041d9a2 in qemudWorker (data=0x7f972c000908) at libvirtd.c:1629
#14 0x000000351fe077e1 in start_thread () from /lib64/libpthread.so.0
#15 0x000000351f2e153d in clone () from /lib64/libc.so.6
(gdb) thread 7
[Switching to thread 7 (Thread 0x7f9730bcd710 (LWP 30261))]#0 0x000000351fe0e034 in
__lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0 0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x000000351fe09345 in _L_lock_870 () from /lib64/libpthread.so.0
#2 0x000000351fe09217 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f9737b0e663 in virMutexLock (m=0x1a63ac0) at util/threads-pthread.c:80
#4 0x00007f9737b32cb8 in virDomainObjLock (obj=0x1a63ac0) at conf/domain_conf.c:8447
#5 0x00000000004732d2 in qemuProcessHandleMonitorDestroy (mon=0x7f9718002610,
vm=0x1a63ac0) at qemu/qemu_process.c:599
#6 0x000000000047c05b in qemuMonitorFree (mon=0x7f9718002610) at qemu/qemu_monitor.c:209
#7 0x000000000047c0f9 in qemuMonitorUnref (mon=0x7f9718002610) at
qemu/qemu_monitor.c:229
#8 0x000000000047c135 in qemuMonitorUnwatch (monitor=0x7f9718002610) at
qemu/qemu_monitor.c:242
#9 0x00000000004194b4 in virEventCleanupHandles () at event.c:538
#10 0x00000000004197a4 in virEventRunOnce () at event.c:603
#11 0x000000000041f0bd in qemudOneLoop () at libvirtd.c:2285
#12 0x000000000041f5e9 in qemudRunLoop (opaque=0x19ac120) at libvirtd.c:2395
#13 0x000000351fe077e1 in start_thread () from /lib64/libpthread.so.0
#14 0x000000351f2e153d in clone () from /lib64/libc.so.6
(gdb) p *(virMutexPtr)0x6fdd60
$2 = {lock = {__data = {__lock = 2, __count = 0, __owner = 30261, __nusers = 1, __kind =
0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = "\002\000\000\000\000\000\000\000\065v\000\000\001", '\000'
<repeats 26 times>, __align = 2}}
(gdb) p *(virMutexPtr)0x1a63ac0
$3 = {lock = {__data = {__lock = 2, __count = 0, __owner = 30265, __nusers = 1, __kind =
0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = "\002\000\000\000\000\000\000\000\071v\000\000\001", '\000'
<repeats 26 times>, __align = 2}}
(gdb) info threads
7 Thread 0x7f9730bcd710 (LWP 30261) 0x000000351fe0e034 in __lll_lock_wait () from
/lib64/libpthread.so.0
6 Thread 0x7f972bfff710 (LWP 30262) 0x000000351fe0b43c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
5 Thread 0x7f972b5fe710 (LWP 30263) 0x000000351fe0b43c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
4 Thread 0x7f972abfd710 (LWP 30264) 0x000000351fe0b43c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
* 3 Thread 0x7f972a1fc710 (LWP 30265) 0x000000351fe0e034 in __lll_lock_wait () from
/lib64/libpthread.so.0
2 Thread 0x7f97297fb710 (LWP 30266) 0x000000351fe0b43c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
1 Thread 0x7f9737aac800 (LWP 30260) 0x000000351fe0803d in pthread_join () from
/lib64/libpthread.so.0
The reason is that a free callback may need to lock an object while another
thread already holds that object's lock and is calling into the event API,
which takes eventLoop.lock. The mutex dumps above show the cycle: thread 3
(LWP 30265) owns the domain object lock 0x1a63ac0 and is waiting for
eventLoop.lock 0x6fdd60, while thread 7 (LWP 30261) owns eventLoop.lock and
is waiting for the domain object lock, a classic ABBA deadlock.
virEventDispatchHandles() already unlocks eventLoop before invoking the
callback; I think virEventCleanupTimeouts() and virEventCleanupHandles()
should do the same.
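To illustrate the cycle, here is a minimal standalone sketch of the same
ABBA pattern and of the unlock-around-callback fix applied below. Every
name in it (loop_lock, vm_lock, ff, worker, event_loop) is a hypothetical
stand-in chosen for this demonstration, not libvirt code:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t loop_lock = PTHREAD_MUTEX_INITIALIZER; /* models eventLoop.lock */
static pthread_mutex_t vm_lock   = PTHREAD_MUTEX_INITIALIZER; /* models the virDomainObj lock */

/* Models a free callback such as qemuProcessHandleMonitorDestroy():
 * it needs the domain object lock. */
static void ff(void *opaque)
{
    (void)opaque;
    pthread_mutex_lock(&vm_lock);
    pthread_mutex_unlock(&vm_lock);
}

/* Models thread 3: holds the domain object lock, then calls into the
 * event API, which takes the event loop lock. */
static void *worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&vm_lock);
    pthread_mutex_lock(&loop_lock);
    pthread_mutex_unlock(&loop_lock);
    pthread_mutex_unlock(&vm_lock);
    return NULL;
}

/* Set to 0 for the deadlock-prone behaviour of the current cleanup
 * functions; set to 1 for the behaviour this patch introduces. */
#define UNLOCK_AROUND_FF 1

/* Models thread 7 (virEventCleanupHandles): holds the event loop lock
 * while running the free callback. */
static void *event_loop(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&loop_lock);
    sleep(1);                     /* let the worker grab vm_lock first */
#if UNLOCK_AROUND_FF
    pthread_mutex_unlock(&loop_lock);
    ff(NULL);
    pthread_mutex_lock(&loop_lock);
#else
    ff(NULL);                     /* callback invoked under loop_lock */
#endif
    pthread_mutex_unlock(&loop_lock);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, event_loop, NULL);
    pthread_create(&t2, NULL, worker, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    puts("no deadlock");
    return 0;
}

Build with "gcc -pthread" and flip UNLOCK_AROUND_FF to 0: the two threads
then block each other exactly as threads 3 and 7 do in the backtraces above.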
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
daemon/event.c | 10 ++++++++--
1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/daemon/event.c b/daemon/event.c
index 1a31717..0d45014 100644
--- a/daemon/event.c
+++ b/daemon/event.c
@@ -493,8 +493,11 @@ static int virEventCleanupTimeouts(void) {
 
         EVENT_DEBUG("Purging timeout %d with id %d", i,
                     eventLoop.timeouts[i].timer);
-        if (eventLoop.timeouts[i].ff)
+        if (eventLoop.timeouts[i].ff) {
+            virMutexUnlock(&eventLoop.lock);
             (eventLoop.timeouts[i].ff)(eventLoop.timeouts[i].opaque);
+            virMutexLock(&eventLoop.lock);
+        }
 
         if ((i+1) < eventLoop.timeoutsCount) {
             memmove(eventLoop.timeouts+i,
@@ -534,8 +537,11 @@ static int virEventCleanupHandles(void) {
             continue;
         }
 
-        if (eventLoop.handles[i].ff)
+        if (eventLoop.handles[i].ff) {
+            virMutexUnlock(&eventLoop.lock);
             (eventLoop.handles[i].ff)(eventLoop.handles[i].opaque);
+            virMutexLock(&eventLoop.lock);
+        }
 
         if ((i+1) < eventLoop.handlesCount) {
             memmove(eventLoop.handles+i,
--
1.7.1