On Wed, Sep 26, 2012 at 03:56:52PM +0100, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange(a)redhat.com>
Some users report (very rarely) seeing a deadlock in the QEMU
monitor callbacks
Thread 10 (Thread 0x7fcd11e20700 (LWP 26753)):
at util/threads-pthread.c:85
at conf/domain_conf.c:14256
vm=0x7fcccc00a850) at qemu/qemu_process.c:1026
at qemu/qemu_monitor.c:249
at util/virobject.c:139
at qemu/qemu_monitor.c:860
vm=vm@entry=0x7fcccc00a850,
reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED, flags=flags@entry=0)
at qemu/qemu_process.c:4057
flags=<optimized out>) at qemu/qemu_driver.c:1977
domain=domain@entry=0x7fccf00c1830, flags=1) at libvirt.c:2256
Argh, GIT ate my stack trace because the first letter was '#'!
Thread 10 (Thread 0x7fcd11e20700 (LWP 26753)):
#0 0x00000030d0e0de4d in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00000030d0e09ca6 in _L_lock_840 () from /lib64/libpthread.so.0
#2 0x00000030d0e09ba8 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007fcd162f416d in virMutexLock (m=<optimized out>)
at util/threads-pthread.c:85
#4 0x00007fcd1632c651 in virDomainObjLock (obj=<optimized out>)
at conf/domain_conf.c:14256
#5 0x00007fcd0daf05cc in qemuProcessHandleMonitorDestroy (mon=0x7fcccc0029e0,
vm=0x7fcccc00a850) at qemu/qemu_process.c:1026
#6 0x00007fcd0db01710 in qemuMonitorDispose (obj=0x7fcccc0029e0)
at qemu/qemu_monitor.c:249
#7 0x00007fcd162fd4e3 in virObjectUnref (anyobj=<optimized out>)
at util/virobject.c:139
#8 0x00007fcd0db027a9 in qemuMonitorClose (mon=<optimized out>)
at qemu/qemu_monitor.c:860
#9 0x00007fcd0daf61ad in qemuProcessStop (driver=driver@entry=0x7fcd04079d50,
vm=vm@entry=0x7fcccc00a850,
reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED, flags=flags@entry=0)
at qemu/qemu_process.c:4057
#10 0x00007fcd0db323cf in qemuDomainDestroyFlags (dom=<optimized out>,
flags=<optimized out>) at qemu/qemu_driver.c:1977
#11 0x00007fcd1637ff51 in virDomainDestroyFlags (
domain=domain@entry=0x7fccf00c1830, flags=1) at libvirt.c:2256
At frame #10 we are holding the domain lock, we call into
qemuProcessStop() to cleanup QEMU, which triggers the monitor
to close, which invokes qemuProcessHandleMonitorDestroy() which
tries to obtain the domain lock again. This is a non-recursive
lock, hence hang.
Since qemuMonitorPtr is a virObject, the unref call in
qemuProcessHandleMonitorDestroy no longer needs mutex
protection. The assignment of priv->mon = NULL, can be
instead done by the caller of qemuMonitorClose(), thus
removing all need for locking.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/qemu/qemu_process.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index ade64b7..c8c188a 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1019,17 +1019,10 @@ no_memory:
}
-static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon,
+static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjPtr vm)
{
- qemuDomainObjPrivatePtr priv;
-
- virDomainObjLock(vm);
- priv = vm->privateData;
- if (priv->mon == mon)
- priv->mon = NULL;
- if (virObjectUnref(vm))
- virDomainObjUnlock(vm);
+ virObjectUnref(vm);
}
static int
@@ -3995,8 +3988,10 @@ void qemuProcessStop(struct qemud_driver *driver,
priv->agentError = false;
}
- if (priv->mon)
+ if (priv->mon) {
qemuMonitorClose(priv->mon);
+ priv->mon = NULL;
+ }
if (priv->monConfig) {
if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
--
1.7.11.2
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|