We were running OpenStack with Ubuntu and libvirt 0.9.10. We found that libvirt monitor command not working well.
2013-02-07 06:07:39.000+0000: 18112: error : qemuDomainObjBeginJobInternal:773 : Timed out during operation: cannot acquire state change lock
We dig into libvirtd by strace and find one of the thread only have the following command
It seems this thread waiting for reply but nothing came back thus other threads would wait for it. We also saw there is a function called virCondWaitUntil(). Is it safe for us to modify the code from virCondWait() to virCondWaitUntil() to prevent such deadlock scenario? Thanks.
#0 0x00007f69c8c1dd84 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1 0x00007f69c9ee884a in virCondWait (c=<optimized out>, m=<optimized out>) at util/threads-pthread.c:117
ret = <optimized out>
#2 0x000000000049c749 in qemuMonitorSend (mon=0x7f69ac0ec0c0, msg=<optimized out>) at qemu/qemu_monitor.c:826
ret = -1
__func__ = "qemuMonitorSend"
__FUNCTION__ = "qemuMonitorSend"
#3 0x00000000004ac8ed in qemuMonitorJSONCommandWithFd (mon=0x7f69ac0ec0c0, cmd=0x7f6998028280, scm_fd=-1, reply=0x7f69c57829f8)
at qemu/qemu_monitor_json.c:230
ret = -1
msg = {txFD = -1, txBuffer = 0x7f69980e9b00 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}\r\n", txOffset = 49, txLength = 49,
rxBuffer = 0x0, rxLength = 0, rxObject = 0x0, finished = false, passwordHandler = 0, passwordOpaque = 0x0}
cmdstr = 0x7f69980ef2f0 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}"
id = 0x7f69980b0a20 "libvirt-1359"
exe = <optimized out>
__FUNCTION__ = "qemuMonitorJSONCommandWithFd"
__func__ = "qemuMonitorJSONCommandWithFd"
#4 0x00000000004ae794 in qemuMonitorJSONGetBalloonInfo (mon=0x7f69ac0ec0c0, currmem=0x7f69c5782a48) at qemu/qemu_monitor_json.c:1190
ret = <optimized out>
cmd = 0x7f6998028280
reply = 0x0
__FUNCTION__ = "qemuMonitorJSONGetBalloonInfo"
#5 0x0000000000457451 in qemudDomainGetInfo (dom=<optimized out>, info=0x7f69c5782b50) at qemu/qemu_driver.c:2181
priv = 0x7f69a0093b00
driver = 0x7f69b80ca8e0
vm = 0x7f69a0093370
ret = -1
err = <optimized out>
balloon = <optimized out>
__FUNCTION__ = "qemudDomainGetInfo"
#6 0x00007f69c9f63eda in virDomainGetInfo (domain=0x7f69980e3650, info=0x7f69c5782b50) at libvirt.c:4230
ret = <optimized out>
conn = <optimized out>
__func__ = "virDomainGetInfo"
__FUNCTION__ = "virDomainGetInfo"
#7 0x0000000000439bca in remoteDispatchDomainGetInfo (ret=0x7f6998000c20, args=<optimized out>, rerr=0x7f69c5782c50, client=0x157e730,
server=<optimized out>, msg=<optimized out>) at remote_dispatch.h:1640
rv = -1
tmp = {state = 1 '\001', maxMem = 2097152, memory = 0, nrVirtCpu = 0, cpuTime = 5981880000000}
dom = 0x7f69980e3650
priv = <optimized out>
#8 remoteDispatchDomainGetInfoHelper (server=<optimized out>, client=0x157e730, msg=<optimized out>, rerr=0x7f69c5782c50, args=<optimized out>,
ret=0x7f6998000c20) at remote_dispatch.h:1616
__func__ = "remoteDispatchDomainGetInfoHelper"
#9 0x00007f69c9fbb915 in virNetServerProgramDispatchCall (msg=0x1689cc0, client=0x157e730, server=0x1577c90, prog=0x15825d0)
at rpc/virnetserverprogram.c:416
ret = 0x7f6998000c20 ""
rv = -1
i = <optimized out>
arg = 0x7f6998027950 "\360e\n\230i\177"
dispatcher = 0x73de40
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
#10 virNetServerProgramDispatch (prog=0x15825d0, server=0x1577c90, client=0x157e730, msg=0x1689cc0) at rpc/virnetserverprogram.c:289
ret = -1
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
__func__ = "virNetServerProgramDispatch"
__FUNCTION__ = "virNetServerProgramDispatch"
#11 0x00007f69c9fb6461 in virNetServerHandleJob (jobOpaque=<optimized out>, opaque=0x1577c90) at rpc/virnetserver.c:164
srv = 0x1577c90
job = 0x155dfa0
__func__ = "virNetServerHandleJob"
#12 0x00007f69c9ee8e3e in virThreadPoolWorker (opaque=<optimized out>) at util/threadpool.c:144
data = 0x0
pool = 0x1577d80
cond = 0x1577de0
priority = false
job = 0x162dd20
#13 0x00007f69c9ee84e6 in virThreadHelper (data=<optimized out>) at util/threads-pthread.c:161
args = 0x0
local = {func = 0x7f69c9ee8d00 <virThreadPoolWorker>, opaque = 0x1559f90}
#14 0x00007f69c8c19e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#15 0x00007f69c89474bd in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#16 0x0000000000000000 in ?? ()
No symbol table info available.
Regards,
Chun-Hung