Hi, all,
We were running OpenStack on Ubuntu with libvirt 0.9.10, and we found that
libvirt monitor commands were not working well.
There were a lot of errors in libvirtd.log like this:
2013-02-07 06:07:39.000+0000: 18112: error :
qemuDomainObjBeginJobInternal:773 : Timed out during operation: cannot
acquire state change lock
We dug into libvirtd with strace and found that one of the threads showed
only the following system call:
futex(0x7f69ac0ec0ec, FUTEX_WAIT_PRIVATE, 2717, NULL
It seems this thread is waiting for a reply that never comes back, so the
other threads end up waiting for it. We also saw that there is a function called
virCondWaitUntil(). Is it safe for us to change the code from virCondWait()
to virCondWaitUntil() to prevent this kind of deadlock? Thanks.
The following is the output of gdb -p 'libvirt.pid', followed by 'thread id'
and 'bt full':
#0 0x00007f69c8c1dd84 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1 0x00007f69c9ee884a in virCondWait (c=<optimized out>, m=<optimized
out>) at util/threads-pthread.c:117
ret = <optimized out>
#2 0x000000000049c749 in qemuMonitorSend (mon=0x7f69ac0ec0c0,
msg=<optimized out>) at qemu/qemu_monitor.c:826
ret = -1
__func__ = "qemuMonitorSend"
__FUNCTION__ = "qemuMonitorSend"
#3 0x00000000004ac8ed in qemuMonitorJSONCommandWithFd (mon=0x7f69ac0ec0c0,
cmd=0x7f6998028280, scm_fd=-1, reply=0x7f69c57829f8)
at qemu/qemu_monitor_json.c:230
ret = -1
msg = {txFD = -1, txBuffer = 0x7f69980e9b00
"{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}\r\n",
txOffset =
49, txLength = 49,
rxBuffer = 0x0, rxLength = 0, rxObject = 0x0, finished = false,
passwordHandler = 0, passwordOpaque = 0x0}
cmdstr = 0x7f69980ef2f0
"{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}"
id = 0x7f69980b0a20 "libvirt-1359"
exe = <optimized out>
__FUNCTION__ = "qemuMonitorJSONCommandWithFd"
__func__ = "qemuMonitorJSONCommandWithFd"
#4 0x00000000004ae794 in qemuMonitorJSONGetBalloonInfo
(mon=0x7f69ac0ec0c0, currmem=0x7f69c5782a48) at
qemu/qemu_monitor_json.c:1190
ret = <optimized out>
cmd = 0x7f6998028280
reply = 0x0
__FUNCTION__ = "qemuMonitorJSONGetBalloonInfo"
#5 0x0000000000457451 in qemudDomainGetInfo (dom=<optimized out>,
info=0x7f69c5782b50) at qemu/qemu_driver.c:2181
priv = 0x7f69a0093b00
driver = 0x7f69b80ca8e0
vm = 0x7f69a0093370
ret = -1
err = <optimized out>
balloon = <optimized out>
__FUNCTION__ = "qemudDomainGetInfo"
#6 0x00007f69c9f63eda in virDomainGetInfo (domain=0x7f69980e3650,
info=0x7f69c5782b50) at libvirt.c:4230
ret = <optimized out>
conn = <optimized out>
__func__ = "virDomainGetInfo"
__FUNCTION__ = "virDomainGetInfo"
#7 0x0000000000439bca in remoteDispatchDomainGetInfo (ret=0x7f6998000c20,
args=<optimized out>, rerr=0x7f69c5782c50, client=0x157e730,
server=<optimized out>, msg=<optimized out>) at remote_dispatch.h:1640
rv = -1
tmp = {state = 1 '\001', maxMem = 2097152, memory = 0, nrVirtCpu =
0, cpuTime = 5981880000000}
dom = 0x7f69980e3650
priv = <optimized out>
#8 remoteDispatchDomainGetInfoHelper (server=<optimized out>,
client=0x157e730, msg=<optimized out>, rerr=0x7f69c5782c50, args=<optimized
out>,
ret=0x7f6998000c20) at remote_dispatch.h:1616
__func__ = "remoteDispatchDomainGetInfoHelper"
#9 0x00007f69c9fbb915 in virNetServerProgramDispatchCall (msg=0x1689cc0,
client=0x157e730, server=0x1577c90, prog=0x15825d0)
at rpc/virnetserverprogram.c:416
ret = 0x7f6998000c20 ""
rv = -1
i = <optimized out>
arg = 0x7f6998027950 "\360e\n\230i\177"
dispatcher = 0x73de40
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0,
str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
#10 virNetServerProgramDispatch (prog=0x15825d0, server=0x1577c90,
client=0x157e730, msg=0x1689cc0) at rpc/virnetserverprogram.c:289
ret = -1
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0,
str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
__func__ = "virNetServerProgramDispatch"
__FUNCTION__ = "virNetServerProgramDispatch"
#11 0x00007f69c9fb6461 in virNetServerHandleJob (jobOpaque=<optimized out>,
opaque=0x1577c90) at rpc/virnetserver.c:164
srv = 0x1577c90
job = 0x155dfa0
__func__ = "virNetServerHandleJob"
#12 0x00007f69c9ee8e3e in virThreadPoolWorker (opaque=<optimized out>) at
util/threadpool.c:144
data = 0x0
pool = 0x1577d80
cond = 0x1577de0
priority = false
job = 0x162dd20
#13 0x00007f69c9ee84e6 in virThreadHelper (data=<optimized out>) at
util/threads-pthread.c:161
args = 0x0
local = {func = 0x7f69c9ee8d00 <virThreadPoolWorker>, opaque =
0x1559f90}
#14 0x00007f69c8c19e9a in start_thread () from
/lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#15 0x00007f69c89474bd in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#16 0x0000000000000000 in ?? ()
No symbol table info available.
Regards,
Chun-Hung