Hi, all,

We are running OpenStack on Ubuntu with libvirt 0.9.10, and we found that libvirt monitor commands are not working well.
There were a lot of errors in libvirtd.log like this:
2013-02-07 06:07:39.000+0000: 18112: error : qemuDomainObjBeginJobInternal:773 : Timed out during operation: cannot acquire state change lock

We dug into libvirtd with strace and found that one of the threads shows only the following call:
futex(0x7f69ac0ec0ec, FUTEX_WAIT_PRIVATE, 2717, NULL

It seems this thread is waiting for a reply that never comes back, so the other threads end up waiting for it. We also saw that there is a function called virCondWaitUntil(). Is it safe for us to change the code from virCondWait() to virCondWaitUntil() to prevent such a deadlock scenario? Thanks.

The following is the output of gdb -p 'libvirt.pid', then 'thread id' and 'bt full':
#0  0x00007f69c8c1dd84 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007f69c9ee884a in virCondWait (c=<optimized out>, m=<optimized out>) at util/threads-pthread.c:117
        ret = <optimized out>
#2  0x000000000049c749 in qemuMonitorSend (mon=0x7f69ac0ec0c0, msg=<optimized out>) at qemu/qemu_monitor.c:826
        ret = -1
        __func__ = "qemuMonitorSend"
        __FUNCTION__ = "qemuMonitorSend"
#3  0x00000000004ac8ed in qemuMonitorJSONCommandWithFd (mon=0x7f69ac0ec0c0, cmd=0x7f6998028280, scm_fd=-1, reply=0x7f69c57829f8)
    at qemu/qemu_monitor_json.c:230
        ret = -1
        msg = {txFD = -1, txBuffer = 0x7f69980e9b00 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}\r\n", txOffset = 49, txLength = 49,
          rxBuffer = 0x0, rxLength = 0, rxObject = 0x0, finished = false, passwordHandler = 0, passwordOpaque = 0x0}
        cmdstr = 0x7f69980ef2f0 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}"
        id = 0x7f69980b0a20 "libvirt-1359"
        exe = <optimized out>
        __FUNCTION__ = "qemuMonitorJSONCommandWithFd"
        __func__ = "qemuMonitorJSONCommandWithFd"
#4  0x00000000004ae794 in qemuMonitorJSONGetBalloonInfo (mon=0x7f69ac0ec0c0, currmem=0x7f69c5782a48) at qemu/qemu_monitor_json.c:1190
        ret = <optimized out>
        cmd = 0x7f6998028280
        reply = 0x0
        __FUNCTION__ = "qemuMonitorJSONGetBalloonInfo"
#5  0x0000000000457451 in qemudDomainGetInfo (dom=<optimized out>, info=0x7f69c5782b50) at qemu/qemu_driver.c:2181
        priv = 0x7f69a0093b00
        driver = 0x7f69b80ca8e0
        vm = 0x7f69a0093370 
        ret = -1
        err = <optimized out>
        balloon = <optimized out>
        __FUNCTION__ = "qemudDomainGetInfo"
#6  0x00007f69c9f63eda in virDomainGetInfo (domain=0x7f69980e3650, info=0x7f69c5782b50) at libvirt.c:4230
        ret = <optimized out>
        conn = <optimized out>
        __func__ = "virDomainGetInfo"
        __FUNCTION__ = "virDomainGetInfo"
#7  0x0000000000439bca in remoteDispatchDomainGetInfo (ret=0x7f6998000c20, args=<optimized out>, rerr=0x7f69c5782c50, client=0x157e730,
    server=<optimized out>, msg=<optimized out>) at remote_dispatch.h:1640
        rv = -1
        tmp = {state = 1 '\001', maxMem = 2097152, memory = 0, nrVirtCpu = 0, cpuTime = 5981880000000}
        dom = 0x7f69980e3650
        priv = <optimized out>
#8  remoteDispatchDomainGetInfoHelper (server=<optimized out>, client=0x157e730, msg=<optimized out>, rerr=0x7f69c5782c50, args=<optimized out>, 
    ret=0x7f6998000c20) at remote_dispatch.h:1616
        __func__ = "remoteDispatchDomainGetInfoHelper"
#9  0x00007f69c9fbb915 in virNetServerProgramDispatchCall (msg=0x1689cc0, client=0x157e730, server=0x1577c90, prog=0x15825d0)
    at rpc/virnetserverprogram.c:416
        ret = 0x7f6998000c20 ""
        rv = -1
        i = <optimized out>
        arg = 0x7f6998027950 "\360e\n\230i\177"
        dispatcher = 0x73de40
        rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
#10 virNetServerProgramDispatch (prog=0x15825d0, server=0x1577c90, client=0x157e730, msg=0x1689cc0) at rpc/virnetserverprogram.c:289
        ret = -1
        rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
        __func__ = "virNetServerProgramDispatch"
        __FUNCTION__ = "virNetServerProgramDispatch"
#11 0x00007f69c9fb6461 in virNetServerHandleJob (jobOpaque=<optimized out>, opaque=0x1577c90) at rpc/virnetserver.c:164
        srv = 0x1577c90
        job = 0x155dfa0
        __func__ = "virNetServerHandleJob"
#12 0x00007f69c9ee8e3e in virThreadPoolWorker (opaque=<optimized out>) at util/threadpool.c:144
        data = 0x0
        pool = 0x1577d80
        cond = 0x1577de0
        priority = false
        job = 0x162dd20
#13 0x00007f69c9ee84e6 in virThreadHelper (data=<optimized out>) at util/threads-pthread.c:161
        args = 0x0
        local = {func = 0x7f69c9ee8d00 <virThreadPoolWorker>, opaque = 0x1559f90}
#14 0x00007f69c8c19e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#15 0x00007f69c89474bd in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#16 0x0000000000000000 in ?? ()
No symbol table info available.

Regards,
Chun-Hung