At 05/16/2011 10:24 PM, Daniel P. Berrange wrote:
On Thu, May 12, 2011 at 05:04:36PM +0100, Daniel P. Berrange wrote:
> An update of
>
>
http://www.redhat.com/archives/libvir-list/2011-May/msg00605.html
>
> Changes in this series
>
> - Fix comments from previous review
> - Rebase to latest GIT
>
> Also pullable from
>
>
http://gitorious.org/~berrange/libvirt/staging/commits/migrate-locking-3
>
> NB, this branch includes the migration + lock manager series
> in one big set.
I've pushed this series now. There will be a couple of follow up patches
to add some further enhancements before next release.
I tested migration 3 times. libvirtd crashed twice and may have deadlocked once.
I have not found the cause of the problem.
Here is the command line:
# virsh migrate --p2p --tunnelled vm1 qemu+tls://<dest OS>/system
I used gdb to analyze the core files.
The first core file:
# gdb /usr/sbin/libvirtd core.10511
...
Core was generated by `libvirtd --listen'.
Program terminated with signal 11, Segmentation fault.
#0 0x000000338a0779d1 in _int_free () from /lib64/libc.so.6
...
(gdb) info threads
7 Thread 10511 0x000000338a80803d in pthread_join () from /lib64/libpthread.so.0
6 Thread 10513 0x000000338a0d7e33 in poll () from /lib64/libc.so.6
5 Thread 10515 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
4 Thread 10514 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
3 Thread 10518 0x000000338a80e034 in __lll_lock_wait () from /lib64/libpthread.so.0
2 Thread 10516 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
* 1 Thread 10517 0x000000338a0779d1 in _int_free () from /lib64/libc.so.6
(gdb) bt
#0 0x000000338a0779d1 in _int_free () from /lib64/libc.so.6
#1 0x00007fe55552bad3 in virFree (ptrptr=0x7fe547ffebb8) at util/memory.c:310
#2 0x00007fe555527b5b in virLogMessage (category=0x4ce651 "file.dispatch.c",
priority=1, funcname=0x4ce9a0 "remoteDispatchClientRequest", linenr=363,
flags=0,
fmt=0x4ce6a0 "prog=%d ver=%d type=%d status=%d serial=%d proc=%d") at
util/logging.c:812
#3 0x00000000004339f7 in remoteDispatchClientRequest (server=0x8f4470,
client=0x7fe5480603f0, msg=0x7fe5480e7280) at dispatch.c:361
#4 0x000000000041b10f in qemudWorker (data=0x7fe548000908) at libvirtd.c:1619
#5 0x000000338a8077e1 in start_thread () from /lib64/libpthread.so.0
#6 0x000000338a0e153d in clone () from /lib64/libc.so.6
(gdb) thread 3
[Switching to thread 3 (Thread 10518)]#0 0x000000338a80e034 in __lll_lock_wait () from
/lib64/libpthread.so.0
(gdb) bt
#0 0x000000338a80e034 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x000000338a809345 in _L_lock_870 () from /lib64/libpthread.so.0
#2 0x000000338a809217 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007fe55553603c in virMutexLock (m=0x7fe534003140) at util/threads-pthread.c:85
#4 0x00007fe55555b14e in virDomainObjLock (obj=0x7fe534003140) at
conf/domain_conf.c:8984
#5 0x000000000046b118 in qemuDomainObjExitRemoteWithDriver (driver=0x8f85b0,
obj=0x7fe534003140) at qemu/qemu_domain.c:682
#6 0x00000000004816f8 in doPeer2PeerMigrate (driver=0x8f85b0, sconn=0x7fe540002120,
vm=0x7fe534003140, uri=0x7fe540012b00 "qemu+tls://wency/system", flags=6,
dname=0x0,
resource=0) at qemu/qemu_migration.c:1944
#7 0x00000000004818df in qemuMigrationPerform (driver=0x8f85b0, conn=0x7fe540002120,
vm=0x7fe534003140, uri=0x7fe540012b00 "qemu+tls://wency/system", cookiein=0x0,
cookieinlen=0, cookieout=0x0, cookieoutlen=0x0, flags=6, dname=0x0, resource=0,
killOnFinish=true) at qemu/qemu_migration.c:1995
#8 0x000000000044dfdd in qemudDomainMigratePerform (dom=0x7fe540002340, cookie=0x0,
cookielen=0, uri=0x7fe540012b00 "qemu+tls://wency/system", flags=6, dname=0x0,
resource=0)
at qemu/qemu_driver.c:5899
#9 0x00007fe55559350e in virDomainMigratePerform (domain=0x7fe540002340, cookie=0x0,
cookielen=0, uri=0x7fe540012b00 "qemu+tls://wency/system", flags=6, dname=0x0,
bandwidth=0) at libvirt.c:4314
#10 0x0000000000429edf in remoteDispatchDomainMigratePerform (server=0x8f4470,
client=0x7fe5480011f0, conn=0x7fe540002120, hdr=0x7fe548051830, rerr=0x7fe5475fdb40,
args=0x7fe5475fdc40, ret=0x7fe5475fdbe0) at remote_dispatch_bodies.h:1291
#11 0x0000000000433fce in remoteDispatchClientCall (server=0x8f4470,
client=0x7fe5480011f0, msg=0x7fe548011820, qemu_protocol=false) at dispatch.c:516
#12 0x0000000000433b5d in remoteDispatchClientRequest (server=0x8f4470,
client=0x7fe5480011f0, msg=0x7fe548011820) at dispatch.c:394
#13 0x000000000041b10f in qemudWorker (data=0x7fe548000920) at libvirtd.c:1619
#14 0x000000338a8077e1 in start_thread () from /lib64/libpthread.so.0
#15 0x000000338a0e153d in clone () from /lib64/libc.so.6
(gdb) p ((virMutexPtr)0x7fe534003140)
$1 = (virMutex *) 0x7fe534003140
(gdb) p *((virMutexPtr)0x7fe534003140)
$2 = {lock = {__data = {__lock = 859451441, __count = 959658553, __owner = 942747950,
__nusers = 808525882, __kind = 976695605, __spins = 1650811936, __list = {
__prev = 0x6d6572203a206775, __next = 0x617073694465746f}}, __size =
"10:39:39.118: 10517: debug : remoteDispa", __align = 4121702101321134129}}
(gdb)
The domain object's lock appears to be corrupted: the mutex memory has been overwritten with log message text.
The other core file:
# gdb /usr/sbin/libvirtd core.11364
...
Core was generated by `libvirtd --listen'.
Program terminated with signal 11, Segmentation fault.
#0 0x00007f267b8d9111 in virDomainDefFree (def=0x7f2658012070) at conf/domain_conf.c:977
977 virDomainGraphicsDefFree(def->graphics[i]);
...
(gdb) info threads
7 Thread 11368 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
6 Thread 11366 0x000000338a0d7e33 in poll () from /lib64/libc.so.6
5 Thread 11369 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
4 Thread 11370 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
3 Thread 11367 0x000000338a80b43c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
2 Thread 11364 0x000000338a80803d in pthread_join () from /lib64/libpthread.so.0
* 1 Thread 11371 0x00007f267b8d9111 in virDomainDefFree (def=0x7f2658012070) at
conf/domain_conf.c:977
(gdb) bt
#0 0x00007f267b8d9111 in virDomainDefFree (def=0x7f2658012070) at conf/domain_conf.c:977
#1 0x00007f267b8d9739 in virDomainObjFree (dom=0x7f2658003690) at
conf/domain_conf.c:1074
#2 0x00007f267b8d987b in virDomainObjUnref (dom=0x7f2658003690) at
conf/domain_conf.c:1100
#3 0x000000000046b124 in qemuDomainObjExitRemoteWithDriver (driver=0xb0d540,
obj=0x7f2658003690) at qemu/qemu_domain.c:685
#4 0x00000000004816f8 in doPeer2PeerMigrate (driver=0xb0d540, sconn=0x7f265c000dd0,
vm=0x7f2658003690, uri=0x7f265c000d50 "qemu+tls://wency/system", flags=6,
dname=0x0,
resource=0) at qemu/qemu_migration.c:1944
#5 0x00000000004818df in qemuMigrationPerform (driver=0xb0d540, conn=0x7f265c000dd0,
vm=0x7f2658003690, uri=0x7f265c000d50 "qemu+tls://wency/system", cookiein=0x0,
cookieinlen=0, cookieout=0x0, cookieoutlen=0x0, flags=6, dname=0x0, resource=0,
killOnFinish=true) at qemu/qemu_migration.c:1995
#6 0x000000000044dfdd in qemudDomainMigratePerform (dom=0x7f265c000c50, cookie=0x0,
cookielen=0, uri=0x7f265c000d50 "qemu+tls://wency/system", flags=6, dname=0x0,
resource=0)
at qemu/qemu_driver.c:5899
#7 0x00007f267b92750e in virDomainMigratePerform (domain=0x7f265c000c50, cookie=0x0,
cookielen=0, uri=0x7f265c000d50 "qemu+tls://wency/system", flags=6, dname=0x0,
bandwidth=0) at libvirt.c:4314
#8 0x0000000000429edf in remoteDispatchDomainMigratePerform (server=0xb09470,
client=0x7f26700011f0, conn=0x7f265c000dd0, hdr=0x7f2670043380, rerr=0x7f266d7fab40,
args=0x7f266d7fac40, ret=0x7f266d7fabe0) at remote_dispatch_bodies.h:1291
#9 0x0000000000433fce in remoteDispatchClientCall (server=0xb09470,
client=0x7f26700011f0, msg=0x7f2670003370, qemu_protocol=false) at dispatch.c:516
#10 0x0000000000433b5d in remoteDispatchClientRequest (server=0xb09470,
client=0x7f26700011f0, msg=0x7f2670003370) at dispatch.c:394
#11 0x000000000041b10f in qemudWorker (data=0x7f2670000920) at libvirtd.c:1619
#12 0x000000338a8077e1 in start_thread () from /lib64/libpthread.so.0
#13 0x000000338a0e153d in clone () from /lib64/libc.so.6
(gdb) p def->graphics
$1 = (virDomainGraphicsDefPtr *) 0x0
(gdb) p def->ngraphics
$2 = 1
(gdb)
If I revert this patchset, migration can finish successfully.
Daniel