
On 11/08/2013 01:30 PM, Daniel P. Berrange wrote:
On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
I hit a problem where the container blocks in seteuid/setegid, which are called from lxcContainerSetID, on a UP (uniprocessor) system when libvirt is compiled with --with-fuse=yes.
I looked into the glibc code and found that setxid in glibc calls futex() to wait for the other threads to change their setxid_futex to 0 (see setxid_mark_thread in glibc).
The process created by the clone system call does not share memory with the other threads (the pages are COW), and the contents of its memory do not change until we call execl.
So if the process created by clone runs before the fuse thread has started, the new setxid_futex value of the fuse thread will never be seen in this process, and it will block forever.
Maybe this problem should be fixed in glibc, but I am sending this patch as a quick fix.
Can you show a stack trace of the threads/processes that are deadlocking?
Sure. The libvirt_lxc tasks:

root 7922 0.0 0.1 118976 3704 ? Ssl 20:55 0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1
root 7927 0.0 0.1 53440 3072 ? S 20:55 0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1

The pid of the fuse thread is 7925:

[root@localhost ~]# ls /proc/7922/task/
7922 7925

gdb -p 7925
(gdb) bt
#0 0x00007f2d39bcb83d in read () at ../sysdeps/unix/syscall-template.S:81
#1 0x00007f2d3a5dfb72 in fuse_kern_chan_receive () from /glibc/lib/libfuse.so.2
#2 0x00007f2d3a5e0b16 in fuse_ll_receive_buf () from /glibc/lib/libfuse.so.2
#3 0x00007f2d3a5dfdd1 in fuse_session_loop () from /glibc/lib/libfuse.so.2
#4 0x00007f2d3a5d8468 in fuse_loop () from /glibc/lib/libfuse.so.2
#5 0x00007f2d3aa55691 in lxcFuseRun (opaque=opaque@entry=0x7f2d3b13a420) at lxc/lxc_fuse.c:276
#6 0x00007f2d3aaebb8e in virThreadHelper (data=<optimized out>) at util/virthreadpthread.c:161
#7 0x00007f2d39bc4f22 in start_thread (arg=0x7f2d37fbc700) at pthread_create.c:309
#8 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

The arg of start_thread is the struct pthread of the fuse thread. You can see that the setxid_futex of the fuse pthread has been set to 0.
(gdb) p *(struct pthread*)0x7f2d37fbc700 $1 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard = 9991483700321457629, vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = { 0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, { i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600, 0x8aa8e17d00c415dd, 0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list = 0x7f2d37fbc9e0, futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x7f2d37fbbe30, cancelhandling = 2, flags = 1, specific_1stblock = {{seq = 0, data = 0x0}, {seq = 0, data = 0x0}, {seq = 0, data = 0x0}, {seq = 1, data = 0x7f2d30021960}, {seq = 0, data = 0x0} <repeats 28 times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used = true, report_events = false, user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = 0, cpuclock_offset = 1398764389412, joinid = 0x7f2d37fbc700, result = 0x0, 
schedparam = {__sched_priority = 0}, schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc = { exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = { retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = { nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""} For the cloned process 7927 gdb -p 7927 (gdb) bt #0 setxid_mark_thread (cmdp=0x7f2d3b2ef900, t=0x7f2d37fbc700) at allocatestack.c:994 #1 __nptl_setxid (cmdp=0x7f2d3b2ef900) at allocatestack.c:1086 #2 0x00007f2d392c1da1 in __setregid (rgid=rgid@entry=0, egid=egid@entry=0) at ../sysdeps/unix/sysv/linux/setregid.c:26 #3 0x00007f2d3aaf33f0 in virSetUIDGID (uid=uid@entry=0, gid=gid@entry=0, groups=groups@entry=0x0, 
ngroups=ngroups@entry=0) at util/virutil.c:1055 #4 0x00007f2d3aa51b3c in lxcContainerSetID (def=0x7f2d3b141190) at lxc/lxc_container.c:427 #5 lxcContainerChild (data=0x7fff40c4d960) at lxc/lxc_container.c:1829 #6 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 the setxid_futex of fuse pthread(0x7f2d37fbc700) is still -2. (gdb) p *t $2 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard = 9991483700321457629, vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = { 0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, { i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600, 0x8aa8e17d00c415dd, 0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list = 0x7f2d37fbc9e0, futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x0, cancelhandling = 0, flags = 1, specific_1stblock = {{seq = 0, data = 0x0} <repeats 32 times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used 
= false, report_events = false, user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = -2, cpuclock_offset = 0, joinid = 0x7f2d37fbc700, result = 0x0, schedparam = {__sched_priority = 0}, schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc = {exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = {retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = { s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = { s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = {nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""}