On 11/08/2013 01:30 PM, Daniel P. Berrange wrote:
On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
> I hit a problem where the container is blocked in seteuid/setegid,
> which is called in lxcContainerSetID, on a UP system with libvirt
> compiled with --with-fuse=yes.
>
> I looked into the glibc code and found that setxid in glibc
> calls futex() to wait for other threads to change their
> setxid_futex to 0 (see setxid_mark_thread in glibc).
>
> The process created by the clone system call does not
> share memory with the other threads, and the contents
> of its memory do not change until we call execl (COW).
>
> So if the process created by clone runs before the
> fuse thread has started, the new setxid_futex value of the fuse
> thread will not be seen in this process, and it will be blocked
> forever.
>
> Maybe this problem should be fixed in glibc, but I am sending
> this patch as a quick fix.
Can you show a stack trace of the threads/processes deadlocking?
Sure
the libvirt_lxc tasks
root 7922 0.0 0.1 118976 3704 ? Ssl 20:55 0:00
/usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20
--background --veth vnet1
root 7927 0.0 0.1 53440 3072 ? S 20:55 0:00
/usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20
--background --veth vnet1
the pid of fuse thread is 7925
[root@localhost ~]# ls /proc/7922/task/
7922 7925
gdb -p 7925
(gdb) bt
#0 0x00007f2d39bcb83d in read () at ../sysdeps/unix/syscall-template.S:81
#1 0x00007f2d3a5dfb72 in fuse_kern_chan_receive () from /glibc/lib/libfuse.so.2
#2 0x00007f2d3a5e0b16 in fuse_ll_receive_buf () from /glibc/lib/libfuse.so.2
#3 0x00007f2d3a5dfdd1 in fuse_session_loop () from /glibc/lib/libfuse.so.2
#4 0x00007f2d3a5d8468 in fuse_loop () from /glibc/lib/libfuse.so.2
#5 0x00007f2d3aa55691 in lxcFuseRun (opaque=opaque@entry=0x7f2d3b13a420) at
lxc/lxc_fuse.c:276
#6 0x00007f2d3aaebb8e in virThreadHelper (data=<optimized out>) at
util/virthreadpthread.c:161
#7 0x00007f2d39bc4f22 in start_thread (arg=0x7f2d37fbc700) at pthread_create.c:309
#8 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
The arg of start_thread is the struct pthread of the fuse thread.
You can see that the setxid_futex of the fuse pthread has been set to 0.
(gdb) p *(struct pthread*)0x7f2d37fbc700
$1 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700,
multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392,
pointer_guard =
9991483700321457629,
vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0,
0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0,
0}}, {i = {0, 0, 0,
0}}, {i = {
0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}},
{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0, 0}}, {i = {0,
0, 0,
0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0,
0}}}, {{i = {0, 0, 0,
0}}, {
i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0,
0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i
= {0, 0, 0, 0}}, {i =
{0, 0, 0,
0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0,
0x4c8f28122d8dd600,
0x8aa8e17d00c415dd,
0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>,
prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev =
0x7f2d37fbc9e0, robust_head = {list =
0x7f2d37fbc9e0,
futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf =
0x7f2d37fbbe30, cancelhandling = 2, flags = 1, specific_1stblock = {{seq = 0, data = 0x0},
{seq = 0, data = 0x0}, {seq
= 0,
data = 0x0}, {seq = 1, data = 0x7f2d30021960}, {seq = 0, data = 0x0} <repeats 28
times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used =
true, report_events = false,
user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0,
*setxid_futex* = 0, cpuclock_offset = 1398764389412, joinid = 0x7f2d37fbc700, result =
0x0, schedparam =
{__sched_priority = 0},
schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg =
0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS,
eventdata = 0x0}, nextevent = 0x0,
exc = {
exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0},
stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096,
reported_guardsize = 4096, tpp = 0x0, res = {
retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0,
sin_port = 0, sin_addr = {s_addr = 0}, sin_zero =
"\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0,
sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"},
{sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero =
"\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0,
0x0,
0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>,
pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr =
{s_addr = 0}, mask = 0}, {addr = {s_addr = 0},
mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr
= {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask =
0}, {addr = {s_addr
= 0},
mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}},
qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad =
'\000' <repeats 51 times>, _ext = {
nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0,
nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""}
For the cloned process 7927
gdb -p 7927
(gdb) bt
#0 setxid_mark_thread (cmdp=0x7f2d3b2ef900, t=0x7f2d37fbc700) at allocatestack.c:994
#1 __nptl_setxid (cmdp=0x7f2d3b2ef900) at allocatestack.c:1086
#2 0x00007f2d392c1da1 in __setregid (rgid=rgid@entry=0, egid=egid@entry=0) at
../sysdeps/unix/sysv/linux/setregid.c:26
#3 0x00007f2d3aaf33f0 in virSetUIDGID (uid=uid@entry=0, gid=gid@entry=0,
groups=groups@entry=0x0, ngroups=ngroups@entry=0) at util/virutil.c:1055
#4 0x00007f2d3aa51b3c in lxcContainerSetID (def=0x7f2d3b141190) at
lxc/lxc_container.c:427
#5 lxcContainerChild (data=0x7fff40c4d960) at lxc/lxc_container.c:1829
#6 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
The setxid_futex of the fuse pthread (0x7f2d37fbc700) is still -2.
(gdb) p *t
$2 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700,
multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392,
pointer_guard =
9991483700321457629,
vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0,
0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0,
0}}, {i = {0, 0, 0,
0}}, {i = {
0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}},
{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0, 0}}, {i = {0,
0, 0,
0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0,
0}}}, {{i = {0, 0, 0,
0}}, {
i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0,
0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i
= {0, 0, 0, 0}}, {i =
{0, 0, 0,
0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0,
0x4c8f28122d8dd600,
0x8aa8e17d00c415dd,
0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>,
prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev =
0x7f2d37fbc9e0, robust_head = {list =
0x7f2d37fbc9e0,
futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x0,
cancelhandling = 0, flags = 1, specific_1stblock = {{seq = 0, data = 0x0} <repeats 32
times>}, specific =
{0x7f2d37fbca10,
0x0 <repeats 31 times>}, specific_used = false, report_events = false,
user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0,
*setxid_futex* = -2, cpuclock_offset = 0,
joinid = 0x7f2d37fbc700, result = 0x0, schedparam = {__sched_priority = 0}, schedpolicy
= 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0,
eventbuf = {eventmask =
{event_bits = {0,
0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc =
{exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock =
0x7f2d377bc000,
stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res =
{retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0,
sin_port = 0, sin_addr = {
s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family =
0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero =
"\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0,
sin_addr = {
s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0,
dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255
times>, pfcode = 0, ndots = 0, nsort = 0,
ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr =
{s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0},
{addr = {s_addr = 0},
mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr
= {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask =
0}}, qhook = 0x0,
rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000'
<repeats 51 times>, _ext = {nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0},
nscount6 = 0, nsinit = 0, nsaddrs = {0x0,
0x0,
0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""}