Hello everybody!
We have a cluster of servers managed by VMmanager 5 KVM (by ispsystem).
A typical node:
# cat /etc/redhat-release
CentOS Linux release 7.3.1611 (Core)
# uname -r
3.10.0-693.11.6.el7.x86_64
# rpm -qa |grep libvirt
libvirt-daemon-driver-qemu-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-disk-3.7.0-1.el7.centos.x86_64
libvirt-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-core-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-nodedev-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-lxc-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-iscsi-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-gluster-3.7.0-1.el7.centos.x86_64
libvirt-daemon-kvm-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-network-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-interface-3.7.0-1.el7.centos.x86_64
libvirt-daemon-config-network-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-rbd-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-3.7.0-1.el7.centos.x86_64
libvirt-libs-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-nwfilter-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-secret-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-mpath-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-scsi-3.7.0-1.el7.centos.x86_64
libvirt-client-3.7.0-1.el7.centos.x86_64
libvirt-daemon-3.7.0-1.el7.centos.x86_64
libvirt-daemon-config-nwfilter-3.7.0-1.el7.centos.x86_64
libvirt-daemon-driver-storage-logical-3.7.0-1.el7.centos.x86_64
# rpm -qa |grep qemu
qemu-kvm-common-rhev-2.6.0-27.1.el7.centos.maros.x86_64
ipxe-roms-qemu-20160127-5.git6366fa7a.el7.noarch
qemu-img-rhev-2.6.0-27.1.el7.centos.maros.x86_64
qemu-kvm-rhev-2.6.0-27.1.el7.centos.maros.x86_64
# rpm -qa |grep ebtables
ebtables-2.0.10-15.el7.centos.marosnet.x86_64
The ebtables package is built with the patch from
https://marc.info/?l=netfilter-devel&m=150728694430435 (described at
https://bugzilla.redhat.com/show_bug.cgi?id=1495893).
Sometimes libvirtd just hangs and stops responding to virsh requests
(such as `virsh list --all`).
When this happens:
# strace -p 5786
read(53, "\0\0\0\34", 4) = 4
read(53, "keep\0\0\0\1\0\0\0\2\0\0\0\2\0\0\0\0\0\0\0\0", 24) = 24
poll([{fd=5, events=POLLIN}, {fd=7, events=POLLIN}, {fd=12,
events=POLLIN}, {fd=13, events=POLLIN}, {fd=14, events=POLLIN}, {fd=15,
events=POLLIN}, {fd=19, events=POLLIN}, {fd=23,
events=POLLIN|POLLERR|POLLHUP}, {fd=21, events=POLLIN|POLLERR|POLLHUP},
{fd=27, events=POLLIN|POLLERR|POLLHUP}, {fd=25,
events=POLLIN|POLLERR|POLLHUP}, {fd=22, events=POLLIN|POLLERR|POLLHUP},
{fd=24, events=POLLIN|POLLERR|POLLHUP}, {fd=26,
events=POLLIN|POLLERR|POLLHUP}, {fd=29, events=POLLIN|POLLERR|POLLHUP},
{fd=30, events=POLLIN|POLLERR|POLLHUP}, {fd=31,
events=POLLIN|POLLERR|POLLHUP}, {fd=33, events=POLLIN|POLLERR|POLLHUP},
{fd=32, events=POLLIN|POLLERR|POLLHUP}, {fd=36,
events=POLLIN|POLLERR|POLLHUP}, {fd=35, events=POLLIN|POLLERR|POLLHUP},
{fd=39, events=POLLIN|POLLERR|POLLHUP}, {fd=40,
events=POLLIN|POLLERR|POLLHUP}, {fd=41, events=POLLIN|POLLERR|POLLHUP},
{fd=44, events=POLLIN|POLLERR|POLLHUP}, {fd=42,
events=POLLIN|POLLERR|POLLHUP}, {fd=43, events=POLLIN|POLLERR|POLLHUP},
{fd=48, events=POLLIN|POLLERR|POLLHUP}, {fd=49,
events=POLLIN|POLLERR|POLLHUP}, {fd=59, events=POLLIN|POLLERR|POLLHUP},
{fd=46, events=POLLIN|POLLERR|POLLHUP}, {fd=50,
events=POLLIN|POLLERR|POLLHUP}, ...], 43, 5000
# gdb -p 5786
(gdb) thread apply all bt
Thread 17 (Thread 0x7f411a9d7700 (LWP 5788)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4129c2a2e6 in virCondWait (c=c@entry=0x7f412b18ebb8,
m=m@entry=0x7f412b18eb90) at util/virthread.c:154
#2 0x00007f4129c2ada3 in virThreadPoolWorker
(opaque=opaque@entry=0x7f412b183ab0) at util/virthreadpool.c:124
#3 0x00007f4129c2a078 in virThreadHelper (data=<optimized out>) at
util/virthread.c:206
#4 0x00007f4127033dc5 in start_thread (arg=0x7f411a9d7700) at
pthread_create.c:308
#5 0x00007f4126d6273d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:113
Thread 16 (Thread 0x7f411a1d6700 (LWP 5789)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4129c2a2e6 in virCondWait (c=c@entry=0x7f412b18ebb8,
m=m@entry=0x7f412b18eb90) at util/virthread.c:154
#2 0x00007f4129c2ada3 in virThreadPoolWorker
(opaque=opaque@entry=0x7f412b183a00) at util/virthreadpool.c:124
#3 0x00007f4129c2a078 in virThreadHelper (data=<optimized out>) at
util/virthread.c:206
#4 0x00007f4127033dc5 in start_thread (arg=0x7f411a1d6700) at
pthread_create.c:308
#5 0x00007f4126d6273d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:113
Thread 15 (Thread 0x7f41199d5700 (LWP 5790)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4129c2a2e6 in virCondWait (c=c@entry=0x7f412b18ebb8,
m=m@entry=0x7f412b18eb90) at util/virthread.c:154
#2 0x00007f4129c2ada3 in virThreadPoolWorker
(opaque=opaque@entry=0x7f412b183950) at util/virthreadpool.c:124
#3 0x00007f4129c2a078 in virThreadHelper (data=<optimized out>) at
util/virthread.c:206
#4 0x00007f4127033dc5 in start_thread (arg=0x7f41199d5700) at
pthread_create.c:308
#5 0x00007f4126d6273d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:113
Thread 14 (Thread 0x7f41191d4700 (LWP 5791)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4129c2a2e6 in virCondWait (c=c@entry=0x7f412b18ebb8,
m=m@entry=0x7f412b18eb90) at util/virthread.c:154
#2 0x00007f4129c2ada3 in virThreadPoolWorker
(opaque=opaque@entry=0x7f412b1838a0) at util/virthreadpool.c:124
#3 0x00007f4129c2a078 in virThreadHelper (data=<optimized out>) at
util/virthread.c:206
#4 0x00007f4127033dc5 in start_thread (arg=0x7f41191d4700) at
pthread_create.c:308
#5 0x00007f4126d6273d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:113
Thread 13 (Thread 0x7f41189d3700 (LWP 5792)):
---Type <return> to continue, or q <return> to quit---
Setting log_level = 3 in /etc/libvirt/libvirtd.conf doesn't help to
diagnose the problem. libvirtd actually keeps running, but it does not
respond. It looks like it is waiting for something, maybe a reply.
There are no zombie processes and no CPU load.
This fixes the issue:
rm -f /run/ebtables.lock ; killall -9 virsh; systemctl restart
systemd-{journald,udevd,logind,machined} ; systemctl restart libvirtd
The same problem occurs with libvirt-3.2.0-14.el7_4.7.x86_64.
Could anybody help us resolve this?