This change gives me a segfault at startup when running libvirtd from
my working directory. Reverting these two patches solves the issue.
Thread 17 "lt-libvirtd" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffc0dff700 (LWP 4338)]
virDomainObjIsActive (dom=0x7fff9c1a6160) at
../../src/conf/domain_conf.h:2800
2800 return dom->def->id != -1;
(gdb) bt
#0 virDomainObjIsActive (dom=0x7fff9c1b2ec0) at
../../src/conf/domain_conf.h:2800
#1 0x00007fffdb706eb1 in qemuMonitorOpen (vm=0x7fff9c1b2ec0,
config=0x7fffd8dfe520, retry=true, timeout=30, cb=0x7fffdb7fdd80
<callbacks>, opaque=0x0) at ../../src/qemu/qemu_monitor.c:848
#2 0x00007fffdb6f0812 in qemuProcessQMPConnectMonitor
(proc=0x7fff9c19f7e0) at ../../src/qemu/qemu_process.c:8662
#3 0x00007fffdb6f091e in qemuProcessQMPStart (proc=0x7fff9c19f7e0) at
../../src/qemu/qemu_process.c:8707
#4 0x00007fffdb66bff7 in virQEMUCapsInitQMPSingle
(qemuCaps=0x7fff9c19f260, libDir=0x7fff9c130b40
"/var/lib/libvirt/qemu", runUid=107, runGid=107, onlyTCG=false) at
../../src/qemu/qemu_capabilities.c:4663
#5 0x00007fffdb66c096 in virQEMUCapsInitQMP (qemuCaps=0x7fff9c19f260,
libDir=0x7fff9c130b40 "/var/lib/libvirt/qemu", runUid=107, runGid=107)
at ../../src/qemu/qemu_capabilities.c:4686
#6 0x00007fffdb66c285 in virQEMUCapsNewForBinaryInternal
(hostArch=VIR_ARCH_X86_64, binary=0x7fff9c1ac160 "/usr/bin/qemu-system-
i386", libDir=0x7fff9c130b40 "/var/lib/libvirt/qemu", runUid=107,
runGid=107,
microcodeVersion=180, kernelVersion=0x7fff9c14aae0 "5.2.7-
200.fc30.x86_64 #1 SMP Thu Aug 8 05:35:29 UTC 2019") at
../../src/qemu/qemu_capabilities.c:4739
#7 0x00007fffdb66c3df in virQEMUCapsNewData (binary=0x7fff9c1ac160
"/usr/bin/qemu-system-i386", privData=0x7fff9c14af90) at
../../src/qemu/qemu_capabilities.c:4772
#8 0x00007ffff7c0a803 in virFileCacheNewData (cache=0x7fff9c14ade0,
name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at
../../src/util/virfilecache.c:208
#9 0x00007ffff7c0aa9e in virFileCacheValidate (cache=0x7fff9c14ade0,
name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386", data=0x7fffd8dfe820)
at ../../src/util/virfilecache.c:279
#10 0x00007ffff7c0aba3 in virFileCacheLookup (cache=0x7fff9c14ade0,
name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at
../../src/util/virfilecache.c:312
#11 0x00007fffdb66c79c in virQEMUCapsCacheLookup (cache=0x7fff9c14ade0,
binary=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at
../../src/qemu/qemu_capabilities.c:4910
#12 0x00007fffdb663e08 in virQEMUCapsInitGuest (caps=0x7fff9c195bd0,
cache=0x7fff9c14ade0, hostarch=VIR_ARCH_X86_64,
guestarch=VIR_ARCH_I686) at ../../src/qemu/qemu_capabilities.c:802
#13 0x00007fffdb664384 in virQEMUCapsInit (cache=0x7fff9c14ade0) at
../../src/qemu/qemu_capabilities.c:958
#14 0x00007fffdb6d8ad1 in virQEMUDriverCreateCapabilities
(driver=0x7fff9c117590) at ../../src/qemu/qemu_conf.c:1254
#15 0x00007fffdb72f347 in qemuStateInitialize (privileged=true,
callback=0x5555555781b4 <daemonInhibitCallback>, opaque=0x555555617dd0)
at ../../src/qemu/qemu_driver.c:937
#16 0x00007ffff7e1b844 in virStateInitialize (privileged=true,
mandatory=false, callback=0x5555555781b4 <daemonInhibitCallback>,
opaque=0x555555617dd0) at ../../src/libvirt.c:666
#17 0x0000555555578490 in daemonRunStateInit (opaque=0x555555617dd0) at
../../src/remote/remote_daemon.c:846
#18 0x00007ffff7bf8c1b in virThreadHelper (data=0x5555556327d0) at
../../src/util/virthread.c:196
#19 0x00007ffff71f84c0 in start_thread () from /lib64/libpthread.so.0
#20 0x00007ffff7126553 in clone () from /lib64/libc.so.6
On Tue, 2019-10-08 at 17:15 -0300, Daniel Henrique Barboza wrote:
On 10/8/19 6:15 AM, Michal Privoznik wrote:
> When connecting to qemu's monitor the @vm object is unlocked.
> This is justified - connecting may take a long time and we don't
> want to wait with the domain object locked. However, just before
> the domain object is locked again, the monitor's FD is registered
> in the event loop. Therefore, there is a small window where the
> event loop has a chance to call a handler for an event that
> occurred on the monitor FD but vm is not initialized properly just
> yet (i.e. priv->mon is not set). For instance, if there's an
> incoming migration, qemu creates its socket but then fails to
> initialize (for various reasons, I'm reproducing this by using
> hugepages but leaving the HP pool empty) then the following may
> happen:
>
> 1) qemuConnectMonitor() unlocks @vm
>
> 2) qemuMonitorOpen() connects to the monitor socket and by
> calling qemuMonitorOpenInternal() which subsequently calls
> qemuMonitorRegister() the event handler is installed
>
> 3) qemu fails to initialize and exit()-s, which closes the
> monitor
>
> 4) The event loop sees EOF on the monitor and the control gets to
> qemuProcessEventHandler() which locks @vm and calls
> processMonitorEOFEvent() which then calls
> qemuMonitorLastError(priv->mon). But priv->mon is not set just
> yet.
>
> 5) qemuMonitorLastError() dereferences NULL pointer
>
> The solution is to unlock the domain object for a shorter time
> and most importantly, register event handler with domain object
> locked so that any possible event processing is done only after
> @vm's private data was properly initialized.
>
> This issue is also mentioned in v4.2.0-99-ga5a777a8ba.
>
> Since we are unlocking @vm and locking it back, another thread
> might have destroyed the domain meanwhile. Therefore we have to
> check if domain is still activem, and we have to do it at the
s/activem/active
> same place where domain lock is acquired back, i.e. in
> qemuMonitorOpen(). This creates a small problem for our test
> suite which calls qemuMonitorOpen() directly and passes @vm which
> has no definition. This makes virDomainObjIsActive() call crash.
> Fortunately, allocating empty domain definition is sufficient.
>
> Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
> ---
LGTM. Hopefully this will be enough to prevent this vm lock race
with the monitor at VM startup.
Reviewed-by: Daniel Henrique Barboza <danielhb413(a)gmail.com>
> This is an alternative approach to:
>
>
https://www.redhat.com/archives/libvir-list/2019-September/msg00749.html
>
> src/qemu/qemu_monitor.c | 33 +++++++++++++++++++++++++----
> ----
> src/qemu/qemu_process.c | 12 ------------
> tests/qemumonitortestutils.c | 2 ++
> 3 files changed, 27 insertions(+), 20 deletions(-)
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 58de26a276..a53d3b1d60 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -810,35 +810,52 @@ qemuMonitorOpen(virDomainObjPtr vm,
> qemuMonitorCallbacksPtr cb,
> void *opaque)
> {
> - int fd;
> + int fd = -1;
> bool hasSendFD = false;
> - qemuMonitorPtr ret;
> + qemuMonitorPtr ret = NULL;
>
> timeout += QEMU_DEFAULT_MONITOR_WAIT;
>
> + /* Hold an extra reference because we can't allow 'vm' to be
> + * deleted until the monitor gets its own reference. */
> + virObjectRef(vm);
> +
> switch (config->type) {
> case VIR_DOMAIN_CHR_TYPE_UNIX:
> hasSendFD = true;
> - if ((fd = qemuMonitorOpenUnix(config->data.nix.path,
> - vm->pid, retry, timeout)) <
> 0)
> - return NULL;
> + virObjectUnlock(vm);
> + fd = qemuMonitorOpenUnix(config->data.nix.path,
> + vm->pid, retry, timeout);
> + virObjectLock(vm);
> break;
>
> case VIR_DOMAIN_CHR_TYPE_PTY:
> - if ((fd = qemuMonitorOpenPty(config->data.file.path)) < 0)
> - return NULL;
> + virObjectUnlock(vm);
> + fd = qemuMonitorOpenPty(config->data.file.path);
> + virObjectLock(vm);
> break;
>
> default:
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("unable to handle monitor type: %s"),
> virDomainChrTypeToString(config->type));
> - return NULL;
> + break;
> + }
> +
> + if (fd < 0)
> + goto cleanup;
> +
> + if (!virDomainObjIsActive(vm)) {
> + virReportError(VIR_ERR_OPERATION_FAILED, "%s",
> + _("domain is not running"));
> + goto cleanup;
> }
>
> ret = qemuMonitorOpenInternal(vm, fd, hasSendFD, cb, opaque);
> + cleanup:
> if (!ret)
> VIR_FORCE_CLOSE(fd);
> + virObjectUnref(vm);
> return ret;
> }
>
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index a50cd54393..b303e5ba05 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -1955,13 +1955,8 @@ qemuConnectMonitor(virQEMUDriverPtr driver,
> virDomainObjPtr vm, int asyncJob,
> * 1GiB of guest RAM. */
> timeout = vm->def->mem.total_memory / (1024 * 1024);
>
> - /* Hold an extra reference because we can't allow 'vm' to be
> - * deleted until the monitor gets its own reference. */
> - virObjectRef(vm);
> -
> ignore_value(virTimeMillisNow(&priv->monStart));
> monConfig = virObjectRef(priv->monConfig);
> - virObjectUnlock(vm);
>
> mon = qemuMonitorOpen(vm,
> monConfig,
> @@ -1978,15 +1973,8 @@ qemuConnectMonitor(virQEMUDriverPtr driver,
> virDomainObjPtr vm, int asyncJob,
> qemuProcessMonitorLogFree);
> }
>
> - virObjectLock(vm);
> virObjectUnref(monConfig);
> - virObjectUnref(vm);
> priv->monStart = 0;
> -
> - if (!virDomainObjIsActive(vm)) {
> - qemuMonitorClose(mon);
> - mon = NULL;
> - }
> priv->mon = mon;
>
> if (qemuSecurityClearSocketLabel(driver->securityManager, vm-
> >def) < 0) {
> diff --git a/tests/qemumonitortestutils.c
> b/tests/qemumonitortestutils.c
> index e9dff123f8..c7580c5f28 100644
> --- a/tests/qemumonitortestutils.c
> +++ b/tests/qemumonitortestutils.c
> @@ -1085,6 +1085,8 @@
> qemuMonitorCommonTestNew(virDomainXMLOptionPtr xmlopt,
> test->vm = virDomainObjNew(xmlopt);
> if (!test->vm)
> goto error;
> + if (!(test->vm->def = virDomainDefNew()))
> + goto error;
> }
>
> if (virNetSocketNewListenUNIX(path, 0700, geteuid(),
> getegid(),
--
libvir-list mailing list
libvir-list(a)redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list