The destination libvirt daemon in a migration may segfault if the client
disconnects immediately after the migration has begun:
# virsh -c qemu+tls://remote/system list --all
Id Name State
----------------------------------------------------
...
# timeout --signal KILL 1 \
virsh migrate example qemu+tls://remote/system \
--verbose --compressed --live --auto-converge \
--abort-on-error --unsafe --persistent \
--undefinesource --copy-storage-all --xml example.xml
Killed
# virsh -c qemu+tls://remote/system list --all
error: failed to connect to the hypervisor
error: unable to connect to server at 'remote:16514': Connection refused
The crash is in:
1531 void
1532 qemuDomainObjEndJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
1533 {
1534 qemuDomainObjPrivatePtr priv = obj->privateData;
1535 qemuDomainJob job = priv->job.active;
1536
1537 priv->jobs_queued--;
Backtrace:
#0 at qemuDomainObjEndJob at qemu/qemu_domain.c:1537
#1 in qemuDomainRemoveInactive at qemu/qemu_domain.c:2497
#2 in qemuProcessAutoDestroy at qemu/qemu_process.c:5646
#3 in virCloseCallbacksRun at util/virclosecallbacks.c:350
#4 in qemuConnectClose at qemu/qemu_driver.c:1154
...
qemuDomainRemoveInactive calls virDomainObjListRemove, which in this
case is holding the last remaining reference to the domain.
qemuDomainRemoveInactive then calls qemuDomainObjEndJob, but the domain
object has been freed and poisoned by then.
This patch bumps the domain's refcount until qemuDomainRemoveInactive
has completed. We also ensure qemuProcessAutoDestroy does not return the
domain to virCloseCallbacksRun to be unlocked in this case. There is
similar logic in bhyveProcessAutoDestroy and lxcProcessAutoDestroy
(which call virDomainObjListRemove directly).
Signed-off-by: Michael Chapman <mike(a)very.puzzling.org>
---
src/qemu/qemu_domain.c | 5 +++++
src/qemu/qemu_process.c | 4 +++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index ac5ca74..d433cba 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -2424,11 +2424,16 @@ qemuDomainRemoveInactive(virQEMUDriverPtr driver,
VIR_WARN("unable to remove snapshot directory %s", snapDir);
VIR_FREE(snapDir);
}
+
+ virObjectRef(vm);
+
virDomainObjListRemove(driver->domains, vm);
virObjectUnref(cfg);
if (haveJob)
qemuDomainObjEndJob(driver, vm);
+
+ virObjectUnref(vm);
}
void
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 515402e..554399e 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -5689,8 +5689,10 @@ qemuProcessAutoDestroy(virDomainObjPtr dom,
qemuDomainObjEndJob(driver, dom);
- if (!dom->persistent)
+ if (!dom->persistent) {
qemuDomainRemoveInactive(driver, dom);
+ dom = NULL;
+ }
if (event)
qemuDomainEventQueue(driver, event);
--
2.1.0