Imagine two threads. Thread A is executing qemuProcessStop() and
thread B is executing qemuDomainCreateXML(). To make things more
interesting, the domain XML passed to qemuDomainCreateXML matches
name + UUID of that the virDomainObj that qemuProcessStop() is
currently working with. Here's what happens.
1) Thread A locks @vm, enters qemuProcessStop().
2) Thread B parses given XML, calls virDomainObjListAdd() ->
virDomainObjListAdd() -> virDomainObjListAddLocked() ->
virDomainObjListFindByUUIDLocked(). Since there's a match on
UUID, an virDomainObj object is returned and the thread
proceeds to calling virObjectLock(). NB, it's the same object
as in thread A.
3) Thread A sets vm->def->id = -1; this means that from this
point on, virDomainObjIsActive() will return false.
4) Thread A calls qemuDomainObjStopWorker() which unlocks the
@vm.
5) Thread B acquires the @vm lock and since
virDomainObjIsActive() returns false, it proceeds to calling
virDomainObjAssignDef() where vm->def is replaced.
6) Thread B then calls qemuProcessBeginJob() which unlocks the
@vm temporarily.
7) Thread A, still in qemuDomainObjStopWorker() acquires @vm lock
and proceeds with cleanup.
8) Thread A finds different definition than the one needing
cleanup.
In my testing I've seen stale pointers, e.g.
vm->def->nets[0]->priv was NULL, which lead to a SIGSEGV as
there's 'QEMU_DOMAIN_NETWORK_PRIVATE(net)->created' line when
cleaning up nets. Your mileage may vary.
Even if we did not crash, the plain fact that vm->def is changed
in the middle of qemuProcessStop() means we might be cleaning up
something else than intended.
As a fix, I'm moving all lines that obviously touch vm->def
before the domain object is unlocked. That left
virHookCall(VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END) nearly
next to virHookCall(VIR_HOOK_QEMU_OP_RELEASE,
VIR_HOOK_SUBOP_END) which I figured is not something we want. So
I've shuffled things a bit more.
Fixes: 3865410e7f67ca4ec66e9a905e75f452762a97f0
Resolves:
https://issues.redhat.com/browse/RHEL-49607
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/qemu/qemu_process.c | 122 ++++++++++++++++++++--------------------
1 file changed, 62 insertions(+), 60 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..9ea6c678b8 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -8530,6 +8530,18 @@ void qemuProcessStop(virQEMUDriver *driver,
VIR_QEMU_PROCESS_KILL_FORCE|
VIR_QEMU_PROCESS_KILL_NOCHECK));
+ vm->pid = 0;
+
+ /* now that we know it's stopped call the hook if present */
+ if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
+ g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
+
+ /* we can't stop the operation even if the script raised an error */
+ ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
+ VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
+ NULL, xml, NULL));
+ }
+
if (priv->agent) {
g_clear_pointer(&priv->agent, qemuAgentClose);
}
@@ -8553,25 +8565,6 @@ void qemuProcessStop(virQEMUDriver *driver,
qemuDBusStop(driver, vm);
- /* Only after this point we can reset 'priv->beingDestroyed' so that
- * there's no point at which the VM could be considered as alive between
- * entering the destroy job and this point where the active "flag" is
- * cleared.
- */
- vm->def->id = -1;
- priv->beingDestroyed = false;
-
- /* Wake up anything waiting on domain condition */
- virDomainObjBroadcast(vm);
-
- /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent
- * deadlocks with the per-VM event loop thread. This MUST be done after
- * marking the VM as dead */
- qemuDomainObjStopWorker(vm);
-
- if (!!g_atomic_int_dec_and_test(&driver->nactive) &&
driver->inhibitCallback)
- driver->inhibitCallback(false, driver->inhibitOpaque);
-
/* Clear network bandwidth */
virDomainClearNetBandwidth(vm->def);
@@ -8588,18 +8581,6 @@ void qemuProcessStop(virQEMUDriver *driver,
}
}
- virPortAllocatorRelease(priv->nbdPort);
- priv->nbdPort = 0;
-
- if (priv->monConfig) {
- if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
- unlink(priv->monConfig->data.nix.path);
- g_clear_pointer(&priv->monConfig, virObjectUnref);
- }
-
- /* Remove the master key */
- qemuDomainMasterKeyRemove(priv);
-
ignore_value(virDomainChrDefForeach(vm->def,
false,
qemuProcessCleanupChardevDevice,
@@ -8609,22 +8590,6 @@ void qemuProcessStop(virQEMUDriver *driver,
/* Its namespace is also gone then. */
qemuDomainDestroyNamespace(driver, vm);
- virFileDeleteTree(priv->libDir);
- virFileDeleteTree(priv->channelTargetDir);
-
- /* Stop autodestroy in case guest is restarted */
- virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy);
-
- /* now that we know it's stopped call the hook if present */
- if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
- g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
-
- /* we can't stop the operation even if the script raised an error */
- ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
- VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
- NULL, xml, NULL));
- }
-
/* Reset Security Labels unless caller don't want us to */
if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
qemuSecurityRestoreAllLabel(driver, vm,
@@ -8672,8 +8637,6 @@ void qemuProcessStop(virQEMUDriver *driver,
virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
}
- qemuProcessRemoveDomainStatus(driver, vm);
-
/* Remove VNC and Spice ports from port reservation bitmap, but only if
they were reserved by the driver (autoport=yes)
*/
@@ -8706,20 +8669,9 @@ void qemuProcessStop(virQEMUDriver *driver,
}
}
- for (i = 0; i < vm->ndeprecations; i++)
- g_free(vm->deprecations[i]);
- g_clear_pointer(&vm->deprecations, g_free);
- vm->ndeprecations = 0;
- vm->taint = 0;
- vm->pid = 0;
- virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
for (i = 0; i < vm->def->niothreadids; i++)
vm->def->iothreadids[i]->thread_id = 0;
- /* clean up a possible backup job */
- if (priv->backup)
- qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED);
-
/* Do this explicitly after vm->pid is reset so that security drivers don't
* try to enter the domain's namespace which is non-existent by now as qemu
* is no longer running. */
@@ -8753,6 +8705,56 @@ void qemuProcessStop(virQEMUDriver *driver,
qemuSecurityReleaseLabel(driver->securityManager, vm->def);
+ /* Only after this point we can reset 'priv->beingDestroyed' so that
+ * there's no point at which the VM could be considered as alive between
+ * entering the destroy job and this point where the active "flag" is
+ * cleared.
+ */
+ vm->def->id = -1;
+ priv->beingDestroyed = false;
+
+ /* Wake up anything waiting on domain condition */
+ virDomainObjBroadcast(vm);
+
+ /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent
+ * deadlocks with the per-VM event loop thread. This MUST be done after
+ * marking the VM as dead */
+ qemuDomainObjStopWorker(vm);
+
+ if (!!g_atomic_int_dec_and_test(&driver->nactive) &&
driver->inhibitCallback)
+ driver->inhibitCallback(false, driver->inhibitOpaque);
+
+ virPortAllocatorRelease(priv->nbdPort);
+ priv->nbdPort = 0;
+
+ if (priv->monConfig) {
+ if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
+ unlink(priv->monConfig->data.nix.path);
+ g_clear_pointer(&priv->monConfig, virObjectUnref);
+ }
+
+ /* Remove the master key */
+ qemuDomainMasterKeyRemove(priv);
+
+ virFileDeleteTree(priv->libDir);
+ virFileDeleteTree(priv->channelTargetDir);
+
+ /* Stop autodestroy in case guest is restarted */
+ virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy);
+
+ qemuProcessRemoveDomainStatus(driver, vm);
+
+ for (i = 0; i < vm->ndeprecations; i++)
+ g_free(vm->deprecations[i]);
+ g_clear_pointer(&vm->deprecations, g_free);
+ vm->ndeprecations = 0;
+ vm->taint = 0;
+ virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
+
+ /* clean up a possible backup job */
+ if (priv->backup)
+ qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED);
+
/* clear all private data entries which are no longer needed */
qemuDomainObjPrivateDataClear(priv);
--
2.44.2