Calling qemuProcessStop without a job opens a way to race conditions
with qemuDomainObjExitMonitor called in another thread. A real world
example of such a race condition:
- migration thread (A) calls qemuMigrationWaitForSpice
- another thread (B) starts processing qemuDomainAbortJob API
- thread B signals thread A via qemuDomainObjAbortAsyncJob
- thread B enters monitor (qemuDomainObjEnterMonitor)
- thread B calls qemuMonitorSend
- thread A awakens and calls qemuProcessStop
- thread A calls qemuMonitorClose and sets priv->mon to NULL
- thread B calls qemuDomainObjExitMonitor with priv->mon == NULL
=> monitor stays ref'ed and locked
Depending on how lucky we are, the race may result in a memory leak or
it can even deadlock libvirtd's event loop if it tries to lock the
monitor to process an event received before qemuMonitorClose was called.
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
src/qemu/qemu_driver.c | 28 +++++++++++-------
src/qemu/qemu_migration.c | 6 +++-
src/qemu/qemu_process.c | 72 +++++++++++++++++++++++++++++++----------------
src/qemu/qemu_process.h | 1 +
4 files changed, 71 insertions(+), 36 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index dc448fb..3bccb55 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2287,7 +2287,8 @@ qemuDomainDestroyFlags(virDomainPtr dom,
if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED, stopFlags);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED,
+ QEMU_ASYNC_JOB_NONE, stopFlags);
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -3297,7 +3298,8 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
goto endjob;
/* Shut it down */
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED,
+ QEMU_ASYNC_JOB_SAVE, 0);
virDomainAuditStop(vm, "saved");
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_SAVED);
@@ -3790,7 +3792,8 @@ qemuDomainCoreDumpWithFormat(virDomainPtr dom,
endjob:
if ((ret == 0) && (flags & VIR_DUMP_CRASH)) {
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED,
+ QEMU_ASYNC_JOB_DUMP, 0);
virDomainAuditStop(vm, "crashed");
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
@@ -4070,7 +4073,8 @@ processGuestPanicEvent(virQEMUDriverPtr driver,
/* fall through */
case VIR_DOMAIN_LIFECYCLE_CRASH_DESTROY:
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_CRASHED,
+ QEMU_ASYNC_JOB_DUMP, 0);
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_CRASHED);
@@ -4599,7 +4603,7 @@ processMonitorEOFEvent(virQEMUDriverPtr driver,
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
eventReason);
- qemuProcessStop(driver, vm, stopReason, stopFlags);
+ qemuProcessStop(driver, vm, stopReason, QEMU_ASYNC_JOB_NONE, stopFlags);
virDomainAuditStop(vm, auditReason);
qemuDomainEventQueue(driver, event);
@@ -6609,7 +6613,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
}
if (virCommandWait(cmd, NULL) < 0) {
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
restored = false;
}
VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
@@ -13626,7 +13630,8 @@ qemuDomainSnapshotCreateActiveInternal(virConnectPtr conn,
if (flags & VIR_DOMAIN_SNAPSHOT_CREATE_HALT) {
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT);
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+ QEMU_ASYNC_JOB_SNAPSHOT, 0);
virDomainAuditStop(vm, "from-snapshot");
resume = false;
}
@@ -14468,7 +14473,8 @@ qemuDomainSnapshotCreateActiveExternal(virConnectPtr conn,
if (flags & VIR_DOMAIN_SNAPSHOT_CREATE_HALT) {
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT);
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+ QEMU_ASYNC_JOB_SNAPSHOT, 0);
virDomainAuditStop(vm, "from-snapshot");
resume = false;
thaw = 0;
@@ -15328,7 +15334,8 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
}
virResetError(err);
qemuProcessStop(driver, vm,
- VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+ VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+ QEMU_ASYNC_JOB_START, 0);
virDomainAuditStop(vm, "from-snapshot");
detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT;
event = virDomainEventLifecycleNewFromObj(vm,
@@ -15448,7 +15455,8 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
if (virDomainObjIsActive(vm)) {
/* Transitions 4, 7 */
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT, 0);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT,
+ QEMU_ASYNC_JOB_START, 0);
virDomainAuditStop(vm, "from-snapshot");
detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT;
event = virDomainEventLifecycleNewFromObj(vm,
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 72d7144..21804f6 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3594,7 +3594,8 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
if (!relabel)
stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
virDomainAuditStart(vm, "migrated", false);
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
+ QEMU_ASYNC_JOB_MIGRATION_IN, stopFlags);
}
qemuMigrationJobFinish(driver, vm);
@@ -3903,6 +3904,7 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
qemuMigrationWaitForSpice(vm);
qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_MIGRATED,
+ QEMU_ASYNC_JOB_MIGRATION_OUT,
VIR_QEMU_PROCESS_STOP_MIGRATED);
virDomainAuditStop(vm, "migrated");
@@ -5414,6 +5416,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
*/
if (!v3proto) {
qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_MIGRATED,
+ QEMU_ASYNC_JOB_MIGRATION_OUT,
VIR_QEMU_PROCESS_STOP_MIGRATED);
virDomainAuditStop(vm, "migrated");
event = virDomainEventLifecycleNewFromObj(vm,
@@ -5896,6 +5899,7 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
!(flags & VIR_MIGRATE_OFFLINE) &&
virDomainObjIsActive(vm)) {
qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
+ QEMU_ASYNC_JOB_MIGRATION_IN,
VIR_QEMU_PROCESS_STOP_MIGRATED);
virDomainAuditStop(vm, "failed");
event = virDomainEventLifecycleNewFromObj(vm,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 19ecf59..9d6afc4 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3526,7 +3526,11 @@ qemuProcessReconnect(void *opaque)
* really is and FAILED means "failed to start" */
state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
}
- qemuProcessStop(driver, obj, state, stopFlags);
+ /* If BeginJob failed, we jumped here without a job, let's hope another
+ * thread didn't have a chance to start playing with the domain yet
+ * (it's all we can do anyway).
+ */
+ qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
}
goto cleanup;
}
@@ -3564,8 +3568,13 @@ qemuProcessReconnectHelper(virDomainObjPtr obj,
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Could not create thread. QEMU initialization "
"might be incomplete"));
- /* We can't spawn a thread and thus connect to monitor. Kill qemu. */
- qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED, 0);
+ /* We can't spawn a thread and thus connect to monitor. Kill qemu.
+ * It's safe to call qemuProcessStop without a job here since there
+ * is no thread that could be doing anything else with the same domain
+ * object.
+ */
+ qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
+ QEMU_ASYNC_JOB_NONE, 0);
qemuDomainRemoveInactive(src->driver, obj);
virDomainObjEndAPI(&obj);
@@ -4308,7 +4317,7 @@ qemuProcessStartValidate(virDomainDefPtr def,
int
qemuProcessInit(virQEMUDriverPtr driver,
virDomainObjPtr vm,
- qemuDomainAsyncJob asyncJob ATTRIBUTE_UNUSED,
+ qemuDomainAsyncJob asyncJob,
bool migration,
bool snap)
{
@@ -4381,7 +4390,7 @@ qemuProcessInit(virQEMUDriverPtr driver,
stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
if (migration)
stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
goto cleanup;
}
@@ -5298,7 +5307,7 @@ qemuProcessStart(virConnectPtr conn,
stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
if (priv->mon)
qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, stopFlags);
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
goto cleanup;
}
@@ -5375,6 +5384,7 @@ qemuProcessBeginStopJob(virQEMUDriverPtr driver,
void qemuProcessStop(virQEMUDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason,
+ qemuDomainAsyncJob asyncJob,
unsigned int flags)
{
int ret;
@@ -5389,27 +5399,34 @@ void qemuProcessStop(virQEMUDriverPtr driver,
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
qemuDomainLogContextPtr logCtxt = NULL;
- VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu flags=%x",
+ VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu, "
+ "reason=%s, asyncJob=%s, flags=%x",
vm, vm->def->name, vm->def->id,
- (unsigned long long)vm->pid, flags);
-
- if (!virDomainObjIsActive(vm)) {
- VIR_DEBUG("VM '%s' not active", vm->def->name);
- virObjectUnref(cfg);
- return;
- }
+ (unsigned long long)vm->pid,
+ virDomainShutoffReasonTypeToString(reason),
+ qemuDomainAsyncJobTypeToString(asyncJob),
+ flags);
/* This method is routinely used in clean up paths. Disable error
* reporting so we don't squash a legit error. */
orig_err = virSaveLastError();
- /*
- * We may unlock the vm in qemuProcessKill(), and another thread
- * can lock the vm, and then call qemuProcessStop(). So we should
- * set vm->def->id to -1 here to avoid qemuProcessStop() to be called twice.
- */
+ if (asyncJob != QEMU_ASYNC_JOB_NONE) {
+ if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
+ goto cleanup;
+ } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
+ priv->job.asyncOwner == virThreadSelfID() &&
+ priv->job.active != QEMU_JOB_ASYNC_NESTED) {
+ VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
+ qemuDomainAsyncJobTypeToString(asyncJob));
+ }
+
+ if (!virDomainObjIsActive(vm)) {
+ VIR_DEBUG("VM '%s' not active", vm->def->name);
+ goto endjob;
+ }
+
vm->def->id = -1;
-
if (virAtomicIntDecAndTest(&driver->nactive) &&
driver->inhibitCallback)
driver->inhibitCallback(false, driver->inhibitOpaque);
@@ -5664,6 +5681,11 @@ void qemuProcessStop(virQEMUDriverPtr driver,
vm->newDef = NULL;
}
+ endjob:
+ if (asyncJob != QEMU_ASYNC_JOB_NONE)
+ qemuDomainObjEndJob(driver, vm);
+
+ cleanup:
if (orig_err) {
virSetError(orig_err);
virFreeError(orig_err);
@@ -5946,13 +5968,13 @@ qemuProcessAutoDestroy(virDomainObjPtr dom,
qemuDomainObjDiscardAsyncJob(driver, dom);
}
- if (qemuDomainObjBeginJob(driver, dom,
- QEMU_JOB_DESTROY) < 0)
- goto cleanup;
-
VIR_DEBUG("Killing domain");
- qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED, stopFlags);
+ if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
+ goto cleanup;
+
+ qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
+ QEMU_ASYNC_JOB_NONE, stopFlags);
virDomainAuditStop(dom, "destroyed");
event = virDomainEventLifecycleNewFromObj(dom,
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index 69be99e..eb37863 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -121,6 +121,7 @@ int qemuProcessBeginStopJob(virQEMUDriverPtr driver,
void qemuProcessStop(virQEMUDriverPtr driver,
virDomainObjPtr vm,
virDomainShutoffReason reason,
+ qemuDomainAsyncJob asyncJob,
unsigned int flags);
int qemuProcessAttach(virConnectPtr conn,
--
2.7.1