qemu/THREADS.txt is explicit that the driver lock must not be
held for long lengths of time, as it blocks all attempts to
manage any other vms on the same connection. We were violating
this by sleep()ing while waiting for a qemu child process to
finish execution during actions such as destroy.
* src/qemu/qemu_process.h (qemuProcessKill): Alter signature.
* src/qemu/qemu_process.c (qemuProcessKill): Add parameter.
(qemuProcessFakeReboot, qemuProcessShutdownOrReboot)
(qemuProcessStop): Update callers.
* src/qemu/qemu_driver.c (qemuDomainDestroyFlags): Likewise.
---
Without this patch, problems in halting one domain could lock
out actions on all other domains for more than 3 seconds, which
is awfully long.
This doesn't solve all the problems - it is still possible to
have a stuck NFS server being the reason for difficulties in
stopping a domain, such as an lstat() call while attempting to
relabel file systems, and those calls are still done while
the driver lock is held; but I'll be submitting further patches
as I try and reduce the critical section sizes.
I'm not sure whether this qualifies for 0.9.7 or should wait
for post-release.
src/qemu/qemu_driver.c | 2 +-
src/qemu/qemu_process.c | 35 ++++++++++++++++++++++++++++-------
src/qemu/qemu_process.h | 3 ++-
3 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 118197a..2a152e8 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1692,7 +1692,7 @@ qemuDomainDestroyFlags(virDomainPtr dom,
* can kill the process even if a job is active. Killing
* it now means the job will be released
*/
- qemuProcessKill(vm, false);
+ qemuProcessKill(driver, vm, false);
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
goto cleanup;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 2882ef8..6867d26 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -419,7 +419,7 @@ endjob:
cleanup:
if (vm) {
if (ret == -1)
- qemuProcessKill(vm, false);
+ qemuProcessKill(driver, vm, false);
if (virDomainObjUnref(vm) > 0)
virDomainObjUnlock(vm);
}
@@ -450,12 +450,12 @@ qemuProcessShutdownOrReboot(virDomainObjPtr vm)
qemuProcessFakeReboot,
vm) < 0) {
VIR_ERROR(_("Failed to create reboot thread, killing domain"));
- qemuProcessKill(vm, true);
+ qemuProcessKill(qemu_driver, vm, true);
/* Safe to ignore value since ref count was incremented above */
ignore_value(virDomainObjUnref(vm));
}
} else {
- qemuProcessKill(vm, true);
+ qemuProcessKill(qemu_driver, vm, true);
}
}
@@ -3249,17 +3249,26 @@ cleanup:
}
-void qemuProcessKill(virDomainObjPtr vm, bool gracefully)
+/* Called while owning the vm lock, but not necessarily a job on the
+ * vm. This function returns immediately if gracefully is true,
+ * otherwise it may block while waiting for the qemu process. If
+ * driver is non-NULL, then also drop the driver lock before sleeping
+ * and re-acquire before returning. */
+void qemuProcessKill(struct qemud_driver *driver, virDomainObjPtr vm,
+ bool gracefully)
{
int i;
- VIR_DEBUG("vm=%s pid=%d gracefully=%d",
- vm->def->name, vm->pid, gracefully);
+ VIR_DEBUG("driver=%p vm=%s pid=%d gracefully=%d",
+ driver, vm->def->name, vm->pid, gracefully);
if (!virDomainObjIsActive(vm)) {
VIR_DEBUG("VM '%s' not active", vm->def->name);
return;
}
+ if (driver && !gracefully)
+ qemuDriverUnlock(driver);
+
/* This loop sends SIGTERM, then waits a few iterations
* (1.6 seconds) to see if it dies. If still alive then
* it does SIGKILL, and waits a few more iterations (1.6
@@ -3288,6 +3297,18 @@ void qemuProcessKill(virDomainObjPtr vm, bool gracefully)
usleep(200 * 1000);
}
+
+ if (driver && !gracefully) {
+ /* Must hold extra reference while dropping vm lock, since we
+ * do not necessarily have an active job to guarantee a
+ * reference. */
+ virDomainObjRef(vm);
+ virDomainObjUnlock(vm);
+ qemuDriverLock(driver);
+ virDomainObjLock(vm);
+ /* Safe to ignore value since ref count was incremented above */
+ ignore_value(virDomainObjUnref(vm));
+ }
}
@@ -3370,7 +3391,7 @@ void qemuProcessStop(struct qemud_driver *driver,
}
/* shut it off for sure */
- qemuProcessKill(vm, false);
+ qemuProcessKill(driver, vm, false);
/* Stop autodestroy in case guest is restarted */
qemuProcessAutoDestroyRemove(driver, vm);
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index ef422c4..19ed8a1 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -68,7 +68,8 @@ int qemuProcessAttach(virConnectPtr conn,
virDomainChrSourceDefPtr monConfig,
bool monJSON);
-void qemuProcessKill(virDomainObjPtr vm, bool gracefully);
+void qemuProcessKill(struct qemud_driver *driver, virDomainObjPtr vm,
+ bool gracefully);
int qemuProcessAutoDestroyInit(struct qemud_driver *driver);
void qemuProcessAutoDestroyRun(struct qemud_driver *driver,
--
1.7.4.4