When destroying a domain, qemuDomainDestroy kills its qemu process and
starts a new job, which means it unlocks the domain object and locks it
again after some time. Although the object is usually unlocked for a
pretty short time, chances are another thread processing an EOF event on
the qemu monitor is able to lock the object first and does all the cleanup
by itself. This leads to a wrong shutoff reason and lifecycle event detail,
and to the virDomainDestroy API incorrectly reporting failure to destroy an
inactive domain.
Reported by Charlie Smurthwaite.
---
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_driver.c | 10 ++++++++++
src/qemu/qemu_process.c | 22 +++++++++++++++-------
3 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index e8bcab3..35f9440 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -110,6 +110,7 @@ struct _qemuDomainObjPrivate {
bool monError;
unsigned long long monStart;
bool gotShutdown;
+ bool beingDestroyed;
char *pidfile;
int nvcpupids;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 10a289e..ceb9f47 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1678,6 +1678,7 @@ qemuDomainDestroyFlags(virDomainPtr dom,
virDomainObjPtr vm;
int ret = -1;
virDomainEventPtr event = NULL;
+ qemuDomainObjPrivatePtr priv;
virCheckFlags(0, -1);
@@ -1691,6 +1692,8 @@ qemuDomainDestroyFlags(virDomainPtr dom,
goto cleanup;
}
+ priv = vm->privateData;
+
qemuDomainSetFakeReboot(driver, vm, false);
/* Although qemuProcessStop does this already, there may
@@ -1700,9 +1703,16 @@ qemuDomainDestroyFlags(virDomainPtr dom,
*/
qemuProcessKill(vm, false);
+ /* We need to prevent monitor EOF callback from doing our work (and sending
+ * misleading events) while the vm is unlocked inside BeginJob API
+ */
+ priv->beingDestroyed = true;
+
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
goto cleanup;
+ priv->beingDestroyed = false;
+
if (!virDomainObjIsActive(vm)) {
qemuReportError(VIR_ERR_OPERATION_INVALID,
"%s", _("domain is not running"));
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index d4271d0..9123f4c 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -128,14 +128,18 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
qemuDriverLock(driver);
virDomainObjLock(vm);
+ priv = vm->privateData;
+
+ if (priv->beingDestroyed) {
+ VIR_DEBUG("Domain is being destroyed, EOF is expected");
+ goto unlock;
+ }
+
if (!virDomainObjIsActive(vm)) {
VIR_DEBUG("Domain %p is not active, ignoring EOF", vm);
- virDomainObjUnlock(vm);
- qemuDriverUnlock(driver);
- return;
+ goto unlock;
}
- priv = vm->privateData;
if (priv->monJSON && !priv->gotShutdown) {
VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; "
"assuming the domain crashed", vm->def->name);
@@ -150,11 +154,15 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
qemuProcessStop(driver, vm, 0, stopReason);
virDomainAuditStop(vm, auditReason);
- if (!vm->persistent)
+ if (!vm->persistent) {
qemuDomainRemoveInactive(driver, vm);
- else
- virDomainObjUnlock(vm);
+ goto cleanup;
+ }
+unlock:
+ virDomainObjUnlock(vm);
+
+cleanup:
if (event)
qemuDomainEventQueue(driver, event);
qemuDriverUnlock(driver);
--
1.7.8
Show replies by date
On 12/12/2011 08:31 AM, Jiri Denemark wrote:
When destroying a domain qemuDomainDestroy kills its qemu process and
starts a new job, which means it unlocks the domain object and locks it
again after some time. Although the object is usually unlocked for a
pretty short time, chances are another thread processing an EOF event on
qemu monitor is able to lock the object first and does all the cleanup
by itself. This leads to wrong shutoff reason and lifecycle event detail
and virDomainDestroy API incorrectly reporting failure to destroy an
inactive domain.
Reported by Charlie Smurthwaite.
---
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_driver.c | 10 ++++++++++
src/qemu/qemu_process.c | 22 +++++++++++++++-------
3 files changed, 26 insertions(+), 7 deletions(-)
ACK.
--
Eric Blake eblake(a)redhat.com +1-919-301-3266
Libvirt virtualization library
http://libvirt.org
On Mon, Dec 12, 2011 at 09:12:13 -0700, Eric Blake wrote:
On 12/12/2011 08:31 AM, Jiri Denemark wrote:
> When destroying a domain qemuDomainDestroy kills its qemu process and
> starts a new job, which means it unlocks the domain object and locks it
> again after some time. Although the object is usually unlocked for a
> pretty short time, chances are another thread processing an EOF event on
> qemu monitor is able to lock the object first and does all the cleanup
> by itself. This leads to wrong shutoff reason and lifecycle event detail
> and virDomainDestroy API incorrectly reporting failure to destroy an
> inactive domain.
>
> Reported by Charlie Smurthwaite.
> ---
> src/qemu/qemu_domain.h | 1 +
> src/qemu/qemu_driver.c | 10 ++++++++++
> src/qemu/qemu_process.c | 22 +++++++++++++++-------
> 3 files changed, 26 insertions(+), 7 deletions(-)
ACK.
Pushed, thanks.
Jirka