Thanks. I know from
that there is a deadlock problem in this fix.
I would appreciate a notice when a solution is found.
Thanks a lot.
--
Shi Jin, PhD
--- On Thu, 11/26/09, Daniel P. Berrange <berrange(a)redhat.com> wrote:
From: Daniel P. Berrange <berrange(a)redhat.com>
Subject: [libvirt] [PATCH 03/14] Fix crash when deleting monitor while a command is in
progress
To: libvir-list(a)redhat.com
Date: Thursday, November 26, 2009, 11:27 AM
If QEMU shuts down while we're in the
middle of processing a
monitor command, the monitor will be freed, and upon
cleaning
up we attempt to do qemuMonitorUnlock(priv->mon)
when priv->mon
is NULL.
To address this we introduce proper reference counting
into
the qemuMonitorPtr object, and hold an extra reference
whenever
executing a command.
* src/qemu/qemu_driver.c: Hold a reference on the monitor
while
executing commands, and only NULL-ify the
priv->mon field when
the last reference is released
* src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add
reference
counting to handle safe deletion of monitor objects
---
src/qemu/qemu_driver.c | 13
++++++--
src/qemu/qemu_monitor.c | 81
+++++++++++++++++++++++++++++------------------
src/qemu/qemu_monitor.h | 5 ++-
3 files changed, 64 insertions(+), 35 deletions(-)
diff --git a/src/qemu/qemu_driver.c
b/src/qemu/qemu_driver.c
index c9b5ac2..3befe3d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -271,6 +271,7 @@ static void
qemuDomainObjEnterMonitor(virDomainObjPtr obj)
qemuDomainObjPrivatePtr priv =
obj->privateData;
qemuMonitorLock(priv->mon);
+ qemuMonitorRef(priv->mon);
virDomainObjUnlock(obj);
}
@@ -281,10 +282,15 @@ static void
qemuDomainObjEnterMonitor(virDomainObjPtr obj)
*/
static void qemuDomainObjExitMonitor(virDomainObjPtr obj)
{
+ int refs;
qemuDomainObjPrivatePtr priv =
obj->privateData;
+ refs = qemuMonitorUnref(priv->mon);
qemuMonitorUnlock(priv->mon);
virDomainObjLock(obj);
+
+ if (refs == 0)
+ priv->mon = NULL;
}
@@ -301,6 +307,7 @@ static void
qemuDomainObjEnterMonitorWithDriver(struct qemud_driver
*driver, vir
qemuDomainObjPrivatePtr priv =
obj->privateData;
qemuMonitorLock(priv->mon);
+ qemuMonitorRef(priv->mon);
virDomainObjUnlock(obj);
qemuDriverUnlock(driver);
}
@@ -315,6 +322,7 @@ static void
qemuDomainObjExitMonitorWithDriver(struct qemud_driver
*driver, virD
{
qemuDomainObjPrivatePtr priv =
obj->privateData;
+ qemuMonitorUnref(priv->mon);
qemuMonitorUnlock(priv->mon);
qemuDriverLock(driver);
virDomainObjLock(obj);
@@ -2399,10 +2407,9 @@ static void
qemudShutdownVMDaemon(virConnectPtr conn,
_("Failed
to send SIGTERM to %s (%d)"),
vm->def->name, vm->pid);
- if (priv->mon) {
-
qemuMonitorClose(priv->mon);
+ if (priv->mon &&
+ qemuMonitorClose(priv->mon)
== 0)
priv->mon =
NULL;
- }
if (vm->monitor_chr) {
if
(vm->monitor_chr->type == VIR_DOMAIN_CHR_TYPE_UNIX)
diff --git a/src/qemu/qemu_monitor.c
b/src/qemu/qemu_monitor.c
index f0ef81b..3829e7a 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -42,6 +42,8 @@ struct _qemuMonitor {
virMutex lock;
virCond notify;
+ int refs;
+
virDomainObjPtr dom;
int fd;
@@ -67,12 +69,14 @@ struct _qemuMonitor {
* the next monitor msg */
int lastErrno;
- /* If the monitor callback is currently
active */
+ /* If the monitor EOF callback is currently
active (stops more commands being run) */
unsigned eofcb: 1;
- /* If the monitor callback should free the
closed monitor */
+ /* If the monitor is in process of shutting
down */
unsigned closed: 1;
+
};
+
void qemuMonitorLock(qemuMonitorPtr mon)
{
virMutexLock(&mon->lock);
@@ -84,21 +88,33 @@ void qemuMonitorUnlock(qemuMonitorPtr
mon)
}
-static void qemuMonitorFree(qemuMonitorPtr mon, int
lockDomain)
+static void qemuMonitorFree(qemuMonitorPtr mon)
{
- VIR_DEBUG("mon=%p, lockDomain=%d", mon,
lockDomain);
- if (mon->vm) {
- if (lockDomain)
-
virDomainObjLock(mon->vm);
- if
(!virDomainObjUnref(mon->vm) && lockDomain)
-
virDomainObjUnlock(mon->vm);
- }
+ VIR_DEBUG("mon=%p", mon);
+ if (mon->vm)
+
virDomainObjUnref(mon->vm);
if
(virCondDestroy(&mon->notify) < 0)
{}
virMutexDestroy(&mon->lock);
VIR_FREE(mon);
}
+int qemuMonitorRef(qemuMonitorPtr mon)
+{
+ mon->refs++;
+ return mon->refs;
+}
+
+int qemuMonitorUnref(qemuMonitorPtr mon)
+{
+ mon->refs--;
+
+ if (mon->refs == 0)
+ qemuMonitorFree(mon);
+
+ return mon->refs;
+}
+
static int
qemuMonitorOpenUnix(const char *monitor)
@@ -346,8 +362,10 @@ static void
qemuMonitorIO(int watch, int fd, int events, void *opaque)
{
qemuMonitorPtr mon = opaque;
int quit = 0, failed = 0;
+ virDomainObjPtr vm;
qemuMonitorLock(mon);
+ qemuMonitorRef(mon);
VIR_DEBUG("Monitor %p I/O on watch
%d fd %d events %d", mon, watch, fd, events);
if (mon->fd != fd ||
mon->watch != watch) {
@@ -409,22 +427,20 @@ qemuMonitorIO(int watch, int fd, int
events, void *opaque) {
if (failed || quit) {
/* Make sure anyone
waiting wakes up now */
virCondSignal(&mon->notify);
- mon->eofcb = 1;
qemuMonitorUnlock(mon);
VIR_DEBUG("Triggering EOF callback error?
%d", failed);
mon->eofCB(mon,
mon->vm, failed);
qemuMonitorLock(mon);
- if (mon->closed) {
-
qemuMonitorUnlock(mon);
-
VIR_DEBUG("Delayed free of monitor %p", mon);
-
qemuMonitorFree(mon, 1);
- } else {
-
qemuMonitorUnlock(mon);
- }
- } else {
- qemuMonitorUnlock(mon);
}
+
+ /* Safe 'vm' to an intermediate ptr, since
'mon' may be
+ * freed after the Unref call */
+ vm = mon->vm;
+ virDomainObjLock(vm);
+ if (qemuMonitorUnref(mon) > 0)
+ qemuMonitorUnlock(mon);
+ virDomainObjUnlock(vm);
}
@@ -453,6 +469,7 @@ qemuMonitorOpen(virDomainObjPtr vm,
return NULL;
}
mon->fd = -1;
+ mon->refs = 1;
mon->vm = vm;
mon->eofCB = eofCB;
qemuMonitorLock(mon);
@@ -512,10 +529,14 @@ cleanup:
}
-void qemuMonitorClose(qemuMonitorPtr mon)
+int qemuMonitorClose(qemuMonitorPtr mon)
{
+ int refs;
+
if (!mon)
- return;
+ return 0;
+
+ VIR_DEBUG("mon=%p", mon);
qemuMonitorLock(mon);
if (!mon->closed) {
@@ -523,18 +544,17 @@ void qemuMonitorClose(qemuMonitorPtr
mon)
virEventRemoveHandle(mon->watch);
if (mon->fd !=
-1)
close(mon->fd);
- /* NB: don't reset fd /
watch fields, since active
- * callback may
still want them */
+ /* NB: ordinarily one might
immediately set mon->watch to -1
+ * and mon->fd to
-1, but there may be a callback active
+ * that is still
relying on these fields being valid. So
+ * we merely close
them, but not clear their values and
+ * use this explicit
'closed' flag to track this state */
mon->closed = 1;
}
- if (mon->eofcb) {
- VIR_DEBUG("Mark monitor to be
deleted %p", mon);
+ if ((refs = qemuMonitorUnref(mon)) > 0)
qemuMonitorUnlock(mon);
- } else {
- VIR_DEBUG("Delete monitor now
%p", mon);
- qemuMonitorFree(mon, 0);
- }
+ return refs;
}
@@ -552,7 +572,6 @@ int qemuMonitorSend(qemuMonitorPtr
mon,
if (mon->eofcb) {
msg->lastErrno =
EIO;
- qemuMonitorUnlock(mon);
return -1;
}
diff --git a/src/qemu/qemu_monitor.h
b/src/qemu/qemu_monitor.h
index 71688cb..27b43b7 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -78,11 +78,14 @@ typedef int
(*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon,
qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
qemuMonitorEOFNotify eofCB);
-void qemuMonitorClose(qemuMonitorPtr mon);
+int qemuMonitorClose(qemuMonitorPtr mon);
void qemuMonitorLock(qemuMonitorPtr mon);
void qemuMonitorUnlock(qemuMonitorPtr mon);
+int qemuMonitorRef(qemuMonitorPtr mon);
+int qemuMonitorUnref(qemuMonitorPtr mon);
+
void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr
mon,
qemuMonitorDiskSecretLookup secretCB);
--
1.6.5.2
--
Libvir-list mailing list
Libvir-list(a)redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list