[libvirt] [PATCH 0/2] Fix reference counting bugs on qemu monitor

The previous change commit d84bb6d6a3bd2fdd530184cc9743249ebddbee71 Author: Jiri Denemark <jdenemar@redhat.com> Date: Wed May 12 11:50:52 2010 +0200 Fix monitor ref counting when adding event handle actually introduced a couple of bugs in freeing the QEMU monitor object, resulting in priv->mon never being set to NULL and a reference leak on virDomainObjPtr. This interacted with anothe bug in the QEMU driver to cause a hang of the libvirtd daemon

Before issuing monitor commands it is neccessary to check whether the guest is still running. Most places use virDomainIsActive() correctly, but a few relied on 'priv->mon != NULL'. In theory these should be equivalent, but the release of the last reference count on priv->mon can be delayed a small amount of time until the event handler is finally deregistered. A further ref counting bug also means that priv->mon might be never released. In such a case, code could mistakenly issue a monitor command and wait for a response that will never arrive, effectively leaving the QEMU driver waiting on virCondWait() forever.. To protect against these possibilities, make sure all code uses virDomainIsActive(), not 'priv->mon != NULL' * src/qemu/qemu_driver.c: Replace 'priv->mon != NULL' with calls to 'priv->mon != NULL'() --- src/qemu/qemu_driver.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index bc8dcfa..c837611 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5225,7 +5225,7 @@ static int qemudDomainSaveFlag(virDomainPtr dom, const char *path, endjob: if (vm) { if (ret != 0) { - if (header.was_running && priv->mon) { + if (header.was_running && virDomainObjIsActive(vm)) { qemuDomainObjEnterMonitorWithDriver(driver, vm); rc = qemuMonitorStartCPUs(priv->mon, dom->conn); qemuDomainObjExitMonitorWithDriver(driver, vm); @@ -5537,7 +5537,7 @@ endjob: /* Since the monitor is always attached to a pty for libvirt, it will support synchronous operations so we always get here after the migration is complete. */ - else if (resume && paused && priv->mon) { + else if (resume && paused && virDomainObjIsActive(vm)) { qemuDomainObjEnterMonitorWithDriver(driver, vm); if (qemuMonitorStartCPUs(priv->mon, dom->conn) < 0) { if (virGetLastError() == NULL) @@ -7651,7 +7651,7 @@ cleanup: return ret; try_remove: - if (!priv->mon) + if (!virDomainObjIsActive(vm)) goto cleanup; if (vlan < 0) { @@ -7683,7 +7683,7 @@ try_remove: goto cleanup; try_tapfd_close: - if (!priv->mon) + if (!virDomainObjIsActive(vm)) goto cleanup; if (tapfd_name) { @@ -10737,7 +10737,7 @@ static int doTunnelMigrate(virDomainPtr dom, retval = doTunnelSendAll(st, client_sock); cancel: - if (retval != 0 && priv->mon) { + if (retval != 0 && virDomainObjIsActive(vm)) { qemuDomainObjEnterMonitorWithDriver(driver, vm); qemuMonitorMigrateCancel(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); -- 1.6.6.1

On 06/10/2010 08:17 AM, Daniel P. Berrange wrote:
To protect against these possibilities, make sure all code uses virDomainIsActive(), not 'priv->mon != NULL'
* src/qemu/qemu_driver.c: Replace 'priv->mon != NULL' with calls to 'priv->mon != NULL'()
typo in the last line of that commit message ACK, with that cleaned up. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

The current code pattern requires that callers of qemuMonitorClose check for the return value == 0, and if so, set priv->mon = NULL and release the reference held on the associated virDomainObjPtr The change d84bb6d6a3bd2fdd530184cc9743249ebddbee71 violated that requirement, meaning that priv->mon never gets set to NULL, and a reference count is leaked on virDomainObjPtr. This design was a bad one, so remove the need to check the return valueof qemuMonitorClose(). Instead allow registration of a callback that's invoked just when the last reference on qemuMonitorPtr is released. Finally there was a potential reference leak in qemuConnectMonitor in the failure path. * src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h: Add a destroy callback invoked from qemuMonitorFree * src/qemu/qemu_driver.c: Use the destroy callback to release the reference on virDomainObjPtr when the monitor is freed. Fix other potential reference count leak in connecting to monitor --- src/qemu/qemu_driver.c | 50 ++++++++++++++++++++++++++++------------------ src/qemu/qemu_monitor.c | 7 +++-- src/qemu/qemu_monitor.h | 5 +++- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c837611..ac63eff 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1172,7 +1172,17 @@ no_memory: } +static void qemuHandleMonitorDestroy(qemuMonitorPtr mon, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + if (priv->mon == mon) + priv->mon = NULL; + virDomainObjUnref(vm); +} + static qemuMonitorCallbacks monitorCallbacks = { + .destroy = qemuHandleMonitorDestroy, .eofNotify = qemuHandleMonitorEOF, .diskSecretLookup = findVolumeQcowPassphrase, .domainStop = qemuHandleDomainStop, @@ -1189,10 +1199,6 @@ qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm) qemuDomainObjPrivatePtr priv = vm->privateData; int ret = -1; - /* Hold an extra reference because we can't allow 'vm' to be - * deleted while the monitor is active */ - virDomainObjRef(vm); - if ((driver->securityDriver && driver->securityDriver->domainSetSecuritySocketLabel && driver->securityDriver->domainSetSecuritySocketLabel(driver->securityDriver,vm)) < 0) { @@ -1200,13 +1206,17 @@ qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm) goto error; } - if ((priv->mon = qemuMonitorOpen(vm, - priv->monConfig, - priv->monJSON, - &monitorCallbacks)) == NULL) { - VIR_ERROR(_("Failed to connect monitor for %s"), vm->def->name); - goto error; - } + /* Hold an extra reference because we can't allow 'vm' to be + * deleted while the monitor is active */ + virDomainObjRef(vm); + + priv->mon = qemuMonitorOpen(vm, + priv->monConfig, + priv->monJSON, + &monitorCallbacks); + + if (priv->mon == NULL) + virDomainObjUnref(vm); if ((driver->securityDriver && driver->securityDriver->domainClearSecuritySocketLabel && @@ -1215,17 +1225,20 @@ qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm) goto error; } + if (priv->mon == NULL) { + VIR_INFO("Failed to connect monitor for %s", vm->def->name); + goto error; + } + + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorSetCapabilities(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); ret = 0; error: - if (ret < 0) { + if (ret < 0) qemuMonitorClose(priv->mon); - priv->mon = NULL; - virDomainObjUnref(vm); - } return ret; } @@ -3692,11 +3705,8 @@ static void qemudShutdownVMDaemon(struct qemud_driver *driver, _("Failed to send SIGTERM to %s (%d)"), vm->def->name, vm->pid); - if (priv->mon && - qemuMonitorClose(priv->mon) == 0) { - virDomainObjUnref(vm); - priv->mon = NULL; - } + if (priv->mon) + qemuMonitorClose(priv->mon); if (priv->monConfig) { if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 3e18ac8..ea0351e 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -198,6 +198,8 @@ void qemuMonitorUnlock(qemuMonitorPtr mon) static void qemuMonitorFree(qemuMonitorPtr mon) { VIR_DEBUG("mon=%p", mon); + if (mon->cb->destroy) + (mon->cb->destroy)(mon, mon->vm); if (virCondDestroy(&mon->notify) < 0) {} virMutexDestroy(&mon->lock); @@ -675,12 +677,12 @@ cleanup: } -int qemuMonitorClose(qemuMonitorPtr mon) +void qemuMonitorClose(qemuMonitorPtr mon) { int refs; if (!mon) - return 0; + return; VIR_DEBUG("mon=%p", mon); @@ -700,7 +702,6 @@ int qemuMonitorClose(qemuMonitorPtr mon) if ((refs = qemuMonitorUnref(mon)) > 0) qemuMonitorUnlock(mon); - return refs; } diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index e03b4df..1a93c40 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -63,6 +63,9 @@ struct _qemuMonitorMessage { typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks; typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr; struct _qemuMonitorCallbacks { + void (*destroy)(qemuMonitorPtr mon, + virDomainObjPtr vm); + void (*eofNotify)(qemuMonitorPtr mon, virDomainObjPtr vm, int withError); @@ -120,7 +123,7 @@ qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm, int json, qemuMonitorCallbacksPtr cb); -int qemuMonitorClose(qemuMonitorPtr mon); +void qemuMonitorClose(qemuMonitorPtr mon); int qemuMonitorSetCapabilities(qemuMonitorPtr mon); -- 1.6.6.1

On 06/10/2010 08:17 AM, Daniel P. Berrange wrote:
The current code pattern requires that callers of qemuMonitorClose check for the return value == 0, and if so, set priv->mon = NULL and release the reference held on the associated virDomainObjPtr
The change d84bb6d6a3bd2fdd530184cc9743249ebddbee71 violated that requirement, meaning that priv->mon never gets set to NULL, and a reference count is leaked on virDomainObjPtr.
This design was a bad one, so remove the need to check the return valueof qemuMonitorClose(). Instead allow registration of a callback that's invoked just when the last reference on qemuMonitorPtr is released.
Finally there was a potential reference leak in qemuConnectMonitor in the failure path.
Definitely worth cleaning up. ACK. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (2)
-
Daniel P. Berrange
-
Eric Blake