On 12/05/14 15:03, Martin Kletzander wrote:
There is one problem that causes various errors in the daemon. When
domain is waiting for a job, it is unlocked while waiting on the
condition. However, if that domain is for example transient and being
removed in another API (e.g. cancelling incoming migration), it get's
unref'd. If the first call, that was waiting, fails to get the job, it
unref's the domain object, and because it was the last reference, it
causes clearing of the whole domain object. However, when finishing the
call, the domain must be unlocked, but there is no way for the API to
know whether it was cleaned or not (unless there is some ugly temporary
variable, but let's scratch that).
The root cause is that our APIs don't ref the objects they are using and
all use the implicit reference that the object has when it is in the
domain list. That reference can be removed when the API is waiting for
a job. And because each domain doesn't do its ref'ing, it results in
the ugly checking of the return value of virObjectUnref() that we have
everywhere.
This patch changes qemuDomObjFromDomain() to ref the domain (using
virDomainObjListFindByUUIDRef()) and adds qemuDomObjEndAPI() which
should be the only function in which the return value of
virObjectUnref() is checked. This makes all reference counting
deterministic and makes the code a bit clearer.
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
src/qemu/THREADS.txt | 40 ++-
src/qemu/qemu_domain.c | 29 +-
src/qemu/qemu_domain.h | 12 +-
src/qemu/qemu_driver.c | 708 ++++++++++++++++------------------------------
src/qemu/qemu_migration.c | 108 +++----
src/qemu/qemu_migration.h | 10 +-
src/qemu/qemu_process.c | 87 +++---
7 files changed, 359 insertions(+), 635 deletions(-)
Continuing where I finished yesterday:
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9152cf5..2e0b7fc 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
[...]
@@ -15128,21 +14970,18 @@ static virDomainPtr
qemuDomainQemuAttach(virConnectPtr conn,
VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
NULL)))
goto cleanup;
-
Keep the empty line for visual separation.
+ virObjectRef(vm);
def = NULL;
if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) {
qemuDomainRemoveInactive(driver, vm);
- vm = NULL;
goto cleanup;
}
if (qemuProcessAttach(conn, driver, vm, pid,
pidfile, monConfig, monJSON) < 0) {
- if (qemuDomainObjEndJob(driver, vm))
- qemuDomainRemoveInactive(driver, vm);
- vm = NULL;
- monConfig = NULL;
Ownership of monConfig is transferred to qemuProcessAttach(), thus you
need to clean the pointer to avoid double free.
+ qemuDomainObjEndJob(driver, vm);
+ qemuDomainRemoveInactive(driver, vm);
goto cleanup;
}
[...]
@@ -15513,8 +15348,7 @@ qemuDomainBlockPivot(virConnectPtr conn,
}
-/* bandwidth in MiB/s per public API. Caller must lock vm beforehand,
- * and not access it afterwards. */
+/* bandwidth in MiB/s per public API. Caller must lock vm beforehand. */
The semantics didn't change. qemuDomainBlockJobImpl() consumes vm and
calls qemuDomObjEndAPI() on it without adding any reference thus callers
still shouldn't touch VM after passing it to this function.
static int
qemuDomainBlockJobImpl(virDomainObjPtr vm,
virConnectPtr conn,
[...]
@@ -15776,7 +15606,7 @@ qemuDomainBlockJobAbort(virDomainPtr dom,
const char *path, unsigned int flags)
static int
qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
- virDomainBlockJobInfoPtr info, unsigned int flags)
+ virDomainBlockJobInfoPtr info, unsigned int flags)
Unrelated whitespace fix?
{
virQEMUDriverPtr driver = dom->conn->privateData;
qemuDomainObjPrivatePtr priv;
[...]
@@ -18733,13 +18523,15 @@ qemuConnectGetAllDomainStats(virConnectPtr
conn,
continue;
if (doms != domlist &&
- !virConnectGetAllDomainStatsCheckACL(conn, dom->def))
+ !virConnectGetAllDomainStatsCheckACL(conn, dom->def)) {
+ virObjectUnlock(dom);
Hmm, this should be a separate fix as I've done with the migration APIs.
Or perhaps is this a rebase artifact from a un-published patch?
continue;
+ }
- if (HAVE_JOB(domflags) &&
- qemuDomainObjBeginJob(driver, dom, QEMU_JOB_QUERY) < 0)
+ if (HAVE_JOB(privflags) &&
+ qemuDomainObjBeginJob(driver, dom, QEMU_JOB_QUERY) >= 0)
/* As it was never requested. Gather as much as possible anyway. */
- domflags &= ~QEMU_DOMAIN_STATS_HAVE_JOB;
+ domflags |= QEMU_DOMAIN_STATS_HAVE_JOB;
if (qemuDomainGetStats(conn, dom, stats, &tmp, domflags) < 0)
goto endjob;
[...]
@@ -18763,8 +18553,7 @@ qemuConnectGetAllDomainStats(virConnectPtr
conn,
endjob:
if (HAVE_JOB(domflags) && dom)
- if (!qemuDomainObjEndJob(driver, dom))
- dom = NULL;
+ qemuDomainObjEndJob(driver, dom);
cleanup:
if (dom)
@@ -18835,8 +18624,7 @@ qemuDomainGetFSInfo(virDomainPtr dom,
qemuDomainObjExitAgent(vm);
endjob:
- if (!qemuDomainObjEndJob(driver, vm))
- vm = NULL;
+ qemuDomainObjEndJob(driver, vm);
cleanup:
if (vm)
Here's a virObjectUnlock() that you've forgot to change to
qemuDomObjEndAPI().
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 0acbb57..e4c8cf8 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
[...]
+ qemuMigrationJobContinue(vm);
if (autoPort)
priv->migrationPort = port;
@@ -3115,16 +3099,12 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
VIR_FREE(xmlout);
VIR_FORCE_CLOSE(dataFD[0]);
VIR_FORCE_CLOSE(dataFD[1]);
(not in context)
This function creates the @vm object with virDomainObjListAdd() but you
don't ref it afterwards as you've used to do in other places that call
that func ...
- if (vm) {
- if (ret < 0) {
- virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
- priv->nbdPort = 0;
- }
- if (ret >= 0 || vm->persistent)
- virObjectUnlock(vm);
- else
- qemuDomainRemoveInactive(driver, vm);
+ if (ret < 0) {
+ virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
+ priv->nbdPort = 0;
+ qemuDomainRemoveInactive(driver, vm);
}
+ qemuDomObjEndAPI(&vm);
... and now you'd kill your last reference.
if (event)
qemuDomainEventQueue(driver, event);
qemuMigrationCookieFree(mig);
[...]
@@ -5301,21 +5268,16 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
VIR_WARN("Unable to encode migration cookie");
endjob:
- if (qemuMigrationJobFinish(driver, vm) == 0) {
- vm = NULL;
- } else if (!vm->persistent && !virDomainObjIsActive(vm)) {
+ qemuMigrationJobFinish(driver, vm);
+ if (!vm->persistent && !virDomainObjIsActive(vm))
qemuDomainRemoveInactive(driver, vm);
- vm = NULL;
- }
cleanup:
virPortAllocatorRelease(driver->migrationPorts, port);
- if (vm) {
- if (priv->mon)
- qemuMonitorSetDomainLog(priv->mon, -1);
- VIR_FREE(priv->origname);
- virObjectUnlock(vm);
- }
+ if (priv->mon)
+ qemuMonitorSetDomainLog(priv->mon, -1);
+ VIR_FREE(priv->origname);
+ qemuDomObjEndAPI(&vm);
The finish phase of the migration gathers the domain object via
virDomainObjListFindByName() and thus isn't ref'd.
You should tweak the callers to ref the object, otherwise it might
vanish with us thinking we have the object.
if (event)
qemuDomainEventQueue(driver, event);
qemuMigrationCookieFree(mig);
[...]
@@ -5594,7 +5554,7 @@ qemuMigrationJobIsActive(virDomainObjPtr vm,
return true;
}
-bool
+void
qemuMigrationJobFinish(virQEMUDriverPtr driver, virDomainObjPtr vm)
{
return qemuDomainObjEndAsyncJob(driver, vm);
returning result of a void function in a void function? :)
[...]
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a14b6f7..4776ce8 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -571,6 +571,7 @@ qemuProcessFakeReboot(void *opaque)
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
int ret = -1;
+
Spurious whitespace change.
VIR_DEBUG("vm=%p", vm);
virObjectLock(vm);
if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
[...]
@@ -935,15 +932,13 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon
ATTRIBUTE_UNUSED,
*/
virObjectRef(vm);
if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
- if (!virObjectUnref(vm))
- vm = NULL;
+ virObjectUnref(vm);
VIR_FREE(processEvent);
}
}
}
- if (vm)
- virObjectUnlock(vm);
+ virObjectUnlock(vm);
This can be considered as a separate cleanup as it doesn't change any
semantics due to the fact that we acquired a reference to pass to the
worker thread and are guaranteed that we are getting rid of the same one.
if (watchdogEvent)
qemuDomainEventQueue(driver, watchdogEvent);
if (lifecycleEvent)
@@ -1439,14 +1434,12 @@ qemuProcessHandleGuestPanic(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
*/
virObjectRef(vm);
if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
- if (!virObjectUnref(vm))
- vm = NULL;
+ virObjectUnref(vm);
VIR_FREE(processEvent);
}
cleanup:
- if (vm)
- virObjectUnlock(vm);
+ virObjectUnlock(vm);
ditto for this hunk.
return 0;
}
[...]
Overall looks good, but many of the comments warrant for another version.
Peter