On 05.06.2019 12:09, Michal Privoznik wrote:
Changing domain definition must be guarded with acquiring modify
job. The problem is if there is a thread executing say
qemuDomainSetMemoryStatsPeriod() which is an API that acquires
modify job and then possibly unlock the domain object and locks
it again. However, becasue virDomainObjAssignDef() does not take
a job (only object lock) it may have changed the domain
definition while the other thread unlocked the domain object in
order to talk on the monitor. For instance:
Thread1:
1) lookup domain object and lock it
2) acquire job
3) get pointers to live and persistent defs
4) unlock the domain object
5) talk to qemu on the monitor
Thread2 (Execute qemuDomainDefineXMLFlags):
1) lookup domain object and lock it
2) virDomainObjAssignDef() which overwrites persistent def and
frees the old one
3) unlock domain object
Thread1:
6) lock the domain object again
7) try to access the persistent def via pointer stored in 3)
Now, this will crash because the pointer from step 3) points to a
memory that was freed.
However, if Thread2 waited and acquired modify job (just like
Thread1) then these two would serialize and no harm would be
done.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/qemu/qemu_domain.c | 55 +++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_domain.h | 6 +++++
src/qemu/qemu_driver.c | 27 ++++++-------------
src/qemu/qemu_migration.c | 5 +---
4 files changed, 70 insertions(+), 23 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 4d3a8868b2..f6b677c69e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -7055,6 +7055,61 @@ virDomainDefParserConfig virQEMUDriverDomainDefParserConfig = {
};
+/**
+ * qemuDomainObjListAdd:
+ * @driver: qemu driver
+ * @def: domain definition
+ * @oldDef: previous domain definition
+ * @live: whether @def is live definition
+ * @flags: an bitwise-OR of virDomainObjListAdd flags
+ *
+ * Add a domain onto the list of domain object and sets its
+ * definition. If @oldDef is not NULL and there was pre-existing
+ * definition it's returned in @oldDef.
+ *
+ * In addition to that, if definition of an existing domain is
+ * changed a MODIFY job is acquired prior to that.
+ *
+ * Returns: domain object pointer on success,
+ * NULL otherwise.
+ */
+virDomainObjPtr
+qemuDomainObjListAdd(virQEMUDriverPtr driver,
+ virDomainDefPtr def,
+ virDomainDefPtr *oldDef,
+ bool live,
+ unsigned int flags)
+{
+ virDomainObjPtr vm = NULL;
+
+ if (!(vm = virDomainObjListAdd(driver->domains, def, driver->xmlopt, flags)))
+ return NULL;
+
+ /* At this point, @vm is locked. If it doesn't have any
+ * definition set, then the call above just added it and
+ * there can't be anybody else using the object. It's safe to
+ * just set the definition without acquiring job. */
+ if (!vm->def) {
+ virDomainObjAssignDef(vm, def, live, oldDef);
+ VIR_RETURN_PTR(vm);
Not sure why use VIR_RETURN_PTR
+ }
+
+ /* Bad luck. Domain was pre-existing and this call is trying
+ * to update its definition. Modify job MUST be acquired. */
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) {
+ qemuDomainRemoveInactive(driver, vm);
+ virDomainObjEndAPI(&vm);
+ return NULL;
+ }
+
+ virDomainObjAssignDef(vm, def, live, oldDef);
+
+ qemuDomainObjEndJob(driver, vm);
+
+ return vm;
+}
+
+
static void
qemuDomainObjSaveJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
{
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index f92f0dbc27..f469f8eaca 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -543,6 +543,12 @@ void qemuDomainEventFlush(int timer, void *opaque);
void qemuDomainEventEmitJobCompleted(virQEMUDriverPtr driver,
virDomainObjPtr vm);
+virDomainObjPtr qemuDomainObjListAdd(virQEMUDriverPtr driver,
+ virDomainDefPtr def,
+ virDomainDefPtr *oldDef,
+ bool live,
+ unsigned int flags);
+
int qemuDomainObjBeginJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
qemuDomainJob job)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 8bbac339e0..fa93a686b7 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1701,12 +1701,9 @@ static virDomainPtr qemuDomainCreateXML(virConnectPtr conn,
if (virDomainCreateXMLEnsureACL(conn, def) < 0)
goto cleanup;
- if (!(vm = virDomainObjListAdd(driver->domains, def,
- driver->xmlopt,
- VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE)))
+ if (!(vm = qemuDomainObjListAdd(driver, def, NULL, true,
+ VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE)))
goto cleanup;
-
- virDomainObjAssignDef(vm, def, true, NULL);
def = NULL;
if (qemuProcessBeginJob(driver, vm, VIR_DOMAIN_JOB_OPERATION_START,
@@ -2405,6 +2402,7 @@ static int qemuDomainSetMemoryStatsPeriod(virDomainPtr dom, int
period,
qemuDomainObjEnterMonitor(driver, vm);
r = qemuMonitorSetMemoryStatsPeriod(priv->mon, def->memballoon, period);
+ sleep(60);
if (qemuDomainObjExitMonitor(driver, vm) < 0)
goto endjob;
if (r < 0) {
@@ -6971,12 +6969,9 @@ qemuDomainRestoreFlags(virConnectPtr conn,
def = tmp;
}
- if (!(vm = virDomainObjListAdd(driver->domains, def,
- driver->xmlopt,
- VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE)))
+ if (!(vm = qemuDomainObjListAdd(driver, def, NULL, true,
+ VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE)))
goto cleanup;
-
- virDomainObjAssignDef(vm, def, true, NULL);
def = NULL;
if (flags & VIR_DOMAIN_SAVE_RUNNING)
@@ -7647,11 +7642,8 @@ qemuDomainDefineXMLFlags(virConnectPtr conn,
if (virDomainDefineXMLFlagsEnsureACL(conn, def) < 0)
goto cleanup;
- if (!(vm = virDomainObjListAdd(driver->domains, def,
- driver->xmlopt, 0)))
+ if (!(vm = qemuDomainObjListAdd(driver, def, &oldDef, false, 0)))
goto cleanup;
-
- virDomainObjAssignDef(vm, def, false, &oldDef);
def = NULL;
Unfortunately in case of virDomainSaveConfig failure we change definitions again
without a job. This will not cause crash because of memory leak upon restoring
definition and cause only reverting changes of concurrent API call :) Other places of qemu
driver
do not have this issue because they acquire a job right after adding domain
to list.
Ohhh, I see this will be fixed in next patch....
With/without VIR_RETURN_PTR: Reviewed-by: Nikolay Shirokovskiy
<nshirokovskiy(a)virtuozzo.com>
PS
I still wonder why not using single job for all API affairs? It looks cleaner
and have less unnessesary execution paths in multithreading scenario. What's
the difference if job is async?
Nikolay