On 08/28/14 13:27, Jincheng Miao wrote:
Currently it lacks synchronization to modify domain's snapshot
object list,
that race condition causes unsafe access to some freed snapshot objects.
Therefore, this patch wraps all access of snapshot object list in vm job lock.
Only the qemuDomainSnapshotCreateXML is not synchronized, it is related to
QEMU_ASYNC_JOB_SNAPSHOT async job for --disk-only snapshot. I am not sure
if it's ok to remove it, so need your ideas for qemuDomainSnapshotCreateXML.
Signed-off-by: Jincheng Miao <jmiao(a)redhat.com>
---
src/qemu/qemu_driver.c | 137 +++++++++++++++++++++++++++++++++++++++---------
1 files changed, 112 insertions(+), 25 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 73959da..7329aa9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -13609,6 +13609,7 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char
**names,
{
virDomainObjPtr vm = NULL;
int n = -1;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13619,8 +13620,12 @@ static int qemuDomainSnapshotListNames(virDomainPtr domain, char
**names,
if (virDomainSnapshotListNamesEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
This one isn't necessary. The domain obj is locked the whole time. Also
returning a list at a certain point in time is okay.
+
n = virDomainSnapshotObjListGetNames(vm->snapshots, NULL, names, nameslen,
flags);
+ ignore_value(qemuDomainObjEndJob(driver, vm));
cleanup:
if (vm)
@@ -13633,6 +13638,7 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
{
virDomainObjPtr vm = NULL;
int n = -1;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13643,8 +13649,13 @@ static int qemuDomainSnapshotNum(virDomainPtr domain,
if (virDomainSnapshotNumEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
+
Again, no need to enter the job here.
n = virDomainSnapshotObjListNum(vm->snapshots, NULL, flags);
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13657,6 +13668,7 @@ qemuDomainListAllSnapshots(virDomainPtr domain,
virDomainSnapshotPtr **snaps,
{
virDomainObjPtr vm = NULL;
int n = -1;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_ROOTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13667,8 +13679,13 @@ qemuDomainListAllSnapshots(virDomainPtr domain,
virDomainSnapshotPtr **snaps,
if (virDomainListAllSnapshotsEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
+
Same here, listing function doesn't need it.
n = virDomainListSnapshots(vm->snapshots, NULL, domain,
snaps, flags);
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13684,6 +13701,7 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr
snapshot,
virDomainObjPtr vm = NULL;
virDomainSnapshotObjPtr snap = NULL;
int n = -1;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13694,12 +13712,18 @@ qemuDomainSnapshotListChildrenNames(virDomainSnapshotPtr
snapshot,
if (virDomainSnapshotListChildrenNamesEnsureACL(snapshot->domain->conn,
vm->def) < 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
Also here having a job shouldn't be necessary. The snapshot list should
be consistent at all times.
n = virDomainSnapshotObjListGetNames(vm->snapshots, snap,
names, nameslen,
flags);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13713,6 +13737,7 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
virDomainObjPtr vm = NULL;
virDomainSnapshotObjPtr snap = NULL;
int n = -1;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13723,11 +13748,17 @@ qemuDomainSnapshotNumChildren(virDomainSnapshotPtr snapshot,
if (virDomainSnapshotNumChildrenEnsureACL(snapshot->domain->conn, vm->def)
< 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
Same here.
n = virDomainSnapshotObjListNum(vm->snapshots, snap, flags);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13742,6 +13773,7 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr snapshot,
virDomainObjPtr vm = NULL;
virDomainSnapshotObjPtr snap = NULL;
int n = -1;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_SNAPSHOT_LIST_DESCENDANTS |
VIR_DOMAIN_SNAPSHOT_FILTERS_ALL, -1);
@@ -13752,12 +13784,18 @@ qemuDomainSnapshotListAllChildren(virDomainSnapshotPtr
snapshot,
if (virDomainSnapshotListAllChildrenEnsureACL(snapshot->domain->conn,
vm->def) < 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
no locking necessary.
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
n = virDomainListSnapshots(vm->snapshots, snap, snapshot->domain, snaps,
flags);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13771,6 +13809,7 @@ static virDomainSnapshotPtr
qemuDomainSnapshotLookupByName(virDomainPtr domain,
virDomainObjPtr vm;
virDomainSnapshotObjPtr snap = NULL;
virDomainSnapshotPtr snapshot = NULL;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(0, NULL);
@@ -13780,11 +13819,17 @@ static virDomainSnapshotPtr
qemuDomainSnapshotLookupByName(virDomainPtr domain,
if (virDomainSnapshotLookupByNameEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromName(vm, name)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
+ if (!(snap = qemuSnapObjFromName(vm, name)))
+ goto endjob;
no locking necessary.
+
snapshot = virGetDomainSnapshot(domain, snap->def->name);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13796,6 +13841,7 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
{
virDomainObjPtr vm;
int ret = -1;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(0, -1);
@@ -13805,8 +13851,13 @@ static int qemuDomainHasCurrentSnapshot(virDomainPtr domain,
if (virDomainHasCurrentSnapshotEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
+
Same here
ret = (vm->current_snapshot != NULL);
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13820,6 +13871,7 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
virDomainObjPtr vm;
virDomainSnapshotObjPtr snap = NULL;
virDomainSnapshotPtr parent = NULL;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(0, NULL);
@@ -13829,18 +13881,24 @@ qemuDomainSnapshotGetParent(virDomainSnapshotPtr snapshot,
if (virDomainSnapshotGetParentEnsureACL(snapshot->domain->conn, vm->def)
< 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
Same here
if (!snap->def->parent) {
virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT,
_("snapshot '%s' does not have a parent"),
snap->def->name);
- goto cleanup;
+ goto endjob;
}
parent = virGetDomainSnapshot(snapshot->domain, snap->def->parent);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13852,6 +13910,7 @@ static virDomainSnapshotPtr
qemuDomainSnapshotCurrent(virDomainPtr domain,
{
virDomainObjPtr vm;
virDomainSnapshotPtr snapshot = NULL;
+ virQEMUDriverPtr driver = domain->conn->privateData;
virCheckFlags(0, NULL);
@@ -13861,14 +13920,20 @@ static virDomainSnapshotPtr
qemuDomainSnapshotCurrent(virDomainPtr domain,
if (virDomainSnapshotCurrentEnsureACL(domain->conn, vm->def) < 0)
goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
+
Same here.
if (!vm->current_snapshot) {
virReportError(VIR_ERR_NO_DOMAIN_SNAPSHOT, "%s",
_("the domain does not have a current snapshot"));
- goto cleanup;
+ goto endjob;
}
snapshot = virGetDomainSnapshot(domain, vm->current_snapshot->def->name);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13882,6 +13947,7 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr
snapshot,
char *xml = NULL;
virDomainSnapshotObjPtr snap = NULL;
char uuidstr[VIR_UUID_STRING_BUFLEN];
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(VIR_DOMAIN_XML_SECURE, NULL);
@@ -13891,13 +13957,19 @@ static char *qemuDomainSnapshotGetXMLDesc(virDomainSnapshotPtr
snapshot,
if (virDomainSnapshotGetXMLDescEnsureACL(snapshot->domain->conn, vm->def)
< 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
As long as the internal strucutres are consistent, no locking is required.
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
virUUIDFormat(snapshot->domain->uuid, uuidstr);
xml = virDomainSnapshotDefFormat(uuidstr, snap->def, flags, 0);
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13911,6 +13983,7 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
virDomainObjPtr vm = NULL;
int ret = -1;
virDomainSnapshotObjPtr snap = NULL;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(0, -1);
@@ -13920,12 +13993,18 @@ qemuDomainSnapshotIsCurrent(virDomainSnapshotPtr snapshot,
if (virDomainSnapshotIsCurrentEnsureACL(snapshot->domain->conn, vm->def)
< 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
Same here
ret = (vm->current_snapshot &&
STREQ(snapshot->name, vm->current_snapshot->def->name));
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -13940,6 +14019,7 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
virDomainObjPtr vm = NULL;
int ret = -1;
virDomainSnapshotObjPtr snap = NULL;
+ virQEMUDriverPtr driver = snapshot->domain->conn->privateData;
virCheckFlags(0, -1);
@@ -13949,14 +14029,20 @@ qemuDomainSnapshotHasMetadata(virDomainSnapshotPtr snapshot,
if (virDomainSnapshotHasMetadataEnsureACL(snapshot->domain->conn, vm->def)
< 0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
goto cleanup;
Same here.
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
/* XXX Someday, we should recognize internal snapshots in qcow2
* images that are not tied to a libvirt snapshot; if we ever do
* that, then we would have a reason to return 0 here. */
ret = 1;
+ endjob:
+ ignore_value(qemuDomainObjEndJob(driver, vm));
+
cleanup:
if (vm)
virObjectUnlock(vm);
@@ -14027,9 +14113,12 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
goto cleanup;
}
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
goto cleanup;
The problem lies here. This would modify the state without entering the
job. This one is necessary.
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
if (!vm->persistent &&
snap->def->state != VIR_DOMAIN_RUNNING &&
snap->def->state != VIR_DOMAIN_PAUSED &&
@@ -14038,13 +14127,13 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("transient domain needs to request run or pause "
"to revert to inactive snapshot"));
- goto cleanup;
+ goto endjob;
}
if (virDomainSnapshotIsExternal(snap)) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("revert to external snapshot not supported yet"));
- goto cleanup;
+ goto endjob;
}
if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) {
@@ -14052,7 +14141,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY,
_("snapshot '%s' lacks domain '%s'
rollback info"),
snap->def->name, vm->def->name);
- goto cleanup;
+ goto endjob;
}
if (virDomainObjIsActive(vm) &&
!(snap->def->state == VIR_DOMAIN_RUNNING
@@ -14061,7 +14150,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED))) {
virReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, "%s",
_("must respawn qemu to start inactive
snapshot"));
- goto cleanup;
+ goto endjob;
}
}
@@ -14070,7 +14159,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
vm->current_snapshot->def->current = false;
if (qemuDomainSnapshotWriteMetadata(vm, vm->current_snapshot,
cfg->snapshotDir) < 0)
- goto cleanup;
+ goto endjob;
vm->current_snapshot = NULL;
/* XXX Should we restore vm->current_snapshot after this point
* in the failure cases where we know there was no change? */
@@ -14085,12 +14174,9 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr
snapshot,
if (snap->def->dom) {
config = virDomainDefCopy(snap->def->dom, caps, driver->xmlopt, true);
if (!config)
- goto cleanup;
+ goto endjob;
}
- if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
- goto cleanup;
-
switch ((virDomainState) snap->def->state) {
case VIR_DOMAIN_RUNNING:
case VIR_DOMAIN_PAUSED:
@@ -14400,9 +14486,13 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr
snapshot,
if (virDomainSnapshotDeleteEnsureACL(snapshot->domain->conn, vm->def) <
0)
goto cleanup;
- if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
goto cleanup;
Here too. This would change the domain state without interlocking and
enter the monitor meanwhile. Here it's fine.
+ if (!(snap = qemuSnapObjFromSnapshot(vm, snapshot)))
+ goto endjob;
+
if (!metadata_only) {
if (!(flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY) &&
virDomainSnapshotIsExternal(snap))
@@ -14415,13 +14505,10 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr
snapshot,
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("deletion of %d external disk snapshots not "
"supported yet"), external);
- goto cleanup;
+ goto endjob;
}
}
- if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
- goto cleanup;
-
if (flags & (VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN |
VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY)) {
rem.driver = driver;
This patch introduces too much locking in unnecessary places. I'll post
a V2 trimmed of the parts that don't need locking.
Peter