[libvirt PATCH 0/3] qemu: block: basic implementation of deletion of external snapshots

Deleting external snapshots has been unimplemented so far. This series aims to enable limited functionality in that direction, handling just the common cases for now. The intention is to subsequently build upon this to eventually cover the more complex/exotic cases as well. Please refer to the commit message of the final commit for a detailed description of features and limitations of this change. Pavel Mores (3): qemu: block: factor implementation out of qemuDomainBlockCommit() qemu: block: factor implementation out of qemuDomainBlockJobAbort() qemu: block: external snapshot-delete implementation for straightforward cases src/qemu/qemu_driver.c | 295 +++++++++++++++++++++++++++++++++-------- 1 file changed, 241 insertions(+), 54 deletions(-) -- 2.24.1

The idea is to make block commit callable from within the QEMU driver. Signed-off-by: Pavel Mores <pmores@redhat.com> --- src/qemu/qemu_driver.c | 84 +++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e9a62684f0..f94636f651 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -154,6 +154,15 @@ static int qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid, static virQEMUDriverPtr qemu_driver; +static int +qemuDomainBlockCommitImpl(virDomainObjPtr vm, + virQEMUDriverPtr driver, + const char *path, + const char *base, + const char *top, + unsigned long bandwidth, + unsigned int flags); + /* Looks up the domain object from snapshot and unlocks the * driver. The returned domain object is locked and ref'd and the * caller must call virDomainObjEndAPI() on it. */ @@ -18314,18 +18323,16 @@ qemuDomainBlockPull(virDomainPtr dom, const char *path, unsigned long bandwidth, return qemuDomainBlockPullCommon(vm, path, NULL, bandwidth, flags); } - static int -qemuDomainBlockCommit(virDomainPtr dom, - const char *path, - const char *base, - const char *top, - unsigned long bandwidth, - unsigned int flags) +qemuDomainBlockCommitImpl(virDomainObjPtr vm, + virQEMUDriverPtr driver, + const char *path, + const char *base, + const char *top, + unsigned long bandwidth, + unsigned int flags) { - virQEMUDriverPtr driver = dom->conn->privateData; - qemuDomainObjPrivatePtr priv; - virDomainObjPtr vm = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; const char *device = NULL; const char *jobname = NULL; int ret = -1; @@ -18347,25 +18354,6 @@ qemuDomainBlockCommit(virDomainPtr dom, bool persistjob = false; bool blockdev = false; - virCheckFlags(VIR_DOMAIN_BLOCK_COMMIT_SHALLOW | - VIR_DOMAIN_BLOCK_COMMIT_ACTIVE | - VIR_DOMAIN_BLOCK_COMMIT_RELATIVE | - VIR_DOMAIN_BLOCK_COMMIT_DELETE | - VIR_DOMAIN_BLOCK_COMMIT_BANDWIDTH_BYTES, -1); - - if (!(vm = qemuDomainObjFromDomain(dom))) - goto cleanup; - priv = vm->privateData; - - if (virDomainBlockCommitEnsureACL(dom->conn, vm->def) < 0) - goto cleanup; - - if (qemuDomainSupportsCheckpointsBlockjobs(vm) < 0) - goto cleanup; - - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) - goto cleanup; - if (virDomainObjCheckActive(vm) < 0) goto endjob; @@ -18558,6 +18546,43 @@ qemuDomainBlockCommit(virDomainPtr dom, virErrorRestore(&orig_err); } qemuBlockJobStartupFinalize(vm, job); + + return ret; +} + + +static int +qemuDomainBlockCommit(virDomainPtr dom, + const char *path, + const char *base, + const char *top, + unsigned long bandwidth, + unsigned int flags) +{ + virQEMUDriverPtr driver = dom->conn->privateData; + virDomainObjPtr vm = NULL; + int ret = -1; + + virCheckFlags(VIR_DOMAIN_BLOCK_COMMIT_SHALLOW | + VIR_DOMAIN_BLOCK_COMMIT_ACTIVE | + VIR_DOMAIN_BLOCK_COMMIT_RELATIVE | + VIR_DOMAIN_BLOCK_COMMIT_DELETE | + VIR_DOMAIN_BLOCK_COMMIT_BANDWIDTH_BYTES, -1); + + if (!(vm = qemuDomainObjFromDomain(dom))) + goto cleanup; + + if (virDomainBlockCommitEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (qemuDomainSupportsCheckpointsBlockjobs(vm) < 0) + goto cleanup; + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + + ret = qemuDomainBlockCommitImpl(vm, driver, path, base, top, bandwidth, flags); + qemuDomainObjEndJob(driver, vm); cleanup: @@ -18565,6 +18590,7 @@ qemuDomainBlockCommit(virDomainPtr dom, return ret; } + static int qemuDomainOpenGraphics(virDomainPtr dom, unsigned int idx, -- 2.24.1

As with the previous commit, this should primarily make qemuDomainBlockJobAbort() callable from within the QEMU driver. Signed-off-by: Pavel Mores <pmores@redhat.com> --- src/qemu/qemu_driver.c | 62 ++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f94636f651..b981745ecf 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -163,6 +163,12 @@ qemuDomainBlockCommitImpl(virDomainObjPtr vm, unsigned long bandwidth, unsigned int flags); +static int +qemuDomainBlockJobAbortImpl(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *path, + unsigned int flags); + /* Looks up the domain object from snapshot and unlocks the * driver. The returned domain object is locked and ref'd and the * caller must call virDomainObjEndAPI() on it. */ @@ -17440,45 +17446,31 @@ qemuDomainBlockPullCommon(virDomainObjPtr vm, return ret; } - static int -qemuDomainBlockJobAbort(virDomainPtr dom, - const char *path, - unsigned int flags) +qemuDomainBlockJobAbortImpl(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *path, + unsigned int flags) { - virQEMUDriverPtr driver = dom->conn->privateData; virDomainDiskDefPtr disk = NULL; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); bool pivot = !!(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT); bool async = !!(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC); g_autoptr(qemuBlockJobData) job = NULL; - virDomainObjPtr vm; qemuDomainObjPrivatePtr priv = NULL; bool blockdev = false; int ret = -1; - virCheckFlags(VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC | - VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT, -1); - - if (!(vm = qemuDomainObjFromDomain(dom))) - return -1; - - if (virDomainBlockJobAbortEnsureACL(dom->conn, vm->def) < 0) - goto cleanup; - - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) - goto cleanup; - if (virDomainObjCheckActive(vm) < 0) - goto endjob; + return -1; if (!(disk = qemuDomainDiskByName(vm->def, path))) - goto endjob; + return -1; if (!(job = qemuBlockJobDiskGetJob(disk))) { virReportError(VIR_ERR_INVALID_ARG, _("disk %s does not have an active block job"), disk->dst); - goto endjob; + return -1; } priv = vm->privateData; @@ -17549,6 +17541,34 @@ qemuDomainBlockJobAbort(virDomainPtr dom, endjob: if (job && !async) qemuBlockJobSyncEnd(vm, job, QEMU_ASYNC_JOB_NONE); + + return ret; +} + + +static int +qemuDomainBlockJobAbort(virDomainPtr dom, + const char *path, + unsigned int flags) +{ + virQEMUDriverPtr driver = dom->conn->privateData; + virDomainObjPtr vm; + int ret = -1; + + virCheckFlags(VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC | + VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT, -1); + + if (!(vm = qemuDomainObjFromDomain(dom))) + return -1; + + if (virDomainBlockJobAbortEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + + ret = qemuDomainBlockJobAbortImpl(driver, vm, path, flags); + qemuDomainObjEndJob(driver, vm); cleanup: -- 2.24.1

Works by blockcommitting the snapshot to be deleted into its parent. Handles both deleting children as well and deleting children only (commits the children into the snapshot referred to by snapshot-delete argument). If the necessary block commit operation turns out to be active (the snapshot referred to by snapshot-delete argument is current, or deleting children is requested) pivot is performed as well. This implemetation is limited to the straightforward case which assumes no branching snapshot chains below the snapshot to be deleted, and the current snapshot has to be the leaf of the (linear) chain starting at the snapshot to be deleted. These requirements are checked and enforced at the top of qemuDomainSnapshotDeleteExternal(). They might even be too restrictive for the case where deletion of children is not requested as in that case, requiring that the parent of the snapshot to be deleted not be a branching point in snapshot tree should be enough. The above limitations do not appear to be too severe under the current state of matters where a major source of branching snapshot structures, snapshot-revert, is not even implemented for external snapshots yet. The only other known cause of branching in external snapshots thus seems to be if a user creates branching by manually creating snapshot images and metadata and applies them using snapshot-create --redefine. At any rate, this work should be understood as just a first step to a full support of deleting external snapshots. Signed-off-by: Pavel Mores <pmores@redhat.com> --- src/qemu/qemu_driver.c | 149 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b981745ecf..4d4f3f069f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -16707,6 +16707,149 @@ qemuDomainMomentReparentChildren(void *payload, } +/* + * We can't use virDomainMomentFindLeaf() as it takes a + * virDomainMomentObjListPtr which we don't have. It might be a good idea to + * move this function to a library of virDomainMomentObj helpers, then + * reimplement virDomainMomentFindLeaf() in terms of this function as it only + * uses its virDomainMomentObjListPtr parameter to fish a virDomainMomentObjPtr + * out of it. Then it just runs this function's algorithm on it. + */ +static virDomainMomentObjPtr +myDomainMomentFindLeaf(virDomainMomentObjPtr moment) +{ + if (moment->nchildren != 1) + return NULL; + while (moment->nchildren == 1) + moment = moment->first_child; + if (moment->nchildren == 0) + return moment; + return NULL; +} + + +static int +qemuDomainSnapshotDeleteExternal(virDomainObjPtr vm, + virQEMUDriverPtr driver, + virDomainMomentObjPtr snap, + unsigned int flags) +{ + int ret = -1; + bool isActive; + virDomainSnapshotDefPtr snapdef = virDomainSnapshotObjGetDef(snap); + virDomainMomentObjPtr leaf = snap->nchildren ? myDomainMomentFindLeaf(snap) : snap; + virDomainMomentObjPtr parent = snap->parent; + virDomainSnapshotDefPtr parentdef = virDomainSnapshotObjGetDef(parent); + size_t i; + + /* This function only works if the chain below 'snap' is linear. If + * there's no unique leaf it means the chain of 'snap's children + * branches at some point. Also, if there *is* a leaf but it's not + * the current snapshot, bail out as well. */ + if (leaf == NULL) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + "%s", _("can't delete, snapshot chain branches")); + goto cleanup; + } + if (leaf != virDomainSnapshotGetCurrent(vm->snapshots)) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + "%s", _("can't delete, leaf snapshot is not current")); + goto cleanup; + } + + if (flags & (VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN | + VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY)) { + isActive = true; + } else { + isActive = snap == virDomainSnapshotGetCurrent(vm->snapshots); + } + + for (i = 0; i < snapdef->ndisks; i++) { + virDomainSnapshotDiskDefPtr snapdisk = &(snapdef->disks[i]); + const char *diskName = snapdisk->name; + const char *basePath = NULL; + const char *topPath = snapdisk->src->path; + int blockCommitFlags = VIR_DOMAIN_BLOCK_COMMIT_DELETE; + virDomainDiskDefPtr disk; + + if (!(disk = qemuDomainDiskByName(vm->def, diskName))) + goto cleanup; + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY) { + basePath = snapdisk->src->path; + topPath = disk->src->path; + } else { + if (parent->nchildren == 1) { + if (parentdef == NULL) { + virStorageSourcePtr baseSrc = virStorageFileChainLookup(disk->src, NULL, NULL, 0, NULL); + if (!baseSrc) + goto cleanup; + basePath = baseSrc->path; + } else { + virDomainSnapshotDiskDefPtr parentdisk = &(parentdef->disks[i]); + basePath = parentdisk->src->path; + } + } else { + /* TODO 'snap's parent has multiple children, meaning it's a + * branching point in snapshot tree. This means we can't + * delete 'snap' by commiting into its parent as doing so would + * corrupt the other branches rooted in the parent. We might + * still be able to delete 'snap' though by pulling into its + * child/children. */ + + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("can't delete, parent has multiple children")); + } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path; + } + + if (isActive) + blockCommitFlags |= VIR_DOMAIN_BLOCK_COMMIT_ACTIVE; + + if (qemuDomainBlockCommitImpl(vm, driver, diskName, basePath, topPath, + 0, blockCommitFlags) < 0) + goto cleanup; + + if (isActive) { + /* wait for the blockcommit job to finish (in particular, reach the + * QEMU_BLOCKJOB_STATE_READY state)... */ + qemuBlockJobDataPtr job; + + if (!(job = qemuBlockJobDiskGetJob(disk))) { + virReportError(VIR_ERR_INVALID_ARG, + _("disk %s does not have an active block job"), disk->dst); + goto cleanup; + } + + qemuBlockJobSyncBegin(job); + qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE); + while (job->state != QEMU_BLOCKJOB_STATE_READY) { + if (virDomainObjWait(vm) < 0) { + ret = -1; + goto cleanup; + } + qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE); + } + qemuBlockJobSyncEnd(vm, job, QEMU_ASYNC_JOB_NONE); + + /* ... and pivot */ + if (qemuDomainBlockJobAbortImpl(driver, vm, diskName, + VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) < 0) + goto cleanup; + } + } + + ret = 0; + + cleanup: + return ret; +} + + static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot, unsigned int flags) @@ -16749,10 +16892,8 @@ qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot, qemuDomainSnapshotCountExternal, &external); if (external) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("deletion of %d external disk snapshots not " - "supported yet"), external); - goto endjob; + if (qemuDomainSnapshotDeleteExternal(vm, driver, snap, flags) < 0) + goto endjob; } } -- 2.24.1

On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
Works by blockcommitting the snapshot to be deleted into its parent. Handles both deleting children as well and deleting children only (commits the children into the snapshot referred to by snapshot-delete argument). If the necessary block commit operation turns out to be active (the snapshot referred to by snapshot-delete argument is current, or deleting children is requested) pivot is performed as well.
This implemetation is limited to the straightforward case which assumes no branching snapshot chains below the snapshot to be deleted, and the current snapshot has to be the leaf of the (linear) chain starting at the snapshot to be deleted. These requirements are checked and enforced at the top of qemuDomainSnapshotDeleteExternal(). They might even be too restrictive for the case where deletion of children is not requested as in that case, requiring that the parent of the snapshot to be deleted not be a branching point in snapshot tree should be enough.
The above limitations do not appear to be too severe under the current state of matters where a major source of branching snapshot structures, snapshot-revert, is not even implemented for external snapshots yet. The only other known cause of branching in external snapshots thus seems to be if a user creates branching by manually creating snapshot images and metadata and applies them using snapshot-create --redefine. At any rate, this work should be understood as just a first step to a full support of deleting external snapshots.
Signed-off-by: Pavel Mores <pmores@redhat.com> --- src/qemu/qemu_driver.c | 149 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 4 deletions(-)
Few quick comments:
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b981745ecf..4d4f3f069f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -16707,6 +16707,149 @@ qemuDomainMomentReparentChildren(void *payload, }
+/* + * We can't use virDomainMomentFindLeaf() as it takes a + * virDomainMomentObjListPtr which we don't have. It might be a good idea to + * move this function to a library of virDomainMomentObj helpers, then + * reimplement virDomainMomentFindLeaf() in terms of this function as it only + * uses its virDomainMomentObjListPtr parameter to fish a virDomainMomentObjPtr + * out of it. Then it just runs this function's algorithm on it.
Please do so ...
+ */ +static virDomainMomentObjPtr +myDomainMomentFindLeaf(virDomainMomentObjPtr moment)
... since this does not conform to the naming scheme.
+{ + if (moment->nchildren != 1) + return NULL; + while (moment->nchildren == 1) + moment = moment->first_child; + if (moment->nchildren == 0) + return moment; + return NULL; +} + + +static int +qemuDomainSnapshotDeleteExternal(virDomainObjPtr vm, + virQEMUDriverPtr driver, + virDomainMomentObjPtr snap, + unsigned int flags) +{ + int ret = -1; + bool isActive; + virDomainSnapshotDefPtr snapdef = virDomainSnapshotObjGetDef(snap); + virDomainMomentObjPtr leaf = snap->nchildren ? myDomainMomentFindLeaf(snap) : snap;
Please refrain from using ternary operators here.
+ virDomainMomentObjPtr parent = snap->parent; + virDomainSnapshotDefPtr parentdef = virDomainSnapshotObjGetDef(parent); + size_t i; + + /* This function only works if the chain below 'snap' is linear. If + * there's no unique leaf it means the chain of 'snap's children + * branches at some point. Also, if there *is* a leaf but it's not + * the current snapshot, bail out as well. */ + if (leaf == NULL) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + "%s", _("can't delete, snapshot chain branches")); + goto cleanup; + } + if (leaf != virDomainSnapshotGetCurrent(vm->snapshots)) {
Btw, vm->snapshots.base is the 'virDomainMomentObjListPtr' you were looking for above.
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + "%s", _("can't delete, leaf snapshot is not current")); + goto cleanup; + }
Check that VM is active should be added here, so that we can bail out sooner than when we try to actually commit.
+ + if (flags & (VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN | + VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY)) { + isActive = true; + } else { + isActive = snap == virDomainSnapshotGetCurrent(vm->snapshots); + } + + for (i = 0; i < snapdef->ndisks; i++) { + virDomainSnapshotDiskDefPtr snapdisk = &(snapdef->disks[i]); + const char *diskName = snapdisk->name; + const char *basePath = NULL; + const char *topPath = snapdisk->src->path; + int blockCommitFlags = VIR_DOMAIN_BLOCK_COMMIT_DELETE; + virDomainDiskDefPtr disk; + + if (!(disk = qemuDomainDiskByName(vm->def, diskName))) + goto cleanup;
This loop is too complex as it does the merging itself. If any of these fails you have a half-deleted snapshshot which will be very hard to clean up later. All preparation and validation steps must be extracted and done prior to actually doing the commiting. Specifically a case when this would break is e.g. if a disk is unplugged after a snapshot. This code will attempt looking it up but that will fail.
+ + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN_ONLY) { + basePath = snapdisk->src->path; + topPath = disk->src->path; + } else { + if (parent->nchildren == 1) { + if (parentdef == NULL) { + virStorageSourcePtr baseSrc = virStorageFileChainLookup(disk->src, NULL, NULL, 0, NULL); + if (!baseSrc) + goto cleanup; + basePath = baseSrc->path; + } else { + virDomainSnapshotDiskDefPtr parentdisk = &(parentdef->disks[i]); + basePath = parentdisk->src->path; + } + } else { + /* TODO 'snap's parent has multiple children, meaning it's a + * branching point in snapshot tree. This means we can't + * delete 'snap' by commiting into its parent as doing so would + * corrupt the other branches rooted in the parent. We might + * still be able to delete 'snap' though by pulling into its + * child/children. */ + + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("can't delete, parent has multiple children"));
Missing jump/handling of error.
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof. Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back) Doing the validation above will necessarily give you a virStorageSourcePtr for the appropriate member of the backing chain and that one should be used as argument for the commit operation.
+ } + + if (isActive) + blockCommitFlags |= VIR_DOMAIN_BLOCK_COMMIT_ACTIVE; + + if (qemuDomainBlockCommitImpl(vm, driver, diskName, basePath, topPath, + 0, blockCommitFlags) < 0) + goto cleanup;
This is a long running operation and thus should be handled via an async job. Otherwise for the full duration of multiple sequential block commit operations the VM object will be locked which will prevent callers from e.g. looking up statistics of the VM or job or potentially aborting the job. It is okay in this instance for the API to block for the duration but I'd strongly suggest you add a flag which will cause the API to terminate after kicking off the block commit operations and then use the async job infrastructure to handle the completion notification. Since adding the parameter is not strictly required for the conversion of the deletion API to async job as we can keep the API-blocking semantics this can be done separately. Also consider kicking off the commits in parallel.
+ + if (isActive) { + /* wait for the blockcommit job to finish (in particular, reach the + * QEMU_BLOCKJOB_STATE_READY state)... */ + qemuBlockJobDataPtr job; + + if (!(job = qemuBlockJobDiskGetJob(disk))) { + virReportError(VIR_ERR_INVALID_ARG, + _("disk %s does not have an active block job"), disk->dst); + goto cleanup; + } + + qemuBlockJobSyncBegin(job); + qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE); + while (job->state != QEMU_BLOCKJOB_STATE_READY) {
This will loop forever if the job e.g. fails or is aborted before reaching the 'READY' state.
+ if (virDomainObjWait(vm) < 0) { + ret = -1; + goto cleanup; + } + qemuBlockJobUpdate(vm, job, QEMU_ASYNC_JOB_NONE); + } + qemuBlockJobSyncEnd(vm, job, QEMU_ASYNC_JOB_NONE);
Reference to 'job' obtained by qemuBlockJobDiskGetJob is leaked here.
+ + /* ... and pivot */ + if (qemuDomainBlockJobAbortImpl(driver, vm, diskName, + VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) < 0) + goto cleanup; + }
For nonActive case, the code is not waiting for the completion of the commit job, thus potentially returns success even if the commit itself fails.
+ } + + ret = 0; + + cleanup:
No point to have a cleanup label if there's no code.
+ return ret; +}

On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Doing the validation above will necessarily give you a virStorageSourcePtr for the appropriate member of the backing chain and that one should be used as argument for the commit operation.
I'm afraid I'm not following this. qemuDomainBlockCommitImpl(), just like qemuDomainBlockCommit() take paths so that's what I'm passing them. What do you mean, I should use virStorageSourcePtr as argument for the commit operation? pvl

On Fri, Apr 03, 2020 at 14:34:29 +0200, Pavel Mores wrote:
On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Doing the validation above will necessarily give you a virStorageSourcePtr for the appropriate member of the backing chain and that one should be used as argument for the commit operation.
I'm afraid I'm not following this. qemuDomainBlockCommitImpl(), just like qemuDomainBlockCommit() take paths so that's what I'm passing them. What do you mean, I should use virStorageSourcePtr as argument for the commit operation?
I'm saying you should not use paths at all. Use virStorageSource directly. If you look inside qemuDomainBlockCommitImpl there are calls which take path and look up the virStorageSource. Specifically e.g. for NBD disks path may be NULL, thus must not be used if your code has to work for everything. It's okay only to use them when passed by user, but not internally.

On Fri, Apr 03, 2020 at 02:36:35PM +0200, Peter Krempa wrote:
On Fri, Apr 03, 2020 at 14:34:29 +0200, Pavel Mores wrote:
On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Doing the validation above will necessarily give you a virStorageSourcePtr for the appropriate member of the backing chain and that one should be used as argument for the commit operation.
I'm afraid I'm not following this. qemuDomainBlockCommitImpl(), just like qemuDomainBlockCommit() take paths so that's what I'm passing them. What do you mean, I should use virStorageSourcePtr as argument for the commit operation?
I'm saying you should not use paths at all. Use virStorageSource directly. If you look inside qemuDomainBlockCommitImpl there are calls which take path and look up the virStorageSource.
Specifically e.g. for NBD disks path may be NULL, thus must not be used if your code has to work for everything. It's okay only to use them when passed by user, but not internally.
Alright, so you mean a further refactor of qemuDomainBlockCommitImpl() is needed. Thanks, I'll look into it. pvl

On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Let me check with you how exactly to perform this verification. To retrieve the names of the disks included in a snapshot, I can use its virDomainSnapshotDef which contains an array of disks (virDomainSnapshotDiskDef's) whose 'name' member can be used to identify disks. To get the state of the chain at moment the snapshot was created, I can use the 'src' member and walk its 'backingStore' list to examine the whole chain. To get the currently running configuration, I assume I can use the names of the relevant disks (obtained from the snapshot as mentioned above) to get a virDomainDiskDefPtr for each of them, whose 'src' member points to a virStorageSource that represents the topmost image in the disk's chain. From there, I can walk the 'backingStore' list to get the other images in the chain all the way to the base image. The verification succeeds if the disk names and their chains in the snapshot and the running config correspond. Is the above correct? If so, I'm still not certain how to compare two virStorageSource's. How is identity of two different virStorageSource instances is established? Thanks! pvl

On Fri, Apr 03, 2020 at 05:56:59PM +0200, Pavel Mores wrote:
On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Let me check with you how exactly to perform this verification.
To retrieve the names of the disks included in a snapshot, I can use its virDomainSnapshotDef which contains an array of disks (virDomainSnapshotDiskDef's) whose 'name' member can be used to identify disks.
To get the state of the chain at moment the snapshot was created, I can use the 'src' member and walk its 'backingStore' list to examine the whole chain.
To get the currently running configuration, I assume I can use the names of the relevant disks (obtained from the snapshot as mentioned above) to get a virDomainDiskDefPtr for each of them, whose 'src' member points to a virStorageSource that represents the topmost image in the disk's chain. From there, I can walk the 'backingStore' list to get the other images in the chain all the way to the base image.
The verification succeeds if the disk names and their chains in the snapshot and the running config correspond. Is the above correct?
If so, I'm still not certain how to compare two virStorageSource's. How is identity of two different virStorageSource instances is established?
After having a closer look into this I'm unfortunately even less clear as to how to perform the checks mentioned in the review. In addition to the problem of establishing virStorageSource identity, a similar problem seems to come up with disks. They often seem to be identified and referred to by the target name, however what happens if a snapshot is taken for 'vda', then the disk's target is changed to 'vdb' and now the snapshot is to be deleted? Is there a way to find out that the disk is still there, only under a different name? And as far as comparing chains goes - I know can retrieve the chain for a running disk and I can retrieve what the chain looked like at the point a snapshot was made. However, how do I establish they are equivalent? How can I tell that snapshots in both chains compare identical in the first place? Is the snapshot name stable and persistent enough to be useful for this? Is it enough for chains to be equivalent that the chain at the point of snapshot creation be a superset of the currently running chain? Probably yes, provided snapshots can't be inserted in the middle of a chain - but could that ever happen? Thanks in advance for any help with this! pvl

On Thu, Apr 16, 2020 at 17:07:35 +0200, Pavel Mores wrote:
On Fri, Apr 03, 2020 at 05:56:59PM +0200, Pavel Mores wrote:
On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Let me check with you how exactly to perform this verification.
To retrieve the names of the disks included in a snapshot, I can use its virDomainSnapshotDef which contains an array of disks (virDomainSnapshotDiskDef's) whose 'name' member can be used to identify disks.
To get the state of the chain at moment the snapshot was created, I can use the 'src' member and walk its 'backingStore' list to examine the whole chain.
To get the currently running configuration, I assume I can use the names of the relevant disks (obtained from the snapshot as mentioned above) to get a virDomainDiskDefPtr for each of them, whose 'src' member points to a virStorageSource that represents the topmost image in the disk's chain. From there, I can walk the 'backingStore' list to get the other images in the chain all the way to the base image.
The verification succeeds if the disk names and their chains in the snapshot and the running config correspond. Is the above correct?
If so, I'm still not certain how to compare two virStorageSource's. How is identity of two different virStorageSource instances is established?
After having a closer look into this I'm unfortunately even less clear as to how to perform the checks mentioned in the review.
Well unfortunately developing this is the main part why deleting snapshots is complicated.
In addition to the problem of establishing virStorageSource identity, a similar problem seems to come up with disks. They often seem to be identified and referred to by the target name, however what happens if a snapshot is taken for 'vda', then the disk's target is changed to 'vdb' and now the snapshot is to be
In such case I'd consier it as being different. Similarly as we can't really guarantee that the image described by a virStorageSource is the same as it was when taking the snapshot. As long as the file is named the same you can consider it identical IMO.
deleted? Is there a way to find out that the disk is still there, only under a different name?
No and it doesn't seem to be worth doing that.
And as far as comparing chains goes - I know can retrieve the chain for a running disk and I can retrieve what the chain looked like at the point a snapshot was made. However, how do I establish they are equivalent? How can I
What I meant is that you need to establish that the images you are going to merge and the images you are going to merge into are the same described by the snapshots and at the same time present in the current backing chain of the disk. If they are not in the snapshot metadata you'd potentially merge something unwanted, but this is covered by the previous paragraphs. You need to also verify that they are currently opened by qemu as you can't do blockjobs on images which are not part of the chain.
tell that snapshots in both chains compare identical in the first place? Is the snapshot name stable and persistent enough to be useful for this? Is it enough for chains to be equivalent that the chain at the point of snapshot creation be a superset of the currently running chain? Probably yes, provided snapshots can't be inserted in the middle of a chain - but could that ever happen?
The main problem is that the configuration may change from the time when the snapshot was taken. Either the XML or the on disk setup. In case a user issues a blockjob manually which merges parts of the chain you won't be able to delete the snapshot with data reorganisation via the API. Similarly to when the VM config changes to point to other images and or remove or add disks. The snapshot code should be able to at least properly reject the operation if it's unclear whether we can do the right thing. Unfortunately all the above need to be considered and thought out before because there isn't really any prior art in libvirt that could be copied. There are helpers to compare two virStorageSource structs, but that's far from all of the required logic. While the initial implementation can be limited in what it can do, the complement of what it can't do must be properly rejected to prevent any ambiguity.

On Wed, Apr 22, 2020 at 03:49:57PM +0200, Peter Krempa wrote:
On Thu, Apr 16, 2020 at 17:07:35 +0200, Pavel Mores wrote:
On Fri, Apr 03, 2020 at 05:56:59PM +0200, Pavel Mores wrote:
On Tue, Mar 31, 2020 at 12:06:15PM +0200, Peter Krempa wrote:
On Tue, Mar 31, 2020 at 11:18:29 +0200, Pavel Mores wrote:
+ } + + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) + topPath = disk->src->path; + else + topPath = snapdisk->src->path;
You must not use paths for doing this lookup. Paths at best work for local files only and this would make the code not future proof.
Also you want to verify that: - images you want to merge are actually in the backing chain - the backing chain looks as snapshots describe it (e.g you unplug vda and plug a different chain back)
Let me check with you how exactly to perform this verification.
To retrieve the names of the disks included in a snapshot, I can use its virDomainSnapshotDef which contains an array of disks (virDomainSnapshotDiskDef's) whose 'name' member can be used to identify disks.
To get the state of the chain at moment the snapshot was created, I can use the 'src' member and walk its 'backingStore' list to examine the whole chain.
To get the currently running configuration, I assume I can use the names of the relevant disks (obtained from the snapshot as mentioned above) to get a virDomainDiskDefPtr for each of them, whose 'src' member points to a virStorageSource that represents the topmost image in the disk's chain. From there, I can walk the 'backingStore' list to get the other images in the chain all the way to the base image.
The verification succeeds if the disk names and their chains in the snapshot and the running config correspond. Is the above correct?
If so, I'm still not certain how to compare two virStorageSource's. How is identity of two different virStorageSource instances is established?
After having a closer look into this I'm unfortunately even less clear as to how to perform the checks mentioned in the review.
Well unfortunately developing this is the main part why deleting snapshots is complicated.
In addition to the problem of establishing virStorageSource identity, a similar problem seems to come up with disks. They often seem to be identified and referred to by the target name, however what happens if a snapshot is taken for 'vda', then the disk's target is changed to 'vdb' and now the snapshot is to be
In such case I'd consier it as being different. Similarly as we can't really guarantee that the image described by a virStorageSource is the same as it was when taking the snapshot. As long as the file is named the same you can consider it identical IMO.
deleted? Is there a way to find out that the disk is still there, only under a different name?
No and it doesn't seem to be worth doing that.
And as far as comparing chains goes - I know can retrieve the chain for a running disk and I can retrieve what the chain looked like at the point a snapshot was made. However, how do I establish they are equivalent? How can I
What I meant is that you need to establish that the images you are going to merge and the images you are going to merge into are the same described by the snapshots and at the same time present in the current backing chain of the disk. If they are not in the snapshot metadata you'd potentially merge something unwanted, but this is covered by the previous paragraphs.
You need to also verify that they are currently opened by qemu as you can't do blockjobs on images which are not part of the chain.
tell that snapshots in both chains compare identical in the first place? Is the snapshot name stable and persistent enough to be useful for this? Is it enough for chains to be equivalent that the chain at the point of snapshot creation be a superset of the currently running chain? Probably yes, provided snapshots can't be inserted in the middle of a chain - but could that ever happen?
The main problem is that the configuration may change from the time when the snapshot was taken. Either the XML or the on disk setup. In case a user issues a blockjob manually which merges parts of the chain you won't be able to delete the snapshot with data reorganisation via the API. Similarly to when the VM config changes to point to other images and or remove or add disks. The snapshot code should be able to at least properly reject the operation if it's unclear whether we can do the right thing.
Unfortunately all the above need to be considered and thought out before because there isn't really any prior art in libvirt that could be copied. There are helpers to compare two virStorageSource structs, but that's far from all of the required logic.
While the initial implementation can be limited in what it can do, the complement of what it can't do must be properly rejected to prevent any ambiguity.
Considering all of the above, how about the following algorithm for the initial version. If nobody has any comments/objections I'd like to start implementing this tomorrow. - if the snapshot to be deleted or its parent has more than one child, bail out. For the moment only linear snapshot structures are handled. - for each disk that the snapshot to be deleted touched - get its target name - get disk definition (virDomainDiskDef) for that name and running VM by calling qemuDomainDiskByName() - if that fails the disk is no longer attached to the VM (or at least not with the same target name) so bail out as the snapshot is not safe to delete - examine the 'type' of 'src' member of the disk definition - if it's not VIR_STORAGE_TYPE_FILE bail out (for now) as I don't know how to establish equivalence of virStorageSources of types other than file (I can do that for files by comparing their paths). This assumes that all virStorageSources in a chain will have the same type (correct me if I'm wrong here). If it *can* happen that e.g. a storage source of type 'file' has a backing store of type 'block' then this check is obviously not valid. - walk the backing chain of the running disk and find the storage source of the snapshot (virDomainSnapshotDiskDef::src) there, using the equivalence check mentioned above (e.g. paths for files). Bail out on failure as the disk in the currently running VM is probably a different one than the disk that was attached to the VM under the target name at the time of snapshot creation. - now an additional check can be performed to ensure the currently running disk is the same as it was when our snapshot was created. Assuming that snapshots cannot be inserted in the middle of a chain, the backing chain of the disk at the point of snapshot creation should be an ordered superset of the part of the running disk's backing chain between the base image and the our snapshot. E.g. if we create snapshots 1 2 3 4 5 6 and we want to delete 6 at some later point, the running disk's backing chain might be 2 3 4 6 8 9 10. This should be fine since 6 is still there and its backing chain is a subset of what it was when 6 was created (meaning that 1 and 5 have probably been deleted already which is fine). We don't care that 7 10 were created since 6 was and 7 was already deleted. - now we can delete the snapshot by blockcommitting it into its parent. I believe this incorporates all of the remarks above while only handling linear snapshot structures and file-based storage sources for now (but extending this with handling of storage sources types shouldn't be very hard once it's clear how to tell if they are the same). Thanks, pvl
participants (2)
-
Pavel Mores
-
Peter Krempa