On Wed, Jul 17, 2024 at 21:21:41 +0300, Nikolai Barybin via Devel wrote:
The usage of HMP commands is highly discouraged by qemu. Moreover,
current snapshot creation routine does not provide flexibility in
choosing target device for VM state snapshot.
This patch makes use of QMP commands snapshot-save/delete and by
default chooses first writable non-shared qcow2 disk (if present)
as target for VM state.
Signed-off-by: Nikolai Barybin <nikolai.barybin(a)virtuozzo.com>
---
src/qemu/qemu_snapshot.c | 207 ++++++++++++++++++++++++++++++++++++---
1 file changed, 194 insertions(+), 13 deletions(-)
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index f5260c4a22..42d9385fd5 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -308,6 +308,114 @@ qemuSnapshotCreateInactiveExternal(virQEMUDriver *driver,
return ret;
}
+static int
+qemuSnapshotActiveInternalGetWrdevListHelper(virDomainObj *vm,
+ char ***wrdevs)
+{
+ size_t wrdevCount = 0;
+ size_t i = 0;
+ g_auto(GStrv) wrdevsInternal = NULL;
+
+ *wrdevs = NULL;
+ for (i = 0; i < vm->def->ndisks; i++) {
+ virDomainDiskDef *disk = vm->def->disks[i];
+ if (!disk->src->readonly && disk->src->shared) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("found shared writable disk, VM snapshotting has no
sense"));
+ return -1;
+ }
+
+ if (!disk->src->readonly && !disk->src->shared &&
+ disk->src->format != VIR_STORAGE_FILE_QCOW2) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("found writable non-qcow2 disk, snapshotting
forbidden"));
+ return -1;
+ }
+
+ if (!disk->src->readonly && !disk->src->shared &&
+ disk->src->format == VIR_STORAGE_FILE_QCOW2) {
+ if (wrdevCount == 0)
+ wrdevsInternal = g_new0(char *, vm->def->ndisks + 2);
+
+ wrdevsInternal[wrdevCount++] = g_strdup(disk->src->nodenameformat);
+ }
+ }
1) many of these are already checked in the functions which make sure
that the snapshot is possible.
2) Use of VIR_ERR_INTERNAL_ERROR is not appropriate in any of the cases
above.
3) the code is structured weirdly repeating the same checks multiple
times which makes it hard to understand and impossible to extend.
This function will need to be rewritten. It also must not use the VM
definition but rather the snapshot definition instead which already has
the checked configuration.
Also as I'll explain below it's unlikely that this logic will be usable
for snapshot deletion due to the possibility that the VM definition was
changed and thus will require much more thorough checking.
+ if (wrdevCount > 0 && vm->def->os.loader
&& vm->def->os.loader->nvram &&
+ vm->def->os.loader->nvram->format == VIR_STORAGE_FILE_QCOW2)
+ wrdevsInternal[wrdevCount] =
g_strdup(vm->def->os.loader->nvram->nodenameformat);
+
+ if (wrdevCount == 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
Definitely not an internal error. User requested something that can't be
done.
+ _("no writable device found for
internal snapshot creation/deletion"));
+ return -1;
+ }
+
+ *wrdevs = g_steal_pointer(&wrdevsInternal);
+ return 0;
+}
+
+
+static int
+qemuSnapshotCreateActiveInternalDone(virDomainObj *vm)
+{
+ qemuBlockJobData *job = NULL;
+ qemuDomainObjPrivate *priv = vm->privateData;
+
+ if (!(job = virHashLookup(priv->blockjobs,
g_strdup_printf("snapsave%d", vm->def->id)))) {
Since the code which eventually calls this has to create the job in the
first place, you can simply pass in the job struct instead of looking it
up all the time.
In the deletion code you did exactly that thing even.
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to lookup blockjob 'snapsave%1$d'"),
vm->def->id);
+ return -1;
+ }
+
+ qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_NONE);
+ if (job->state == VIR_DOMAIN_BLOCK_JOB_FAILED) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
This'd be more likely an OPERATION_FAILED.
+ _("snapshot-save job failed:
%1$s"), NULLSTR(job->errmsg));
+ return -1;
+ }
+
+ return job->state == VIR_DOMAIN_BLOCK_JOB_COMPLETED ? 1 : 0;
+}
+
+
+static int
+qemuSnapshotCreateActiveInternalStart(virDomainObj *vm,
+ const char *name)
+{
+ qemuBlockJobData *job = NULL;
+ g_auto(GStrv) wrdevs = NULL;
+ int ret = -1;
+ int rc = 0;
+
+ if (qemuSnapshotActiveInternalGetWrdevListHelper(vm, &wrdevs) < 0)
+ return -1;
+
+ if (!(job = qemuBlockJobDiskNew(vm, NULL, QEMU_BLOCKJOB_TYPE_SNAPSHOT_SAVE,
+ g_strdup_printf("snapsave%d",
vm->def->id)))) {
The buffer allocated by g_strdup_printf() is leaked as
qemuBlockJobDiskNew() just copies the passed values.
This already reports errors ..
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to create new blockjob"));
.. so you overwrite it with a much worse error message.
+ return -1;
+ }
+
+ qemuBlockJobSyncBegin(job);
+ if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) < 0) {
+ ret = -1;
+ goto cleanup;
+ }
+
+ rc = qemuMonitorSnapshotSave(qemuDomainGetMonitor(vm), job->name,
+ name, wrdevs[0], wrdevs);
+ qemuDomainObjExitMonitor(vm);
+ if (rc == 0) {
+ qemuBlockJobStarted(job, vm);
+ ret = 0;
+ }
+
+ cleanup:
+ qemuBlockJobStartupFinalize(vm, job);
+ return ret;
This should return the job so that the caller can use it.
+}
+
/* The domain is expected to be locked and active. */
static int
@@ -321,6 +429,7 @@ qemuSnapshotCreateActiveInternal(virQEMUDriver *driver,
bool resume = false;
virDomainSnapshotDef *snapdef = virDomainSnapshotObjGetDef(snap);
int ret = -1;
+ int rv = 0;
if (!qemuMigrationSrcIsAllowed(vm, false, VIR_ASYNC_JOB_SNAPSHOT, 0))
goto cleanup;
@@ -342,15 +451,30 @@ qemuSnapshotCreateActiveInternal(virQEMUDriver *driver,
}
}
- if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) < 0) {
- resume = false;
- goto cleanup;
- }
+ if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SNAPSHOT_SAVE)) {
+ if ((ret = qemuSnapshotCreateActiveInternalStart(vm, snap->def->name))
< 0) {
+ resume = false;
+ goto cleanup;
+ }
- ret = qemuMonitorCreateSnapshot(priv->mon, snap->def->name);
- qemuDomainObjExitMonitor(vm);
- if (ret < 0)
- goto cleanup;
+ while ((rv = qemuSnapshotCreateActiveInternalDone(vm)) != 1) {
+ if (rv < 0 || qemuDomainObjWait(vm) < 0) {
+ ret = -1;
+ goto cleanup;
+ }
+ }
+ /* Legacy support for QEMU versions < 6.0. */
This comment belongs to the other block.
+ } else {
+ if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) < 0) {
+ resume = false;
+ goto cleanup;
+ }
+
+ ret = qemuMonitorCreateSnapshot(priv->mon, snap->def->name);
+ qemuDomainObjExitMonitor(vm);
+ if (ret < 0)
+ goto cleanup;
+ }
if (!(snapdef->cookie = (virObject *) qemuDomainSaveCookieNew(vm)))
goto cleanup;
@@ -3603,6 +3727,54 @@ qemuSnapshotDiscardMetadata(virDomainObj *vm,
}
+static int
+qemuSnapshotDiscardActiveInternal(virDomainObj *vm,
+ const char *name)
+{
+ qemuBlockJobData *job = NULL;
+ g_auto(GStrv) wrdevs = NULL;
+ int ret = -1;
+ int rc = 0;
+
+ if (qemuSnapshotActiveInternalGetWrdevListHelper(vm, &wrdevs) < 0)
When discarding snapshots using the active definition may be wrong as
the disk sources may have changed. This will need a much more thorough
check and use the data from the snapshot definition as well.
This is what I need to figure out before having a working version and
it doesn't make much sense for me to explain everything here and then
for you to update the code.
+ return -1;
+
+ if (!(job = qemuBlockJobDiskNew(vm, NULL, QEMU_BLOCKJOB_TYPE_SNAPSHOT_DELETE,
+ g_strdup_printf("snapdelete%d",
vm->def->id)))) {
Same memleak as noted above
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to create new blockjob"));
Same error overwrite.
+ return -1;
+ }
+
+ qemuBlockJobSyncBegin(job);
+ if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) < 0)
+ goto cleanup;
+
+ rc = qemuMonitorSnapshotDelete(qemuDomainGetMonitor(vm), job->name, name,
wrdevs);
+ qemuDomainObjExitMonitor(vm);
+ if (rc == 0) {
+ qemuBlockJobStarted(job, vm);
+ ret = 0;
+ } else {
+ goto cleanup;
+ }
+
+ while (job->state != VIR_DOMAIN_BLOCK_JOB_COMPLETED) {
+ qemuDomainObjWait(vm);
+ qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_NONE);
+ if (job->state == VIR_DOMAIN_BLOCK_JOB_FAILED) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
Once again wrong error code.
+ _("snapshot-delete job failed:
%1$s"), NULLSTR(job->errmsg));
+ ret = -1;
+ break;
+ }
+ }
+
+ cleanup:
+ qemuBlockJobStartupFinalize(vm, job);
+ return ret;
+}
+
+
/* Discard one snapshot (or its metadata), without reparenting any children. */
static int
qemuSnapshotDiscardImpl(virQEMUDriver *driver,
@@ -3642,14 +3814,23 @@ qemuSnapshotDiscardImpl(virQEMUDriver *driver,
if (qemuSnapshotDiscardExternal(vm, snap, externalData) < 0)
return -1;
} else {
+ qemuDomainObjPrivate *priv = vm->privateData;
+
/* Similarly as internal snapshot creation we would use a regular job
* here so set a mask to forbid any other job. */
qemuDomainObjSetAsyncJobMask(vm, VIR_JOB_NONE);
- if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) < 0)
- return -1;
- /* we continue on even in the face of error */
- qemuMonitorDeleteSnapshot(qemuDomainGetMonitor(vm),
snap->def->name);
- qemuDomainObjExitMonitor(vm);
+
+ if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_SNAPSHOT_DELETE)) {
+ qemuSnapshotDiscardActiveInternal(vm, snap->def->name);
+ /* Legacy support for QEMU versions < 6.0. */
+ } else {
+ if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) <
0)
+ return -1;
+ /* we continue on even in the face of error */
+ qemuMonitorDeleteSnapshot(qemuDomainGetMonitor(vm),
snap->def->name);
+ qemuDomainObjExitMonitor(vm);
+ }
+
qemuDomainObjSetAsyncJobMask(vm, VIR_JOB_DEFAULT_MASK);
}
}
--
2.43.5