On Thu, Dec 08, 2022 at 14:31:02 +0100, Pavel Hrdina wrote:
When deleting external snapshots the operation may fail at any point
which could lead to situation that some disks finished the block commit
operation but for some disks it failed and the libvirt job ends.
In order to make sure that the qcow2 images are in consistent state
introduce new element "<invalid/>" that will mark the disk in snapshot
metadata as invalid until the snapshot delete is completed successfully.
This will prevent deleting snapshot with the invalid disk and in future
reverting to snapshot with the invalid disk.
Signed-off-by: Pavel Hrdina <phrdina(a)redhat.com>
---
src/conf/snapshot_conf.c | 6 ++++
src/conf/snapshot_conf.h | 1 +
src/qemu/qemu_snapshot.c | 59 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+)
diff --git a/src/conf/snapshot_conf.c b/src/conf/snapshot_conf.c
index 4b5b908d66..7517208d79 100644
--- a/src/conf/snapshot_conf.c
+++ b/src/conf/snapshot_conf.c
@@ -158,6 +158,9 @@ virDomainSnapshotDiskDefParseXML(xmlNodePtr node,
return -1;
}
+ if (flags & VIR_DOMAIN_DEF_PARSE_STATUS)
You want VIR_DOMAIN_SNAPSHOT_PARSE_REDEFINE as this parser doesn't
work with VIR_DOMAIN_DEF_PARSE_* flags.
In fact the only reason this works is that VIR_DOMAIN_DEF_PARSE_STATUS
and VIR_DOMAIN_SNAPSHOT_PARSE_REDEFINE have the same value.
+ def->invalid = !!virXPathNode("./invalid",
ctxt);
Based on my coment below ... the 'invalid' name of this flag might be a
bit misleading. I suppose we want to say that there's another operation
in progress, but I don't have currently ideas for a better name.
+
if ((cur = virXPathNode("./source", ctxt)) &&
virDomainStorageSourceParse(cur, ctxt, src, flags, xmlopt) < 0)
return -1;
[...]
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index 3fbc3ce65f..253de1196b 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -2302,6 +2302,12 @@ qemuSnapshotDeleteExternalPrepare(virDomainObj *vm,
if (snapDisk->snapshot == VIR_DOMAIN_SNAPSHOT_LOCATION_NO)
continue;
+ if (snapDisk->invalid) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("snapshot is in invalid state"));
This error is not really actionable. You want to explain that it's most
likely a previous operation that failed and needs to be re-tried.
+ return NULL;
+ }
+
data = g_new0(qemuSnapshotDeleteExternalData, 1);
data->snapDisk = snapDisk;
@@ -2620,6 +2626,53 @@ qemuSnapshotJobFinishing(virDomainObj *vm,
}
+/**
+ * qemuSnapshotSetInvalid:
+ * @vm: vm object
+ * @snap: snapshot object that contains parent disk
+ * @disk: disk from the snapshot we are deleting
+ * @invalid: boolean to set/unset invalid state
+ *
+ * @snap should point to a ancestor snapshot from the snapshot tree that
+ * affected the @disk which doesn't have to be the direct parent.
+ *
+ * When setting snapshot with parent disk as invalid due to snapshot being
+ * deleted we should not mark the whole snapshot as invalid but only the
+ * affected disks because the snapshot can contain other disks that we are
+ * not modifying at the moment.
+ *
+ * Return 0 on success, -1 on error.
+ * */
+static int
+qemuSnapshotSetInvalid(virDomainObj *vm,
+ virDomainMomentObj *snap,
+ virDomainSnapshotDiskDef *disk,
+ bool invalid)
+{
+ ssize_t i;
+ qemuDomainObjPrivate *priv = vm->privateData;
+ virQEMUDriver *driver = priv->driver;
+ g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+ virDomainSnapshotDef *snapdef = NULL;
+
+ if (!snap)
+ return 0;
+
+ snapdef = virDomainSnapshotObjGetDef(snap);
+ if (!snapdef)
+ return 0;
+
+ for (i = 0; i < snapdef->ndisks; i++) {
+ virDomainSnapshotDiskDef *snapDisk = &(snapdef->disks[i]);
+
+ if (STREQ(snapDisk->name, disk->name))
+ snapDisk->invalid = invalid;
+ }
I think you also want to mark the inactiveDef if present.
+
+ return qemuDomainSnapshotWriteMetadata(vm, snap, driver->xmlopt,
cfg->snapshotDir);
+}