[PATCH 0/6] qemu: refactor disk migration safety check and add corner case exemption for Kubevirt's usage

Patches 1-3 are refactors that simplify the code and prepare it. Patch 4 fixed bug where 'data_file' is not checked Patches 5 and 6 add a workaround for Kubevirt's corner case migration approach. Peter Krempa (6): qemuMigrationSrcIsSafe: Drop 'DEBUG' message about qemu supporting cache dropping qemuMigrationSrcIsSafe: Extract code for checking safe migrability of one disk qemuMigrationSrcIsSafeDisk: Extract safe migration checks for one storage source qemuMigrationSrcIsSafeDisk: Check also data file propertiues for migrability qemuBlockGetNamedNodeData: Extract 'data_file_raw' flag qemuMigrationSrcIsSafeDisk: Allow non-shared qcow2's with raw data file src/qemu/qemu_migration.c | 188 ++++++++++++------ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 3 + tests/qemublocktest.c | 2 + tests/qemublocktestdata/bitmap/synthetic.json | 1 + tests/qemublocktestdata/bitmap/synthetic.out | 1 + 6 files changed, 133 insertions(+), 65 deletions(-) -- 2.51.0

From: Peter Krempa <pkrempa@redhat.com> The feature exists for a long time, no need to add extra notice about it. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index a8f4dd489c..dd171f953c 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1805,14 +1805,10 @@ qemuMigrationSrcIsSafe(virDomainObj *vm, * which case cache mode doesn't matter) or used with cache=none or used with cache=directsync */ if (disk->src->shared || disk->cachemode == VIR_DOMAIN_DISK_CACHE_DISABLE || - disk->cachemode == VIR_DOMAIN_DISK_CACHE_DIRECTSYNC) + disk->cachemode == VIR_DOMAIN_DISK_CACHE_DIRECTSYNC || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATION_FILE_DROP_CACHE)) continue; - if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATION_FILE_DROP_CACHE)) { - VIR_DEBUG("QEMU supports flushing caches; migration is safe"); - continue; - } - virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", _("Migration may lead to data corruption if disks use cache other than none or directsync")); return false; -- 2.51.0

From: Peter Krempa <pkrempa@redhat.com> Extract the code which checks a source of a single disk for safe migratability into 'qemuMigrationSrcIsSafeDisk'. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 132 ++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index dd171f953c..96709dfcdd 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1732,6 +1732,74 @@ qemuMigrationSrcIsAllowed(virDomainObj *vm, return true; } + +static bool +qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, + virQEMUCaps *qemuCaps, + virQEMUDriverConfig *cfg) +{ + virStorageType actualType = virStorageSourceGetActualType(disk->src); + bool unsafe = false; + int rc; + + /* Disks without any source (i.e. floppies and CD-ROMs) OR readonly are safe. */ + if (virStorageSourceIsEmpty(disk->src) || + disk->src->readonly) + return true; + + /* However, disks on local FS (e.g. ext4) are not safe. */ + switch (actualType) { + case VIR_STORAGE_TYPE_FILE: + if ((rc = virFileIsSharedFS(disk->src->path, cfg->sharedFilesystems)) < 0) { + return false; + } else if (rc == 0) { + unsafe = true; + } + + if ((rc = virFileIsClusterFS(disk->src->path)) < 0) + return false; + else if (rc == 1) + return true; + break; + + case VIR_STORAGE_TYPE_NETWORK: + return true; + + case VIR_STORAGE_TYPE_NVME: + unsafe = true; + break; + + case VIR_STORAGE_TYPE_VHOST_USER: + case VIR_STORAGE_TYPE_VHOST_VDPA: + case VIR_STORAGE_TYPE_NONE: + case VIR_STORAGE_TYPE_BLOCK: + case VIR_STORAGE_TYPE_DIR: + case VIR_STORAGE_TYPE_VOLUME: + case VIR_STORAGE_TYPE_LAST: + break; + } + + if (unsafe) { + virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", + _("Migration without shared storage is unsafe")); + return false; + } + + /* Our code elsewhere guarantees shared disks are either readonly (in + * which case cache mode doesn't matter) or used with cache=none or used with cache=directsync */ + if (!(disk->src->shared || + disk->cachemode == VIR_DOMAIN_DISK_CACHE_DISABLE || + disk->cachemode == VIR_DOMAIN_DISK_CACHE_DIRECTSYNC || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATION_FILE_DROP_CACHE))) { + virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", + _("Migration may lead to data corruption if disks use cache other than none or directsync")); + return false; + } + + return true; +} + + static bool qemuMigrationSrcIsSafe(virDomainObj *vm, const char **migrate_disks, @@ -1739,79 +1807,21 @@ qemuMigrationSrcIsSafe(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; - virQEMUCaps *qemuCaps = priv->qemuCaps; - virQEMUDriver *driver = priv->driver; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); bool storagemigration = flags & (VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC); size_t i; - int rc; for (i = 0; i < vm->def->ndisks; i++) { virDomainDiskDef *disk = vm->def->disks[i]; - const char *src = virDomainDiskGetSource(disk); - virStorageType actualType = virStorageSourceGetActualType(disk->src); - bool unsafe = false; - - /* Disks without any source (i.e. floppies and CD-ROMs) - * OR readonly are safe. */ - if (virStorageSourceIsEmpty(disk->src) || - disk->src->readonly) - continue; - /* Disks which are migrated by qemu are safe too. */ + /* Disks which are migrated by qemu are safe */ if (storagemigration && qemuMigrationAnyCopyDisk(disk, migrate_disks)) continue; - /* However, disks on local FS (e.g. ext4) are not safe. */ - switch (actualType) { - case VIR_STORAGE_TYPE_FILE: - if ((rc = virFileIsSharedFS(src, cfg->sharedFilesystems)) < 0) { - return false; - } else if (rc == 0) { - unsafe = true; - } - if ((rc = virFileIsClusterFS(src)) < 0) - return false; - else if (rc == 1) - continue; - break; - case VIR_STORAGE_TYPE_NETWORK: - /* But network disks are safe again. */ - continue; - - case VIR_STORAGE_TYPE_NVME: - unsafe = true; - break; - - case VIR_STORAGE_TYPE_VHOST_USER: - case VIR_STORAGE_TYPE_VHOST_VDPA: - case VIR_STORAGE_TYPE_NONE: - case VIR_STORAGE_TYPE_BLOCK: - case VIR_STORAGE_TYPE_DIR: - case VIR_STORAGE_TYPE_VOLUME: - case VIR_STORAGE_TYPE_LAST: - break; - } - - if (unsafe) { - virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", - _("Migration without shared storage is unsafe")); + if (!qemuMigrationSrcIsSafeDisk(disk, priv->qemuCaps, cfg)) return false; - } - - /* Our code elsewhere guarantees shared disks are either readonly (in - * which case cache mode doesn't matter) or used with cache=none or used with cache=directsync */ - if (disk->src->shared || - disk->cachemode == VIR_DOMAIN_DISK_CACHE_DISABLE || - disk->cachemode == VIR_DOMAIN_DISK_CACHE_DIRECTSYNC || - virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATION_FILE_DROP_CACHE)) - continue; - - virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", - _("Migration may lead to data corruption if disks use cache other than none or directsync")); - return false; } return true; -- 2.51.0

From: Peter Krempa <pkrempa@redhat.com> Further split up the code originally in 'qemuMigrationSrcIsSafe' to separate checks concerning a single storage source. The code will then be reused to check the safe migration state also for the data file (qcow2 feature that allows store of data separated from the qcow2 metadata). Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 71 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 96709dfcdd..ae26f74f8c 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1733,40 +1733,34 @@ qemuMigrationSrcIsAllowed(virDomainObj *vm, } -static bool -qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, - virQEMUCaps *qemuCaps, - virQEMUDriverConfig *cfg) +static int +qemuMigrationSrcCheckStorageSourceSafety(virStorageSource *src, + virQEMUDriverConfig *cfg, + bool *unsafe_storage, + bool *requires_safe_cache) { - virStorageType actualType = virStorageSourceGetActualType(disk->src); - bool unsafe = false; - int rc; + switch (virStorageSourceGetActualType(src)) { + case VIR_STORAGE_TYPE_FILE: { + int rc_cluster = virFileIsClusterFS(src->path); + int rc_shared = virFileIsSharedFS(src->path, cfg->sharedFilesystems); - /* Disks without any source (i.e. floppies and CD-ROMs) OR readonly are safe. */ - if (virStorageSourceIsEmpty(disk->src) || - disk->src->readonly) - return true; + if (rc_cluster < 0 || rc_shared < 0) + return -1; - /* However, disks on local FS (e.g. ext4) are not safe. */ - switch (actualType) { - case VIR_STORAGE_TYPE_FILE: - if ((rc = virFileIsSharedFS(disk->src->path, cfg->sharedFilesystems)) < 0) { - return false; - } else if (rc == 0) { - unsafe = true; - } + if (rc_cluster == 0) { + *requires_safe_cache = true; - if ((rc = virFileIsClusterFS(disk->src->path)) < 0) - return false; - else if (rc == 1) - return true; + if (rc_shared == 0) + *unsafe_storage = true; + } + } break; case VIR_STORAGE_TYPE_NETWORK: - return true; + break; case VIR_STORAGE_TYPE_NVME: - unsafe = true; + *unsafe_storage = true; break; case VIR_STORAGE_TYPE_VHOST_USER: @@ -1776,10 +1770,32 @@ qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, case VIR_STORAGE_TYPE_DIR: case VIR_STORAGE_TYPE_VOLUME: case VIR_STORAGE_TYPE_LAST: + *requires_safe_cache = true; break; } - if (unsafe) { + return 0; +} + + +static bool +qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, + virQEMUCaps *qemuCaps, + virQEMUDriverConfig *cfg) +{ + bool unsafe_storage = false; + bool requires_safe_cache = false; + + /* Disks without any source (i.e. floppies and CD-ROMs) OR readonly are safe. */ + if (virStorageSourceIsEmpty(disk->src) || + disk->src->readonly) + return true; + + if (qemuMigrationSrcCheckStorageSourceSafety(disk->src, cfg, &unsafe_storage, + &requires_safe_cache) < 0) + return false; + + if (unsafe_storage) { virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", _("Migration without shared storage is unsafe")); return false; @@ -1787,7 +1803,8 @@ qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, /* Our code elsewhere guarantees shared disks are either readonly (in * which case cache mode doesn't matter) or used with cache=none or used with cache=directsync */ - if (!(disk->src->shared || + if (requires_safe_cache && + !(disk->src->shared || disk->cachemode == VIR_DOMAIN_DISK_CACHE_DISABLE || disk->cachemode == VIR_DOMAIN_DISK_CACHE_DIRECTSYNC || virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATION_FILE_DROP_CACHE))) { -- 2.51.0

From: Peter Krempa <pkrempa@redhat.com> If the qcow2 data file feature (which separates the data into a separate file from the metadata) is in use the migration safety check ought co consider both the metadata and the data file for safe migration. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index ae26f74f8c..2e52e73f48 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1795,6 +1795,12 @@ qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, &requires_safe_cache) < 0) return false; + if (disk->src->dataFileStore && + qemuMigrationSrcCheckStorageSourceSafety(disk->src->dataFileStore, + cfg, &unsafe_storage, + &requires_safe_cache) < 0) + return false; + if (unsafe_storage) { virReportError(VIR_ERR_MIGRATE_UNSAFE, "%s", _("Migration without shared storage is unsafe")); -- 2.51.0

In the commit summary: *properties On a Wednesday in 2025, Peter Krempa via Devel wrote:
From: Peter Krempa <pkrempa@redhat.com>
If the qcow2 data file feature (which separates the data into a separate file from the metadata) is in use the migration safety check ought co
ought to Jano
consider both the metadata and the data file for safe migration.
Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 6 ++++++ 1 file changed, 6 insertions(+)

From: Peter Krempa <pkrempa@redhat.com> The 'data_file_raw' flag of qcow2 notifies that all data inside the 'data_file' is a raw image so can be used standalone without the metadata without any problem (except for not updating the dirty bitmaps). Our migration safety checks will allow skipping the migration safety check for these files as during migration we know it's safe to re-create this on the destination in a location that isn't shared. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_monitor.h | 3 +++ src/qemu/qemu_monitor_json.c | 3 +++ tests/qemublocktest.c | 2 ++ tests/qemublocktestdata/bitmap/synthetic.json | 1 + tests/qemublocktestdata/bitmap/synthetic.out | 1 + 5 files changed, 10 insertions(+) diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index 750e7444fc..8ef85ceb0a 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -748,6 +748,9 @@ struct _qemuBlockNamedNodeData { /* qcow2 subcluster allocation -> extended_l2 */ bool qcow2extendedL2; + + /* qcow2 data file 'raw' feature is enabled */ + bool qcow2dataFileRaw; }; GHashTable * diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 2c8ae5198c..9caade7bc9 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2775,6 +2775,9 @@ qemuMonitorJSONBlockGetNamedNodeDataWorker(size_t pos G_GNUC_UNUSED, ignore_value(virJSONValueObjectGetBoolean(qcow2props, "extended-l2", &ent->qcow2extendedL2)); + + ignore_value(virJSONValueObjectGetBoolean(qcow2props, "data-file-raw", + &ent->qcow2dataFileRaw)); } } diff --git a/tests/qemublocktest.c b/tests/qemublocktest.c index 095a54e22c..47746207cc 100644 --- a/tests/qemublocktest.c +++ b/tests/qemublocktest.c @@ -589,6 +589,8 @@ testQemuDetectBitmapsWorker(GHashTable *nodedata, virBufferAsprintf(buf, "%s:\n", nodename); if (data->qcow2v2) virBufferAddLit(buf, " qcow2 v2\n"); + if (data->qcow2dataFileRaw) + virBufferAddLit(buf, " data-file-raw\n"); virBufferAdjustIndent(buf, 1); for (i = 0; i < data->nbitmaps; i++) { diff --git a/tests/qemublocktestdata/bitmap/synthetic.json b/tests/qemublocktestdata/bitmap/synthetic.json index cd468a42a2..1753830f69 100644 --- a/tests/qemublocktestdata/bitmap/synthetic.json +++ b/tests/qemublocktestdata/bitmap/synthetic.json @@ -15,6 +15,7 @@ "compat": "0.10", "compression-type": "zlib", "lazy-refcounts": false, + "data-file-raw": true, "bitmaps": [ { "flags": [ diff --git a/tests/qemublocktestdata/bitmap/synthetic.out b/tests/qemublocktestdata/bitmap/synthetic.out index 2d9545fc9b..45423903a0 100644 --- a/tests/qemublocktestdata/bitmap/synthetic.out +++ b/tests/qemublocktestdata/bitmap/synthetic.out @@ -1,5 +1,6 @@ libvirt-1-format: qcow2 v2 + data-file-raw current: record:1 busy:0 persist:1 inconsist:1 gran:65536 dirty:0 top-ok: record:1 busy:0 persist:1 inconsist:0 gran:65536 dirty:0 top-inactive: record:0 busy:0 persist:1 inconsist:0 gran:65536 dirty:0 -- 2.51.0

From: Peter Krempa <pkrempa@redhat.com> A qcow2 image which uses a data file and the 'data_file_raw' flag is effectively a raw image with the qcow2 wrapper used only to store metadata (block dirty bitmaps). Since the dirty bitmaps are always migrated using the migration stream it's technically not required that the qcow2 overlay itself is shared between the destinations. Management tools like Kubevirt want to migrate VMs which have a qcow2 overlay with the above config stored in a location that is not shared, but the data file itself is. This patch adds code that skips the validation of the overlay since it's not needed to ensure data consistency in that very specific case. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_migration.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 2e52e73f48..7d87b3073b 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1779,19 +1779,47 @@ qemuMigrationSrcCheckStorageSourceSafety(virStorageSource *src, static bool -qemuMigrationSrcIsSafeDisk(virDomainDiskDef *disk, +qemuMigrationSrcIsSafeDisk(virDomainObj *vm, + virDomainDiskDef *disk, virQEMUCaps *qemuCaps, - virQEMUDriverConfig *cfg) + virQEMUDriverConfig *cfg, + GHashTable **blockNamedNodeData) { bool unsafe_storage = false; bool requires_safe_cache = false; + bool skip_overlay_check = false; /* Disks without any source (i.e. floppies and CD-ROMs) OR readonly are safe. */ if (virStorageSourceIsEmpty(disk->src) || disk->src->readonly) return true; - if (qemuMigrationSrcCheckStorageSourceSafety(disk->src, cfg, &unsafe_storage, + if (disk->src->dataFileStore && + !virStorageSourceHasBacking(disk->src)) { + qemuBlockNamedNodeData *nodedata; + + /* As a special case if the topmost disk image is a qcow2 with a + * data_file and the 'data_file_raw' option enabled, the overlay itself + * contains no useful data. Kubevirt uses this setup for migrations + * where the qcow2 overlay is used for block dirty bitmaps which are + * migrated using migration stream and kubevirt thus pre-creates the + * overlay rather than putting it on shared storage */ + + if (!*blockNamedNodeData && + !(*blockNamedNodeData = qemuBlockGetNamedNodeData(vm, + VIR_ASYNC_JOB_MIGRATION_OUT))) + return false; + + if ((nodedata = virHashLookup(*blockNamedNodeData, + qemuBlockStorageSourceGetFormatNodename(disk->src)))) { + + if (nodedata->qcow2dataFileRaw) + skip_overlay_check = true; + } + } + + if (!skip_overlay_check && + qemuMigrationSrcCheckStorageSourceSafety(disk->src, cfg, &unsafe_storage, &requires_safe_cache) < 0) return false; @@ -1831,6 +1859,7 @@ qemuMigrationSrcIsSafe(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); + g_autoptr(GHashTable) blockNamedNodeData = NULL; bool storagemigration = flags & (VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC); size_t i; @@ -1843,7 +1872,7 @@ qemuMigrationSrcIsSafe(virDomainObj *vm, qemuMigrationAnyCopyDisk(disk, migrate_disks)) continue; - if (!qemuMigrationSrcIsSafeDisk(disk, priv->qemuCaps, cfg)) + if (!qemuMigrationSrcIsSafeDisk(vm, disk, priv->qemuCaps, cfg, &blockNamedNodeData)) return false; } -- 2.51.0

On a Wednesday in 2025, Peter Krempa via Devel wrote:
Patches 1-3 are refactors that simplify the code and prepare it. Patch 4 fixed bug where 'data_file' is not checked Patches 5 and 6 add a workaround for Kubevirt's corner case migration approach.
Peter Krempa (6): qemuMigrationSrcIsSafe: Drop 'DEBUG' message about qemu supporting cache dropping qemuMigrationSrcIsSafe: Extract code for checking safe migrability of one disk qemuMigrationSrcIsSafeDisk: Extract safe migration checks for one storage source qemuMigrationSrcIsSafeDisk: Check also data file propertiues for migrability qemuBlockGetNamedNodeData: Extract 'data_file_raw' flag qemuMigrationSrcIsSafeDisk: Allow non-shared qcow2's with raw data file
src/qemu/qemu_migration.c | 188 ++++++++++++------ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 3 + tests/qemublocktest.c | 2 + tests/qemublocktestdata/bitmap/synthetic.json | 1 + tests/qemublocktestdata/bitmap/synthetic.out | 1 + 6 files changed, 133 insertions(+), 65 deletions(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano
participants (2)
-
Ján Tomko
-
Peter Krempa