On incoming migration qemu doesn't activate the block graph nodes right
away. This is done so that locking of the images is handled properly.

The block nodes are normally re-activated when starting the CPUs after
migration, but when the VM is left paused (e.g. because a paused VM was
migrated) qemu does not re-activate them.

This means that blockjobs which want to write to an existing backing
chain member fail. Read-only jobs generally succeeded with older qemu
versions, but even that was not intended.

With new qemu you'll instead always get an error when attempting to
access an inactive node:

error: internal error: unable to execute QEMU command 'blockdev-mirror': Inactive
'libvirt-1-storage' can't be a backing child of active '#block052'

This is the case both for explicit blockjobs (virsh blockcopy) and for
non-shared-storage migration (virsh migrate --copy-storage-all).
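
As an illustration (the domain name, disk target, and destination path
below are made up), attempting a block copy on a domain that was
migrated while paused now fails with the error quoted above:

  # virsh blockcopy example-vm vda /var/lib/libvirt/images/copy.qcow2 --wait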

Since qemu now provides the 'blockdev-set-active' QMP command, which
can re-activate the nodes on demand, re-activate them in the same
situations in which we'd otherwise be starting the vCPUs if the VM
weren't left paused.
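
For reference, with no node name specified the command operates on all
block nodes, so the re-activation amounts to a single QMP command of
roughly this shape (illustrative only, per the qemu QAPI schema):

  { "execute": "blockdev-set-active", "arguments": { "active": true } }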

The only exception is the source side of a failed post-copy migration:
the VM already ran on the destination, so it will never run on the
source again even when the migration is recovered.

Resolves:
https://issues.redhat.com/browse/RHEL-78398
Signed-off-by: Peter Krempa <pkrempa@redhat.com>
---
src/qemu/qemu_migration.c | 46 +++++++++++++++++++++++++++++++++++----
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 53bbbee629..159521fbf5 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -220,6 +220,39 @@ qemuMigrationSrcStoreDomainState(virDomainObj *vm)
 }
 
 
+/**
+ * qemuMigrationBlockNodesReactivate:
+ *
+ * In case when we're keeping the VM paused qemu will not re-activate the block
+ * device backend tree so blockjobs would fail. In case when qemu supports the
+ * 'blockdev-set-active' command this function will re-activate the block nodes.
+ */
+static void
+qemuMigrationBlockNodesReactivate(virDomainObj *vm,
+                                  virDomainAsyncJob asyncJob)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    int rc;
+
+    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV_SET_ACTIVE))
+        return;
+
+    VIR_DEBUG("re-activating block nodes");
+
+    if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
+        return;
+
+    rc = qemuMonitorBlockdevSetActive(priv->mon, NULL, true);
+
+    qemuDomainObjExitMonitor(vm);
+
+    if (rc < 0) {
+        VIR_WARN("failed to re-activate block nodes after migration of VM '%s'", vm->def->name);
+        virResetLastError();
+    }
+}
+
+
 static void
 qemuMigrationSrcRestoreDomainState(virQEMUDriver *driver, virDomainObj *vm)
 {
@@ -238,11 +271,14 @@ qemuMigrationSrcRestoreDomainState(virQEMUDriver *driver, virDomainObj *vm)
 
     if (preMigrationState != VIR_DOMAIN_RUNNING ||
         state != VIR_DOMAIN_PAUSED ||
-        reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
-        return;
+        reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) {
+        if (reason == VIR_DOMAIN_PAUSED_IOERROR)
+            VIR_DEBUG("Domain is paused due to I/O error, skipping resume");
+
+        /* Don't reactivate disks on post-copy failure */
+        if (reason != VIR_DOMAIN_PAUSED_POSTCOPY_FAILED)
+            qemuMigrationBlockNodesReactivate(vm, VIR_ASYNC_JOB_MIGRATION_OUT);
 
-    if (reason == VIR_DOMAIN_PAUSED_IOERROR) {
-        VIR_DEBUG("Domain is paused due to I/O error, skipping resume");
         return;
     }
 
@@ -6795,6 +6831,8 @@ qemuMigrationDstFinishFresh(virQEMUDriver *driver,
 
         if (*inPostCopy)
             *doKill = false;
+    } else {
+        qemuMigrationBlockNodesReactivate(vm, VIR_ASYNC_JOB_MIGRATION_IN);
     }
 
     if (mig->jobData) {
--
2.48.1