The qemu implementation for virDomainGetBlockJobInfo() has a
minor bug: it grabs the qemu job with intent to QEMU_JOB_MODIFY,
which means it cannot be run in parallel with any other
domain-modifying command. Among others, virDomainBlockJobAbort()
is such a modifying command, and it defaults to being
synchronous, and can wait as long as several seconds to ensure
that the job has actually finished. Due to the job rules, this
means a user cannot obtain status about the job during that
timeframe, even though we know management code is using a
polling loop on status to see when a job finishes.
This bug has been present ever since blockpull support was first
introduced (commit b976165, v0.9.4 in Jul 2011), all because we
stupidly tried to cram too much multiplexing through a single
helper routine. It's time to disentangle some of that mess, and
in the process relax block job query to use QEMU_JOB_QUERY,
since it can safely be used in parallel with any long running
modify command. Technically, there is one case where getting
block job info can modify domain XML - we do snooping to see if
a 2-phase job has transitioned into the second phase, for an
optimization in the case of old qemu that lacked an event for
the transition. But I think that optimization is safe; and if
it proves to be a problem, we could use the difference between
QEMU_CAPS_BLOCKJOB_{ASYNC,SYNC} to determine whether we even
need snooping, and request a modifying job in that case.
* src/qemu/qemu_driver.c (qemuDomainBlockJobImpl): Move info
handling...
(qemuDomainGetBlockJobInfo): ...here, and relax job type.
(qemuDomainBlockJobAbort, qemuDomainBlockJobSetSpeed)
(qemuDomainBlockRebase, qemuDomainBlockPull): Adjust callers.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
src/qemu/qemu_driver.c | 86 ++++++++++++++++++++++++++++++++++++++------------
1 file changed, 66 insertions(+), 20 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 177d3e4..bedccc6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -14994,7 +14994,7 @@ static int
qemuDomainBlockJobImpl(virDomainObjPtr vm,
virConnectPtr conn,
const char *path, const char *base,
- unsigned long bandwidth, virDomainBlockJobInfoPtr info,
+ unsigned long bandwidth,
int mode, unsigned int flags)
{
virQEMUDriverPtr driver = conn->privateData;
@@ -15125,25 +15125,14 @@ qemuDomainBlockJobImpl(virDomainObjPtr vm,
qemuDomainObjEnterMonitor(driver, vm);
ret = qemuMonitorBlockJob(priv->mon, device, basePath, backingPath,
- bandwidth, info, mode, async);
+ bandwidth, NULL, mode, async);
qemuDomainObjExitMonitor(driver, vm);
- if (info && info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT &&
- disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
- info->type = disk->mirrorJob;
if (ret < 0) {
if (mode == BLOCK_JOB_ABORT && disk->mirror)
disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_NONE;
goto endjob;
}
- /* Snoop block copy operations, so future cancel operations can
- * avoid checking if pivot is safe. */
- if (mode == BLOCK_JOB_INFO && ret == 1 && disk->mirror &&
- info->cur == info->end && !disk->mirrorState) {
- disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
- save = true;
- }
-
waitjob:
/* If we have made changes to XML due to a copy job, make a best
* effort to save it now. But we can ignore failure, since there
@@ -15239,15 +15228,22 @@ qemuDomainBlockJobAbort(virDomainPtr dom, const char *path,
unsigned int flags)
return -1;
}
- return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, NULL,
BLOCK_JOB_ABORT,
- flags);
+ return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0,
+ BLOCK_JOB_ABORT, flags);
}
static int
qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
virDomainBlockJobInfoPtr info, unsigned int flags)
{
+ virQEMUDriverPtr driver = dom->conn->privateData;
+ qemuDomainObjPrivatePtr priv;
virDomainObjPtr vm;
+ char *device = NULL;
+ int idx;
+ virDomainDiskDefPtr disk;
+ int ret = -1;
+
virCheckFlags(0, -1);
if (!(vm = qemuDomObjFromDomain(dom)))
@@ -15258,8 +15254,58 @@ qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
return -1;
}
- return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, info, BLOCK_JOB_INFO,
- flags);
+ priv = vm->privateData;
+ if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_ASYNC) &&
+ !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_SYNC)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("block jobs not supported with this QEMU binary"));
+ goto cleanup;
+ }
+
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0)
+ goto cleanup;
+
+ if (!virDomainObjIsActive(vm)) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("domain is not running"));
+ goto endjob;
+ }
+
+ device = qemuDiskPathToAlias(vm, path, &idx);
+ if (!device)
+ goto endjob;
+ disk = vm->def->disks[idx];
+
+ qemuDomainObjEnterMonitor(driver, vm);
+ ret = qemuMonitorBlockJob(priv->mon, device, NULL, NULL, 0,
+ info, BLOCK_JOB_INFO, true);
+ qemuDomainObjExitMonitor(driver, vm);
+ if (ret < 0)
+ goto endjob;
+
+ if (info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT &&
+ disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)
+ info->type = disk->mirrorJob;
+
+ /* Snoop block copy operations, so future cancel operations can
+ * avoid checking if pivot is safe. Save the change to XML, but
+ * we can ignore failure because it is only an optimization. */
+ if (ret == 1 && disk->mirror &&
+ info->cur == info->end && !disk->mirrorState) {
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
+
+ disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
+ ignore_value(virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm));
+ virObjectUnref(cfg);
+ }
+ endjob:
+ if (!qemuDomainObjEndJob(driver, vm))
+ vm = NULL;
+
+ cleanup:
+ if (vm)
+ virObjectUnlock(vm);
+ return ret;
}
static int
@@ -15277,7 +15323,7 @@ qemuDomainBlockJobSetSpeed(virDomainPtr dom, const char *path,
return -1;
}
- return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL,
+ return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth,
BLOCK_JOB_SPEED, flags);
}
@@ -15486,7 +15532,7 @@ qemuDomainBlockRebase(virDomainPtr dom, const char *path, const
char *base,
* everything, including vm cleanup. */
if (!(flags & VIR_DOMAIN_BLOCK_REBASE_COPY))
return qemuDomainBlockJobImpl(vm, dom->conn, path, base, bandwidth,
- NULL, BLOCK_JOB_PULL, flags);
+ BLOCK_JOB_PULL, flags);
/* If we got here, we are doing a block copy rebase. */
if (flags & VIR_DOMAIN_BLOCK_REBASE_COPY_RAW)
@@ -15527,7 +15573,7 @@ qemuDomainBlockPull(virDomainPtr dom, const char *path, unsigned
long bandwidth,
return -1;
}
- return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL,
+ return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth,
BLOCK_JOB_PULL, flags);
}
--
1.9.3