We were not directly saving the domain XML to file after starting
or finishing a blockcopy. Without the startup write, a libvirtd
restart in the middle of a copy job would forget that the job was
underway. Then at pivot, we were indirectly writing new XML in
reaction to events that occur as we stop and restart the guest CPUs.
But there was a race: since pivot is an async action, it is possible
that the guest is restarted before the pivot completes, so if XML
changes during the event, that change was not written. The original
blockcopy code cleared out the <mirror> element prior to restarting
the CPUs, but this is also a race, observed if a user does an async
pivot and a dumpxml before the event occurs. Furthermore, this race
will interfere with active commit, because that code relies on the
<mirror> element at the time of the qemu event to determine whether
to inform the user of a normal commit or an active commit.
Fix things by saving state any time we modify live XML, and by
delaying XML modifications until after the event completes. We
still need a solution for the case of libvirtd restarting in between
requesting qemu to do the pivot and the actual event (that is, if
libvirtd misses the event, but learns the job is complete thanks to
a block query, then the updated state still needs to be updated in
live XML), but that will be a later patch, in part because we want
to start taking advantage of newer qemu's ability to keep the job
around after completion rather than the current usage where the job
disappears both on error and on success.
* src/qemu/qemu_driver.c (qemuDomainBlockCopy): Track XML change
on disk.
(qemuDomainBlockPivot): Move post-pivot XML rewrites...
* src/qemu/qemu_process.c (qemuProcessHandleBlockJob): ...here,
and expand to cover case of persistent vm.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
src/qemu/qemu_driver.c | 46 ++++++++++++++++++---------------
src/qemu/qemu_process.c | 69 +++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 86 insertions(+), 29 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 368e112..a5eaead 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -14843,39 +14841,41 @@ qemuDomainBlockPivot(virConnectPtr conn,
virSecurityManagerSetDiskLabel(driver->securityManager, vm->def, disk)
< 0))
goto cleanup;
- /* Attempt the pivot. */
+ /* Attempt the pivot. We will update the domain later when qemu
+ * emits an event, telling us if we succeeded or failed.
+ * XXX On libvirtd restarts, if we missed the qemu event, we need
+ * to double check what state qemu is in.
+ * XXX We should be using qemu's rerror flag to make sure the job
+ * remains alive until we know it's final state.
+ * XXX If the abort command is synchronous but the qemu event is
+ * that the pivot failed, we need to reflect that failure into the
+ * overall return value. */
qemuDomainObjEnterMonitor(driver, vm);
ret = qemuMonitorDrivePivot(priv->mon, device, disk->mirror->path, format);
qemuDomainObjExitMonitor(driver, vm);
- if (ret == 0) {
- /* XXX We want to revoke security labels and disk lease, as
- * well as audit that revocation, before dropping the original
- * source. But it gets tricky if both source and mirror share
- * common backing files (we want to only revoke the non-shared
- * portion of the chain, and is made more difficult by the
- * fact that we aren't tracking the full chain ourselves; so
- * for now, we leak the access to the original. */
- virStorageSourceFree(oldsrc);
- oldsrc = NULL;
- } else {
+ if (ret < 0) {
/* On failure, qemu abandons the mirror, and reverts back to
* the source disk (RHEL 6.3 has a bug where the revert could
* cause catastrophic failure in qemu, but we don't need to
* worry about it here as it is not an upstream qemu problem. */
/* XXX should we be parsing the exact qemu error, or calling
* 'query-block', to see what state we really got left in
- * before killing the mirroring job? And just as on the
- * success case, there's security labeling to worry about. */
+ * before killing the mirroring job?
+ * XXX We want to revoke security labels and disk lease, as
+ * well as audit that revocation, before dropping the original
+ * source. But it gets tricky if both source and mirror share
+ * common backing files (we want to only revoke the non-shared
+ * portion of the chain); so for now, we leak the access to
+ * the original. */
virStorageSourceFree(disk->mirror);
- }
- disk->mirror = NULL;
- disk->mirroring = false;
- disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN;
+ disk->mirror = NULL;
+ disk->mirroring = false;
+ disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN;
+ }
cleanup:
- /* revert to original disk def on failure */
if (oldsrc)
disk->src = oldsrc;
@@ -15329,6 +15329,10 @@ qemuDomainBlockCopy(virDomainObjPtr vm,
mirror = NULL;
disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_COPY;
+ if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
+ VIR_WARN("Unable to save status on vm %s after state change",
+ vm->def->name);
+
endjob:
if (need_unlink && unlink(dest))
VIR_WARN("unable to unlink just-created %s", dest);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 65e9faf..3229f81 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1016,6 +1016,7 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virObjectEventPtr event2 = NULL;
const char *path;
virDomainDiskDefPtr disk;
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
virObjectLock(vm);
disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias);
@@ -1030,15 +1031,67 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
event = virDomainEventBlockJobNewFromObj(vm, path, type, status);
event2 = virDomainEventBlockJob2NewFromObj(vm, disk->dst, type,
status);
- /* XXX If we completed a block pull or commit, then recompute
- * the cached backing chain to match. Better would be storing
- * the chain ourselves rather than reprobing, but we haven't
- * quite completed that conversion to use our XML tracking. */
- if ((type == VIR_DOMAIN_BLOCK_JOB_TYPE_PULL ||
- type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT ||
- type == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT) &&
- status == VIR_DOMAIN_BLOCK_JOB_COMPLETED)
+
+ /* If we completed a block pull or commit, then update the XML
+ * to match. */
+ if (status == VIR_DOMAIN_BLOCK_JOB_COMPLETED) {
+ if (disk->mirror) {
+ virDomainDiskDefPtr persistDisk = NULL;
+
+ if (vm->newDef) {
+ int indx = virDomainDiskIndexByName(vm->newDef, disk->dst,
+ false);
+ virStorageSourcePtr copy = NULL;
+
+ if (indx >= 0) {
+ persistDisk = vm->newDef->disks[indx];
+ copy = virStorageSourceCopy(disk->mirror, false);
+ if (virStorageSourceInitChainElement(copy,
+ persistDisk->src,
+ false) < 0) {
+ VIR_WARN("Unable to update persistent definition "
+ "on vm %s after block job",
+ vm->def->name);
+ virStorageSourceFree(copy);
+ copy = NULL;
+ persistDisk = NULL;
+ }
+ }
+ if (copy) {
+ virStorageSourceFree(persistDisk->src);
+ persistDisk->src = copy;
+ }
+ }
+
+ /* XXX We want to revoke security labels and disk
+ * lease, as well as audit that revocation, before
+ * dropping the original source. But it gets tricky
+ * if both source and mirror share common backing
+ * files (we want to only revoke the non-shared
+ * portion of the chain); so for now, we leak the
+ * access to the original. */
+ virStorageSourceFree(disk->src);
+ disk->src = disk->mirror;
+ disk->mirror = NULL;
+ disk->mirroring = false;
+ disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN;
+
+ if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir,
+ vm) < 0)
+ VIR_WARN("Unable to save status on vm %s after block job",
+ vm->def->name);
+ if (persistDisk && virDomainSaveConfig(cfg->configDir,
+ vm->newDef) < 0)
+ VIR_WARN("Unable to update persistent definition on vm %s
"
+ "after block job", vm->def->name);
+ }
+ /* Recompute the cached backing chain to match our
+ * updates. Better would be storing the chain ourselves
+ * rather than reprobing, but we haven't quite completed
+ * that conversion to use our XML tracking. */
qemuDomainDetermineDiskChain(driver, vm, disk, true);
+ }
+
if (disk->mirror &&
(type == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY ||
type == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)) {
--
1.9.3