On Tue, Jun 02, 2015 at 14:34:09 +0200, Jiri Denemark wrote:
By switching block jobs to use domain conditions, we can drop some
pretty complicated code in NBD storage migration. Moreover, we are
getting ready for migration events (to replace our 50ms polling on
query-migrate). The ultimate goal is to have a single loop waiting
(virDomainObjWait) for any migration-related event (changed status of a
migration, disk mirror events, internal abort requests, ...). This patch
makes NBD storage migration ready for that: first we call a QMP command
to start or cancel the drive mirror on all disks we are interested in,
and then we wait for a single condition that is signaled on any event
related to any of the mirrors.
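As an illustration of the waiting loop this is heading towards, here is
a simplified, hypothetical sketch (error handling elided; not code
quoted from the patch):

    int rv;

    /* hypothetical sketch: every block job event signals the domain
     * condition, so we just re-check the aggregated mirror state
     * after each wakeup */
    while ((rv = qemuMigrationDriveMirrorReady(driver, vm)) != 1) {
        if (rv < 0)
            goto error;               /* some mirror failed */
        if (virDomainObjWait(vm) < 0)
            goto error;               /* error while waiting */
    }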
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
Notes:
Version 2:
- slightly modified to use domain conditions
po/POTFILES.in | 1 -
src/qemu/qemu_blockjob.c | 137 ++-------------------
src/qemu/qemu_blockjob.h | 12 +-
src/qemu/qemu_domain.c | 17 +--
src/qemu/qemu_domain.h | 1 -
src/qemu/qemu_driver.c | 24 ++--
src/qemu/qemu_migration.c | 299 ++++++++++++++++++++++++++--------------------
src/qemu/qemu_process.c | 13 +-
8 files changed, 197 insertions(+), 307 deletions(-)
...
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 93e29e7..61b3e34 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
...
@@ -1733,111 +1733,148 @@ qemuMigrationStopNBDServer(virQEMUDriverPtr driver,
* -1 on error.
*/
static int
-qemuMigrationCheckDriveMirror(virQEMUDriverPtr driver,
+qemuMigrationDriveMirrorReady(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
size_t i;
- int ret = 1;
+ size_t notReady = 0;
+ int status;
for (i = 0; i < vm->def->ndisks; i++) {
virDomainDiskDefPtr disk = vm->def->disks[i];
qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
- if (!diskPriv->migrating || !diskPriv->blockJobSync)
+ if (!diskPriv->migrating)
continue;
- /* process any pending event */
- if (qemuBlockJobSyncWaitWithTimeout(driver, vm, disk,
- 0ull, NULL) < 0)
- return -1;
-
- switch (disk->mirrorState) {
- case VIR_DOMAIN_DISK_MIRROR_STATE_NONE:
- ret = 0;
- break;
- case VIR_DOMAIN_DISK_MIRROR_STATE_ABORT:
+ status = qemuBlockJobUpdate(driver, vm, disk);
+ if (status == VIR_DOMAIN_BLOCK_JOB_FAILED) {
virReportError(VIR_ERR_OPERATION_FAILED,
_("migration of disk %s failed"),
disk->dst);
return -1;
}
+
+ if (disk->mirrorState != VIR_DOMAIN_DISK_MIRROR_STATE_READY)
+ notReady++;
}
- return ret;
+ if (notReady) {
+ VIR_DEBUG("Waiting for %zu disk mirrors to get ready", notReady);
+ return 0;
+ } else {
+ VIR_DEBUG("All disk mirrors are ready");
+ return 1;
+ }
}
-/**
- * qemuMigrationCancelOneDriveMirror:
- * @driver: qemu driver
- * @vm: domain
+/*
+ * If @failed is not NULL, the function will report an error and set @failed
+ * to true in case a block job fails. This way we can properly abort migration
+ * in case some block job failed once all memory has already been transferred.
*
- * Cancel all drive-mirrors started by qemuMigrationDriveMirror.
- * Any pending block job events for the mirrored disks will be
- * processed.
- *
- * Returns 0 on success, -1 otherwise.
+ * Returns 1 if all mirrors are gone,
+ * 0 if some mirrors are still active,
+ * -1 on error.
The code below never returns -1. Perhaps you could use that return
value instead of passing the pointer.
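E.g. the tail of the function could fold the failure into the return
value; a rough sketch (untested, my suggestion rather than code from
this patch):

    if (active)
        return 0;   /* some mirrors still running */
    if (failed)
        return -1;  /* all mirrors gone, but at least one job failed */
    return 1;       /* all mirrors gone, all of them clean */

The caller that runs after memory transfer would then treat -1 as a
reason to abort the migration, while the plain cancel path could ignore
it.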
*/
static int
-qemuMigrationCancelOneDriveMirror(virQEMUDriverPtr driver,
+qemuMigrationDriveMirrorCancelled(virQEMUDriverPtr driver,
virDomainObjPtr vm,
- virDomainDiskDefPtr disk)
+ bool *failed)
{
- qemuDomainObjPrivatePtr priv = vm->privateData;
- char *diskAlias = NULL;
- int ret = -1;
+ size_t i;
+ size_t active = 0;
+ int status;
- /* No need to cancel if mirror already aborted */
- if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_ABORT) {
- ret = 0;
- } else {
- virConnectDomainEventBlockJobStatus status = -1;
+ for (i = 0; i < vm->def->ndisks; i++) {
+ virDomainDiskDefPtr disk = vm->def->disks[i];
+ qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
- if (virAsprintf(&diskAlias, "%s%s",
- QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0)
- goto cleanup;
+ if (!diskPriv->migrating)
+ continue;
- if (qemuDomainObjEnterMonitorAsync(driver, vm,
- QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
- goto endjob;
- ret = qemuMonitorBlockJobCancel(priv->mon, diskAlias, true);
- if (qemuDomainObjExitMonitor(driver, vm) < 0)
- goto endjob;
-
- if (ret < 0) {
- virDomainBlockJobInfo info;
-
- /* block-job-cancel can fail if QEMU simultaneously
- * aborted the job; probe for it again to detect this */
- if (qemuMonitorBlockJobInfo(priv->mon, diskAlias,
- &info, NULL) == 0) {
- ret = 0;
- } else {
+ status = qemuBlockJobUpdate(driver, vm, disk);
+ switch (status) {
+ case VIR_DOMAIN_BLOCK_JOB_FAILED:
+ if (failed) {
virReportError(VIR_ERR_OPERATION_FAILED,
- _("could not cancel migration of disk %s"),
+ _("migration of disk %s failed"),
disk->dst);
+ *failed = true;
}
+ /* fallthrough */
+ case VIR_DOMAIN_BLOCK_JOB_CANCELED:
+ case VIR_DOMAIN_BLOCK_JOB_COMPLETED:
+ qemuBlockJobSyncEnd(driver, vm, disk);
+ diskPriv->migrating = false;
+ break;
- goto endjob;
+ default:
+ active++;
}
+ }
- /* Mirror may become ready before cancellation takes
- * effect; loop if we get that event first */
- do {
- ret = qemuBlockJobSyncWait(driver, vm, disk, &status);
- if (ret < 0) {
- VIR_WARN("Unable to wait for block job on %s to cancel",
- diskAlias);
- goto endjob;
- }
- } while (status == VIR_DOMAIN_BLOCK_JOB_READY);
+ if (active) {
+ VIR_DEBUG("Waiting for %zu disk mirrors to finish", active);
+ return 0;
+ } else {
+ if (failed && *failed)
+ VIR_DEBUG("All disk mirrors are gone; some of them failed");
+ else
+ VIR_DEBUG("All disk mirrors are gone");
+ return 1;
}
+}
+
+
...
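For context, I'd expect the cancel path to drive the function above
with a loop along these lines (a hypothetical sketch, not code quoted
from the patch):

    bool failed = false;

    /* Wait until every mirror is gone, re-checking the aggregated
     * state after each wakeup on the domain condition. */
    while (qemuMigrationDriveMirrorCancelled(driver, vm, &failed) != 1) {
        if (virDomainObjWait(vm) < 0) {
            failed = true;
            break;
        }
    }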
@@ -2467,6 +2515,10 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
if (qemuMigrationUpdateJobStatus(driver, vm, job, asyncJob) == -1)
goto error;
+ if (storage &&
+ qemuMigrationDriveMirrorReady(driver, vm) < 0)
+ break;
I think this hunk and the related changes should be in a separate patch.
This code prepares for switching to an event-based migration finish,
while the rest of the patch mostly touches other parts that don't deal
with the big picture of migration.
+
/* cancel migration if disk I/O error is emitted while migrating */
if (abort_on_error &&
virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&
...
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 9c5d0f4..a945401 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1001,10 +1001,10 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
if (diskPriv->blockJobSync) {
+ /* We have a SYNC API waiting for this event, dispatch it back */
diskPriv->blockJobType = type;
diskPriv->blockJobStatus = status;
- /* We have an SYNC API waiting for this event, dispatch it back */
- virCondSignal(&diskPriv->blockJobSyncCond);
+ virDomainObjSignal(vm);
Once it becomes possible to have multiple threads waiting for certain
events, we might need to revisit these.
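E.g. with two waiters, a plain signal wakes just one arbitrary thread,
so an event could be consumed by the wrong one. A broadcast variant
(assuming one exists or gets added alongside virDomainObjSignal) would
wake all waiters and let each re-check its own predicate:

    /* hypothetical: wake *all* threads waiting on the domain
     * condition instead of a single arbitrary one */
    virDomainObjBroadcast(vm);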
} else {
/* there is no waiting SYNC API, dispatch the update to a thread */
if (VIR_ALLOC(processEvent) < 0)
Looks good, but I'd rather see a version that avoids combining the
refactoring to the new condition with the preparation to use events for
migration.
Peter