On 02/06/2013 08:52 AM, Michal Privoznik wrote:
This function does the source part of NBD magic. It
invokes drive-mirror on each non shared and RW disk with
a source and wait till the mirroring process completes.
When it does we can proceed with migration.
Currently, an active waiting is done: every 500ms libvirt
asks qemu if block-job is finished or not. However, once
the job finishes, qemu doesn't report its progress so we
can only assume if the job finished successfully or not.
The better solution would be to listen to the event which
is sent as soon as the job finishes. The event does
contain the result of job.
---
src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 189 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 16840b2..defad6b 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1165,6 +1165,188 @@ cleanup:
return ret;
}
+/**
+ * qemuMigrationDiskMirror:
+ * @driver: qemu driver
+ * @vm: domain
+ * @mig: migration cookie
+ * @host: where are we migrating to
+ * @speed: how much should the copying be limited
+ * @migrate_flags: migrate monitor command flags
+ *
+ * Run drive-mirror to feed NBD server running on dst and
+ * wait till the process completes. On success, update
+ * @migrate_flags so we don't tell 'migrate' command to
+ * do the very same operation.
+ *
+ * Returns 0 on success (@migrate_flags updated),
+ * -1 otherwise.
+ */
+static int
+qemuMigrationDriveMirror(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ qemuMigrationCookiePtr mig,
+ const char *host,
+ unsigned long speed,
+ unsigned int *migrate_flags)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ int ret = -1;
+ int mon_ret;
+ int port;
+ ssize_t i;
Isn't this really just a loop counter?
+ char *diskAlias = NULL;
+ char *nbd_dest = NULL;
+ unsigned int mirror_flags = VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT;
+ virErrorPtr err = NULL;
+
+ if (!(*migrate_flags & (QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
+ QEMU_MONITOR_MIGRATE_NON_SHARED_INC)))
+ return 0;
+
+ if (!mig->nbd) {
+ /* Destination doesn't support NBD server.
+ * Fall back to previous implementation.
+ * XXX Or should we report an error here? */
+ VIR_DEBUG("Destination doesn't support NBD server "
+ "Falling back to previous implementation.");
+ ret = 0;
+ goto cleanup;
+ }
+
+ /* steal NBD port and thus prevent it's propagation back to destination */
+ port = mig->nbd->port;
+ mig->nbd->port = 0;
+
+ if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC)
+ mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW;
+
+ for (i = 0; i < vm->def->ndisks; i++) {
+ virDomainDiskDefPtr disk = vm->def->disks[i];
+ virDomainBlockJobInfo info;
+
+ /* skip shared, RO and source-less disks */
+ if (disk->shared || disk->readonly || !disk->src)
+ continue;
+
+ VIR_FREE(diskAlias);
+ VIR_FREE(nbd_dest);
+ if ((virAsprintf(&diskAlias, "%s%s",
+ QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) ||
+ (virAsprintf(&nbd_dest, "nbd:%s:%u:exportname=%s",
+ host, port, diskAlias) < 0)) {
+ virReportOOMError();
+ goto error;
+ }
+
+ if (qemuDomainObjEnterMonitorAsync(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto error;
+ mon_ret = qemuMonitorDriveMirror(priv->mon, diskAlias, nbd_dest,
+ NULL, speed, mirror_flags);
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+ if (mon_ret < 0)
+ goto error;
+
+ /* wait for completion */
+ while (true) {
+ /* Poll every 500ms for progress & to allow cancellation */
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000ull };
+
+ memset(&info, 0, sizeof(info));
+
+ if (qemuDomainObjEnterMonitorAsync(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto error;
+ if (priv->job.asyncAbort) {
+ /* explicitly do this *after* we entered the monitor,
+ * as this is a critical section so we are guaranteed
+ * priv->job.asyncAbort will not change */
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+ virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+ _("canceled by client"));
+ goto error;
+ }
+ mon_ret = qemuMonitorBlockJob(priv->mon, diskAlias, NULL, 0,
+ &info, BLOCK_JOB_INFO, true);
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+ if (mon_ret < 0) {
+ /* If there's not any job running on a block device,
+ * qemu won't report anything, so it is not fatal
+ * if we fail to query status as job may have finished
+ * while we were sleeping. */
+ virReportError(VIR_ERR_OPERATION_FAILED,
+ _("Unable to query drive-mirror job status. "
+ "Stop polling on '%s' cur:%llu
end:%llu"),
+ diskAlias, info.cur, info.end);
+ goto error;
+ }
+
+ if (info.cur == info.end) {
+ VIR_DEBUG("Drive mirroring of '%s' completed",
diskAlias);
+ break;
+ }
+
+ /* XXX Frankly speaking, we should listen to the events,
+ * instead of doing this. But this works for now and we
+ * are doing something similar in migration itself anyway */
+
+ virObjectUnlock(vm);
+ qemuDriverUnlock(driver);
+
+ nanosleep(&ts, NULL);
+
+ qemuDriverLock(driver);
+ virObjectLock(vm);
+ }
+ }
+
+ /* Okay, copied. Modify migrate_flags */
+ *migrate_flags &= ~(QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
+ QEMU_MONITOR_MIGRATE_NON_SHARED_INC);
+ ret = 0;
+
+cleanup:
+ VIR_FREE(diskAlias);
+ VIR_FREE(nbd_dest);
+ return ret;
+
+error:
+ /* don't overwrite any errors */
+ err = virSaveLastError();
+ /* cancel any outstanding jobs */
+ for (; i >= 0; i--) {
You could enter this loop without actually having started a job for the
current "i" value. Also, it seems to me from reading the code that the
processing is start a job, wait for completion... start a job, wait for
completion. Thus is going backwards here necessary? IOW - wouldn't you
only need to cancel the "current" job? There's a number of reasons to
get to error a couple which could be because of some other failure, such
as priv->job.asyncAbort and "(mon_ret < 0)" when there's job a job
running.
+ virDomainDiskDefPtr disk = vm->def->disks[i];
+
+ /* skip shared, RO disks */
+ if (disk->shared || disk->readonly || !disk->src)
+ continue;
+
+ VIR_FREE(diskAlias);
+ if (virAsprintf(&diskAlias, "%s%s",
+ QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) {
+ virReportOOMError();
+ goto error;
This could be one nasty loop
+ }
+ if (qemuDomainObjEnterMonitorAsync(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) == 0) {
+ if (qemuMonitorBlockJob(priv->mon, diskAlias, NULL, 0,
+ NULL, BLOCK_JOB_ABORT, true) < 0) {
+ VIR_WARN("Unable to cancel block-job on '%s'",
diskAlias);
+ }
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+ } else {
+ VIR_WARN("Unable to enter monitor. No block job cancelled");
+ }
+ }
+ if (err)
+ virSetError(err);
+ virFreeError(err);
+ goto cleanup;
+}
/* Validate whether the domain is safe to migrate. If vm is NULL,
* then this is being run in the v2 Prepare stage on the destination
@@ -2429,6 +2611,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
if (qemuDomainMigrateGraphicsRelocate(driver, vm, mig) < 0)
VIR_WARN("unable to provide data for graphics client relocation");
+ /* this will update migrate_flags on success */
+ if (qemuMigrationDriveMirror(driver, vm, mig, spec->dest.host.name,
+ migrate_speed, &migrate_flags) < 0) {
+ /* error reported by helper func */
+ goto cleanup;
+ }
+
/* Before EnterMonitor, since qemuMigrationSetOffline already does that */
if (!(flags & VIR_MIGRATE_LIVE) &&
virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
@@ -2456,7 +2645,6 @@ qemuMigrationRun(virQEMUDriverPtr driver,
goto cleanup;
}
-
/* connect to the destination qemu if needed */
if (spec->destType == MIGRATION_DEST_CONNECT_HOST &&
qemuMigrationConnect(driver, vm, spec) < 0) {