On Mon, Jan 13, 2014 at 14:28:12 +0800, mrhines(a)linux.vnet.ibm.com wrote:
From: "Michael R. Hines" <mrhines(a)us.ibm.com>
RDMA Live migration requires registering memory with the hardware,
Hmm, I forgot to ask when I was reviewing the previous patch but does
any of this RDMA migration functionality require special privileges or
is any unprivileged process able to use RDMA?
and thus QEMU offers a new 'capability' which supports the ability
to pre-register / mlock() the guest memory in advance for higher
RDMA performance before the migration begins.
This patch exposes this capability with the following example usage:
virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain
qemu+ssh://hostname/system
The URI should be rdma://hostname
This capability is disabled by default, and thus omitting it will
cause QEMU to register the memory with the hardware on demand.
Signed-off-by: Michael R. Hines <mrhines(a)us.ibm.com>
---
include/libvirt/libvirt.h.in | 1 +
src/qemu/qemu_migration.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_migration.h | 3 ++-
src/qemu/qemu_monitor.c | 2 +-
src/qemu/qemu_monitor.h | 1 +
tools/virsh-domain.c | 7 +++++
6 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 5ac2694..476521b 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1192,6 +1192,7 @@ typedef enum {
VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */
VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration */
VIR_MIGRATE_ABORT_ON_ERROR = (1 << 12), /* abort migration on I/O errors happened during migration */
+ VIR_MIGRATE_RDMA_PIN_ALL = (1 << 13), /* RDMA memory pinning */
} virDomainMigrateFlags;
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 1e0f538..f4358ba 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1566,6 +1566,46 @@ cleanup:
}
static int
+qemuMigrationSetPinAll(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ enum qemuDomainAsyncJob job)
Bad indentation.
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ int ret;
+
+ if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
+ return -1;
+
+ ret = qemuMonitorGetMigrationCapability(
+ priv->mon,
+ QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
Is this capability always present when QEMU supports RDMA migration? If
so, it could be used when we check if QEMU supports RDMA migration (see
the sketch after the quoted function below).
+
+ if (ret < 0) {
+ goto cleanup;
+ } else if (ret == 0) {
+ if (job == QEMU_ASYNC_JOB_MIGRATION_IN) {
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("rdma pinning migration is not supported by "
+ "target QEMU binary"));
+ } else {
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("rdma pinning migration is not supported by "
+ "source QEMU binary"));
+ }
+ ret = -1;
+ goto cleanup;
+ }
+
+ ret = qemuMonitorSetMigrationCapability(
+ priv->mon,
+ QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
+
+cleanup:
+ qemuDomainObjExitMonitor(driver, vm);
+ return ret;
+}
+
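If the capability is indeed always there, the support check could reuse
the very same monitor query. A rough sketch (the error code, wording,
and exact placement inside an existing monitor job are just
illustrative):

    int rdma;

    /* inside a monitor job, when an rdma migration URI was requested */
    rdma = qemuMonitorGetMigrationCapability(
        priv->mon, QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
    if (rdma < 0)
        goto cleanup;

    if (rdma == 0) {
        virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
                       _("this QEMU binary does not support RDMA migration"));
        goto cleanup;
    }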
+static int
qemuMigrationWaitForSpice(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
@@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
goto stop;
+ if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+ qemuMigrationSetPinAll(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
Indentation. And the flag should be rejected when protocol is not rdma.
+ goto stop;
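For the protocol check mentioned above, something along these lines
should do (just a sketch; the error wording is only a suggestion):

    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
        STRNEQ(protocol, "rdma")) {
        virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
                       _("rdma-pin-all is only supported with the "
                         "rdma migration protocol"));
        goto stop;
    }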
+
+ if (strstr(protocol, "rdma")) {
I think we don't want to match just a part of the protocol; use STREQ()
instead.
+ unsigned long long memKB = vm->def->mem.hard_limit ?
+ vm->def->mem.hard_limit :
+ vm->def->mem.max_balloon + 1024 * 1024;
+ virProcessSetMaxMemLock(vm->pid, memKB * 3);
Apart from weird indentation of the virProcessSetMaxMemLock line, there
are several issues here:
- this code should be moved inside qemuMigrationSetPinAll and done only
if VIR_MIGRATE_RDMA_PIN_ALL flag was used.
- virProcessSetMaxMemLock wants the value in bytes, thus memKB should
rather be multiplied by 1024.
- what memory is going to be mlock()ed with rdma-pin-all? Is it going to
be all memory allocated by QEMU or just the domain's memory? If it's
all QEMU memory, we already painfully found out it's impossible to
automatically compute how much memory QEMU consumes in addition to the
configured domain's memory, and I think a better approach would be to
just forbid migration with rdma-pin-all unless hard_limit is specified
(see the sketch below).
+ }
+
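In other words, when VIR_MIGRATE_RDMA_PIN_ALL was requested I would
expect the locking limit to be handled inside qemuMigrationSetPinAll,
roughly like this (just a sketch reusing the ret/cleanup structure of
that function; whether hard_limit must be strictly required is open for
discussion, and the error code/wording is only illustrative):

    if (!vm->def->mem.hard_limit) {
        virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
                       _("rdma-pin-all requires hard_limit to be set since "
                         "the amount of memory QEMU needs to lock cannot "
                         "be computed automatically"));
        ret = -1;
        goto cleanup;
    }

    /* hard_limit is in KiB while virProcessSetMaxMemLock() expects bytes */
    if (virProcessSetMaxMemLock(vm->pid,
                                vm->def->mem.hard_limit * 1024) < 0) {
        ret = -1;
        goto cleanup;
    }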
if (mig->lockState) {
VIR_DEBUG("Received lockstate %s", mig->lockState);
VIR_FREE(priv->lockState);
@@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver,
QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto cleanup;
+ if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+ qemuMigrationSetPinAll(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto cleanup;
+
if (qemuDomainObjEnterMonitorAsync(driver, vm,
QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto cleanup;
Same comments as for the equivalent hunk in qemuMigrationPrepareAny.
@@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
switch (spec->destType) {
case MIGRATION_DEST_HOST:
+ if (strstr(spec->dest.host.proto, "rdma")) {
+ unsigned long long memKB = vm->def->mem.hard_limit ?
+ vm->def->mem.hard_limit :
+ vm->def->mem.max_balloon + 1024 * 1024;
+ virProcessSetMaxMemLock(vm->pid, memKB * 3);
+ }
+
ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags,
spec->dest.host.proto,
spec->dest.host.name,
Same comments as for the equivalent hunk in qemuMigrationPrepareAny.
Jirka