From: "Michael R. Hines" <mrhines(a)us.ibm.com>
RDMA Live migration requires registering memory with the hardware,
and thus QEMU offers a new 'capability' which supports the ability
to pre-register / mlock() the guest memory in advance for higher
RDMA performance before the migration begins.
This patch exposes this capability with the following example usage:
virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain
qemu+ssh://hostname/system
This capability is disabled by default, and thus omitting it will
cause QEMU to register the memory with the hardware on an on-demand basis.
Signed-off-by: Michael R. Hines <mrhines(a)us.ibm.com>
---
include/libvirt/libvirt.h.in | 1 +
src/qemu/qemu_migration.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_migration.h | 3 ++-
src/qemu/qemu_monitor.c | 2 +-
src/qemu/qemu_monitor.h | 1 +
tools/virsh-domain.c | 7 +++++
6 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 5ac2694..476521b 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1192,6 +1192,7 @@ typedef enum {
VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */
VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration
*/
VIR_MIGRATE_ABORT_ON_ERROR = (1 << 12), /* abort migration on I/O errors
happened during migration */
+ VIR_MIGRATE_RDMA_PIN_ALL = (1 << 13), /* RDMA memory pinning */
} virDomainMigrateFlags;
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 1e0f538..f4358ba 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1566,6 +1566,46 @@ cleanup:
}
static int
+qemuMigrationSetPinAll(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ enum qemuDomainAsyncJob job)
+{ /* Check for, then set, the RDMA_PIN_ALL migration capability via vm's monitor. */
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ int ret;
+ /* Return -1 right away if the monitor cannot be entered; no exit call is made then. */
+ if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
+ return -1;
+ /* Query the capability first; the result selects one of the branches below. */
+ ret = qemuMonitorGetMigrationCapability(
+ priv->mon,
+ QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
+ /* ret < 0: the query itself failed; ret == 0: reported as "not supported". */
+ if (ret < 0) {
+ goto cleanup;
+ } else if (ret == 0) {
+ if (job == QEMU_ASYNC_JOB_MIGRATION_IN) { /* incoming job: message names the target */
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("rdma pinning migration is not supported by "
+ "target QEMU binary"));
+ } else { /* any other job: message names the source */
+ virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+ _("rdma pinning migration is not supported by "
+ "source QEMU binary"));
+ }
+ ret = -1; /* turn the "unsupported" answer into a failure for the caller */
+ goto cleanup;
+ }
+ /* Query returned > 0: set the capability; that call's result is the return value. */
+ ret = qemuMonitorSetMigrationCapability(
+ priv->mon,
+ QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
+ /* Every path that entered the monitor leaves it here before returning. */
+cleanup:
+ qemuDomainObjExitMonitor(driver, vm);
+ return ret;
+}
+
+static int
qemuMigrationWaitForSpice(virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
@@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
goto stop;
+ if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+ qemuMigrationSetPinAll(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
+ goto stop;
+
+ if (strstr(protocol, "rdma")) {
+ unsigned long long memKB = vm->def->mem.hard_limit ?
+ vm->def->mem.hard_limit :
+ vm->def->mem.max_balloon + 1024 * 1024;
+ virProcessSetMaxMemLock(vm->pid, memKB * 3);
+ }
+
if (mig->lockState) {
VIR_DEBUG("Received lockstate %s", mig->lockState);
VIR_FREE(priv->lockState);
@@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver,
QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto cleanup;
+ if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+ qemuMigrationSetPinAll(driver, vm,
+ QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+ goto cleanup;
+
if (qemuDomainObjEnterMonitorAsync(driver, vm,
QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
goto cleanup;
@@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
switch (spec->destType) {
case MIGRATION_DEST_HOST:
+ if (strstr(spec->dest.host.proto, "rdma")) {
+ unsigned long long memKB = vm->def->mem.hard_limit ?
+ vm->def->mem.hard_limit :
+ vm->def->mem.max_balloon + 1024 * 1024;
+ virProcessSetMaxMemLock(vm->pid, memKB * 3);
+ }
+
ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags,
spec->dest.host.proto,
spec->dest.host.name,
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index cafa2a2..a76aaef 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -39,7 +39,8 @@
VIR_MIGRATE_UNSAFE | \
VIR_MIGRATE_OFFLINE | \
VIR_MIGRATE_COMPRESSED | \
- VIR_MIGRATE_ABORT_ON_ERROR)
+ VIR_MIGRATE_ABORT_ON_ERROR | \
+ VIR_MIGRATE_RDMA_PIN_ALL)
/* All supported migration parameters and their types. */
# define QEMU_MIGRATION_PARAMETERS \
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 5a450e2..86bffaa 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -118,7 +118,7 @@ VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
VIR_ENUM_IMPL(qemuMonitorMigrationCaps,
QEMU_MONITOR_MIGRATION_CAPS_LAST,
- "xbzrle")
+ "xbzrle", "rdma-pin-all")
VIR_ENUM_IMPL(qemuMonitorVMStatus,
QEMU_MONITOR_VM_STATUS_LAST,
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 16b0b77..a8b1cc6 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -452,6 +452,7 @@ int qemuMonitorGetSpiceMigrationStatus(qemuMonitorPtr mon,
typedef enum {
QEMU_MONITOR_MIGRATION_CAPS_XBZRLE,
+ QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL,
QEMU_MONITOR_MIGRATION_CAPS_LAST
} qemuMonitorMigrationCaps;
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 1fe138c..31df7f6 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -8532,6 +8532,10 @@ static const vshCmdOptDef opts_migrate[] = {
.type = VSH_OT_BOOL,
.help = N_("compress repeated pages during live migration")
},
+ {.name = "rdma-pin-all",
+ .type = VSH_OT_BOOL,
+ .help = N_("support memory pinning during RDMA live migration")
+ },
{.name = "abort-on-error",
.type = VSH_OT_BOOL,
.help = N_("abort on soft errors during migration")
@@ -8676,6 +8680,9 @@ doMigrate(void *opaque)
if (vshCommandOptBool(cmd, "compressed"))
flags |= VIR_MIGRATE_COMPRESSED;
+ if (vshCommandOptBool(cmd, "rdma-pin-all"))
+ flags |= VIR_MIGRATE_RDMA_PIN_ALL;
+
if (vshCommandOptBool(cmd, "offline")) {
flags |= VIR_MIGRATE_OFFLINE;
}
--
1.8.1.2