On 02/03/2014 11:19 PM, Jiri Denemark wrote:
> USAGE: $ virsh migrate --live --migrateuri x-rdma:hostname domain
qemu+ssh://hostname/system
s/x-rdma/rdma/ and I believe we should use rdma://hostname as the URI
Acknowledged.
> Signed-off-by: Michael R. Hines <mrhines(a)us.ibm.com>
> ---
> src/qemu/qemu_capabilities.c | 13 +++++
> src/qemu/qemu_capabilities.h | 1 +
> src/qemu/qemu_command.c | 8 ++++
> src/qemu/qemu_migration.c | 110 ++++++++++++++++++++++++++++++++++---------
> src/qemu/qemu_monitor.c | 3 +-
> src/qemu/qemu_monitor.h | 1 +
> src/util/viruri.c | 7 ++-
> 7 files changed, 119 insertions(+), 24 deletions(-)
>
> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> index 548b988..d82b48c 100644
> --- a/src/qemu/qemu_capabilities.c
> +++ b/src/qemu/qemu_capabilities.c
> @@ -243,6 +243,8 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST,
> "virtio-mmio",
> "ich9-intel-hda",
> "kvm-pit-lost-tick-policy",
> +
> + "migrate-qemu-rdma", /* 160 */
s/migrate-qemu-rdma/migrate-rdma/
"qemu" string in pretty redundant here given that it is a qemu
capability.
Acknowledged.
> );
>
> struct _virQEMUCaps {
> @@ -906,6 +908,9 @@ virCapsPtr virQEMUCapsInit(virQEMUCapsCachePtr cache)
> virCapabilitiesAddHostMigrateTransport(caps,
> "tcp");
>
> + virCapabilitiesAddHostMigrateTransport(caps,
> + "rdma");
> +
> /* QEMU can support pretty much every arch that exists,
> * so just probe for them all - we gracefully fail
> * if a qemu-system-$ARCH binary can't be found
> @@ -1110,6 +1115,7 @@ virQEMUCapsComputeCmdFlags(const char *help,
> * -incoming unix (qemu >= 0.12.0)
> * -incoming fd (qemu >= 0.12.0)
> * -incoming stdio (all earlier kvm)
> + * -incoming rdma (qemu >= 2.0.0)
> *
> * NB, there was a pre-kvm-79 'tcp' support, but it
> * was broken, because it blocked the monitor console
> @@ -1130,6 +1136,9 @@ virQEMUCapsComputeCmdFlags(const char *help,
> virQEMUCapsSet(qemuCaps, QEMU_CAPS_MIGRATE_KVM_STDIO);
> }
>
> + if (version >= 2000000)
> + virQEMUCapsSet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_RDMA);
> +
> if (version >= 10000)
> virQEMUCapsSet(qemuCaps, QEMU_CAPS_0_10);
>
This is not needed, we won't be parsing -help for any QEMU that supports
RDMA.
Acknowledged.
> @@ -2561,6 +2570,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps,
> if (qemuCaps->version >= 1006000)
> virQEMUCapsSet(qemuCaps, QEMU_CAPS_DEVICE_VIDEO_PRIMARY);
>
> + if (qemuCaps->version >= 2000000)
> + virQEMUCapsSet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_RDMA);
> +
> +
And here we need a better check for rdma migration. What if someone
compiles QEMU without RDMA support?
> if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0)
> goto cleanup;
> if (virQEMUCapsProbeQMPEvents(qemuCaps, mon) < 0)
> diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
> index 02d47c6..3e78961 100644
> --- a/src/qemu/qemu_capabilities.h
> +++ b/src/qemu/qemu_capabilities.h
> @@ -198,6 +198,7 @@ enum virQEMUCapsFlags {
> QEMU_CAPS_DEVICE_VIRTIO_MMIO = 157, /* -device virtio-mmio */
> QEMU_CAPS_DEVICE_ICH9_INTEL_HDA = 158, /* -device ich9-intel-hda */
> QEMU_CAPS_KVM_PIT_TICK_POLICY = 159, /* kvm-pit.lost_tick_policy */
> + QEMU_CAPS_MIGRATE_QEMU_RDMA = 160, /* have qemu rdma migration */
s/_QEMU// as it is redundant.
Acknowledged.
>
> QEMU_CAPS_LAST, /* this must always be the last item */
> };
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 763417f..0d23d8b 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -9448,6 +9448,14 @@ qemuBuildCommandLine(virConnectPtr conn,
> goto error;
> }
> virCommandAddArg(cmd, migrateFrom);
> + } else if (STRPREFIX(migrateFrom, "rdma")) {
> + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_RDMA)) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + "%s", _("RDMA migration is not
supported with "
> + "this QEMU binary"));
> + goto error;
> + }
> + virCommandAddArg(cmd, migrateFrom);
> } else if (STREQ(migrateFrom, "stdio")) {
> if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_FD)) {
> virCommandAddArgFormat(cmd, "fd:%d", migrateFd);
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index ef6f1c5..1e0f538 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -46,6 +46,7 @@
> #include "virerror.h"
> #include "viralloc.h"
> #include "virfile.h"
> +#include "virprocess.h"
> #include "datatypes.h"
> #include "fdstream.h"
> #include "viruuid.h"
> @@ -2163,6 +2164,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
> virDomainDefPtr *def,
> const char *origname,
> virStreamPtr st,
> + const char *protocol,
> unsigned short port,
> bool autoPort,
> const char *listenAddress,
> @@ -2275,6 +2277,16 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
> freeaddrinfo(info);
> hostIPv6Capable = true;
> }
> +
> + /*
> + * RDMA (iWarp) until linux 3.11 is broken, need
> + * better host librdmacm IPv6 support detection.
> + * Disallow by default for now if RDMA.
> + */
> + if (hostIPv6Capable && strstr(protocol, "rdma")) {
> + hostIPv6Capable = false;
> + }
> +
Is this still needed? 3.11 will already be quite old when QEMU with RDMA
support is released...
Here's the breakdown:
IPv6 RDMA over native (Infiniband): Works
IPv6 RDMA over RoCE (ethernet): Works
IPv6 RDMA over iWarp (ethernet): Does not yet work
IPv4 RDMA over anything: Never had a problem
As you can see, there's just that one special case (iWarp).
As of now, there is no method in Linux to "detect" in a backwards-compatible
way whether or not the current kernel version has fixed the bug (either
through
/sysfs or /proc or some other configurable way to query whether the
linux RDMA
device drivers can correctly perform the IPv6 connection establishment.)
Without such a detection mechanism, there's no safe way to remove this
IPv6 check without propogating the QEMU connection failure all the way
back to libvirt and reporting it to the user......
And if the user got such an error report, what would they do about it
exactly?
Shut down libvirt / management software, disable IPv6 so that the error
goes away and then restarting the management software?
Comments?
> - if (!(p = STRSKIP(uri_in, "tcp:"))) {
> - virReportError(VIR_ERR_INVALID_ARG, "%s",
> - _("only tcp URIs are supported for KVM/QEMU"
> - " migrations"));
> +
> + if (VIR_STRDUP(uri_save, uri_in) <= 0) {
> goto cleanup;
> }
>
> - /* Convert uri_in to well-formed URI with // after tcp: */
> - if (!(STRPREFIX(uri_in, "tcp://"))) {
> + protocol = strtok_r(uri_save, ":", &protocol_save);
> + VIR_FREE(uri_save);
> + if (protocol) {
> + if (virAsprintf(&well_formed_protocol, "%s://", protocol)
< 0)
> + goto cleanup;
> + }
> +
> + /* Make sure it's a valid protocol */
> + if (!(p = STRSKIP(uri_in, "tcp:")) &&
> + !(p = STRSKIP(uri_in, "rdma:"))) {
> + virReportError(VIR_ERR_INVALID_ARG, _("URI %s (%s) not
supported"
> + " for KVM/QEMU migrations"), protocol,
uri_in);
> + goto cleanup;
> + }
> +
> + /* Convert uri_in to well-formed URI with // after colon */
> + if (!(STRPREFIX(uri_in, well_formed_protocol))) {
> well_formed_uri = false;
> - if (virAsprintf(&uri_str, "tcp://%s", p) < 0)
> + if (virAsprintf(&uri_str, "%s://%s", protocol, p) < 0)
> goto cleanup;
> }
As I already said several month ago, "tcp:hostname" was a mistake and we
should not be redoing it with rdma. Thus I think we should just check
for "tcp:" and turn it into "tcp://" and then parse the result as a
proper URI. RDMA should always use rdma:// to avoid this backward
compatibility hacks.
My apologies. I'll take care of it next time.....
+
+ /* HACK: source host generates bogus URIs, so fix them up */
+
+ if (!STRPREFIX(uri, well_formed_proto)) {
+ if (virAsprintf(&tmp, "%s://%s", spec.dest.host.proto,
+ uri + strlen(spec.dest.host.proto) + 1) < 0)
return -1;
- uribits = virURIParse(tmp);
- VIR_FREE(tmp);
} else {
uribits = virURIParse(uri);
}
And the same applies here. We should keep the hack only for "tcp:" URIs
and pass all other URIs just straight to virURIParse.
Acknowledged......
> - if (!uribits)
> - return -1;
>
> - if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_FD))
> + if (tmp) {
> + uribits = virURIParse(tmp);
> + VIR_FREE(tmp);
> + }
> +
> + if (!uribits) {
> + ret = -1;
> + goto err;
> + }
> +
> + if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_FD) &&
> + !STREQ(spec.dest.host.proto, "rdma"))
Indentation is 4 spaces off.
> spec.destType = MIGRATION_DEST_CONNECT_HOST;
> else
> spec.destType = MIGRATION_DEST_HOST;
> @@ -3392,6 +3455,9 @@ static int doNativeMigrate(virQEMUDriverPtr driver,
>
> virURIFree(uribits);
>
> +err:
> + VIR_FREE(well_formed_proto);
> +
> return ret;
> }
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 1514715..5a450e2 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2164,6 +2164,7 @@ int qemuMonitorMigrateToFd(qemuMonitorPtr mon,
>
> int qemuMonitorMigrateToHost(qemuMonitorPtr mon,
> unsigned int flags,
> + const char *proto,
> const char *hostname,
> int port)
> {
> @@ -2179,7 +2180,7 @@ int qemuMonitorMigrateToHost(qemuMonitorPtr mon,
> }
>
>
> - if (virAsprintf(&uri, "tcp:%s:%d", hostname, port) < 0)
> + if (virAsprintf(&uri, "%s:%s:%d", proto, hostname, port) < 0)
> return -1;
>
> if (mon->json)
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 27f9cb4..16b0b77 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -476,6 +476,7 @@ int qemuMonitorMigrateToFd(qemuMonitorPtr mon,
>
> int qemuMonitorMigrateToHost(qemuMonitorPtr mon,
> unsigned int flags,
> + const char *proto,
> const char *hostname,
> int port);
>
> diff --git a/src/util/viruri.c b/src/util/viruri.c
> index 35efad8..662029a 100644
> --- a/src/util/viruri.c
> +++ b/src/util/viruri.c
> @@ -187,7 +187,12 @@ virURIParse(const char *uri)
> goto error;
>
> /* First check: does it even make sense to jump inside */
> - if (ret->server != NULL &&
> +
> + /*
> + * IPv6 rdma over iwarp is broken in linux. Waiting for a
> + * fix on the kernel side...
> + */
> + if (ret->server != NULL && !STREQ(ret->scheme, "rdma")
&&
> ret->server[0] == '[') {
> size_t length = strlen(ret->server);
We still need a solution here, though: This function is not respecting
cases where the open IPv6 address "[::]" is not supported - so,
because of the bug in the kernel, binding to and connecting to this
address will always fail.
If we *must* have an IPv4 address listening for RDMA (i.e. '0' or
'0.0.0.0'),
then we need a way to override the default listening address.
Comments?
- Michael