On 11/20/2016 06:36 PM, Bob Liu wrote:
On 11/19/2016 08:22 AM, Jim Fehlig wrote:
> On 11/10/2016 09:14 PM, Bob Liu wrote:
>> Tunnelled migration doesn't require any extra network connections beside the
>> libvirt daemon.
>> It's capable of strong encryption and the default option of openstack-nova.
>>
>> This patch adds the tunnelled migration(Tunnel3params) support to libxl.
>> On the src side, the data flow is:
>> * libxlDoMigrateSend() -> pipe
>> * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream.
>>
>> While on the dest side:
>> Stream -> pipe -> 'recvfd of libxlDomainStartRestore'
>>
>> The usage is the same as p2p migration, execpt adding one extra
'--tunnelled' to
>> the libvirt p2p migration command.
>
> Hi Bob,
>
> Thanks for the patch! A few comments below...
>
Thank you for your review.
>>
>> Signed-off-by: Bob Liu <bob.liu(a)oracle.com>
>> ---
>> src/libxl/libxl_driver.c | 58 ++++++++-
>> src/libxl/libxl_migration.c | 281 +++++++++++++++++++++++++++++++++++++++++---
>> src/libxl/libxl_migration.h | 9 ++
>> 3 files changed, 331 insertions(+), 17 deletions(-)
>>
>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>> index b66cb1f..bc2633b 100644
>> --- a/src/libxl/libxl_driver.c
>> +++ b/src/libxl/libxl_driver.c
>> @@ -5918,6 +5918,61 @@ libxlDomainMigrateBegin3Params(virDomainPtr domain,
>> }
>>
>> static int
>> +libxlDomainMigratePrepareTunnel3Params(virConnectPtr dconn,
>> + virStreamPtr st,
>> + virTypedParameterPtr params,
>> + int nparams,
>> + const char *cookiein,
>> + int cookieinlen,
>> + char **cookieout ATTRIBUTE_UNUSED,
>> + int *cookieoutlen ATTRIBUTE_UNUSED,
>> + unsigned int flags)
>> +{
>> + libxlDriverPrivatePtr driver = dconn->privateData;
>> + virDomainDefPtr def = NULL;
>> + const char *dom_xml = NULL;
>> + const char *dname = NULL;
>> + const char *uri_in = NULL;
>> +
>> +#ifdef LIBXL_HAVE_NO_SUSPEND_RESUME
>> + virReportUnsupportedError();
>> + return -1;
>> +#endif
>> +
>> + virCheckFlags(LIBXL_MIGRATION_FLAGS, -1);
>> + if (virTypedParamsValidate(params, nparams, LIBXL_MIGRATION_PARAMETERS) <
0)
>> + goto error;
>> +
>> + if (virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_DEST_XML,
>> + &dom_xml) < 0 ||
>> + virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_DEST_NAME,
>> + &dname) < 0 ||
>> + virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_URI,
>> + &uri_in) < 0)
>> +
>> + goto error;
>> +
>> + if (!(def = libxlDomainMigrationPrepareDef(driver, dom_xml, dname)))
>> + goto error;
>> +
>> + if (virDomainMigratePrepareTunnel3ParamsEnsureACL(dconn, def) < 0)
>> + goto error;
>> +
>> + if (libxlDomainMigrationPrepareTunnel3(dconn, st, &def, cookiein,
>> + cookieinlen, flags) < 0)
>
> With the exception of the above two calls, this function is identical to
libxlDomainMigratePrepare3Params.
> Perhaps you can use some macro trickery or a common function to reduce the duplicate
code?
>
How about adding a 'is_tunnel' parameter to libxlDomainMigratePrepare3Params()?
That's fine. It seems to be the approach taken in the qemu driver.
>> + goto error;
>> +
>> + return 0;
>> +
>> + error:
>> + virDomainDefFree(def);
>> + return -1;
>> +}
>> +
>> +static int
>> libxlDomainMigratePrepare3Params(virConnectPtr dconn,
>> virTypedParameterPtr params,
>> int nparams,
>> @@ -6017,7 +6072,7 @@ libxlDomainMigratePerform3Params(virDomainPtr dom,
>> if (virDomainMigratePerform3ParamsEnsureACL(dom->conn, vm->def) <
0)
>> goto cleanup;
>>
>> - if (flags & VIR_MIGRATE_PEER2PEER) {
>> + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
>> if (libxlDomainMigrationPerformP2P(driver, vm, dom->conn, dom_xml,
>> dconnuri, uri, dname, flags) < 0)
>> goto cleanup;
>> @@ -6501,6 +6556,7 @@ static virHypervisorDriver libxlHypervisorDriver = {
>> .nodeDeviceReset = libxlNodeDeviceReset, /* 1.2.3 */
>> .domainMigrateBegin3Params = libxlDomainMigrateBegin3Params, /* 1.2.6 */
>> .domainMigratePrepare3Params = libxlDomainMigratePrepare3Params, /* 1.2.6
*/
>> + .domainMigratePrepareTunnel3Params = libxlDomainMigratePrepareTunnel3Params,
/* 2.5.0 */
>> .domainMigratePerform3Params = libxlDomainMigratePerform3Params, /* 1.2.6
*/
>> .domainMigrateFinish3Params = libxlDomainMigrateFinish3Params, /* 1.2.6 */
>> .domainMigrateConfirm3Params = libxlDomainMigrateConfirm3Params, /* 1.2.6
*/
>> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
>> index 534abb8..f3152dc 100644
>> --- a/src/libxl/libxl_migration.c
>> +++ b/src/libxl/libxl_migration.c
>> @@ -44,6 +44,7 @@
>> #include "libxl_migration.h"
>> #include "locking/domain_lock.h"
>> #include "virtypedparam.h"
>> +#include "fdstream.h"
>>
>> #define VIR_FROM_THIS VIR_FROM_LIBXL
>>
>> @@ -484,6 +485,90 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr
driver,
>> }
>>
>> int
>> +libxlDomainMigrationPrepareTunnel3(virConnectPtr dconn,
>> + virStreamPtr st,
>> + virDomainDefPtr *def,
>> + const char *cookiein,
>> + int cookieinlen,
>> + unsigned int flags)
>> +{
>> + libxlMigrationCookiePtr mig = NULL;
>> + libxlDriverPrivatePtr driver = dconn->privateData;
>> + virDomainObjPtr vm = NULL;
>> + libxlMigrationDstArgs *args = NULL;
>> + virThread thread;
>> + int dataFD[2] = { -1, -1 };
>> + int ret = -1;
>> +
>> + if (libxlMigrationEatCookie(cookiein, cookieinlen, &mig) < 0)
>> + goto error;
>> +
>> + if (mig->xenMigStreamVer > LIBXL_SAVE_VERSION) {
>> + virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
>> + _("Xen migration stream version '%d' is not
supported on this host"),
>> + mig->xenMigStreamVer);
>> + goto error;
>> + }
>> +
>> + if (!(vm = virDomainObjListAdd(driver->domains, *def,
>> + driver->xmlopt,
>> + VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
>> + VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
>> + NULL)))
>> + goto error;
>
> I think having a separate function here is fine since there's quite a bit of
difference, but you'll need to update it to account for hook support added by Cedric.
> E.g. see the hook code in libxlDomainMigrationPrepare().
>
Okay, will be updated.
>> +
>> + /*
>> + * The data flow of tunnel3 migration in the dest side:
>> + * stream -> pipe -> recvfd of libxlDomainStartRestore
>> + */
>> + if (pipe(dataFD) < 0)
>> + goto error;
>> +
>> + /* Stream data will be written to pipeIn */
>> + if (virFDStreamOpen(st, dataFD[1]) < 0)
>> + goto error;
>> + dataFD[1] = -1; /* 'st' owns the FD now & will close it */
>> +
>> + if (libxlMigrationDstArgsInitialize() < 0)
>> + goto error;
>> +
>> + if (!(args = virObjectNew(libxlMigrationDstArgsClass)))
>> + goto error;
>> +
>> + args->conn = dconn;
>> + args->vm = vm;
>> + args->flags = flags;
>> + args->migcookie = mig;
>> + /* Receive from pipeOut */
>> + args->recvfd = dataFD[0];
>> + args->nsocks = 0;
>> + if (virThreadCreate(&thread, false, libxlDoMigrateReceive, args) < 0)
{
>> + virReportError(VIR_ERR_OPERATION_FAILED, "%s",
>> + _("Failed to create thread for receiving migration
data"));
>> + goto error;
>> + }
>> +
>> + ret = 0;
>> + goto done;
>> +
>> + error:
>> + VIR_FORCE_CLOSE(dataFD[1]);
>> + VIR_FORCE_CLOSE(dataFD[0]);
>> + virObjectUnref(args);
>> + /* Remove virDomainObj from domain list */
>> + if (vm) {
>> + virDomainObjListRemove(driver->domains, vm);
>> + vm = NULL;
>> + }
>> +
>> + done:
>
> libxlMigrationCookieFree(mig)? Hmm, maybe it would be worth thinking about ways to
extract some of the commonalities between this function and libxlDomainMigratePrepare().
> It would help avoid such bugs in the future.
>
I'm a bit confused about libxlMigrationCookieFree() even in
libxlDomainMigratePrepare().
The msg is set to NULL before call libxlMigrationCookieFree(), so I think the free is
meaningless.
libxlDomainMigrationPrepare():
....
687 args->migcookie = mig;
688 mig = NULL;
...
727 done:
728 libxlMigrationCookieFree(mig);
Ah, you are right. On success, 'mig' will be NULL so this is a noop. On error,
'args' is unref'ed in the 'error' label and will be freed in the
dispose
function. We can cleanup libxlDomainMigrationPrepare() in a separate patch.
And we can't free the migcookie, because the libxlMigrateReceive
> libxlDoMigrateReceive thread will have to use it.
The virObjectUnref(args) call will free the actuall migcookie if I understand right.
Yes, you understand correctly :-).
>> + if (vm)
>> + virObjectUnlock(vm);
>> +
>> + return ret;
>> +}
>> +
>> +int
>> libxlDomainMigrationPrepare(virConnectPtr dconn,
>> virDomainDefPtr *def,
>> const char *uri_in,
>> @@ -710,9 +795,154 @@ libxlDomainMigrationPrepare(virConnectPtr dconn,
>> return ret;
>> }
>>
>> -/* This function is a simplification of virDomainMigrateVersion3Full
>> - * excluding tunnel support and restricting it to migration v3
>> - * with params since it was the first to be introduced in libxl.
>> +typedef struct _libxlTunnelMigrationThread libxlTunnelMigrationThread;
>> +struct _libxlTunnelMigrationThread {
>> + virThread thread;
>
> I think the 'thread' field would be better placed in the libxlTunnelControl
struct. It looks to only be used in {Start,Stop}Tunnel.
>
Will update.
>> + virStreamPtr st;
>> + int srcFD;
>> +};
>> +#define TUNNEL_SEND_BUF_SIZE 65536
>> +
>> +/*
>> + * The data flow of tunnel3 migration in the src side:
>> + * libxlDoMigrateSend() -> pipe
>> + * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream.
>> + */
>> +static void libxlTunnel3MigrationFunc(void *arg)
>> +{
>> + libxlTunnelMigrationThread *data = (libxlTunnelMigrationThread *)arg;
>> + char *buffer = NULL;
>> + struct pollfd fds[1];
>> + int timeout = -1;
>> +
>> + if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) {
>> + virReportError(errno, "%s", _("poll failed in migration
tunnel"));
>> + return;
>> + }
>> +
>> + fds[0].fd = data->srcFD;
>> + for (;;) {
>> + int ret;
>> +
>> + fds[0].events = POLLIN;
>> + fds[0].revents = 0;
>> + ret = poll(fds, ARRAY_CARDINALITY(fds), timeout);
>> + if (ret < 0) {
>> + if (errno == EAGAIN || errno == EINTR)
>> + continue;
>> + virReportError(errno, "%s",
>> + _("poll failed in
libxlTunnel3MigrationFunc"));
>> + goto cleanup;
>> + }
>> +
>> + if (ret == 0) {
>> + VIR_DEBUG("poll got timeout");
>
> Nit: Is that debug message true with a negative timeout?
> The man page isn't really clear, only saying that a return value "of 0
indicates that the call timed out and no file descriptors were ready".
>
>> + break;
>> + }
>> +
>> + if (fds[0].revents & (POLLIN | POLLERR | POLLHUP)) {
>> + int nbytes;
>> +
>> + nbytes = read(data->srcFD, buffer, TUNNEL_SEND_BUF_SIZE);
>> + if (nbytes > 0) {
>> + /* Write to dest stream */
>> + if (virStreamSend(data->st, buffer, nbytes) < 0) {
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to send to
virStream"));
>> + virStreamAbort(data->st);
>> + goto cleanup;
>> + }
>> + } else if (nbytes < 0) {
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to read from
xen side"));
>> + virStreamAbort(data->st);
>> + goto cleanup;
>> + } else {
>> + /* EOF; transferred all data */
>> + break;
>> + }
>> + }
>> + }
>> +
>> + if (virStreamFinish(data->st) < 0)
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to finish
stream"));
>> + cleanup:
>> + VIR_FREE(buffer);
>> +
>> + return;
>> +}
>> +
>> +struct libxlTunnelControl {
>> + libxlTunnelMigrationThread tmThread;
>
> Can this be replaced with the virThread field in libxlTunnelMigrationThread struct?
> I don't see why the libxlTunnelMigrationThread struct needs to be embedded here.
It's used to save the threadPtr, so that libxlMigrationStopTunnel() can cancel the
thread.
Yes, but if you moved the 'thread' field to the libxlTunnelControl struct it
would still be available in libxlMigrationStopTunnel().
> Also, we should use consistent style when declaring these structures.
>
>> + int dataFD[2];
>> +};
>> +
>> +static int
>> +libxlMigrationStartTunnel(libxlDriverPrivatePtr driver,
>> + virDomainObjPtr vm,
>> + unsigned long flags,
>> + virStreamPtr st,
>> + struct libxlTunnelControl *tc)
>> +{
>> + libxlTunnelMigrationThread *tmThreadPtr = NULL;
>> + int ret = -1;
>> +
>> + if (VIR_ALLOC(tc) < 0)
>> + goto out;
>
> It seems better to have the calling function, which owns the libxlTunnelControl
struct, to allocate it.
> In this function we should allocate and fill the libxlTunnelMigrationThread struct,
for use by the thread running libxlTunnel3MigrationFunc.
>
Actually in the previous version, I put all this stuff after the below if directly.
969 if (flags & VIR_MIGRATE_TUNNELLED)
970 ret = libxlMigrationStartTunnel(driver, vm, flags, st, tc);
971 else
No structure 'libxlTunnelMigrationThread' and
libxlMigrationStartTunnel/StopTunnel at all.
E.g.
> VIR_DEBUG("Perform3 uri=%s", NULLSTR(uri_out));
> - ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
> - uri_out, NULL, flags);
> + if (flags & VIR_MIGRATE_TUNNELLED) {
> + if (VIR_ALLOC(libxlTunnelMigationThreadPtr) < 0)
> + goto cleanup;
> + if (pipe(dataFD) < 0) {
> + virReportError(errno, "%s", _("Unable to make
pipes"));
> + goto cleanup;
> + }
> + /* Read from pipe */
> + libxlTunnelMigationThreadPtr->srcFD = dataFD[0];
> + /* Write to dest stream */
> + libxlTunnelMigationThreadPtr->st = st;
> + if (virThreadCreate(&libxlTunnelMigationThreadPtr->thread, true,
> + libxlTunnel3MigrationFunc,
> + libxlTunnelMigationThreadPtr) < 0) {
> + virReportError(errno, "%s",
> + _("Unable to create tunnel migration
thread"));
> + goto cleanup;
> + }
>
> + virObjectUnlock(vm);
> + /* Send data to pipe */
> + ret = libxlDoMigrateSend(driver, vm, flags, dataFD[1]);
> + virObjectLock(vm);
> + } else {
> + ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
> + uri_out, NULL, flags);
> + }
But Joao thought this may not easy to be read.
Which way do you prefer to accept?
I think Joao's suggestion was a good one. I was simply suggesting a minor
cleanup of the libxlTunnelControl and libxlTunnelMigrationThread structs :-).
Regards,
Jim