[libvirt] [PATCH 0/4] qemu: Fix offline migration onto the same host

This started as a report on #virt that the following command makes libvirt lose domain and which is worse - remove its XML! virsh migrate --offline --undefinesource --persistent $dom qemu+tcp://localhost/system Turns out, there are two problems. The first one is that we try to add incoming domain definition onto virDomainObjList before parsing migration cookie; the second one being VIR_MIGRATE_UNDEFINE_SOURCE takes effect regardless of migration success or failure. Michal Prívozník (4): qemuMigrationDstPrepareAny: Don't overwrite error in cleanup path qemuMigrationEatCookie: Pass virDomainDef instead of virDomainObj qemuMigrationDstPrepareAny: Parse cookie before adding domain onto list qemuMigrationSrcConfirm: Don't remove domain config if confirm phase fails src/qemu/qemu_migration.c | 42 ++++++++++++++++++++------------ src/qemu/qemu_migration_cookie.c | 23 ++++++++--------- src/qemu/qemu_migration_cookie.h | 4 ++- 3 files changed, 41 insertions(+), 28 deletions(-) -- 2.18.1

There are several functions called in the cleanup path. Some of them do report error (e.g. qemuDomainRemoveInactiveJob()) which may result in overwriting an error reported earlier with some less useful message. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 67940330aa..317df4bed5 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2282,6 +2282,7 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, { virDomainObjPtr vm = NULL; virObjectEventPtr event = NULL; + virErrorPtr origErr; int ret = -1; int dataFD[2] = { -1, -1 }; qemuDomainObjPrivatePtr priv = NULL; @@ -2595,6 +2596,7 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, ret = 0; cleanup: + virErrorPreserveLast(&origErr); VIR_FREE(tlsAlias); qemuProcessIncomingDefFree(incoming); VIR_FREE(xmlout); @@ -2618,6 +2620,7 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, qemuMigrationCookieFree(mig); virObjectUnref(caps); virNWFilterUnlockFilterUpdates(); + virErrorRestore(&origErr); return ret; stopjob: -- 2.18.1

On Thu, Nov 22, 2018 at 14:16:15 +0100, Michal Privoznik wrote:
There are several functions called in the cleanup path. Some of them do report error (e.g. qemuDomainRemoveInactiveJob()) which may result in overwriting an error reported earlier with some less useful message.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 3 +++ 1 file changed, 3 insertions(+)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

The function currently takes virDomainObjPtr because it's using both: the domain definition and domain private data. Unfortunately, this means that in prepare phase we can't parse migration cookie before putting incoming domain def onto domain objects list (addressed in the very next commit). Change the arguments so that virDomainDef and private data are passed instead of virDomainObjPtr. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 16 ++++++++++------ src/qemu/qemu_migration_cookie.c | 23 ++++++++++++----------- src/qemu/qemu_migration_cookie.h | 4 +++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 317df4bed5..28d3a72e32 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2041,7 +2041,8 @@ qemuMigrationSrcBeginPhase(virQEMUDriverPtr driver, if (!(flags & VIR_MIGRATE_OFFLINE)) cookieFlags |= QEMU_MIGRATION_COOKIE_CAPS; - if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm->def, + priv->origname, NULL, NULL, 0, 0))) goto cleanup; if (qemuMigrationBakeCookie(mig, driver, vm, @@ -2411,7 +2412,8 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, priv->hookRun = true; } - if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm->def, origname, NULL, + cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_LOCKSTATE | QEMU_MIGRATION_COOKIE_NBD | QEMU_MIGRATION_COOKIE_MEMORY_HOTPLUG | @@ -2922,7 +2924,8 @@ qemuMigrationSrcConfirmPhase(virQEMUDriverPtr driver, ? QEMU_MIGRATION_PHASE_CONFIRM3 : QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED); - if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm->def, priv->origname, priv, + cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_STATS))) goto cleanup; @@ -3427,7 +3430,8 @@ qemuMigrationSrcRun(virQEMUDriverPtr driver, } } - mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + mig = qemuMigrationEatCookie(driver, vm->def, priv->origname, NULL, + cookiein, cookieinlen, cookieFlags | QEMU_MIGRATION_COOKIE_GRAPHICS | QEMU_MIGRATION_COOKIE_CAPS); @@ -4948,8 +4952,8 @@ qemuMigrationDstFinish(virQEMUDriverPtr driver, * even though VIR_MIGRATE_PERSIST_DEST was not used. */ cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT; - if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, - cookieinlen, cookie_flags))) + if (!(mig = qemuMigrationEatCookie(driver, vm->def, priv->origname, NULL, + cookiein, cookieinlen, cookie_flags))) goto endjob; if (flags & VIR_MIGRATE_OFFLINE) { diff --git a/src/qemu/qemu_migration_cookie.c b/src/qemu/qemu_migration_cookie.c index 60df449d53..295e28ae30 100644 --- a/src/qemu/qemu_migration_cookie.c +++ b/src/qemu/qemu_migration_cookie.c @@ -279,22 +279,22 @@ qemuMigrationCookieNetworkAlloc(virQEMUDriverPtr driver ATTRIBUTE_UNUSED, static qemuMigrationCookiePtr -qemuMigrationCookieNew(virDomainObjPtr dom) +qemuMigrationCookieNew(const virDomainDef *def, + const char *origname) { - qemuDomainObjPrivatePtr priv = dom->privateData; qemuMigrationCookiePtr mig = NULL; const char *name; if (VIR_ALLOC(mig) < 0) goto error; - if (priv->origname) - name = priv->origname; + if (origname) + name = origname; else - name = dom->def->name; + name = def->name; if (VIR_STRDUP(mig->name, name) < 0) goto error; - memcpy(mig->uuid, dom->def->uuid, VIR_UUID_BUFLEN); + memcpy(mig->uuid, def->uuid, VIR_UUID_BUFLEN); if (!(mig->localHostname = virGetHostname())) goto error; @@ -1472,12 +1472,13 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookiePtr qemuMigrationEatCookie(virQEMUDriverPtr driver, - virDomainObjPtr dom, + const virDomainDef *def, + const char *origname, + qemuDomainObjPrivatePtr priv, const char *cookiein, int cookieinlen, unsigned int flags) { - qemuDomainObjPrivatePtr priv = dom->privateData; qemuMigrationCookiePtr mig = NULL; /* Parse & validate incoming cookie (if any) */ @@ -1490,7 +1491,7 @@ qemuMigrationEatCookie(virQEMUDriverPtr driver, VIR_DEBUG("cookielen=%d cookie='%s'", cookieinlen, NULLSTR(cookiein)); - if (!(mig = qemuMigrationCookieNew(dom))) + if (!(mig = qemuMigrationCookieNew(def, origname))) return NULL; if (cookiein && cookieinlen && @@ -1502,9 +1503,9 @@ qemuMigrationEatCookie(virQEMUDriverPtr driver, if (flags & QEMU_MIGRATION_COOKIE_PERSISTENT && mig->persistent && - STRNEQ(dom->def->name, mig->persistent->name)) { + STRNEQ(def->name, mig->persistent->name)) { VIR_FREE(mig->persistent->name); - if (VIR_STRDUP(mig->persistent->name, dom->def->name) < 0) + if (VIR_STRDUP(mig->persistent->name, def->name) < 0) goto error; } diff --git a/src/qemu/qemu_migration_cookie.h b/src/qemu/qemu_migration_cookie.h index 08c5de8f06..b399787074 100644 --- a/src/qemu/qemu_migration_cookie.h +++ b/src/qemu/qemu_migration_cookie.h @@ -160,7 +160,9 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookiePtr qemuMigrationEatCookie(virQEMUDriverPtr driver, - virDomainObjPtr dom, + const virDomainDef *def, + const char *origname, + qemuDomainObjPrivatePtr priv, const char *cookiein, int cookieinlen, unsigned int flags); -- 2.18.1

On Thu, Nov 22, 2018 at 14:16:16 +0100, Michal Privoznik wrote:
The function currently takes virDomainObjPtr because it's using both: the domain definition and domain private data. Unfortunately, this means that in prepare phase we can't parse migration cookie before putting incoming domain def onto domain objects list (addressed in the very next commit). Change the arguments so that virDomainDef and private data are passed instead of virDomainObjPtr.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 16 ++++++++++------ src/qemu/qemu_migration_cookie.c | 23 ++++++++++++----------- src/qemu/qemu_migration_cookie.h | 4 +++- 3 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 317df4bed5..28d3a72e32 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2041,7 +2041,8 @@ qemuMigrationSrcBeginPhase(virQEMUDriverPtr driver, if (!(flags & VIR_MIGRATE_OFFLINE)) cookieFlags |= QEMU_MIGRATION_COOKIE_CAPS;
- if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm->def, + priv->origname, NULL, NULL, 0, 0)))
I think this patch should always pass priv instead of NULL. In the following patch you can replace priv with NULL in the only place where qemuMigrationEatCookie will be called when there's no priv pointer yet. I know we only use priv inside qemuMigrationEatCookie when QEMU_MIGRATION_COOKIE_STATS flag is set. But I think it would be safer to pass priv everywhere anyway.
goto cleanup;
if (qemuMigrationBakeCookie(mig, driver, vm,
...
@@ -4948,8 +4952,8 @@ qemuMigrationDstFinish(virQEMUDriverPtr driver,
A little bit more lines of context: cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK | QEMU_MIGRATION_COOKIE_STATS | QEMU_MIGRATION_COOKIE_NBD; /* Some older versions of libvirt always send persistent XML in the cookie
* even though VIR_MIGRATE_PERSIST_DEST was not used. */ cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT;
- if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, - cookieinlen, cookie_flags))) + if (!(mig = qemuMigrationEatCookie(driver, vm->def, priv->origname, NULL, + cookiein, cookieinlen, cookie_flags))) goto endjob;
cookie_flags contains QEMU_MIGRATION_COOKIE_STATS at this point so passing NULL would just crash libvirtd. Jirka

There are some checks done when parsing a migration cookie. For instance, one of the checks ensures that the domain is not being migrated onto the same host. If that is the case, then we are in big trouble because the @vm is the same domain object used by source and it has some jobs sets and everything so recovering from failed cookie parsing would be needlessly hard. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 28d3a72e32..3875ea828f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2395,6 +2395,20 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, } } + /* Parse cookie earlier than adding the domain onto the + * domain list. Parsing/validation may fail and there's no + * point in having the domain in the list at that point. */ + if (!(mig = qemuMigrationEatCookie(driver, *def, origname, NULL, + cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE | + QEMU_MIGRATION_COOKIE_NBD | + QEMU_MIGRATION_COOKIE_MEMORY_HOTPLUG | + QEMU_MIGRATION_COOKIE_CPU_HOTPLUG | + QEMU_MIGRATION_COOKIE_CPU | + QEMU_MIGRATION_COOKIE_ALLOW_REBOOT | + QEMU_MIGRATION_COOKIE_CAPS))) + goto cleanup; + if (!(vm = virDomainObjListAdd(driver->domains, *def, driver->xmlopt, VIR_DOMAIN_OBJ_LIST_ADD_LIVE | @@ -2412,17 +2426,6 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, priv->hookRun = true; } - if (!(mig = qemuMigrationEatCookie(driver, vm->def, origname, NULL, - cookiein, cookieinlen, - QEMU_MIGRATION_COOKIE_LOCKSTATE | - QEMU_MIGRATION_COOKIE_NBD | - QEMU_MIGRATION_COOKIE_MEMORY_HOTPLUG | - QEMU_MIGRATION_COOKIE_CPU_HOTPLUG | - QEMU_MIGRATION_COOKIE_CPU | - QEMU_MIGRATION_COOKIE_ALLOW_REBOOT | - QEMU_MIGRATION_COOKIE_CAPS))) - goto cleanup; - if (STREQ_NULLABLE(protocol, "rdma") && !virMemoryLimitIsSet(vm->def->mem.hard_limit)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", -- 2.18.1

On Thu, Nov 22, 2018 at 14:16:17 +0100, Michal Privoznik wrote:
There are some checks done when parsing a migration cookie. For instance, one of the checks ensures that the domain is not being migrated onto the same host. If that is the case, then we are in big trouble because the @vm is the same domain object used by source and it has some jobs sets and everything so recovering from failed cookie parsing would be needlessly hard.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 28d3a72e32..3875ea828f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2395,6 +2395,20 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, } }
The change from priv to NULL should be done in this patch.
+ /* Parse cookie earlier than adding the domain onto the + * domain list. Parsing/validation may fail and there's no + * point in having the domain in the list at that point. */ + if (!(mig = qemuMigrationEatCookie(driver, *def, origname, NULL, + cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE | + QEMU_MIGRATION_COOKIE_NBD | + QEMU_MIGRATION_COOKIE_MEMORY_HOTPLUG | + QEMU_MIGRATION_COOKIE_CPU_HOTPLUG | + QEMU_MIGRATION_COOKIE_CPU | + QEMU_MIGRATION_COOKIE_ALLOW_REBOOT | + QEMU_MIGRATION_COOKIE_CAPS))) + goto cleanup; + if (!(vm = virDomainObjListAdd(driver->domains, *def, driver->xmlopt, VIR_DOMAIN_OBJ_LIST_ADD_LIVE | @@ -2412,17 +2426,6 @@ qemuMigrationDstPrepareAny(virQEMUDriverPtr driver, priv->hookRun = true; }
- if (!(mig = qemuMigrationEatCookie(driver, vm->def, origname, NULL, - cookiein, cookieinlen, - QEMU_MIGRATION_COOKIE_LOCKSTATE | - QEMU_MIGRATION_COOKIE_NBD | - QEMU_MIGRATION_COOKIE_MEMORY_HOTPLUG | - QEMU_MIGRATION_COOKIE_CPU_HOTPLUG | - QEMU_MIGRATION_COOKIE_CPU | - QEMU_MIGRATION_COOKIE_ALLOW_REBOOT | - QEMU_MIGRATION_COOKIE_CAPS))) - goto cleanup; - if (STREQ_NULLABLE(protocol, "rdma") && !virMemoryLimitIsSet(vm->def->mem.hard_limit)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s",
Jirka

If migration is cancelled or confirm phase fails the domain should be kept on the source even if VIR_MIGRATE_UNDEFINE_SOURCE was requested. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 3875ea828f..04bc55944f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3044,7 +3044,7 @@ qemuMigrationSrcConfirm(virQEMUDriverPtr driver, qemuMigrationJobFinish(driver, vm); if (!virDomainObjIsActive(vm)) { - if (flags & VIR_MIGRATE_UNDEFINE_SOURCE) { + if (!cancelled && ret >= 0 && flags & VIR_MIGRATE_UNDEFINE_SOURCE) { virDomainDeleteConfig(cfg->configDir, cfg->autostartDir, vm); vm->persistent = 0; } -- 2.18.1

On Thu, Nov 22, 2018 at 14:16:18 +0100, Michal Privoznik wrote:
If migration is cancelled or confirm phase fails the domain should be kept on the source even if VIR_MIGRATE_UNDEFINE_SOURCE was requested.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 3875ea828f..04bc55944f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3044,7 +3044,7 @@ qemuMigrationSrcConfirm(virQEMUDriverPtr driver,
qemuMigrationJobFinish(driver, vm); if (!virDomainObjIsActive(vm)) { - if (flags & VIR_MIGRATE_UNDEFINE_SOURCE) { + if (!cancelled && ret >= 0 && flags & VIR_MIGRATE_UNDEFINE_SOURCE) {
The success is normally indicated by ret == 0. I don't think qemuMigrationSrcConfirmPhase would ever return anything > 0.
virDomainDeleteConfig(cfg->configDir, cfg->autostartDir, vm); vm->persistent = 0; }
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
participants (2)
-
Jiri Denemark
-
Michal Privoznik