[libvirt] [PATCH 0/2] qemu_migration: Check for active domain after talking to remote daemon

https://bugzilla.redhat.com/show_bug.cgi?id=1589730 Jiri Denemark (2): qemu_migration: Rename 'offline' variable in SrcPerformPeer2Peer qemu_migration: Check for active domain after talking to remote daemon src/qemu/qemu_domain.c | 14 ++++++++++- src/qemu/qemu_domain.h | 5 ++-- src/qemu/qemu_migration.c | 51 +++++++++++++++++++++------------------ 3 files changed, 44 insertions(+), 26 deletions(-) -- 2.18.0

The variable is used to store the offline migration capability of the destination daemon. Let's call it 'dstOffline' so that we can later use 'offline' to indicate whether we were asked to do offline migration. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 910766080c..c9aaa38029 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -4399,7 +4399,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, virConnectPtr dconn = NULL; bool p2p; virErrorPtr orig_err = NULL; - bool offline = false; + bool dstOffline = false; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); bool useParams; @@ -4469,8 +4469,8 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, useParams = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_PARAMS); if (flags & VIR_MIGRATE_OFFLINE) - offline = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, - VIR_DRV_FEATURE_MIGRATION_OFFLINE); + dstOffline = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, + VIR_DRV_FEATURE_MIGRATION_OFFLINE); qemuDomainObjExitRemote(vm); if (!p2p) { @@ -4488,7 +4488,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, goto cleanup; } - if (flags & VIR_MIGRATE_OFFLINE && !offline) { + if (flags & VIR_MIGRATE_OFFLINE && !dstOffline) { virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("offline migration is not supported by " "the destination host")); -- 2.18.0

On Thu, Jun 28, 2018 at 16:06:46 +0200, Jiri Denemark wrote:
The variable is used to store the offline migration capability of the destination daemon. Let's call it 'dstOffline' so that we can later use 'offline' to indicate whether we were asked to do offline migration.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
ACK

Once we called qemuDomainObjEnterRemote to talk to the destination daemon during a peer to peer migration, the vm lock is released and we only hold an async job. If the source domain dies at this point the monitor EOF callback is allowed to do its job and (among other things) clear all private data irrelevant for stopped domain. Thus when we call qemuDomainObjExitRemote, the domain may already be gone and we should avoid touching runtime private data (such as current job info). In other words after acquiring the lock in qemuDomainObjExitRemote, we need to check the domain is still alive. Unless we're doing offline migration. https://bugzilla.redhat.com/show_bug.cgi?id=1589730 Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 14 +++++++++++- src/qemu/qemu_domain.h | 5 +++-- src/qemu/qemu_migration.c | 45 ++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 9afe705929..a22c836985 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7016,11 +7016,23 @@ void qemuDomainObjEnterRemote(virDomainObjPtr obj) virObjectUnlock(obj); } -void qemuDomainObjExitRemote(virDomainObjPtr obj) + +int +qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) { virObjectLock(obj); VIR_DEBUG("Exited remote (vm=%p name=%s)", obj, obj->def->name); + + if (checkActive && !virDomainObjIsActive(obj)) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain '%s' is not running"), + obj->def->name); + return -1; + } + + return 0; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 0a9af756b8..30d186a921 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -584,8 +584,9 @@ void qemuDomainObjExitAgent(virDomainObjPtr obj, qemuAgentPtr agent) void qemuDomainObjEnterRemote(virDomainObjPtr obj) ATTRIBUTE_NONNULL(1); -void qemuDomainObjExitRemote(virDomainObjPtr obj) - ATTRIBUTE_NONNULL(1); +int qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; virDomainDefPtr qemuDomainDefCopy(virQEMUDriverPtr driver, virDomainDefPtr src, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index c9aaa38029..cafab2f3e8 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3920,13 +3920,15 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepareTunnel (dconn, st, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepare2 (dconn, &cookie, &cookielen, NULL, &uri_out, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) @@ -3987,7 +3989,8 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, cookie, cookielen, uri_out ? uri_out : dconnuri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + /* The domain is already gone at this point */ + ignore_value(qemuDomainObjExitRemote(vm, false)); if (cancelled && ddomain) VIR_ERROR(_("finish step ignored that migration was cancelled")); @@ -4052,6 +4055,7 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, int nparams = 0; int maxparams = 0; size_t i; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); VIR_DEBUG("driver=%p, sconn=%p, dconn=%p, dconnuri=%s, vm=%p, xmlin=%s, " "dname=%s, uri=%s, graphicsuri=%s, listenAddress=%s, " @@ -4145,7 +4149,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, st, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); if (useParams) { @@ -4157,13 +4162,14 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, cookiein, cookieinlen, &cookieout, &cookieoutlen, uri, &uri_out, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) goto cleanup; - if (flags & VIR_MIGRATE_OFFLINE) { + if (offline) { VIR_DEBUG("Offline migration, skipping Perform phase"); VIR_FREE(cookieout); cookieoutlen = 0; @@ -4253,7 +4259,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3Params (dconn, params, nparams, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } } else { dname = dname ? dname : vm->def->name; @@ -4261,7 +4268,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3 (dconn, dname, cookiein, cookieinlen, &cookieout, &cookieoutlen, dconnuri, uri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } if (cancelled) { @@ -4399,6 +4407,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, virConnectPtr dconn = NULL; bool p2p; virErrorPtr orig_err = NULL; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); bool dstOffline = false; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); bool useParams; @@ -4439,7 +4448,9 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); dconn = virConnectOpenAuth(dconnuri, &virConnectAuthConfig, 0); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; + if (dconn == NULL) { virReportError(VIR_ERR_OPERATION_FAILED, _("Failed to connect to remote libvirt URI %s: %s"), @@ -4468,10 +4479,11 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, VIR_DRV_FEATURE_MIGRATION_V3); useParams = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_PARAMS); - if (flags & VIR_MIGRATE_OFFLINE) + if (offline) dstOffline = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_OFFLINE); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; if (!p2p) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", @@ -4488,20 +4500,13 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, goto cleanup; } - if (flags & VIR_MIGRATE_OFFLINE && !dstOffline) { + if (offline && !dstOffline) { virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("offline migration is not supported by " "the destination host")); goto cleanup; } - /* domain may have been stopped while we were talking to remote daemon */ - if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - /* Change protection is only required on the source side (us), and * only for v3 migration when begin and perform are separate jobs. * But peer-2-peer is already a single job, and we still want to @@ -4526,7 +4531,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); virConnectUnregisterCloseCallback(dconn, qemuMigrationSrcConnectionClosed); virObjectUnref(dconn); - qemuDomainObjExitRemote(vm); + ignore_value(qemuDomainObjExitRemote(vm, false)); if (orig_err) { virSetError(orig_err); virFreeError(orig_err); -- 2.18.0

On Thu, Jun 28, 2018 at 16:06:47 +0200, Jiri Denemark wrote:
Once we called qemuDomainObjEnterRemote to talk to the destination daemon during a peer to peer migration, the vm lock is released and we only hold an async job. If the source domain dies at this point the monitor EOF callback is allowed to do its job and (among other things) clear all private data irrelevant for stopped domain. Thus when we call qemuDomainObjExitRemote, the domain may already be gone and we should avoid touching runtime private data (such as current job info).
In other words after acquiring the lock in qemuDomainObjExitRemote, we need to check the domain is still alive. Unless we're doing offline migration.
https://bugzilla.redhat.com/show_bug.cgi?id=1589730
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 14 +++++++++++- src/qemu/qemu_domain.h | 5 +++-- src/qemu/qemu_migration.c | 45 ++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 23 deletions(-)
[...]
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index c9aaa38029..cafab2f3e8 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3920,13 +3920,15 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepareTunnel (dconn, st, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepare2 (dconn, &cookie, &cookielen, NULL, &uri_out, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1)
There is a check below this which does !virDomainObjIsActive which seems pointless now.
@@ -3987,7 +3989,8 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, cookie, cookielen, uri_out ? uri_out : dconnuri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + /* The domain is already gone at this point */ + ignore_value(qemuDomainObjExitRemote(vm, false)); if (cancelled && ddomain) VIR_ERROR(_("finish step ignored that migration was cancelled"));
[...]
@@ -4488,20 +4500,13 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, goto cleanup; }
- if (flags & VIR_MIGRATE_OFFLINE && !dstOffline) { + if (offline && !dstOffline) {
This slightly mixes two distinct refactors ...
virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("offline migration is not supported by " "the destination host")); goto cleanup; }
ACK
participants (2)
-
Jiri Denemark
-
Peter Krempa