[libvirt] [PATCH 0/4] libxl: a few migration improvements

See individual patches for details. Jim Fehlig (4): libxl: rename goto label libxl: remove domain when migration prepare fails libxl: acquire job though begin phase only libxl: fix crash in migrate confirm for transient domains src/libxl/libxl_driver.c | 7 ++---- src/libxl/libxl_migration.c | 60 ++++++++++++++++++++++----------------------- src/libxl/libxl_migration.h | 2 +- 3 files changed, 33 insertions(+), 36 deletions(-) -- 1.8.4.5

In libxlDomainMigrationPrepare(), the cleanup label handles error conditions and should be renamed as such for clarity. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_migration.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c index 53d961c..cfbbcd0 100644 --- a/src/libxl/libxl_migration.c +++ b/src/libxl/libxl_migration.c @@ -288,31 +288,31 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, VIR_DOMAIN_OBJ_LIST_ADD_LIVE | VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE, NULL))) - goto cleanup; + goto error; /* Create socket connection to receive migration data */ if (!uri_in) { if ((hostname = virGetHostname()) == NULL) - goto cleanup; + goto error; if (STRPREFIX(hostname, "localhost")) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("hostname on destination resolved to localhost," " but migration requires an FQDN")); - goto cleanup; + goto error; } if (virPortAllocatorAcquire(driver->migrationPorts, &port) < 0) - goto cleanup; + goto error; if (virAsprintf(uri_out, "tcp://%s:%d", hostname, port) < 0) - goto cleanup; + goto error; } else { if (!(STRPREFIX(uri_in, "tcp://"))) { /* not full URI, add prefix tcp:// */ char *tmp; if (virAsprintf(&tmp, "tcp://%s", uri_in) < 0) - goto cleanup; + goto error; uri = virURIParse(tmp); VIR_FREE(tmp); } else { @@ -323,28 +323,28 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, virReportError(VIR_ERR_INVALID_ARG, _("unable to parse URI: %s"), uri_in); - goto cleanup; + goto error; } if (uri->server == NULL) { virReportError(VIR_ERR_INVALID_ARG, _("missing host in migration URI: %s"), uri_in); - goto cleanup; + goto error; } else { hostname = uri->server; } if (uri->port == 0) { if (virPortAllocatorAcquire(driver->migrationPorts, &port) < 0) - goto cleanup; + goto error; } else { port = uri->port; } if (virAsprintf(uri_out, "tcp://%s:%d", hostname, port) < 0) - goto cleanup; + goto error; } snprintf(portstr, sizeof(portstr), "%d", port); @@ -352,14 +352,14 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, if (virNetSocketNewListenTCP(hostname, portstr, &socks, &nsocks) < 0) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Fail to create socket for incoming migration")); - goto cleanup; + goto error; } if (libxlMigrationDstArgsInitialize() < 0) - goto cleanup; + goto error; if (!(args = virObjectNew(libxlMigrationDstArgsClass))) - goto cleanup; + goto error; args->conn = dconn; args->vm = vm; @@ -395,12 +395,12 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, virObjectUnref(args); if (!nsocks_listen) - goto cleanup; + goto error; ret = 0; goto done; - cleanup: + error: for (i = 0; i < nsocks; i++) { virNetSocketClose(socks[i]); virObjectUnref(socks[i]); -- 1.8.4.5

On 07/08/2014 03:52 PM, Jim Fehlig wrote:
In libxlDomainMigrationPrepare(), the cleanup label handles error conditions and should be renamed as such for clarity.
Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_migration.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

In libxlDomainMigrationPrepare(), a new virDomainObj is created from the incoming domain def and added to the driver's domain list, but never removed if there are subsequent failures during the prepare phase. targethost# virsh list --all sourcehost# virsh migrate --live dom xen+ssh://targethost/system error: operation failed: Fail to create socket for incoming migration. targethost# virsh list --all error: Failed to list domains error: name in virGetDomain must not be NULL After adding code to remove the domain on prepare failure, noticed that libvirtd crashed due to double free of the virDomainDef. Similar to the qemu driver, pass a pointer to virDomainDefPtr so it can be set to NULL once a virDomainObj is created from it. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_driver.c | 2 +- src/libxl/libxl_migration.c | 10 ++++++++-- src/libxl/libxl_migration.h | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index 9aec78d..b5f8703 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -4435,7 +4435,7 @@ libxlDomainMigratePrepare3Params(virConnectPtr dconn, if (virDomainMigratePrepare3ParamsEnsureACL(dconn, def) < 0) goto error; - if (libxlDomainMigrationPrepare(dconn, def, uri_in, uri_out, flags) < 0) + if (libxlDomainMigrationPrepare(dconn, &def, uri_in, uri_out, flags) < 0) goto error; return 0; diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c index cfbbcd0..334aa85 100644 --- a/src/libxl/libxl_migration.c +++ b/src/libxl/libxl_migration.c @@ -265,7 +265,7 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver, int libxlDomainMigrationPrepare(virConnectPtr dconn, - virDomainDefPtr def, + virDomainDefPtr *def, const char *uri_in, char **uri_out, unsigned int flags) @@ -283,12 +283,13 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, size_t i; int ret = -1; - if (!(vm = virDomainObjListAdd(driver->domains, def, + if (!(vm = virDomainObjListAdd(driver->domains, *def, driver->xmlopt, VIR_DOMAIN_OBJ_LIST_ADD_LIVE | VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE, NULL))) goto error; + *def = NULL; /* Create socket connection to receive migration data */ if (!uri_in) { @@ -405,6 +406,11 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, virNetSocketClose(socks[i]); virObjectUnref(socks[i]); } + /* Remove virDomainObj from domain list */ + if (vm) { + virDomainObjListRemove(driver->domains, vm); + vm = NULL; + } done: virURIFree(uri); diff --git a/src/libxl/libxl_migration.h b/src/libxl/libxl_migration.h index aab96f5..20b45d8 100644 --- a/src/libxl/libxl_migration.h +++ b/src/libxl/libxl_migration.h @@ -50,7 +50,7 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver, int libxlDomainMigrationPrepare(virConnectPtr dconn, - virDomainDefPtr def, + virDomainDefPtr *def, const char *uri_in, char **uri_out, unsigned int flags); -- 1.8.4.5

On 07/08/2014 03:52 PM, Jim Fehlig wrote:
In libxlDomainMigrationPrepare(), a new virDomainObj is created from the incoming domain def and added to the driver's domain list, but never removed if there are subsequent failures during the prepare phase.
targethost# virsh list --all
sourcehost# virsh migrate --live dom xen+ssh://targethost/system error: operation failed: Fail to create socket for incoming migration.
targethost# virsh list --all error: Failed to list domains error: name in virGetDomain must not be NULL
After adding code to remove the domain on prepare failure, noticed that libvirtd crashed due to double free of the virDomainDef. Similar to the qemu driver, pass a pointer to virDomainDefPtr so it can be set to NULL once a virDomainObj is created from it.
Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_driver.c | 2 +- src/libxl/libxl_migration.c | 10 ++++++++-- src/libxl/libxl_migration.h | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-)
ACK -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

During migration, the libxl driver starts a modify job in the begin phase, ending the job in the confirm phase. This is essentially VIR_MIGRATE_CHANGE_PROTECTION semantics, but the driver does not support that flag. Without CHANGE_PROTECTION support, the job would never be terminated in error conditions where migrate confirm phase is not executed. Further attempts to modify the domain would result in failure to acquire a job after LIBXL_JOB_WAIT_TIME. Similar to the qemu driver, end the job in the begin phase. Protecting the domain object across all phases of migration can be done in a future patch adding CHANGE_PROTECTION support. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_driver.c | 5 +---- src/libxl/libxl_migration.c | 16 ++++------------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index b5f8703..b27581e 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -4491,11 +4491,8 @@ libxlDomainMigratePerform3Params(virDomainPtr dom, goto cleanup; if (libxlDomainMigrationPerform(driver, vm, dom_xml, dconnuri, - uri, dname, flags) < 0) { - /* Job terminated and vm unlocked if MigrationPerform failed */ - vm = NULL; + uri, dname, flags) < 0) goto cleanup; - } ret = 0; diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c index 334aa85..ce3f9d5 100644 --- a/src/libxl/libxl_migration.c +++ b/src/libxl/libxl_migration.c @@ -215,6 +215,10 @@ libxlDomainMigrationBegin(virConnectPtr conn, xml = virDomainDefFormat(def, VIR_DOMAIN_XML_SECURE); + endjob: + if (!libxlDomainObjEndJob(driver, vm)) + vm = NULL; + cleanup: if (vm) virObjectUnlock(vm); @@ -222,11 +226,6 @@ libxlDomainMigrationBegin(virConnectPtr conn, virDomainDefFree(tmpdef); virObjectUnref(cfg); return xml; - - endjob: - if (!libxlDomainObjEndJob(driver, vm)) - vm = NULL; - goto cleanup; } virDomainDefPtr @@ -468,11 +467,6 @@ libxlDomainMigrationPerform(libxlDriverPrivatePtr driver, virObjectLock(vm); cleanup: - /* If failure, terminate the job started in MigrationBegin */ - if (ret == -1) { - if (libxlDomainObjEndJob(driver, vm)) - virObjectUnlock(vm); - } VIR_FORCE_CLOSE(sockfd); virURIFree(uri); return ret; @@ -578,8 +572,6 @@ libxlDomainMigrationConfirm(libxlDriverPrivatePtr driver, ret = 0; cleanup: - if (!libxlDomainObjEndJob(driver, vm)) - vm = NULL; if (event) libxlDomainEventQueue(driver, event); if (vm) -- 1.8.4.5

On 07/08/2014 03:52 PM, Jim Fehlig wrote:
During migration, the libxl driver starts a modify job in the begin phase, ending the job in the confirm phase. This is essentially VIR_MIGRATE_CHANGE_PROTECTION semantics, but the driver does not support that flag. Without CHANGE_PROTECTION support, the job would never be terminated in error conditions where migrate confirm phase is not executed. Further attempts to modify the domain would result in failure to acquire a job after LIBXL_JOB_WAIT_TIME.
Similar to the qemu driver, end the job in the begin phase. Protecting the domain object across all phases of migration can be done in a future patch adding CHANGE_PROTECTION support.
Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_driver.c | 5 +---- src/libxl/libxl_migration.c | 16 ++++------------ 2 files changed, 5 insertions(+), 16 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

In libxlDomainMigrationConfirm(), a transient domain is removed from the domain list after successful migration. Later in cleanup, the domain object is unlocked, resulting in a crash Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fb4208ed700 (LWP 12044)] 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 169 if (klass->magic == parent->magic) (gdb) bt 0 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 1 0x00007fb42672591b in virObjectIsClass (anyobj=0x7fb4100082b0, klass=0x7fb42830d0c0) at util/virobject.c:365 2 0x00007fb42672583c in virObjectUnlock (anyobj=0x7fb4100082b0) at util/virobject.c:338 3 0x00007fb41a8c7d7a in libxlDomainMigrationConfirm (driver=0x7fb4100404c0, vm=0x7fb4100082b0, flags=1, cancelled=0) at libxl/libxl_migration.c:583 Fix by setting the virDomainObjPtr to NULL after removing it from the domain list. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_migration.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c index ce3f9d5..dbb5a8f 100644 --- a/src/libxl/libxl_migration.c +++ b/src/libxl/libxl_migration.c @@ -566,8 +566,10 @@ libxlDomainMigrationConfirm(libxlDriverPrivatePtr driver, if (flags & VIR_MIGRATE_UNDEFINE_SOURCE) virDomainDeleteConfig(cfg->configDir, cfg->autostartDir, vm); - if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) + if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { virDomainObjListRemove(driver->domains, vm); + vm = NULL; + } ret = 0; -- 1.8.4.5

On 07/08/2014 03:52 PM, Jim Fehlig wrote:
In libxlDomainMigrationConfirm(), a transient domain is removed from the domain list after successful migration. Later in cleanup, the domain object is unlocked, resulting in a crash
Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fb4208ed700 (LWP 12044)] 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 169 if (klass->magic == parent->magic) (gdb) bt 0 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 1 0x00007fb42672591b in virObjectIsClass (anyobj=0x7fb4100082b0, klass=0x7fb42830d0c0) at util/virobject.c:365 2 0x00007fb42672583c in virObjectUnlock (anyobj=0x7fb4100082b0) at util/virobject.c:338 3 0x00007fb41a8c7d7a in libxlDomainMigrationConfirm (driver=0x7fb4100404c0, vm=0x7fb4100082b0, flags=1, cancelled=0) at libxl/libxl_migration.c:583
Fix by setting the virDomainObjPtr to NULL after removing it from the domain list.
Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_migration.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Eric Blake wrote:
On 07/08/2014 03:52 PM, Jim Fehlig wrote:
In libxlDomainMigrationConfirm(), a transient domain is removed from the domain list after successful migration. Later in cleanup, the domain object is unlocked, resulting in a crash
Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fb4208ed700 (LWP 12044)] 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 169 if (klass->magic == parent->magic) (gdb) bt 0 0x00007fb4267251e6 in virClassIsDerivedFrom (klass=0xdeadbeef, parent=0x7fb42830d0c0) at util/virobject.c:169 1 0x00007fb42672591b in virObjectIsClass (anyobj=0x7fb4100082b0, klass=0x7fb42830d0c0) at util/virobject.c:365 2 0x00007fb42672583c in virObjectUnlock (anyobj=0x7fb4100082b0) at util/virobject.c:338 3 0x00007fb41a8c7d7a in libxlDomainMigrationConfirm (driver=0x7fb4100404c0, vm=0x7fb4100082b0, flags=1, cancelled=0) at libxl/libxl_migration.c:583
Fix by setting the virDomainObjPtr to NULL after removing it from the domain list.
Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- src/libxl/libxl_migration.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
ACK.
Thanks for the reviews. I've pushed the series now. Regards, Jim
participants (2)
-
Eric Blake
-
Jim Fehlig