[libvirt] [PATCH V2 0/3] libxl: domain destroy fixes

V2 of a small series to fix issues wrt domain destroy https://www.redhat.com/archives/libvir-list/2015-March/msg01337.html Comments from Konrad and Martin are addressed in this version. Jim Fehlig (3): libxl: Move job acquisition in libxlDomainStart to callers libxl: acquire a job when destroying a domain libxl: drop virDomainObj lock when destroying a domain src/libxl/libxl_domain.c | 81 ++++++++++++++++++------------------------------ src/libxl/libxl_domain.h | 4 --- src/libxl/libxl_driver.c | 80 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 91 insertions(+), 74 deletions(-) -- 1.8.4.5

Let callers of libxlDomainStart decide when it is appropriate to acquire a job on the associated virDomainObj. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- V2: Don't call virDomainObjListRemove() on persistent domain src/libxl/libxl_domain.c | 24 ++++++++------------- src/libxl/libxl_driver.c | 55 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index ce9e4d8..6b9959f 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -908,16 +908,13 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, libxl_domain_config_init(&d_config); - if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) - return ret; - cfg = libxlDriverConfigGet(driver); /* If there is a managed saved state restore it instead of starting * from scratch. The old state is removed once the restoring succeeded. */ if (restore_fd < 0) { managed_save_path = libxlDomainManagedSavePath(driver, vm); if (managed_save_path == NULL) - goto endjob; + goto cleanup; if (virFileExists(managed_save_path)) { @@ -925,7 +922,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, managed_save_path, &def, &hdr); if (managed_save_fd < 0) - goto endjob; + goto cleanup; restore_fd = managed_save_fd; @@ -939,7 +936,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, _("cannot restore domain '%s' uuid %s from a file" " which belongs to domain '%s' uuid %s"), vm->def->name, vm_uuidstr, def->name, def_uuidstr); - goto endjob; + goto cleanup; } virDomainObjAssignDef(vm, def, true, NULL); @@ -956,18 +953,18 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, if (libxlBuildDomainConfig(driver->reservedVNCPorts, vm->def, cfg->ctx, &d_config) < 0) - goto endjob; + goto cleanup; if (cfg->autoballoon && libxlDomainFreeMem(cfg->ctx, &d_config) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("libxenlight failed to get free memory for domain '%s'"), d_config.c_info.name); - goto endjob; + goto cleanup; } if (virHostdevPrepareDomainDevices(hostdev_mgr, LIBXL_DRIVER_NAME, vm->def, VIR_HOSTDEV_SP_PCI) < 0) - goto endjob; + goto cleanup; /* Unlock virDomainObj while creating the domain */ virObjectUnlock(vm); @@ -999,7 +996,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, virReportError(VIR_ERR_INTERNAL_ERROR, _("libxenlight failed to restore domain '%s'"), d_config.c_info.name); - goto endjob; + goto cleanup; } /* @@ -1046,7 +1043,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, libxlDomainEventQueue(driver, event); ret = 0; - goto endjob; + goto cleanup; cleanup_dom: if (priv->deathW) { @@ -1057,10 +1054,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, vm->def->id = -1; virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, VIR_DOMAIN_SHUTOFF_FAILED); - endjob: - if (!libxlDomainObjEndJob(driver, vm)) - vm = NULL; - + cleanup: libxl_domain_config_dispose(&d_config); VIR_FREE(dom_xml); VIR_FREE(managed_save_path); diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index 05f6eb1..e315b32 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -303,18 +303,26 @@ libxlAutostartDomain(virDomainObjPtr vm, virObjectLock(vm); virResetLastError(); + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) { + virObjectUnlock(vm); + return ret; + } + if (vm->autostart && !virDomainObjIsActive(vm) && libxlDomainStart(driver, vm, false, -1) < 0) { err = virGetLastError(); VIR_ERROR(_("Failed to autostart VM '%s': %s"), vm->def->name, err ? err->message : _("unknown error")); - goto cleanup; + goto endjob; } ret = 0; - cleanup: - virObjectUnlock(vm); + + endjob: + if (libxlDomainObjEndJob(driver, vm)) + virObjectUnlock(vm); + return ret; } @@ -885,17 +893,28 @@ libxlDomainCreateXML(virConnectPtr conn, const char *xml, goto cleanup; def = NULL; + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) { + if (!vm->persistent) { + virDomainObjListRemove(driver->domains, vm); + vm = NULL; + } + goto cleanup; + } + if (libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1) < 0) { virDomainObjListRemove(driver->domains, vm); - vm = NULL; - goto cleanup; + goto endjob; } dom = virGetDomain(conn, vm->def->name, vm->def->uuid); if (dom) dom->id = vm->def->id; + endjob: + if (!libxlDomainObjEndJob(driver, vm)) + vm = NULL; + cleanup: virDomainDefFree(def); if (vm) @@ -1681,7 +1700,7 @@ libxlDomainRestoreFlags(virConnectPtr conn, const char *from, fd = libxlDomainSaveImageOpen(driver, cfg, from, &def, &hdr); if (fd < 0) - return -1; + goto cleanup; if (virDomainRestoreFlagsEnsureACL(conn, def) < 0) goto cleanup; @@ -1695,11 +1714,20 @@ libxlDomainRestoreFlags(virConnectPtr conn, const char *from, def = NULL; + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) { + if (!vm->persistent) { + virDomainObjListRemove(driver->domains, vm); + vm = NULL; + } + goto cleanup; + } + ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_SAVE_PAUSED) != 0, fd); - if (ret < 0 && !vm->persistent) { + if (ret < 0 && !vm->persistent) virDomainObjListRemove(driver->domains, vm); + + if (!libxlDomainObjEndJob(driver, vm)) vm = NULL; - } cleanup: if (VIR_CLOSE(fd) < 0) @@ -2567,17 +2595,24 @@ libxlDomainCreateWithFlags(virDomainPtr dom, if (virDomainCreateWithFlagsEnsureACL(dom->conn, vm->def) < 0) goto cleanup; + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) + goto cleanup; + if (virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is already running")); - goto cleanup; + goto endjob; } ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1); if (ret < 0) - goto cleanup; + goto endjob; dom->id = vm->def->id; + endjob: + if (!libxlDomainObjEndJob(driver, vm)) + vm = NULL; + cleanup: if (vm) virObjectUnlock(vm); -- 1.8.4.5

A job should be acquired at the beginning of a domain destroy operation, not at the end when cleaning up the domain. Fix two occurrences of this late job acquisition in the libxl driver. Doing so renders libxlDomainCleanupJob unused, so it is removed. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- V2: Don't acquire a job in libxlDomainAutoCoreDump since caller already has a job. Fix typos in commit message. src/libxl/libxl_domain.c | 53 ++++++++++++++++-------------------------------- src/libxl/libxl_domain.h | 4 ---- src/libxl/libxl_driver.c | 20 ++++++++++-------- 3 files changed, 29 insertions(+), 48 deletions(-) diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index 6b9959f..faf2c19 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -371,6 +371,9 @@ libxlDomainShutdownThread(void *opaque) cfg = libxlDriverConfigGet(driver); + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) + goto cleanup; + if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) { dom_event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -384,7 +387,7 @@ libxlDomainShutdownThread(void *opaque) goto restart; case VIR_DOMAIN_LIFECYCLE_PRESERVE: case VIR_DOMAIN_LIFECYCLE_LAST: - goto cleanup; + goto endjob; } } else if (xl_reason == LIBXL_SHUTDOWN_REASON_CRASH) { dom_event = virDomainEventLifecycleNewFromObj(vm, @@ -399,7 +402,7 @@ libxlDomainShutdownThread(void *opaque) goto restart; case VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE: case VIR_DOMAIN_LIFECYCLE_CRASH_LAST: - goto cleanup; + goto endjob; case VIR_DOMAIN_LIFECYCLE_CRASH_COREDUMP_DESTROY: libxlDomainAutoCoreDump(driver, vm); goto destroy; @@ -420,11 +423,11 @@ libxlDomainShutdownThread(void *opaque) goto restart; case VIR_DOMAIN_LIFECYCLE_PRESERVE: case VIR_DOMAIN_LIFECYCLE_LAST: - goto cleanup; + goto endjob; } } else { VIR_INFO("Unhandled shutdown_reason %d", xl_reason); - goto cleanup; + goto endjob; } destroy: @@ -433,13 +436,11 @@ libxlDomainShutdownThread(void *opaque) dom_event = NULL; } libxl_domain_destroy(cfg->ctx, vm->def->id, NULL); - if (libxlDomainCleanupJob(driver, vm, reason)) { - if (!vm->persistent) { - virDomainObjListRemove(driver->domains, vm); - vm = NULL; - } - } - goto cleanup; + libxlDomainCleanup(driver, vm, reason); + if (!vm->persistent) + virDomainObjListRemove(driver->domains, vm); + + goto endjob; restart: if (dom_event) { @@ -447,13 +448,17 @@ libxlDomainShutdownThread(void *opaque) dom_event = NULL; } libxl_domain_destroy(cfg->ctx, vm->def->id, NULL); - libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN); + libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN); if (libxlDomainStart(driver, vm, false, -1) < 0) { virErrorPtr err = virGetLastError(); VIR_ERROR(_("Failed to restart VM '%s': %s"), vm->def->name, err ? err->message : _("unknown error")); } + endjob: + if (!libxlDomainObjEndJob(driver, vm)) + vm = NULL; + cleanup: if (vm) virObjectUnlock(vm); @@ -691,26 +696,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver, } /* - * Cleanup function for domain that has reached shutoff state. - * Executed in the context of a job. - * - * virDomainObjPtr should be locked on invocation - * Returns true if references remain on virDomainObjPtr, false otherwise. - */ -bool -libxlDomainCleanupJob(libxlDriverPrivatePtr driver, - virDomainObjPtr vm, - virDomainShutoffReason reason) -{ - if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_DESTROY) < 0) - return true; - - libxlDomainCleanup(driver, vm, reason); - - return libxlDomainObjEndJob(driver, vm); -} - -/* * Core dump domain to default dump path. * * virDomainObjPtr must be locked on invocation @@ -735,15 +720,11 @@ libxlDomainAutoCoreDump(libxlDriverPrivatePtr driver, timestr) < 0) goto cleanup; - if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) - goto cleanup; - /* Unlock virDomainObj while dumping core */ virObjectUnlock(vm); libxl_domain_core_dump(cfg->ctx, vm->def->id, dumpfile, NULL); virObjectLock(vm); - ignore_value(libxlDomainObjEndJob(driver, vm)); ret = 0; cleanup: diff --git a/src/libxl/libxl_domain.h b/src/libxl/libxl_domain.h index a032e46..30855a2 100644 --- a/src/libxl/libxl_domain.h +++ b/src/libxl/libxl_domain.h @@ -108,10 +108,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver, virDomainObjPtr vm, virDomainShutoffReason reason); -bool -libxlDomainCleanupJob(libxlDriverPrivatePtr driver, - virDomainObjPtr vm, - virDomainShutoffReason reason); /* * Note: Xen 4.3 removed the const from the event handler signature. * Detect which signature to use based on diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index e315b32..c9623ef 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1240,10 +1240,13 @@ libxlDomainDestroyFlags(virDomainPtr dom, if (virDomainDestroyFlagsEnsureACL(dom->conn, vm->def) < 0) goto cleanup; + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) + goto cleanup; + if (!virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); - goto cleanup; + goto endjob; } event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -1252,18 +1255,19 @@ libxlDomainDestroyFlags(virDomainPtr dom, if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Failed to destroy domain '%d'"), vm->def->id); - goto cleanup; + goto endjob; } - if (libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED)) { - if (!vm->persistent) { - virDomainObjListRemove(driver->domains, vm); - vm = NULL; - } - } + libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED); + if (!vm->persistent) + virDomainObjListRemove(driver->domains, vm); ret = 0; + endjob: + if (!libxlDomainObjEndJob(driver, vm)) + vm = NULL; + cleanup: if (vm) virObjectUnlock(vm); -- 1.8.4.5

A destroy operation can take considerable time on large memory domains due to scrubbing the domain' memory. The operation is running in the context of a job, so unlocking the domain and allowing query operations is safe. Signed-off-by: Jim Fehlig <jfehlig@suse.com> --- V2: Unchanged src/libxl/libxl_domain.c | 4 ++++ src/libxl/libxl_driver.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index faf2c19..0ac5c62 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -435,7 +435,9 @@ libxlDomainShutdownThread(void *opaque) libxlDomainEventQueue(driver, dom_event); dom_event = NULL; } + virObjectUnlock(vm); libxl_domain_destroy(cfg->ctx, vm->def->id, NULL); + virObjectLock(vm); libxlDomainCleanup(driver, vm, reason); if (!vm->persistent) virDomainObjListRemove(driver->domains, vm); @@ -447,7 +449,9 @@ libxlDomainShutdownThread(void *opaque) libxlDomainEventQueue(driver, dom_event); dom_event = NULL; } + virObjectUnlock(vm); libxl_domain_destroy(cfg->ctx, vm->def->id, NULL); + virObjectLock(vm); libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN); if (libxlDomainStart(driver, vm, false, -1) < 0) { virErrorPtr err = virGetLastError(); diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index c9623ef..9e08de3 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1252,7 +1252,10 @@ libxlDomainDestroyFlags(virDomainPtr dom, event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_DESTROYED); - if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) { + virObjectUnlock(vm); + ret = libxl_domain_destroy(cfg->ctx, vm->def->id, NULL); + virObjectLock(vm); + if (ret < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Failed to destroy domain '%d'"), vm->def->id); goto endjob; -- 1.8.4.5

Jim Fehlig wrote:
V2 of a small series to fix issues wrt domain destroy
https://www.redhat.com/archives/libvir-list/2015-March/msg01337.html
Comments from Konrad and Martin are addressed in this version.
Jim Fehlig (3): libxl: Move job acquisition in libxlDomainStart to callers libxl: acquire a job when destroying a domain libxl: drop virDomainObj lock when destroying a domain
While testing this series along with some ref counting improvements suggested by Martin, I noticed a few other places where libxl_domain_destroy is called with virDomainObj locked. I'm going to post a V3 that changes 3/3 to provide a wrapper for unlocking, destroying, and locking the domain. Existing libxl_domain_destroy callers are changed to use the wrapper. Regards, Jim
participants (1)
-
Jim Fehlig