[libvirt] [PATCH] Add domainCoreDump to libxl driver

For core dumping to work correctly the following patch for xen is needed: http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01469.html This patch is in xen-unstable and is considered for backport to the xen stable branches. Without this patch the mapped memory pages of the pv guest are not unmapped after core-dump. --- src/libxl/libxl_driver.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 87 insertions(+), 0 deletions(-) diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index b2cc0e8..75008db 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1627,6 +1627,92 @@ libxlDomainGetState(virDomainPtr dom, } static int +libxlDomainCoreDump(virDomainPtr dom, const char *to, int flags) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + libxlDomainObjPrivatePtr priv; + virDomainObjPtr vm; + virDomainEventPtr event = NULL; + int paused = 0; + int ret = -1; + + virCheckFlags(VIR_DUMP_LIVE | VIR_DUMP_CRASH, -1); + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + libxlError(VIR_ERR_NO_DOMAIN, + _("No domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); + goto cleanup; + } + + priv = vm->privateData; + + if (!(flags & VIR_DUMP_LIVE) && + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + if (libxl_domain_pause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to suspend domain '%d' with libxenlight"), + dom->id); + goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_DUMP); + paused = 1; + } + + if (libxl_domain_core_dump(&priv->ctx, dom->id, to) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to dump core of domain '%d' with libxenlight"), + dom->id); + goto cleanup; + } + + if (flags & VIR_DUMP_CRASH) { + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_CRASHED) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to destroy domain '%d'"), dom->id); + goto cleanup; + } + + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + } else if (paused) { + if (libxl_domain_unpause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to resume domain '%d' with libxenlight"), + dom->id); + goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, + VIR_DOMAIN_RUNNING_UNPAUSED); + } + + if ((flags & VIR_DUMP_CRASH) && !vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + + ret = 0; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + libxlDomainEventQueue(driver, event); + libxlDriverUnlock(driver); + return ret; +} + +static int libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus, unsigned int flags) { @@ -2722,6 +2808,7 @@ static virDriver libxlDriver = { .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */ .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */ .domainGetState = libxlDomainGetState, /* 0.9.2 */ + .domainCoreDump = libxlDomainCoreDump, /* 0.9.2 */ .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */ .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */ .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */ -- 1.7.5.1

Markus Groß wrote:
For core dumping to work correctly the following patch for xen is needed: http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01469.html
This patch is in xen-unstable and is considered for backport to the xen stable branches. Without this patch the mapped memory pages of the pv guest are not unmapped after core-dump.
--- src/libxl/libxl_driver.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index b2cc0e8..75008db 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1627,6 +1627,92 @@ libxlDomainGetState(virDomainPtr dom, }
static int +libxlDomainCoreDump(virDomainPtr dom, const char *to, int flags) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + libxlDomainObjPrivatePtr priv; + virDomainObjPtr vm; + virDomainEventPtr event = NULL; + int paused = 0; + int ret = -1; + + virCheckFlags(VIR_DUMP_LIVE | VIR_DUMP_CRASH, -1); + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + libxlError(VIR_ERR_NO_DOMAIN, + _("No domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); + goto cleanup; + } + + priv = vm->privateData; + + if (!(flags & VIR_DUMP_LIVE) && + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + if (libxl_domain_pause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to suspend domain '%d' with libxenlight"), + dom->id);
I think there could be a little more info in that error message, e.g. "Before dumping core, failed to suspend ...".
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_DUMP); + paused = 1; + } + + if (libxl_domain_core_dump(&priv->ctx, dom->id, to) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to dump core of domain '%d' with libxenlight"), + dom->id); + goto cleanup; + }
If core dumping fails and the domain was paused, it won't be unpaused by jumping to cleanup.
+ + if (flags & VIR_DUMP_CRASH) { + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_CRASHED) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to destroy domain '%d'"), dom->id); + goto cleanup; + } + + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + } else if (paused) { + if (libxl_domain_unpause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to resume domain '%d' with libxenlight"), + dom->id);
Here too more info in the error message, e.g. "After dumping core, failed to resume ..."
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, + VIR_DOMAIN_RUNNING_UNPAUSED); + } + + if ((flags & VIR_DUMP_CRASH) && !vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + + ret = 0; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + libxlDomainEventQueue(driver, event); + libxlDriverUnlock(driver);
Do we need to have the driver locked during the core dump? Most of the driver functions use this pattern libxlDriverLock(driver); vm = virDomainFindByUUID(&driver->domains, dom->uuid); libxlDriverUnlock(driver); Regards, Jim
+ return ret; +} + +static int libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus, unsigned int flags) { @@ -2722,6 +2808,7 @@ static virDriver libxlDriver = { .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */ .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */ .domainGetState = libxlDomainGetState, /* 0.9.2 */ + .domainCoreDump = libxlDomainCoreDump, /* 0.9.2 */ .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */ .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */ .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */

Quoting Jim Fehlig <jfehlig@novell.com>:
Markus Groß wrote:
For core dumping to work correctly the following patch for xen is needed: http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01469.html
This patch is in xen-unstable and is considered for backport to the xen stable branches. Without this patch the mapped memory pages of the pv guest are not unmapped after core-dump.
--- src/libxl/libxl_driver.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index b2cc0e8..75008db 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1627,6 +1627,92 @@ libxlDomainGetState(virDomainPtr dom, }
static int +libxlDomainCoreDump(virDomainPtr dom, const char *to, int flags) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + libxlDomainObjPrivatePtr priv; + virDomainObjPtr vm; + virDomainEventPtr event = NULL; + int paused = 0; + int ret = -1; + + virCheckFlags(VIR_DUMP_LIVE | VIR_DUMP_CRASH, -1); + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + libxlError(VIR_ERR_NO_DOMAIN, + _("No domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); + goto cleanup; + } + + priv = vm->privateData; + + if (!(flags & VIR_DUMP_LIVE) && + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + if (libxl_domain_pause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to suspend domain '%d' with libxenlight"), + dom->id);
I think there could be a little more info in that error message, e.g. "Before dumping core, failed to suspend ...".
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_DUMP); + paused = 1; + } + + if (libxl_domain_core_dump(&priv->ctx, dom->id, to) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to dump core of domain '%d' with libxenlight"), + dom->id); + goto cleanup; + }
If core dumping fails and the domain was paused, it won't be unpaused by jumping to cleanup.
+ + if (flags & VIR_DUMP_CRASH) { + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_CRASHED) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to destroy domain '%d'"), dom->id); + goto cleanup; + } + + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + } else if (paused) { + if (libxl_domain_unpause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to resume domain '%d' with libxenlight"), + dom->id);
Here too more info in the error message, e.g. "After dumping core, failed to resume ..."
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, + VIR_DOMAIN_RUNNING_UNPAUSED); + } + + if ((flags & VIR_DUMP_CRASH) && !vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + + ret = 0; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + libxlDomainEventQueue(driver, event); + libxlDriverUnlock(driver);
Do we need to have the driver locked during the core dump? Most of the driver functions use this pattern
libxlDriverLock(driver); vm = virDomainFindByUUID(&driver->domains, dom->uuid); libxlDriverUnlock(driver);
Regards, Jim
I think we do, since the calls to libxlVmReap and virDomainRemoveInactive both use the driver as parameter and at least virDomainRemoveInactive is able to modify the contents of it. But if you can convince me otherwise we can unlock the driver directly after obtaining the vm object. I will post a reworked version of this patch on monday.
+ return ret; +} + +static int libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus, unsigned int flags) { @@ -2722,6 +2808,7 @@ static virDriver libxlDriver = { .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */ .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */ .domainGetState = libxlDomainGetState, /* 0.9.2 */ + .domainCoreDump = libxlDomainCoreDump, /* 0.9.2 */ .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */ .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */ .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */

Markus Groß wrote:
Quoting Jim Fehlig <jfehlig@novell.com>:
Markus Groß wrote:
For core dumping to work correctly the following patch for xen is needed: http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01469.html
This patch is in xen-unstable and is considered for backport to the xen stable branches. Without this patch the mapped memory pages of the pv guest are not unmapped after core-dump.
--- src/libxl/libxl_driver.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index b2cc0e8..75008db 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -1627,6 +1627,92 @@ libxlDomainGetState(virDomainPtr dom, }
static int +libxlDomainCoreDump(virDomainPtr dom, const char *to, int flags) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + libxlDomainObjPrivatePtr priv; + virDomainObjPtr vm; + virDomainEventPtr event = NULL; + int paused = 0; + int ret = -1; + + virCheckFlags(VIR_DUMP_LIVE | VIR_DUMP_CRASH, -1); + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + libxlError(VIR_ERR_NO_DOMAIN, + _("No domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); + goto cleanup; + } + + priv = vm->privateData; + + if (!(flags & VIR_DUMP_LIVE) && + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + if (libxl_domain_pause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to suspend domain '%d' with libxenlight"), + dom->id);
I think there could be a little more info in that error message, e.g. "Before dumping core, failed to suspend ...".
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_DUMP); + paused = 1; + } + + if (libxl_domain_core_dump(&priv->ctx, dom->id, to) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to dump core of domain '%d' with libxenlight"), + dom->id); + goto cleanup; + }
If core dumping fails and the domain was paused, it won't be unpaused by jumping to cleanup.
+ + if (flags & VIR_DUMP_CRASH) { + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_CRASHED) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to destroy domain '%d'"), dom->id); + goto cleanup; + } + + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_CRASHED); + + } else if (paused) { + if (libxl_domain_unpause(&priv->ctx, dom->id) != 0) { + libxlError(VIR_ERR_INTERNAL_ERROR, + _("Failed to resume domain '%d' with libxenlight"), + dom->id);
Here too more info in the error message, e.g. "After dumping core, failed to resume ..."
+ goto cleanup; + } + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, + VIR_DOMAIN_RUNNING_UNPAUSED); + } + + if ((flags & VIR_DUMP_CRASH) && !vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + + ret = 0; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + libxlDomainEventQueue(driver, event); + libxlDriverUnlock(driver);
Do we need to have the driver locked during the core dump? Most of the driver functions use this pattern
libxlDriverLock(driver); vm = virDomainFindByUUID(&driver->domains, dom->uuid); libxlDriverUnlock(driver);
Regards, Jim
I think we do, since the calls to libxlVmReap and virDomainRemoveInactive both use the driver as parameter and at least virDomainRemoveInactive is able to modify the contents of it.
Right, good point.
But if you can convince me otherwise we can unlock the driver directly after obtaining the vm object.
A core dump could take quite some time on a large memory domain. How about unlocking before invoking this long running operation and then reacquiring?
I will post a reworked version of this patch on monday.
Ok, thanks! Monday is a US holiday, but hopefully I'll have some time in the evening to review this and your other patches. We're running out of time before 0.9.2 release ... Regards, Jim
participants (2)
-
Jim Fehlig
-
Markus Groß