Markus Groß wrote:
> Am Montag 23 Mai 2011 04:30:12 schrieb Jim Fehlig:
>
>> Jim Fehlig wrote:
>>
>>> Markus Groß wrote:
>>>
>>>
>>>> This patch adds save/restore functionality to the libxl driver.
>>>>
>>>> It is a v2 of this patch:
>>>>
https://www.redhat.com/archives/libvir-list/2011-April/msg00338.html
>>>>
>>>> v2:
>>>> * header is now padded and has a version field
>>>> * the correct restore function from libxl is used
>>>> * only create the restore event once in libxlVmStart
>>>>
>>>>
>>>>
>>> Hi Markus,
>>>
>>> Finally found time to try your patch. Thanks for the patience :-).
>>>
>>>
>>>
>>>> However I ran into a reproducible segfault.
>>>> Assume you saved a vm with:
>>>> # virsh save domU foo.img
>>>>
>>>>
>>>>
>>> I think the problem actually lies in the save function. The domain does
>>> not appear to be cleaned up properly. From xl's perspective after virsh
>>> save domU foo.img
>>>
>>> xen33: # xl list
>>> Name ID Mem VCPUs State
>>> Time(s)
>>> Domain-0 0 2023 8
>>> r----- 330.0
>>> (null) 11 1
>>> 2 --pssd 27.1
>>>
>>> The orphaned domain disappears after libvirtd restart.
>>>
>> I manged to track down this problem, patch posted to xen-devel
>>
>>
http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01314.html
>>
>>
>
> Great! I attached the current version of the save/restore patch.
> It is rebased against the current master.
>
Ok, thanks, I have some comments below.
>
>>>
>>>
>>>> If you restore the save image,
>>>> destroy the vm and restore it again, a segfault occurs:
>>>> # virsh restore foo.img
>>>> # virsh destroy domU
>>>> # virsh restore foo.img
>>>> # segfault
>>>>
>>>>
>> But I still see the segfault, in addition to domain not booting and
>> consuming 100% cpu on first restore :-(. I'll look at these issues next.
>>
I've identified cause of both of these issues. For the domain not
_restoring_ (booting doesn't make much sense on a restore) and consuming
100% cpu, I've posted this patch (which still needs some work)
https://www.redhat.com/archives/libvir-list/2011-May/msg01450.html
The segfault ended up being a bug in libxc. I've posted a fix to xen-devel
http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01511.html
> diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
> index 65110cf..e75e418 100644
> --- a/src/libxl/libxl_conf.h
> +++ b/src/libxl/libxl_conf.h
> @@ -1,5 +1,6 @@
>
> /*---------------------------------------------------------------------------*/
> /* Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
> + * Copyright (C) 2011 Univention GmbH.
> *
> * This library is free software; you can redistribute it and/or
> * modify it under the terms of the GNU Lesser General Public
> @@ -17,6 +18,7 @@
> *
> * Authors:
> * Jim Fehlig <jfehlig(a)novell.com>
> + * Markus Groß <gross(a)univention.de>
> */
>
> /*---------------------------------------------------------------------------*/
>
> @@ -81,6 +83,18 @@ struct _libxlDomainObjPrivate {
> int eventHdl;
> };
>
> +#define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
> +#define LIBXL_SAVE_VERSION 1
> +
> +typedef struct _libxlSavefileHeader libxlSavefileHeader;
> +typedef libxlSavefileHeader *libxlSavefileHeaderPtr;
> +struct _libxlSavefileHeader {
> + char magic[sizeof(LIBXL_SAVE_MAGIC)-1];
> + uint32_t version;
> + uint32_t xmlLen;
> + /* 24 bytes used, pad up to 64 bytes */
> + uint32_t unused[10];
> +};
>
> # define libxlError(code, ...) \
> virReportErrorHelper(VIR_FROM_LIBXL, code, __FILE__, \
> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
> index b2cc0e8..9bc71fd 100644
> --- a/src/libxl/libxl_driver.c
> +++ b/src/libxl/libxl_driver.c
> @@ -29,6 +29,7 @@
> #include <sys/utsname.h>
> #include <math.h>
> #include <libxl.h>
> +#include <fcntl.h>
>
> #include "internal.h"
> #include "logging.h"
> @@ -60,11 +61,10 @@
>
> static libxlDriverPrivatePtr libxl_driver = NULL;
>
> -
> /* Function declarations */
> static int
> -libxlVmStart(libxlDriverPrivatePtr driver,
> - virDomainObjPtr vm, bool start_paused);
> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
> + bool start_paused, int restore_fd);
>
>
> /* Function definitions */
> @@ -168,7 +168,7 @@ libxlAutostartDomain(void *payload, const void
> *name ATTRIBUTE_UNUSED,
> virResetLastError();
>
> if (vm->autostart && !virDomainObjIsActive(vm) &&
> - libxlVmStart(driver, vm, false) < 0) {
> + libxlVmStart(driver, vm, false, -1) < 0) {
> err = virGetLastError();
> VIR_ERROR(_("Failed to autostart VM '%s': %s"),
> vm->def->name,
> @@ -369,7 +369,7 @@ static void libxlEventHandler(int watch,
> break;
> case SHUTDOWN_reboot:
> libxlVmReap(driver, vm, 0, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> - libxlVmStart(driver, vm, 0);
> + libxlVmStart(driver, vm, 0, -1);
> break;
>
You'll need to add a 'case SHUTDOWN_suspend' here and just call
libxlVmCleanup().
> default:
> VIR_INFO("Unhandled shutdown_reason %d",
> info.shutdown_reason);
> @@ -535,8 +535,8 @@ libxlFreeMem(libxlDomainObjPrivatePtr priv,
> libxl_domain_config *d_config)
> * virDomainObjPtr should be locked on invocation
> */
> static int
> -libxlVmStart(libxlDriverPrivatePtr driver,
> - virDomainObjPtr vm, bool start_paused)
> +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
> + bool start_paused, int restore_fd)
> {
> libxl_domain_config d_config;
> virDomainDefPtr def = vm->def;
> @@ -559,12 +559,23 @@ libxlVmStart(libxlDriverPrivatePtr driver,
> goto error;
> }
>
> - ret = libxl_domain_create_new(&priv->ctx, &d_config,
> - NULL, &child_console_pid, &domid);
> + if (restore_fd < 0)
> + ret = libxl_domain_create_new(&priv->ctx, &d_config,
> + NULL, &child_console_pid, &domid);
> + else
> + ret = libxl_domain_create_restore(&priv->ctx, &d_config, NULL,
> + &child_console_pid, &domid,
> + restore_fd);
> +
> if (ret) {
> - libxlError(VIR_ERR_INTERNAL_ERROR,
> - _("libxenlight failed to create new domain
'%s'"),
> - d_config.c_info.name);
> + if (restore_fd < 0)
> + libxlError(VIR_ERR_INTERNAL_ERROR,
> + _("libxenlight failed to create new domain
'%s'"),
> + d_config.c_info.name);
> + else
> + libxlError(VIR_ERR_INTERNAL_ERROR,
> + _("libxenlight failed to restore domain
'%s'"),
> + d_config.c_info.name);
> goto error;
> }
>
> @@ -597,7 +608,9 @@ libxlVmStart(libxlDriverPrivatePtr driver,
> goto error;
>
> event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED,
> - VIR_DOMAIN_EVENT_STARTED_BOOTED);
> + restore_fd < 0 ?
> + VIR_DOMAIN_EVENT_STARTED_BOOTED :
> +
> VIR_DOMAIN_EVENT_STARTED_RESTORED);
> libxlDomainEventQueue(driver, event);
>
> libxl_domain_config_destroy(&d_config);
> @@ -1075,7 +1088,8 @@ libxlDomainCreateXML(virConnectPtr conn,
> const char *xml,
> goto cleanup;
> def = NULL;
>
> - if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED)
> != 0) < 0) {
> + if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0,
> + -1) < 0) {
> virDomainRemoveInactive(&driver->domains, vm);
> vm = NULL;
> goto cleanup;
> @@ -1627,6 +1641,184 @@ libxlDomainGetState(virDomainPtr dom,
> }
>
> static int
> +libxlDomainSave(virDomainPtr dom, const char *to)
> +{
> + libxlDriverPrivatePtr driver = dom->conn->privateData;
> + virDomainObjPtr vm;
> + libxlDomainObjPrivatePtr priv;
> + virDomainEventPtr event = NULL;
> + libxlSavefileHeader hdr;
> + char *xml;
> + uint32_t xml_len;
> + int fd;
> + int ret = -1;
> +
> + libxlDriverLock(driver);
> + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> +
> + if (!vm) {
> + char uuidstr[VIR_UUID_STRING_BUFLEN];
> + virUUIDFormat(dom->uuid, uuidstr);
> + libxlError(VIR_ERR_NO_DOMAIN,
> + _("No domain with matching uuid '%s'"),
uuidstr);
> + goto cleanup;
> + }
> +
> + if (!virDomainObjIsActive(vm)) {
> + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is
> not running"));
> + goto cleanup;
> + }
> +
> + priv = vm->privateData;
> +
> + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) {
> +
> + if ((fd = virFileOpenAs(to, O_CREAT|O_TRUNC|O_WRONLY,
> S_IRUSR|S_IWUSR,
> + getuid(), getgid(), 0)) < 0) {
> + virReportSystemError(-fd,
> + _("Failed to create domain save
> file '%s'"),
> + to);
> + goto cleanup;
> + }
> +
> + if ((xml = virDomainDefFormat(vm->def, 0)) == NULL)
> + goto cleanup;
> + xml_len = strlen(xml) + 1;
> +
> + memset(&hdr, 0, sizeof(hdr));
> + memcpy(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic));
> + hdr.version = LIBXL_SAVE_VERSION;
> + hdr.xmlLen = xml_len;
> +
> + if (safewrite(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + _("Failed to write save file header"));
> + goto cleanup;
> + }
> +
> + if (safewrite(fd, xml, xml_len) != xml_len) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + _("Failed to write xml description"));
> + goto cleanup;
> + }
> +
> + if (libxl_domain_suspend(&priv->ctx, NULL, dom->id, fd) != 0) {
> + libxlError(VIR_ERR_INTERNAL_ERROR,
> + _("Failed to save domain '%d' with
libxenlight"),
> + dom->id);
> + goto cleanup;
> + }
> +
> + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
> + VIR_DOMAIN_EVENT_STOPPED_SAVED);
> +
> + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_SAVED) != 0) {
> + libxlError(VIR_ERR_INTERNAL_ERROR,
> + _("Failed to destroy domain '%d'"),
dom->id);
> + goto cleanup;
> + }
>
And here call libxl_domain_destroy() directly instead of libxlVmReap(),
allowing the event handler to cleanup the vm.
Can you make these changes and ensure all the save/restore issues are
resolved in your environment?
Regards,
Jim
Thanks for your review. I will post a v3 of this patch tomorrow,
incorporating your suggestions.
Cheers,
Markus
> +
> + if (!vm->persistent) {
> + virDomainRemoveInactive(&driver->domains, vm);
> + vm = NULL;
> + }
> + ret = 0;
> + }
> +cleanup:
> + VIR_FREE(xml);
> + if (VIR_CLOSE(fd) < 0)
> + virReportSystemError(errno, "%s", _("cannot close
file"));
> + if (vm)
> + virDomainObjUnlock(vm);
> + if (event)
> + libxlDomainEventQueue(driver, event);
> + libxlDriverUnlock(driver);
> + return ret;
> +}
> +
> +static int
> +libxlDomainRestore(virConnectPtr conn, const char *from)
> +{
> + libxlDriverPrivatePtr driver = conn->privateData;
> + virDomainDefPtr def = NULL;
> + virDomainObjPtr vm = NULL;
> + libxlSavefileHeader hdr;
> + char *xml = NULL;
> + int fd;
> + int ret = -1;
> +
> + libxlDriverLock(driver);
> +
> + if ((fd = virFileOpenAs(from, O_RDONLY, 0, getuid(), getgid(),
> 0)) < 0) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + "%s", _("cannot read domain image"));
> + goto cleanup;
> + }
> +
> + if (saferead(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + "%s", _("failed to read libxl header"));
> + goto cleanup;
> + }
> +
> + if (memcmp(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic))) {
> + libxlError(VIR_ERR_INVALID_ARG, "%s", _("image magic is
> incorrect"));
> + goto cleanup;
> + }
> +
> + if (hdr.version > LIBXL_SAVE_VERSION) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + _("image version is not supported (%d > %d)"),
> + hdr.version, LIBXL_SAVE_VERSION);
> + goto cleanup;
> + }
> +
> + if (hdr.xmlLen <= 0) {
> + libxlError(VIR_ERR_OPERATION_FAILED,
> + _("invalid XML length: %d"), hdr.xmlLen);
> + goto cleanup;
> + }
> +
> + if (VIR_ALLOC_N(xml, hdr.xmlLen) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> +
> + if (saferead(fd, xml, hdr.xmlLen) != hdr.xmlLen) {
> + libxlError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to
> read XML"));
> + goto cleanup;
> + }
> +
> + if (!(def = virDomainDefParseString(driver->caps, xml,
> + VIR_DOMAIN_XML_INACTIVE)))
> + goto cleanup;
> +
> + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0)
> + goto cleanup;
> +
> + if (!(vm = virDomainAssignDef(driver->caps, &driver->domains,
> def, true)))
> + goto cleanup;
> +
> + def = NULL;
> +
> + if ((ret = libxlVmStart(driver, vm, false, fd)) < 0 &&
> + !vm->persistent) {
> + virDomainRemoveInactive(&driver->domains, vm);
> + vm = NULL;
> + }
> +
> +cleanup:
> + VIR_FREE(xml);
> + virDomainDefFree(def);
> + if (VIR_CLOSE(fd) < 0)
> + virReportSystemError(errno, "%s", _("cannot close
file"));
> + if (vm)
> + virDomainObjUnlock(vm);
> + libxlDriverUnlock(driver);
> + return ret;
> +}
> +
> +static int
> libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus,
> unsigned int flags)
> {
> @@ -2096,7 +2288,7 @@ libxlDomainCreateWithFlags(virDomainPtr dom,
> goto cleanup;
> }
>
> - ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0);
> + ret = libxlVmStart(driver, vm, (flags &
> VIR_DOMAIN_START_PAUSED) != 0, -1);
>
> cleanup:
> if (vm)
> @@ -2722,6 +2914,8 @@ static virDriver libxlDriver = {
> .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */
> .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */
> .domainGetState = libxlDomainGetState, /* 0.9.2 */
> + .domainSave = libxlDomainSave, /* 0.9.2 */
> + .domainRestore = libxlDomainRestore, /* 0.9.2 */
> .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */
> .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */
> .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */
>