On Sun, Dec 09, 2018 at 01:01:27PM -0500, Demi M. Obenour wrote:
This can happen via `xl destroy`, for example. When this happens,
libvirt is stuck in an inconsistent state: libvirt believes the domain
is still running, but attempts to use libvirt’s APIs to shutdown the
domain fail. The only way out of this situation is to restart libvirt.
To prevent this from happening, process LIBXL_EVENT_TYPE_DOMAIN_DEATH as
well as LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN, but only if libvirt has not
already begun to destroy the domain.
Signed-off-by: Demi Obenour <demiobenour(a)gmail.com>
---
src/conf/domain_conf.h | 4 ++++
src/libxl/libxl_domain.c | 24 +++++++++++++++++++-----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index b24e6ec3de..d3520bde15 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2620,6 +2620,10 @@ struct _virDomainObj {
unsigned int updated : 1;
unsigned int removing : 1;
+ /* Only used by the Xen backend */
+ unsigned int being_destroyed_by_libvirt : 1;
+ unsigned int already_destroyed : 1;
This ought to go in the _libxlDomainObjPrivate struct instead
diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 5fe3f44fbe..680e5f209f 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -482,9 +482,21 @@ libxlDomainShutdownThread(void *opaque)
goto cleanup;
}
+ VIR_INFO("Domain %d died", event->domid);
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH == ev->type) {
Normal style is to have variable name first, constant second
+ if (vm->being_destroyed_by_libvirt) {
+ VIR_INFO("VM %d already being destroyed by libvirt",
event->domid);
The patch got mangled by your mail client i'm afraid.
+ goto cleanup;
+ }
+
+ VIR_INFO("Marking VM %d as already destroyed", event->domid);
+ vm->already_destroyed = true;
Using 'true' for an "unsigned int :1' bitfield is not valid.
You would nede to declare the variable as a bool, or use an
integer value here.
+ }
+
if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) {
virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF,
VIR_DOMAIN_SHUTOFF_SHUTDOWN);
@@ -620,7 +632,8 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
virThread thread;
libxlDriverConfigPtr cfg;
- if (event->type != LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) {
+ if (LIBXL_EVENT_TYPE_DOMAIN_DEATH != event->type &&
+ LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN != event->type) {
Again, this is contrary to normal libvirt style, so please put it
back to variable name first.
VIR_INFO("Unhandled event type %d",
event->type);
goto error;
}
@@ -629,18 +642,16 @@ libxlDomainEventHandler(void *data,
VIR_LIBXL_EVENT_CONST libxl_event *event)
* Start a thread to handle shutdown. We don't want to be tying up
* libxl's event machinery by doing a potentially lengthy shutdown.
*/
- if (VIR_ALLOC(shutdown_info) < 0)
- goto error;
+ while (VIR_ALLOC(shutdown_info) < 0) {}
Huh ? This is busy-waiting when getting OOM which is definitely
not what we want.
shutdown_info->driver = driver;
shutdown_info->event = (libxl_event *)event;
- if (virThreadCreate(&thread, false, libxlDomainShutdownThread,
+ while (virThreadCreate(&thread, false, libxlDomainShutdownThread,
shutdown_info) < 0) {
Again, busy-waiting when failin to spawn threads.
/*
* Not much we can do on error here except log it.
*/
VIR_ERROR(_("Failed to create thread to handle domain shutdown"));
- goto error;
}
/*
@@ -752,6 +763,9 @@ libxlDomainDestroyInternal(libxlDriverPrivatePtr driver,
{
libxlDriverConfigPtr cfg = libxlDriverConfigGet(driver);
int ret = -1;
+ if (vm->already_destroyed)
+ return -1;
+ vm->being_destroyed_by_libvirt = true;
Again, using a bool "true" with an unsigned int bitfied.
/* Unlock virDomainObj during destroy, which can take considerable
* time on large memory domains.
Regards,
Daniel
--
|:
https://berrange.com -o-
https://www.flickr.com/photos/dberrange :|
|:
https://libvirt.org -o-
https://fstop138.berrange.com :|
|:
https://entangle-photo.org -o-
https://www.instagram.com/dberrange :|