libxl events are delivered to libvirt via the libxlDomainEventHandler
callback registered with libxl. Documenation in
$xensrc/tools/libxl/libxl_event.h states that the callback "may occur
on any thread in which the application calls libxl". This can result
in deadlock since many of the libvirt callees of libxl hold a lock on
the virDomainObj they are working on. When the callback is invoked, it
attempts to find a virDomainObj corresponding to the domain ID provided
by libxl. Searching the domain obj list results in locking each obj
before checking if it is active, and its ID equals the requested ID.
Deadlock is possible when attempting to lock an obj that is already
locked further up the call stack. Indeed, Max Ustermann reported an
instance of this deadlock
https://www.redhat.com/archives/libvir-list/2015-November/msg00130.html
Guido Rossmueller also recently stumbled across it
https://www.redhat.com/archives/libvir-list/2016-September/msg00287.html
Fix the deadlock by moving the lookup of virDomainObj to the
libxlDomainShutdownThread. After this patch, libxl events are
enqueued on the libvirt side and processed by dedicated thread,
avoiding the described deadlock.
Reported-by: Max Ustermann <ustermann78(a)web.de>
Reported-by: Guido Rossmueller <Guido.Rossmueller(a)gdata.de>
Signed-off-by: Jim Fehlig <jfehlig(a)suse.com>
---
Essentially this is a V2 of
https://www.redhat.com/archives/libvir-list/2015-November/msg00520.html
But that patch is nearly a year old and "V2" takes a completely
different (and IMO much better) approach to fixing the deadlock.
src/libxl/libxl_domain.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 43f4a7f..a472751 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -421,7 +421,6 @@ virDomainDefParserConfig libxlDomainDefParserConfig = {
struct libxlShutdownThreadInfo
{
libxlDriverPrivatePtr driver;
- virDomainObjPtr vm;
libxl_event *event;
};
@@ -430,7 +429,7 @@ static void
libxlDomainShutdownThread(void *opaque)
{
struct libxlShutdownThreadInfo *shutdown_info = opaque;
- virDomainObjPtr vm = shutdown_info->vm;
+ virDomainObjPtr vm = NULL;
libxl_event *ev = shutdown_info->event;
libxlDriverPrivatePtr driver = shutdown_info->driver;
virObjectEventPtr dom_event = NULL;
@@ -439,6 +438,12 @@ libxlDomainShutdownThread(void *opaque)
cfg = libxlDriverConfigGet(driver);
+ vm = virDomainObjListFindByIDRef(driver->domains, ev->domid);
+ if (!vm) {
+ VIR_INFO("Received event for unknown domain ID %d", ev->domid);
+ goto cleanup;
+ }
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
@@ -547,7 +552,6 @@ void
libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event *event)
{
libxlDriverPrivatePtr driver = data;
- virDomainObjPtr vm = NULL;
libxl_shutdown_reason xl_reason = event->u.domain_shutdown.shutdown_reason;
struct libxlShutdownThreadInfo *shutdown_info = NULL;
virThread thread;
@@ -565,12 +569,6 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event
*event)
if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND)
goto error;
- vm = virDomainObjListFindByIDRef(driver->domains, event->domid);
- if (!vm) {
- VIR_INFO("Received event for unknown domain ID %d", event->domid);
- goto error;
- }
-
/*
* Start a thread to handle shutdown. We don't want to be tying up
* libxl's event machinery by doing a potentially lengthy shutdown.
@@ -579,7 +577,6 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event
*event)
goto error;
shutdown_info->driver = driver;
- shutdown_info->vm = vm;
shutdown_info->event = (libxl_event *)event;
if (virThreadCreate(&thread, false, libxlDomainShutdownThread,
shutdown_info) < 0) {
@@ -591,7 +588,7 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event
*event)
}
/*
- * VM is unlocked and libxl_event freed in shutdown thread
+ * libxlShutdownThreadInfo and libxl_event are freed in shutdown thread
*/
return;
@@ -600,7 +597,6 @@ libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST libxl_event
*event)
/* Cast away any const */
libxl_event_free(cfg->ctx, (libxl_event *)event);
virObjectUnref(cfg);
- virDomainObjEndAPI(&vm);
VIR_FREE(shutdown_info);
}
--
2.6.6