libxl can deliver events and invoke callbacks on any application thread
calling into libxl. This can cause deadlock in the libvirt libxl driver
Thread 19 (Thread 0x7f31411ec700 (LWP 14068) "libvirtd"):
#0 0x00007f318520cc7d in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f3185205ed5 in pthread_mutex_lock () from /lib64/libpthread.so.0
#2 0x00007f3189488015 in virMutexLock (m=<optimized out>) at
../../src/util/virthread.c:79
#3 0x00007f3189463f3b in virObjectLock (anyobj=<optimized out>) at
../../src/util/virobject.c:433
#4 0x00007f31894f2f41 in virDomainObjListSearchID (payload=0x7f317400a6d0,
name=<optimized out>, data=0x7f31411eaeac) at ../../src/conf/virdomainobjlist.c:105
JWF: the 'payload' is virDomainObj with domid 28712, 'data' is domid
28710. So looking for 28710 but firsh compare with 28712, which is the virDomainObj we
started working with in f15 and is locked.
#5 0x00007f3189437ac5 in virHashSearch (ctable=0x7f3124025a30,
iter=iter@entry=0x7f31894f2f30 <virDomainObjListSearchID>,
data=data@entry=0x7f31411eaeac, name=name@entry=0x0) at ../../src/util/virhash.c:745
#6 0x00007f31894f3919 in virDomainObjListFindByID (doms=0x7f3124025430, id=<optimized
out>) at ../../src/conf/virdomainobjlist.c:121
#7 0x00007f3152f292e5 in libxlDomainEventHandler (data=0x7f3124023d80,
event=0x7f310c010ae0) at ../../src/libxl/libxl_domain.c:660
#8 0x00007f3152c6ff5d in egc_run_callbacks (egc=egc@entry=0x7f31411eaf50) at
libxl_event.c:1427
#9 0x00007f3152c718bd in libxl__egc_cleanup (egc=0x7f31411eaf50) at libxl_event.c:1458
#10 libxl__ao_inprogress (ao=ao@entry=0x7f310c00b8a0, file=file@entry=0x7f3152cce987
"libxl_domain.c", line=line@entry=730, func=func@entry=0x7f3152ccf750
<__func__.22238> "libxl_domain_unpause") at libxl_event.c:2047
#11 0x00007f3152c8c5b8 in libxl_domain_unpause (ctx=0x7f3124015a40, domid=<optimized
out>, ao_how=ao_how@entry=0x0) at libxl_domain.c:730
#12 0x00007f3152f2a584 in libxl_domain_unpause_0x041200 (domid=<optimized out>,
ctx=<optimized out>) at /usr/include/libxl.h:1756
#13 libxlDomainStart (driver=driver@entry=0x7f3124023d80, vm=vm@entry=0x7f317400a6d0,
start_paused=start_paused@entry=false, restore_fd=restore_fd@entry=-1,
restore_ver=<optimized out>, restore_ver@entry=2) at
../../src/libxl/libxl_domain.c:1482
#14 0x00007f3152f2a6e3 in libxlDomainStartNew (driver=driver@entry=0x7f3124023d80,
vm=vm@entry=0x7f317400a6d0, start_paused=start_paused@entry=false) at
../../src/libxl/libxl_domain.c:1545
#15 0x00007f3152f2a789 in libxlDomainShutdownHandleRestart (driver=0x7f3124023d80,
vm=0x7f317400a6d0) at ../../src/libxl/libxl_domain.c:464 JWF: domid here is 28712
#16 0x00007f3152f2a9e4 in libxlDomainShutdownThread (opaque=<optimized out>) at
../../src/libxl/libxl_domain.c:559
#17 0x00007f3189487ee2 in virThreadHelper (data=<optimized out>) at
../../src/util/virthread.c:196
#18 0x00007f3185203539 in start_thread () from /lib64/libpthread.so.0
#19 0x00007f3184f3becf in clone () from /lib64/libc.so.6
Frame 16 runs a thread created to handle domain shutdown processing for
domid 28712. In this case the event contained the reboot reason, so the
old domain is destroyed and a new one is created by libxlDomainStart new.
After starting the domain, it is unpaused by calling libxl_domain_unpause
in frame 12. While the thread is running within libxl, libxl takes the
opportunity to deliver a pending domain shutdown event for unrelated domid
28710. While searching for the associated virDomainObj by ID, a deadlock is
encountered when attempting to lock the virDomainObj for domid 28712, which
is already locked since this thread is processing its shutdown event.
The deadlock can be avoided by moving the search for a virDomainObj
associated with the event domid to the shutdown thread. The same is done
for the death thread.
Signed-off-by: Jim Fehlig <jfehlig(a)suse.com>
---
src/libxl/libxl_domain.c | 35 ++++++++++++++++++-----------------
1 file changed, 18 insertions(+), 17 deletions(-)
diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index d6c5f7e5b7..366e3b9263 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -480,7 +480,6 @@ libxlDomainShutdownHandleRestart(libxlDriverPrivate *driver,
struct libxlEventHandlerThreadInfo
{
libxlDriverPrivate *driver;
- virDomainObj *vm;
libxl_event *event;
};
@@ -489,7 +488,7 @@ static void
libxlDomainShutdownThread(void *opaque)
{
struct libxlEventHandlerThreadInfo *shutdown_info = opaque;
- virDomainObj *vm = shutdown_info->vm;
+ virDomainObj *vm = NULL;
libxl_event *ev = shutdown_info->event;
libxlDriverPrivate *driver = shutdown_info->driver;
virObjectEvent *dom_event = NULL;
@@ -499,6 +498,12 @@ libxlDomainShutdownThread(void *opaque)
libxl_domain_config_init(&d_config);
+ vm = virDomainObjListFindByID(driver->domains, ev->domid);
+ if (!vm) {
+ /* Nothing to do if we can't find the virDomainObj */
+ goto cleanup;
+ }
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
@@ -616,12 +621,18 @@ static void
libxlDomainDeathThread(void *opaque)
{
struct libxlEventHandlerThreadInfo *death_info = opaque;
- virDomainObj *vm = death_info->vm;
+ virDomainObj *vm = NULL;
libxl_event *ev = death_info->event;
libxlDriverPrivate *driver = death_info->driver;
virObjectEvent *dom_event = NULL;
g_autoptr(libxlDriverConfig) cfg = libxlDriverConfigGet(driver);
+ vm = virDomainObjListFindByID(driver->domains, ev->domid);
+ if (!vm) {
+ /* Nothing to do if we can't find the virDomainObj */
+ goto cleanup;
+ }
+
if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
goto cleanup;
@@ -650,7 +661,6 @@ libxlDomainEventHandler(void *data, libxl_event *event)
{
libxlDriverPrivate *driver = data;
libxl_shutdown_reason xl_reason = event->u.domain_shutdown.shutdown_reason;
- virDomainObj *vm = NULL;
g_autoptr(libxlDriverConfig) cfg = NULL;
struct libxlEventHandlerThreadInfo *thread_info = NULL;
virThread thread;
@@ -671,12 +681,6 @@ libxlDomainEventHandler(void *data, libxl_event *event)
if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND)
goto cleanup;
- vm = virDomainObjListFindByID(driver->domains, event->domid);
- if (!vm) {
- /* Nothing to do if we can't find the virDomainObj */
- goto cleanup;
- }
-
/*
* Start event-specific threads to handle shutdown and death.
* They are potentially lengthy operations and we don't want to be
@@ -686,7 +690,6 @@ libxlDomainEventHandler(void *data, libxl_event *event)
thread_info = g_new0(struct libxlEventHandlerThreadInfo, 1);
thread_info->driver = driver;
- thread_info->vm = vm;
thread_info->event = (libxl_event *)event;
thread_name = g_strdup_printf("shutdown-event-%d", event->domid);
/*
@@ -701,15 +704,14 @@ libxlDomainEventHandler(void *data, libxl_event *event)
goto cleanup;
}
/*
- * virDomainObjEndAPI is called in the shutdown thread, where
- * libxlEventHandlerThreadInfo and libxl_event are also freed.
+ * libxlEventHandlerThreadInfo and libxl_event are freed in the
+ * shutdown thread
*/
return;
} else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH) {
thread_info = g_new0(struct libxlEventHandlerThreadInfo, 1);
thread_info->driver = driver;
- thread_info->vm = vm;
thread_info->event = (libxl_event *)event;
thread_name = g_strdup_printf("death-event-%d", event->domid);
/*
@@ -724,14 +726,13 @@ libxlDomainEventHandler(void *data, libxl_event *event)
goto cleanup;
}
/*
- * virDomainObjEndAPI is called in the death thread, where
- * libxlEventHandlerThreadInfo and libxl_event are also freed.
+ * libxlEventHandlerThreadInfo and libxl_event are freed in the
+ * death thread
*/
return;
}
cleanup:
- virDomainObjEndAPI(&vm);
VIR_FREE(thread_info);
cfg = libxlDriverConfigGet(driver);
/* Cast away any const */
--
2.33.0