[libvirt] [PATCH] Fix deadlock in QEMU close callback APIs

From: "Daniel P. Berrange" <berrange@redhat.com>

There is a lock ordering problem in the QEMU close callback APIs.

When starting a guest we hold a lock on the VM. We then set an
autodestroy callback, which acquires a lock on the close callbacks.

When running auto-destroy, we obtain a lock on the close callbacks,
then run each callback - which obtains a lock on the VM. This causes
a deadlock if anyone tries to start a VM while autodestroy is taking
place.

The fix is to do autodestroy in two phases. First obtain all the
callbacks and remove them from the list while holding the close
callback lock. Then invoke each callback from outside the close
callback lock.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 src/qemu/qemu_conf.c  | 106 ++++++++++++++++++++++++++++++++++++++++++--------
 src/util/virnetlink.c |   5 ++-
 2 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 1cd4b7c..f3b09cf 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -797,20 +797,37 @@ virQEMUCloseCallbacksGet(virQEMUCloseCallbacksPtr closeCallbacks,
     return cb;
 }
 
+
+typedef struct _virQEMUCloseCallbacksListEntry virQEMUCloseCallbacksListEntry;
+typedef virQEMUCloseCallbacksListEntry *virQEMUCloseCallbacksListEntryPtr;
+struct _virQEMUCloseCallbacksListEntry {
+    unsigned char uuid[VIR_UUID_BUFLEN];
+    virQEMUCloseCallback callback;
+};
+
+
+typedef struct _virQEMUCloseCallbacksList virQEMUCloseCallbacksList;
+typedef virQEMUCloseCallbacksList *virQEMUCloseCallbacksListPtr;
+struct _virQEMUCloseCallbacksList {
+    size_t nentries;
+    virQEMUCloseCallbacksListEntryPtr entries;
+};
+
+
 struct virQEMUCloseCallbacksData {
-    virHashTablePtr list;
-    virQEMUDriverPtr driver;
     virConnectPtr conn;
+    virQEMUCloseCallbacksListPtr list;
+    bool oom;
 };
 
+
 static void
-virQEMUCloseCallbacksRunOne(void *payload,
+virQEMUCloseCallbacksGetOne(void *payload,
                             const void *key,
                             void *opaque)
 {
     struct virQEMUCloseCallbacksData *data = opaque;
     qemuDriverCloseDefPtr closeDef = payload;
-    virDomainObjPtr dom;
     const char *uuidstr = key;
     unsigned char uuid[VIR_UUID_BUFLEN];
 
@@ -823,35 +840,90 @@ virQEMUCloseCallbacksRunOne(void *payload,
     if (data->conn != closeDef->conn || !closeDef->cb)
         return;
 
-    if (!(dom = virDomainObjListFindByUUID(data->driver->domains, uuid))) {
-        VIR_DEBUG("No domain object with UUID %s", uuidstr);
+    if (VIR_EXPAND_N(data->list->entries,
+                     data->list->nentries, 1) < 0) {
+        data->oom = true;
         return;
     }
 
-    dom = closeDef->cb(data->driver, dom, data->conn);
-    if (dom)
-        virObjectUnlock(dom);
+    memcpy(data->list->entries[data->list->nentries - 1].uuid,
+           uuid, VIR_UUID_BUFLEN);
+    data->list->entries[data->list->nentries - 1].callback = closeDef->cb;
+}
+
+
+static virQEMUCloseCallbacksListPtr
+virQEMUCloseCallbacksGetForConn(virQEMUCloseCallbacksPtr closeCallbacks,
+                                virConnectPtr conn)
+{
+    virQEMUCloseCallbacksListPtr list = NULL;
+    struct virQEMUCloseCallbacksData data;
+
+    if (VIR_ALLOC(list) < 0) {
+        virReportOOMError();
+        return NULL;
+    }
+
+    data.conn = conn;
+    data.list = list;
+    data.oom = false;
+
+    virHashForEach(closeCallbacks->list, virQEMUCloseCallbacksGetOne, &data);
 
-    virHashRemoveEntry(data->list, uuid);
+    if (data.oom) {
+        VIR_FREE(list->entries);
+        VIR_FREE(list);
+        virReportOOMError();
+        return NULL;
+    }
+
+    return list;
 }
 
+
 void
 virQEMUCloseCallbacksRun(virQEMUCloseCallbacksPtr closeCallbacks,
                          virConnectPtr conn,
                          virQEMUDriverPtr driver)
 {
-    struct virQEMUCloseCallbacksData data = {
-        .driver = driver,
-        .conn = conn
-    };
+    virQEMUCloseCallbacksListPtr list;
+    size_t i;
 
     VIR_DEBUG("conn=%p", conn);
 
-    virObjectLock(closeCallbacks);
-    data.list = closeCallbacks->list;
-    virHashForEach(data.list, virQEMUCloseCallbacksRunOne, &data);
+    /* We must not hold the lock while running the callbacks,
+     * so first we obtain the list of callbacks, then remove
+     * them all from the hash. At that point we can release
+     * the lock and run the callbacks safely. */
+    virObjectLock(closeCallbacks);
+    list = virQEMUCloseCallbacksGetForConn(closeCallbacks, conn);
+    if (!list)
+        return;
+
+    for (i = 0 ; i < list->nentries ; i++) {
+        virHashRemoveEntry(closeCallbacks->list,
+                           list->entries[i].uuid);
+    }
     virObjectUnlock(closeCallbacks);
+
+    for (i = 0 ; i < list->nentries ; i++) {
+        virDomainObjPtr vm;
+
+        if (!(vm = virDomainObjListFindByUUID(driver->domains,
+                                              list->entries[i].uuid))) {
+            char uuidstr[VIR_UUID_STRING_BUFLEN];
+            virUUIDFormat(list->entries[i].uuid, uuidstr);
+            VIR_DEBUG("No domain object with UUID %s", uuidstr);
+            continue;
+        }
+
+        vm = list->entries[i].callback(driver, vm, conn);
+        if (vm)
+            virObjectUnlock(vm);
+    }
+    VIR_FREE(list->entries);
+    VIR_FREE(list);
 }
 
 struct _qemuSharedDiskEntry {
diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
index 0b36fdc..8b47ede 100644
--- a/src/util/virnetlink.c
+++ b/src/util/virnetlink.c
@@ -335,8 +335,9 @@ virNetlinkEventCallback(int watch,
     if (length == 0)
         return;
     if (length < 0) {
-        virReportSystemError(errno,
-                             "%s", _("nl_recv returned with error"));
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("nl_recv returned with error: %s"),
+                       nl_geterror(length));
         return;
     }
-- 
1.7.11.7
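
For readers less familiar with the pattern, here is a minimal standalone sketch of the same
two-phase idea using plain pthreads. All names in it are invented for illustration; it is not
libvirt code and does not use the libvirt API. The registry mutex is held only while the
callbacks are copied out and unlinked; each callback then runs with the mutex released, so a
callback that takes a per-VM lock cannot invert the lock order against a thread that already
holds a VM lock and wants to register a callback.

/* Illustrative sketch only -- hypothetical names, not the libvirt API. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

typedef void (*close_cb)(const char *vm_name);

struct cb_entry {
    char vm_name[64];
    close_cb cb;
};

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static struct cb_entry registry[16];
static size_t nregistered;

/* Phase 1: under the registry lock, copy out and remove all entries. */
static size_t snapshot_callbacks(struct cb_entry *out, size_t max)
{
    size_t n;

    pthread_mutex_lock(&registry_lock);
    n = nregistered < max ? nregistered : max;
    memcpy(out, registry, n * sizeof(*out));
    nregistered = 0;
    pthread_mutex_unlock(&registry_lock);

    return n;
}

/* Phase 2: invoke the callbacks with no registry lock held, so each
 * callback is free to acquire its own per-VM lock without risking the
 * lock-ordering inversion described in the commit message. */
static void run_close_callbacks(void)
{
    struct cb_entry copy[16];
    size_t i, n = snapshot_callbacks(copy, 16);

    for (i = 0; i < n; i++)
        copy[i].cb(copy[i].vm_name);
}

static void autodestroy(const char *vm_name)
{
    printf("auto-destroying %s\n", vm_name);
}

int main(void)
{
    /* Registering a callback only needs the registry lock, never a VM lock
     * held at the same time as it in the opposite order. */
    pthread_mutex_lock(&registry_lock);
    strcpy(registry[0].vm_name, "demo-guest");
    registry[0].cb = autodestroy;
    nregistered = 1;
    pthread_mutex_unlock(&registry_lock);

    run_close_callbacks();
    return 0;
}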

On Thu, Feb 28, 2013 at 01:33:31PM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There is a lock ordering problem in the QEMU close callback APIs.
When starting a guest we hold a lock on the VM. We then set an autodestroy callback, which acquires a lock on the close callbacks.
When running auto-destroy, we obtain a lock on the close callbacks, then run each callback - which obtains a lock on the VM.
This causes a deadlock if anyone tries to start a VM while autodestroy is taking place.
The fix is to do autodestroy in two phases. First obtain all the callbacks and remove them from the list while holding the close callback lock. Then invoke each callback from outside the close callback lock.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 src/qemu/qemu_conf.c  | 106 ++++++++++++++++++++++++++++++++++++++++++--------
 src/util/virnetlink.c |   5 ++-
 2 files changed, 92 insertions(+), 19 deletions(-)
diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
index 0b36fdc..8b47ede 100644
--- a/src/util/virnetlink.c
+++ b/src/util/virnetlink.c
@@ -335,8 +335,9 @@ virNetlinkEventCallback(int watch,
     if (length == 0)
         return;
     if (length < 0) {
-        virReportSystemError(errno,
-                             "%s", _("nl_recv returned with error"));
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("nl_recv returned with error: %s"),
+                       nl_geterror(length));
         return;
     }
This chunk wasn't supposed to be here. I've sent it as a separate fix.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Feb 28, 2013 at 13:33:31 +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There is a lock ordering problem in the QEMU close callback APIs.
When starting a guest we hold a lock on the VM. We then set an autodestroy callback, which acquires a lock on the close callbacks.
When running auto-destroy, we obtain a lock on the close callbacks, then run each callback - which obtains a lock on the VM.
This causes a deadlock if anyone tries to start a VM while autodestroy is taking place.
The fix is to do autodestroy in two phases. First obtain all the callbacks and remove them from the list while holding the close callback lock. Then invoke each callback from outside the close callback lock.
Looks like we can just remove 568a6cda277f04ab9baaeb97490e548b7b608aa6 then, can't we?

Jirka

On Thu, Feb 28, 2013 at 02:50:34PM +0100, Jiri Denemark wrote:
On Thu, Feb 28, 2013 at 13:33:31 +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There is a lock ordering problem in the QEMU close callback APIs.
When starting a guest we hold a lock on the VM. We then set an autodestroy callback, which acquires a lock on the close callbacks.
When running auto-destroy, we obtain a lock on the close callbacks, then run each callback - which obtains a lock on the VM.
This causes a deadlock if anyone tries to start a VM while autodestroy is taking place.
The fix is to do autodestroy in two phases. First obtain all the callbacks and remove them from the list while holding the close callback lock. Then invoke each callback from outside the close callback lock.
Looks like we can just remove 568a6cda277f04ab9baaeb97490e548b7b608aa6 then, can't we?
Quite possibly. I'll investigate that further and test what happens.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Feb 28, 2013 at 01:33:31PM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There is a lock ordering problem in the QEMU close callback APIs.
When starting a guest we hold a lock on the VM. We then set an autodestroy callback, which acquires a lock on the close callbacks.
When running auto-destroy, we obtain a lock on the close callbacks, then run each callback - which obtains a lock on the VM.
This causes a deadlock if anyone tries to start a VM while autodestroy is taking place.
The fix is to do autodestroy in two phases. First obtain all the callbacks and remove them from the list while holding the close callback lock. Then invoke each callback from outside the close callback lock.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 src/qemu/qemu_conf.c  | 106 ++++++++++++++++++++++++++++++++++++++++++--------
 src/util/virnetlink.c |   5 ++-
 2 files changed, 92 insertions(+), 19 deletions(-)
Incidentally, this patch is also a huge performance win. Previously, once autodestroy started running it blocked all startup/shutdown of VMs; when a single client had created 1000 VMs, this blocked libvirtd for a very long time indeed. With this change we get full parallelism in auto-destroy, since only one VM at a time is locked and other VMs can continue to start and stop.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Feb 28, 2013 at 13:33:31 +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There is a lock ordering problem in the QEMU close callback APIs.
When starting a guest we hold a lock on the VM. We then set an autodestroy callback, which acquires a lock on the close callbacks.
When running auto-destroy, we obtain a lock on the close callbacks, then run each callback - which obtains a lock on the VM.
This causes a deadlock if anyone tries to start a VM while autodestroy is taking place.
The fix is to do autodestroy in two phases. First obtain all the callbacks and remove them from the list while holding the close callback lock. Then invoke each callback from outside the close callback lock.
ACK (with the virnetlink hunk removed, of course).

Jirka