[libvirt] [PATCH v3] qemu: qemuDomainRename and virDomainObjListNumOfDomains ABBA deadlock fix
by Nikolay Shirokovskiy
A pretty nasty deadlock occurs while trying to rename a VM in parallel
with virDomainObjListNumOfDomains.
The short description of the problem is as follows:
Thread #1:
qemuDomainRename:
------> aquires domain lock by qemuDomObjFromDomain
---------> waits for domain list lock in any of the listed functions:
- virDomainObjListFindByName
- virDomainObjListRenameAddNew
- virDomainObjListRenameRemove
Thread #2:
virDomainObjListNumOfDomains:
------> aquires domain list lock
---------> waits for domain lock in virDomainObjListCount
Introduce generic virDomainObjListRename function for renaming domains.
It aquires list lock in right order to avoid deadlock. Callback is used
to make driver specific domain updates.
---
src/conf/virdomainobjlist.c | 98 +++++++++++++++++++-----------
src/conf/virdomainobjlist.h | 16 +++--
src/libvirt_private.syms | 3 +-
src/qemu/qemu_driver.c | 142 ++++++++++++++++++++------------------------
4 files changed, 142 insertions(+), 117 deletions(-)
diff --git a/src/conf/virdomainobjlist.c b/src/conf/virdomainobjlist.c
index 7568f93..6c0be62 100644
--- a/src/conf/virdomainobjlist.c
+++ b/src/conf/virdomainobjlist.c
@@ -313,40 +313,6 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
}
-int
-virDomainObjListRenameAddNew(virDomainObjListPtr doms,
- virDomainObjPtr vm,
- const char *name)
-{
- int ret = -1;
- virObjectLock(doms);
-
- /* Add new name into the hash table of domain names. */
- if (virHashAddEntry(doms->objsName, name, vm) < 0)
- goto cleanup;
-
- /* Okay, this is crazy. virHashAddEntry() does not increment
- * the refcounter of @vm, but virHashRemoveEntry() does
- * decrement it. We need to work around it. */
- virObjectRef(vm);
-
- ret = 0;
- cleanup:
- virObjectUnlock(doms);
- return ret;
-}
-
-
-int
-virDomainObjListRenameRemove(virDomainObjListPtr doms, const char *name)
-{
- virObjectLock(doms);
- virHashRemoveEntry(doms->objsName, name);
- virObjectUnlock(doms);
- return 0;
-}
-
-
/*
* The caller must hold a lock on the driver owning 'doms',
* and must also have locked 'dom', to ensure no one else
@@ -372,6 +338,70 @@ void virDomainObjListRemove(virDomainObjListPtr doms,
}
+/*
+ * The caller must hold a lock on dom. Callbacks should not sleep/wait othewise
+ * operations on all domains will be blocked as the callback is called with
+ * domains lock hold. Domain lock is dropped/reaquired during this operation
+ * thus domain consistency must not rely on this lock solely.
+ */
+
+int
+virDomainObjListRename(virDomainObjListPtr doms,
+ virDomainObjPtr dom,
+ const char *new_name,
+ unsigned int flags,
+ virDomainObjListRenameCallback callback,
+ void *opaque)
+{
+ int ret = -1;
+ char *old_name = NULL;
+ int rc;
+
+ /* doms and dom locks must be attained in right order thus relock dom. */
+ /* dom reference is touched for the benefit of those callers that
+ * hold a lock on dom but not refcount it. */
+ virObjectRef(dom);
+ virObjectUnlock(dom);
+ virObjectLock(doms);
+ virObjectLock(dom);
+ virObjectUnref(dom);
+
+ if (STREQ(dom->def->name, new_name)) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("Can't rename domain to itself"));
+ goto cleanup;
+ }
+
+ if (virHashLookup(doms->objsName, new_name) != NULL) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("domain with name '%s' already exists"),
+ new_name);
+ goto cleanup;
+ }
+
+ if (virHashAddEntry(doms->objsName, new_name, dom) < 0)
+ goto cleanup;
+
+ /* Okay, this is crazy. virHashAddEntry() does not increment
+ * the refcounter of @dom, but virHashRemoveEntry() does
+ * decrement it. We need to work around it. */
+ virObjectRef(dom);
+
+ if (VIR_STRDUP(old_name, dom->def->name) < 0)
+ goto cleanup;
+
+ rc = callback(dom, new_name, flags, opaque);
+ virHashRemoveEntry(doms->objsName, rc < 0 ? new_name : old_name);
+ if (rc < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ virObjectUnlock(doms);
+ VIR_FREE(old_name);
+ return ret;
+}
+
/* The caller must hold lock on 'doms' in addition to 'virDomainObjListRemove'
* requirements
*
diff --git a/src/conf/virdomainobjlist.h b/src/conf/virdomainobjlist.h
index f479598..c0f856c 100644
--- a/src/conf/virdomainobjlist.h
+++ b/src/conf/virdomainobjlist.h
@@ -51,11 +51,17 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms,
unsigned int flags,
virDomainDefPtr *oldDef);
-int virDomainObjListRenameAddNew(virDomainObjListPtr doms,
- virDomainObjPtr vm,
- const char *name);
-int virDomainObjListRenameRemove(virDomainObjListPtr doms,
- const char *name);
+typedef int (*virDomainObjListRenameCallback)(virDomainObjPtr dom,
+ const char *new_name,
+ unsigned int flags,
+ void *opaque);
+int virDomainObjListRename(virDomainObjListPtr doms,
+ virDomainObjPtr dom,
+ const char *new_name,
+ unsigned int flags,
+ virDomainObjListRenameCallback callback,
+ void *opaque);
+
void virDomainObjListRemove(virDomainObjListPtr doms,
virDomainObjPtr dom);
void virDomainObjListRemoveLocked(virDomainObjListPtr doms,
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 3d0ec9d..3ef3468 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -893,8 +893,7 @@ virDomainObjListNew;
virDomainObjListNumOfDomains;
virDomainObjListRemove;
virDomainObjListRemoveLocked;
-virDomainObjListRenameAddNew;
-virDomainObjListRenameRemove;
+virDomainObjListRename;
# cpu/cpu.h
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index abcdbe6..c0e750d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19976,14 +19976,14 @@ qemuDomainSetUserPassword(virDomainPtr dom,
}
-static int qemuDomainRename(virDomainPtr dom,
- const char *new_name,
- unsigned int flags)
+static int
+qemuDomainRenameCallback(virDomainObjPtr vm,
+ const char *new_name,
+ unsigned int flags,
+ void *opaque)
{
- virQEMUDriverPtr driver = dom->conn->privateData;
+ virQEMUDriverPtr driver = opaque;
virQEMUDriverConfigPtr cfg = NULL;
- virDomainObjPtr vm = NULL;
- virDomainObjPtr tmp_dom = NULL;
virObjectEventPtr event_new = NULL;
virObjectEventPtr event_old = NULL;
int ret = -1;
@@ -19993,73 +19993,16 @@ static int qemuDomainRename(virDomainPtr dom,
virCheckFlags(0, ret);
- if (!(vm = qemuDomObjFromDomain(dom)))
- goto cleanup;
-
- if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
- goto cleanup;
-
cfg = virQEMUDriverGetConfig(driver);
- if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
- goto cleanup;
-
- if (virDomainObjIsActive(vm)) {
- virReportError(VIR_ERR_OPERATION_INVALID, "%s",
- _("cannot rename active domain"));
- goto endjob;
- }
-
- if (!vm->persistent) {
- virReportError(VIR_ERR_OPERATION_INVALID, "%s",
- _("cannot rename a transient domain"));
- goto endjob;
- }
-
- if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
- virReportError(VIR_ERR_OPERATION_INVALID,
- "%s", _("domain has to be shutoff before renaming"));
- goto endjob;
- }
-
- if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
- virReportError(VIR_ERR_OPERATION_INVALID,
- "%s", _("cannot rename domain with snapshots"));
- goto endjob;
- }
-
- if (STREQ(vm->def->name, new_name)) {
- virReportError(VIR_ERR_OPERATION_INVALID, "%s",
- _("Can't rename domain to itself"));
- goto endjob;
- }
-
- /*
- * This is a rather racy check, but still better than reporting
- * internal error. And since new_name != name here, there's no
- * deadlock imminent.
- */
- tmp_dom = virDomainObjListFindByName(driver->domains, new_name);
- if (tmp_dom) {
- virObjectUnlock(tmp_dom);
- virObjectUnref(tmp_dom);
- virReportError(VIR_ERR_OPERATION_INVALID,
- _("domain with name '%s' already exists"),
- new_name);
- goto endjob;
- }
-
if (VIR_STRDUP(new_dom_name, new_name) < 0)
- goto endjob;
+ goto cleanup;
if (!(old_dom_cfg_file = virDomainConfigFile(cfg->configDir,
vm->def->name))) {
- goto endjob;
+ goto cleanup;
}
- if (virDomainObjListRenameAddNew(driver->domains, vm, new_name) < 0)
- goto endjob;
-
event_old = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_UNDEFINED,
VIR_DOMAIN_EVENT_UNDEFINED_RENAMED);
@@ -20080,21 +20023,12 @@ static int qemuDomainRename(virDomainPtr dom,
goto rollback;
}
- /* Remove old domain name from table. */
- virDomainObjListRenameRemove(driver->domains, old_dom_name);
-
event_new = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_DEFINED,
VIR_DOMAIN_EVENT_DEFINED_RENAMED);
-
- /* Success, domain has been renamed. */
ret = 0;
- endjob:
- qemuDomainObjEndJob(driver, vm);
-
cleanup:
- virDomainObjEndAPI(&vm);
VIR_FREE(old_dom_cfg_file);
VIR_FREE(old_dom_name);
VIR_FREE(new_dom_name);
@@ -20109,9 +20043,65 @@ static int qemuDomainRename(virDomainPtr dom,
vm->def->name = old_dom_name;
old_dom_name = NULL;
}
+ goto cleanup;
+}
- virDomainObjListRenameRemove(driver->domains, new_name);
- goto endjob;
+static int qemuDomainRename(virDomainPtr dom,
+ const char *new_name,
+ unsigned int flags)
+{
+ virQEMUDriverPtr driver = dom->conn->privateData;
+ virDomainObjPtr vm = NULL;
+ int ret = -1;
+
+ virCheckFlags(0, ret);
+
+ if (!(vm = qemuDomObjFromDomain(dom)))
+ goto cleanup;
+
+ if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0)
+ goto cleanup;
+
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
+ goto cleanup;
+
+ if (virDomainObjIsActive(vm)) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("cannot rename active domain"));
+ goto endjob;
+ }
+
+ if (!vm->persistent) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("cannot rename a transient domain"));
+ goto endjob;
+ }
+
+ if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain has to be shutoff before renaming"));
+ goto endjob;
+ }
+
+ if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("cannot rename domain with snapshots"));
+ goto endjob;
+ }
+
+ if (virDomainObjListRename(driver->domains, vm, new_name, flags,
+ qemuDomainRenameCallback, driver) < 0)
+ goto endjob;
+
+ /* Success, domain has been renamed. */
+ ret = 0;
+
+ endjob:
+ qemuDomainObjEndJob(driver, vm);
+
+ cleanup:
+ virDomainObjEndAPI(&vm);
+ return ret;
}
static virHypervisorDriver qemuHypervisorDriver = {
--
1.8.3.1
8 years, 10 months