
On Wed, Apr 06, 2011 at 03:19:51PM +0800, Hu Tao wrote:
This patch also eliminates a dead-lock bug in qemuDomainObjBeginJobWithDriver: if virCondWaitUntil() timeouts, the thread tries to acquire qemu driver lock while holding virDomainObj lock. --- src/conf/domain_conf.c | 56 ++++---- src/conf/domain_conf.h | 6 +- src/openvz/openvz_conf.c | 8 +- src/qemu/qemu_domain.c | 32 ++--- src/qemu/qemu_domain.h | 2 +- src/qemu/qemu_driver.c | 304 ++++++++++++++++++++------------------------- src/qemu/qemu_migration.c | 45 +++---- src/qemu/qemu_process.c | 33 ++--- src/vmware/vmware_conf.c | 2 +- 9 files changed, 215 insertions(+), 273 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 90a1317..fc76a00 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -47,6 +47,7 @@ #include "storage_file.h" #include "files.h" #include "bitmap.h" +#include "virobject.h"
#define VIR_FROM_THIS VIR_FROM_DOMAIN
@@ -395,9 +396,7 @@ static void virDomainObjListDataFree(void *payload, const void *name ATTRIBUTE_UNUSED) { virDomainObjPtr obj = payload; - virDomainObjLock(obj); - if (virDomainObjUnref(obj) > 0) - virDomainObjUnlock(obj); + virDomainObjUnref(obj); }
int virDomainObjListInit(virDomainObjListPtr doms) @@ -437,7 +436,7 @@ virDomainObjPtr virDomainFindByID(const virDomainObjListPtr doms, virDomainObjPtr obj; obj = virHashSearch(doms->objs, virDomainObjListSearchID, &id); if (obj) - virDomainObjLock(obj); + virDomainObjRef(obj); return obj; }
@@ -452,7 +451,7 @@ virDomainObjPtr virDomainFindByUUID(const virDomainObjListPtr doms,
obj = virHashLookup(doms->objs, uuidstr); if (obj) - virDomainObjLock(obj); + virDomainObjRef(obj); return obj; }
@@ -476,7 +475,7 @@ virDomainObjPtr virDomainFindByName(const virDomainObjListPtr doms, virDomainObjPtr obj; obj = virHashSearch(doms->objs, virDomainObjListSearchName, name); if (obj) - virDomainObjLock(obj); + virDomainObjRef(obj); return obj; }
This is a major change in semantics, which makes pretty much every single caller non-thread safe, unless the callers are all changed todo virDomainObjLock immediately after calling this. So I don't really see the point in this - it just means more code duplication.
@@ -967,6 +966,12 @@ static void virDomainObjFree(virDomainObjPtr dom) { if (!dom) return; + virDomainObjUnref(dom); +} + +static void doDomainObjFree(virObjectPtr obj) +{ + virDomainObjPtr dom = (virDomainObjPtr)obj;
VIR_DEBUG("obj=%p", dom); virDomainDefFree(dom->def); @@ -984,21 +989,13 @@ static void virDomainObjFree(virDomainObjPtr dom)
void virDomainObjRef(virDomainObjPtr dom) { - dom->refs++; - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs); + virObjectRef(&dom->obj); }
-int virDomainObjUnref(virDomainObjPtr dom) +void virDomainObjUnref(virDomainObjPtr dom) { - dom->refs--; - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs); - if (dom->refs == 0) { - virDomainObjUnlock(dom); - virDomainObjFree(dom); - return 0; - } - return dom->refs; + virObjectUnref(&dom->obj); }
static virDomainObjPtr virDomainObjNew(virCapsPtr caps) @@ -1010,6 +1007,11 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps) return NULL; }
+ if (virObjectInit(&domain->obj, doDomainObjFree)) { + VIR_FREE(domain); + return NULL; + } + if (caps->privateDataAllocFunc && !(domain->privateData = (caps->privateDataAllocFunc)())) { virReportOOMError(); @@ -1027,9 +1029,7 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps) return NULL; }
- virDomainObjLock(domain); domain->state = VIR_DOMAIN_SHUTOFF; - domain->refs = 1;
virDomainSnapshotObjListInit(&domain->snapshots);
@@ -1075,8 +1075,10 @@ virDomainObjPtr virDomainAssignDef(virCapsPtr caps, domain->def = def;
virUUIDFormat(def->uuid, uuidstr); + virDomainObjRef(domain); if (virHashAddEntry(doms->objs, uuidstr, domain) < 0) { - VIR_FREE(domain); + virDomainObjUnref(domain); + virDomainObjFree(domain); return NULL; }
Simiarly here, you're now requiring all callers to manually obtain a lock.
@@ -1239,39 +1243,57 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml,
qemuDriverLock(driver); if (!(def = virDomainDefParseString(driver->caps, xml, - VIR_DOMAIN_XML_INACTIVE))) + VIR_DOMAIN_XML_INACTIVE))) { + qemuDriverUnlock(driver); goto cleanup; + }
- if (virSecurityManagerVerify(driver->securityManager, def) < 0) + if (virSecurityManagerVerify(driver->securityManager, def) < 0) { + qemuDriverUnlock(driver); goto cleanup; + }
- if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) { + qemuDriverUnlock(driver); goto cleanup; + }
- if (qemudCanonicalizeMachine(driver, def) < 0) + if (qemudCanonicalizeMachine(driver, def) < 0) { + qemuDriverUnlock(driver); goto cleanup; + }
- if (qemuDomainAssignPCIAddresses(def) < 0) + if (qemuDomainAssignPCIAddresses(def) < 0) { + qemuDriverUnlock(driver); goto cleanup; + }
if (!(vm = virDomainAssignDef(driver->caps, &driver->domains, - def, false))) + def, false))) { + qemuDriverUnlock(driver); goto cleanup; + } + + qemuDriverUnlock(driver);
driver is now unlocked....
def = NULL;
- if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) { + virDomainObjUnref(vm);
...but this method *requires* driver to be locked.
goto cleanup; /* XXXX free the 'vm' we created ? */ + }
if (qemuProcessStart(conn, driver, vm, NULL, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1, NULL, VIR_VM_OP_CREATE) < 0) { qemuAuditDomainStart(vm, "booted", false);
...and this method writes to 'driver', so it is now unsafe.
- if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, - vm); - vm = NULL; + qemuDomainObjEndJob(vm); + qemuDriverLock(driver); + virDomainRemoveInactive(&driver->domains, + vm); + qemuDriverUnlock(driver); + virDomainObjUnref(vm); goto cleanup; }
@@ -1283,17 +1305,13 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml, dom = virGetDomain(conn, vm->def->name, vm->def->uuid); if (dom) dom->id = vm->def->id;
- if (vm && - qemuDomainObjEndJob(vm) == 0) - vm = NULL; + qemuDomainObjEndJob(vm); + virDomainObjUnref(vm);
cleanup: virDomainDefFree(def); - if (vm) - virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); return dom; }
And all the usage of 'vm' in this method is now completely unlocked and unsafe.
@@ -1307,13 +1325,14 @@ static int qemudDomainSuspend(virDomainPtr dom) {
qemuDriverLock(driver); vm = virDomainFindByUUID(&driver->domains, dom->uuid); + qemuDriverUnlock(driver);
if (!vm) { char uuidstr[VIR_UUID_STRING_BUFLEN]; virUUIDFormat(dom->uuid, uuidstr); qemuReportError(VIR_ERR_NO_DOMAIN, _("no domain with matching uuid '%s'"), uuidstr); - goto cleanup; + return -1; } if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_OPERATION_INVALID, @@ -1354,16 +1373,12 @@ static int qemudDomainSuspend(virDomainPtr dom) { }
endjob: - if (qemuDomainObjEndJob(vm) == 0) - vm = NULL; + qemuDomainObjEndJob(vm);
cleanup: - if (vm) - virDomainObjUnlock(vm); - + virDomainObjUnref(vm); if (event) qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); return ret; }
Also now completely unsafe since 'vm' is never locked while making changes to it. [cut rest of patch] I don't see how any of this patch is threadsafe now that virtually no methods are acquiring the 'vm' lock. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|