
On Thu, Apr 07, 2011 at 10:38:34AM +0100, Daniel P. Berrange wrote:
On Wed, Apr 06, 2011 at 03:19:55PM +0800, Hu Tao wrote:
--- src/qemu/qemu_domain.c | 30 ++----------- src/qemu/qemu_migration.c | 2 - src/qemu/qemu_monitor.c | 109 ++++++++++++++++++++++++-------------------- src/qemu/qemu_monitor.h | 4 +- src/qemu/qemu_process.c | 32 +++++++------- 5 files changed, 81 insertions(+), 96 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 3a3c953..d11dc5f 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -560,9 +560,8 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) { qemuDomainObjPrivatePtr priv = obj->privateData;
- qemuMonitorLock(priv->mon); - qemuMonitorRef(priv->mon); virDomainObjUnlock(obj); + qemuMonitorLock(priv->mon); }
@@ -573,18 +572,9 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) void qemuDomainObjExitMonitor(virDomainObjPtr obj) { qemuDomainObjPrivatePtr priv = obj->privateData; - int refs; - - refs = qemuMonitorUnref(priv->mon); - - if (refs > 0) - qemuMonitorUnlock(priv->mon);
+ qemuMonitorUnlock(priv->mon); virDomainObjLock(obj); - - if (refs == 0) { - priv->mon = NULL; - } }
@@ -601,10 +591,8 @@ void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, { qemuDomainObjPrivatePtr priv = obj->privateData;
- qemuMonitorLock(priv->mon); - qemuMonitorRef(priv->mon); virDomainObjUnlock(obj); - qemuDriverUnlock(driver); + qemuMonitorLock(priv->mon); }
@@ -617,19 +605,9 @@ void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj) { qemuDomainObjPrivatePtr priv = obj->privateData; - int refs; - - refs = qemuMonitorUnref(priv->mon); - - if (refs > 0) - qemuMonitorUnlock(priv->mon);
- qemuDriverLock(driver); + qemuMonitorUnlock(priv->mon); virDomainObjLock(obj); - - if (refs == 0) { - priv->mon = NULL; - } }
This means that the 'driver' lock is now whenever any QEMU monitor command is runing, which blocks the entire driver.
qemuDomainObjEnterMonitorWithDriver is now called without holding qemu driver lock.
void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 462e6be..6af2e24 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -224,11 +224,9 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) }
virDomainObjUnlock(vm); - qemuDriverUnlock(driver);
nanosleep(&ts, NULL);
- qemuDriverLock(driver); virDomainObjLock(vm); }
Holding the 'driver' lock while sleeping blocks the entire QEMU driver.
Now qemuMigrationWaitForCompletion should be called without holding qemu driver lock.
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 244b22a..4b9087f 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -107,7 +107,6 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
- qemuDriverLock(driver); virDomainObjLock(vm);
if (!virDomainObjIsActive(vm)) { @@ -133,15 +132,17 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuProcessStop(driver, vm, 0); qemuAuditDomainStop(vm, hasError ? "failed" : "shutdown");
- if (!vm->persistent) + if (!vm->persistent) { + qemuDriverLock(driver); virDomainRemoveInactive(&driver->domains, vm); - else - virDomainObjUnlock(vm); + qemuDriverUnlock(driver); + } + + virDomainObjUnlock(vm);
if (event) { qemuDomainEventQueue(driver, event); } - qemuDriverUnlock(driver); }
This violates the lock ordering rules. The 'driver' lock *must* be obtained *before* any 'vm' lock is held.
Excepting for introducing virObject for reference-counting, this series also simplifies the usage of lock: if you want to read/write qemu driver data, it is enough to first acquire qemu driver lock only; if you want to read/write virDomainObj data, it is enough to first acquire virDomainObj lock only; same for others. And we'd better to avoid acquiring two locks at the same time. So yes, the code here is problematic, it should be ideally like this: virDomainObjLock(vm); if (!vm->persistent) { lock_hashtable(doms); /* hashtable's own lock to protect itself */ virDomainRemoveInactive(doms, vm); unlock_hashtable(doms); } virDomainObjUnlock(vm); But it lacks hashtable lock, how about change the code like this: virDomainObjLock(vm); persistent = vm->persistent; virDomainObjUnlock(vm); /* chances that others change vm->persistent and we remove vm mistakenly :( */ if (!persistent) { qemuDriverLock(driver); virDomainRemoveInactive(doms, vm); qemuDriverUnlock(driver); } Or is there a better way?
Now we have some places in the code which do
lock(vm) lock(driver)
and other places which do
lock(driver) lock(vm)
so 2 threads can trivially deadlock waiting for each other
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|