On Thu, Apr 07, 2011 at 10:38:34AM +0100, Daniel P. Berrange wrote:
On Wed, Apr 06, 2011 at 03:19:55PM +0800, Hu Tao wrote:
> ---
> src/qemu/qemu_domain.c | 30 ++-----------
> src/qemu/qemu_migration.c | 2 -
> src/qemu/qemu_monitor.c | 109 ++++++++++++++++++++++++--------------------
> src/qemu/qemu_monitor.h | 4 +-
> src/qemu/qemu_process.c | 32 +++++++-------
> 5 files changed, 81 insertions(+), 96 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 3a3c953..d11dc5f 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -560,9 +560,8 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> - qemuMonitorLock(priv->mon);
> - qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> + qemuMonitorLock(priv->mon);
> }
>
>
> @@ -573,18 +572,9 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> void qemuDomainObjExitMonitor(virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> - int refs;
> -
> - refs = qemuMonitorUnref(priv->mon);
> -
> - if (refs > 0)
> - qemuMonitorUnlock(priv->mon);
>
> + qemuMonitorUnlock(priv->mon);
> virDomainObjLock(obj);
> -
> - if (refs == 0) {
> - priv->mon = NULL;
> - }
> }
>
>
> @@ -601,10 +591,8 @@ void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver
*driver,
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> - qemuMonitorLock(priv->mon);
> - qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> - qemuDriverUnlock(driver);
> + qemuMonitorLock(priv->mon);
> }
>
>
> @@ -617,19 +605,9 @@ void qemuDomainObjExitMonitorWithDriver(struct qemud_driver
*driver,
> virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> - int refs;
> -
> - refs = qemuMonitorUnref(priv->mon);
> -
> - if (refs > 0)
> - qemuMonitorUnlock(priv->mon);
>
> - qemuDriverLock(driver);
> + qemuMonitorUnlock(priv->mon);
> virDomainObjLock(obj);
> -
> - if (refs == 0) {
> - priv->mon = NULL;
> - }
> }
This means that the 'driver' lock is now whenever any QEMU
monitor command is runing, which blocks the entire driver.
qemuDomainObjEnterMonitorWithDriver is now called without holding qemu
driver lock.
>
> void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver,
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 462e6be..6af2e24 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -224,11 +224,9 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver,
virDomainObjPtr vm)
> }
>
> virDomainObjUnlock(vm);
> - qemuDriverUnlock(driver);
>
> nanosleep(&ts, NULL);
>
> - qemuDriverLock(driver);
> virDomainObjLock(vm);
> }
Holding the 'driver' lock while sleeping blocks the entire
QEMU driver.
Now qemuMigrationWaitForCompletion should be called without holding qemu
driver lock.
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 244b22a..4b9087f 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -107,7 +107,6 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon
ATTRIBUTE_UNUSED,
>
> VIR_DEBUG("Received EOF on %p '%s'", vm,
vm->def->name);
>
> - qemuDriverLock(driver);
> virDomainObjLock(vm);
>
> if (!virDomainObjIsActive(vm)) {
> @@ -133,15 +132,17 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon
ATTRIBUTE_UNUSED,
> qemuProcessStop(driver, vm, 0);
> qemuAuditDomainStop(vm, hasError ? "failed" : "shutdown");
>
> - if (!vm->persistent)
> + if (!vm->persistent) {
> + qemuDriverLock(driver);
> virDomainRemoveInactive(&driver->domains, vm);
> - else
> - virDomainObjUnlock(vm);
> + qemuDriverUnlock(driver);
> + }
> +
> + virDomainObjUnlock(vm);
>
> if (event) {
> qemuDomainEventQueue(driver, event);
> }
> - qemuDriverUnlock(driver);
> }
This violates the lock ordering rules. The 'driver' lock *must* be
obtained *before* any 'vm' lock is held.
Excepting for introducing virObject for reference-counting, this series
also simplifies the usage of lock: if you want to read/write qemu
driver data, it is enough to first acquire qemu driver lock only; if you
want to read/write virDomainObj data, it is enough to first acquire
virDomainObj lock only; same for others. And we'd better to avoid
acquiring two locks at the same time.
So yes, the code here is problematic, it should be ideally like this:
virDomainObjLock(vm);
if (!vm->persistent) {
lock_hashtable(doms); /* hashtable's own lock to protect itself */
virDomainRemoveInactive(doms, vm);
unlock_hashtable(doms);
}
virDomainObjUnlock(vm);
But it lacks hashtable lock, how about change the code like this:
virDomainObjLock(vm);
persistent = vm->persistent;
virDomainObjUnlock(vm);
/* chances that others change vm->persistent and we remove
vm mistakenly :( */
if (!persistent) {
qemuDriverLock(driver);
virDomainRemoveInactive(doms, vm);
qemuDriverUnlock(driver);
}
Or is there a better way?
Now we have some places in the code which do
lock(vm)
lock(driver)
and other places which do
lock(driver)
lock(vm)
so 2 threads can trivially deadlock waiting for each other
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|