On Fri, Mar 18, 2011 at 11:20:56AM +0000, Daniel P. Berrange wrote:
On Wed, Mar 16, 2011 at 06:30:20PM +0800, Hu Tao wrote:
> ---
> src/qemu/qemu_domain.c | 26 +-----------
> src/qemu/qemu_monitor.c | 106 ++++++++++++++++++++++++++--------------------
> src/qemu/qemu_monitor.h | 4 +-
> 3 files changed, 64 insertions(+), 72 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 8a2b9cc..e056ef0 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -566,7 +566,6 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> qemuMonitorLock(priv->mon);
> - qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> }
>
> @@ -578,19 +577,9 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
> void qemuDomainObjExitMonitor(virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> - int refs;
> -
> - refs = qemuMonitorUnref(priv->mon);
> -
> - if (refs > 0)
> - qemuMonitorUnlock(priv->mon);
>
> + qemuMonitorUnlock(priv->mon);
> virDomainObjLock(obj);
> -
> - if (refs == 0) {
> - virDomainObjUnref(obj);
> - priv->mon = NULL;
> - }
> }
>
>
> @@ -608,7 +597,6 @@ void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver
*driver,
> qemuDomainObjPrivatePtr priv = obj->privateData;
>
> qemuMonitorLock(priv->mon);
> - qemuMonitorRef(priv->mon);
> virDomainObjUnlock(obj);
> qemuDriverUnlock(driver);
> }
> @@ -623,20 +611,10 @@ void qemuDomainObjExitMonitorWithDriver(struct qemud_driver
*driver,
> virDomainObjPtr obj)
> {
> qemuDomainObjPrivatePtr priv = obj->privateData;
> - int refs;
> -
> - refs = qemuMonitorUnref(priv->mon);
> -
> - if (refs > 0)
> - qemuMonitorUnlock(priv->mon);
>
> + qemuMonitorUnlock(priv->mon);
> qemuDriverLock(driver);
> virDomainObjLock(obj);
> -
> - if (refs == 0) {
> - virDomainObjUnref(obj);
> - priv->mon = NULL;
> - }
> }
I don't think these changes aren't right. The EnterMonitor() method
is releasing the lock on the virDomainObjPtr. This means that other
threads (specifically the main I/O thread) can use the virDomainObjPtr
and its qemuMonitorPtr instance. Incrementing reference count on the
qemuMonitorPtr is the only thing that prevents it being free()d while
it is unlocked by this thread.
eg, consider this
In the main thread making a monitor call
qemuDomainObjEnterMonitor(obj)
qemuMonitorSetCPU(mon, 1, 1);
qemuDomainObjExitMonitor(obj)
So, at step 2 there, qemuMonitorSetCPU() is asleep on the condition
variable, waiting for the reply from QEMU.
Now QEMU dies for some reason, so in the I/O thread, we have
qemuMonitorIO() called. This sees the EOF on the socket, so
it invokes the callback "(eofNotify)(mon, vm, failed);".
This callback is qemuMonitorClose(), which calls qemuMonitorFree()
which releases the last reference count, which deletes the condition
variable. The other thread is now waiting on a condition variable
which has been deleted. It'll either hang forever, or crash
We *must* prevent the monitor being free'd while a thread is running
a command, hence qemuDomainObjEnterMonitor() must call qemuMonitorRef()
and qemuDomainObjExitMonitor() must call qemuMonitorUnref().
You're all right except on when to ref the monitor. The monitor may be
freed by another thread before call of qemuDomainObjEnterMonitor(), so
call of qemuMonitorRef() at this time doesn't help. I think the monitor
must have already been refed before a thread starts operating it.
(After checking this patch carefully, I found this patch doesn't do so,
and it is buggy)
> -int qemuMonitorUnref(qemuMonitorPtr mon)
> +static void qemuMonitorFree(qemuMonitorPtr mon)
> {
> - mon->refs--;
> + virObjectUnref(&mon->obj);
> +}
We should just get rid fo qemuMonitorFree() entirely
and make everyone call qemuMonitorUnref(). Or vica-verca.
There's no point having both methods do the same thing.
qemuMonitorFree is paired with qemuMonitorAlloc to prevent people from
being confused if they have to call qemuMonitorUnref to free a monitor.
But it's OK to get rid of qemuMonitorFree() if well documented.
>
> - if (mon->refs == 0) {
> - qemuMonitorUnlock(mon);
> - qemuMonitorFree(mon);
> - return 0;
> - }
> +void qemuMonitorRef(qemuMonitorPtr mon)
> +{
> + virObjectRef(&mon->obj);
> +}
>
> - return mon->refs;
> +void qemuMonitorUnref(qemuMonitorPtr mon)
> +{
> + virObjectUnref(&mon->obj);
> }
>
> static void
> @@ -238,7 +269,6 @@ qemuMonitorUnwatch(void *monitor)
> {
> qemuMonitorPtr mon = monitor;
>
> - qemuMonitorLock(mon);
> qemuMonitorUnref(mon);
> }
>
> @@ -519,7 +549,6 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
>
> /* lock access to the monitor and protect fd */
> qemuMonitorLock(mon);
> - qemuMonitorRef(mon);
> #if DEBUG_IO
> VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch,
fd, events);
> #endif
> @@ -587,13 +616,11 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
> virDomainObjPtr vm = mon->vm;
> /* Make sure anyone waiting wakes up now */
> virCondSignal(&mon->notify);
> - if (qemuMonitorUnref(mon) > 0)
> - qemuMonitorUnlock(mon);
> + qemuMonitorUnlock(mon);
> VIR_DEBUG("Triggering EOF callback error? %d", failed);
> (eofNotify)(mon, vm, failed);
> } else {
> - if (qemuMonitorUnref(mon) > 0)
> - qemuMonitorUnlock(mon);
> + qemuMonitorUnlock(mon);
> }
> }
>
> @@ -612,30 +639,13 @@ qemuMonitorOpen(virDomainObjPtr vm,
> return NULL;
> }
>
> - if (VIR_ALLOC(mon) < 0) {
> - virReportOOMError();
> + mon = qemuMonitorAlloc();
> + if (!mon)
> return NULL;
> - }
>
> - if (virMutexInit(&mon->lock) < 0) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("cannot initialize monitor mutex"));
> - VIR_FREE(mon);
> - return NULL;
> - }
> - if (virCondInit(&mon->notify) < 0) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("cannot initialize monitor condition"));
> - virMutexDestroy(&mon->lock);
> - VIR_FREE(mon);
> - return NULL;
> - }
> - mon->fd = -1;
> - mon->refs = 1;
> mon->vm = vm;
> mon->json = json;
> mon->cb = cb;
> - qemuMonitorLock(mon);
>
> switch (config->type) {
> case VIR_DOMAIN_CHR_TYPE_UNIX:
> @@ -668,6 +678,12 @@ qemuMonitorOpen(virDomainObjPtr vm,
> }
>
>
> + /* mon will be accessed by qemuMonitorIO which is called in
> + * event thread, so ref it before passing it to the thread.
> + *
> + * Note: mon is unrefed in qemuMonitorUnwatch
> + */
> + qemuMonitorRef(mon);
> if ((mon->watch = virEventAddHandle(mon->fd,
> VIR_EVENT_HANDLE_HANGUP |
> VIR_EVENT_HANDLE_ERROR |
> @@ -678,10 +694,8 @@ qemuMonitorOpen(virDomainObjPtr vm,
> _("unable to register monitor events"));
> goto cleanup;
> }
> - qemuMonitorRef(mon);
Hmm, there are quite afew other code paths higher up in this
function which also do 'goto cleanup'. You've added an extra
qemuMonitorFree() call in 'cleanup:', but only some of the
places which jump there actually obtain an extra reference.
Is that really correct ?
Sorry lack of a call to qemuMonitorUnref(mon) if virEventAddHandle
fails.
>
> VIR_DEBUG("New mon %p fd =%d watch=%d", mon, mon->fd,
mon->watch);
> - qemuMonitorUnlock(mon);
>
> return mon;
>
> @@ -692,8 +706,8 @@ cleanup:
> * so kill the callbacks now.
> */
> mon->cb = NULL;
> - qemuMonitorUnlock(mon);
> qemuMonitorClose(mon);
> + qemuMonitorFree(mon);
> return NULL;
> }
>
> @@ -713,8 +727,8 @@ void qemuMonitorClose(qemuMonitorPtr mon)
> VIR_FORCE_CLOSE(mon->fd);
> }
>
> - if (qemuMonitorUnref(mon) > 0)
> - qemuMonitorUnlock(mon);
> + qemuMonitorUnlock(mon);
> + qemuMonitorFree(mon);
> }
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|