On Tue, 4 Jan 2022, Rohit Kumar wrote:
On 03/01/22 7:12 pm, Ani Sinha wrote:
>
> On Wed, 22 Dec 2021, Rohit Kumar wrote:
>
> > This patch is to determine the VM which had IO or socket hangup error.
> > Accessing directly vm->def->name inside qemuMonitorIO() or
> > qemuMonitorSend()
> > might leads to illegal access as we are out of 'vm' context and
vm->def
> > might
> > not exist. Adding a field "domainName" inside mon object to access vm
name
> > and initialising it when creating mon object.
> The commit message should desribe the purpose of the change, not how it is
> done. That can be easily understood reading the code. As for why the
> implementation follows a particular way, we can explain it in the code
> comments.
>
> This change adds the domain name in the error and debug logs during
> monitor IO processing so that we may infer which VM experienced those
> errors etc. This may help in debugging monitor IO errors. IMHO this
> decription is enough.
Thanks Ani, I will update the commit message in the next patch.
>
> > Signed-off-by: Rohit Kumar <rohit.kumar3(a)nutanix.com>
> > ---
> > diff to v1:
> > - Adding a field domainName inside _qemuMonitor struct for accessing vm
> > name
> > instead of directly accessing mon->vm->def->name.
> > - Link to v1:
> >
https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_a...
> > - Talked with peter on RFC and he suggested me to add a field inside
> > monitor struct to get VM name.
> >
> > src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
> > 1 file changed, 29 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> > index dda6ae9796..c3a0227795 100644
> > --- a/src/qemu/qemu_monitor.c
> > +++ b/src/qemu/qemu_monitor.c
> > @@ -80,6 +80,7 @@ struct _qemuMonitor {
> > GSource *watch;
> >
> > virDomainObj *vm;
> > + char *domainName;
> >
> Here we can explain why we are adding an extra member as opposed to
> referencing it directly.
Right, we can access it directly.
>
> > qemuMonitorCallbacks *cb;
> > void *callbackOpaque;
> > @@ -243,6 +244,7 @@ qemuMonitorDispose(void *obj)
> > virCondDestroy(&mon->notify);
> > g_free(mon->buffer);
> > g_free(mon->balloonpath);
> > + g_free(mon->domainName);
> > }
> >
> >
> > @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> > qemuMonitor *mon = opaque;
> > bool error = false;
> > bool hangup = false;
> > + virDomainObj *vm = NULL;
> > + char *vmName = NULL;
> >
> These initializations are redundant since ...
Acknowledged.
> > virObjectRef(mon);
> >
> > + vm = mon->vm;
> > + vmName = mon->domainName;
> you initialize them here anyway.
I will remove these in next patch.
>
> > +
> > /* lock access to the monitor and protect fd */
> > virObjectLock(mon);
> > #if DEBUG_IO
> > - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket,
cond);
> > + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s",
mon,
> > socket, cond, vm, NULLSTR(vmName));
> > #endif
> > if (mon->fd == -1 || !mon->watch) {
> > virObjectUnlock(mon);
> > @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> >
> > if (!error && !mon->goteof &&
> > cond & G_IO_ERR) {
> > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > - _("Invalid file descriptor while waiting for
> > monitor"));
> > + virReportError(VIR_ERR_INTERNAL_ERROR,
> > + _("%s: Invalid file descriptor while waiting
> > for monitor"), NULLSTR(vmName));
> As per
>
https://urldefense.proofpoint.com/v2/url?u=https-3A__libvirt.org_coding-2...
> , please
> enclose %s within quotes:
> _("'%s': Invalid file descriptor while waiting for monitor")
Ack. Thanks.
> > mon->goteof = true;
> > }
> > }
> > @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> > virResetLastError();
> > } else {
> > if (virGetLastErrorCode() == VIR_ERR_OK &&
!mon->goteof)
> > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > - _("Error while processing monitor
IO"));
> > + virReportError(VIR_ERR_INTERNAL_ERROR,
> > + _("%s: Error while processing monitor
> > IO"), NULLSTR(vmName));
> Ditto.
Ack.
> > virCopyLastError(&mon->lastError);
> > virResetLastError();
> > }
> >
> > - VIR_DEBUG("Error on monitor %s",
> > NULLSTR(mon->lastError.message));
> > + VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
> > + NULLSTR(mon->lastError.message), mon, vm,
> > NULLSTR(vmName));
> > /* If IO process resulted in an error & we have a message,
> > * then wakeup that waiter */
> > if (mon->msg && !mon->msg->finished) {
> > @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> > * will try to acquire the virDomainObj *mutex next */
> > if (mon->goteof) {
> > qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
> > - virDomainObj *vm = mon->vm;
> >
> > /* Make sure anyone waiting wakes up now */
> > virCondSignal(&mon->notify);
> > virObjectUnlock(mon);
> > - VIR_DEBUG("Triggering EOF callback");
> > + VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
> > + mon, vm, NULLSTR(vmName));
> > (eofNotify)(mon, vm, mon->callbackOpaque);
> > virObjectUnref(mon);
> > } else if (error) {
> > qemuMonitorErrorNotifyCallback errorNotify =
> > mon->cb->errorNotify;
> > - virDomainObj *vm = mon->vm;
> >
> > /* Make sure anyone waiting wakes up now */
> > virCondSignal(&mon->notify);
> > virObjectUnlock(mon);
> > - VIR_DEBUG("Triggering error callback");
> > + VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
> > + mon, vm, NULLSTR(vmName));
> > (errorNotify)(mon, vm, mon->callbackOpaque);
> > virObjectUnref(mon);
> > } else {
> > @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
> > mon->fd = fd;
> > mon->context = g_main_context_ref(context);
> > mon->vm = virObjectRef(vm);
> > + mon->domainName = g_strdup(vm->def->name);
> do not forget to g_free() this during cleanup in the same function.
So, in cleanup: qemuMonitorDispose is called. And there I have added g_free()
to clean mon->domainName.
No, in cleanup, I see qemuMonitorClose() is called where do you do not add
any additional code to free the allocation.
This is what I see in cleanup code:
```
cleanup:
/* We don't want the 'destroy' callback invoked during
* cleanup from construction failure, because that can
* give a double-unref on virDomainObj *in the caller,
* so kill the callbacks now.
*/
mon->cb = NULL;
/* The caller owns 'fd' on failure */
mon->fd = -1;
qemuMonitorClose(mon);
return NULL;
```
>
> > mon->waitGreeting = true;
> > mon->cb = cb;
> > mon->callbackOpaque = opaque;
> > @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
> > qemuMonitorMessage *msg)
> > {
> > int ret = -1;
> > + virDomainObj *vm = mon->vm;
> > + char *vmName = mon->domainName;
> >
> > /* Check whether qemu quit unexpectedly */
> > if (mon->lastError.code != VIR_ERR_OK) {
> > - VIR_DEBUG("Attempt to send command while error is set %s",
> > - NULLSTR(mon->lastError.message));
> > + VIR_DEBUG("Attempt to send command while error is set %s mon=%p
> > vm=%p name=%s",
> > + NULLSTR(mon->lastError.message), mon, vm,
> > NULLSTR(vmName));
> > virSetError(&mon->lastError);
> > return -1;
> > }
> > if (mon->goteof) {
> > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > - _("End of file from qemu monitor"));
> > + virReportError(VIR_ERR_INTERNAL_ERROR,
> > + _("%s: End of file from qemu monitor"),
> > NULLSTR(vmName));
> ditto re error string format.
Ack.
>
> > return -1;
> > }
> >
> > @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
> >
> > while (!mon->msg->finished) {
> > if (virCondWait(&mon->notify, &mon->parent.lock) <
0) {
> > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > - _("Unable to wait on monitor
condition"));
> > + virReportError(VIR_ERR_INTERNAL_ERROR,
> > + _("%s: Unable to wait on monitor
condition"),
> > NULLSTR(vmName));
> ditto as above.
Ack.
> > goto cleanup;
> > }
> > }
> >
> > if (mon->lastError.code != VIR_ERR_OK) {
> > - VIR_DEBUG("Send command resulted in error %s",
> > - NULLSTR(mon->lastError.message));
> > + VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p
> > name=%s",
> > + NULLSTR(mon->lastError.message), mon, vm,
> > NULLSTR(vmName));
> > virSetError(&mon->lastError);
> > goto cleanup;
> > }
> > --
> > 2.25.1
> >
> >