
On Wed, Dec 22, 2021 at 22:39:21 -0800, Rohit Kumar wrote:
This patch is to determine the VM which had IO or socket hangup error. Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend() might leads to illegal access as we are out of 'vm' context and vm->def might not exist. Adding a field "domainName" inside mon object to access vm name and initialising it when creating mon object.
Signed-off-by: Rohit Kumar <rohit.kumar3@nutanix.com> --- diff to v1: - Adding a field domainName inside _qemuMonitor struct for accessing vm name instead of directly accessing mon->vm->def->name. - Link to v1: https://listman.redhat.com/archives/libvir-list/2021-December/msg00217.html - Talked with peter on RFC and he suggested me to add a field inside monitor struct to get VM name.
src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 18 deletions(-)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index dda6ae9796..c3a0227795 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c
[...]
@@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, qemuMonitor *mon = opaque; bool error = false; bool hangup = false; + virDomainObj *vm = NULL; + char *vmName = NULL;
These local variables are not very useful, you can access 'mon' directly without the need for this alias ...
virObjectRef(mon);
+ vm = mon->vm; + vmName = mon->domainName; + /* lock access to the monitor and protect fd */ virObjectLock(mon); #if DEBUG_IO - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond); + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName)); #endif
There's no much point in adding to thhese debug statements (inside DEBUG_IO) as they are not compiled by default. In fact I'll propose a removal of those.
if (mon->fd == -1 || !mon->watch) { virObjectUnlock(mon); @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
if (!error && !mon->goteof && cond & G_IO_ERR) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Invalid file descriptor while waiting for monitor")); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));
Do not prefix the error message by the VM name, but rather put it at the end or inside: Invalid file descriptor while waiting for monitor (vm='%s') or smilar.
mon->goteof = true; } } @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, virResetLastError(); } else { if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof) - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Error while processing monitor IO")); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("%s: Error while processing monitor IO"), NULLSTR(vmName));
Same here. Additionally did you ever get to a situation where vmName would be NULL?
virCopyLastError(&mon->lastError); virResetLastError(); }
- VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message)); + VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s", + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
This is a good example.
/* If IO process resulted in an error & we have a message, * then wakeup that waiter */ if (mon->msg && !mon->msg->finished) { @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, * will try to acquire the virDomainObj *mutex next */ if (mon->goteof) { qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify; - virDomainObj *vm = mon->vm;
You can remove this local when you are accessing it directly.
/* Make sure anyone waiting wakes up now */ virCondSignal(&mon->notify); virObjectUnlock(mon); - VIR_DEBUG("Triggering EOF callback"); + VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s", + mon, vm, NULLSTR(vmName)); (eofNotify)(mon, vm, mon->callbackOpaque); virObjectUnref(mon); } else if (error) { qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify; - virDomainObj *vm = mon->vm;
/* Make sure anyone waiting wakes up now */ virCondSignal(&mon->notify); virObjectUnlock(mon); - VIR_DEBUG("Triggering error callback"); + VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s", + mon, vm, NULLSTR(vmName)); (errorNotify)(mon, vm, mon->callbackOpaque); virObjectUnref(mon); } else { @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm, mon->fd = fd; mon->context = g_main_context_ref(context); mon->vm = virObjectRef(vm); + mon->domainName = g_strdup(vm->def->name);
As noted above, did you ever run into a situation where this would be NULL? That would seem weird.
mon->waitGreeting = true; mon->cb = cb; mon->callbackOpaque = opaque; @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon, qemuMonitorMessage *msg) { int ret = -1; + virDomainObj *vm = mon->vm; + char *vmName = mon->domainName;
Avoid these.
/* Check whether qemu quit unexpectedly */ if (mon->lastError.code != VIR_ERR_OK) { - VIR_DEBUG("Attempt to send command while error is set %s", - NULLSTR(mon->lastError.message)); + VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s", + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName)); virSetError(&mon->lastError); return -1; } if (mon->goteof) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("End of file from qemu monitor")); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("%s: End of file from qemu monitor"), NULLSTR(vmName));
No prefixing.
return -1; }
@@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
while (!mon->msg->finished) { if (virCondWait(&mon->notify, &mon->parent.lock) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Unable to wait on monitor condition")); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));
Same here.
goto cleanup; } }
if (mon->lastError.code != VIR_ERR_OK) { - VIR_DEBUG("Send command resulted in error %s", - NULLSTR(mon->lastError.message)); + VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s", + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName)); virSetError(&mon->lastError); goto cleanup; } -- 2.25.1