On 03/01/22 10:12 pm, Peter Krempa wrote:
On Wed, Dec 22, 2021 at 22:39:21 -0800, Rohit Kumar wrote:
> This patch is to determine the VM which had IO or socket hangup error.
> Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
> might leads to illegal access as we are out of 'vm' context and vm->def
might
> not exist. Adding a field "domainName" inside mon object to access vm name
> and initialising it when creating mon object.
>
> Signed-off-by: Rohit Kumar <rohit.kumar3(a)nutanix.com>
> ---
> diff to v1:
> - Adding a field domainName inside _qemuMonitor struct for accessing vm name
> instead of directly accessing mon->vm->def->name.
> - Link to v1:
https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_a...
> - Talked with peter on RFC and he suggested me to add a field inside
> monitor struct to get VM name.
>
> src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
> 1 file changed, 29 insertions(+), 18 deletions(-)
>
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index dda6ae9796..c3a0227795 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
[...]
> @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> qemuMonitor *mon = opaque;
> bool error = false;
> bool hangup = false;
> + virDomainObj *vm = NULL;
> + char *vmName = NULL;
These local variables are not very useful, you can access 'mon' directly
without the need for this alias ...
Thanks for the review Peter. I will update it
in the next patch.
>
> virObjectRef(mon);
>
> + vm = mon->vm;
> + vmName = mon->domainName;
> +
> /* lock access to the monitor and protect fd */
> virObjectLock(mon);
> #if DEBUG_IO
> - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
> + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon,
socket, cond, vm, NULLSTR(vmName));
> #endif
There's no much point in adding to thhese debug statements (inside
DEBUG_IO) as they are not compiled by default. In fact I'll propose a
removal of those.
Okay. I will remove these from here.
> if (mon->fd == -1 || !mon->watch) {
> virObjectUnlock(mon);
> @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>
> if (!error && !mon->goteof &&
> cond & G_IO_ERR) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("Invalid file descriptor while waiting for
monitor"));
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("%s: Invalid file descriptor while waiting for
monitor"), NULLSTR(vmName));
Do not prefix the error message by the VM name, but rather put it at the
end or inside:
Invalid file descriptor while waiting for monitor (vm='%s')
or smilar.
Acknowledge. Will update in next patch. Thanks.
> mon->goteof = true;
> }
> }
> @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> virResetLastError();
> } else {
> if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("Error while processing monitor IO"));
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("%s: Error while processing monitor IO"),
NULLSTR(vmName));
Same here. Additionally did you ever get to a situation where vmName
would be NULL?
There might be a situation when g_strdup() fails to allocate vm
name,
for e.g. if host ran out of memory ?
Let me know your thoughts on this, I would be happy to remove NULLSTR if
it's not the case.
> virCopyLastError(&mon->lastError);
> virResetLastError();
> }
>
> - VIR_DEBUG("Error on monitor %s",
NULLSTR(mon->lastError.message));
> + VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
This is a good example.
Okay.
> /* If IO process resulted in an error & we have a message,
> * then wakeup that waiter */
> if (mon->msg && !mon->msg->finished) {
> @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
> * will try to acquire the virDomainObj *mutex next */
> if (mon->goteof) {
> qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
> - virDomainObj *vm = mon->vm;
You can remove this local when you are accessing it directly.
Sure. Ack.
>
> /* Make sure anyone waiting wakes up now */
> virCondSignal(&mon->notify);
> virObjectUnlock(mon);
> - VIR_DEBUG("Triggering EOF callback");
> + VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
> + mon, vm, NULLSTR(vmName));
> (eofNotify)(mon, vm, mon->callbackOpaque);
> virObjectUnref(mon);
> } else if (error) {
> qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
> - virDomainObj *vm = mon->vm;
>
> /* Make sure anyone waiting wakes up now */
> virCondSignal(&mon->notify);
> virObjectUnlock(mon);
> - VIR_DEBUG("Triggering error callback");
> + VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
> + mon, vm, NULLSTR(vmName));
> (errorNotify)(mon, vm, mon->callbackOpaque);
> virObjectUnref(mon);
> } else {
> @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
> mon->fd = fd;
> mon->context = g_main_context_ref(context);
> mon->vm = virObjectRef(vm);
> + mon->domainName = g_strdup(vm->def->name);
As noted above, did you ever run into a situation where this would be
NULL? That would seem weird.
My concern was with g_strdup() allocation failing. Let
me know if that
isn't the case.
> mon->waitGreeting = true;
> mon->cb = cb;
> mon->callbackOpaque = opaque;
> @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
> qemuMonitorMessage *msg)
> {
> int ret = -1;
> + virDomainObj *vm = mon->vm;
> + char *vmName = mon->domainName;
Avoid these.
Ack.
>
> /* Check whether qemu quit unexpectedly */
> if (mon->lastError.code != VIR_ERR_OK) {
> - VIR_DEBUG("Attempt to send command while error is set %s",
> - NULLSTR(mon->lastError.message));
> + VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p
name=%s",
> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
> virSetError(&mon->lastError);
> return -1;
> }
> if (mon->goteof) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("End of file from qemu monitor"));
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("%s: End of file from qemu monitor"),
NULLSTR(vmName));
No prefixing.
Ack.
> return -1;
> }
>
> @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
>
> while (!mon->msg->finished) {
> if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("Unable to wait on monitor condition"));
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("%s: Unable to wait on monitor condition"),
NULLSTR(vmName));
Same here.
Ack.
Thanks, I will update these in the next patch.
> goto cleanup;
> }
> }
>
> if (mon->lastError.code != VIR_ERR_OK) {
> - VIR_DEBUG("Send command resulted in error %s",
> - NULLSTR(mon->lastError.message));
> + VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p
name=%s",
> + NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
> virSetError(&mon->lastError);
> goto cleanup;
> }
> --
> 2.25.1
>