There have been countless reports from users concerned about the following
error reported by libvirtd when qemu domains are shutdown
internal error: End of file from qemu monitor
While the error is harmless, users often mistaken it for real problem with
their deployments. EOF from the monitor can't be entirely ignored since
other threads may be using the monitor and must be able to detect the EOF
condition.
One potential fix is to delay reporting EOF until the monitor is used
after EOF is detected. This patch adds a 'goteof' member to the
qemuMonitor structure, which is set when EOF is detected on the monitor
socket. If another thread later tries to send data on the monitor, the
EOF error is reported.
Signed-off-by: Jim Fehlig <jfehlig(a)suse.com>
---
An RFC patch to squelch qemu monitor EOF error messages on VM shutdown.
Previous discussions and information on testing of the patch can be
found in this thread
https://listman.redhat.com/archives/libvir-list/2021-September/msg00949.html
src/qemu/qemu_monitor.c | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 5fc23f13d3..751ec8ba6c 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -113,6 +113,7 @@ struct _qemuMonitor {
/* true if qemu no longer wants 'props' sub-object of object-add */
bool objectAddNoWrap;
+ bool goteof;
};
/**
@@ -526,10 +527,10 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
{
qemuMonitor *mon = opaque;
bool error = false;
- bool eof = false;
bool hangup = false;
virObjectRef(mon);
+ mon->goteof = false;
/* lock access to the monitor and protect fd */
virObjectLock(mon);
@@ -544,7 +545,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
if (mon->lastError.code != VIR_ERR_OK) {
if (cond & (G_IO_HUP | G_IO_ERR))
- eof = true;
+ mon->goteof = true;
error = true;
} else {
if (cond & G_IO_OUT) {
@@ -562,7 +563,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
if (errno == ECONNRESET)
hangup = true;
} else if (got == 0) {
- eof = true;
+ mon->goteof = true;
} else {
/* Ignore hangup/error cond if we read some data, to
* give time for that data to be consumed */
@@ -575,22 +576,19 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
if (cond & G_IO_HUP) {
hangup = true;
- if (!error) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("End of file from qemu monitor"));
- eof = true;
- }
+ if (!error)
+ mon->goteof = true;
}
- if (!error && !eof &&
+ if (!error && !mon->goteof &&
cond & G_IO_ERR) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Invalid file descriptor while waiting for
monitor"));
- eof = true;
+ mon->goteof = true;
}
}
- if (error || eof) {
+ if (error || mon->goteof) {
if (hangup && mon->logFunc != NULL) {
/* Check if an error message from qemu is available and if so, use
* it to overwrite the actual message. It's done only in early
@@ -609,7 +607,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
/* Already have an error, so clear any new error */
virResetLastError();
} else {
- if (virGetLastErrorCode() == VIR_ERR_OK)
+ if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Error while processing monitor IO"));
virCopyLastError(&mon->lastError);
@@ -630,7 +628,7 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
/* We have to unlock to avoid deadlock against command thread,
* but is this safe ? I think it is, because the callback
* will try to acquire the virDomainObj *mutex next */
- if (eof) {
+ if (mon->goteof) {
qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
virDomainObj *vm = mon->vm;
@@ -949,6 +947,11 @@ qemuMonitorSend(qemuMonitor *mon,
virSetError(&mon->lastError);
return -1;
}
+ if (mon->goteof) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("End of file from qemu monitor"));
+ return -1;
+ }
mon->msg = msg;
qemuMonitorUpdateWatch(mon);
--
2.33.0