If the QEMU process has been stopped (kill -STOP/gdb), or the
QEMU process has live-locked itself, then we will never get a
reply from the monitor. We should not wait forever in this
case, but instead time out after a reasonable amount of time.
NB if the host has high CPU load, or a single monitor command
intentionally takes a long time, then this will cause bogus
failures. In the case of high CPU load, arguably the guest
should have been migrated elsewhere, since you can't effectively
manage guests on a host if QEMU is taking > 30 seconds to reply
to simple commands. Since we use background migration, there
should not be any commands which take significant time to
execute any more.
* src/qemu/qemu_monitor.c: Timeout waiting for reply after 30 seconds
---
src/qemu/qemu_monitor.c | 21 ++++++++++++++++++---
1 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 89a3f64..d9b6600 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -781,10 +781,19 @@ char *qemuMonitorNextCommandID(qemuMonitorPtr mon)
}
+/* Give up waiting for reply after 30 seconds */
+#define QEMU_MONITOR_WAIT_TIME (1000ull * 30)
+
int qemuMonitorSend(qemuMonitorPtr mon,
qemuMonitorMessagePtr msg)
{
int ret = -1;
+ unsigned long long now;
+ unsigned long long then;
+
+ if (virTimeMs(&now) < 0)
+ return -1;
+ then = now + QEMU_MONITOR_WAIT_TIME;
/* Check whether qemu quited unexpectedly */
if (mon->lastError.code != VIR_ERR_OK) {
@@ -798,9 +807,15 @@ int qemuMonitorSend(qemuMonitorPtr mon,
qemuMonitorUpdateWatch(mon);
while (!mon->msg->finished) {
- if (virCondWait(&mon->notify, &mon->lock) < 0) {
- qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Unable to wait on monitor condition"));
+ if (virCondWaitUntil(&mon->notify, &mon->lock, then) < 0) {
+ if (errno == ETIMEDOUT)
+ qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
+ "%s", _("no reply received from qemu"));
+ else
+ virReportSystemError(errno,
+ "%s", _("cannot wait on monitor condition"));
+ /* Ensure no further monitor commands can be run */
+ virCopyLastError(&mon->lastError);
goto cleanup;
}
}
--
1.7.4.4