Previously when a domain would get stuck in a domain job due to a
programming mistake we'd report the following control state:
$ virsh domcontrol domain
occupied (1424343406.150s)
The timestamp is invalid as the monitor was not entered for that domain.
We can use that to detect that the domain has an active job and report a
better error instead:
$ virsh domcontrol domain
error: internal (locking) error
---
include/libvirt/libvirt-domain.h | 27 +++++++++++++++++++++++++--
src/qemu/qemu_driver.c | 15 +++++++++++++--
tools/virsh-domain-monitor.c | 19 +++++++++++++++++++
3 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 4dbd7f5..a9d3efd 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -182,7 +182,9 @@ typedef enum {
monitored by virDomainGetJobInfo); only
limited set of commands may be allowed */
VIR_DOMAIN_CONTROL_OCCUPIED = 2, /* occupied by a running command */
- VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully operated */
+ VIR_DOMAIN_CONTROL_ERROR = 3, /* unusable, domain cannot be fully
+ operated, possible reason is provided
+ in the details field */
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_CONTROL_LAST
@@ -190,6 +192,26 @@ typedef enum {
} virDomainControlState;
/**
+ * virDomainControlErrorReason:
+ *
+ * Reason for the error state.
+ */
+typedef enum {
+ VIR_DOMAIN_CONTROL_ERROR_REASON_NONE = 0, /* server didn't provide a
+ reason */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_UNKNOWN = 1, /* unknown reason for the
+ error */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR = 2, /* monitor connection is
+ broken */
+ VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL = 3, /* error caused due to
+ internal failure in libvirt
+ */
+# ifdef VIR_ENUM_SENTINELS
+ VIR_DOMAIN_CONTROL_ERROR_REASON_LAST
+# endif
+} virDomainControlErrorReason;
+
+/**
* virDomainControlInfo:
*
* Structure filled in by virDomainGetControlInfo and providing details about
@@ -198,7 +220,8 @@ typedef enum {
typedef struct _virDomainControlInfo virDomainControlInfo;
struct _virDomainControlInfo {
unsigned int state; /* control state, one of virDomainControlState */
- unsigned int details; /* state details, currently 0 */
+ unsigned int details; /* state details, currently 0 except for ERROR
+ state (one of virDomainControlErrorReason) */
unsigned long long stateTime; /* for how long (in msec) control interface
has been in current state (except for OK
and ERROR states) */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 8f0cf2b..dda9ab7 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2690,6 +2690,7 @@ qemuDomainGetControlInfo(virDomainPtr dom,
if (priv->monError) {
info->state = VIR_DOMAIN_CONTROL_ERROR;
+ info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_MONITOR;
} else if (priv->job.active) {
if (virTimeMillisNow(&info->stateTime) < 0)
goto cleanup;
@@ -2697,8 +2698,18 @@ qemuDomainGetControlInfo(virDomainPtr dom,
info->state = VIR_DOMAIN_CONTROL_JOB;
info->stateTime -= priv->job.current->started;
} else {
- info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
- info->stateTime -= priv->monStart;
+ if (priv->monStart > 0) {
+ info->state = VIR_DOMAIN_CONTROL_OCCUPIED;
+ info->stateTime -= priv->monStart;
+ } else {
+ /* At this point the domain has an active job, but monitor was
+ * not entered and the domain object lock is not held thus we
+ * are stuck in the job forever due to a programming error.
+ */
+ info->state = VIR_DOMAIN_CONTROL_ERROR;
+ info->details = VIR_DOMAIN_CONTROL_ERROR_REASON_INTERNAL;
+ info->stateTime = 0;
+ }
}
} else {
info->state = VIR_DOMAIN_CONTROL_OK;
diff --git a/tools/virsh-domain-monitor.c b/tools/virsh-domain-monitor.c
index 925eb1b..f774d30 100644
--- a/tools/virsh-domain-monitor.c
+++ b/tools/virsh-domain-monitor.c
@@ -128,6 +128,21 @@ vshDomainControlStateToString(int state)
return str ? _(str) : _("unknown");
}
+VIR_ENUM_DECL(vshDomainControlErrorReason)
+VIR_ENUM_IMPL(vshDomainControlErrorReason,
+ VIR_DOMAIN_CONTROL_ERROR_REASON_LAST,
+ "",
+ N_("unknown"),
+ N_("monitor failure"),
+ N_("internal (locking) error"))
+
+static const char *
+vshDomainControlErrorReasonToString(int reason)
+{
+ const char *ret = vshDomainControlErrorReasonTypeToString(reason);
+ return ret ? _(ret) : _("unknown");
+}
+
VIR_ENUM_DECL(vshDomainState)
VIR_ENUM_IMPL(vshDomainState,
VIR_DOMAIN_LAST,
@@ -814,6 +829,10 @@ cmdDomControl(vshControl *ctl, const vshCmd *cmd)
vshPrint(ctl, "%s (%0.3fs)\n",
vshDomainControlStateToString(info.state),
info.stateTime / 1000.0);
+ } else if (info.state == VIR_DOMAIN_CONTROL_ERROR && info.details > 0) {
+ vshPrint(ctl, "%s: %s\n",
+ vshDomainControlStateToString(info.state),
+ vshDomainControlErrorReasonToString(info.details));
} else {
vshPrint(ctl, "%s\n",
vshDomainControlStateToString(info.state));
--
2.2.2