Some monitor commands may take a very long time to complete. It is
not desirable to block other incoming API calls forever. With this
change, if an existing API call is holding the job lock, additional
API calls will not wait forever. They will time out after a short
period of time, allowing the application to retry later.
* include/libvirt/virterror.h, src/util/virterror.c: Add new
VIR_ERR_OPERATION_TIMEOUT error code
* src/qemu/qemu_driver.c: Change to a timed condition variable
wait for acquiring the monitor job lock
---
include/libvirt/virterror.h | 1 +
src/qemu/qemu_driver.c | 43 +++++++++++++++++++++++++++++++++++++------
src/util/virterror.c | 6 ++++++
3 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h
index fa5cac4..b7ed9e3 100644
--- a/include/libvirt/virterror.h
+++ b/include/libvirt/virterror.h
@@ -170,6 +170,7 @@ typedef enum {
VIR_WAR_NO_SECRET, /* failed to start secret storage */
VIR_ERR_INVALID_SECRET, /* invalid secret */
VIR_ERR_NO_SECRET, /* secret not found */
+ VIR_ERR_OPERATION_TIMEOUT, /* timeout occurred during operation */
} virErrorNumber;
/**
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index b7cde56..168dffb 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -156,18 +156,35 @@ static void qemuDomainObjPrivateFree(void *data)
* Upon successful return, the object will have its ref count increased,
* successful calls must be followed by EndJob eventually
*/
+/* Give up waiting for mutex after 30 seconds (NOTE: the active define below is 3 seconds; confirm which is intended) */
+//#define QEMU_JOB_WAIT_TIME (1000ull * 30)
+#define QEMU_JOB_WAIT_TIME (1000ull * 3)
static int qemuDomainObjBeginJob(virDomainObjPtr obj) ATTRIBUTE_RETURN_CHECK;
static int qemuDomainObjBeginJob(virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
+ struct timeval now;
+ unsigned long long then;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("cannot get time of day"));
+ return -1;
+ }
+ then = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
+ then += QEMU_JOB_WAIT_TIME;
virDomainObjRef(obj);
while (priv->jobActive) {
- if (virCondWait(&priv->jobCond, &obj->lock) < 0) {
+ if (virCondWaitUntil(&priv->jobCond, &obj->lock, then) < 0) {
virDomainObjUnref(obj);
- virReportSystemError(NULL, errno,
- "%s", _("cannot acquire job mutex"));
+ if (errno == ETIMEDOUT)
+ qemudReportError(NULL, NULL, NULL, VIR_ERR_OPERATION_TIMEOUT,
+ "%s", _("cannot acquire state change lock"));
+ else
+ virReportSystemError(NULL, errno,
+ "%s", _("cannot acquire job mutex"));
return -1;
}
}
@@ -188,15 +205,29 @@ static int qemuDomainObjBeginJobWithDriver(struct qemud_driver *driver,
virDomainObjPtr obj)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
+ struct timeval now;
+ unsigned long long then;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("cannot get time of day"));
+ return -1;
+ }
+ then = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
+ then += QEMU_JOB_WAIT_TIME;
virDomainObjRef(obj);
qemuDriverUnlock(driver);
while (priv->jobActive) {
- if (virCondWait(&priv->jobCond, &obj->lock) < 0) {
+ if (virCondWaitUntil(&priv->jobCond, &obj->lock, then) < 0) {
virDomainObjUnref(obj);
- virReportSystemError(NULL, errno,
- "%s", _("cannot acquire job mutex"));
+ if (errno == ETIMEDOUT)
+ qemudReportError(NULL, NULL, NULL, VIR_ERR_OPERATION_TIMEOUT,
+ "%s", _("cannot acquire state change lock"));
+ else
+ virReportSystemError(NULL, errno,
+ "%s", _("cannot acquire job mutex"));
return -1;
}
}
diff --git a/src/util/virterror.c b/src/util/virterror.c
index 10f979c..3b3956d 100644
--- a/src/util/virterror.c
+++ b/src/util/virterror.c
@@ -1089,6 +1089,12 @@ virErrorMsg(virErrorNumber error, const char *info)
else
errmsg = _("Secret not found: %s");
break;
+ case VIR_ERR_OPERATION_TIMEOUT:
+ if (info == NULL)
+ errmsg = _("Timed out during operation");
+ else
+ errmsg = _("Timed out during operation: %s");
+ break;
}
return (errmsg);
}
--
1.6.2.5