On 11/13/19 11:06 PM, Jonathon Jongsma wrote:
Some layered products such as oVirt have requested a way to avoid
being
blocked by guest agent commands when querying a loaded vm. For example,
many guest agent commands are polled periodically to monitor changes,
and rather than blocking the calling process, they'd prefer to simply
time out when an agent query is taking too long.
This patch adds a way for the user to specify a custom agent timeout
that is applied to all agent commands.
One special case to note here is the 'guest-sync' command. 'guest-sync'
is issued internally prior to calling any other command. (For example,
when libvirt wants to call 'guest-get-fsinfo', we first call
'guest-sync' and then call 'guest-get-fsinfo').
Previously, the 'guest-sync' command used a 5-second timeout
(VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that
followed always blocked indefinitely
(VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a
custom timeout is specified that is shorter than
5 seconds, this new timeout is also used for 'guest-sync'. If there is
no custom timeout or if the custom timeout is longer than 5 seconds, we
will continue to use the 5-second timeout.
Signed-off-by: Jonathon Jongsma <jjongsma(a)redhat.com>
---
Changes in v3:
- Changed enum names per Daniel's suggestion
- incorporated Michal's review and fixup patches
- As discussed on the mailing list, this patch does not acquire any jobs, but
simply uses mutex locking for data integrity
- libvirt release versions updated
- format and parse private agentTimeout data in status xml
include/libvirt/libvirt-domain.h | 10 +++
include/libvirt/libvirt-qemu.h | 8 +--
src/driver-hypervisor.h | 6 ++
src/libvirt-domain.c | 49 +++++++++++++
src/libvirt_public.syms | 5 ++
src/qemu/qemu_agent.c | 70 ++++++++++---------
src/qemu/qemu_agent.h | 3 +
src/qemu/qemu_domain.c | 6 ++
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_driver.c | 47 +++++++++++++
src/remote/remote_driver.c | 1 +
src/remote/remote_protocol.x | 18 ++++-
src/remote_protocol-structs | 9 +++
.../blockjob-blockdev-in.xml | 1 +
.../blockjob-mirror-in.xml | 1 +
.../disk-secinfo-upgrade-out.xml | 1 +
.../migration-in-params-in.xml | 1 +
.../migration-out-nbd-out.xml | 1 +
.../migration-out-nbd-tls-out.xml | 1 +
.../migration-out-params-in.xml | 1 +
tests/qemustatusxml2xmldata/modern-in.xml | 1 +
.../qemustatusxml2xmldata/vcpus-multi-in.xml | 1 +
tools/virsh-domain.c | 52 ++++++++++++++
23 files changed, 257 insertions(+), 37 deletions(-)
Close enough :-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 54dde34f15..86358341d5 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -2093,6 +2093,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
if (!(priv->dbusVMStates = virHashCreate(5, dbusVMStateHashFree)))
goto error;
+ /* agent commands block by default, user can choose different behavior */
+ priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
priv->driver = opaque;
@@ -2875,6 +2877,8 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf,
if (qemuDomainObjPrivateXMLFormatSlirp(buf, vm) < 0)
return -1;
+ virBufferAsprintf(buf, "<agentTimeout>%i</agentTimeout>\n",
priv->agentTimeout);
+
return 0;
}
@@ -3672,6 +3676,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) ==
1;
+ virXPathInt("string(./agentTimeout)", ctxt, &priv->agentTimeout);
This is not safe. If the value in status XML is not valid number,
ignoring the error is not good.
+
return 0;
error:
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index ab00b25789..98a9540275 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -293,6 +293,7 @@ struct _qemuDomainObjPrivate {
virDomainChrSourceDefPtr monConfig;
bool monError;
unsigned long long monStart;
+ int agentTimeout;
qemuAgentPtr agent;
bool agentError;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c969a3d463..77a26ccf2e 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -22666,6 +22666,52 @@ qemuDomainGetGuestInfo(virDomainPtr dom,
return ret;
}
+
+static int
+qemuDomainAgentSetResponseTimeout(virDomainPtr dom,
+ int timeout,
+ unsigned int flags)
+{
+ virDomainObjPtr vm = NULL;
+ int ret = -1;
+
+ virCheckFlags(0, -1);
+
+ if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) {
+ virReportError(VIR_ERR_INVALID_ARG,
+ _("guest agent timeout '%d' is "
+ "less than the minimum '%d'"),
+ timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN);
+ return -1;
+ }
+
+ if (!(vm = qemuDomainObjFromDomain(dom)))
+ return -1;
+
+ if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm->def) < 0)
+ goto cleanup;
+
+ /* If domain has an agent, change its timeout. Otherwise just save the
+ * request so that we can set the timeout when the agent appears */
+ if (qemuDomainAgentAvailable(vm, false)) {
+ /* We don't need to acquire a job since we're not interacting with the
+ * agent or the qemu monitor. We're only setting a struct member, so
+ * just acquire the mutex lock. Worst case, any in-process agent
+ * commands will use the newly-set agent timeout. */
+ virObjectLock(QEMU_DOMAIN_PRIVATE(vm)->agent);
+ qemuAgentSetResponseTimeout(QEMU_DOMAIN_PRIVATE(vm)->agent, timeout);
+ virObjectUnlock(QEMU_DOMAIN_PRIVATE(vm)->agent);
+ }
+
+ QEMU_DOMAIN_PRIVATE(vm)->agentTimeout = timeout;
This is the point where we need to save status XML if the domain is running.
+ ret = 0;
+
+ cleanup:
+ virDomainObjEndAPI(&vm);
+ return ret;
+}
+
+
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 0feb21ef17..def4107d27 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -9832,6 +9832,52 @@ cmdQemuAgentCommand(vshControl *ctl, const vshCmd *cmd)
return ret;
}
+/*
+ * "guest-agent-timeout" command
+ */
+static const vshCmdInfo info_guest_agent_timeout[] = {
+ {.name = "help",
+ .data = N_("Set the guest agent timeout")
+ },
+ {.name = "desc",
+ .data = N_("Set the number of seconds to wait for a response from the guest
agent.")
+ },
+ {.name = NULL}
+};
+
+static const vshCmdOptDef opts_guest_agent_timeout[] = {
+ VIRSH_COMMON_OPT_DOMAIN_FULL(0),
+ {.name = "timeout",
+ .type = VSH_OT_INT,
+ .flags = VSH_OFLAG_REQ_OPT,
+ .help = N_("timeout seconds.")
+ },
+ {.name = NULL}
+};
+
+static bool
+cmdGuestAgentTimeout(vshControl *ctl, const vshCmd *cmd)
+{
+ virDomainPtr dom = NULL;
+ int timeout;
+ const unsigned int flags = 0;
+ bool ret = false;
+
+ dom = virshCommandOptDomain(ctl, cmd, NULL);
+ if (dom == NULL)
+ goto cleanup;
+
+ if (vshCommandOptInt(ctl, cmd, "timeout", &timeout) < 0)
+ goto cleanup;
+
+ if (virDomainAgentSetResponseTimeout(dom, timeout, flags) < 0)
+ goto cleanup;
+
+ ret = true;
+ cleanup:
+ virshDomainFree(dom);
+ return ret;
+}
/*
* "lxc-enter-namespace" namespace
@@ -14492,6 +14538,12 @@ const vshCmdDef domManagementCmds[] = {
.info = info_qemu_agent_command,
.flags = 0
},
+ {.name = "guest-agent-timeout",
+ .handler = cmdGuestAgentTimeout,
+ .opts = opts_guest_agent_timeout,
+ .info = info_guest_agent_timeout,
+ .flags = 0
+ },
{.name = "reboot",
.handler = cmdReboot,
.opts = opts_reboot,
We need to document new virsh command.
I'm fixing all the isues I've raised, ACKing and pushing. Can you please
post a follow up patch that updates news.xml? I thinkg it's worth
recording in the release notes.
Reviewed-by: Michal Privoznik <mprivozn(a)redhat.com>
Michal