[libvirt] [PATCH 0/8] Don't hold both monitor and agent jobs at the same time

We have to assume that the guest agent may be malicious, so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job and an agent job while we're querying the agent, any other threads will be blocked from using the monitor while the agent is unresponsive. Because libvirt waits forever for an agent response, this makes us vulnerable to a denial of service from a malicious (or simply buggy) guest agent. This series of patches attempts to remove any cases where we were holding both jobs at the same time, removes a convenience function which allows us to grab both jobs at once, and updates documentation regarding this issue. Jonathon Jongsma (8): qemu: don't take agent and monitor job for shutdown qemu: don't hold a monitor and agent job for reboot qemu: don't hold both jobs for suspend qemu: don't hold monitor and agent job when setting time qemu: don't hold monitor job for fsinfo qemu: don't hold monitor job for GetGuestInfo() qemu: remove use of qemuDomainObjBeginJobWithAgent() qemu: remove qemuDomainObjBegin/EndJobWithAgent() src/qemu/THREADS.txt | 58 +----- src/qemu/qemu_domain.c | 56 +----- src/qemu/qemu_domain.h | 7 - src/qemu/qemu_driver.c | 405 +++++++++++++++++++++++++---------------- 4 files changed, 258 insertions(+), 268 deletions(-) -- 2.21.0

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. So split the function into separate parts: one that does the agent shutdown and one that does the monitor shutdown. Each part holds only a job of the appropriate type. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 116 +++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 44 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 1911073f3e..92efde72dd 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1929,6 +1929,72 @@ static int qemuDomainResume(virDomainPtr dom) return ret; } +static int qemuDomainShutdownFlagsAgent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool isReboot, + bool reportError) +{ + int ret = -1; + qemuAgentPtr agent; + int agentFlag = isReboot ? QEMU_AGENT_SHUTDOWN_REBOOT : + QEMU_AGENT_SHUTDOWN_POWERDOWN; + + if (qemuDomainObjBeginAgentJob(driver, vm, + QEMU_AGENT_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + if (!qemuDomainAgentAvailable(vm, reportError)) + goto endjob; + + qemuDomainSetFakeReboot(driver, vm, false); + agent = qemuDomainObjEnterAgent(vm); + ret = qemuAgentShutdown(agent, agentFlag); + qemuDomainObjExitAgent(vm, agent); + + endjob: + qemuDomainObjEndAgentJob(vm); + + cleanup: + return ret; +} + +static int qemuDomainShutdownFlagsMonitor(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool isReboot) +{ + int ret = -1; + qemuDomainObjPrivatePtr priv; + + priv = vm->privateData; + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) { + 
virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + qemuDomainSetFakeReboot(driver, vm, isReboot); + qemuDomainObjEnterMonitor(driver, vm); + ret = qemuMonitorSystemPowerdown(priv->mon); + if (qemuDomainObjExitMonitor(driver, vm) < 0) + ret = -1; + + endjob: + qemuDomainObjEndJob(driver, vm); + + cleanup: + return ret; +} + static int qemuDomainShutdownFlags(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; @@ -1938,8 +2004,6 @@ static int qemuDomainShutdownFlags(virDomainPtr dom, unsigned int flags) bool useAgent = false, agentRequested, acpiRequested; bool isReboot = false; bool agentForced; - qemuDomainAgentJob agentJob = QEMU_AGENT_JOB_NONE; - int agentFlag = QEMU_AGENT_SHUTDOWN_POWERDOWN; virCheckFlags(VIR_DOMAIN_SHUTDOWN_ACPI_POWER_BTN | VIR_DOMAIN_SHUTDOWN_GUEST_AGENT, -1); @@ -1950,7 +2014,6 @@ static int qemuDomainShutdownFlags(virDomainPtr dom, unsigned int flags) if (vm->def->onPoweroff == VIR_DOMAIN_LIFECYCLE_ACTION_RESTART || vm->def->onPoweroff == VIR_DOMAIN_LIFECYCLE_ACTION_RESTART_RENAME) { isReboot = true; - agentFlag = QEMU_AGENT_SHUTDOWN_REBOOT; VIR_INFO("Domain on_poweroff setting overridden, attempting reboot"); } @@ -1965,62 +2028,27 @@ static int qemuDomainShutdownFlags(virDomainPtr dom, unsigned int flags) if (virDomainShutdownFlagsEnsureACL(dom->conn, vm->def, flags) < 0) goto cleanup; - if (useAgent) - agentJob = QEMU_AGENT_JOB_MODIFY; - - if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_MODIFY, - agentJob) < 0) - goto cleanup; - - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) { - virReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - goto endjob; - } - agentForced = agentRequested && !acpiRequested; - if (!qemuDomainAgentAvailable(vm, agentForced)) { - if (agentForced) - goto endjob; - useAgent = false; - } - - if (useAgent) { - qemuAgentPtr agent; - qemuDomainSetFakeReboot(driver, vm, false); - agent = 
qemuDomainObjEnterAgent(vm); - ret = qemuAgentShutdown(agent, agentFlag); - qemuDomainObjExitAgent(vm, agent); + ret = qemuDomainShutdownFlagsAgent(driver, vm, isReboot, agentForced); + if (ret < 0 && agentForced) + goto cleanup; } /* If we are not enforced to use just an agent, try ACPI * shutdown as well in case agent did not succeed. */ - if (!useAgent || - (ret < 0 && (acpiRequested || !flags))) { - + if (!useAgent || (ret < 0 && (acpiRequested || !flags))) { /* Even if agent failed, we have to check if guest went away * by itself while our locks were down. */ if (useAgent && !virDomainObjIsActive(vm)) { ret = 0; - goto endjob; + goto cleanup; } - qemuDomainSetFakeReboot(driver, vm, isReboot); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorSystemPowerdown(priv->mon); - if (qemuDomainObjExitMonitor(driver, vm) < 0) - ret = -1; + ret = qemuDomainShutdownFlagsMonitor(driver, vm, isReboot); } - endjob: - if (agentJob) - qemuDomainObjEndJobWithAgent(driver, vm); - else - qemuDomainObjEndJob(driver, vm); - cleanup: virDomainObjEndAPI(&vm); return ret; -- 2.21.0

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. So split the function into separate parts: one that does the agent shutdown and one that does the monitor shutdown. Each part holds only a job of the appropriate type.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 116 +++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 44 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 1911073f3e..92efde72dd 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1929,6 +1929,72 @@ static int qemuDomainResume(virDomainPtr dom) return ret; }
+static int qemuDomainShutdownFlagsAgent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool isReboot, + bool reportError)
Nitpick, new functions should be written as static int qemuDomainBlahBlah() { } Michal

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. Split the function so that we only hold the appropriate type of job while rebooting. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 111 +++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 43 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 92efde72dd..edd36f4a89 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2059,6 +2059,69 @@ static int qemuDomainShutdown(virDomainPtr dom) return qemuDomainShutdownFlags(dom, 0); } +static int +qemuDomainRebootAgent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool isReboot, + bool agentForced) +{ + qemuAgentPtr agent; + int ret = -1; + int agentFlag = QEMU_AGENT_SHUTDOWN_REBOOT; + + if (!isReboot) + agentFlag = QEMU_AGENT_SHUTDOWN_POWERDOWN; + + if (qemuDomainObjBeginAgentJob(driver, vm, + QEMU_AGENT_JOB_MODIFY) < 0) + return -1; + + if (!qemuDomainAgentAvailable(vm, agentForced)) + goto endjob; + + if (virDomainObjCheckActive(vm) < 0) + goto endjob; + + qemuDomainSetFakeReboot(driver, vm, false); + agent = qemuDomainObjEnterAgent(vm); + ret = qemuAgentShutdown(agent, agentFlag); + qemuDomainObjExitAgent(vm, agent); + + endjob: + qemuDomainObjEndAgentJob(vm); + return ret; +} + +static int +qemuDomainRebootMonitor(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool isReboot) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int ret = -1; + + if (qemuDomainObjBeginJob(driver, vm, + QEMU_JOB_MODIFY) < 0) + return -1; + + if (virDomainObjCheckActive(vm) < 0) + goto endjob; + +#if !WITH_YAJL + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("ACPI reboot is not supported without the JSON monitor")); + goto endjob; +#endif + qemuDomainSetFakeReboot(driver, vm, isReboot); + 
qemuDomainObjEnterMonitor(driver, vm); + ret = qemuMonitorSystemPowerdown(priv->mon); + if (qemuDomainObjExitMonitor(driver, vm) < 0) + ret = -1; + + endjob: + qemuDomainObjEndJob(driver, vm); + return ret; +} static int qemuDomainReboot(virDomainPtr dom, unsigned int flags) @@ -2070,8 +2133,6 @@ qemuDomainReboot(virDomainPtr dom, unsigned int flags) bool useAgent = false, agentRequested, acpiRequested; bool isReboot = true; bool agentForced; - qemuDomainAgentJob agentJob = QEMU_AGENT_JOB_NONE; - int agentFlag = QEMU_AGENT_SHUTDOWN_REBOOT; virCheckFlags(VIR_DOMAIN_REBOOT_ACPI_POWER_BTN | VIR_DOMAIN_REBOOT_GUEST_AGENT, -1); @@ -2081,7 +2142,6 @@ qemuDomainReboot(virDomainPtr dom, unsigned int flags) if (vm->def->onReboot == VIR_DOMAIN_LIFECYCLE_ACTION_DESTROY || vm->def->onReboot == VIR_DOMAIN_LIFECYCLE_ACTION_PRESERVE) { - agentFlag = QEMU_AGENT_SHUTDOWN_POWERDOWN; isReboot = false; VIR_INFO("Domain on_reboot setting overridden, shutting down"); } @@ -2097,56 +2157,21 @@ qemuDomainReboot(virDomainPtr dom, unsigned int flags) if (virDomainRebootEnsureACL(dom->conn, vm->def, flags) < 0) goto cleanup; - if (useAgent) - agentJob = QEMU_AGENT_JOB_MODIFY; - - if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_MODIFY, - agentJob) < 0) - goto cleanup; - agentForced = agentRequested && !acpiRequested; - if (!qemuDomainAgentAvailable(vm, agentForced)) { - if (agentForced) - goto endjob; - useAgent = false; - } - - if (virDomainObjCheckActive(vm) < 0) - goto endjob; - - if (useAgent) { - qemuAgentPtr agent; + if (useAgent) + ret = qemuDomainRebootAgent(driver, vm, isReboot, agentForced); - qemuDomainSetFakeReboot(driver, vm, false); - agent = qemuDomainObjEnterAgent(vm); - ret = qemuAgentShutdown(agent, agentFlag); - qemuDomainObjExitAgent(vm, agent); - } + if (ret < 0 && agentForced) + goto cleanup; /* If we are not enforced to use just an agent, try ACPI * shutdown as well in case agent did not succeed. 
*/ if ((!useAgent) || (ret < 0 && (acpiRequested || !flags))) { -#if !WITH_YAJL - virReportError(VIR_ERR_OPERATION_INVALID, "%s", - _("ACPI reboot is not supported without the JSON monitor")); - goto endjob; -#endif - qemuDomainSetFakeReboot(driver, vm, isReboot); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorSystemPowerdown(priv->mon); - if (qemuDomainObjExitMonitor(driver, vm) < 0) - ret = -1; + ret = qemuDomainRebootMonitor(driver, vm, isReboot); } - endjob: - if (agentJob) - qemuDomainObjEndJobWithAgent(driver, vm); - else - qemuDomainObjEndJob(driver, vm); - cleanup: virDomainObjEndAPI(&vm); return ret; -- 2.21.0

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS.
Split the function so that we only hold the appropriate type of job while rebooting.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 111 +++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 43 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 92efde72dd..edd36f4a89 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c
@@ -2097,56 +2157,21 @@ qemuDomainReboot(virDomainPtr dom, unsigned int flags)
+ if (ret < 0 && agentForced) + goto cleanup;
Ooops, misaligned. Michal

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. So split the function up a bit to only hold the monitor job while querying qemu for whether the domain supports suspend. Then acquire only an agent job while issuing the agent suspend command. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 93 ++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index edd36f4a89..e39ee2acc9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -19713,6 +19713,58 @@ qemuDomainProbeQMPCurrentMachine(virQEMUDriverPtr driver, } +/* returns -1 on error, or if query is not supported, 0 if query was successful */ +static int +qemuDomainQueryWakeupSuspendSupport(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool *wakeupSupported) +{ + int ret = -1; + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CURRENT_MACHINE)) + return ret; + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + return ret; + + if ((ret = virDomainObjCheckActive(vm)) < 0) + goto endjob; + + ret = qemuDomainProbeQMPCurrentMachine(driver, vm, wakeupSupported); + + endjob: + qemuDomainObjEndJob(driver, vm); + + return ret; +} + +static int qemuDomainPMSuspendAgent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + unsigned int target) +{ + qemuAgentPtr agent; + int ret = -1; + + if (qemuDomainObjBeginAgentJob(driver, vm, QEMU_AGENT_JOB_MODIFY) < 0) + return -1; + + if ((ret = virDomainObjCheckActive(vm)) < 0) + goto endjob; + + if (!qemuDomainAgentAvailable(vm, true)) + goto endjob; + + agent = qemuDomainObjEnterAgent(vm); + ret = qemuAgentSuspend(agent, target); + qemuDomainObjExitAgent(vm, agent); + + endjob: + qemuDomainObjEndAgentJob(vm); 
+ + return ret; +} + static int qemuDomainPMSuspendForDuration(virDomainPtr dom, unsigned int target, @@ -19720,11 +19772,9 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; - qemuDomainObjPrivatePtr priv; virDomainObjPtr vm; - qemuAgentPtr agent; - qemuDomainJob job = QEMU_JOB_NONE; int ret = -1; + bool wakeupSupported; virCheckFlags(0, -1); @@ -19749,17 +19799,6 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom, if (virDomainPMSuspendForDurationEnsureACL(dom->conn, vm->def) < 0) goto cleanup; - priv = vm->privateData; - - if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CURRENT_MACHINE)) - job = QEMU_JOB_MODIFY; - - if (qemuDomainObjBeginJobWithAgent(driver, vm, job, QEMU_AGENT_JOB_MODIFY) < 0) - goto cleanup; - - if (virDomainObjCheckActive(vm) < 0) - goto endjob; - /* * The case we want to handle here is when QEMU has the API (i.e. * QEMU_CAPS_QUERY_CURRENT_MACHINE is set). Otherwise, do not interfere @@ -19767,16 +19806,11 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom, * that don't know about this cap, will keep their old behavior of * suspending 'in the dark'. 
*/ - if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CURRENT_MACHINE)) { - bool wakeupSupported; - - if (qemuDomainProbeQMPCurrentMachine(driver, vm, &wakeupSupported) < 0) - goto endjob; - + if (qemuDomainQueryWakeupSuspendSupport(driver, vm, &wakeupSupported) == 0) { if (!wakeupSupported) { virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", _("Domain does not have suspend support")); - goto endjob; + goto cleanup; } } @@ -19786,29 +19820,18 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom, target == VIR_NODE_SUSPEND_TARGET_HYBRID)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("S3 state is disabled for this domain")); - goto endjob; + goto cleanup; } if (vm->def->pm.s4 == VIR_TRISTATE_BOOL_NO && target == VIR_NODE_SUSPEND_TARGET_DISK) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("S4 state is disabled for this domain")); - goto endjob; + goto cleanup; } } - if (!qemuDomainAgentAvailable(vm, true)) - goto endjob; - - agent = qemuDomainObjEnterAgent(vm); - ret = qemuAgentSuspend(agent, target); - qemuDomainObjExitAgent(vm, agent); - - endjob: - if (job) - qemuDomainObjEndJobWithAgent(driver, vm); - else - qemuDomainObjEndAgentJob(vm); + ret = qemuDomainPMSuspendAgent(driver, vm, target); cleanup: virDomainObjEndAPI(&vm); -- 2.21.0

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS.
So split the function up a bit to only hold the monitor job while querying qemu for whether the domain supports suspend. Then acquire only an agent job while issuing the agent suspend command.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 93 ++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 35 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index edd36f4a89..e39ee2acc9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -19713,6 +19713,58 @@ qemuDomainProbeQMPCurrentMachine(virQEMUDriverPtr driver, }
+/* returns -1 on error, or if query is not supported, 0 if query was successful */ +static int +qemuDomainQueryWakeupSuspendSupport(virQEMUDriverPtr driver, + virDomainObjPtr vm, + bool *wakeupSupported) +{ + int ret = -1; + qemuDomainObjPrivatePtr priv = vm->privateData;
Usually, I put @ret last as it's shorter line compared to @priv.
+ + if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CURRENT_MACHINE)) + return ret;
s/ret/-1/
+ + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + return ret; +
Michal

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. Split the function so that the portion issuing the agent command only holds an agent job and the portion issuing the monitor command holds only a monitor job. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 54 +++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e39ee2acc9..10fad8d75d 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -20256,6 +20256,35 @@ qemuDomainGetTime(virDomainPtr dom, } +static int +qemuDomainSetTimeAgent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + long long seconds, + unsigned int nseconds, + bool rtcSync) +{ + qemuAgentPtr agent; + int rv = -1; + + if (qemuDomainObjBeginAgentJob(driver, vm, QEMU_AGENT_JOB_MODIFY) < 0) + return -1; + + if (virDomainObjCheckActive(vm) < 0) + goto endjob; + + if (!qemuDomainAgentAvailable(vm, true)) + goto endjob; + + agent = qemuDomainObjEnterAgent(vm); + rv = qemuAgentSetTime(agent, seconds, nseconds, rtcSync); + qemuDomainObjExitAgent(vm, agent); + + endjob: + qemuDomainObjEndAgentJob(vm); + + return rv; +} + static int qemuDomainSetTime(virDomainPtr dom, long long seconds, @@ -20265,7 +20294,6 @@ qemuDomainSetTime(virDomainPtr dom, virQEMUDriverPtr driver = dom->conn->privateData; qemuDomainObjPrivatePtr priv; virDomainObjPtr vm; - qemuAgentPtr agent; bool rtcSync = flags & VIR_DOMAIN_TIME_SYNC; int ret = -1; int rv; @@ -20280,14 +20308,6 @@ qemuDomainSetTime(virDomainPtr dom, priv = vm->privateData; - if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_MODIFY, - QEMU_AGENT_JOB_MODIFY) < 0) - goto cleanup; - - if (virDomainObjCheckActive(vm) < 0) - goto endjob; - /* On x86, the rtc-reset-reinjection QMP command must be 
called after * setting the time to avoid trouble down the line. If the command is * not available, don't set the time at all and report an error */ @@ -20297,18 +20317,14 @@ qemuDomainSetTime(virDomainPtr dom, virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", _("cannot set time: qemu doesn't support " "rtc-reset-reinjection command")); - goto endjob; + goto cleanup; } - if (!qemuDomainAgentAvailable(vm, true)) - goto endjob; - - agent = qemuDomainObjEnterAgent(vm); - rv = qemuAgentSetTime(agent, seconds, nseconds, rtcSync); - qemuDomainObjExitAgent(vm, agent); + if (qemuDomainSetTimeAgent(driver, vm, seconds, nseconds, rtcSync) < 0) + goto cleanup; - if (rv < 0) - goto endjob; + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; if (virDomainObjCheckActive(vm) < 0) goto endjob; @@ -20327,7 +20343,7 @@ qemuDomainSetTime(virDomainPtr dom, ret = 0; endjob: - qemuDomainObjEndJobWithAgent(driver, vm); + qemuDomainObjEndJob(driver, vm); cleanup: virDomainObjEndAPI(&vm); -- 2.21.0

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. This function does not issue any monitor commands, so we can drop the monitor job and only hold an agent job. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 10fad8d75d..e1a91c5049 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -21623,9 +21623,8 @@ qemuDomainGetFSInfo(virDomainPtr dom, if (virDomainGetFSInfoEnsureACL(dom->conn, vm->def) < 0) goto cleanup; - if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_QUERY, - QEMU_AGENT_JOB_QUERY) < 0) + if (qemuDomainObjBeginAgentJob(driver, vm, + QEMU_AGENT_JOB_QUERY) < 0) goto cleanup; if (virDomainObjCheckActive(vm) < 0) @@ -21639,7 +21638,7 @@ qemuDomainGetFSInfo(virDomainPtr dom, qemuDomainObjExitAgent(vm, agent); endjob: - qemuDomainObjEndJobWithAgent(driver, vm); + qemuDomainObjEndAgentJob(vm); cleanup: virDomainObjEndAPI(&vm); -- 2.21.0

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS.
This function does not issue any monitor commands, so we can drop the monitor job and only hold an agent job.
While this is true, the reason I've added BeginJobWithAgent() call is that qemuAgentGetFSInfo() works with vm->def which may change beneath our hands since we wouldn't be taking a vm job. This is potentially dangerous and may lead to a crash (as @vm is unlocked and not guarded by any job). What we need to do is to create a copy of vm->def and pass that to qemuAgentGetFSInfo(). However, creating a copy of domain definition is very expensive - esp. when the agent monitor function needs only a list of disk targets. So we might construct the list beforehand and pass that to the function. Then taking only agent job is going to be okay.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 10fad8d75d..e1a91c5049 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -21623,9 +21623,8 @@ qemuDomainGetFSInfo(virDomainPtr dom, if (virDomainGetFSInfoEnsureACL(dom->conn, vm->def) < 0) goto cleanup;
- if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_QUERY, - QEMU_AGENT_JOB_QUERY) < 0) + if (qemuDomainObjBeginAgentJob(driver, vm, + QEMU_AGENT_JOB_QUERY) < 0) goto cleanup;
if (virDomainObjCheckActive(vm) < 0) @@ -21639,7 +21638,7 @@ qemuDomainGetFSInfo(virDomainPtr dom, qemuDomainObjExitAgent(vm, agent);
endjob: - qemuDomainObjEndJobWithAgent(driver, vm); + qemuDomainObjEndAgentJob(vm);
cleanup: virDomainObjEndAPI(&vm);
I won't push this one, sorry. Michal

We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS. This function issues several agent commands, but does not issue any monitor commands. Therefore, we can drop the monitor job and only hold an agent job. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e1a91c5049..1cf54cda8a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -22688,9 +22688,8 @@ qemuDomainGetGuestInfo(virDomainPtr dom, if (virDomainGetGuestInfoEnsureACL(dom->conn, vm->def) < 0) goto cleanup; - if (qemuDomainObjBeginJobWithAgent(driver, vm, - QEMU_JOB_QUERY, - QEMU_AGENT_JOB_QUERY) < 0) + if (qemuDomainObjBeginAgentJob(driver, vm, + QEMU_AGENT_JOB_QUERY) < 0) goto cleanup; if (!qemuDomainAgentAvailable(vm, true)) @@ -22740,7 +22739,7 @@ qemuDomainGetGuestInfo(virDomainPtr dom, qemuDomainObjExitAgent(vm, agent); endjob: - qemuDomainObjEndJobWithAgent(driver, vm); + qemuDomainObjEndAgentJob(vm); cleanup: virDomainObjEndAPI(&vm); -- 2.21.0

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job while we're querying the agent, we open ourselves up to a DoS.
This function issues several agent commands, but does not issue any monitor commands. Therefore, we can drop the monitor job and only hold an agent job.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
Same reasoning as to the previous one. Michal

This function will be removed in a future commit because it allows the caller to acquire both monitor and agent jobs at the same time. Holding both job types creates a vulnerability to denial of service from a malicious guest agent. qemuDomainSetVcpusFlags() always passes NONE for either the monitor job or the agent job (and thus is not vulnerable to the DoS), so we can simply replace this function with the functions for acquiring the appropriate type of job. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/qemu_driver.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 1cf54cda8a..921230b8ce 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5045,8 +5045,6 @@ qemuDomainSetVcpusFlags(virDomainPtr dom, virDomainDefPtr persistentDef; bool hotpluggable = !!(flags & VIR_DOMAIN_VCPU_HOTPLUGGABLE); bool useAgent = !!(flags & VIR_DOMAIN_VCPU_GUEST); - qemuDomainJob job = QEMU_JOB_NONE; - qemuDomainAgentJob agentJob = QEMU_AGENT_JOB_NONE; int ret = -1; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | @@ -5061,13 +5059,14 @@ qemuDomainSetVcpusFlags(virDomainPtr dom, if (virDomainSetVcpusFlagsEnsureACL(dom->conn, vm->def, flags) < 0) goto cleanup; - if (useAgent) - agentJob = QEMU_AGENT_JOB_MODIFY; - else - job = QEMU_JOB_MODIFY; - if (qemuDomainObjBeginJobWithAgent(driver, vm, job, agentJob) < 0) - goto cleanup; + if (useAgent) { + if (qemuDomainObjBeginAgentJob(driver, vm, QEMU_AGENT_JOB_MODIFY) < 0) + goto cleanup; + } else { + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + } if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) goto endjob; @@ -5081,7 +5080,7 @@ qemuDomainSetVcpusFlags(virDomainPtr dom, nvcpus, hotpluggable); endjob: - if (agentJob) + if (useAgent) qemuDomainObjEndAgentJob(vm); else qemuDomainObjEndJob(driver, vm); -- 2.21.0

This function potentially grabs both a monitor job and an agent job at the same time. This is problematic because it means that a malicious (or just buggy) guest agent can cause a denial of service on the host. The presence of this function makes it easy to do the wrong thing and hold both jobs at the same time. All existing uses have already been removed by previous commits. Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> --- src/qemu/THREADS.txt | 58 +++++------------------------------------- src/qemu/qemu_domain.c | 56 ++++------------------------------------ src/qemu/qemu_domain.h | 7 ----- 3 files changed, 11 insertions(+), 110 deletions(-) diff --git a/src/qemu/THREADS.txt b/src/qemu/THREADS.txt index aa428fda6a..a7d8709a43 100644 --- a/src/qemu/THREADS.txt +++ b/src/qemu/THREADS.txt @@ -61,11 +61,12 @@ There are a number of locks on various objects Agent job condition is then used when thread wishes to talk to qemu agent monitor. It is possible to acquire just agent job - (qemuDomainObjBeginAgentJob), or only normal job - (qemuDomainObjBeginJob) or both at the same time - (qemuDomainObjBeginJobWithAgent). Which type of job to grab depends - whether caller wishes to communicate only with agent socket, or only - with qemu monitor socket or both, respectively. + (qemuDomainObjBeginAgentJob), or only normal job (qemuDomainObjBeginJob) + but not both at the same time. Holding an agent job and a normal job would + allow an unresponsive or malicious agent to block normal libvirt API and + potentially result in a denial of service. Which type of job to grab + depends whether caller wishes to communicate only with agent socket, or + only with qemu monitor socket. 
Immediately after acquiring the virDomainObjPtr lock, any method which intends to update state must acquire asynchronous, normal or @@ -141,18 +142,6 @@ To acquire the agent job condition -To acquire both normal and agent job condition - - qemuDomainObjBeginJobWithAgent() - - Waits until there is no normal and no agent job set - - Sets both job.active and job.agentActive with required job types - - qemuDomainObjEndJobWithAgent() - - Sets both job.active and job.agentActive to 0 - - Signals on job.cond condition - - - To acquire the asynchronous job condition qemuDomainObjBeginAsyncJob() @@ -292,41 +281,6 @@ Design patterns virDomainObjEndAPI(&obj); - * Invoking both monitor and agent commands on a virDomainObjPtr - - virDomainObjPtr obj; - qemuAgentPtr agent; - - obj = qemuDomObjFromDomain(dom); - - qemuDomainObjBeginJobWithAgent(obj, QEMU_JOB_TYPE, QEMU_AGENT_JOB_TYPE); - - if (!virDomainObjIsActive(dom)) - goto cleanup; - - ...do prep work... - - if (!qemuDomainAgentAvailable(obj, true)) - goto cleanup; - - agent = qemuDomainObjEnterAgent(obj); - qemuAgentXXXX(agent, ..); - qemuDomainObjExitAgent(obj, agent); - - ... - - qemuDomainObjEnterMonitor(obj); - qemuMonitorXXXX(priv->mon); - qemuDomainObjExitMonitor(obj); - - /* Alternatively, talk to the monitor first and then talk to the agent. */ - - ...do final work... - - qemuDomainObjEndJobWithAgent(obj); - virDomainObjEndAPI(&obj); - - * Running asynchronous job virDomainObjPtr obj; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 470d342afc..97cf6b2255 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8569,26 +8569,6 @@ qemuDomainObjBeginAgentJob(virQEMUDriverPtr driver, QEMU_ASYNC_JOB_NONE, false); } -/** - * qemuDomainObjBeginJobWithAgent: - * - * Grabs both monitor and agent types of job. Use if caller talks to - * both monitor and guest agent. However, if @job (or @agentJob) is - * QEMU_JOB_NONE (or QEMU_AGENT_JOB_NONE) only agent job is acquired (or - * monitor job). 
- * - * To end job call qemuDomainObjEndJobWithAgent. - */ -int -qemuDomainObjBeginJobWithAgent(virQEMUDriverPtr driver, - virDomainObjPtr obj, - qemuDomainJob job, - qemuDomainAgentJob agentJob) -{ - return qemuDomainObjBeginJobInternal(driver, obj, job, agentJob, - QEMU_ASYNC_JOB_NONE, false); -} - int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj, qemuDomainAsyncJob asyncJob, @@ -8703,31 +8683,6 @@ qemuDomainObjEndAgentJob(virDomainObjPtr obj) virCondBroadcast(&priv->job.cond); } -void -qemuDomainObjEndJobWithAgent(virQEMUDriverPtr driver, - virDomainObjPtr obj) -{ - qemuDomainObjPrivatePtr priv = obj->privateData; - qemuDomainJob job = priv->job.active; - qemuDomainAgentJob agentJob = priv->job.agentActive; - - priv->jobs_queued--; - - VIR_DEBUG("Stopping both jobs: %s %s (async=%s vm=%p name=%s)", - qemuDomainJobTypeToString(job), - qemuDomainAgentJobTypeToString(agentJob), - qemuDomainAsyncJobTypeToString(priv->job.asyncJob), - obj, obj->def->name); - - qemuDomainObjResetJob(priv); - qemuDomainObjResetAgentJob(priv); - if (qemuDomainTrackJob(job)) - qemuDomainObjSaveStatus(driver, obj); - /* We indeed need to wake up ALL threads waiting because - * grabbing a job requires checking more variables. */ - virCondBroadcast(&priv->job.cond); -} - void qemuDomainObjEndAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj) { @@ -8761,9 +8716,9 @@ qemuDomainObjAbortAsyncJob(virDomainObjPtr obj) * obj must be locked before calling * * To be called immediately before any QEMU monitor API call - * Must have already either called qemuDomainObjBeginJob() or - * qemuDomainObjBeginJobWithAgent() and checked that the VM is - * still active; may not be used for nested async jobs. + * Must have already called qemuDomainObjBeginJob() and checked + * that the VM is still active; may not be used for nested async + * jobs. 
* * To be followed with qemuDomainObjExitMonitor() once complete */ @@ -8885,9 +8840,8 @@ qemuDomainObjEnterMonitorAsync(virQEMUDriverPtr driver, * obj must be locked before calling * * To be called immediately before any QEMU agent API call. - * Must have already called qemuDomainObjBeginAgentJob() or - * qemuDomainObjBeginJobWithAgent() and checked that the VM is - * still active. + * Must have already called qemuDomainObjBeginAgentJob() and + * checked that the VM is still active. * * To be followed with qemuDomainObjExitAgent() once complete */ diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index f626d3a54c..fb4c9e0467 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -622,11 +622,6 @@ int qemuDomainObjBeginAgentJob(virQEMUDriverPtr driver, virDomainObjPtr obj, qemuDomainAgentJob agentJob) G_GNUC_WARN_UNUSED_RESULT; -int qemuDomainObjBeginJobWithAgent(virQEMUDriverPtr driver, - virDomainObjPtr obj, - qemuDomainJob job, - qemuDomainAgentJob agentJob) - G_GNUC_WARN_UNUSED_RESULT; int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj, qemuDomainAsyncJob asyncJob, @@ -645,8 +640,6 @@ int qemuDomainObjBeginJobNowait(virQEMUDriverPtr driver, void qemuDomainObjEndJob(virQEMUDriverPtr driver, virDomainObjPtr obj); void qemuDomainObjEndAgentJob(virDomainObjPtr obj); -void qemuDomainObjEndJobWithAgent(virQEMUDriverPtr driver, - virDomainObjPtr obj); void qemuDomainObjEndAsyncJob(virQEMUDriverPtr driver, virDomainObjPtr obj); void qemuDomainObjAbortAsyncJob(virDomainObjPtr obj); -- 2.21.0

On 12/5/19 10:08 AM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious, so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job and an agent job while we're querying the agent, any other threads will be blocked from using the monitor while the agent is unresponsive. Because libvirt waits forever for an agent response, this makes us vulnerable to a denial of service from a malicious (or simply buggy) guest agent.
This series of patches attempts to remove any cases where we were holding both jobs at the same time, removes a convenience function which allows us to grab both jobs at once, and updates documentation regarding this issue.
Are any of these worth CVEs? Are any of the APIs usable on a read-only connection? (My quick glance says no, it looks like they all require read-write). What about ACLs?
Jonathon Jongsma (8): qemu: don't take agent and monitor job for shutdown
For example, virDomainShutdownFlags requires @acl domain:write:VIR_DOMAIN_SHUTDOWN_GUEST_AGENT (that is, if you request guest agent shutdown, you have to have the equivalent of domain:write ACL privilege which is effectively root permissions, precisely because the guest agent is untrusted). While a DoS is bad, you already have enough permissions to shoot yourself in the foot in other ways, so it is not a privilege escalation and thus not a CVE.
qemu: don't hold a monitor and agent job for reboot qemu: don't hold both jobs for suspend qemu: don't hold monitor and agent job when setting time qemu: don't hold monitor job for fsinfo qemu: don't hold monitor job for GetGuestInfo()
But virDomainGetFSInfo only requires @acl: domain:fs_freeze. Is it possible for a user to have domain:fs_freeze permission but not domain:write permission? If so, we have a CVE because of the DoS, at least when ACLs are in effect.
qemu: remove use of qemuDomainObjBeginJobWithAgent() qemu: remove qemuDomainObjBegin/EndJobWithAgent()
src/qemu/THREADS.txt | 58 +----- src/qemu/qemu_domain.c | 56 +----- src/qemu/qemu_domain.h | 7 - src/qemu/qemu_driver.c | 405 +++++++++++++++++++++++++---------------- 4 files changed, 258 insertions(+), 268 deletions(-)
-- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org

On 12/5/19 5:08 PM, Jonathon Jongsma wrote:
We have to assume that the guest agent may be malicious, so we don't want to allow any agent queries to block any other libvirt API. By holding a monitor job and an agent job while we're querying the agent, any other threads will be blocked from using the monitor while the agent is unresponsive. Because libvirt waits forever for an agent response, this makes us vulnerable to a denial of service from a malicious (or simply buggy) guest agent.
This series of patches attempts to remove any cases where we were holding both jobs at the same time, removes a convenience function which allows us to grab both jobs at once, and updates documentation regarding this issue.
Jonathon Jongsma (8): qemu: don't take agent and monitor job for shutdown qemu: don't hold a monitor and agent job for reboot qemu: don't hold both jobs for suspend qemu: don't hold monitor and agent job when setting time qemu: don't hold monitor job for fsinfo qemu: don't hold monitor job for GetGuestInfo() qemu: remove use of qemuDomainObjBeginJobWithAgent() qemu: remove qemuDomainObjBegin/EndJobWithAgent()
src/qemu/THREADS.txt | 58 +----- src/qemu/qemu_domain.c | 56 +----- src/qemu/qemu_domain.h | 7 - src/qemu/qemu_driver.c | 405 +++++++++++++++++++++++++---------------- 4 files changed, 258 insertions(+), 268 deletions(-)
ACK to all but 5/8 and 6/8. Also, I'm pushing patches 1-4 and 7. I'd push 8/8 also but we can't remove the function while it's still in use :-D Michal
participants (3)
-
Eric Blake
-
Jonathon Jongsma
-
Michal Privoznik