https://bugzilla.redhat.com/show_bug.cgi?id=1047659
If a VM dies very early, during an attempted connection to the guest
agent while the locks are down, the domain monitor object will be freed.
The object is then accessed later, because a failure during guest agent
startup isn't considered fatal.
In the current upstream version this doesn't lead to a crash, as
virObjectLock, called when entering the monitor in
qemuProcessDetectVcpuPIDs, checks the pointer before attempting to
dereference (lock) it. The NULL pointer is then caught in the monitor
helper code.
Before the introduction of virObjectLockable - observed on 0.10.2 - the
pointer was locked directly via virMutexLock, leading to a crash.
To avoid this problem, we need to differentiate between the guest agent
not being present and the VM quitting while the locks were down. The fix
reorganizes the code in qemuConnectAgent to add the check (returning -2
when the VM is no longer active) and then adds special handling of that
return value to the callers.
---
src/qemu/qemu_process.c | 34 +++++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a27eded..cf23ff3 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -248,6 +248,17 @@ qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm)
virObjectLock(vm);
priv->agentStart = 0;
+ if (agent == NULL)
+ virObjectUnref(vm);
+
+ if (!virDomainObjIsActive(vm)) {
+ qemuAgentClose(agent);
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("guest crashed while connecting to the guest agent"));
+ ret = -2;
+ goto cleanup;
+ }
+
if (virSecurityManagerClearSocketLabel(driver->securityManager,
vm->def) < 0) {
VIR_ERROR(_("Failed to clear security context for agent for %s"),
@@ -255,13 +266,7 @@ qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm)
goto cleanup;
}
- if (agent == NULL)
- virObjectUnref(vm);
- if (!virDomainObjIsActive(vm)) {
- qemuAgentClose(agent);
- goto cleanup;
- }
priv->agent = agent;
if (priv->agent == NULL) {
@@ -3120,6 +3125,7 @@ qemuProcessReconnect(void *opaque)
int reason;
virQEMUDriverConfigPtr cfg;
size_t i;
+ int ret;
memcpy(&oldjob, &data->oldjob, sizeof(oldjob));
@@ -3144,7 +3150,10 @@ qemuProcessReconnect(void *opaque)
goto error;
/* Failure to connect to agent shouldn't be fatal */
- if (qemuConnectAgent(driver, obj) < 0) {
+ if ((ret = qemuConnectAgent(driver, obj)) < 0) {
+ if (ret == -2)
+ goto error;
+
VIR_WARN("Cannot connect to QEMU guest agent for %s",
obj->def->name);
virResetLastError();
@@ -4018,7 +4027,10 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
/* Failure to connect to agent shouldn't be fatal */
- if (qemuConnectAgent(driver, vm) < 0) {
+ if ((ret = qemuConnectAgent(driver, vm)) < 0) {
+ if (ret == -2)
+ goto cleanup;
+
VIR_WARN("Cannot connect to QEMU guest agent for %s",
vm->def->name);
virResetLastError();
@@ -4478,6 +4490,7 @@ int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED,
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
virCapsPtr caps = NULL;
bool active = false;
+ int ret;
VIR_DEBUG("Beginning VM attach process");
@@ -4592,7 +4605,10 @@ int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED,
goto error;
/* Failure to connect to agent shouldn't be fatal */
- if (qemuConnectAgent(driver, vm) < 0) {
+ if ((ret = qemuConnectAgent(driver, vm)) < 0) {
+ if (ret == -2)
+ goto error;
+
VIR_WARN("Cannot connect to QEMU guest agent for %s",
vm->def->name);
virResetLastError();
--
1.8.5.2