From: "Daniel P. Berrange" <berrange(a)redhat.com>
Currently the QEMU driver creates the VM's cgroup prior to
forking, and then uses a virCommand hook to move the child
into the cgroup. This won't work with systemd whose APIs
do the creation of cgroups + attachment of processes atomically.
Fortunately we have a handshake taking place between the
QEMU driver and the child process prior to QEMU being exec()d,
which was introduced to allow setup of disk locking. By good
fortune this synchronization point can be used to enable the
QEMU drivedr to do atomic setup of cgroups removing the use
of the hook script.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/qemu/qemu_cgroup.c | 12 +++++++++---
src/qemu/qemu_process.c | 39 +++++++++++++++++----------------------
2 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 8559d26..6455f50 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -683,6 +683,9 @@ qemuInitCgroup(virQEMUDriverPtr driver,
&priv->cgroup) < 0)
goto cleanup;
+ if (virCgroupAddTask(priv->cgroup, vm->pid) < 0)
+ goto cleanup;
+
done:
ret = 0;
cleanup:
@@ -738,6 +741,12 @@ qemuSetupCgroup(virQEMUDriverPtr driver,
virCapsPtr caps = NULL;
int ret = -1;
+ if (!vm->pid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot setup cgroups until process is started"));
+ return -1;
+ }
+
if (qemuInitCgroup(driver, vm) < 0)
return -1;
@@ -1009,8 +1018,5 @@ qemuAddToCgroup(virDomainObjPtr vm)
if (priv->cgroup == NULL)
return 0; /* Not supported, so claim success */
- if (virCgroupAddTask(priv->cgroup, getpid()) < 0)
- return -1;
-
return 0;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index c5f281a..e8e459e 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1929,6 +1929,12 @@ qemuProcessInitCpuAffinity(virQEMUDriverPtr driver,
virBitmapPtr cpumap = NULL;
virBitmapPtr cpumapToSet = NULL;
+ if (!vm->pid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot setup CPU affinity until process is
started"));
+ return -1;
+ }
+
if (!(cpumap = qemuPrepareCpumap(driver, nodemask)))
return -1;
@@ -1949,11 +1955,7 @@ qemuProcessInitCpuAffinity(virQEMUDriverPtr driver,
}
}
- /* We are pressuming we are running between fork/exec of QEMU
- * so use '0' to indicate our own process ID. No threads are
- * running at this point
- */
- if (virProcessSetAffinity(0 /* Self */, cpumapToSet) < 0)
+ if (virProcessSetAffinity(vm->pid, cpumapToSet) < 0)
goto cleanup;
ret = 0;
@@ -2562,19 +2564,6 @@ static int qemuProcessHook(void *data)
if (virSecurityManagerClearSocketLabel(h->driver->securityManager,
h->vm->def) < 0)
goto cleanup;
- /* This must take place before exec(), so that all QEMU
- * memory allocation is on the correct NUMA node
- */
- VIR_DEBUG("Moving process to cgroup");
- if (qemuAddToCgroup(h->vm) < 0)
- goto cleanup;
-
- /* This must be done after cgroup placement to avoid resetting CPU
- * affinity */
- if (!h->vm->def->cputune.emulatorpin &&
- qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask) < 0)
- goto cleanup;
-
if (virNumaSetupMemoryPolicy(h->vm->def->numatune, h->nodemask) < 0)
goto cleanup;
@@ -3671,10 +3660,6 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
}
- VIR_DEBUG("Setting up domain cgroup (if required)");
- if (qemuSetupCgroup(driver, vm, nodemask) < 0)
- goto cleanup;
-
if (VIR_ALLOC(priv->monConfig) < 0)
goto cleanup;
@@ -3844,6 +3829,16 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
}
+ VIR_DEBUG("Setting up domain cgroup (if required)");
+ if (qemuSetupCgroup(driver, vm, nodemask) < 0)
+ goto cleanup;
+
+ /* This must be done after cgroup placement to avoid resetting CPU
+ * affinity */
+ if (!vm->def->cputune.emulatorpin &&
+ qemuProcessInitCpuAffinity(driver, vm, nodemask) < 0)
+ goto cleanup;
+
VIR_DEBUG("Setting domain security labels");
if (virSecurityManagerSetAllLabel(driver->securityManager,
vm->def, stdin_path) < 0)
--
1.8.1.4