Don't create the cgroups ahead of launching the container, since
there is no need for the limits to apply during initial bootstrap.
Instead, create the cgroup once the container's init PID is known,
and tell systemd that the init PID, rather than the controller PID,
is the leader.
---
src/lxc/lxc_cgroup.c | 11 +++--------
src/lxc/lxc_cgroup.h | 3 ++-
src/lxc/lxc_controller.c | 42 +++++++++++++++++++++++++++++++++++++-----
3 files changed, 42 insertions(+), 14 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 728e8e5..0987050 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -462,7 +462,8 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
}
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
+ pid_t initpid)
{
virCgroupPtr cgroup = NULL;
@@ -473,18 +474,12 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
goto cleanup;
}
- /*
- * XXX
- * We should pass the PID of the LXC init process
- * not ourselves, but this requires some more
- * refactoring. We should also pass the root dir
- */
if (virCgroupNewMachine(def->name,
"lxc",
true,
def->uuid,
NULL,
- getpid(),
+ initpid,
true,
0, NULL,
def->resource->partition,
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index 0e78126..31d6800 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -27,7 +27,8 @@
# include "lxc_fuse.h"
# include "virusb.h"
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def);
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
+ pid_t initpid);
virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def);
int virLXCCgroupSetup(virDomainDefPtr def,
virCgroupPtr cgroup,
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index f2c0b57..00d0e23 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -680,8 +680,9 @@ static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl,
* virLXCControllerSetupResourceLimits
* @ctrl: the controller state
*
- * Creates a cgroup for the container, moves the task inside,
- * and sets resource limits
+ * Sets up the non-cgroup based resource limits that need
+ * to be inherited by the child process across clone()/exec().
+ * The cgroup limits are set up later.
*
* Returns 0 on success or -1 in case of error
*/
@@ -704,6 +705,37 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr
ctrl)
if (virLXCControllerSetupCpuAffinity(ctrl) < 0)
goto cleanup;
+ ret = 0;
+ cleanup:
+ virBitmapFree(auto_nodeset);
+ return ret;
+}
+
+
+/*
+ * Creates the cgroup and sets up the various limits associated
+ * with it
+ */
+static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr ctrl)
+{
+ virBitmapPtr auto_nodeset = NULL;
+ int ret = -1;
+ virBitmapPtr nodeset = NULL;
+
+ VIR_DEBUG("Setting up cgroup resource limits");
+
+ if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0)
+ goto cleanup;
+
+ nodeset = virDomainNumatuneGetNodeset(ctrl->def->numatune, auto_nodeset, -1);
+
+ if (!(ctrl->cgroup = virLXCCgroupCreate(ctrl->def,
+ ctrl->initpid)))
+ goto cleanup;
+
+ if (virCgroupAddTask(ctrl->cgroup, getpid()) < 0)
+ goto cleanup;
+
if (virLXCCgroupSetup(ctrl->def, ctrl->cgroup, nodeset) < 0)
goto cleanup;
@@ -2224,6 +2256,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
for (i = 0; i < ctrl->npassFDs; i++)
VIR_FORCE_CLOSE(ctrl->passFDs[i]);
+ if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
+ goto cleanup;
+
if (virLXCControllerSetupUserns(ctrl) < 0)
goto cleanup;
@@ -2454,9 +2489,6 @@ int main(int argc, char *argv[])
if (virLXCControllerValidateConsoles(ctrl) < 0)
goto cleanup;
- if (!(ctrl->cgroup = virLXCCgroupCreate(ctrl->def)))
- goto cleanup;
-
if (virLXCControllerSetupServer(ctrl) < 0)
goto cleanup;
--
2.1.0