From: "Daniel P. Berrange" <berrange(a)redhat.com>
Systemd uses a named cgroup mount for tracking processes. Add
it as another type of controller, albeit one which we have to
special case in a number of places. In particular we must
never create/delete directories there, nor add tasks. Essentially
the systemd mount is to be considered read-only for libvirt.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------
src/util/vircgroup.h | 1 +
tests/vircgrouptest.c | 9 +++++++
3 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 68ec953..5ff8850 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -57,7 +57,8 @@
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
"cpu", "cpuacct", "cpuset",
"memory", "devices",
- "freezer", "blkio", "net_cls",
"perf_event");
+ "freezer", "blkio", "net_cls",
"perf_event",
+ "name=systemd");
typedef enum {
VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
@@ -115,6 +116,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *tmp;
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ continue;
+
if (!group->controllers[i].placement)
continue;
@@ -320,6 +324,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
if (!group->controllers[i].mountPoint)
continue;
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ continue;
+
if (path[0] == '/') {
if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
return -1;
@@ -375,6 +382,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
int ret = -1;
char *procfile;
+ VIR_DEBUG("Detecting placement for pid %lld path %s",
+ (unsigned long long)pid, path);
if (pid == -1) {
if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
goto cleanup;
@@ -411,6 +420,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
const char *typestr = virCgroupControllerTypeToString(i);
int typelen = strlen(typestr);
char *tmp = controllers;
+
while (tmp) {
char *next = strchr(tmp, ',');
int len;
@@ -427,13 +437,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
* selfpath=="/libvirt.service" + path="foo" ->
"/libvirt.service/foo"
*/
if (typelen == len && STREQLEN(typestr, tmp, len) &&
- group->controllers[i].mountPoint != NULL) {
- if (virAsprintf(&group->controllers[i].placement,
- "%s%s%s", selfpath,
- (STREQ(selfpath, "/") ||
- STREQ(path, "") ? "" :
"/"),
- path) < 0)
- goto cleanup;
+ group->controllers[i].mountPoint != NULL &&
+ group->controllers[i].placement == NULL) {
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
+ if (VIR_STRDUP(group->controllers[i].placement,
+ selfpath) < 0)
+ goto cleanup;
+ } else {
+ if (virAsprintf(&group->controllers[i].placement,
+ "%s%s%s", selfpath,
+ (STREQ(selfpath, "/") ||
+ STREQ(path, "") ? "" :
"/"),
+ path) < 0)
+ goto cleanup;
+ }
}
tmp = next;
@@ -524,13 +541,16 @@ static int virCgroupDetect(virCgroupPtr group,
return -1;
}
- if (parent || path[0] == '/') {
- if (virCgroupCopyPlacement(group, path, parent) < 0)
- return -1;
- } else {
- if (virCgroupDetectPlacement(group, pid, path) < 0)
- return -1;
- }
+ /* In some cases we can copy part of the placement info
+ * based on the parent cgroup...
+ */
+ if ((parent || path[0] == '/') &&
+ virCgroupCopyPlacement(group, path, parent) < 0)
+ return -1;
+
+ /* ... but use /proc/cgroups to fill in the rest */
+ if (virCgroupDetectPlacement(group, pid, path) < 0)
+ return -1;
/* Check that for every mounted controller, we found our placement */
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
@@ -822,6 +842,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *path = NULL;
+ /* We must never mkdir() in systemd's hierachy */
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
+ VIR_DEBUG("Not creating systemd controller group");
+ continue;
+ }
+
/* Skip over controllers that aren't mounted */
if (!group->controllers[i].mountPoint) {
VIR_DEBUG("Skipping unmounted controller %s",
@@ -1026,6 +1052,10 @@ int virCgroupRemove(virCgroupPtr group)
if (!group->controllers[i].mountPoint)
continue;
+ /* We must never rmdir() in systemd's hiearchy */
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ continue;
+
/* Don't delete the root group, if we accidentally
ended up in it for some reason */
if (STREQ(group->controllers[i].placement, "/"))
@@ -1065,6 +1095,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
if (!group->controllers[i].mountPoint)
continue;
+ /* We must never add tasks in systemd's hiearchy */
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ continue;
+
if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid)
< 0)
goto cleanup;
}
@@ -1166,6 +1200,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr
dest_group)
!dest_group->controllers[i].mountPoint)
continue;
+ /* We must never move tasks in systemd's hiearchy */
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ continue;
+
/* New threads are created in the same group as their parent;
* but if a thread is created after we first read we aren't
* aware that it needs to move. Therefore, we must iterate
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index 3aaf081..e579f41 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -40,6 +40,7 @@ enum {
VIR_CGROUP_CONTROLLER_BLKIO,
VIR_CGROUP_CONTROLLER_NET_CLS,
VIR_CGROUP_CONTROLLER_PERF_EVENT,
+ VIR_CGROUP_CONTROLLER_SYSTEMD,
VIR_CGROUP_CONTROLLER_LAST
};
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
index 20ac494..4bdd4c9 100644
--- a/tests/vircgrouptest.c
+++ b/tests/vircgrouptest.c
@@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
@@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
};
const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
@@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if (virCgroupNewSelf(&cgroup) < 0) {
@@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args
ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
@@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args
ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/virtualmachines", false, -1,
&cgroup)) != -1) {
@@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args
ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] =
"/deployment.partition/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] =
"/deployment.partition/production.partition",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/deployment/production", false, -1,
&cgroup)) != -1) {
@@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args
ATTRIBUTE_UNUSED
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] =
"/user/berrange.user/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] =
"/user/berrange.user/production.partition",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/user/berrange.user/production", false,
-1, &cgroup)) != -1) {
@@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args
ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] =
"/production.partition/foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] =
"/production.partition/foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/production", true, -1,
&partitioncgroup)) != 0) {
@@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args
ATTRIBUTE_UNU
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1,
&partitioncgroup1)) != 0) {
--
1.8.1.4