From: "Daniel P. Berrange" <berrange(a)redhat.com>
If a user cgroup name begins with "cgroup.", "_" or with any of
the controllers from /proc/cgroups followed by a dot, then they
need to be prefixed with a single underscore. eg if there is
an object "cpu.service", then this would end up as "_cpu.service"
in the cgroup filesystem tree, however, "waldo.service" would
stay "waldo.service", at least as long as nobody comes up with
a cgroup controller called "waldo".
Since we require a '.XXXX' suffix on all partitions, there is
no scope for clashing with the kernel 'tasks' and 'release_agent'
files.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/util/vircgroup.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
tests/vircgroupmock.c | 27 +++++++++++++---
tests/vircgrouptest.c | 54 ++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+), 4 deletions(-)
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 297408d..f132e60 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1092,6 +1092,84 @@ cleanup:
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+static int virCgroupNeedEscape(const char *path)
+{
+ FILE *fp = NULL;
+ int ret = 0;
+ char *line = NULL;
+ size_t len;
+
+ /* If it starts with 'cgroup.' or a '_' of any
+ * of the controller names from /proc/cgroups,
+ * then we must prefix a '_'
+ */
+ if (STRPREFIX(path, "cgroup."))
+ return 1;
+
+ if (path[0] == '_')
+ return 1;
+
+ if (!(fp = fopen("/proc/cgroups", "r")))
+ return -errno;
+
+ /*
+ * Data looks like this:
+ * #subsys_name hierarchy num_cgroups enabled
+ * cpuset 2 4 1
+ * cpu 3 48 1
+ * cpuacct 3 48 1
+ * memory 4 4 1
+ * devices 5 4 1
+ * freezer 6 4 1
+ * net_cls 7 1 1
+ */
+ while (getline(&line, &len, fp) > 0) {
+ if (STRPREFIX(line, "#subsys_name")) {
+ VIR_FREE(line);
+ continue;
+ }
+ char *tmp = strchr(line, ' ');
+ if (tmp)
+ *tmp = '\0';
+ len = tmp - line;
+
+ if (STRPREFIX(path, line) &&
+ path[len] == '.') {
+ ret = 1;
+ VIR_FREE(line);
+ goto cleanup;
+ }
+ VIR_FREE(line);
+ }
+
+ if (ferror(fp)) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+cleanup:
+ VIR_FORCE_FCLOSE(fp);
+ return ret;
+}
+
+static int virCgroupEscape(char **path)
+{
+ size_t len = strlen(*path);
+ int rc;
+
+ if ((rc = virCgroupNeedEscape(*path)) <= 0)
+ return rc;
+
+ if (VIR_REALLOC_N(*path, len + 2) < 0)
+ return -ENOMEM;
+
+ memmove((*path) + 1,
+ *path,
+ len + 1);
+ (*path)[0] = '_';
+ return 0;
+}
+
static char *virCgroupSetPartitionSuffix(const char *path)
{
char **tokens = virStringSplit(path, "/", 0);
@@ -1123,6 +1201,11 @@ static char *virCgroupSetPartitionSuffix(const char *path)
}
strcat(tokens[i], ".partition");
}
+
+ if (virCgroupEscape(&(tokens[i])) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
}
if (!(ret = virStringJoin((const char **)tokens, "/")))
@@ -1352,6 +1435,9 @@ int virCgroupNewDomainPartition(virCgroupPtr partition,
name, driver) < 0)
return -ENOMEM;
+ if ((rc = virCgroupEscape(&grpname)) < 0)
+ return rc;
+
rc = virCgroupNew(grpname, partition, -1, group);
if (rc == 0) {
diff --git a/tests/vircgroupmock.c b/tests/vircgroupmock.c
index d6b0938..17ea75f 100644
--- a/tests/vircgroupmock.c
+++ b/tests/vircgroupmock.c
@@ -66,7 +66,7 @@ static char *fakesysfsdir;
* Co-mounting cpu & cpuacct controllers
* An anonymous controller for systemd
*/
-const char *mounts =
+const char *procmounts =
"rootfs / rootfs rw 0 0\n"
"tmpfs /run tmpfs rw,seclabel,nosuid,nodev,mode=755 0 0\n"
"tmpfs /not/really/sys/fs/cgroup tmpfs rw,seclabel,nosuid,nodev,noexec,mode=755
0 0\n"
@@ -79,7 +79,7 @@ const char *mounts =
"/dev/sda1 /boot ext4 rw,seclabel,relatime,data=ordered 0 0\n"
"tmpfs /tmp tmpfs rw,seclabel,relatime,size=1024000k 0 0\n";
-const char *cgroups =
+const char *procselfcgroups =
"115:memory:/\n"
"8:blkio:/\n"
"6:freezer:/\n"
@@ -87,6 +87,17 @@ const char *cgroups =
"2:cpuset:/\n"
"1:name=systemd:/user/berrange/123\n";
+const char *proccgroups =
+ "#subsys_name hierarchy num_cgroups enabled\n"
+ "cpuset 2 4 1\n"
+ "cpu 3 48 1\n"
+ "cpuacct 3 48 1\n"
+ "memory 4 4 1\n"
+ "devices 5 4 1\n"
+ "freezer 6 4 1\n"
+ "blkio 8 4 1\n";
+
+
static int make_file(const char *path,
const char *name,
const char *value)
@@ -366,7 +377,15 @@ FILE *fopen(const char *path, const char *mode)
if (STREQ(path, "/proc/mounts")) {
if (STREQ(mode, "r")) {
- return fmemopen((void *)mounts, strlen(mounts), mode);
+ return fmemopen((void *)procmounts, strlen(procmounts), mode);
+ } else {
+ errno = EACCES;
+ return NULL;
+ }
+ }
+ if (STREQ(path, "/proc/cgroups")) {
+ if (STREQ(mode, "r")) {
+ return fmemopen((void *)proccgroups, strlen(proccgroups), mode);
} else {
errno = EACCES;
return NULL;
@@ -374,7 +393,7 @@ FILE *fopen(const char *path, const char *mode)
}
if (STREQ(path, "/proc/self/cgroup")) {
if (STREQ(mode, "r")) {
- return fmemopen((void *)cgroups, strlen(cgroups), mode);
+ return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode);
} else {
errno = EACCES;
return NULL;
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
index b51106a..e2adef8 100644
--- a/tests/vircgrouptest.c
+++ b/tests/vircgrouptest.c
@@ -441,6 +441,58 @@ cleanup:
return ret;
}
+static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNUSED)
+{
+ virCgroupPtr partitioncgroup1 = NULL;
+ virCgroupPtr partitioncgroup2 = NULL;
+ virCgroupPtr partitioncgroup3 = NULL;
+ virCgroupPtr domaincgroup = NULL;
+ int ret = -1;
+ int rv;
+ const char *placement[VIR_CGROUP_CONTROLLER_LAST] = {
+ [VIR_CGROUP_CONTROLLER_CPU] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_CPUACCT] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_CPUSET] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_MEMORY] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
+ [VIR_CGROUP_CONTROLLER_FREEZER] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ [VIR_CGROUP_CONTROLLER_BLKIO] =
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+ };
+
+ if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1,
&partitioncgroup1)) != 0) {
+ fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
+ goto cleanup;
+ }
+
+ if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil", true, -1,
&partitioncgroup2)) != 0) {
+ fprintf(stderr, "Failed to create /cgroup.evil/cpu.evil cgroup: %d\n",
-rv);
+ goto cleanup;
+ }
+
+ if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil/_evil.evil",
true, -1, &partitioncgroup3)) != 0) {
+ fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
+ goto cleanup;
+ }
+
+ if ((rv = virCgroupNewDomainPartition(partitioncgroup3, "lxc",
"cpu.foo", true, &domaincgroup)) != 0) {
+ fprintf(stderr, "Cannot create LXC cgroup: %d\n", -rv);
+ goto cleanup;
+ }
+
+ /* NB we're not expecting 'net_cls.evil' to be escaped,
+ * since our fake /proc/cgroups pretends this controller
+ * isn't compiled into the kernel
+ */
+ ret = validateCgroup(domaincgroup,
"/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", mountsFull,
links, placement);
+
+cleanup:
+ virCgroupFree(&partitioncgroup3);
+ virCgroupFree(&partitioncgroup2);
+ virCgroupFree(&partitioncgroup1);
+ virCgroupFree(&domaincgroup);
+ return ret;
+}
+
# define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX"
static int
@@ -482,6 +534,8 @@ mymain(void)
if (virtTestRun("New cgroup for domain partition", 1,
testCgroupNewForPartitionDomain, NULL) < 0)
ret = -1;
+ if (virtTestRun("New cgroup for domain partition escaped", 1,
testCgroupNewForPartitionDomainEscaped, NULL) < 0)
+ ret = -1;
if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL)
virFileDeleteTree(fakesysfsdir);
--
1.8.1.4