This patch adds src/cgroup.{c,h} with support for creating and manipulating
cgroups. It's quite naive at the moment, but should provide something to
work with to move forward with resource controls.
All groups created with the internal API are forced under $mount/libvirt/
to keep everything together. The first time a group is created, the libvirt
directory is also created, and the settings from the root are inherited.
The code scans the mount table to look for the first mount of type cgroup,
and assumes that all controllers are mounted there. I think this
could/should be updated to prefer a mount with just the controller(s) we
want, if there are multiple ones.
If you have the cpuset controller enabled, and cpuset.cpus_exclusive is 1,
then all cgroups to be created will fail. Since we probably shouldn't
blindly set the root to be non-exclusive, we may also want to consider this
condition to be "no cgroup support".
diff -r 444e2614d0a2 -r 8e948eb88328 src/Makefile.am
--- a/src/Makefile.am Wed Sep 17 16:07:03 2008 +0000
+++ b/src/Makefile.am Mon Sep 29 09:37:42 2008 -0700
@@ -96,7 +96,8 @@
lxc_conf.c lxc_conf.h \
lxc_container.c lxc_container.h \
lxc_controller.c \
- veth.c veth.h
+ veth.c veth.h \
+ cgroup.c cgroup.h
OPENVZ_DRIVER_SOURCES = \
openvz_conf.c openvz_conf.h \
diff -r 444e2614d0a2 -r 8e948eb88328 src/cgroup.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cgroup.c Mon Sep 29 09:37:42 2008 -0700
@@ -0,0 +1,526 @@
+/*
+ * cgroup.c: Tools for managing cgroups
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * See COPYING.LIB for the License of this software
+ *
+ * Authors:
+ * Dan Smith <danms(a)us.ibm.com>
+ */
+#include <config.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <mntent.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <libgen.h>
+
+#include "internal.h"
+#include "util.h"
+#include "cgroup.h"
+
+#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
+#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
+
+struct virCgroup {
+ char *path;
+};
+
+void virCgroupFree(virCgroupPtr *group)
+{
+ if (*group != NULL) {
+ free((*group)->path);
+ free(*group);
+ *group = NULL;
+ }
+}
+
+static virCgroupPtr cgroup_get_mount(void)
+{
+ FILE *mounts;
+ struct mntent entry;
+ char buf[512];
+ virCgroupPtr root = NULL;
+
+ root = calloc(1, sizeof(*root));
+ if (root == NULL)
+ return NULL;
+
+ mounts = fopen("/proc/mounts", "r");
+ if (mounts == NULL) {
+ DEBUG0("Unable to open /proc/mounts: %m");
+ goto err;
+ }
+
+ while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
+ if (STREQ(entry.mnt_type, "cgroup")) {
+ root->path = strdup(entry.mnt_dir);
+ break;
+ }
+ }
+
+ if (root->path == NULL) {
+ DEBUG0("Did not find cgroup mount");
+ goto err;
+ }
+
+ fclose(mounts);
+
+ return root;
+err:
+ virCgroupFree(&root);
+
+ return NULL;
+}
+
+int virCgroupHaveSupport(void)
+{
+ virCgroupPtr root;
+
+ root = cgroup_get_mount();
+ if (root == NULL)
+ return -1;
+
+ virCgroupFree(&root);
+
+ return 0;
+}
+
+static int cgroup_path_of(const char *grppath,
+ const char *key,
+ char **path)
+{
+ virCgroupPtr root;
+ int rc = 0;
+
+ root = cgroup_get_mount();
+ if (root == NULL) {
+ rc = -ENOTDIR;
+ goto out;
+ }
+
+ if (asprintf(path, "%s/%s/%s", root->path, grppath, key) == -1)
+ rc = -ENOMEM;
+out:
+ virCgroupFree(&root);
+
+ return rc;
+}
+
+int virCgroupSetValueStr(virCgroupPtr group,
+ const char *key,
+ const char *value)
+{
+ int fd = -1;
+ int rc = 0;
+ char *keypath = NULL;
+
+ rc = cgroup_path_of(group->path, key, &keypath);
+ if (rc != 0)
+ return rc;
+
+ fd = open(keypath, O_WRONLY);
+ if (fd < 0) {
+ DEBUG("Unable to open %s: %m", keypath);
+ rc = -ENOENT;
+ goto out;
+ }
+
+ DEBUG("Writing '%s' to '%s'", value, keypath);
+
+ rc = safewrite(fd, value, strlen(value));
+ if (rc < 0) {
+ DEBUG("Failed to write value '%s': %m", value);
+ rc = -errno;
+ goto out;
+ } else if (rc != strlen(value)) {
+ DEBUG("Short write of value '%s'", value);
+ rc = -ENOSPC;
+ goto out;
+ }
+
+ rc = 0;
+out:
+ free(keypath);
+ close(fd);
+
+ return rc;
+}
+
+int virCgroupSetValueU64(virCgroupPtr group,
+ const char *key,
+ uint64_t value)
+{
+ char *strval = NULL;
+ int rc;
+
+ if (asprintf(&strval, "%" PRIu64, value) == -1)
+ return -ENOMEM;
+
+ rc = virCgroupSetValueStr(group, key, strval);
+
+ free(strval);
+
+ return rc;
+}
+
+int virCgroupSetValueI64(virCgroupPtr group,
+ const char *key,
+ int64_t value)
+{
+ char *strval = NULL;
+ int rc;
+
+ if (asprintf(&strval, "%" PRIi64, value) == -1)
+ return -ENOMEM;
+
+ rc = virCgroupSetValueStr(group, key, strval);
+
+ free(strval);
+
+ return rc;
+}
+
+int virCgroupGetValueStr(virCgroupPtr group,
+ const char *key,
+ char **value)
+{
+ int fd = -1;
+ int rc;
+ char *keypath = NULL;
+ char buf[512];
+
+ memset(buf, 0, sizeof(buf));
+
+ rc = cgroup_path_of(group->path, key, &keypath);
+ if (rc != 0) {
+ DEBUG("No path of %s, %s", group->path, key);
+ return rc;
+ }
+
+ fd = open(keypath, O_RDONLY);
+ if (fd < 0) {
+ DEBUG("Unable to open %s: %m", keypath);
+ rc = -ENOENT;
+ goto out;
+ }
+
+ rc = saferead(fd, buf, sizeof(buf));
+ if (rc < 0) {
+ DEBUG("Failed to read %s: %m\n", keypath);
+ rc = -errno;
+ goto out;
+ } else if (rc == 0) {
+ DEBUG("Short read of %s\n", keypath);
+ rc = -EIO;
+ goto out;
+ }
+
+ *value = strdup(buf);
+ if (*value == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = 0;
+out:
+ free(keypath);
+ close(fd);
+
+ return rc;
+}
+
+int virCgroupGetValueU64(virCgroupPtr group,
+ const char *key,
+ uint64_t *value)
+{
+ char *strval = NULL;
+ int rc = 0;
+
+ rc = virCgroupGetValueStr(group, key, &strval);
+ if (rc != 0)
+ goto out;
+
+ if (sscanf(strval, "%" SCNu64, value) != 1)
+ rc = -EINVAL;
+out:
+ free(strval);
+
+ return rc;
+}
+
+int virCgroupGetValueI64(virCgroupPtr group,
+ const char *key,
+ int64_t *value)
+{
+ char *strval = NULL;
+ int rc = 0;
+
+ rc = virCgroupGetValueStr(group, key, &strval);
+ if (rc != 0)
+ goto out;
+
+ if (sscanf(strval, "%" SCNi64, value) != 1)
+ rc = -EINVAL;
+out:
+ free(strval);
+
+ return rc;
+}
+
+int virCgroupHasValue(virCgroupPtr group,
+ const char *key)
+{
+ int rc;
+ char *keypath = NULL;
+
+ rc = cgroup_path_of(group->path, key, &keypath);
+ if (rc < 0)
+ goto out;
+
+ if (access(keypath, F_OK) != 0)
+ rc = -ENOENT;
+out:
+ free(keypath);
+
+ return rc;
+}
+
+static int _cgroup_inherit(virCgroupPtr parent,
+ virCgroupPtr group,
+ const char *key)
+{
+ int rc;
+ char *keyval = NULL;
+
+ rc = virCgroupGetValueStr(parent, key, &keyval);
+ if (rc != 0)
+ goto out;
+
+ rc = virCgroupSetValueStr(group, key, keyval);
+
+out:
+ free(keyval);
+
+ return rc;
+}
+
+static int cgroup_inherit(virCgroupPtr parent,
+ virCgroupPtr group)
+{
+ int i;
+ int rc = 0;
+ const char *inherit_values[] = {
+ "cpuset.cpus",
+ "cpuset.mems",
+ NULL
+ };
+
+ for (i = 0; inherit_values[i] != NULL; i++) {
+ const char *key = inherit_values[i];
+ if (virCgroupHasValue(group, key) == 0)
+ rc = _cgroup_inherit(parent, group, key);
+
+ if (rc != 0) {
+ DEBUG("inherit of %s failed\n", key);
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static int cgroup_root(virCgroupPtr *root)
+{
+ int rc = 0;
+ char *grppath = NULL;
+ struct virCgroup _root = { (char *)"." };
+
+ *root = calloc(1, sizeof(**root));
+ if (*root == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ (*root)->path = strdup("libvirt");
+ if ((*root)->path == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = cgroup_path_of(".", (*root)->path, &grppath);
+ if (rc != 0)
+ goto out;
+
+ if (access(grppath, F_OK) != 0) {
+ if (mkdir(grppath, 0655) != 0)
+ rc = -errno;
+ else
+ rc = cgroup_inherit(&_root, *root);
+ }
+out:
+ if (rc != 0)
+ virCgroupFree(root);
+ free(grppath);
+
+ return rc;
+}
+
+static int cgroup_new(virCgroupPtr *parent,
+ const char *group,
+ virCgroupPtr *newgroup)
+{
+ int rc = 0;
+ char *typpath = NULL;
+
+ if (*parent == NULL) {
+ rc = cgroup_root(parent);
+ if (rc != 0)
+ goto err;
+ }
+
+ *newgroup = calloc(1, sizeof(**newgroup));
+ if (*newgroup == NULL) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = asprintf(&((*newgroup)->path),
+ "%s/%s",
+ (*parent)->path,
+ group);
+ if (rc == -1) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = 0;
+
+ return rc;
+err:
+ virCgroupFree(newgroup);
+ *newgroup = NULL;
+
+ free(typpath);
+
+ return rc;
+}
+
+int virCgroupOpen(virCgroupPtr parent,
+ const char *group,
+ virCgroupPtr *newgroup)
+{
+ int rc = 0;
+ char *grppath = NULL;
+
+ rc = cgroup_new(&parent, group, newgroup);
+ if (rc != 0)
+ goto err;
+ virCgroupFree(&parent);
+
+ rc = cgroup_path_of(".", (*newgroup)->path, &grppath);
+ if (rc != 0)
+ goto err;
+
+ if (access(grppath, F_OK) != 0) {
+ rc = -ENOENT;
+ goto err;
+ }
+
+ return rc;
+err:
+ virCgroupFree(newgroup);
+ *newgroup = NULL;
+
+ return rc;
+}
+
+int virCgroupCreate(virCgroupPtr parent,
+ const char *group,
+ virCgroupPtr *newgroup)
+{
+ int rc = 0;
+ char *grppath = NULL;
+ bool free_parent = (parent == NULL);
+
+ rc = cgroup_new(&parent, group, newgroup);
+ if (rc != 0) {
+ DEBUG0("Unable to allocate new virCgroup structure");
+ goto err;
+ }
+
+ rc = cgroup_path_of(".", (*newgroup)->path, &grppath);
+ if (rc != 0)
+ goto err;
+
+ if (access(grppath, F_OK) == 0) {
+ rc = -EEXIST;
+ goto err;
+ }
+
+ if (mkdir(grppath, 0655) != 0) {
+ DEBUG("mkdir of %s failed", grppath);
+ rc = -errno;
+ goto err;
+ }
+
+ rc = cgroup_inherit(parent, *newgroup);
+ if (rc != 0)
+ goto err;
+
+ free(grppath);
+
+ if (free_parent)
+ virCgroupFree(&parent);
+
+ return rc;
+err:
+ free(grppath);
+ virCgroupFree(newgroup);
+ *newgroup = NULL;
+
+ if (free_parent)
+ virCgroupFree(&parent);
+
+ return rc;
+}
+
+int virCgroupRemove(virCgroupPtr group)
+{
+ int rc = 0;
+ char *grppath = NULL;
+
+ rc = cgroup_path_of(".", group->path, &grppath);
+ if (rc != 0)
+ goto out;
+
+ if (rmdir(grppath) != 0)
+ rc = -errno;
+out:
+ free(grppath);
+
+ return rc;
+}
+
+int virCgroupAddTask(virCgroupPtr group, pid_t pid)
+{
+ int rc = 0;
+ char *pidstr = NULL;
+
+ if (asprintf(&pidstr, "%lu", (unsigned long)pid) == -1)
+ return -ENOMEM;
+
+ rc = virCgroupSetValueStr(group, "tasks", pidstr);
+
+ free(pidstr);
+
+ return rc;
+}
diff -r 444e2614d0a2 -r 8e948eb88328 src/cgroup.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cgroup.h Mon Sep 29 09:37:42 2008 -0700
@@ -0,0 +1,120 @@
+/*
+ * cgroup.h: Interface to tools for managing cgroups
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * See COPYING.LIB for the License of this software
+ *
+ * Authors:
+ * Dan Smith <danms(a)us.ibm.com>
+ */
+
+#ifndef CGROUP_H
+#define CGROUP_H
+
+#include <stdint.h>
+
+struct virCgroup;
+typedef struct virCgroup *virCgroupPtr;
+
+/**
+ * virCgroupHaveSupport:
+ *
+ * Returns 0 if support is present, negative if not
+ */
+int virCgroupHaveSupport(void);
+
+/**
+ * virCgroupCreate:
+ * @parent: The path to the parent of the desired new group, or NULL if root
+ * @group: The name of the new group
+ * @newgroup: The newly-created group. Must be passed to cgroup_free()
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupCreate(virCgroupPtr parent,
+ const char *group,
+ virCgroupPtr *newgroup);
+
+/**
+ * virCgroupOpen:
+ * @parent: The path to the parent of the desired new group, or NULL if root
+ * @group: The name of the group
+ * @newgroup: The group. Must be passed to cgroup_free()
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupOpen(virCgroupPtr parent,
+ const char *group,
+ virCgroupPtr *newgroup);
+
+/**
+ * virCgroupFree:
+ * @group: A pointer to the virCgroupPtr to be freed
+ */
+void virCgroupFree(virCgroupPtr *group);
+
+/**
+ * virCgroupRemove
+ * @parent: The path to the parent of the group to be deleted
+ * @group: The name of the group to be removed
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupRemove(virCgroupPtr group);
+
+/**
+ * virCgroupHasValue:
+ * @group: The scoping cgroup
+ * @key: The cgrop node to test for
+ *
+ * Returns 0 if present, negative if not
+ */
+int virCgroupHasValue(virCgroupPtr group, const char *key);
+
+/**
+ * virCgroupAddTask:
+ * @group: The scoping cgroup
+ * @pid: The pid of the task to add
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupAddTask(virCgroupPtr group, pid_t pid);
+
+/**
+ * virCgroupSetValueXXX:
+ * @group: The scoping cgroup
+ * @key: The cgroup node to set
+ * @value: The value to write into the @key node
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupSetValueStr(virCgroupPtr group,
+ const char *key,
+ const char *value);
+int virCgroupSetValueU64(virCgroupPtr group,
+ const char *key,
+ uint64_t value);
+int virCgroupSetValueI64(virCgroupPtr group,
+ const char *key,
+ int64_t value);
+
+/**
+ * virCgroupGetValueXXX:
+ * @group: The scoping cgroup
+ * @key: The cgroup node to read
+ * @value: A pointer to the storage area
+ *
+ * Returns 0 on success, negative on failure
+ */
+int virCgroupGetValueStr(virCgroupPtr group,
+ const char *key,
+ char **value);
+int virCgroupGetValueU64(virCgroupPtr group,
+ const char *key,
+ uint64_t *value);
+int virCgroupGetValueI64(virCgroupPtr group,
+ const char *key,
+ int64_t *value);
+
+#endif /* CGROUP_H */