On Mon, Sep 29, 2008 at 09:40:42AM -0700, Dan Smith wrote:
This patch adds src/cgroup.{c,h} with support for creating and
manipulating
cgroups. It's quite naive at the moment, but should provide something to
work with to move forward with resource controls.
All groups created with the internal API are forced under $mount/libvirt/
to keep everything together. The first time a group is created, the libvirt
directory is also created, and the settings from the root are inherited.
The code scans the mount table to look for the first mount of type cgroup,
and assumes that all controllers are mounted there. I think this
could/should be updated to prefer a mount with just the controller(s) we
want, if there are multiple ones.
If you have the cpuset controller enabled, and cpuset.cpus_exclusive is 1,
then all cgroups to be created will fail. Since we probably shouldn't
blindly set the root to be non-exclusive, we may also want to consider this
condition to be "no cgroup support".
My thought on the overall design of this internal API is that it is
too low level & pushing too much work to the caller. eg, the caller
would have to lookup virCGroupPtr objects for each controller it
cares about, and has to remember the tunables & their data types.
While this kind of interface is applicable for a general purpose
API, libvirt's needs are not general purpose. While LXC driver is
the only current user, as more controllers are added I anticipate
that QEMU driver might use cgroups, eg for I/O controls and CPU
schedular controls
As such I'd expect an API to be at a slightly higher level of
abstraction, strongly typed and a single cgroup object associated
with a domain object.
virDomainObjPtr dom = ...driver's internal domain object...
virCGroupPtr cg = virCGroupNew(dom);
# Set domain memory limit to 1 GB
virCGroupSetMemory(cg, 1024 * 1024 * 1024);
# Remove all device permissions
virCGroupDenyAllDevices(cg);
# Allow access to /dev/null device
virCGroupAllowDevice(cg, 3, 1)
Some of the existing APIs could be made static since they'd be
needed anyway, but some could be re-design to be tailored to
the semantics we want.
+static virCgroupPtr cgroup_get_mount(void)
+{
+ FILE *mounts;
+ struct mntent entry;
+ char buf[512];
+ virCgroupPtr root = NULL;
+
+ root = calloc(1, sizeof(*root));
+ if (root == NULL)
+ return NULL;
VIR_ALLOC() please
+
+ mounts = fopen("/proc/mounts", "r");
+ if (mounts == NULL) {
+ DEBUG0("Unable to open /proc/mounts: %m");
+ goto err;
+ }
+
+ while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
+ if (STREQ(entry.mnt_type, "cgroup")) {
+ root->path = strdup(entry.mnt_dir);
+ break;
+ }
+ }
+
+ if (root->path == NULL) {
+ DEBUG0("Did not find cgroup mount");
+ goto err;
+ }
+
+ fclose(mounts);
+
+ return root;
+err:
+ virCgroupFree(&root);
+
+ return NULL;
+}
+
+int virCgroupHaveSupport(void)
+{
+ virCgroupPtr root;
+
+ root = cgroup_get_mount();
+ if (root == NULL)
+ return -1;
+
+ virCgroupFree(&root);
+
+ return 0;
+}
+
+static int cgroup_path_of(const char *grppath,
+ const char *key,
+ char **path)
+{
+ virCgroupPtr root;
+ int rc = 0;
+
+ root = cgroup_get_mount();
+ if (root == NULL) {
+ rc = -ENOTDIR;
+ goto out;
+ }
+
+ if (asprintf(path, "%s/%s/%s", root->path, grppath, key) == -1)
+ rc = -ENOMEM;
+out:
+ virCgroupFree(&root);
+
+ return rc;
+}
+
+int virCgroupSetValueStr(virCgroupPtr group,
+ const char *key,
+ const char *value)
+{
+ int fd = -1;
+ int rc = 0;
+ char *keypath = NULL;
+
+ rc = cgroup_path_of(group->path, key, &keypath);
+ if (rc != 0)
+ return rc;
+
+ fd = open(keypath, O_WRONLY);
+ if (fd < 0) {
+ DEBUG("Unable to open %s: %m", keypath);
+ rc = -ENOENT;
+ goto out;
+ }
+
+ DEBUG("Writing '%s' to '%s'", value, keypath);
+
+ rc = safewrite(fd, value, strlen(value));
+ if (rc < 0) {
+ DEBUG("Failed to write value '%s': %m", value);
+ rc = -errno;
+ goto out;
+ } else if (rc != strlen(value)) {
+ DEBUG("Short write of value '%s'", value);
+ rc = -ENOSPC;
+ goto out;
+ }
+
+ rc = 0;
+out:
+ free(keypath);
+ close(fd);
+
+ return rc;
+}
+
+int virCgroupSetValueU64(virCgroupPtr group,
+ const char *key,
+ uint64_t value)
+{
+ char *strval = NULL;
+ int rc;
+
+ if (asprintf(&strval, "%" PRIu64, value) == -1)
+ return -ENOMEM;
+
+ rc = virCgroupSetValueStr(group, key, strval);
+
+ free(strval);
Not much difference in this case, but VIR_FREE() is
the standard API for this, since we like to wrap all
alloc related APIs for consistency. There's a few other
example of this through the patch, but I won't mention
them in this review.
+ *root = calloc(1, sizeof(**root));
VIR_ALLOC..
+ if (*root == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ (*root)->path = strdup("libvirt");
+ if ((*root)->path == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = cgroup_path_of(".", (*root)->path, &grppath);
+ if (rc != 0)
+ goto out;
+
+ if (access(grppath, F_OK) != 0) {
+ if (mkdir(grppath, 0655) != 0)
+ rc = -errno;
+ else
+ rc = cgroup_inherit(&_root, *root);
+ }
+out:
+ if (rc != 0)
+ virCgroupFree(root);
+ free(grppath);
+
+ return rc;
+}
Daniel
--
|: Red Hat, Engineering, London -o-
http://people.redhat.com/berrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org -o-
http://ovirt.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|