[libvirt] [PATCH 0/7] Support cgroups in QEMU driver

This series of patches introduces cgroups support to the QEMU driver. At this time it uses the 'devices' controller to whitelist block devices for QEMU guests to prevent unauthorized access. It uses the 'cpu_shares' controller to allow schedular tunables on a per guest basis. In the future we should use the 'memory' controller to enforce the limit set in the balloon driver ie if the guest does not honour the balloon request, then force the guest into swap, avoiding host overcommit. There is also some refactoring of the cgroups code to remove the assumption that libvirtd is starting in the root cgroup, remove the requirement that all controllers be active, and allow for use in non-privileged drivers. Daniel P. Berrange (7): Use enums for cgroup controller types / labels Use virFileReadAll/virFileWriteStr for key cgroup read/write helpers Make cgroups a little more efficient Place every QEMU guest in a private cgroup Implement schedular tunables API using cgroups Use cgroups for block device whitelisting in QEMU guests Refactor cgroups to allow a group per driver to be managed directly src/cgroup.c | 861 ++++++++++++++++++++++++++-------------------- src/cgroup.h | 28 +- src/libvirt_private.syms | 1 + src/lxc_conf.h | 2 + src/lxc_controller.c | 19 +- src/lxc_driver.c | 25 +- src/qemu_conf.h | 2 + src/qemu_driver.c | 355 ++++++++++++++++++- src/util.c | 27 ++- src/util.h | 2 + 10 files changed, 904 insertions(+), 420 deletions(-)

--- src/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 1 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 50517e2..4955298 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -31,15 +31,21 @@ #define CGROUP_MAX_VAL 512 -struct virCgroup { - char *path; +enum { + VIR_CGROUP_CONTROLLER_CPUACCT, + VIR_CGROUP_CONTROLLER_MEMORY, + VIR_CGROUP_CONTROLLER_DEVICES, + VIR_CGROUP_CONTROLLER_CPUSET, + + VIR_CGROUP_CONTROLLER_LAST }; -const char *supported_controllers[] = { - "memory", - "devices", - "cpuacct", - NULL +VIR_ENUM_DECL(virCgroupController); +VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, + "cpuacct", "memory", "devices", "cpuset"); + +struct virCgroup { + char *path; }; /** @@ -107,8 +113,9 @@ int virCgroupHaveSupport(void) virCgroupPtr root; int i; - for (i = 0; supported_controllers[i] != NULL; i++) { - root = virCgroupGetMount(supported_controllers[i]); + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); + root = virCgroupGetMount(label); if (root == NULL) return -1; virCgroupFree(&root); @@ -419,15 +426,16 @@ static int virCgroupMakeGroup(const char *name) int i; int rc = 0; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); char *path = NULL; virCgroupPtr root; - root = virCgroupGetMount(supported_controllers[i]); + root = virCgroupGetMount(label); if (root == NULL) continue; - rc = virCgroupPathOfGroup(name, supported_controllers[i], &path); + rc = virCgroupPathOfGroup(name, label, &path); if (rc != 0) { virCgroupFree(&root); break; @@ -521,6 +529,7 @@ static int virCgroupOpen(virCgroupPtr parent, virCgroupPtr *newgroup) { int rc = 0; + const char *label = virCgroupControllerTypeToString(0); char *grppath = NULL; bool free_parent = (parent == NULL); @@ -532,7 +541,7 @@ static int virCgroupOpen(virCgroupPtr parent, virCgroupFree(&parent); rc = virCgroupPathOfGroup((*newgroup)->path, - supported_controllers[0], + label, &grppath); if (rc != 0) goto err; @@ -594,9 +603,10 @@ int virCgroupRemove(virCgroupPtr group) int i; char *grppath = NULL; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); if (virCgroupPathOfGroup(group->path, - supported_controllers[i], + label, &grppath) != 0) continue; @@ -626,9 +636,11 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) char *taskpath = NULL; char *pidstr = NULL; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); + rc = virCgroupPathOfGroup(group->path, - supported_controllers[i], + label, &grppath); if (rc != 0) goto done; -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:23AM -0400, Daniel P. Berrange wrote:
--- src/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 1 files changed, 29 insertions(+), 17 deletions(-)
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

--- src/cgroup.c | 87 ++++++++++++++++++---------------------------------------- 1 files changed, 27 insertions(+), 60 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 4955298..aecdf47 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -178,7 +178,6 @@ static int virCgroupSetValueStr(virCgroupPtr group, const char *key, const char *value) { - int fd = -1; int rc = 0; char *keypath = NULL; @@ -186,47 +185,16 @@ static int virCgroupSetValueStr(virCgroupPtr group, if (rc != 0) return rc; - fd = open(keypath, O_WRONLY); - if (fd < 0) { - DEBUG("Unable to open %s: %m", keypath); - rc = -ENOENT; - goto out; - } - - DEBUG("Writing '%s' to '%s'", value, keypath); - - rc = safewrite(fd, value, strlen(value)); + VIR_DEBUG("Set value %s", keypath); + rc = virFileWriteStr(keypath, value); if (rc < 0) { DEBUG("Failed to write value '%s': %m", value); rc = -errno; - goto out; - } else if (rc != strlen(value)) { - DEBUG("Short write of value '%s'", value); - rc = -ENOSPC; - goto out; + } else { + rc = 0; } - rc = 0; -out: VIR_FREE(keypath); - close(fd); - - return rc; -} - -static int virCgroupSetValueU64(virCgroupPtr group, - const char *key, - uint64_t value) -{ - char *strval = NULL; - int rc; - - if (virAsprintf(&strval, "%" PRIu64, value) == -1) - return -ENOMEM; - - rc = virCgroupSetValueStr(group, key, strval); - - VIR_FREE(strval); return rc; } @@ -235,12 +203,10 @@ static int virCgroupGetValueStr(virCgroupPtr group, const char *key, char **value) { - int fd = -1; int rc; char *keypath = NULL; - char buf[CGROUP_MAX_VAL]; - memset(buf, 0, sizeof(buf)); + *value = NULL; rc = virCgroupPathOf(group->path, key, &keypath); if (rc != 0) { @@ -248,38 +214,39 @@ static int virCgroupGetValueStr(virCgroupPtr group, return rc; } - fd = open(keypath, O_RDONLY); - if (fd < 0) { - DEBUG("Unable to open %s: %m", keypath); - rc = -ENOENT; - goto out; - } + VIR_DEBUG("Get value %s", keypath); - rc = saferead(fd, buf, sizeof(buf)); + rc = virFileReadAll(keypath, 1024, value); if (rc < 0) { DEBUG("Failed to read %s: %m\n", keypath); rc = -errno; - goto out; - } else if (rc == 0) { - DEBUG("Short read of %s\n", keypath); - rc = -EIO; - goto out; - } - - *value = strdup(buf); - if (*value == NULL) { - rc = -ENOMEM; - goto out; + } else { + rc = 0; } - rc = 0; -out: VIR_FREE(keypath); - close(fd); return rc; } +static int virCgroupSetValueU64(virCgroupPtr group, + const char *key, + uint64_t value) +{ + char *strval = NULL; + int rc; + + if (virAsprintf(&strval, "%" PRIu64, value) == -1) + return -ENOMEM; + + rc = virCgroupSetValueStr(group, key, strval); + + VIR_FREE(strval); + + return rc; +} + + #if 0 /* This is included for completeness, but not yet used */ -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:24AM -0400, Daniel P. Berrange wrote:
--- src/cgroup.c | 87 ++++++++++++++++++----------------------------------------
Simple, ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/cgroup.c: Detect the mount location of every controller at time a virCgroupPtr is created. Detect current process' placement within group to avoid assuming it is in the root. Pass controller ID into SetValueStr/GetValueStr to enable much duplicated code to be eliminated --- src/cgroup.c | 616 ++++++++++++++++++++++++++++++---------------------------- 1 files changed, 317 insertions(+), 299 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index aecdf47..d9c3141 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -32,20 +32,29 @@ #define CGROUP_MAX_VAL 512 enum { + VIR_CGROUP_CONTROLLER_CPU, VIR_CGROUP_CONTROLLER_CPUACCT, + VIR_CGROUP_CONTROLLER_CPUSET, VIR_CGROUP_CONTROLLER_MEMORY, VIR_CGROUP_CONTROLLER_DEVICES, - VIR_CGROUP_CONTROLLER_CPUSET, VIR_CGROUP_CONTROLLER_LAST }; VIR_ENUM_DECL(virCgroupController); VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, - "cpuacct", "memory", "devices", "cpuset"); + "cpu", "cpuacct", "cpuset", "memory", "devices"); + +struct virCgroupController { + int type; + char *mountPoint; + char *placement; +}; struct virCgroup { char *path; + + struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST]; }; /** @@ -55,133 +64,218 @@ struct virCgroup { */ void virCgroupFree(virCgroupPtr *group) { - if (*group != NULL) { - VIR_FREE((*group)->path); - VIR_FREE(*group); + int i; + + if (*group == NULL) + return; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + VIR_FREE((*group)->controllers[i].mountPoint); + VIR_FREE((*group)->controllers[i].placement); } + + VIR_FREE((*group)->path); + VIR_FREE(*group); } -static virCgroupPtr virCgroupGetMount(const char *controller) + +/* + * Process /proc/mounts figuring out what controllers are + * mounted and where + */ +static int virCgroupDetectMounts(virCgroupPtr group) { + int i; FILE *mounts = NULL; struct mntent entry; char buf[CGROUP_MAX_VAL]; - virCgroupPtr root = NULL; - - if (VIR_ALLOC(root) != 0) - return NULL; mounts = fopen("/proc/mounts", "r"); if (mounts == NULL) { - DEBUG0("Unable to open /proc/mounts: %m"); - goto err; + VIR_ERROR0("Unable to open /proc/mounts"); + return -ENOENT; } while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup") && - (strstr(entry.mnt_opts, controller))) { - root->path = strdup(entry.mnt_dir); - break; - } - } - - DEBUG("Mount for %s is %s\n", controller, root->path); + if (STRNEQ(entry.mnt_type, "cgroup")) + continue; - if (root->path == NULL) { - DEBUG0("Did not find cgroup mount"); - goto err; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = entry.mnt_opts; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) + goto no_memory; + tmp = next; + } + } } fclose(mounts); - return root; -err: - if (mounts != NULL) - fclose(mounts); - virCgroupFree(&root); + return 0; - return NULL; +no_memory: + if (mounts) + fclose(mounts); + return -ENOMEM; } -/** - * virCgroupHaveSupport: - * - * Returns 0 if support is present, negative if not + +/* + * Process /proc/self/cgroup figuring out what cgroup + * sub-path the current process is assigned to. ie not + * neccessarily in the root */ -int virCgroupHaveSupport(void) +static int virCgroupDetectPlacement(virCgroupPtr group) { - virCgroupPtr root; int i; + FILE *mapping = NULL; + char line[1024]; - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - root = virCgroupGetMount(label); - if (root == NULL) - return -1; - virCgroupFree(&root); + mapping = fopen("/proc/self/cgroup", "r"); + if (mapping == NULL) { + VIR_ERROR0("Unable to open /proc/self/cgroup"); + return -ENOENT; } + while (fgets(line, sizeof(line), mapping) != NULL) { + char *controllers = strchr(line, ':'); + char *path = controllers ? strchr(controllers+1, ':') : NULL; + char *nl = path ? strchr(path, '\n') : NULL; + + if (!controllers || !path) + continue; + + if (nl) + *nl = '\0'; + + *path = '\0'; + controllers++; + path++; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = controllers; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].placement = strdup(STREQ(path, "/") ? "" : path))) + goto no_memory; + + tmp = next; + } + } + } + + fclose(mapping); + return 0; + +no_memory: + return -ENOMEM; + } -static int virCgroupPathOfGroup(const char *group, - const char *controller, - char **path) +static int virCgroupDetect(virCgroupPtr group) { - virCgroupPtr root = NULL; - int rc = 0; + int any = 0; + int rc; + int i; - root = virCgroupGetMount(controller); - if (root == NULL) { - rc = -ENOTDIR; - goto out; + rc = virCgroupDetectMounts(group); + if (rc < 0) { + VIR_ERROR("Failed to detect mounts for %s", group->path); + return rc; } - if (virAsprintf(path, "%s/%s", root->path, group) == -1) - rc = -ENOMEM; -out: - virCgroupFree(&root); + /* Check that at least 1 controller is available */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (group->controllers[i].mountPoint != NULL) + any = 1; + } + if (!any) + return -ENXIO; - return rc; -} -static int virCgroupPathOf(const char *grppath, - const char *key, - char **path) -{ - virCgroupPtr root; - int rc = 0; - char *controller = NULL; + rc = virCgroupDetectPlacement(group); - if (strchr(key, '.') == NULL) - return -EINVAL; + if (rc == 0) { + /* Check that for every mounted controller, we found our placement */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (!group->controllers[i].mountPoint) + continue; - if (sscanf(key, "%a[^.]", &controller) != 1) - return -EINVAL; + if (!group->controllers[i].placement) { + VIR_ERROR("Could not find placement for controller %s at %s", + virCgroupControllerTypeToString(i), + group->controllers[i].placement); + rc = -ENOENT; + break; + } - root = virCgroupGetMount(controller); - if (root == NULL) { - rc = -ENOTDIR; - goto out; + VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i, + virCgroupControllerTypeToString(i), + group->controllers[i].mountPoint, + group->controllers[i].placement); + } + } else { + VIR_ERROR("Failed to detect mapping for %s", group->path); } - if (virAsprintf(path, "%s/%s/%s", root->path, grppath, key) == -1) - rc = -ENOMEM; -out: - virCgroupFree(&root); - VIR_FREE(controller); - return rc; } + +static int virCgroupPathOfController(virCgroupPtr group, + int controller, + const char *key, + char **path) +{ + if (group->controllers[controller].mountPoint == NULL) + return -ENOENT; + + if (group->controllers[controller].placement == NULL) + return -ENOENT; + + if (virAsprintf(path, "%s%s%s/%s", + group->controllers[controller].mountPoint, + group->controllers[controller].placement, + STREQ(group->path, "/") ? "" : group->path, + key ? key : "") == -1) + return -ENOMEM; + + return 0; +} + + static int virCgroupSetValueStr(virCgroupPtr group, + int controller, const char *key, const char *value) { int rc = 0; char *keypath = NULL; - rc = virCgroupPathOf(group->path, key, &keypath); + rc = virCgroupPathOfController(group, controller, key, &keypath); if (rc != 0) return rc; @@ -200,6 +294,7 @@ static int virCgroupSetValueStr(virCgroupPtr group, } static int virCgroupGetValueStr(virCgroupPtr group, + int controller, const char *key, char **value) { @@ -208,7 +303,7 @@ static int virCgroupGetValueStr(virCgroupPtr group, *value = NULL; - rc = virCgroupPathOf(group->path, key, &keypath); + rc = virCgroupPathOfController(group, controller, key, &keypath); if (rc != 0) { DEBUG("No path of %s, %s", group->path, key); return rc; @@ -230,6 +325,7 @@ static int virCgroupGetValueStr(virCgroupPtr group, } static int virCgroupSetValueU64(virCgroupPtr group, + int controller, const char *key, uint64_t value) { @@ -239,7 +335,7 @@ static int virCgroupSetValueU64(virCgroupPtr group, if (virAsprintf(&strval, "%" PRIu64, value) == -1) return -ENOMEM; - rc = virCgroupSetValueStr(group, key, strval); + rc = virCgroupSetValueStr(group, controller, key, strval); VIR_FREE(strval); @@ -251,6 +347,7 @@ static int virCgroupSetValueU64(virCgroupPtr group, /* This is included for completeness, but not yet used */ static int virCgroupSetValueI64(virCgroupPtr group, + int controller, const char *key, int64_t value) { @@ -260,7 +357,7 @@ static int virCgroupSetValueI64(virCgroupPtr group, if (virAsprintf(&strval, "%" PRIi64, value) == -1) return -ENOMEM; - rc = virCgroupSetValueStr(group, key, strval); + rc = virCgroupSetValueStr(group, controller, key, strval); VIR_FREE(strval); @@ -268,13 +365,14 @@ static int virCgroupSetValueI64(virCgroupPtr group, } static int virCgroupGetValueI64(virCgroupPtr group, + int controller, const char *key, int64_t *value) { char *strval = NULL; int rc = 0; - rc = virCgroupGetValueStr(group, key, &strval); + rc = virCgroupGetValueStr(group, controller, key, &strval); if (rc != 0) goto out; @@ -288,13 +386,14 @@ out: #endif static int virCgroupGetValueU64(virCgroupPtr group, + int controller, const char *key, uint64_t *value) { char *strval = NULL; int rc = 0; - rc = virCgroupGetValueStr(group, key, &strval); + rc = virCgroupGetValueStr(group, controller, key, &strval); if (rc != 0) goto out; @@ -306,81 +405,38 @@ out: return rc; } -static int _virCgroupInherit(const char *path, - const char *key) -{ - int rc = 0; - int fd = -1; - char buf[CGROUP_MAX_VAL]; - char *keypath = NULL; - char *pkeypath = NULL; - - memset(buf, 0, sizeof(buf)); - - if (virAsprintf(&keypath, "%s/%s", path, key) == -1) { - rc = -ENOMEM; - goto out; - } - - if (access(keypath, F_OK) != 0) { - DEBUG("Group %s has no key %s\n", path, key); - goto out; - } - - if (virAsprintf(&pkeypath, "%s/../%s", path, key) == -1) { - rc = -ENOMEM; - VIR_FREE(keypath); - goto out; - } - - fd = open(pkeypath, O_RDONLY); - if (fd < 0) { - rc = -errno; - goto out; - } - - if (saferead(fd, buf, sizeof(buf)) <= 0) { - rc = -errno; - goto out; - } - - close(fd); - fd = open(keypath, O_WRONLY); - if (fd < 0) { - rc = -errno; - goto out; - } - - if (safewrite(fd, buf, strlen(buf)) != strlen(buf)) { - rc = -errno; - goto out; - } - -out: - VIR_FREE(keypath); - VIR_FREE(pkeypath); - close(fd); - - return rc; -} - -static int virCgroupInherit(const char *grppath) +static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) { int i; int rc = 0; const char *inherit_values[] = { "cpuset.cpus", "cpuset.mems", - NULL }; - for (i = 0; inherit_values[i] != NULL; i++) { - const char *key = inherit_values[i]; + VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path); + for (i = 0; i < ARRAY_CARDINALITY(inherit_values) ; i++) { + char *value; - rc = _virCgroupInherit(grppath, key); + rc = virCgroupGetValueStr(parent, + VIR_CGROUP_CONTROLLER_CPUSET, + inherit_values[i], + &value); if (rc != 0) { - DEBUG("inherit of %s failed\n", key); + VIR_ERROR("Failed to get %s %d", inherit_values[i], rc); + break; + } + + VIR_DEBUG("Inherit %s = %s", inherit_values[i], value); + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + inherit_values[i], + value); + + if (rc != 0) { + VIR_ERROR("Failed to set %s %d", inherit_values[i], rc); break; } } @@ -388,35 +444,37 @@ static int virCgroupInherit(const char *grppath) return rc; } -static int virCgroupMakeGroup(const char *name) +static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group) { int i; int rc = 0; + VIR_DEBUG("Make group %s", group->path); for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); char *path = NULL; - virCgroupPtr root; - root = virCgroupGetMount(label); - if (root == NULL) + /* Skip over controllers that aren't mounted */ + if (!group->controllers[i].mountPoint) continue; - rc = virCgroupPathOfGroup(name, label, &path); - if (rc != 0) { - virCgroupFree(&root); - break; - } - - virCgroupFree(&root); + rc = virCgroupPathOfController(group, i, "", &path); + if (rc < 0) + return rc; + VIR_DEBUG("Make controller %s", path); if (access(path, F_OK) != 0) { if (mkdir(path, 0755) < 0) { rc = -errno; VIR_FREE(path); break; } - virCgroupInherit(path); + if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL && + (i == VIR_CGROUP_CONTROLLER_CPUSET || + STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) { + rc = virCgroupCpuSetInherit(parent, group); + if (rc != 0) + break; + } } VIR_FREE(path); @@ -425,134 +483,107 @@ static int virCgroupMakeGroup(const char *name) return rc; } -static int virCgroupRoot(virCgroupPtr *root) + +static int virCgroupRoot(virCgroupPtr *group); +/** + * virCgroupHaveSupport: + * + * Returns 0 if support is present, negative if not + */ +int virCgroupHaveSupport(void) { - int rc = 0; - char *grppath = NULL; + virCgroupPtr root; + int i; + int rc; + int any = 0; - if (VIR_ALLOC((*root)) != 0) { - rc = -ENOMEM; - goto out; - } + rc = virCgroupRoot(&root); + if (rc < 0) + return rc; - (*root)->path = strdup("libvirt"); - if ((*root)->path == NULL) { - rc = -ENOMEM; - goto out; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (root->controllers[i].mountPoint != NULL) + any = 1; } - rc = virCgroupMakeGroup((*root)->path); -out: - if (rc != 0) - virCgroupFree(root); - VIR_FREE(grppath); + virCgroupFree(&root); - return rc; + if (any) + return 0; + + return -ENOENT; } -static int virCgroupNew(virCgroupPtr *parent, - const char *group, - virCgroupPtr *newgroup) +static int virCgroupNew(const char *path, + virCgroupPtr *group) { int rc = 0; char *typpath = NULL; - *newgroup = NULL; - - if (*parent == NULL) { - rc = virCgroupRoot(parent); - if (rc != 0) - goto err; - } + VIR_DEBUG("New group %s", path); + *group = NULL; - if (VIR_ALLOC((*newgroup)) != 0) { + if (VIR_ALLOC((*group)) != 0) { rc = -ENOMEM; goto err; } - rc = virAsprintf(&((*newgroup)->path), - "%s/%s", - (*parent)->path, - group); - if (rc == -1) { + if (!((*group)->path = strdup(path))) { rc = -ENOMEM; goto err; } - rc = 0; + rc = virCgroupDetect(*group); + if (rc < 0) + goto err; return rc; err: - virCgroupFree(newgroup); - *newgroup = NULL; + virCgroupFree(group); + *group = NULL; VIR_FREE(typpath); return rc; } -static int virCgroupOpen(virCgroupPtr parent, - const char *group, - virCgroupPtr *newgroup) +static int virCgroupRoot(virCgroupPtr *group) { - int rc = 0; - const char *label = virCgroupControllerTypeToString(0); - char *grppath = NULL; - bool free_parent = (parent == NULL); - - rc = virCgroupNew(&parent, group, newgroup); - if (rc != 0) - goto err; - - if (free_parent) - virCgroupFree(&parent); - - rc = virCgroupPathOfGroup((*newgroup)->path, - label, - &grppath); - if (rc != 0) - goto err; - - if (access(grppath, F_OK) != 0) { - rc = -ENOENT; - goto err; - } - - return rc; -err: - virCgroupFree(newgroup); - *newgroup = NULL; - - return rc; + return virCgroupNew("/", group); } static int virCgroupCreate(virCgroupPtr parent, - const char *group, - virCgroupPtr *newgroup) + const char *name, + virCgroupPtr *group) { int rc = 0; - bool free_parent = (parent == NULL); + char *path; + + VIR_DEBUG("Creating %s under %s", name, parent->path); - rc = virCgroupNew(&parent, group, newgroup); + rc = virAsprintf(&path, "%s/%s", + STREQ(parent->path, "/") ? + "" : parent->path, + name); + if (rc < 0) { + rc = -ENOMEM; + goto err; + } + + rc = virCgroupNew(path, group); if (rc != 0) { DEBUG0("Unable to allocate new virCgroup structure"); goto err; } - rc = virCgroupMakeGroup((*newgroup)->path); + rc = virCgroupMakeGroup(parent, *group); if (rc != 0) goto err; - if (free_parent) - virCgroupFree(&parent); - return rc; err: - virCgroupFree(newgroup); - *newgroup = NULL; - - if (free_parent) - virCgroupFree(&parent); + virCgroupFree(group); + *group = NULL; return rc; } @@ -571,15 +602,20 @@ int virCgroupRemove(virCgroupPtr group) char *grppath = NULL; for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - if (virCgroupPathOfGroup(group->path, - label, - &grppath) != 0) + /* Skip over controllers not mounted */ + if (!group->controllers[i].mountPoint) continue; - if (rmdir(grppath) != 0) - rc = -errno; + if (virCgroupPathOfController(group, + i, + NULL, + &grppath) != 0) + continue; + DEBUG("Removing cgroup %s", grppath); + if (rmdir(grppath) != 0 && errno != ENOENT) { + rc = -errno; + } VIR_FREE(grppath); } @@ -597,48 +633,14 @@ int virCgroupRemove(virCgroupPtr group) int virCgroupAddTask(virCgroupPtr group, pid_t pid) { int rc = 0; - int fd = -1; int i; - char *grppath = NULL; - char *taskpath = NULL; - char *pidstr = NULL; for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - - rc = virCgroupPathOfGroup(group->path, - label, - &grppath); - if (rc != 0) - goto done; - - if (virAsprintf(&taskpath, "%s/tasks", grppath) == -1) { - rc = -ENOMEM; - goto done; - } - - fd = open(taskpath, O_WRONLY); - if (fd < 0) { - rc = -errno; - goto done; - } - - if (virAsprintf(&pidstr, "%lu", (unsigned long)pid) == -1) { - rc = -ENOMEM; - goto done; - } - - if (safewrite(fd, pidstr, strlen(pidstr)) <= 0) { - rc = -errno; - goto done; - } - - done: - VIR_FREE(grppath); - VIR_FREE(taskpath); - VIR_FREE(pidstr); - close(fd); + /* Skip over controllers not mounted */ + if (!group->controllers[i].mountPoint) + continue; + rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid); if (rc != 0) break; } @@ -660,21 +662,27 @@ int virCgroupForDomain(virDomainDefPtr def, virCgroupPtr *group) { int rc; + virCgroupPtr rootgrp = NULL; + virCgroupPtr daemongrp = NULL; virCgroupPtr typegrp = NULL; - rc = virCgroupOpen(NULL, driverName, &typegrp); - if (rc == -ENOENT) { - rc = virCgroupCreate(NULL, driverName, &typegrp); - if (rc != 0) - goto out; - } else if (rc != 0) + rc = virCgroupRoot(&rootgrp); + if (rc != 0) goto out; - rc = virCgroupOpen(typegrp, def->name, group); - if (rc == -ENOENT) - rc = virCgroupCreate(typegrp, def->name, group); + rc = virCgroupCreate(rootgrp, "libvirt", &daemongrp); + if (rc != 0) + goto out; + + rc = virCgroupCreate(daemongrp, driverName, &typegrp); + if (rc != 0) + goto out; + + rc = virCgroupCreate(typegrp, def->name, group); out: virCgroupFree(&typegrp); + virCgroupFree(&daemongrp); + virCgroupFree(&rootgrp); return rc; } @@ -690,6 +698,7 @@ out: int virCgroupSetMemory(virCgroupPtr group, unsigned long kb) { return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_MEMORY, "memory.limit_in_bytes", kb << 10); } @@ -704,8 +713,9 @@ int virCgroupSetMemory(virCgroupPtr group, unsigned long kb) int virCgroupDenyAllDevices(virCgroupPtr group) { return virCgroupSetValueStr(group, - "devices.deny", - "a"); + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + "a"); } /** @@ -732,6 +742,7 @@ int virCgroupAllowDevice(virCgroupPtr group, } rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, "devices.allow", devstr); out: @@ -762,6 +773,7 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, } rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, "devices.allow", devstr); out: @@ -772,15 +784,21 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares) { - return virCgroupSetValueU64(group, "cpu.shares", (uint64_t)shares); + return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", (uint64_t)shares); } int virCgroupGetCpuShares(virCgroupPtr group, unsigned long *shares) { - return virCgroupGetValueU64(group, "cpu.shares", (uint64_t *)shares); + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", (uint64_t *)shares); } int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) { - return virCgroupGetValueU64(group, "cpuacct.usage", (uint64_t *)usage); + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPUACCT, + "cpuacct.usage", (uint64_t *)usage); } -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:25AM -0400, Daniel P. Berrange wrote:
* src/cgroup.c: Detect the mount location of every controller at time a virCgroupPtr is created. Detect current process' placement within group to avoid assuming it is in the root. Pass controller ID into SetValueStr/GetValueStr to enable much duplicated code to be eliminated [...] +/* + * Process /proc/mounts figuring out what controllers are + * mounted and where + */ +static int virCgroupDetectMounts(virCgroupPtr group) { + int i; FILE *mounts = NULL; struct mntent entry; char buf[CGROUP_MAX_VAL]; - virCgroupPtr root = NULL; - - if (VIR_ALLOC(root) != 0) - return NULL;
mounts = fopen("/proc/mounts", "r"); if (mounts == NULL) { - DEBUG0("Unable to open /proc/mounts: %m"); - goto err; + VIR_ERROR0("Unable to open /proc/mounts"); + return -ENOENT; }
while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup") && - (strstr(entry.mnt_opts, controller))) { - root->path = strdup(entry.mnt_dir); - break; - } - } - - DEBUG("Mount for %s is %s\n", controller, root->path);
Hum, we should keep that debug output in some ways, and report if not found either.
+ if (STRNEQ(entry.mnt_type, "cgroup")) + continue;
- if (root->path == NULL) { - DEBUG0("Did not find cgroup mount"); - goto err; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = entry.mnt_opts; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) + goto no_memory; + tmp = next; + } + } }
fclose(mounts);
- return root; -err: - if (mounts != NULL) - fclose(mounts); - virCgroupFree(&root); + return 0;
- return NULL; +no_memory: + if (mounts) + fclose(mounts); + return -ENOMEM; } [...] + while (fgets(line, sizeof(line), mapping) != NULL) { + char *controllers = strchr(line, ':'); + char *path = controllers ? strchr(controllers+1, ':') : NULL; + char *nl = path ? strchr(path, '\n') : NULL; + + if (!controllers || !path) + continue;
Why not continue immediately instead of ? : constructs ? [...]
- if (sscanf(key, "%a[^.]", &controller) != 1) - return -EINVAL;
somehow I'm happy to see this go away :-) I'm not really following the intend of the patch, I need to spend some time learning cgroups, but it seems to me the code is quite cleaner after than before, so ACK, but hopefully someone more versed in cgroups can double-check. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Fri, Jul 17, 2009 at 04:28:51PM +0200, Daniel Veillard wrote:
On Fri, Jul 17, 2009 at 09:04:25AM -0400, Daniel P. Berrange wrote:
mounts = fopen("/proc/mounts", "r"); if (mounts == NULL) { - DEBUG0("Unable to open /proc/mounts: %m"); - goto err; + VIR_ERROR0("Unable to open /proc/mounts"); + return -ENOENT; }
while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup") && - (strstr(entry.mnt_opts, controller))) { - root->path = strdup(entry.mnt_dir); - break; - } - } - - DEBUG("Mount for %s is %s\n", controller, root->path);
Hum, we should keep that debug output in some ways, and report if not found either.
You missed the code later on which does this :-) + if (!group->controllers[i].placement) { + VIR_ERROR("Could not find placement for controller %s at %s", + virCgroupControllerTypeToString(i), + group->controllers[i].placement); + rc = -ENOENT; + break; + } + VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i, + virCgroupControllerTypeToString(i), + group->controllers[i].mountPoint, + group->controllers[i].placement); Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

* src/qemu_driver.c: Place guest in cgroup upon startup. Remove cgroup upon shutdown --- src/qemu_driver.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 98 insertions(+), 7 deletions(-) diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 00dc6e5..b336fef 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -66,6 +66,7 @@ #include "node_device_conf.h" #include "pci.h" #include "security.h" +#include "cgroup.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -1377,6 +1378,82 @@ error: return -1; } +static int qemuSetupCgroup(virConnectPtr conn, + struct qemud_driver *driver ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup = NULL; + int rc; + + if (virCgroupHaveSupport() != 0) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(vm->def, "qemu", &cgroup); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to create cgroup for %s"), vm->def->name); + goto cleanup; + } + + virCgroupFree(&cgroup); + return 0; + +cleanup: + if (cgroup) { + virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + } + return -1; +} + + +static int qemuRemoveCgroup(virConnectPtr conn, + struct qemud_driver *driver ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup; + + if (virCgroupHaveSupport() != 0) + return 0; /* Not supported, so claim success */ + + if (virCgroupForDomain(vm->def, "qemu", &cgroup) != 0) { + qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Unable to find cgroup for %s\n"), + vm->def->name); + return -1; + } + + virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + return 0; +} + +static int qemuAddToCgroup(virDomainDefPtr def) +{ + virCgroupPtr cgroup = NULL; + int rc; + + if (virCgroupHaveSupport() != 0) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(def, "qemu", &cgroup); + if (rc != 0) { + virReportSystemError(NULL, -rc, _("unable to find cgroup for domain %s"), def->name); + return -1; + } + + rc = virCgroupAddTask(cgroup, getpid()); + if (rc != 0) { + virReportSystemError(NULL, -rc, _("unable to add domain %s task %d to cgroup"), def->name, getpid()); + virCgroupFree(&cgroup); + return -1; + } + + virCgroupFree(&cgroup); + return 0; +} + + static int qemudDomainSetSecurityLabel(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { if (vm->def->seclabel.label != NULL) @@ -1588,14 +1665,17 @@ static int qemuDomainSetAllDeviceOwnership(virConnectPtr conn, static virDomainPtr qemudDomainLookupByName(virConnectPtr conn, const char *name); -struct gemudHookData { - virConnectPtr conn; - virDomainObjPtr vm; - struct qemud_driver *driver; +struct qemudHookData { + virConnectPtr conn; + virDomainObjPtr vm; + struct qemud_driver *driver; }; static int qemudSecurityHook(void *data) { - struct gemudHookData *h = (struct gemudHookData *) data; + struct qemudHookData *h = data; + + if (qemuAddToCgroup(h->vm->def) < 0) + return -1; if (qemudDomainSetSecurityLabel(h->conn, h->driver, h->vm) < 0) { qemudReportError(h->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -1668,7 +1748,7 @@ static int qemudStartVMDaemon(virConnectPtr conn, char *pidfile = NULL; int logfile; - struct gemudHookData hookData; + struct qemudHookData hookData; hookData.conn = conn; hookData.vm = vm; hookData.driver = driver; @@ -1689,6 +1769,9 @@ static int qemudStartVMDaemon(virConnectPtr conn, driver->securityDriver->domainGenSecurityLabel(conn, vm) < 0) return -1; + /* Ensure no historical cgroup for this VM is lieing around bogus settings */ + qemuRemoveCgroup(conn, driver, vm); + if ((vm->def->ngraphics == 1) && vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && vm->def->graphics[0]->data.vnc.autoport) { @@ -1729,6 +1812,9 @@ static int qemudStartVMDaemon(virConnectPtr conn, &qemuCmdFlags) < 0) goto cleanup; + if (qemuSetupCgroup(conn, driver, vm) < 0) + goto cleanup; + if (qemuPrepareHostDevices(conn, vm->def) < 0) goto cleanup; @@ -1855,6 +1941,7 @@ cleanup: VIR_FREE(vm->def->seclabel.label); VIR_FREE(vm->def->seclabel.imagelabel); } + qemuRemoveCgroup(conn, driver, vm); if ((vm->def->ngraphics == 1) && vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && vm->def->graphics[0]->data.vnc.autoport) @@ -1866,7 +1953,7 @@ cleanup: } -static void qemudShutdownVMDaemon(virConnectPtr conn ATTRIBUTE_UNUSED, +static void qemudShutdownVMDaemon(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { int ret; @@ -1909,6 +1996,10 @@ static void qemudShutdownVMDaemon(virConnectPtr conn ATTRIBUTE_UNUSED, VIR_WARN("Failed to restore all device ownership for %s", vm->def->name); + if (qemuRemoveCgroup(conn, driver, vm) < 0) + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + if (qemudRemoveDomainStatus(conn, driver, vm) < 0) { VIR_WARN(_("Failed to remove domain status for %s"), vm->def->name); -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:26AM -0400, Daniel P. Berrange wrote:
* src/qemu_driver.c: Place guest in cgroup upon startup. Remove cgroup upon shutdown --- src/qemu_driver.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++---- [...]
+ virReportSystemError(conn, -rc, + _("Unable to create cgroup for %s"), vm->def->name);
hum, there is a missing line split here, I think I saw one similar in patch 3 but it was less obvious
+static int qemuAddToCgroup(virDomainDefPtr def) +{ + virCgroupPtr cgroup = NULL; + int rc; + + if (virCgroupHaveSupport() != 0) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(def, "qemu", &cgroup); + if (rc != 0) { + virReportSystemError(NULL, -rc, _("unable to find cgroup for domain %s"), def->name); + return -1; + } + + rc = virCgroupAddTask(cgroup, getpid()); + if (rc != 0) { + virReportSystemError(NULL, -rc, _("unable to add domain %s task %d to cgroup"), def->name, getpid()); + virCgroupFree(&cgroup); + return -1; + }
A couple more, and I would define itn ret = 0; and factorize the cleanup on exit
+ + virCgroupFree(&cgroup); + return 0; +} + + [...]
-struct gemudHookData { - virConnectPtr conn; - virDomainObjPtr vm; - struct qemud_driver *driver; +struct qemudHookData { + virConnectPtr conn; + virDomainObjPtr vm; + struct qemud_driver *driver; };
took me a while to spot that gemu turned in qemu Again not a cgroup expert but this looks okay to me, ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters --- src/qemu_driver.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 135 insertions(+), 3 deletions(-) diff --git a/src/qemu_driver.c b/src/qemu_driver.c index b336fef..f6d3f52 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -4965,6 +4965,138 @@ cleanup: return ret; } + +static char *qemuGetSchedulerType(virDomainPtr dom, + int *nparams) +{ + char *ret; + + if (virCgroupHaveSupport() != 0) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return NULL; + } + + if (nparams) + *nparams = 1; + + ret = strdup("posix"); + if (!ret) + virReportOOMError(dom->conn); + return ret; +} + +static int qemuSetSchedulerParameters(virDomainPtr dom, + virSchedParameterPtr params, + int nparams) +{ + struct qemud_driver *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + int ret = -1; + + if (virCgroupHaveSupport() != 0) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return -1; + } + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + qemuDriverUnlock(driver); + + if (vm == NULL) { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if (virCgroupForDomain(vm->def, "qemu", &group) != 0) + goto cleanup; + + for (i = 0; i < nparams; i++) { + virSchedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, "cpu_shares")) { + int rc; + rc = virCgroupSetCpuShares(group, params[i].value.ui); + if (rc != 0) { + virReportSystemError(dom->conn, -rc, "%s", + _("unable to set cpu shares tunable")); + goto cleanup; + } + } else { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INVALID_ARG, + _("Invalid parameter `%s'"), param->field); + goto cleanup; + } + } + ret = 0; + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + return ret; +} + +static int qemuGetSchedulerParameters(virDomainPtr dom, + virSchedParameterPtr params, + int *nparams) +{ + struct qemud_driver *driver = dom->conn->privateData; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + unsigned long val; + int ret = -1; + int rc; + + if (virCgroupHaveSupport() != 0) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return -1; + } + + if ((*nparams) != 1) { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INVALID_ARG, + "%s", _("Invalid parameter count")); + return -1; + } + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + qemuDriverUnlock(driver); + + if (vm == NULL) { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if (virCgroupForDomain(vm->def, "qemu", &group) != 0) + goto cleanup; + + rc = virCgroupGetCpuShares(group, &val); + if (rc != 0) { + virReportSystemError(dom->conn, -rc, "%s", + _("unable to get cpu shares tunable")); + goto cleanup; + } + params[0].value.ul = val; + strncpy(params[0].field, "cpu_shares", sizeof(params[0].field)); + params[0].type = VIR_DOMAIN_SCHED_FIELD_ULLONG; + + ret = 0; + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + return ret; +} + + /* This uses the 'info blockstats' monitor command which was * integrated into both qemu & kvm in late 2007. If the command is * not supported we detect this and return the appropriate error. @@ -5933,9 +6065,9 @@ static virDriver qemuDriver = { qemudDomainDetachDevice, /* domainDetachDevice */ qemudDomainGetAutostart, /* domainGetAutostart */ qemudDomainSetAutostart, /* domainSetAutostart */ - NULL, /* domainGetSchedulerType */ - NULL, /* domainGetSchedulerParameters */ - NULL, /* domainSetSchedulerParameters */ + qemuGetSchedulerType, /* domainGetSchedulerType */ + qemuGetSchedulerParameters, /* domainGetSchedulerParameters */ + qemuSetSchedulerParameters, /* domainSetSchedulerParameters */ NULL, /* domainMigratePrepare (v1) */ qemudDomainMigratePerform, /* domainMigratePerform */ NULL, /* domainMigrateFinish */ -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:27AM -0400, Daniel P. Berrange wrote:
* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters [...] + for (i = 0; i < nparams; i++) { + virSchedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, "cpu_shares")) {
Hum, the type should be checked first as a safety since this comes directly from the user and those SchedParameter are really error prone. The name "cpu_shares" need to be documented too. Once type is checked on input, ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/qemu_driver.c: Set a restrictive block device whitelist for all QEMU guests. Update whitelist when hotplugging disks. * src/cgroup.h, src/cgroup.c: Add some more convenience methods for dealing with block device whitelists. --- src/cgroup.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/cgroup.h | 12 ++++++ src/qemu_driver.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 2 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index d9c3141..21e0dd4 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -782,6 +782,104 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, return rc; } +int virCgroupAllowDevicePath(virCgroupPtr group, + const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupAllowDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} + +/** + * virCgroupDenyDevice: + * + * @group: The cgroup to deny a device for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device + * @minor: The minor number of the device + * + * Returns: 0 on success + */ +int virCgroupDenyDevice(virCgroupPtr group, + char type, + int major, + int minor) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:%i rwm", type, major, minor) == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); +out: + VIR_FREE(devstr); + + return rc; +} + +/** + * virCgroupDenyDeviceMajor: + * + * @group: The cgroup to deny an entire device major type for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device type + * + * Returns: 0 on success + */ +int virCgroupDenyDeviceMajor(virCgroupPtr group, + char type, + int major) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:* rwm", type, major) == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); + out: + VIR_FREE(devstr); + + return rc; +} + +int virCgroupDenyDevicePath(virCgroupPtr group, + const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupDenyDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} + int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares) { return virCgroupSetValueU64(group, diff --git a/src/cgroup.h b/src/cgroup.h index 11c44f9..89bd4a1 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -38,6 +38,18 @@ int virCgroupAllowDevice(virCgroupPtr group, int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major); +int virCgroupAllowDevicePath(virCgroupPtr group, + const char *path); + +int virCgroupDenyDevice(virCgroupPtr group, + char type, + int major, + int minor); +int virCgroupDenyDeviceMajor(virCgroupPtr group, + char type, + int major); +int virCgroupDenyDevicePath(virCgroupPtr group, + const char *path); int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares); int virCgroupGetCpuShares(virCgroupPtr group, unsigned long *shares); diff --git a/src/qemu_driver.c b/src/qemu_driver.c index f6d3f52..33e9cfa 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -1378,12 +1378,19 @@ error: return -1; } +static const char *const devs[] = { + "/dev/null", "/dev/full", "/dev/zero", + "/dev/random", "/dev/urandom", + "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +}; + static int qemuSetupCgroup(virConnectPtr conn, struct qemud_driver *driver ATTRIBUTE_UNUSED, virDomainObjPtr vm) { virCgroupPtr cgroup = NULL; int rc; + unsigned int i; if (virCgroupHaveSupport() != 0) return 0; /* Not supported, so claim success */ @@ -1395,6 +1402,54 @@ static int qemuSetupCgroup(virConnectPtr conn, goto cleanup; } + rc = virCgroupDenyAllDevices(cgroup); + if (rc != 0) { + if (rc == -EPERM) { + VIR_WARN0("Group devices ACL is not accessible, disabling whitelisting"); + goto done; + } + + virReportSystemError(conn, -rc, + _("Unable to deny all devices for %s"), vm->def->name); + goto cleanup; + } + + for (i = 0; i < vm->def->ndisks ; i++) { + if (vm->def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK || + vm->def->disks[i]->src == NULL) + continue; + + rc = virCgroupAllowDevicePath(cgroup, + vm->def->disks[i]->src); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to allow device %s for %s"), + vm->def->disks[i]->src, vm->def->name); + goto cleanup; + } + } + + rc = virCgroupAllowDeviceMajor(cgroup, 'c', 136); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("unable to allow device %s"), + devs[i]); + goto cleanup; + } + + for (i = 0; i < ARRAY_CARDINALITY(devs) ; i++) { + rc = virCgroupAllowDevicePath(cgroup, + devs[i]); + if (rc < 0 && + rc != -ENOENT) { + virReportSystemError(conn, -rc, + _("unable to allow device %s"), + devs[i]); + goto cleanup; + } + } + +done: virCgroupFree(&cgroup); return 0; @@ -4637,6 +4692,7 @@ static int qemudDomainAttachDevice(virDomainPtr dom, struct qemud_driver *driver = dom->conn->privateData; virDomainObjPtr vm; virDomainDeviceDefPtr dev = NULL; + virCgroupPtr cgroup = NULL; int ret = -1; qemuDriverLock(driver); @@ -4662,6 +4718,26 @@ static int qemudDomainAttachDevice(virDomainPtr dom, if (dev->type == VIR_DOMAIN_DEVICE_DISK) { + if (virCgroupHaveSupport() == 0) { + if (virCgroupForDomain(vm->def, "qemu", &cgroup) !=0 ) { + qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Unable to create cgroup for %s\n"), + vm->def->name); + goto cleanup; + } + if (dev->data.disk->src != NULL && + virCgroupAllowDevicePath(cgroup, + dev->data.disk->src) < 0) { + qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("unable to allow device %s"), + dev->data.disk->src); + goto cleanup; + } + } + + if (driver->securityDriver) + driver->securityDriver->domainSetSecurityImageLabel(dom->conn, vm, dev->data.disk); + switch (dev->data.disk->device) { case VIR_DOMAIN_DISK_DEVICE_CDROM: case VIR_DOMAIN_DISK_DEVICE_FLOPPY: @@ -4690,7 +4766,7 @@ static int qemudDomainAttachDevice(virDomainPtr dom, qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, _("disk bus '%s' cannot be hotplugged."), virDomainDiskBusTypeToString(dev->data.disk->bus)); - goto cleanup; + /* fallthrough */ } break; @@ -4698,7 +4774,11 @@ static int qemudDomainAttachDevice(virDomainPtr dom, qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, _("disk device type '%s' cannot be hotplugged"), virDomainDiskDeviceTypeToString(dev->data.disk->device)); - goto cleanup; + /* Fallthrough */ + } + if (ret != 0) { + virCgroupDenyDevicePath(cgroup, + dev->data.disk->src); } } else if (dev->type == VIR_DOMAIN_DEVICE_HOSTDEV && dev->data.hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && @@ -4718,6 +4798,9 @@ static int qemudDomainAttachDevice(virDomainPtr dom, ret = -1; cleanup: + if (cgroup) + virCgroupFree(&cgroup); + if (ret < 0) { if (qemuDomainSetDeviceOwnership(dom->conn, driver, dev, 1) < 0) VIR_WARN0("Fail to restore disk device ownership"); -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:28AM -0400, Daniel P. Berrange wrote:
* src/qemu_driver.c: Set a restrictive block device whitelist for all QEMU guests. Update whitelist when hotplugging disks. * src/cgroup.h, src/cgroup.c: Add some more convenience methods for dealing with block device whitelists.
+}
Worth adding a description for this function too, especially the return value which is not trivial from reading the code.
+ +int virCgroupDenyDevicePath(virCgroupPtr group, + const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupDenyDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} [...] diff --git a/src/qemu_driver.c b/src/qemu_driver.c index f6d3f52..33e9cfa 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -1378,12 +1378,19 @@ error: return -1; }
+static const char *const devs[] = { + "/dev/null", "/dev/full", "/dev/zero", + "/dev/random", "/dev/urandom", + "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +};
Hum, that list sounds a bit arbitrary, this could break for random reasons maybe this should be extended through the configuration, I assume a mismatch may result in domain failing to start or operate properly, right ? [...]
+ + rc = virCgroupAllowDeviceMajor(cgroup, 'c', 136);
Hum, that's a magic number, can we get a meaningful #define The idea sounds good but I'm a bit afraid of the inflexibility, this has the potential of making qemu/kvm far more fragile without a way to fix this by patching and recompiling. Again I'm not a cgroup expert but I feel a bit uneasy, can we get at least an option to disable it at runtime in the configuration (sorry if I missed that !) ? Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Fri, Jul 17, 2009 at 05:15:15PM +0200, Daniel Veillard wrote:
+ +int virCgroupDenyDevicePath(virCgroupPtr group, + const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupDenyDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} [...] diff --git a/src/qemu_driver.c b/src/qemu_driver.c index f6d3f52..33e9cfa 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -1378,12 +1378,19 @@ error: return -1; }
+static const char *const devs[] = { + "/dev/null", "/dev/full", "/dev/zero", + "/dev/random", "/dev/urandom", + "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +};
Hum, that list sounds a bit arbitrary, this could break for random reasons maybe this should be extended through the configuration, I assume a mismatch may result in domain failing to start or operate properly, right ?
Yes, QEMU will get -EPERM if it attempts to access a block device we've not permitted, and hopefully exit. Looking at it again, I've missed a couple of devices I should have allowed. I'll check the QEMU source to match up the list properly
[...]
+ + rc = virCgroupAllowDeviceMajor(cgroup, 'c', 136);
Hum, that's a magic number, can we get a meaningful #define
The idea sounds good but I'm a bit afraid of the inflexibility, this has the potential of making qemu/kvm far more fragile without a way to fix this by patching and recompiling.
Perhaps we should make it a config in /etc/libvirt/qemu.conf too
Again I'm not a cgroup expert but I feel a bit uneasy, can we get at least an option to disable it at runtime in the configuration (sorry if I missed that !) ?
Well the simplest way to disable it at runtime, is to simply not mount the cgroups device ACL controller on the host. If this is not mounted, then libvirt will just skip this functionality. NB, no cgroups stuff is enabled by default on any current distro I'm aware of. Arguably this should be changed, but that's not libvirt's problem Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On Mon, Jul 20, 2009 at 11:48:45AM +0100, Daniel P. Berrange wrote:
On Fri, Jul 17, 2009 at 05:15:15PM +0200, Daniel Veillard wrote:
+static const char *const devs[] = { + "/dev/null", "/dev/full", "/dev/zero", + "/dev/random", "/dev/urandom", + "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +};
Hum, that list sounds a bit arbitrary, this could break for random reasons maybe this should be extended through the configuration, I assume a mismatch may result in domain failing to start or operate properly, right ?
Yes, QEMU will get -EPERM if it attempts to access a block device we've not permitted, and hopefully exit.
Looking at it again, I've missed a couple of devices I should have allowed. I'll check the QEMU source to match up the list properly
okay :-)
The idea sounds good but I'm a bit afraid of the inflexibility, this has the potential of making qemu/kvm far more fragile without a way to fix this by patching and recompiling.
Perhaps we should make it a config in /etc/libvirt/qemu.conf too
yes, agreed.
Again I'm not a cgroup expert but I feel a bit uneasy, can we get at least an option to disable it at runtime in the configuration (sorry if I missed that !) ?
Well the simplest way to disable it at runtime, is to simply not mount the cgroups device ACL controller on the host. If this is not mounted, then libvirt will just skip this functionality.
NB, no cgroups stuff is enabled by default on any current distro I'm aware of. Arguably this should be changed, but that's not libvirt's problem
Well, yes and no :-) First if there are uses, it's more likely that the service will be activated, and on the other hand we should try to avoid too much problems for our users when used as the guinea pig for the feature, IMHO the simplest is to make it optional into libvirt, but actiavted by default if present. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Allow the driver level cgroup to be managed explicitly by the hypervisor drivers, in order to detect whether to enable or disable cgroup support for domains. Provides better error reporting of failures. Also allow for creation of cgroups for unprivileged drivers if controller is accessible by the user. * src/cgroup.c, src/cgroup.h * src/lxc_conf.h, src/lxc_controller.c, src/lxc_driver.c, src/qemu_conf.h, src/qemu_driver.c: Update for change in API * src/util.c, src/util.h, src/libvirt_private.syms: Add a virGetUserName() helper --- src/cgroup.c | 162 ++++++++++++++++++++++++---------------------- src/cgroup.h | 16 ++-- src/libvirt_private.syms | 1 + src/lxc_conf.h | 2 + src/lxc_controller.c | 19 ++++-- src/lxc_driver.c | 25 +++++--- src/qemu_conf.h | 2 + src/qemu_driver.c | 81 +++++++++++++++-------- src/util.c | 27 +++++++- src/util.h | 2 + 10 files changed, 204 insertions(+), 133 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 21e0dd4..6f66c96 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -484,36 +484,6 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group) } -static int virCgroupRoot(virCgroupPtr *group); -/** - * virCgroupHaveSupport: - * - * Returns 0 if support is present, negative if not - */ -int virCgroupHaveSupport(void) -{ - virCgroupPtr root; - int i; - int rc; - int any = 0; - - rc = virCgroupRoot(&root); - if (rc < 0) - return rc; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (root->controllers[i].mountPoint != NULL) - any = 1; - } - - virCgroupFree(&root); - - if (any) - return 0; - - return -ENOENT; -} - static int virCgroupNew(const char *path, virCgroupPtr *group) { @@ -547,47 +517,47 @@ err: return rc; } -static int virCgroupRoot(virCgroupPtr *group) -{ - return virCgroupNew("/", group); -} - -static int virCgroupCreate(virCgroupPtr parent, - const char *name, - virCgroupPtr *group) +static int virCgroupAppRoot(int privileged, + virCgroupPtr *group) { - int rc = 0; - char *path; + virCgroupPtr rootgrp = NULL; + int rc; - VIR_DEBUG("Creating %s under %s", name, parent->path); + rc = virCgroupNew("/", &rootgrp); + if (rc != 0) + return rc; - rc = virAsprintf(&path, "%s/%s", - STREQ(parent->path, "/") ? - "" : parent->path, - name); - if (rc < 0) { - rc = -ENOMEM; - goto err; - } + if (privileged) { + rc = virCgroupNew("/libvirt", group); + } else { + char *rootname; + char *username; + username = virGetUserName(NULL, getuid()); + if (!username) { + rc = -ENOMEM; + goto cleanup; + } + rc = virAsprintf(&rootname, "/libvirt-%s", username); + VIR_FREE(username); + if (rc < 0) { + rc = -ENOMEM; + goto cleanup; + } - rc = virCgroupNew(path, group); - if (rc != 0) { - DEBUG0("Unable to allocate new virCgroup structure"); - goto err; + rc = virCgroupNew(rootname, group); + VIR_FREE(rootname); } - - rc = virCgroupMakeGroup(parent, *group); if (rc != 0) - goto err; + goto cleanup; - return rc; -err: - virCgroupFree(group); - *group = NULL; + rc = virCgroupMakeGroup(rootgrp, *group); +cleanup: + virCgroupFree(&rootgrp); return rc; } + /** * virCgroupRemove: * @@ -648,45 +618,83 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) return rc; } + /** - * virCgroupForDomain: + * virCgroupForDriver: * - * @def: Domain definition to create cgroup for - * @driverName: Classification of this domain type (e.g., xen, qemu, lxc) + * @name: name of this driver (e.g., xen, qemu, lxc) * @group: Pointer to returned virCgroupPtr * * Returns 0 on success */ -int virCgroupForDomain(virDomainDefPtr def, - const char *driverName, - virCgroupPtr *group) +int virCgroupForDriver(const char *name, + virCgroupPtr *group, + int privileged, + int create) { int rc; + char *path = NULL; virCgroupPtr rootgrp = NULL; - virCgroupPtr daemongrp = NULL; - virCgroupPtr typegrp = NULL; - rc = virCgroupRoot(&rootgrp); + rc = virCgroupAppRoot(privileged, &rootgrp); if (rc != 0) goto out; - rc = virCgroupCreate(rootgrp, "libvirt", &daemongrp); - if (rc != 0) + if (virAsprintf(&path, "%s/%s", rootgrp->path, name) < 0) { + rc = -ENOMEM; goto out; + } - rc = virCgroupCreate(daemongrp, driverName, &typegrp); - if (rc != 0) - goto out; + rc = virCgroupNew(path, group); + VIR_FREE(path); + + if (rc == 0 && + create) { + rc = virCgroupMakeGroup(rootgrp, *group); + if (rc != 0) + virCgroupFree(group); + } - rc = virCgroupCreate(typegrp, def->name, group); out: - virCgroupFree(&typegrp); - virCgroupFree(&daemongrp); virCgroupFree(&rootgrp); return rc; } + +/** + * virCgroupForDomain: + * + * @driver: group for driver owning the domain + * @name: name of the domain + * @group: Pointer to returned virCgroupPtr + * + * Returns 0 on success + */ +int virCgroupForDomain(virCgroupPtr driver, + const char *name, + virCgroupPtr *group, + int create) +{ + int rc; + char *path; + + if (virAsprintf(&path, "%s/%s", driver->path, name) < 0) + return -ENOMEM; + + rc = virCgroupNew(path, group); + VIR_FREE(path); + + if (rc == 0 && + create) { + rc = virCgroupMakeGroup(driver, *group); + if (rc != 0) + virCgroupFree(group); + } + + return rc; +} + /** * virCgroupSetMemory: * diff --git a/src/cgroup.h b/src/cgroup.h index 89bd4a1..907f2e8 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -12,18 +12,18 @@ #ifndef CGROUP_H #define CGROUP_H -#include <stdint.h> - struct virCgroup; typedef struct virCgroup *virCgroupPtr; -#include "domain_conf.h" - -int virCgroupHaveSupport(void); +int virCgroupForDriver(const char *name, + virCgroupPtr *group, + int privileged, + int create); -int virCgroupForDomain(virDomainDefPtr def, - const char *driverName, - virCgroupPtr *group); +int virCgroupForDomain(virCgroupPtr driver, + const char *name, + virCgroupPtr *group, + int create); int virCgroupAddTask(virCgroupPtr group, pid_t pid); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 59c78d5..8435ff6 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -378,6 +378,7 @@ virRun; virSkipSpaces; virKillProcess; virGetUserDirectory; +virGetUserName; virGetUserID; virGetGroupID; diff --git a/src/lxc_conf.h b/src/lxc_conf.h index 30a51e1..24aadf7 100644 --- a/src/lxc_conf.h +++ b/src/lxc_conf.h @@ -30,6 +30,7 @@ #include "domain_conf.h" #include "capabilities.h" #include "threads.h" +#include "cgroup.h" #define LXC_CONFIG_DIR SYSCONF_DIR "/libvirt/lxc" #define LXC_STATE_DIR LOCAL_STATE_DIR "/run/libvirt/lxc" @@ -41,6 +42,7 @@ struct __lxc_driver { virCapsPtr caps; + virCgroupPtr cgroup; virDomainObjList domains; char *configDir; char *autostartDir; diff --git a/src/lxc_controller.c b/src/lxc_controller.c index 08fbbe4..9ad48a7 100644 --- a/src/lxc_controller.c +++ b/src/lxc_controller.c @@ -48,7 +48,6 @@ #include "veth.h" #include "memory.h" #include "util.h" -#include "cgroup.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -69,6 +68,7 @@ struct cgroup_device_policy { */ static int lxcSetContainerResources(virDomainDefPtr def) { + virCgroupPtr driver; virCgroupPtr cgroup; int rc = -1; int i; @@ -82,14 +82,18 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; - if (virCgroupHaveSupport() != 0) - return 0; /* Not supported, so claim success */ + rc = virCgroupForDriver("lxc", &driver, 1, 0); + if (rc != 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, "%s", + _("Unable to get cgroup for driver")); + return rc; + } - rc = virCgroupForDomain(def, "lxc", &cgroup); + rc = virCgroupForDomain(driver, def->name, &cgroup, 1); if (rc != 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("Unable to create cgroup for %s\n"), def->name); - return rc; + _("Unable to create cgroup for domain %s"), def->name); + goto cleanup; } rc = virCgroupSetMemory(cgroup, def->maxmem); @@ -119,9 +123,10 @@ out: if (rc != 0) { virReportSystemError(NULL, -rc, "%s", _("Failed to set lxc resources")); - virCgroupRemove(cgroup); } +cleanup: + virCgroupFree(&driver); virCgroupFree(&cgroup); return rc; diff --git a/src/lxc_driver.c b/src/lxc_driver.c index 3503573..ff35845 100644 --- a/src/lxc_driver.c +++ b/src/lxc_driver.c @@ -46,7 +46,6 @@ #include "bridge.h" #include "veth.h" #include "event.h" -#include "cgroup.h" #include "nodeinfo.h" @@ -393,10 +392,10 @@ static int lxcDomainGetInfo(virDomainPtr dom, info->state = vm->state; - if (!virDomainIsActive(vm) || virCgroupHaveSupport() != 0) { + if (!virDomainIsActive(vm) || driver->cgroup == NULL) { info->cpuTime = 0; } else { - if (virCgroupForDomain(vm->def, "lxc", &cgroup) != 0) { + if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) { lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR, _("Unable to get cgroup for %s\n"), vm->def->name); goto cleanup; @@ -527,7 +526,8 @@ static int lxcVMCleanup(virConnectPtr conn, vethDelete(vm->def->nets[i]->ifname); } - if (virCgroupForDomain(vm->def, "lxc", &cgroup) == 0) { + if (driver->cgroup && + virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) == 0) { virCgroupRemove(cgroup); virCgroupFree(&cgroup); } @@ -1145,6 +1145,7 @@ static int lxcStartup(int privileged) { unsigned int i; char *ld; + int rc; /* Valgrind gets very annoyed when we clone containers, so * disable LXC when under valgrind @@ -1174,6 +1175,13 @@ static int lxcStartup(int privileged) lxc_driver->have_netns = lxcCheckNetNsSupport(); + rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1); + if (rc < 0) { + char buf[1024]; + VIR_WARN("Unable to create cgroup for driver: %s", + virStrerror(-rc, buf, sizeof(buf))); + } + /* Call function to load lxc driver configuration information */ if (lxcLoadDriverConfig(lxc_driver) < 0) goto cleanup; @@ -1193,7 +1201,6 @@ static int lxcStartup(int privileged) virDomainObjPtr vm = lxc_driver->domains.objs[i]; char *config = NULL; virDomainDefPtr tmp; - int rc; virDomainObjLock(vm); if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) { @@ -1330,7 +1337,7 @@ static int lxcSetSchedulerParameters(virDomainPtr domain, virDomainObjPtr vm = NULL; int ret = -1; - if (virCgroupHaveSupport() != 0) + if (driver->cgroup == NULL) return -1; lxcDriverLock(driver); @@ -1343,7 +1350,7 @@ static int lxcSetSchedulerParameters(virDomainPtr domain, goto cleanup; } - if (virCgroupForDomain(vm->def, "lxc", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; for (i = 0; i < nparams; i++) { @@ -1377,7 +1384,7 @@ static int lxcGetSchedulerParameters(virDomainPtr domain, unsigned long val; int ret = -1; - if (virCgroupHaveSupport() != 0) + if (driver->cgroup == NULL) return -1; if ((*nparams) != 1) { @@ -1396,7 +1403,7 @@ static int lxcGetSchedulerParameters(virDomainPtr domain, goto cleanup; } - if (virCgroupForDomain(vm->def, "lxc", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; if (virCgroupGetCpuShares(group, &val) != 0) diff --git a/src/qemu_conf.h b/src/qemu_conf.h index fbf2ab9..2bcaa0e 100644 --- a/src/qemu_conf.h +++ b/src/qemu_conf.h @@ -34,6 +34,7 @@ #include "domain_event.h" #include "threads.h" #include "security.h" +#include "cgroup.h" #define qemudDebug(fmt, ...) do {} while(0) @@ -72,6 +73,7 @@ struct qemud_driver { unsigned int qemuVersion; int nextvmid; + virCgroupPtr cgroup; virDomainObjList domains; brControl *brctl; diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 33e9cfa..96e5ef2 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -419,6 +419,7 @@ static int qemudStartup(int privileged) { char *base = NULL; char driverConf[PATH_MAX]; + int rc; if (VIR_ALLOC(qemu_driver) < 0) return -1; @@ -434,6 +435,13 @@ qemudStartup(int privileged) { /* Don't have a dom0 so start from 1 */ qemu_driver->nextvmid = 1; + rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1); + if (rc < 0) { + char buf[1024]; + VIR_WARN("Unable to create cgroup for driver: %s", + virStrerror(-rc, buf, sizeof(buf))); + } + /* Init callback list */ if(VIR_ALLOC(qemu_driver->domainEventCallbacks) < 0) goto out_of_memory; @@ -629,6 +637,7 @@ qemudShutdown(void) { virCapabilitiesFree(qemu_driver->caps); virDomainObjListFree(&qemu_driver->domains); + virCgroupFree(&qemu_driver->cgroup); VIR_FREE(qemu_driver->securityDriverName); VIR_FREE(qemu_driver->logDir); @@ -1385,17 +1394,17 @@ static const char *const devs[] = { }; static int qemuSetupCgroup(virConnectPtr conn, - struct qemud_driver *driver ATTRIBUTE_UNUSED, + struct qemud_driver *driver, virDomainObjPtr vm) { virCgroupPtr cgroup = NULL; int rc; unsigned int i; - if (virCgroupHaveSupport() != 0) - return 0; /* Not supported, so claim success */ + if (!driver->cgroup) + return 0; - rc = virCgroupForDomain(vm->def, "qemu", &cgroup); + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 1); if (rc != 0) { virReportSystemError(conn, -rc, _("Unable to create cgroup for %s"), vm->def->name); @@ -1463,35 +1472,38 @@ cleanup: static int qemuRemoveCgroup(virConnectPtr conn, - struct qemud_driver *driver ATTRIBUTE_UNUSED, + struct qemud_driver *driver, virDomainObjPtr vm) { virCgroupPtr cgroup; - - if (virCgroupHaveSupport() != 0) + int rc; + DEBUG("Remove %p", vm); + if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ - if (virCgroupForDomain(vm->def, "qemu", &cgroup) != 0) { - qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("Unable to find cgroup for %s\n"), - vm->def->name); + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to find cgroup for %s\n"), + vm->def->name); return -1; } - virCgroupRemove(cgroup); + rc = virCgroupRemove(cgroup); virCgroupFree(&cgroup); - return 0; + return rc; } -static int qemuAddToCgroup(virDomainDefPtr def) +static int qemuAddToCgroup(struct qemud_driver *driver, + virDomainDefPtr def) { virCgroupPtr cgroup = NULL; int rc; - if (virCgroupHaveSupport() != 0) + if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ - rc = virCgroupForDomain(def, "qemu", &cgroup); + rc = virCgroupForDomain(driver->cgroup, def->name, &cgroup, 0); if (rc != 0) { virReportSystemError(NULL, -rc, _("unable to find cgroup for domain %s"), def->name); return -1; @@ -1729,7 +1741,7 @@ struct qemudHookData { static int qemudSecurityHook(void *data) { struct qemudHookData *h = data; - if (qemuAddToCgroup(h->vm->def) < 0) + if (qemuAddToCgroup(h->driver, h->vm->def) < 0) return -1; if (qemudDomainSetSecurityLabel(h->conn, h->driver, h->vm) < 0) { @@ -2011,7 +2023,7 @@ cleanup: static void qemudShutdownVMDaemon(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { - int ret; + int ret, retries = 0; if (!virDomainIsActive(vm)) return; @@ -2051,9 +2063,18 @@ static void qemudShutdownVMDaemon(virConnectPtr conn, VIR_WARN("Failed to restore all device ownership for %s", vm->def->name); - if (qemuRemoveCgroup(conn, driver, vm) < 0) - VIR_WARN("Failed to remove cgroup for %s", - vm->def->name); +retry: + ret = qemuRemoveCgroup(conn, driver, vm); + if (ret != 0) { + if (ret == -EBUSY && retries < 5) { + retries++; + usleep(1000*200); + goto retry; + } else { + VIR_WARN("Failed to remove cgroup for %s: %d", + vm->def->name, -ret); + } + } if (qemudRemoveDomainStatus(conn, driver, vm) < 0) { VIR_WARN(_("Failed to remove domain status for %s"), @@ -4718,14 +4739,15 @@ static int qemudDomainAttachDevice(virDomainPtr dom, if (dev->type == VIR_DOMAIN_DEVICE_DISK) { - if (virCgroupHaveSupport() == 0) { - if (virCgroupForDomain(vm->def, "qemu", &cgroup) !=0 ) { + if (driver->cgroup) { + if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) !=0 ) { qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("Unable to create cgroup for %s\n"), + _("Unable to find cgroup for %s\n"), vm->def->name); goto cleanup; } if (dev->data.disk->src != NULL && + dev->data.disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && virCgroupAllowDevicePath(cgroup, dev->data.disk->src) < 0) { qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -5052,9 +5074,10 @@ cleanup: static char *qemuGetSchedulerType(virDomainPtr dom, int *nparams) { + struct qemud_driver *driver = dom->conn->privateData; char *ret; - if (virCgroupHaveSupport() != 0) { + if (driver->cgroup == NULL) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return NULL; @@ -5079,7 +5102,7 @@ static int qemuSetSchedulerParameters(virDomainPtr dom, virDomainObjPtr vm = NULL; int ret = -1; - if (virCgroupHaveSupport() != 0) { + if (driver->cgroup == NULL) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return -1; @@ -5095,7 +5118,7 @@ static int qemuSetSchedulerParameters(virDomainPtr dom, goto cleanup; } - if (virCgroupForDomain(vm->def, "qemu", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; for (i = 0; i < nparams; i++) { @@ -5135,7 +5158,7 @@ static int qemuGetSchedulerParameters(virDomainPtr dom, int ret = -1; int rc; - if (virCgroupHaveSupport() != 0) { + if (driver->cgroup == NULL) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return -1; @@ -5157,7 +5180,7 @@ static int qemuGetSchedulerParameters(virDomainPtr dom, goto cleanup; } - if (virCgroupForDomain(vm->def, "qemu", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; rc = virCgroupGetCpuShares(group, &val); diff --git a/src/util.c b/src/util.c index 2420f8b..5261714 100644 --- a/src/util.c +++ b/src/util.c @@ -1826,8 +1826,14 @@ int virRandom(int max) #ifdef HAVE_GETPWUID_R -char *virGetUserDirectory(virConnectPtr conn, - uid_t uid) +enum { + VIR_USER_ENT_DIRECTORY, + VIR_USER_ENT_NAME, +}; + +static char *virGetUserEnt(virConnectPtr conn, + uid_t uid, + int field) { char *strbuf; char *ret; @@ -1855,7 +1861,10 @@ char *virGetUserDirectory(virConnectPtr conn, return NULL; } - ret = strdup(pw->pw_dir); + if (field == VIR_USER_ENT_DIRECTORY) + ret = strdup(pw->pw_dir); + else + ret = strdup(pw->pw_name); VIR_FREE(strbuf); if (!ret) @@ -1864,6 +1873,18 @@ char *virGetUserDirectory(virConnectPtr conn, return ret; } +char *virGetUserDirectory(virConnectPtr conn, + uid_t uid) +{ + return virGetUserEnt(conn, uid, VIR_USER_ENT_DIRECTORY); +} + +char *virGetUserName(virConnectPtr conn, + uid_t uid) +{ + return virGetUserEnt(conn, uid, VIR_USER_ENT_NAME); +} + int virGetUserID(virConnectPtr conn, const char *name, diff --git a/src/util.h b/src/util.h index 1a7286c..e905c38 100644 --- a/src/util.h +++ b/src/util.h @@ -217,6 +217,8 @@ int virKillProcess(pid_t pid, int sig); #ifdef HAVE_GETPWUID_R char *virGetUserDirectory(virConnectPtr conn, uid_t uid); +char *virGetUserName(virConnectPtr conn, + uid_t uid); int virGetUserID(virConnectPtr conn, const char *name, uid_t *uid); -- 1.6.2.5

On Fri, Jul 17, 2009 at 09:04:29AM -0400, Daniel P. Berrange wrote:
Allow the driver level cgroup to be managed explicitly by the hypervisor drivers, in order to detect whether to enable or disable cgroup support for domains. Provides better error reporting of failures. Also allow for creation of cgroups for unprivileged drivers if controller is accessible by the user.
* src/cgroup.c, src/cgroup.h * src/lxc_conf.h, src/lxc_controller.c, src/lxc_driver.c, src/qemu_conf.h, src/qemu_driver.c: Update for change in API * src/util.c, src/util.h, src/libvirt_private.syms: Add a virGetUserName() helper
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/
participants (2)
-
Daniel P. Berrange
-
Daniel Veillard