[libvirt] [PATCH 0/8] Support cgroups in QEMU driver (v2)

This is an updated series of the v1 patches here: http://www.redhat.com/archives/libvir-list/2009-July/msg00435.html I have re-ordered the patches a little to put the generic refactoring all first. I identified some bugs in the existing LXC driver schedular parameter handling which I fixed. Also fixed virsh to use the correct data types when setting schedular parameters instead of hardcoding a bogus type. Finally, I've made QEMU's use of cgroups configurable. Daniel P. Berrange (8): Use enums for cgroup controller types / labels Use virFileReadAll/virFileWriteStr for key cgroup read/write helpers Make cgroups a little more efficient Refactor cgroups to allow a group per driver to be managed directly Place every QEMU guest in a private cgroup Implement schedular tunables API using cgroups Use cgroups for block device whitelisting in QEMU guests Make QEMU cgroups use configurable qemud/libvirtd_qemu.aug | 2 + qemud/test_libvirtd_qemu.aug | 21 +- src/Makefile.am | 3 +- src/cgroup.c | 867 +++++++++++++++++++++++------------------- src/cgroup.h | 42 ++- src/libvirt_private.syms | 2 + src/lxc_conf.h | 2 + src/lxc_controller.c | 19 +- src/lxc_driver.c | 34 ++- src/qemu.conf | 34 ++ src/qemu_conf.c | 61 +++ src/qemu_conf.h | 5 + src/qemu_driver.c | 399 +++++++++++++++++++- src/util.c | 47 +++- src/util.h | 5 + src/virsh.c | 232 +++++++----- 16 files changed, 1237 insertions(+), 538 deletions(-)

Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 1 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 50517e2..4955298 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -31,15 +31,21 @@ #define CGROUP_MAX_VAL 512 -struct virCgroup { - char *path; +enum { + VIR_CGROUP_CONTROLLER_CPUACCT, + VIR_CGROUP_CONTROLLER_MEMORY, + VIR_CGROUP_CONTROLLER_DEVICES, + VIR_CGROUP_CONTROLLER_CPUSET, + + VIR_CGROUP_CONTROLLER_LAST }; -const char *supported_controllers[] = { - "memory", - "devices", - "cpuacct", - NULL +VIR_ENUM_DECL(virCgroupController); +VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, + "cpuacct", "memory", "devices", "cpuset"); + +struct virCgroup { + char *path; }; /** @@ -107,8 +113,9 @@ int virCgroupHaveSupport(void) virCgroupPtr root; int i; - for (i = 0; supported_controllers[i] != NULL; i++) { - root = virCgroupGetMount(supported_controllers[i]); + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); + root = virCgroupGetMount(label); if (root == NULL) return -1; virCgroupFree(&root); @@ -419,15 +426,16 @@ static int virCgroupMakeGroup(const char *name) int i; int rc = 0; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); char *path = NULL; virCgroupPtr root; - root = virCgroupGetMount(supported_controllers[i]); + root = virCgroupGetMount(label); if (root == NULL) continue; - rc = virCgroupPathOfGroup(name, supported_controllers[i], &path); + rc = virCgroupPathOfGroup(name, label, &path); if (rc != 0) { virCgroupFree(&root); break; @@ -521,6 +529,7 @@ static int virCgroupOpen(virCgroupPtr parent, virCgroupPtr *newgroup) { int rc = 0; + const char *label = virCgroupControllerTypeToString(0); char *grppath = NULL; bool free_parent = (parent == NULL); @@ -532,7 +541,7 @@ static int virCgroupOpen(virCgroupPtr parent, virCgroupFree(&parent); rc = virCgroupPathOfGroup((*newgroup)->path, - supported_controllers[0], + label, &grppath); if (rc != 0) goto err; @@ -594,9 +603,10 @@ int virCgroupRemove(virCgroupPtr group) int i; char *grppath = NULL; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); if (virCgroupPathOfGroup(group->path, - supported_controllers[i], + label, &grppath) != 0) continue; @@ -626,9 +636,11 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) char *taskpath = NULL; char *pidstr = NULL; - for (i = 0; supported_controllers[i] != NULL; i++) { + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *label = virCgroupControllerTypeToString(i); + rc = virCgroupPathOfGroup(group->path, - supported_controllers[i], + label, &grppath); if (rc != 0) goto done; -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:40PM +0100, Daniel P. Berrange wrote:
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 1 files changed, 29 insertions(+), 17 deletions(-)
nicer, ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 87 ++++++++++++++++++---------------------------------------- 1 files changed, 27 insertions(+), 60 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 4955298..aecdf47 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -178,7 +178,6 @@ static int virCgroupSetValueStr(virCgroupPtr group, const char *key, const char *value) { - int fd = -1; int rc = 0; char *keypath = NULL; @@ -186,47 +185,16 @@ static int virCgroupSetValueStr(virCgroupPtr group, if (rc != 0) return rc; - fd = open(keypath, O_WRONLY); - if (fd < 0) { - DEBUG("Unable to open %s: %m", keypath); - rc = -ENOENT; - goto out; - } - - DEBUG("Writing '%s' to '%s'", value, keypath); - - rc = safewrite(fd, value, strlen(value)); + VIR_DEBUG("Set value %s", keypath); + rc = virFileWriteStr(keypath, value); if (rc < 0) { DEBUG("Failed to write value '%s': %m", value); rc = -errno; - goto out; - } else if (rc != strlen(value)) { - DEBUG("Short write of value '%s'", value); - rc = -ENOSPC; - goto out; + } else { + rc = 0; } - rc = 0; -out: VIR_FREE(keypath); - close(fd); - - return rc; -} - -static int virCgroupSetValueU64(virCgroupPtr group, - const char *key, - uint64_t value) -{ - char *strval = NULL; - int rc; - - if (virAsprintf(&strval, "%" PRIu64, value) == -1) - return -ENOMEM; - - rc = virCgroupSetValueStr(group, key, strval); - - VIR_FREE(strval); return rc; } @@ -235,12 +203,10 @@ static int virCgroupGetValueStr(virCgroupPtr group, const char *key, char **value) { - int fd = -1; int rc; char *keypath = NULL; - char buf[CGROUP_MAX_VAL]; - memset(buf, 0, sizeof(buf)); + *value = NULL; rc = virCgroupPathOf(group->path, key, &keypath); if (rc != 0) { @@ -248,38 +214,39 @@ static int virCgroupGetValueStr(virCgroupPtr group, return rc; } - fd = open(keypath, O_RDONLY); - if (fd < 0) { - DEBUG("Unable to open %s: %m", keypath); - rc = -ENOENT; - goto out; - } + VIR_DEBUG("Get value %s", keypath); - rc = saferead(fd, buf, sizeof(buf)); + rc = virFileReadAll(keypath, 1024, value); if (rc < 0) { DEBUG("Failed to read %s: %m\n", keypath); rc = -errno; - goto out; - } else if (rc == 0) { - DEBUG("Short read of %s\n", keypath); - rc = -EIO; - goto out; - } - - *value = strdup(buf); - if (*value == NULL) { - rc = -ENOMEM; - goto out; + } else { + rc = 0; } - rc = 0; -out: VIR_FREE(keypath); - close(fd); return rc; } +static int virCgroupSetValueU64(virCgroupPtr group, + const char *key, + uint64_t value) +{ + char *strval = NULL; + int rc; + + if (virAsprintf(&strval, "%" PRIu64, value) == -1) + return -ENOMEM; + + rc = virCgroupSetValueStr(group, key, strval); + + VIR_FREE(strval); + + return rc; +} + + #if 0 /* This is included for completeness, but not yet used */ -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:41PM +0100, Daniel P. Berrange wrote:
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 87 ++++++++++++++++++----------------------------------------
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/cgroup.c: Detect the mount location of every controller at time a virCgroupPtr is created. Detect current process' placement within group to avoid assuming it is in the root. Pass controller ID into SetValueStr/GetValueStr to enable much duplicated code to be eliminated Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 616 ++++++++++++++++++++++++++++++---------------------------- 1 files changed, 317 insertions(+), 299 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index aecdf47..d9c3141 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -32,20 +32,29 @@ #define CGROUP_MAX_VAL 512 enum { + VIR_CGROUP_CONTROLLER_CPU, VIR_CGROUP_CONTROLLER_CPUACCT, + VIR_CGROUP_CONTROLLER_CPUSET, VIR_CGROUP_CONTROLLER_MEMORY, VIR_CGROUP_CONTROLLER_DEVICES, - VIR_CGROUP_CONTROLLER_CPUSET, VIR_CGROUP_CONTROLLER_LAST }; VIR_ENUM_DECL(virCgroupController); VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, - "cpuacct", "memory", "devices", "cpuset"); + "cpu", "cpuacct", "cpuset", "memory", "devices"); + +struct virCgroupController { + int type; + char *mountPoint; + char *placement; +}; struct virCgroup { char *path; + + struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST]; }; /** @@ -55,133 +64,218 @@ struct virCgroup { */ void virCgroupFree(virCgroupPtr *group) { - if (*group != NULL) { - VIR_FREE((*group)->path); - VIR_FREE(*group); + int i; + + if (*group == NULL) + return; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + VIR_FREE((*group)->controllers[i].mountPoint); + VIR_FREE((*group)->controllers[i].placement); } + + VIR_FREE((*group)->path); + VIR_FREE(*group); } -static virCgroupPtr virCgroupGetMount(const char *controller) + +/* + * Process /proc/mounts figuring out what controllers are + * mounted and where + */ +static int virCgroupDetectMounts(virCgroupPtr group) { + int i; FILE *mounts = NULL; struct mntent entry; char buf[CGROUP_MAX_VAL]; - virCgroupPtr root = NULL; - - if (VIR_ALLOC(root) != 0) - return NULL; mounts = fopen("/proc/mounts", "r"); if (mounts == NULL) { - DEBUG0("Unable to open /proc/mounts: %m"); - goto err; + VIR_ERROR0("Unable to open /proc/mounts"); + return -ENOENT; } while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup") && - (strstr(entry.mnt_opts, controller))) { - root->path = strdup(entry.mnt_dir); - break; - } - } - - DEBUG("Mount for %s is %s\n", controller, root->path); + if (STRNEQ(entry.mnt_type, "cgroup")) + continue; - if (root->path == NULL) { - DEBUG0("Did not find cgroup mount"); - goto err; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = entry.mnt_opts; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) + goto no_memory; + tmp = next; + } + } } fclose(mounts); - return root; -err: - if (mounts != NULL) - fclose(mounts); - virCgroupFree(&root); + return 0; - return NULL; +no_memory: + if (mounts) + fclose(mounts); + return -ENOMEM; } -/** - * virCgroupHaveSupport: - * - * Returns 0 if support is present, negative if not + +/* + * Process /proc/self/cgroup figuring out what cgroup + * sub-path the current process is assigned to. ie not + * neccessarily in the root */ -int virCgroupHaveSupport(void) +static int virCgroupDetectPlacement(virCgroupPtr group) { - virCgroupPtr root; int i; + FILE *mapping = NULL; + char line[1024]; - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - root = virCgroupGetMount(label); - if (root == NULL) - return -1; - virCgroupFree(&root); + mapping = fopen("/proc/self/cgroup", "r"); + if (mapping == NULL) { + VIR_ERROR0("Unable to open /proc/self/cgroup"); + return -ENOENT; } + while (fgets(line, sizeof(line), mapping) != NULL) { + char *controllers = strchr(line, ':'); + char *path = controllers ? strchr(controllers+1, ':') : NULL; + char *nl = path ? strchr(path, '\n') : NULL; + + if (!controllers || !path) + continue; + + if (nl) + *nl = '\0'; + + *path = '\0'; + controllers++; + path++; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = controllers; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].placement = strdup(STREQ(path, "/") ? "" : path))) + goto no_memory; + + tmp = next; + } + } + } + + fclose(mapping); + return 0; + +no_memory: + return -ENOMEM; + } -static int virCgroupPathOfGroup(const char *group, - const char *controller, - char **path) +static int virCgroupDetect(virCgroupPtr group) { - virCgroupPtr root = NULL; - int rc = 0; + int any = 0; + int rc; + int i; - root = virCgroupGetMount(controller); - if (root == NULL) { - rc = -ENOTDIR; - goto out; + rc = virCgroupDetectMounts(group); + if (rc < 0) { + VIR_ERROR("Failed to detect mounts for %s", group->path); + return rc; } - if (virAsprintf(path, "%s/%s", root->path, group) == -1) - rc = -ENOMEM; -out: - virCgroupFree(&root); + /* Check that at least 1 controller is available */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (group->controllers[i].mountPoint != NULL) + any = 1; + } + if (!any) + return -ENXIO; - return rc; -} -static int virCgroupPathOf(const char *grppath, - const char *key, - char **path) -{ - virCgroupPtr root; - int rc = 0; - char *controller = NULL; + rc = virCgroupDetectPlacement(group); - if (strchr(key, '.') == NULL) - return -EINVAL; + if (rc == 0) { + /* Check that for every mounted controller, we found our placement */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (!group->controllers[i].mountPoint) + continue; - if (sscanf(key, "%a[^.]", &controller) != 1) - return -EINVAL; + if (!group->controllers[i].placement) { + VIR_ERROR("Could not find placement for controller %s at %s", + virCgroupControllerTypeToString(i), + group->controllers[i].placement); + rc = -ENOENT; + break; + } - root = virCgroupGetMount(controller); - if (root == NULL) { - rc = -ENOTDIR; - goto out; + VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i, + virCgroupControllerTypeToString(i), + group->controllers[i].mountPoint, + group->controllers[i].placement); + } + } else { + VIR_ERROR("Failed to detect mapping for %s", group->path); } - if (virAsprintf(path, "%s/%s/%s", root->path, grppath, key) == -1) - rc = -ENOMEM; -out: - virCgroupFree(&root); - VIR_FREE(controller); - return rc; } + +static int virCgroupPathOfController(virCgroupPtr group, + int controller, + const char *key, + char **path) +{ + if (group->controllers[controller].mountPoint == NULL) + return -ENOENT; + + if (group->controllers[controller].placement == NULL) + return -ENOENT; + + if (virAsprintf(path, "%s%s%s/%s", + group->controllers[controller].mountPoint, + group->controllers[controller].placement, + STREQ(group->path, "/") ? "" : group->path, + key ? key : "") == -1) + return -ENOMEM; + + return 0; +} + + static int virCgroupSetValueStr(virCgroupPtr group, + int controller, const char *key, const char *value) { int rc = 0; char *keypath = NULL; - rc = virCgroupPathOf(group->path, key, &keypath); + rc = virCgroupPathOfController(group, controller, key, &keypath); if (rc != 0) return rc; @@ -200,6 +294,7 @@ static int virCgroupSetValueStr(virCgroupPtr group, } static int virCgroupGetValueStr(virCgroupPtr group, + int controller, const char *key, char **value) { @@ -208,7 +303,7 @@ static int virCgroupGetValueStr(virCgroupPtr group, *value = NULL; - rc = virCgroupPathOf(group->path, key, &keypath); + rc = virCgroupPathOfController(group, controller, key, &keypath); if (rc != 0) { DEBUG("No path of %s, %s", group->path, key); return rc; @@ -230,6 +325,7 @@ static int virCgroupGetValueStr(virCgroupPtr group, } static int virCgroupSetValueU64(virCgroupPtr group, + int controller, const char *key, uint64_t value) { @@ -239,7 +335,7 @@ static int virCgroupSetValueU64(virCgroupPtr group, if (virAsprintf(&strval, "%" PRIu64, value) == -1) return -ENOMEM; - rc = virCgroupSetValueStr(group, key, strval); + rc = virCgroupSetValueStr(group, controller, key, strval); VIR_FREE(strval); @@ -251,6 +347,7 @@ static int virCgroupSetValueU64(virCgroupPtr group, /* This is included for completeness, but not yet used */ static int virCgroupSetValueI64(virCgroupPtr group, + int controller, const char *key, int64_t value) { @@ -260,7 +357,7 @@ static int virCgroupSetValueI64(virCgroupPtr group, if (virAsprintf(&strval, "%" PRIi64, value) == -1) return -ENOMEM; - rc = virCgroupSetValueStr(group, key, strval); + rc = virCgroupSetValueStr(group, controller, key, strval); VIR_FREE(strval); @@ -268,13 +365,14 @@ static int virCgroupSetValueI64(virCgroupPtr group, } static int virCgroupGetValueI64(virCgroupPtr group, + int controller, const char *key, int64_t *value) { char *strval = NULL; int rc = 0; - rc = virCgroupGetValueStr(group, key, &strval); + rc = virCgroupGetValueStr(group, controller, key, &strval); if (rc != 0) goto out; @@ -288,13 +386,14 @@ out: #endif static int virCgroupGetValueU64(virCgroupPtr group, + int controller, const char *key, uint64_t *value) { char *strval = NULL; int rc = 0; - rc = virCgroupGetValueStr(group, key, &strval); + rc = virCgroupGetValueStr(group, controller, key, &strval); if (rc != 0) goto out; @@ -306,81 +405,38 @@ out: return rc; } -static int _virCgroupInherit(const char *path, - const char *key) -{ - int rc = 0; - int fd = -1; - char buf[CGROUP_MAX_VAL]; - char *keypath = NULL; - char *pkeypath = NULL; - - memset(buf, 0, sizeof(buf)); - - if (virAsprintf(&keypath, "%s/%s", path, key) == -1) { - rc = -ENOMEM; - goto out; - } - - if (access(keypath, F_OK) != 0) { - DEBUG("Group %s has no key %s\n", path, key); - goto out; - } - - if (virAsprintf(&pkeypath, "%s/../%s", path, key) == -1) { - rc = -ENOMEM; - VIR_FREE(keypath); - goto out; - } - - fd = open(pkeypath, O_RDONLY); - if (fd < 0) { - rc = -errno; - goto out; - } - - if (saferead(fd, buf, sizeof(buf)) <= 0) { - rc = -errno; - goto out; - } - - close(fd); - fd = open(keypath, O_WRONLY); - if (fd < 0) { - rc = -errno; - goto out; - } - - if (safewrite(fd, buf, strlen(buf)) != strlen(buf)) { - rc = -errno; - goto out; - } - -out: - VIR_FREE(keypath); - VIR_FREE(pkeypath); - close(fd); - - return rc; -} - -static int virCgroupInherit(const char *grppath) +static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) { int i; int rc = 0; const char *inherit_values[] = { "cpuset.cpus", "cpuset.mems", - NULL }; - for (i = 0; inherit_values[i] != NULL; i++) { - const char *key = inherit_values[i]; + VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path); + for (i = 0; i < ARRAY_CARDINALITY(inherit_values) ; i++) { + char *value; - rc = _virCgroupInherit(grppath, key); + rc = virCgroupGetValueStr(parent, + VIR_CGROUP_CONTROLLER_CPUSET, + inherit_values[i], + &value); if (rc != 0) { - DEBUG("inherit of %s failed\n", key); + VIR_ERROR("Failed to get %s %d", inherit_values[i], rc); + break; + } + + VIR_DEBUG("Inherit %s = %s", inherit_values[i], value); + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + inherit_values[i], + value); + + if (rc != 0) { + VIR_ERROR("Failed to set %s %d", inherit_values[i], rc); break; } } @@ -388,35 +444,37 @@ static int virCgroupInherit(const char *grppath) return rc; } -static int virCgroupMakeGroup(const char *name) +static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group) { int i; int rc = 0; + VIR_DEBUG("Make group %s", group->path); for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); char *path = NULL; - virCgroupPtr root; - root = virCgroupGetMount(label); - if (root == NULL) + /* Skip over controllers that aren't mounted */ + if (!group->controllers[i].mountPoint) continue; - rc = virCgroupPathOfGroup(name, label, &path); - if (rc != 0) { - virCgroupFree(&root); - break; - } - - virCgroupFree(&root); + rc = virCgroupPathOfController(group, i, "", &path); + if (rc < 0) + return rc; + VIR_DEBUG("Make controller %s", path); if (access(path, F_OK) != 0) { if (mkdir(path, 0755) < 0) { rc = -errno; VIR_FREE(path); break; } - virCgroupInherit(path); + if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL && + (i == VIR_CGROUP_CONTROLLER_CPUSET || + STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) { + rc = virCgroupCpuSetInherit(parent, group); + if (rc != 0) + break; + } } VIR_FREE(path); @@ -425,134 +483,107 @@ static int virCgroupMakeGroup(const char *name) return rc; } -static int virCgroupRoot(virCgroupPtr *root) + +static int virCgroupRoot(virCgroupPtr *group); +/** + * virCgroupHaveSupport: + * + * Returns 0 if support is present, negative if not + */ +int virCgroupHaveSupport(void) { - int rc = 0; - char *grppath = NULL; + virCgroupPtr root; + int i; + int rc; + int any = 0; - if (VIR_ALLOC((*root)) != 0) { - rc = -ENOMEM; - goto out; - } + rc = virCgroupRoot(&root); + if (rc < 0) + return rc; - (*root)->path = strdup("libvirt"); - if ((*root)->path == NULL) { - rc = -ENOMEM; - goto out; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (root->controllers[i].mountPoint != NULL) + any = 1; } - rc = virCgroupMakeGroup((*root)->path); -out: - if (rc != 0) - virCgroupFree(root); - VIR_FREE(grppath); + virCgroupFree(&root); - return rc; + if (any) + return 0; + + return -ENOENT; } -static int virCgroupNew(virCgroupPtr *parent, - const char *group, - virCgroupPtr *newgroup) +static int virCgroupNew(const char *path, + virCgroupPtr *group) { int rc = 0; char *typpath = NULL; - *newgroup = NULL; - - if (*parent == NULL) { - rc = virCgroupRoot(parent); - if (rc != 0) - goto err; - } + VIR_DEBUG("New group %s", path); + *group = NULL; - if (VIR_ALLOC((*newgroup)) != 0) { + if (VIR_ALLOC((*group)) != 0) { rc = -ENOMEM; goto err; } - rc = virAsprintf(&((*newgroup)->path), - "%s/%s", - (*parent)->path, - group); - if (rc == -1) { + if (!((*group)->path = strdup(path))) { rc = -ENOMEM; goto err; } - rc = 0; + rc = virCgroupDetect(*group); + if (rc < 0) + goto err; return rc; err: - virCgroupFree(newgroup); - *newgroup = NULL; + virCgroupFree(group); + *group = NULL; VIR_FREE(typpath); return rc; } -static int virCgroupOpen(virCgroupPtr parent, - const char *group, - virCgroupPtr *newgroup) +static int virCgroupRoot(virCgroupPtr *group) { - int rc = 0; - const char *label = virCgroupControllerTypeToString(0); - char *grppath = NULL; - bool free_parent = (parent == NULL); - - rc = virCgroupNew(&parent, group, newgroup); - if (rc != 0) - goto err; - - if (free_parent) - virCgroupFree(&parent); - - rc = virCgroupPathOfGroup((*newgroup)->path, - label, - &grppath); - if (rc != 0) - goto err; - - if (access(grppath, F_OK) != 0) { - rc = -ENOENT; - goto err; - } - - return rc; -err: - virCgroupFree(newgroup); - *newgroup = NULL; - - return rc; + return virCgroupNew("/", group); } static int virCgroupCreate(virCgroupPtr parent, - const char *group, - virCgroupPtr *newgroup) + const char *name, + virCgroupPtr *group) { int rc = 0; - bool free_parent = (parent == NULL); + char *path; + + VIR_DEBUG("Creating %s under %s", name, parent->path); - rc = virCgroupNew(&parent, group, newgroup); + rc = virAsprintf(&path, "%s/%s", + STREQ(parent->path, "/") ? + "" : parent->path, + name); + if (rc < 0) { + rc = -ENOMEM; + goto err; + } + + rc = virCgroupNew(path, group); if (rc != 0) { DEBUG0("Unable to allocate new virCgroup structure"); goto err; } - rc = virCgroupMakeGroup((*newgroup)->path); + rc = virCgroupMakeGroup(parent, *group); if (rc != 0) goto err; - if (free_parent) - virCgroupFree(&parent); - return rc; err: - virCgroupFree(newgroup); - *newgroup = NULL; - - if (free_parent) - virCgroupFree(&parent); + virCgroupFree(group); + *group = NULL; return rc; } @@ -571,15 +602,20 @@ int virCgroupRemove(virCgroupPtr group) char *grppath = NULL; for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - if (virCgroupPathOfGroup(group->path, - label, - &grppath) != 0) + /* Skip over controllers not mounted */ + if (!group->controllers[i].mountPoint) continue; - if (rmdir(grppath) != 0) - rc = -errno; + if (virCgroupPathOfController(group, + i, + NULL, + &grppath) != 0) + continue; + DEBUG("Removing cgroup %s", grppath); + if (rmdir(grppath) != 0 && errno != ENOENT) { + rc = -errno; + } VIR_FREE(grppath); } @@ -597,48 +633,14 @@ int virCgroupRemove(virCgroupPtr group) int virCgroupAddTask(virCgroupPtr group, pid_t pid) { int rc = 0; - int fd = -1; int i; - char *grppath = NULL; - char *taskpath = NULL; - char *pidstr = NULL; for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *label = virCgroupControllerTypeToString(i); - - rc = virCgroupPathOfGroup(group->path, - label, - &grppath); - if (rc != 0) - goto done; - - if (virAsprintf(&taskpath, "%s/tasks", grppath) == -1) { - rc = -ENOMEM; - goto done; - } - - fd = open(taskpath, O_WRONLY); - if (fd < 0) { - rc = -errno; - goto done; - } - - if (virAsprintf(&pidstr, "%lu", (unsigned long)pid) == -1) { - rc = -ENOMEM; - goto done; - } - - if (safewrite(fd, pidstr, strlen(pidstr)) <= 0) { - rc = -errno; - goto done; - } - - done: - VIR_FREE(grppath); - VIR_FREE(taskpath); - VIR_FREE(pidstr); - close(fd); + /* Skip over controllers not mounted */ + if (!group->controllers[i].mountPoint) + continue; + rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid); if (rc != 0) break; } @@ -660,21 +662,27 @@ int virCgroupForDomain(virDomainDefPtr def, virCgroupPtr *group) { int rc; + virCgroupPtr rootgrp = NULL; + virCgroupPtr daemongrp = NULL; virCgroupPtr typegrp = NULL; - rc = virCgroupOpen(NULL, driverName, &typegrp); - if (rc == -ENOENT) { - rc = virCgroupCreate(NULL, driverName, &typegrp); - if (rc != 0) - goto out; - } else if (rc != 0) + rc = virCgroupRoot(&rootgrp); + if (rc != 0) goto out; - rc = virCgroupOpen(typegrp, def->name, group); - if (rc == -ENOENT) - rc = virCgroupCreate(typegrp, def->name, group); + rc = virCgroupCreate(rootgrp, "libvirt", &daemongrp); + if (rc != 0) + goto out; + + rc = virCgroupCreate(daemongrp, driverName, &typegrp); + if (rc != 0) + goto out; + + rc = virCgroupCreate(typegrp, def->name, group); out: virCgroupFree(&typegrp); + virCgroupFree(&daemongrp); + virCgroupFree(&rootgrp); return rc; } @@ -690,6 +698,7 @@ out: int virCgroupSetMemory(virCgroupPtr group, unsigned long kb) { return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_MEMORY, "memory.limit_in_bytes", kb << 10); } @@ -704,8 +713,9 @@ int virCgroupSetMemory(virCgroupPtr group, unsigned long kb) int virCgroupDenyAllDevices(virCgroupPtr group) { return virCgroupSetValueStr(group, - "devices.deny", - "a"); + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + "a"); } /** @@ -732,6 +742,7 @@ int virCgroupAllowDevice(virCgroupPtr group, } rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, "devices.allow", devstr); out: @@ -762,6 +773,7 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, } rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, "devices.allow", devstr); out: @@ -772,15 +784,21 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares) { - return virCgroupSetValueU64(group, "cpu.shares", (uint64_t)shares); + return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", (uint64_t)shares); } int virCgroupGetCpuShares(virCgroupPtr group, unsigned long *shares) { - return virCgroupGetValueU64(group, "cpu.shares", (uint64_t *)shares); + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", (uint64_t *)shares); } int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) { - return virCgroupGetValueU64(group, "cpuacct.usage", (uint64_t *)usage); + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPUACCT, + "cpuacct.usage", (uint64_t *)usage); } -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:42PM +0100, Daniel P. Berrange wrote:
* src/cgroup.c: Detect the mount location of every controller at time a virCgroupPtr is created. Detect current process' placement within group to avoid assuming it is in the root. Pass controller ID into SetValueStr/GetValueStr to enable much duplicated code to be eliminated [...] +static int virCgroupDetectMounts(virCgroupPtr group) [...] + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) + goto no_memory;
Humpf that works but it's a bit hard to be sure we are doing the right thing in all cases. [...]
+ if (mapping == NULL) { + VIR_ERROR0("Unable to open /proc/self/cgroup");
relatively low level but shouldn't that be localized ? [...]
+static int virCgroupDetect(virCgroupPtr group) { [...] + rc = virCgroupDetectMounts(group); + if (rc < 0) { + VIR_ERROR("Failed to detect mounts for %s", group->path);
same here [...]
+static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) { [...] + VIR_ERROR("Failed to get %s %d", inherit_values[i], rc); + break; [...] + if (rc != 0) { + VIR_ERROR("Failed to set %s %d", inherit_values[i], rc); break;
here too, any reason we should not localize ? [...]
+static int virCgroupNew(const char *path, + virCgroupPtr *group) { int rc = 0; char *typpath = NULL;
it's internal but maybe check for group == NULL since we write there Clearly I'm not really following what the patches changes, but code looks fine, ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Allow the driver level cgroup to be managed explicitly by the hypervisor drivers, in order to detect whether to enable or disable cgroup support for domains. Provides better error reporting of failures. Also allow for creation of cgroups for unprivileged drivers if controller is accessible by the user. * src/cgroup.c, src/cgroup.h: Add an API to obtain a driver cgroup * src/lxc_conf.h, src/lxc_controller.c, src/lxc_driver.c: Obtain a driver cgroup at startup and use that instead of re-creating everytime. * src/util.c, src/util.h, src/libvirt_private.syms: Add a virGetUserName() helper Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 162 ++++++++++++++++++++++++---------------------- src/cgroup.h | 16 ++-- src/libvirt_private.syms | 1 + src/lxc_conf.h | 2 + src/lxc_controller.c | 19 ++++-- src/lxc_driver.c | 25 +++++--- src/util.c | 27 +++++++- src/util.h | 2 + 8 files changed, 150 insertions(+), 104 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index d9c3141..d3d45d2 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -484,36 +484,6 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group) } -static int virCgroupRoot(virCgroupPtr *group); -/** - * virCgroupHaveSupport: - * - * Returns 0 if support is present, negative if not - */ -int virCgroupHaveSupport(void) -{ - virCgroupPtr root; - int i; - int rc; - int any = 0; - - rc = virCgroupRoot(&root); - if (rc < 0) - return rc; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (root->controllers[i].mountPoint != NULL) - any = 1; - } - - virCgroupFree(&root); - - if (any) - return 0; - - return -ENOENT; -} - static int virCgroupNew(const char *path, virCgroupPtr *group) { @@ -547,47 +517,47 @@ err: return rc; } -static int virCgroupRoot(virCgroupPtr *group) -{ - return virCgroupNew("/", group); -} - -static int virCgroupCreate(virCgroupPtr parent, - const char *name, - virCgroupPtr *group) +static int virCgroupAppRoot(int privileged, + virCgroupPtr *group) { - int rc = 0; - char *path; + virCgroupPtr rootgrp = NULL; + int rc; - VIR_DEBUG("Creating %s under %s", name, parent->path); + rc = virCgroupNew("/", &rootgrp); + if (rc != 0) + return rc; - rc = virAsprintf(&path, "%s/%s", - STREQ(parent->path, "/") ? - "" : parent->path, - name); - if (rc < 0) { - rc = -ENOMEM; - goto err; - } + if (privileged) { + rc = virCgroupNew("/libvirt", group); + } else { + char *rootname; + char *username; + username = virGetUserName(NULL, getuid()); + if (!username) { + rc = -ENOMEM; + goto cleanup; + } + rc = virAsprintf(&rootname, "/libvirt-%s", username); + VIR_FREE(username); + if (rc < 0) { + rc = -ENOMEM; + goto cleanup; + } - rc = virCgroupNew(path, group); - if (rc != 0) { - DEBUG0("Unable to allocate new virCgroup structure"); - goto err; + rc = virCgroupNew(rootname, group); + VIR_FREE(rootname); } - - rc = virCgroupMakeGroup(parent, *group); if (rc != 0) - goto err; + goto cleanup; - return rc; -err: - virCgroupFree(group); - *group = NULL; + rc = virCgroupMakeGroup(rootgrp, *group); +cleanup: + virCgroupFree(&rootgrp); return rc; } + /** * virCgroupRemove: * @@ -648,45 +618,83 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) return rc; } + /** - * virCgroupForDomain: + * virCgroupForDriver: * - * @def: Domain definition to create cgroup for - * @driverName: Classification of this domain type (e.g., xen, qemu, lxc) + * @name: name of this driver (e.g., xen, qemu, lxc) * @group: Pointer to returned virCgroupPtr * * Returns 0 on success */ -int virCgroupForDomain(virDomainDefPtr def, - const char *driverName, - virCgroupPtr *group) +int virCgroupForDriver(const char *name, + virCgroupPtr *group, + int privileged, + int create) { int rc; + char *path = NULL; virCgroupPtr rootgrp = NULL; - virCgroupPtr daemongrp = NULL; - virCgroupPtr typegrp = NULL; - rc = virCgroupRoot(&rootgrp); + rc = virCgroupAppRoot(privileged, &rootgrp); if (rc != 0) goto out; - rc = virCgroupCreate(rootgrp, "libvirt", &daemongrp); - if (rc != 0) + if (virAsprintf(&path, "%s/%s", rootgrp->path, name) < 0) { + rc = -ENOMEM; goto out; + } - rc = virCgroupCreate(daemongrp, driverName, &typegrp); - if (rc != 0) - goto out; + rc = virCgroupNew(path, group); + VIR_FREE(path); + + if (rc == 0 && + create) { + rc = virCgroupMakeGroup(rootgrp, *group); + if (rc != 0) + virCgroupFree(group); + } - rc = virCgroupCreate(typegrp, def->name, group); out: - virCgroupFree(&typegrp); - virCgroupFree(&daemongrp); virCgroupFree(&rootgrp); return rc; } + +/** + * virCgroupForDomain: + * + * @driver: group for driver owning the domain + * @name: name of the domain + * @group: Pointer to returned virCgroupPtr + * + * Returns 0 on success + */ +int virCgroupForDomain(virCgroupPtr driver, + const char *name, + virCgroupPtr *group, + int create) +{ + int rc; + char *path; + + if (virAsprintf(&path, "%s/%s", driver->path, name) < 0) + return -ENOMEM; + + rc = virCgroupNew(path, group); + VIR_FREE(path); + + if (rc == 0 && + create) { + rc = virCgroupMakeGroup(driver, *group); + if (rc != 0) + virCgroupFree(group); + } + + return rc; +} + /** * virCgroupSetMemory: * diff --git a/src/cgroup.h b/src/cgroup.h index 11c44f9..dbb444b 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -12,18 +12,18 @@ #ifndef CGROUP_H #define CGROUP_H -#include <stdint.h> - struct virCgroup; typedef struct virCgroup *virCgroupPtr; -#include "domain_conf.h" - -int virCgroupHaveSupport(void); +int virCgroupForDriver(const char *name, + virCgroupPtr *group, + int privileged, + int create); -int virCgroupForDomain(virDomainDefPtr def, - const char *driverName, - virCgroupPtr *group); +int virCgroupForDomain(virCgroupPtr driver, + const char *name, + virCgroupPtr *group, + int create); int virCgroupAddTask(virCgroupPtr group, pid_t pid); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7a62b67..0e9c9f7 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -382,6 +382,7 @@ virRun; virSkipSpaces; virKillProcess; virGetUserDirectory; +virGetUserName; virGetUserID; virGetGroupID; diff --git a/src/lxc_conf.h b/src/lxc_conf.h index 30a51e1..24aadf7 100644 --- a/src/lxc_conf.h +++ b/src/lxc_conf.h @@ -30,6 +30,7 @@ #include "domain_conf.h" #include "capabilities.h" #include "threads.h" +#include "cgroup.h" #define LXC_CONFIG_DIR SYSCONF_DIR "/libvirt/lxc" #define LXC_STATE_DIR LOCAL_STATE_DIR "/run/libvirt/lxc" @@ -41,6 +42,7 @@ struct __lxc_driver { virCapsPtr caps; + virCgroupPtr cgroup; virDomainObjList domains; char *configDir; char *autostartDir; diff --git a/src/lxc_controller.c b/src/lxc_controller.c index 08fbbe4..9ad48a7 100644 --- a/src/lxc_controller.c +++ b/src/lxc_controller.c @@ -48,7 +48,6 @@ #include "veth.h" #include "memory.h" #include "util.h" -#include "cgroup.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -69,6 +68,7 @@ struct cgroup_device_policy { */ static int lxcSetContainerResources(virDomainDefPtr def) { + virCgroupPtr driver; virCgroupPtr cgroup; int rc = -1; int i; @@ -82,14 +82,18 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; - if (virCgroupHaveSupport() != 0) - return 0; /* Not supported, so claim success */ + rc = virCgroupForDriver("lxc", &driver, 1, 0); + if (rc != 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, "%s", + _("Unable to get cgroup for driver")); + return rc; + } - rc = virCgroupForDomain(def, "lxc", &cgroup); + rc = virCgroupForDomain(driver, def->name, &cgroup, 1); if (rc != 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("Unable to create cgroup for %s\n"), def->name); - return rc; + _("Unable to create cgroup for domain %s"), def->name); + goto cleanup; } rc = virCgroupSetMemory(cgroup, def->maxmem); @@ -119,9 +123,10 @@ out: if (rc != 0) { virReportSystemError(NULL, -rc, "%s", _("Failed to set lxc resources")); - virCgroupRemove(cgroup); } +cleanup: + virCgroupFree(&driver); virCgroupFree(&cgroup); return rc; diff --git a/src/lxc_driver.c b/src/lxc_driver.c index 3503573..ff35845 100644 --- a/src/lxc_driver.c +++ b/src/lxc_driver.c @@ -46,7 +46,6 @@ #include "bridge.h" #include "veth.h" #include "event.h" -#include "cgroup.h" #include "nodeinfo.h" @@ -393,10 +392,10 @@ static int lxcDomainGetInfo(virDomainPtr dom, info->state = vm->state; - if (!virDomainIsActive(vm) || virCgroupHaveSupport() != 0) { + if (!virDomainIsActive(vm) || driver->cgroup == NULL) { info->cpuTime = 0; } else { - if (virCgroupForDomain(vm->def, "lxc", &cgroup) != 0) { + if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) { lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR, _("Unable to get cgroup for %s\n"), vm->def->name); goto cleanup; @@ -527,7 +526,8 @@ static int lxcVMCleanup(virConnectPtr conn, vethDelete(vm->def->nets[i]->ifname); } - if (virCgroupForDomain(vm->def, "lxc", &cgroup) == 0) { + if (driver->cgroup && + virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) == 0) { virCgroupRemove(cgroup); virCgroupFree(&cgroup); } @@ -1145,6 +1145,7 @@ static int lxcStartup(int privileged) { unsigned int i; char *ld; + int rc; /* Valgrind gets very annoyed when we clone containers, so * disable LXC when under valgrind @@ -1174,6 +1175,13 @@ static int lxcStartup(int privileged) lxc_driver->have_netns = lxcCheckNetNsSupport(); + rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1); + if (rc < 0) { + char buf[1024]; + VIR_WARN("Unable to create cgroup for driver: %s", + virStrerror(-rc, buf, sizeof(buf))); + } + /* Call function to load lxc driver configuration information */ if (lxcLoadDriverConfig(lxc_driver) < 0) goto cleanup; @@ -1193,7 +1201,6 @@ static int lxcStartup(int privileged) virDomainObjPtr vm = lxc_driver->domains.objs[i]; char *config = NULL; virDomainDefPtr tmp; - int rc; virDomainObjLock(vm); if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) { @@ -1330,7 +1337,7 @@ static int lxcSetSchedulerParameters(virDomainPtr domain, virDomainObjPtr vm = NULL; int ret = -1; - if (virCgroupHaveSupport() != 0) + if (driver->cgroup == NULL) return -1; lxcDriverLock(driver); @@ -1343,7 +1350,7 @@ static int lxcSetSchedulerParameters(virDomainPtr domain, goto cleanup; } - if (virCgroupForDomain(vm->def, "lxc", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; for (i = 0; i < nparams; i++) { @@ -1377,7 +1384,7 @@ static int lxcGetSchedulerParameters(virDomainPtr domain, unsigned long val; int ret = -1; - if (virCgroupHaveSupport() != 0) + if (driver->cgroup == NULL) return -1; if ((*nparams) != 1) { @@ -1396,7 +1403,7 @@ static int lxcGetSchedulerParameters(virDomainPtr domain, goto cleanup; } - if (virCgroupForDomain(vm->def, "lxc", &group) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) goto cleanup; if (virCgroupGetCpuShares(group, &val) != 0) diff --git a/src/util.c b/src/util.c index 2420f8b..5261714 100644 --- a/src/util.c +++ b/src/util.c @@ -1826,8 +1826,14 @@ int virRandom(int max) #ifdef HAVE_GETPWUID_R -char *virGetUserDirectory(virConnectPtr conn, - uid_t uid) +enum { + VIR_USER_ENT_DIRECTORY, + VIR_USER_ENT_NAME, +}; + +static char *virGetUserEnt(virConnectPtr conn, + uid_t uid, + int field) { char *strbuf; char *ret; @@ -1855,7 +1861,10 @@ char *virGetUserDirectory(virConnectPtr conn, return NULL; } - ret = strdup(pw->pw_dir); + if (field == VIR_USER_ENT_DIRECTORY) + ret = strdup(pw->pw_dir); + else + ret = strdup(pw->pw_name); VIR_FREE(strbuf); if (!ret) @@ -1864,6 +1873,18 @@ char *virGetUserDirectory(virConnectPtr conn, return ret; } +char *virGetUserDirectory(virConnectPtr conn, + uid_t uid) +{ + return virGetUserEnt(conn, uid, VIR_USER_ENT_DIRECTORY); +} + +char *virGetUserName(virConnectPtr conn, + uid_t uid) +{ + return virGetUserEnt(conn, uid, VIR_USER_ENT_NAME); +} + int virGetUserID(virConnectPtr conn, const char *name, diff --git a/src/util.h b/src/util.h index 1a7286c..e905c38 100644 --- a/src/util.h +++ b/src/util.h @@ -217,6 +217,8 @@ int virKillProcess(pid_t pid, int sig); #ifdef HAVE_GETPWUID_R char *virGetUserDirectory(virConnectPtr conn, uid_t uid); +char *virGetUserName(virConnectPtr conn, + uid_t uid); int virGetUserID(virConnectPtr conn, const char *name, uid_t *uid); -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:43PM +0100, Daniel P. Berrange wrote:
Allow the driver level cgroup to be managed explicitly by the hypervisor drivers, in order to detect whether to enable or disable cgroup support for domains. Provides better error reporting of failures. Also allow for creation of cgroups for unprivileged drivers if controller is accessible by the user.
* src/cgroup.c, src/cgroup.h: Add an API to obtain a driver cgroup * src/lxc_conf.h, src/lxc_controller.c, src/lxc_driver.c: Obtain a driver cgroup at startup and use that instead of re-creating everytime. * src/util.c, src/util.h, src/libvirt_private.syms: Add a virGetUserName() helper
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/qemu_driver.c: Place guest in cgroup upon startup. Remove cgroup upon shutdown Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/qemu_conf.h | 2 + src/qemu_driver.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 128 insertions(+), 7 deletions(-) diff --git a/src/qemu_conf.h b/src/qemu_conf.h index 50d7c0a..a5c8b3f 100644 --- a/src/qemu_conf.h +++ b/src/qemu_conf.h @@ -34,6 +34,7 @@ #include "domain_event.h" #include "threads.h" #include "security.h" +#include "cgroup.h" #define qemudDebug(fmt, ...) do {} while(0) @@ -77,6 +78,7 @@ struct qemud_driver { unsigned int qemuVersion; int nextvmid; + virCgroupPtr cgroup; virDomainObjList domains; brControl *brctl; diff --git a/src/qemu_driver.c b/src/qemu_driver.c index c4683ae..39ad47e 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -66,6 +66,7 @@ #include "node_device_conf.h" #include "pci.h" #include "security.h" +#include "cgroup.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -418,6 +419,7 @@ static int qemudStartup(int privileged) { char *base = NULL; char driverConf[PATH_MAX]; + int rc; if (VIR_ALLOC(qemu_driver) < 0) return -1; @@ -498,6 +500,13 @@ qemudStartup(int privileged) { VIR_FREE(base); + rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1); + if (rc < 0) { + char buf[1024]; + VIR_WARN("Unable to create cgroup for driver: %s", + virStrerror(-rc, buf, sizeof(buf))); + } + if ((qemu_driver->caps = qemudCapsInit()) == NULL) goto out_of_memory; @@ -649,6 +658,8 @@ qemudShutdown(void) { if (qemu_driver->brctl) brShutdown(qemu_driver->brctl); + virCgroupFree(&qemu_driver->cgroup); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); VIR_FREE(qemu_driver); @@ -1377,6 +1388,93 @@ error: return -1; } +static int qemuSetupCgroup(virConnectPtr conn, + struct qemud_driver *driver, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup = NULL; + int rc; + + if (driver->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 1); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } + + virCgroupFree(&cgroup); + return 0; + +cleanup: + if (cgroup) { + virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + } + return -1; +} + + +static int qemuRemoveCgroup(virConnectPtr conn, + struct qemud_driver *driver, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup; + int rc; + + if (driver->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); + if (rc != 0) { + qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Unable to find cgroup for %s\n"), + vm->def->name); + return rc; + } + + rc = virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + return rc; +} + +static int qemuAddToCgroup(struct qemud_driver *driver, + virDomainDefPtr def) +{ + virCgroupPtr cgroup = NULL; + int ret = -1; + int rc; + + if (driver->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(driver->cgroup, def->name, &cgroup, 0); + if (rc != 0) { + virReportSystemError(NULL, -rc, + _("unable to find cgroup for domain %s"), + def->name); + goto cleanup; + } + + rc = virCgroupAddTask(cgroup, getpid()); + if (rc != 0) { + virReportSystemError(NULL, -rc, + _("unable to add domain %s task %d to cgroup"), + def->name, getpid()); + goto cleanup; + } + + ret = 0; + +cleanup: + virCgroupFree(&cgroup); + return ret; +} + + static int qemudDomainSetSecurityLabel(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { if (vm->def->seclabel.label != NULL) @@ -1588,14 +1686,17 @@ static int qemuDomainSetAllDeviceOwnership(virConnectPtr conn, static virDomainPtr qemudDomainLookupByName(virConnectPtr conn, const char *name); -struct gemudHookData { - virConnectPtr conn; - virDomainObjPtr vm; - struct qemud_driver *driver; +struct qemudHookData { + virConnectPtr conn; + virDomainObjPtr vm; + struct qemud_driver *driver; }; static int qemudSecurityHook(void *data) { - struct gemudHookData *h = (struct gemudHookData *) data; + struct qemudHookData *h = data; + + if (qemuAddToCgroup(h->driver, h->vm->def) < 0) + return -1; if (qemudDomainSetSecurityLabel(h->conn, h->driver, h->vm) < 0) { qemudReportError(h->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -1668,7 +1769,7 @@ static int qemudStartVMDaemon(virConnectPtr conn, char *pidfile = NULL; int logfile; - struct gemudHookData hookData; + struct qemudHookData hookData; hookData.conn = conn; hookData.vm = vm; hookData.driver = driver; @@ -1689,6 +1790,9 @@ static int qemudStartVMDaemon(virConnectPtr conn, driver->securityDriver->domainGenSecurityLabel(conn, vm) < 0) return -1; + /* Ensure no historical cgroup for this VM is lieing around bogus settings */ + qemuRemoveCgroup(conn, driver, vm); + if ((vm->def->ngraphics == 1) && vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && vm->def->graphics[0]->data.vnc.autoport) { @@ -1729,6 +1833,9 @@ static int qemudStartVMDaemon(virConnectPtr conn, &qemuCmdFlags) < 0) goto cleanup; + if (qemuSetupCgroup(conn, driver, vm) < 0) + goto cleanup; + if (qemuPrepareHostDevices(conn, vm->def) < 0) goto cleanup; @@ -1855,6 +1962,7 @@ cleanup: VIR_FREE(vm->def->seclabel.label); VIR_FREE(vm->def->seclabel.imagelabel); } + qemuRemoveCgroup(conn, driver, vm); if ((vm->def->ngraphics == 1) && vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && vm->def->graphics[0]->data.vnc.autoport) @@ -1866,10 +1974,11 @@ cleanup: } -static void qemudShutdownVMDaemon(virConnectPtr conn ATTRIBUTE_UNUSED, +static void qemudShutdownVMDaemon(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { int ret; + int retries = 0; if (!virDomainIsActive(vm)) return; @@ -1909,6 +2018,16 @@ static void qemudShutdownVMDaemon(virConnectPtr conn ATTRIBUTE_UNUSED, VIR_WARN("Failed to restore all device ownership for %s", vm->def->name); +retry: + if ((ret = qemuRemoveCgroup(conn, driver, vm)) < 0) { + if (ret == -EBUSY && (retries++ < 5)) { + usleep(200*1000); + goto retry; + } + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + } + if (qemudRemoveDomainStatus(conn, driver, vm) < 0) { VIR_WARN(_("Failed to remove domain status for %s"), vm->def->name); -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:44PM +0100, Daniel P. Berrange wrote:
* src/qemu_driver.c: Place guest in cgroup upon startup. Remove cgroup upon shutdown
Okay
@@ -1668,7 +1769,7 @@ static int qemudStartVMDaemon(virConnectPtr conn, char *pidfile = NULL; int logfile;
- struct gemudHookData hookData; + struct qemudHookData hookData;
Again it too me some time to spot the g->q ... ! Funny how reviewing the same patch leds to the same reactions :-) ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters * src/lxc_driver.c: Fix to use unsigned long long consistently for schedular parameters * src/cgroup.h, src/cgroup.c: Fix cpu_shares to take unsigned long long * src/util.c, src/util.h, src/libvirt_private.syms: Add a virStrToDouble helper * src/virsh.c: Fix handling of --set arg to schedinfo command to honour the designated data type of each schedular tunable as declared by the driver Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 10 +- src/cgroup.h | 4 +- src/libvirt_private.syms | 1 + src/lxc_driver.c | 9 ++- src/qemu_driver.c | 151 +++++++++++++++++++++++++++++- src/util.c | 20 ++++ src/util.h | 3 + src/virsh.c | 232 +++++++++++++++++++++++++--------------------- 8 files changed, 314 insertions(+), 116 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index d3d45d2..1a80a20 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -790,23 +790,23 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, return rc; } -int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares) +int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares) { return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_CPU, - "cpu.shares", (uint64_t)shares); + "cpu.shares", shares); } -int virCgroupGetCpuShares(virCgroupPtr group, unsigned long *shares) +int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares) { return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPU, - "cpu.shares", (uint64_t *)shares); + "cpu.shares", shares); } int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) { return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPUACCT, - "cpuacct.usage", (uint64_t *)usage); + "cpuacct.usage", usage); } diff --git a/src/cgroup.h b/src/cgroup.h index dbb444b..f452e2d 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -39,8 +39,8 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major); -int virCgroupSetCpuShares(virCgroupPtr group, unsigned long shares); -int virCgroupGetCpuShares(virCgroupPtr group, unsigned long *shares); +int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares); +int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares); int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 0e9c9f7..22fb083 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -348,6 +348,7 @@ virStrToLong_i; virStrToLong_ll; virStrToLong_ull; virStrToLong_ui; +virStrToDouble; virFileLinkPointsTo; virFileResolveLink; saferead; diff --git a/src/lxc_driver.c b/src/lxc_driver.c index ff35845..843b066 100644 --- a/src/lxc_driver.c +++ b/src/lxc_driver.c @@ -1355,9 +1355,14 @@ static int lxcSetSchedulerParameters(virDomainPtr domain, for (i = 0; i < nparams; i++) { virSchedParameterPtr param = ¶ms[i]; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) { + lxcError(NULL, domain, VIR_ERR_INVALID_ARG, + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + } if (STREQ(param->field, "cpu_shares")) { - if (virCgroupSetCpuShares(group, params[i].value.ui) != 0) + if (virCgroupSetCpuShares(group, params[i].value.ul) != 0) goto cleanup; } else { lxcError(NULL, domain, VIR_ERR_INVALID_ARG, @@ -1381,7 +1386,7 @@ static int lxcGetSchedulerParameters(virDomainPtr domain, lxc_driver_t *driver = domain->conn->privateData; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; - unsigned long val; + unsigned long long val; int ret = -1; if (driver->cgroup == NULL) diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 39ad47e..6c8370c 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -5275,6 +5275,151 @@ cleanup: return ret; } + +static char *qemuGetSchedulerType(virDomainPtr dom, + int *nparams) +{ + struct qemud_driver *driver = dom->conn->privateData; + char *ret; + + if (driver->cgroup == NULL) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return NULL; + } + + if (nparams) + *nparams = 1; + + ret = strdup("posix"); + if (!ret) + virReportOOMError(dom->conn); + return ret; +} + +static int qemuSetSchedulerParameters(virDomainPtr dom, + virSchedParameterPtr params, + int nparams) +{ + struct qemud_driver *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + int ret = -1; + + if (driver->cgroup == NULL) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return -1; + } + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + qemuDriverUnlock(driver); + + if (vm == NULL) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + + for (i = 0; i < nparams; i++) { + virSchedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, "cpu_shares")) { + int rc; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG, + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + } + + rc = virCgroupSetCpuShares(group, params[i].value.ul); + if (rc != 0) { + virReportSystemError(dom->conn, -rc, "%s", + _("unable to set cpu shares tunable")); + goto cleanup; + } + } else { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INVALID_ARG, + _("Invalid parameter `%s'"), param->field); + goto cleanup; + } + } + ret = 0; + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + return ret; +} + +static int qemuGetSchedulerParameters(virDomainPtr dom, + virSchedParameterPtr params, + int *nparams) +{ + struct qemud_driver *driver = dom->conn->privateData; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + unsigned long long val; + int ret = -1; + int rc; + + if (driver->cgroup == NULL) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, + __FUNCTION__); + return -1; + } + + if ((*nparams) != 1) { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INVALID_ARG, + "%s", _("Invalid parameter count")); + return -1; + } + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + qemuDriverUnlock(driver); + + if (vm == NULL) { + qemudReportError(dom->conn, domain, NULL, VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + qemudReportError(dom->conn, dom, NULL, VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + + rc = virCgroupGetCpuShares(group, &val); + if (rc != 0) { + virReportSystemError(dom->conn, -rc, "%s", + _("unable to get cpu shares tunable")); + goto cleanup; + } + params[0].value.ul = val; + strncpy(params[0].field, "cpu_shares", sizeof(params[0].field)); + params[0].type = VIR_DOMAIN_SCHED_FIELD_ULLONG; + + ret = 0; + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + return ret; +} + + /* This uses the 'info blockstats' monitor command which was * integrated into both qemu & kvm in late 2007. If the command is * not supported we detect this and return the appropriate error. @@ -6248,9 +6393,9 @@ static virDriver qemuDriver = { qemudDomainDetachDevice, /* domainDetachDevice */ qemudDomainGetAutostart, /* domainGetAutostart */ qemudDomainSetAutostart, /* domainSetAutostart */ - NULL, /* domainGetSchedulerType */ - NULL, /* domainGetSchedulerParameters */ - NULL, /* domainSetSchedulerParameters */ + qemuGetSchedulerType, /* domainGetSchedulerType */ + qemuGetSchedulerParameters, /* domainGetSchedulerParameters */ + qemuSetSchedulerParameters, /* domainSetSchedulerParameters */ NULL, /* domainMigratePrepare (v1) */ qemudDomainMigratePerform, /* domainMigratePerform */ NULL, /* domainMigrateFinish */ diff --git a/src/util.c b/src/util.c index 5261714..ee64b28 100644 --- a/src/util.c +++ b/src/util.c @@ -1479,6 +1479,26 @@ virStrToLong_ull(char const *s, char **end_ptr, int base, unsigned long long *re return 0; } +int +virStrToDouble(char const *s, + char **end_ptr, + double *result) +{ + double val; + char *p; + int err; + + errno = 0; + val = strtod(s, &p); + err = (errno || (!end_ptr && *p) || p == s); + if (end_ptr) + *end_ptr = p; + if (err) + return -1; + *result = val; + return 0; +} + /** * virSkipSpaces: * @str: pointer to the char pointer used diff --git a/src/util.h b/src/util.h index e905c38..e761e83 100644 --- a/src/util.h +++ b/src/util.h @@ -154,6 +154,9 @@ int virStrToLong_ull(char const *s, char **end_ptr, int base, unsigned long long *result); +int virStrToDouble(char const *s, + char **end_ptr, + double *result); int virMacAddrCompare (const char *mac1, const char *mac2); diff --git a/src/virsh.c b/src/virsh.c index fff73a1..db3d8de 100644 --- a/src/virsh.c +++ b/src/virsh.c @@ -1181,111 +1181,118 @@ static const vshCmdOptDef opts_schedinfo[] = { }; static int -cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) +cmdSchedInfoUpdate(vshControl *ctl, const vshCmd *cmd, + virSchedParameterPtr param) { - char *schedulertype; - char *set; - char *param_name = NULL; - long long int param_value = 0; - virDomainPtr dom; - virSchedParameterPtr params = NULL; - int i, ret; - int nparams = 0; - int nr_inputparams = 0; - int inputparams = 0; - int weightfound = 0; - int setfound = 0; - int weight = 0; - int capfound = 0; - int cap = 0; - char str_weight[] = "weight"; - char str_cap[] = "cap"; - int ret_val = FALSE; - - if (!vshConnectionUsability(ctl, ctl->conn, TRUE)) - return FALSE; - - if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) - return FALSE; - - /* Deprecated Xen-only options */ - if(vshCommandOptBool(cmd, "weight")) { - weight = vshCommandOptInt(cmd, "weight", &weightfound); - if (!weightfound) { + int found; + char *data; + + /* Legacy 'weight' parameter */ + if (STREQ(param->field, "weight") && + param->type == VIR_DOMAIN_SCHED_FIELD_UINT && + vshCommandOptBool(cmd, "weight")) { + int val; + val = vshCommandOptInt(cmd, "weight", &found); + if (!found) { vshError(ctl, FALSE, "%s", _("Invalid value of weight")); - goto cleanup; + return -1; } else { - nr_inputparams++; + param->value.ui = val; } + return 1; } - if(vshCommandOptBool(cmd, "cap")) { - cap = vshCommandOptInt(cmd, "cap", &capfound); - if (!capfound) { + /* Legacy 'cap' parameter */ + if (STREQ(param->field, "cap") && + param->type == VIR_DOMAIN_SCHED_FIELD_UINT && + vshCommandOptBool(cmd, "cap")) { + int val; + val = vshCommandOptInt(cmd, "cap", &found); + if (!found) { vshError(ctl, FALSE, "%s", _("Invalid value of cap")); - goto cleanup; + return -1; } else { - nr_inputparams++; + param->value.ui = val; } + return 1; } - if(vshCommandOptBool(cmd, "set")) { - set = vshCommandOptString(cmd, "set", &setfound); - if (!setfound) { - vshError(ctl, FALSE, "%s", _("Error getting param")); - goto cleanup; + if ((data = vshCommandOptString(cmd, "set", NULL))) { + char *val = strchr(data, '='); + int match = 0; + if (!val) { + vshError(ctl, FALSE, "%s", _("Invalid syntax for --set, expecting name=value")); + return -1; } + *val = '\0'; + match = STREQ(data, param->field); + *val = '='; + val++; - param_name = vshMalloc(ctl, strlen(set) + 1); - if (param_name == NULL) - goto cleanup; + if (!match) + return 0; - if (sscanf(set, "%[^=]=%lli", param_name, ¶m_value) != 2) { - vshError(ctl, FALSE, "%s", _("Invalid value of param")); - goto cleanup; + switch (param->type) { + case VIR_DOMAIN_SCHED_FIELD_INT: + if (virStrToLong_i(val, NULL, 10, ¶m->value.i) < 0) { + vshError(ctl, FALSE, "%s", + _("Invalid value for parameter, expecting an int")); + return -1; + } + break; + case VIR_DOMAIN_SCHED_FIELD_UINT: + if (virStrToLong_ui(val, NULL, 10, ¶m->value.ui) < 0) { + vshError(ctl, FALSE, "%s", + _("Invalid value for parameter, expecting an unsigned int")); + return -1; + } + break; + case VIR_DOMAIN_SCHED_FIELD_LLONG: + if (virStrToLong_ll(val, NULL, 10, ¶m->value.l) < 0) { + vshError(ctl, FALSE, "%s", + _("Invalid value for parameter, expecting an long long")); + return -1; + } + break; + case VIR_DOMAIN_SCHED_FIELD_ULLONG: + if (virStrToLong_ull(val, NULL, 10, ¶m->value.ul) < 0) { + vshError(ctl, FALSE, "%s", + _("Invalid value for parameter, expecting an unsigned long long")); + return -1; + } + break; + case VIR_DOMAIN_SCHED_FIELD_DOUBLE: + if (virStrToDouble(val, NULL, ¶m->value.d) < 0) { + vshError(ctl, FALSE, "%s", _("Invalid value for parameter, expecting a double")); + return -1; + } + break; + case VIR_DOMAIN_SCHED_FIELD_BOOLEAN: + param->value.b = STREQ(val, "0") ? 0 : 1; } - - nr_inputparams++; - } - - params = vshMalloc(ctl, sizeof (virSchedParameter) * nr_inputparams); - if (params == NULL) { - goto cleanup; + return 1; } - if (weightfound) { - strncpy(params[inputparams].field,str_weight,sizeof(str_weight)); - params[inputparams].type = VIR_DOMAIN_SCHED_FIELD_UINT; - params[inputparams].value.ui = weight; - inputparams++; - } + return 0; +} - if (capfound) { - strncpy(params[inputparams].field,str_cap,sizeof(str_cap)); - params[inputparams].type = VIR_DOMAIN_SCHED_FIELD_UINT; - params[inputparams].value.ui = cap; - inputparams++; - } - /* End Deprecated Xen-only options */ - if (setfound) { - strncpy(params[inputparams].field,param_name,sizeof(params[0].field)); - params[inputparams].type = VIR_DOMAIN_SCHED_FIELD_LLONG; - params[inputparams].value.l = param_value; - inputparams++; - } +static int +cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) +{ + char *schedulertype; + virDomainPtr dom; + virSchedParameterPtr params = NULL; + int nparams = 0; + int update = 0; + int i, ret; + int ret_val = FALSE; - assert (inputparams == nr_inputparams); + if (!vshConnectionUsability(ctl, ctl->conn, TRUE)) + return FALSE; - /* Set SchedulerParameters */ - if (inputparams > 0) { - ret = virDomainSetSchedulerParameters(dom, params, inputparams); - if (ret == -1) { - goto cleanup; - } - } - free(params); - params = NULL; + if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) + return FALSE; /* Print SchedulerType */ schedulertype = virDomainGetSchedulerType(dom, &nparams); @@ -1298,21 +1305,38 @@ cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) goto cleanup; } - /* Get SchedulerParameters */ - params = vshMalloc(ctl, sizeof(virSchedParameter)* nparams); - if (params == NULL) { - goto cleanup; - } - for (i = 0; i < nparams; i++){ - params[i].type = 0; - memset (params[i].field, 0, sizeof params[i].field); - } - ret = virDomainGetSchedulerParameters(dom, params, &nparams); - if (ret == -1) { - goto cleanup; - } - ret_val = TRUE; - if(nparams){ + if (nparams) { + params = vshMalloc(ctl, sizeof(virSchedParameter)* nparams); + if (params == NULL) + goto cleanup; + + memset(params, 0, sizeof(virSchedParameter)* nparams); + ret = virDomainGetSchedulerParameters(dom, params, &nparams); + if (ret == -1) + goto cleanup; + + /* See if any params are being set */ + for (i = 0; i < nparams; i++){ + ret = cmdSchedInfoUpdate(ctl, cmd, &(params[i])); + if (ret == -1) + goto cleanup; + + if (ret == 1) + update = 1; + } + + /* Update parameters & refresh data */ + if (update) { + ret = virDomainSetSchedulerParameters(dom, params, nparams); + if (ret == -1) + goto cleanup; + + ret = virDomainGetSchedulerParameters(dom, params, &nparams); + if (ret == -1) + goto cleanup; + } + + ret_val = TRUE; for (i = 0; i < nparams; i++){ switch (params[i].type) { case VIR_DOMAIN_SCHED_FIELD_INT: @@ -1322,10 +1346,10 @@ cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) printf("%-15s: %u\n", params[i].field, params[i].value.ui); break; case VIR_DOMAIN_SCHED_FIELD_LLONG: - printf("%-15s: %Ld\n", params[i].field, params[i].value.l); + printf("%-15s: %lld\n", params[i].field, params[i].value.l); break; case VIR_DOMAIN_SCHED_FIELD_ULLONG: - printf("%-15s: %Lu\n", params[i].field, params[i].value.ul); + printf("%-15s: %llu\n", params[i].field, params[i].value.ul); break; case VIR_DOMAIN_SCHED_FIELD_DOUBLE: printf("%-15s: %f\n", params[i].field, params[i].value.d); @@ -1338,9 +1362,9 @@ cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) } } } + cleanup: free(params); - free(param_name); virDomainFree(dom); return ret_val; } -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:45PM +0100, Daniel P. Berrange wrote:
* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters * src/lxc_driver.c: Fix to use unsigned long long consistently for schedular parameters * src/cgroup.h, src/cgroup.c: Fix cpu_shares to take unsigned long long * src/util.c, src/util.h, src/libvirt_private.syms: Add a virStrToDouble helper * src/virsh.c: Fix handling of --set arg to schedinfo command to honour the designated data type of each schedular tunable as declared by the driver
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 10 +- src/cgroup.h | 4 +- src/libvirt_private.syms | 1 + src/lxc_driver.c | 9 ++- src/qemu_driver.c | 151 +++++++++++++++++++++++++++++- src/util.c | 20 ++++ src/util.h | 3 + [...] @@ -1355,9 +1355,14 @@ static int lxcSetSchedulerParameters(virDomainPtr domain,
for (i = 0; i < nparams; i++) { virSchedParameterPtr param = ¶ms[i]; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) { + lxcError(NULL, domain, VIR_ERR_INVALID_ARG, + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + }
Humpf, we are actually breaking the API in some ways there, what is the argument ? Consistency with qemu scheduling parameters ?
if (STREQ(param->field, "cpu_shares")) {
[...]
+ if (STREQ(param->field, "cpu_shares")) { + int rc; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) {
Apparently, but is that worth it ? [...]
--- a/src/virsh.c +++ b/src/virsh.c @@ -1181,111 +1181,118 @@ static const vshCmdOptDef opts_schedinfo[] = { };
static int -cmdSchedinfo(vshControl *ctl, const vshCmd *cmd) +cmdSchedInfoUpdate(vshControl *ctl, const vshCmd *cmd, + virSchedParameterPtr param) { [...] + /* Legacy 'weight' parameter */ + if (STREQ(param->field, "weight") && + param->type == VIR_DOMAIN_SCHED_FIELD_UINT && + vshCommandOptBool(cmd, "weight")) { + int val; + val = vshCommandOptInt(cmd, "weight", &found); + if (!found) { vshError(ctl, FALSE, "%s", _("Invalid value of weight")); - goto cleanup; + return -1; } else { - nr_inputparams++; + param->value.ui = val; } + return 1;
This API is and remains a pain, looking at the ESX driver there is just yet another completely different set ... ACK, though the lxc breakage is a bit nasty IMHO Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Thu, Jul 23, 2009 at 05:38:45PM +0200, Daniel Veillard wrote:
On Wed, Jul 22, 2009 at 04:23:45PM +0100, Daniel P. Berrange wrote:
* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters * src/lxc_driver.c: Fix to use unsigned long long consistently for schedular parameters * src/cgroup.h, src/cgroup.c: Fix cpu_shares to take unsigned long long * src/util.c, src/util.h, src/libvirt_private.syms: Add a virStrToDouble helper * src/virsh.c: Fix handling of --set arg to schedinfo command to honour the designated data type of each schedular tunable as declared by the driver
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 10 +- src/cgroup.h | 4 +- src/libvirt_private.syms | 1 + src/lxc_driver.c | 9 ++- src/qemu_driver.c | 151 +++++++++++++++++++++++++++++- src/util.c | 20 ++++ src/util.h | 3 + [...] @@ -1355,9 +1355,14 @@ static int lxcSetSchedulerParameters(virDomainPtr domain,
for (i = 0; i < nparams; i++) { virSchedParameterPtr param = ¶ms[i]; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) { + lxcError(NULL, domain, VIR_ERR_INVALID_ARG, + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + }
Humpf, we are actually breaking the API in some ways there, what is the argument ? Consistency with qemu scheduling parameters ?
Yes & no. It was essentially broken already. - The lxcGetSchedulerParameters method returned cpu_shares with type ULLONG, and filled the '.ul' field. - The lxcSetSchedulerParameters method did not check the type at all, and blindly read the '.ui' field instead. The language bindings need to know the data types for each parameter in order to covert from the languages' type to the parameter type. The Perl and Python both do this by calling virGetSchedulerParameters to fetch current values, and then updating the values in place, and then calling virSetSchedulerParameters to update them. So the fact that the LXC driver 'get' method returned ULLONG, means it should be also accepting ULLONG in the set method, otherwise it is impossible for the language bindings to work. A similar situation exists for virsh with its --set parameter, which can only work by calling 'get' to discover the existing type and then updating it. So this code is just validating what apps should have already been doing, and fixing the mistaken reading of .ui to be .ul Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On Thu, Jul 23, 2009 at 04:53:56PM +0100, Daniel P. Berrange wrote:
On Thu, Jul 23, 2009 at 05:38:45PM +0200, Daniel Veillard wrote:
On Wed, Jul 22, 2009 at 04:23:45PM +0100, Daniel P. Berrange wrote:
* src/qemu_driver.c: Add driver methods qemuGetSchedulerType, qemuGetSchedulerParameters, qemuSetSchedulerParameters * src/lxc_driver.c: Fix to use unsigned long long consistently for schedular parameters * src/cgroup.h, src/cgroup.c: Fix cpu_shares to take unsigned long long * src/util.c, src/util.h, src/libvirt_private.syms: Add a virStrToDouble helper * src/virsh.c: Fix handling of --set arg to schedinfo command to honour the designated data type of each schedular tunable as declared by the driver
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 10 +- src/cgroup.h | 4 +- src/libvirt_private.syms | 1 + src/lxc_driver.c | 9 ++- src/qemu_driver.c | 151 +++++++++++++++++++++++++++++- src/util.c | 20 ++++ src/util.h | 3 + [...] @@ -1355,9 +1355,14 @@ static int lxcSetSchedulerParameters(virDomainPtr domain,
for (i = 0; i < nparams; i++) { virSchedParameterPtr param = ¶ms[i]; + if (param->type != VIR_DOMAIN_SCHED_FIELD_ULLONG) { + lxcError(NULL, domain, VIR_ERR_INVALID_ARG, + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + }
Humpf, we are actually breaking the API in some ways there, what is the argument ? Consistency with qemu scheduling parameters ?
Yes & no. It was essentially broken already.
- The lxcGetSchedulerParameters method returned cpu_shares with type ULLONG, and filled the '.ul' field. - The lxcSetSchedulerParameters method did not check the type at all, and blindly read the '.ui' field instead.
The language bindings need to know the data types for each parameter in order to covert from the languages' type to the parameter type. The Perl and Python both do this by calling virGetSchedulerParameters to fetch current values, and then updating the values in place, and then calling virSetSchedulerParameters to update them. So the fact that the LXC driver 'get' method returned ULLONG, means it should be also accepting ULLONG in the set method, otherwise it is impossible for the language bindings to work. A similar situation exists for virsh with its --set parameter, which can only work by calling 'get' to discover the existing type and then updating it. So this code is just validating what apps should have already been doing, and fixing the mistaken reading of .ui to be .ul
Okay :-) thanks ! Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* src/qemu_driver.c: Set a restrictive block device whitelist for all QEMU guests. Update whitelist when hotplugging disks. * src/cgroup.h, src/cgroup.c: Add some more convenience methods for dealing with block device whitelists. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/cgroup.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/cgroup.h | 12 ++++++ src/qemu_driver.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 215 insertions(+), 9 deletions(-) diff --git a/src/cgroup.c b/src/cgroup.c index 1a80a20..35fedad 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -736,10 +736,7 @@ int virCgroupDenyAllDevices(virCgroupPtr group) * * Returns: 0 on success */ -int virCgroupAllowDevice(virCgroupPtr group, - char type, - int major, - int minor) +int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor) { int rc; char *devstr = NULL; @@ -768,9 +765,7 @@ out: * * Returns: 0 on success */ -int virCgroupAllowDeviceMajor(virCgroupPtr group, - char type, - int major) +int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major) { int rc; char *devstr = NULL; @@ -790,6 +785,108 @@ int virCgroupAllowDeviceMajor(virCgroupPtr group, return rc; } +/** + * virCgroupAllowDevicePath: + * + * @group: The cgroup to allow the device for + * @path: the device to allow + * + * Queries the type of device and its major/minor number, and + * adds that to the cgroup ACL + * + * Returns: 0 on success + */ +int virCgroupAllowDevicePath(virCgroupPtr group, const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupAllowDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} + +/** + * virCgroupDenyDevice: + * + * @group: The cgroup to deny a device for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device + * @minor: The minor number of the device + * + * Returns: 0 on success + */ +int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:%i rwm", type, major, minor) == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); +out: + VIR_FREE(devstr); + + return rc; +} + +/** + * virCgroupDenyDeviceMajor: + * + * @group: The cgroup to deny an entire device major type for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device type + * + * Returns: 0 on success + */ +int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:* rwm", type, major) == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); + out: + VIR_FREE(devstr); + + return rc; +} + +int virCgroupDenyDevicePath(virCgroupPtr group, const char *path) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return -EINVAL; + + return virCgroupDenyDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev)); +} + int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares) { return virCgroupSetValueU64(group, diff --git a/src/cgroup.h b/src/cgroup.h index f452e2d..efc3370 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -38,6 +38,18 @@ int virCgroupAllowDevice(virCgroupPtr group, int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major); +int virCgroupAllowDevicePath(virCgroupPtr group, + const char *path); + +int virCgroupDenyDevice(virCgroupPtr group, + char type, + int major, + int minor); +int virCgroupDenyDeviceMajor(virCgroupPtr group, + char type, + int major); +int virCgroupDenyDevicePath(virCgroupPtr group, + const char *path); int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares); int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares); diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 6c8370c..19572b4 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -1388,12 +1388,24 @@ error: return -1; } +static const char *const defaultDeviceACL[] = { + "/dev/null", "/dev/full", "/dev/zero", + "/dev/random", "/dev/urandom", + "/dev/ptmx", "/dev/kvm", "/dev/kqemu", + "/dev/rtc", "/dev/hpet", "/dev/net/tun", + NULL, +}; +#define DEVICE_PTY_MAJOR 136 +#define DEVICE_SND_MAJOR 116 + static int qemuSetupCgroup(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm) { virCgroupPtr cgroup = NULL; int rc; + unsigned int i; + const char *const *deviceACL = defaultDeviceACL; if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ @@ -1406,6 +1418,62 @@ static int qemuSetupCgroup(virConnectPtr conn, goto cleanup; } + rc = virCgroupDenyAllDevices(cgroup); + if (rc != 0) { + if (rc == -EPERM) { + VIR_WARN0("Group devices ACL is not accessible, disabling whitelisting"); + goto done; + } + + virReportSystemError(conn, -rc, + _("Unable to deny all devices for %s"), vm->def->name); + goto cleanup; + } + + for (i = 0; i < vm->def->ndisks ; i++) { + if (vm->def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK || + vm->def->disks[i]->src == NULL) + continue; + + rc = virCgroupAllowDevicePath(cgroup, + vm->def->disks[i]->src); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to allow device %s for %s"), + vm->def->disks[i]->src, vm->def->name); + goto cleanup; + } + } + + rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_PTY_MAJOR); + if (rc != 0) { + virReportSystemError(conn, -rc, "%s", + _("unable to allow /dev/pts/ devices")); + goto cleanup; + } + + if (vm->def->nsounds) { + rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_SND_MAJOR); + if (rc != 0) { + virReportSystemError(conn, -rc, "%s", + _("unable to allow /dev/snd/ devices")); + goto cleanup; + } + } + + for (i = 0; deviceACL[i] != NULL ; i++) { + rc = virCgroupAllowDevicePath(cgroup, + deviceACL[i]); + if (rc < 0 && + rc != -ENOENT) { + virReportSystemError(conn, -rc, + _("unable to allow device %s"), + deviceACL[i]); + goto cleanup; + } + } + +done: virCgroupFree(&cgroup); return 0; @@ -4836,6 +4904,7 @@ static int qemudDomainAttachDevice(virDomainPtr dom, virDomainObjPtr vm; virDomainDeviceDefPtr dev = NULL; unsigned int qemuCmdFlags; + virCgroupPtr cgroup = NULL; int ret = -1; qemuDriverLock(driver); @@ -4865,6 +4934,27 @@ static int qemudDomainAttachDevice(virDomainPtr dom, goto cleanup; if (dev->type == VIR_DOMAIN_DEVICE_DISK) { + if (driver->cgroup != NULL) { + if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) !=0 ) { + qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Unable to find cgroup for %s\n"), + vm->def->name); + goto cleanup; + } + if (dev->data.disk->src != NULL && + dev->data.disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && + virCgroupAllowDevicePath(cgroup, + dev->data.disk->src) < 0) { + qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("unable to allow device %s"), + dev->data.disk->src); + goto cleanup; + } + } + + if (driver->securityDriver) + driver->securityDriver->domainSetSecurityImageLabel(dom->conn, vm, dev->data.disk); + switch (dev->data.disk->device) { case VIR_DOMAIN_DISK_DEVICE_CDROM: case VIR_DOMAIN_DISK_DEVICE_FLOPPY: @@ -4893,7 +4983,7 @@ static int qemudDomainAttachDevice(virDomainPtr dom, qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, _("disk bus '%s' cannot be hotplugged."), virDomainDiskBusTypeToString(dev->data.disk->bus)); - goto cleanup; + /* fallthrough */ } break; @@ -4901,7 +4991,11 @@ static int qemudDomainAttachDevice(virDomainPtr dom, qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, _("disk device type '%s' cannot be hotplugged"), virDomainDiskDeviceTypeToString(dev->data.disk->device)); - goto cleanup; + /* Fallthrough */ + } + if (ret != 0) { + virCgroupDenyDevicePath(cgroup, + dev->data.disk->src); } } else if (dev->type == VIR_DOMAIN_DEVICE_NET) { ret = qemudDomainAttachNetDevice(dom->conn, vm, dev, qemuCmdFlags); @@ -4923,6 +5017,9 @@ static int qemudDomainAttachDevice(virDomainPtr dom, ret = -1; cleanup: + if (cgroup) + virCgroupFree(&cgroup); + if (ret < 0) { if (qemuDomainSetDeviceOwnership(dom->conn, driver, dev, 1) < 0) VIR_WARN0("Fail to restore disk device ownership"); -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:46PM +0100, Daniel P. Berrange wrote:
* src/qemu_driver.c: Set a restrictive block device whitelist for all QEMU guests. Update whitelist when hotplugging disks. * src/cgroup.h, src/cgroup.c: Add some more convenience methods for dealing with block device whitelists.
As said last time, best is to push this and see if/where it breaks, fairly hard to predict, ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

* qemud/libvirtd_qemu.aug, qemud/test_libvirtd_qemu.aug, src/qemu.conf: Add 'cgroups_controllers' and 'cgroups_device_acl' parameters * src/qemu_conf.h, src/qemu_conf.c: Load & parse configuration params for cgroups * src/qemu_driver.c: Only use cgroups controllers that are activated, and use configured device whitelist instead of default, if set. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- qemud/libvirtd_qemu.aug | 2 + qemud/test_libvirtd_qemu.aug | 21 +++++++- src/Makefile.am | 3 +- src/cgroup.c | 11 ---- src/cgroup.h | 12 +++++ src/qemu.conf | 34 +++++++++++++ src/qemu_conf.c | 61 ++++++++++++++++++++++++ src/qemu_conf.h | 3 + src/qemu_driver.c | 106 ++++++++++++++++++++++++------------------ 9 files changed, 192 insertions(+), 61 deletions(-) diff --git a/qemud/libvirtd_qemu.aug b/qemud/libvirtd_qemu.aug index 8cf0461..8b8aab2 100644 --- a/qemud/libvirtd_qemu.aug +++ b/qemud/libvirtd_qemu.aug @@ -31,6 +31,8 @@ module Libvirtd_qemu = | str_entry "vnc_sasl_dir" | str_entry "user" | str_entry "group" + | str_array_entry "cgroup_controllers" + | str_array_entry "cgroup_device_acl" (* Each enty in the config is one of the following three ... *) let entry = vnc_entry diff --git a/qemud/test_libvirtd_qemu.aug b/qemud/test_libvirtd_qemu.aug index f62da01..274c89d 100644 --- a/qemud/test_libvirtd_qemu.aug +++ b/qemud/test_libvirtd_qemu.aug @@ -83,6 +83,10 @@ vnc_sasl_dir = \"/some/directory/sasl2\" user = \"root\" group = \"root\" + +cgroup_controllers = [ \"cpu\", \"devices\" ] + +cgroup_device_acl = [ \"/dev/null\", \"/dev/full\", \"/dev/zero\" ] " test Libvirtd_qemu.lns get conf = @@ -165,7 +169,18 @@ group = \"root\" { "#comment" = "point to the directory, and create a qemu.conf in that location" } { "#comment" = "" } { "vnc_sasl_dir" = "/some/directory/sasl2" } -{ "#comment" = "" } +{ "#empty" } { "user"= "root" } -{ "#comment" = "" } -{ "group" = "root" } \ No newline at end of file +{ "#empty" } +{ "group" = "root" } +{ "#empty" } +{ "cgroup_controllers" + { "1" = "cpu" } + { "2" = "devices" } +} +{ "#empty" } +{ "cgroup_device_acl" + { "1" = "/dev/null" } + { "2" = "/dev/full" } + { "3" = "/dev/zero" } +} diff --git a/src/Makefile.am b/src/Makefile.am index 79826b1..b9b0bf7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -142,7 +142,8 @@ VBOX_DRIVER_EXTRA_DIST = vbox/vbox_tmpl.c vbox/README QEMU_DRIVER_SOURCES = \ qemu_conf.c qemu_conf.h \ - qemu_driver.c qemu_driver.h + qemu_driver.c qemu_driver.h \ + cgroup.c cgroup.h UML_DRIVER_SOURCES = \ uml_conf.c uml_conf.h \ diff --git a/src/cgroup.c b/src/cgroup.c index 35fedad..e678517 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -31,17 +31,6 @@ #define CGROUP_MAX_VAL 512 -enum { - VIR_CGROUP_CONTROLLER_CPU, - VIR_CGROUP_CONTROLLER_CPUACCT, - VIR_CGROUP_CONTROLLER_CPUSET, - VIR_CGROUP_CONTROLLER_MEMORY, - VIR_CGROUP_CONTROLLER_DEVICES, - - VIR_CGROUP_CONTROLLER_LAST -}; - -VIR_ENUM_DECL(virCgroupController); VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, "cpu", "cpuacct", "cpuset", "memory", "devices"); diff --git a/src/cgroup.h b/src/cgroup.h index efc3370..6d43b14 100644 --- a/src/cgroup.h +++ b/src/cgroup.h @@ -15,6 +15,18 @@ struct virCgroup; typedef struct virCgroup *virCgroupPtr; +enum { + VIR_CGROUP_CONTROLLER_CPU, + VIR_CGROUP_CONTROLLER_CPUACCT, + VIR_CGROUP_CONTROLLER_CPUSET, + VIR_CGROUP_CONTROLLER_MEMORY, + VIR_CGROUP_CONTROLLER_DEVICES, + + VIR_CGROUP_CONTROLLER_LAST +}; + +VIR_ENUM_DECL(virCgroupController); + int virCgroupForDriver(const char *name, virCgroupPtr *group, int privileged, diff --git a/src/qemu.conf b/src/qemu.conf index 3009725..653f487 100644 --- a/src/qemu.conf +++ b/src/qemu.conf @@ -95,3 +95,37 @@ # The group ID for QEMU processes run by the system instance #group = "root" + + +# What cgroup controllers to make use of with QEMU guests +# +# - 'cpu' - use for schedular tunables +# - 'devices' - use for device whitelisting +# +# NB, even if configured here, they won't be used unless +# the adminsitrator has mounted cgroups. eg +# +# mkdir /dev/cgroup +# mount -t cgroup -o devices,cpu none /dev/cgroup +# +# They can be mounted anywhere, and different controlers +# can be mounted in different locations. libvirt will detect +# where they are located. +# +# cgroup_controllers = [ "cpu", "devices" ] + +# This is the basic set of devices allowed / required by +# all virtual machines. +# +# As well as this, any configured block backed disks, +# all sound device, and all PTY devices are allowed. +# +# This will only need setting if newer QEMU suddenly +# wants some device we don't already know a bout. +# +#cgroup_device_acl = [ +# "/dev/null", "/dev/full", "/dev/zero", +# "/dev/random", "/dev/urandom", +# "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +# "/dev/rtc", "/dev/hpet", "/dev/net/tun", +#] diff --git a/src/qemu_conf.c b/src/qemu_conf.c index f483a51..3739fff 100644 --- a/src/qemu_conf.c +++ b/src/qemu_conf.c @@ -94,6 +94,7 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, virConfValuePtr p; char *user; char *group; + int i; /* Setup 2 critical defaults */ if (!(driver->vncListen = strdup("127.0.0.1"))) { @@ -218,6 +219,66 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } VIR_FREE(group); + p = virConfGetValue (conf, "cgroup_controllers"); + CHECK_TYPE ("cgroup_controllers", VIR_CONF_LIST); + if (p) { + virConfValuePtr pp; + for (i = 0, pp = p->list; pp; ++i, pp = pp->next) { + int ctl; + if (pp->type != VIR_CONF_STRING) { + VIR_ERROR("%s", _("cgroup_device_acl must be a list of strings")); + virConfFree(conf); + return -1; + } + ctl = virCgroupControllerTypeFromString(pp->str); + if (ctl < 0) { + VIR_ERROR("Unknown cgroup controller '%s'", pp->str); + virConfFree(conf); + return -1; + } + driver->cgroupControllers |= (1 << ctl); + } + } else { + driver->cgroupControllers = + (1 << VIR_CGROUP_CONTROLLER_CPU) | + (1 << VIR_CGROUP_CONTROLLER_DEVICES); + } + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (driver->cgroupControllers & (1 << i)) { + VIR_INFO("Configured cgroup controller '%s'", + virCgroupControllerTypeToString(i)); + } + } + + p = virConfGetValue (conf, "cgroup_device_acl"); + CHECK_TYPE ("cgroup_device_acl", VIR_CONF_LIST); + if (p) { + int len = 0; + virConfValuePtr pp; + for (pp = p->list; pp; pp = pp->next) + len++; + if (VIR_ALLOC_N(driver->cgroupDeviceACL, 1+len) < 0) { + virReportOOMError(NULL); + virConfFree(conf); + return -1; + } + for (i = 0, pp = p->list; pp; ++i, pp = pp->next) { + if (pp->type != VIR_CONF_STRING) { + VIR_ERROR("%s", _("cgroup_device_acl must be a list of strings")); + virConfFree(conf); + return -1; + } + driver->cgroupDeviceACL[i] = strdup (pp->str); + if (driver->cgroupDeviceACL[i] == NULL) { + virReportOOMError(NULL); + virConfFree(conf); + return -1; + } + + } + driver->cgroupDeviceACL[i] = NULL; + } + virConfFree (conf); return 0; } diff --git a/src/qemu_conf.h b/src/qemu_conf.h index a5c8b3f..e753ba0 100644 --- a/src/qemu_conf.h +++ b/src/qemu_conf.h @@ -79,6 +79,9 @@ struct qemud_driver { int nextvmid; virCgroupPtr cgroup; + int cgroupControllers; + char **cgroupDeviceACL; + virDomainObjList domains; brControl *brctl; diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 19572b4..60963db 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -125,6 +125,15 @@ static int qemudDetectVcpuPIDs(virConnectPtr conn, static struct qemud_driver *qemu_driver = NULL; +static int qemuCgroupControllerActive(struct qemud_driver *driver, + int controller) +{ + if (driver->cgroup == NULL) + return 0; + if (driver->cgroupControllers & (1 << controller)) + return 1; + return 0; +} static int qemudLogFD(virConnectPtr conn, struct qemud_driver *driver, const char* name) @@ -1405,7 +1414,10 @@ static int qemuSetupCgroup(virConnectPtr conn, virCgroupPtr cgroup = NULL; int rc; unsigned int i; - const char *const *deviceACL = defaultDeviceACL; + const char *const *deviceACL = + driver->cgroupDeviceACL ? + (const char *const *)driver->cgroupDeviceACL : + defaultDeviceACL; if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ @@ -1418,58 +1430,60 @@ static int qemuSetupCgroup(virConnectPtr conn, goto cleanup; } - rc = virCgroupDenyAllDevices(cgroup); - if (rc != 0) { - if (rc == -EPERM) { - VIR_WARN0("Group devices ACL is not accessible, disabling whitelisting"); - goto done; - } - - virReportSystemError(conn, -rc, - _("Unable to deny all devices for %s"), vm->def->name); - goto cleanup; - } - - for (i = 0; i < vm->def->ndisks ; i++) { - if (vm->def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK || - vm->def->disks[i]->src == NULL) - continue; - - rc = virCgroupAllowDevicePath(cgroup, - vm->def->disks[i]->src); + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_DEVICES)) { + rc = virCgroupDenyAllDevices(cgroup); if (rc != 0) { + if (rc == -EPERM) { + VIR_WARN0("Group devices ACL is not accessible, disabling whitelisting"); + goto done; + } + virReportSystemError(conn, -rc, - _("Unable to allow device %s for %s"), - vm->def->disks[i]->src, vm->def->name); + _("Unable to deny all devices for %s"), vm->def->name); goto cleanup; } - } - rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_PTY_MAJOR); - if (rc != 0) { - virReportSystemError(conn, -rc, "%s", - _("unable to allow /dev/pts/ devices")); - goto cleanup; - } + for (i = 0; i < vm->def->ndisks ; i++) { + if (vm->def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK || + vm->def->disks[i]->src == NULL) + continue; + + rc = virCgroupAllowDevicePath(cgroup, + vm->def->disks[i]->src); + if (rc != 0) { + virReportSystemError(conn, -rc, + _("Unable to allow device %s for %s"), + vm->def->disks[i]->src, vm->def->name); + goto cleanup; + } + } - if (vm->def->nsounds) { - rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_SND_MAJOR); + rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_PTY_MAJOR); if (rc != 0) { virReportSystemError(conn, -rc, "%s", - _("unable to allow /dev/snd/ devices")); + _("unable to allow /dev/pts/ devices")); goto cleanup; } - } - for (i = 0; deviceACL[i] != NULL ; i++) { - rc = virCgroupAllowDevicePath(cgroup, - deviceACL[i]); - if (rc < 0 && - rc != -ENOENT) { - virReportSystemError(conn, -rc, - _("unable to allow device %s"), - deviceACL[i]); - goto cleanup; + if (vm->def->nsounds) { + rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_SND_MAJOR); + if (rc != 0) { + virReportSystemError(conn, -rc, "%s", + _("unable to allow /dev/snd/ devices")); + goto cleanup; + } + } + + for (i = 0; deviceACL[i] != NULL ; i++) { + rc = virCgroupAllowDevicePath(cgroup, + deviceACL[i]); + if (rc < 0 && + rc != -ENOENT) { + virReportSystemError(conn, -rc, + _("unable to allow device %s"), + deviceACL[i]); + goto cleanup; + } } } @@ -4934,7 +4948,7 @@ static int qemudDomainAttachDevice(virDomainPtr dom, goto cleanup; if (dev->type == VIR_DOMAIN_DEVICE_DISK) { - if (driver->cgroup != NULL) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_DEVICES)) { if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) !=0 ) { qemudReportError(dom->conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("Unable to find cgroup for %s\n"), @@ -5379,7 +5393,7 @@ static char *qemuGetSchedulerType(virDomainPtr dom, struct qemud_driver *driver = dom->conn->privateData; char *ret; - if (driver->cgroup == NULL) { + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return NULL; @@ -5404,7 +5418,7 @@ static int qemuSetSchedulerParameters(virDomainPtr dom, virDomainObjPtr vm = NULL; int ret = -1; - if (driver->cgroup == NULL) { + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return -1; @@ -5469,7 +5483,7 @@ static int qemuGetSchedulerParameters(virDomainPtr dom, int ret = -1; int rc; - if (driver->cgroup == NULL) { + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT, __FUNCTION__); return -1; -- 1.6.2.5

On Wed, Jul 22, 2009 at 04:23:47PM +0100, Daniel P. Berrange wrote:
* qemud/libvirtd_qemu.aug, qemud/test_libvirtd_qemu.aug, src/qemu.conf: Add 'cgroups_controllers' and 'cgroups_device_acl' parameters * src/qemu_conf.h, src/qemu_conf.c: Load & parse configuration params for cgroups * src/qemu_driver.c: Only use cgroups controllers that are activated, and use configured device whitelist instead of default, if set.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- qemud/libvirtd_qemu.aug | 2 + qemud/test_libvirtd_qemu.aug | 21 +++++++- src/Makefile.am | 3 +- src/cgroup.c | 11 ---- src/cgroup.h | 12 +++++ src/qemu.conf | 34 +++++++++++++ src/qemu_conf.c | 61 ++++++++++++++++++++++++ src/qemu_conf.h | 3 + src/qemu_driver.c | 106 ++++++++++++++++++++++++------------------ [...] +++ b/src/qemu.conf @@ -95,3 +95,37 @@
# The group ID for QEMU processes run by the system instance #group = "root" + + +# What cgroup controllers to make use of with QEMU guests +# +# - 'cpu' - use for schedular tunables +# - 'devices' - use for device whitelisting +# +# NB, even if configured here, they won't be used unless +# the adminsitrator has mounted cgroups. eg +# +# mkdir /dev/cgroup +# mount -t cgroup -o devices,cpu none /dev/cgroup +# +# They can be mounted anywhere, and different controlers +# can be mounted in different locations. libvirt will detect +# where they are located. +# +# cgroup_controllers = [ "cpu", "devices" ] + +# This is the basic set of devices allowed / required by +# all virtual machines. +# +# As well as this, any configured block backed disks, +# all sound device, and all PTY devices are allowed. +# +# This will only need setting if newer QEMU suddenly +# wants some device we don't already know a bout. +# +#cgroup_device_acl = [ +# "/dev/null", "/dev/full", "/dev/zero", +# "/dev/random", "/dev/urandom", +# "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +# "/dev/rtc", "/dev/hpet", "/dev/net/tun", +#]
great, but that doesn't really replace documentation :-) [...]
- if (vm->def->nsounds) { - rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_SND_MAJOR); + rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_PTY_MAJOR); if (rc != 0) { virReportSystemError(conn, -rc, "%s", - _("unable to allow /dev/snd/ devices")); + _("unable to allow /dev/pts/ devices")); goto cleanup;
diff is really making the patch hard to read ... ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/
participants (2)
-
Daniel P. Berrange
-
Daniel Veillard