From: "Daniel P. Berrange" <berrange(a)redhat.com>
Currently when getting an instance of virCgroupPtr we will
create the path in all cgroup controllers. Only at the virt
driver layer are we attempting to filter controllers. This
is bad because the mere act of creating the dirs in the
controllers can have a functional impact on the kernel,
particularly for performance.
Update the virCgroupForDriver() method to accept a bitmask
of controllers to use. Only create dirs in the controllers
that are requested. When creating cgroups for domains,
respect the active controller list from the parent cgroup
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/lxc/lxc_cgroup.c | 2 +-
src/lxc/lxc_driver.c | 2 +-
src/qemu/qemu_cgroup.c | 27 +++-----
src/qemu/qemu_conf.c | 16 +----
src/qemu/qemu_driver.c | 3 +-
src/util/vircgroup.c | 181 +++++++++++++++++++++++++++++++------------------
src/util/vircgroup.h | 6 +-
7 files changed, 132 insertions(+), 105 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 33c305a..545acaa 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -530,7 +530,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
int ret = -1;
int rc;
- rc = virCgroupForDriver("lxc", &driver, 1, 0);
+ rc = virCgroupForDriver("lxc", &driver, true, false, 0);
if (rc != 0) {
virReportSystemError(-rc, "%s",
_("Unable to get cgroup for driver"));
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 8603078..989c2f3 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1460,7 +1460,7 @@ static int lxcStartup(bool privileged,
lxc_driver->log_libvirtd = 0; /* by default log to container logfile */
lxc_driver->have_netns = lxcCheckNetNsSupport();
- rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
+ rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged,
true, -1);
if (rc < 0) {
char buf[1024] ATTRIBUTE_UNUSED;
VIR_DEBUG("Unable to create cgroup for LXC driver: %s",
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index c9b4ca2..2cdc2b7 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -57,8 +57,6 @@ bool qemuCgroupControllerActive(virQEMUDriverPtr driver,
goto cleanup;
if (!virCgroupMounted(driver->cgroup, controller))
goto cleanup;
- if (cfg->cgroupControllers & (1 << controller))
- ret = true;
cleanup:
virObjectUnref(cfg);
@@ -668,7 +666,7 @@ int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
virDomainDefPtr def = vm->def;
unsigned long long period = vm->def->cputune.emulator_period;
long long quota = vm->def->cputune.emulator_quota;
- int rc, i;
+ int rc;
if ((period || quota) &&
(!driver->cgroup ||
@@ -697,22 +695,13 @@ int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
goto cleanup;
}
- for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
- if (i != VIR_CGROUP_CONTROLLER_CPU &&
- i != VIR_CGROUP_CONTROLLER_CPUACCT &&
- i != VIR_CGROUP_CONTROLLER_CPUSET)
- continue;
-
- if (!qemuCgroupControllerActive(driver, i))
- continue;
- rc = virCgroupMoveTask(cgroup, cgroup_emulator, i);
- if (rc < 0) {
- virReportSystemError(-rc,
- _("Unable to move tasks from domain cgroup to
"
- "emulator cgroup in controller %d for %s"),
- i, vm->def->name);
- goto cleanup;
- }
+ rc = virCgroupMoveTask(cgroup, cgroup_emulator);
+ if (rc < 0) {
+ virReportSystemError(-rc,
+ _("Unable to move tasks from domain cgroup to "
+ "emulator cgroup for %s"),
+ vm->def->name);
+ goto cleanup;
}
if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index c2e2e10..5a3dde0 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -134,14 +134,7 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
}
cfg->dynamicOwnership = privileged;
- cfg->cgroupControllers =
- (1 << VIR_CGROUP_CONTROLLER_CPU) |
- (1 << VIR_CGROUP_CONTROLLER_DEVICES) |
- (1 << VIR_CGROUP_CONTROLLER_MEMORY) |
- (1 << VIR_CGROUP_CONTROLLER_BLKIO) |
- (1 << VIR_CGROUP_CONTROLLER_CPUSET) |
- (1 << VIR_CGROUP_CONTROLLER_CPUACCT);
-
+ cfg->cgroupControllers = -1; /* -1 == auto-detect */
if (privileged) {
if (virAsprintf(&cfg->logDir,
@@ -454,6 +447,7 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
p = virConfGetValue(conf, "cgroup_controllers");
CHECK_TYPE("cgroup_controllers", VIR_CONF_LIST);
if (p) {
+ cfg->cgroupControllers = 0;
virConfValuePtr pp;
for (i = 0, pp = p->list; pp; ++i, pp = pp->next) {
int ctl;
@@ -472,12 +466,6 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
cfg->cgroupControllers |= (1 << ctl);
}
}
- for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
- if (cfg->cgroupControllers & (1 << i)) {
- VIR_INFO("Configured cgroup controller '%s'",
- virCgroupControllerTypeToString(i));
- }
- }
p = virConfGetValue(conf, "cgroup_device_acl");
CHECK_TYPE("cgroup_device_acl", VIR_CONF_LIST);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 1f9b8b2..aa9eb0f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -628,7 +628,8 @@ qemuStartup(bool privileged,
goto error;
}
- rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged,
1);
+ rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged,
true,
+ cfg->cgroupControllers);
if (rc < 0) {
VIR_INFO("Unable to create cgroup for driver: %s",
virStrerror(-rc, ebuf, sizeof(ebuf)));
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 266cecb..0d6db58 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -70,8 +70,6 @@ typedef enum {
* before creating subcgroups and
* attaching tasks
*/
- VIR_CGROUP_VCPU = 1 << 1, /* create subdir only under the cgroup cpu,
- * cpuacct and cpuset if possible. */
} virCgroupFlags;
/**
@@ -108,6 +106,16 @@ bool virCgroupMounted(virCgroupPtr cgroup, int controller)
return cgroup->controllers[controller].mountPoint != NULL;
}
+static int virCgroupMountMask(virCgroupPtr cgroup)
+{
+ int controllers = 0;
+ int i;
+ for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++)
+ if (cgroup->controllers[i].mountPoint != NULL)
+ controllers |= (1 << i);
+ return controllers;
+}
+
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
/*
* Process /proc/mounts figuring out what controllers are
@@ -230,11 +238,12 @@ no_memory:
}
-static int virCgroupDetect(virCgroupPtr group)
+static int virCgroupDetect(virCgroupPtr group,
+ int controllers)
{
- int any = 0;
int rc;
int i;
+ int j;
rc = virCgroupDetectMounts(group);
if (rc < 0) {
@@ -242,14 +251,55 @@ static int virCgroupDetect(virCgroupPtr group)
return rc;
}
- /* Check that at least 1 controller is available */
- for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
- if (group->controllers[i].mountPoint != NULL)
- any = 1;
+ if (controllers >= 0) {
+ VIR_DEBUG("Validating controllers %d", controllers);
+ for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+ VIR_DEBUG("Controller '%s' wanted=%s",
+ virCgroupControllerTypeToString(i),
+ (1 << i) & controllers ? "yes" :
"no");
+ if (((1 << i) & controllers)) {
+ /* Ensure requested controller is present */
+ if (!group->controllers[i].mountPoint) {
+ VIR_DEBUG("Requested controlled '%s' not mounted",
+ virCgroupControllerTypeToString(i));
+ return -ENOENT;
+ }
+ } else {
+ /* Check whether a request to disable a controller
+ * clashes with co-mounting of controllers */
+ for (j = 0 ; j < VIR_CGROUP_CONTROLLER_LAST ; j++) {
+ if (j == i)
+ continue;
+ if (!((1 << j) & controllers))
+ continue;
+
+ if (STREQ_NULLABLE(group->controllers[i].mountPoint,
+ group->controllers[j].mountPoint)) {
+ VIR_ERROR("Controller '%s' is not wanted, but
'%s' is co-mounted",
+ virCgroupControllerTypeToString(i),
+ virCgroupControllerTypeToString(j));
+ return -EINVAL;
+ }
+ }
+ VIR_FREE(group->controllers[i].mountPoint);
+ }
+ }
+ } else {
+ VIR_DEBUG("Auto-detecting controllers");
+ controllers = 0;
+ for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+ VIR_DEBUG("Controller '%s' present=%s",
+ virCgroupControllerTypeToString(i),
+ group->controllers[i].mountPoint ? "yes" :
"no");
+ if (group->controllers[i].mountPoint == NULL)
+ continue;
+ controllers |= (1 << i);
+ }
}
- if (!any)
- return -ENXIO;
+ /* Check that at least 1 controller is available */
+ if (!controllers)
+ return -ENXIO;
rc = virCgroupDetectPlacement(group);
@@ -542,16 +592,6 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
if (!group->controllers[i].mountPoint)
continue;
- /* We need to control cpu bandwidth for each vcpu now */
- if ((flags & VIR_CGROUP_VCPU) &&
- (i != VIR_CGROUP_CONTROLLER_CPU &&
- i != VIR_CGROUP_CONTROLLER_CPUACCT &&
- i != VIR_CGROUP_CONTROLLER_CPUSET)) {
- /* treat it as unmounted and we can use virCgroupAddTask */
- VIR_FREE(group->controllers[i].mountPoint);
- continue;
- }
-
rc = virCgroupPathOfController(group, i, "", &path);
if (rc < 0)
return rc;
@@ -611,12 +651,13 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
static int virCgroupNew(const char *path,
+ int controllers,
virCgroupPtr *group)
{
int rc = 0;
char *typpath = NULL;
- VIR_DEBUG("New group %s", path);
+ VIR_DEBUG("path=%s controllers=%d", path, controllers);
*group = NULL;
if (VIR_ALLOC((*group)) != 0) {
@@ -629,7 +670,7 @@ static int virCgroupNew(const char *path,
goto err;
}
- rc = virCgroupDetect(*group);
+ rc = virCgroupDetect(*group, controllers);
if (rc < 0)
goto err;
@@ -645,17 +686,18 @@ err:
static int virCgroupAppRoot(bool privileged,
virCgroupPtr *group,
- bool create)
+ bool create,
+ int controllers)
{
virCgroupPtr rootgrp = NULL;
int rc;
- rc = virCgroupNew("/", &rootgrp);
+ rc = virCgroupNew("/", controllers, &rootgrp);
if (rc != 0)
return rc;
if (privileged) {
- rc = virCgroupNew("/libvirt", group);
+ rc = virCgroupNew("/libvirt", controllers, group);
} else {
char *rootname;
char *username;
@@ -671,7 +713,7 @@ static int virCgroupAppRoot(bool privileged,
goto cleanup;
}
- rc = virCgroupNew(rootname, group);
+ rc = virCgroupNew(rootname, controllers, group);
VIR_FREE(rootname);
}
if (rc != 0)
@@ -779,6 +821,7 @@ int virCgroupRemove(virCgroupPtr group)
return rc;
}
+
/**
* virCgroupAddTask:
*
@@ -872,45 +915,30 @@ cleanup:
*
* Returns: 0 on success or -errno on failure
*/
-int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group,
- int controller)
+int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
{
- int rc = 0, err = 0;
+ int rc = 0;
char *content = NULL;
+ int i;
- if (controller < VIR_CGROUP_CONTROLLER_CPU ||
- controller > VIR_CGROUP_CONTROLLER_BLKIO)
- return -EINVAL;
-
- if (!src_group->controllers[controller].mountPoint ||
- !dest_group->controllers[controller].mountPoint) {
- return -EINVAL;
- }
-
- rc = virCgroupGetValueStr(src_group, controller, "tasks", &content);
- if (rc != 0)
- return rc;
+ for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+ if (!src_group->controllers[i].mountPoint ||
+ !dest_group->controllers[i].mountPoint)
+ continue;
- rc = virCgroupAddTaskStrController(dest_group, content, controller);
- if (rc != 0)
- goto cleanup;
+ rc = virCgroupGetValueStr(src_group, i, "tasks", &content);
+ if (rc != 0)
+ return rc;
- VIR_FREE(content);
+ rc = virCgroupAddTaskStrController(dest_group, content, i);
+ if (rc != 0)
+ goto cleanup;
- return 0;
+ VIR_FREE(content);
+ }
cleanup:
- /*
- * We don't need to recover dest_cgroup because cgroup will make sure
- * that one task only resides in one cgroup of the same controller.
- */
- err = virCgroupAddTaskStrController(src_group, content, controller);
- if (err != 0)
- VIR_ERROR(_("Cannot recover cgroup %s from %s"),
- src_group->controllers[controller].mountPoint,
- dest_group->controllers[controller].mountPoint);
VIR_FREE(content);
-
return rc;
}
@@ -926,13 +954,15 @@ cleanup:
int virCgroupForDriver(const char *name,
virCgroupPtr *group,
bool privileged,
- bool create)
+ bool create,
+ int controllers)
{
int rc;
char *path = NULL;
virCgroupPtr rootgrp = NULL;
- rc = virCgroupAppRoot(privileged, &rootgrp, create);
+ rc = virCgroupAppRoot(privileged, &rootgrp,
+ create, controllers);
if (rc != 0)
goto out;
@@ -941,7 +971,7 @@ int virCgroupForDriver(const char *name,
goto out;
}
- rc = virCgroupNew(path, group);
+ rc = virCgroupNew(path, controllers, group);
VIR_FREE(path);
if (rc == 0) {
@@ -979,7 +1009,7 @@ int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED,
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
int virCgroupForSelf(virCgroupPtr *group)
{
- return virCgroupNew("/", group);
+ return virCgroupNew("/", -1, group);
}
#else
int virCgroupForSelf(virCgroupPtr *group ATTRIBUTE_UNUSED)
@@ -1005,6 +1035,7 @@ int virCgroupForDomain(virCgroupPtr driver,
{
int rc;
char *path;
+ int controllers;
if (driver == NULL)
return -EINVAL;
@@ -1012,7 +1043,9 @@ int virCgroupForDomain(virCgroupPtr driver,
if (virAsprintf(&path, "%s/%s", driver->path, name) < 0)
return -ENOMEM;
- rc = virCgroupNew(path, group);
+ controllers = virCgroupMountMask(driver);
+
+ rc = virCgroupNew(path, controllers, group);
VIR_FREE(path);
if (rc == 0) {
@@ -1060,6 +1093,7 @@ int virCgroupForVcpu(virCgroupPtr driver,
{
int rc;
char *path;
+ int controllers;
if (driver == NULL)
return -EINVAL;
@@ -1067,11 +1101,18 @@ int virCgroupForVcpu(virCgroupPtr driver,
if (virAsprintf(&path, "%s/vcpu%d", driver->path, vcpuid) < 0)
return -ENOMEM;
- rc = virCgroupNew(path, group);
+ controllers = virCgroupMountMask(driver);
+
+ /* Filter parent controllers to just the CPU ones */
+ controllers &= ((1 << VIR_CGROUP_CONTROLLER_CPU) |
+ (1 << VIR_CGROUP_CONTROLLER_CPUACCT) |
+ (1 << VIR_CGROUP_CONTROLLER_CPUSET));
+
+ rc = virCgroupNew(path, controllers, group);
VIR_FREE(path);
if (rc == 0) {
- rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
+ rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_NONE);
if (rc != 0)
virCgroupFree(group);
}
@@ -1103,6 +1144,7 @@ int virCgroupForEmulator(virCgroupPtr driver,
{
int rc;
char *path;
+ int controllers;
if (driver == NULL)
return -EINVAL;
@@ -1110,11 +1152,18 @@ int virCgroupForEmulator(virCgroupPtr driver,
if (virAsprintf(&path, "%s/emulator", driver->path) < 0)
return -ENOMEM;
- rc = virCgroupNew(path, group);
+ controllers = virCgroupMountMask(driver);
+
+ /* Filter parent controllers to just the CPU ones */
+ controllers &= ((1 << VIR_CGROUP_CONTROLLER_CPU) |
+ (1 << VIR_CGROUP_CONTROLLER_CPUACCT) |
+ (1 << VIR_CGROUP_CONTROLLER_CPUSET));
+
+ rc = virCgroupNew(path, controllers, group);
VIR_FREE(path);
if (rc == 0) {
- rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
+ rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_NONE);
if (rc != 0)
virCgroupFree(group);
}
@@ -2014,7 +2063,7 @@ static int virCgroupKillRecursiveInternal(virCgroupPtr group, int
signum, virHas
goto cleanup;
}
- if ((rc = virCgroupNew(subpath, &subgroup)) != 0)
+ if ((rc = virCgroupNew(subpath, -1, &subgroup)) != 0)
goto cleanup;
if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0)
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index 45a2006..725d2d0 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -47,7 +47,8 @@ VIR_ENUM_DECL(virCgroupController);
int virCgroupForDriver(const char *name,
virCgroupPtr *group,
bool privileged,
- bool create);
+ bool create,
+ int controllers);
int virCgroupForSelf(virCgroupPtr *group);
@@ -77,8 +78,7 @@ int virCgroupAddTaskController(virCgroupPtr group,
int controller);
int virCgroupMoveTask(virCgroupPtr src_group,
- virCgroupPtr dest_group,
- int controller);
+ virCgroupPtr dest_group);
int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight);
int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight);
--
1.8.1.4