From: "Daniel P. Berrange" <berrange(a)redhat.com>
Historically QEMU/LXC guests have been placed in a cgroup layout
that is
$LOCATION-OF-LIBVIRTD/libvirt/{qemu,lxc}/$VMNAME
This is bad for a number of reasons:
- The cgroup hierarchy gets very deep which seriously
impacts kernel performance due to cgroups scalability
limitations.
- It is hard to set up cgroup policies which apply across
services and virtual machines, since all VMs are underneath
the libvirtd service.
To address this the default cgroup location is changed to
be
/system/$VMNAME.{lxc,qemu}.libvirt
This puts virtual machines at the same level in the hierarchy
as system services, allowing consistent policy to be set up
across all of them.
This also honours the new resource partition location from the
XML configuration, for example
<resource>
<partition>/virtualmachines/production</partition>
</resource>
will result in the VM being placed at
/virtualmachines/production/$VMNAME.{lxc,qemu}.libvirt
NB, with the exception of the default path, /system, which
is intended to always exist, libvirt will not attempt to
auto-create the partitions in the XML. It is the responsibility
of the admin/app to configure the partitions. Later libvirt
APIs will provide a way to do this.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/lxc/lxc_cgroup.c | 91 +++++++++++++++++++++++++++++++-------
src/lxc/lxc_cgroup.h | 2 +-
src/lxc/lxc_process.c | 4 +-
src/qemu/qemu_cgroup.c | 114 +++++++++++++++++++++++++++++++++++++-----------
src/qemu/qemu_cgroup.h | 3 +-
src/qemu/qemu_process.c | 2 +-
6 files changed, 169 insertions(+), 47 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 72940bd..8f19057 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -523,29 +523,88 @@ cleanup:
}
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup)
{
- virCgroupPtr driver = NULL;
- virCgroupPtr cgroup = NULL;
int rc;
+ virCgroupPtr parent = NULL;
+ virCgroupPtr cgroup = NULL;
- rc = virCgroupNewDriver("lxc", true, false, -1, &driver);
- if (rc != 0) {
- virReportSystemError(-rc, "%s",
- _("Unable to get cgroup for driver"));
- goto cleanup;
+ if (!def->resource && startup) {
+ virDomainResourceDefPtr res;
+
+ if (VIR_ALLOC(res) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ if (!(res->partition = strdup("/system"))) {
+ virReportOOMError();
+ VIR_FREE(res);
+ goto cleanup;
+ }
+
+ def->resource = res;
}
- rc = virCgroupNewDomainDriver(driver, def->name, true, &cgroup);
- if (rc != 0) {
- virReportSystemError(-rc,
- _("Unable to create cgroup for domain %s"),
- def->name);
- goto cleanup;
+ if (def->resource &&
+ def->resource->partition) {
+ if (def->resource->partition[0] != '/') {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource partition '%s' must start with '/'"),
+ def->resource->partition);
+ goto cleanup;
+ }
+ /* We only auto-create the default partition. In other
+ * cases we expect the sysadmin/app to have done so */
+ rc = virCgroupNewPartition(def->resource->partition,
+ STREQ(def->resource->partition, "/system"),
+ -1,
+ &parent);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to initialize %s cgroup"),
+ def->resource->partition);
+ goto cleanup;
+ }
+
+ rc = virCgroupNewDomainPartition(parent,
+ "lxc",
+ def->name,
+ true,
+ &cgroup);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ def->name);
+ goto cleanup;
+ }
+ } else {
+ rc = virCgroupNewDriver("lxc",
+ true,
+ true,
+ -1,
+ &parent);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ def->name);
+ goto cleanup;
+ }
+
+ rc = virCgroupNewDomainDriver(parent,
+ def->name,
+ true,
+ &cgroup);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ def->name);
+ goto cleanup;
+ }
}
cleanup:
- virCgroupFree(&driver);
+ virCgroupFree(&parent);
return cgroup;
}
@@ -556,7 +615,7 @@ virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def)
int ret = -1;
int rc;
- if (!(cgroup = virLXCCgroupCreate(def)))
+ if (!(cgroup = virLXCCgroupCreate(def, true)))
return NULL;
rc = virCgroupAddTask(cgroup, getpid());
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index 25a427c..f040de2 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -27,7 +27,7 @@
# include "lxc_fuse.h"
# include "virusb.h"
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def);
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup);
virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def);
int virLXCCgroupSetup(virDomainDefPtr def,
virCgroupPtr cgroup,
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 1bbffa3..ab07a1e 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -1049,7 +1049,7 @@ int virLXCProcessStart(virConnectPtr conn,
virCgroupFree(&priv->cgroup);
- if (!(priv->cgroup = virLXCCgroupCreate(vm->def)))
+ if (!(priv->cgroup = virLXCCgroupCreate(vm->def, true)))
return -1;
if (!virCgroupHasController(priv->cgroup,
@@ -1464,7 +1464,7 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm,
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm)))
goto error;
- if (!(priv->cgroup = virLXCCgroupCreate(vm->def)))
+ if (!(priv->cgroup = virLXCCgroupCreate(vm->def, false)))
goto error;
if (virLXCUpdateActiveUsbHostdevs(driver, vm->def) < 0)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index cb0faa1..db9aafe 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -188,46 +188,108 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
int qemuInitCgroup(virQEMUDriverPtr driver,
- virDomainObjPtr vm)
+ virDomainObjPtr vm,
+ bool startup)
{
- int rc;
+ int rc = -1;
qemuDomainObjPrivatePtr priv = vm->privateData;
- virCgroupPtr driverGroup = NULL;
+ virCgroupPtr parent = NULL;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
virCgroupFree(&priv->cgroup);
- rc = virCgroupNewDriver("qemu",
- cfg->privileged,
- true,
- cfg->cgroupControllers,
- &driverGroup);
- if (rc != 0) {
- if (rc == -ENXIO ||
- rc == -EPERM ||
- rc == -EACCES) { /* No cgroups mounts == success */
- VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
- goto done;
+ if (!vm->def->resource && startup) {
+ virDomainResourceDefPtr res;
+
+ if (VIR_ALLOC(res) < 0) {
+ virReportOOMError();
+ goto cleanup;
}
- virReportSystemError(-rc,
- _("Unable to create cgroup for %s"),
- vm->def->name);
- goto cleanup;
+ if (!(res->partition = strdup("/system"))) {
+ virReportOOMError();
+ VIR_FREE(res);
+ goto cleanup;
+ }
+
+ vm->def->resource = res;
}
- rc = virCgroupNewDomainDriver(driverGroup, vm->def->name, true, &priv->cgroup);
- if (rc != 0) {
- virReportSystemError(-rc,
- _("Unable to create cgroup for %s"),
- vm->def->name);
- goto cleanup;
+ if (vm->def->resource &&
+ vm->def->resource->partition) {
+ if (vm->def->resource->partition[0] != '/') {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource partition '%s' must start with '/'"),
+ vm->def->resource->partition);
+ goto cleanup;
+ }
+ /* We only auto-create the default partition. In other
+ * cases we expect the sysadmin/app to have done so */
+ rc = virCgroupNewPartition(vm->def->resource->partition,
+ STREQ(vm->def->resource->partition, "/system"),
+ cfg->cgroupControllers,
+ &parent);
+ if (rc != 0) {
+ if (rc == -ENXIO ||
+ rc == -EPERM ||
+ rc == -EACCES) { /* No cgroups mounts == success */
+ VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
+ goto done;
+ }
+
+ virReportSystemError(-rc,
+ _("Unable to initialize %s cgroup"),
+ vm->def->resource->partition);
+ goto cleanup;
+ }
+
+ rc = virCgroupNewDomainPartition(parent,
+ "qemu",
+ vm->def->name,
+ true,
+ &priv->cgroup);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ vm->def->name);
+ goto cleanup;
+ }
+ } else {
+ rc = virCgroupNewDriver("qemu",
+ cfg->privileged,
+ true,
+ cfg->cgroupControllers,
+ &parent);
+ if (rc != 0) {
+ if (rc == -ENXIO ||
+ rc == -EPERM ||
+ rc == -EACCES) { /* No cgroups mounts == success */
+ VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
+ goto done;
+ }
+
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ vm->def->name);
+ goto cleanup;
+ }
+
+ rc = virCgroupNewDomainDriver(parent,
+ vm->def->name,
+ true,
+ &priv->cgroup);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to create cgroup for %s"),
+ vm->def->name);
+ goto cleanup;
+ }
}
done:
rc = 0;
cleanup:
- virCgroupFree(&driverGroup);
+ virCgroupFree(&parent);
virObjectUnref(cfg);
return rc;
}
@@ -246,7 +308,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
(const char *const *)cfg->cgroupDeviceACL :
defaultDeviceACL;
- if (qemuInitCgroup(driver, vm) < 0)
+ if (qemuInitCgroup(driver, vm, true) < 0)
return -1;
if (!priv->cgroup)
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 6cbfebc..e63f443 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -37,7 +37,8 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev,
const char *path,
void *opaque);
int qemuInitCgroup(virQEMUDriverPtr driver,
- virDomainObjPtr vm);
+ virDomainObjPtr vm,
+ bool startup);
int qemuSetupCgroup(virQEMUDriverPtr driver,
virDomainObjPtr vm,
virBitmapPtr nodemask);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index da47b43..ce9f501 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3005,7 +3005,7 @@ qemuProcessReconnect(void *opaque)
if (qemuUpdateActiveUsbHostdevs(driver, obj->def) < 0)
goto error;
- if (qemuInitCgroup(driver, obj) < 0)
+ if (qemuInitCgroup(driver, obj, false) < 0)
goto error;
/* XXX: Need to change as long as lock is introduced for
--
1.8.1.4