Currently the QEMU driver has three ways of setting up cgroups. It either
skips them entirely (if non-root), or uses systemd-machined, or uses
cgroups directly.
It is further possible to register directly with systemd and bypass
machined. We don't support this by systemd-nsspawn does and we ought
to.
This change adds ability to configure the mechanism for registering
resources between all these options explicitly. via
<resource register="none|direct|machined|systemd"/>
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/conf/domain_conf.c | 42 ++++++++++++++++++++++-------
src/conf/domain_conf.h | 12 +++++++++
src/libvirt_private.syms | 2 ++
src/lxc/lxc_cgroup.c | 34 ++++++++++++++++++++++++
src/lxc/lxc_cgroup.h | 3 +++
src/lxc/lxc_process.c | 11 ++++----
src/qemu/qemu_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++-------
src/util/vircgroup.c | 55 ++++++++++++++++++++++++--------------
src/util/vircgroup.h | 10 ++++++-
9 files changed, 194 insertions(+), 44 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 9a62bc472c..fb8e7a0ec7 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -910,6 +910,14 @@ VIR_ENUM_IMPL(virDomainHPTResizing,
"required",
);
+VIR_ENUM_IMPL(virDomainResourceRegister,
+ VIR_DOMAIN_RESOURCE_REGISTER_LAST,
+ "default",
+ "none",
+ "cgroup",
+ "machined",
+ "systemd");
+
/* Internal mapping: subset of block job types that can be present in
* <mirror> XML (remaining types are not two-phase). */
VIR_ENUM_DECL(virDomainBlockJob)
@@ -17698,16 +17706,20 @@ virDomainResourceDefParse(xmlNodePtr node,
{
virDomainResourceDefPtr def = NULL;
xmlNodePtr tmp = ctxt->node;
+ char *reg;
ctxt->node = node;
if (VIR_ALLOC(def) < 0)
goto error;
- /* Find out what type of virtualization to use */
- if (!(def->partition = virXPathString("string(./partition)", ctxt))) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- "%s", _("missing resource partition
attribute"));
+ def->partition = virXPathString("string(./partition)", ctxt);
+
+ reg = virXMLPropString(node, "register");
+ if (reg != NULL &&
+ (def->reg = virDomainResourceRegisterTypeFromString(reg)) <= 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ "%s", _("Invalid register attribute"));
goto error;
}
@@ -25568,11 +25580,23 @@ static void
virDomainResourceDefFormat(virBufferPtr buf,
virDomainResourceDefPtr def)
{
- virBufferAddLit(buf, "<resource>\n");
- virBufferAdjustIndent(buf, 2);
- virBufferEscapeString(buf, "<partition>%s</partition>\n",
def->partition);
- virBufferAdjustIndent(buf, -2);
- virBufferAddLit(buf, "</resource>\n");
+ if (def->reg == VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT &&
+ def->partition == NULL)
+ return;
+
+ virBufferAddLit(buf, "<resource");
+ if (def->reg != VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT)
+ virBufferAsprintf(buf, " register='%s'",
virDomainResourceRegisterTypeToString(def->reg));
+
+ if (def->partition) {
+ virBufferAddLit(buf, ">\n");
+ virBufferAdjustIndent(buf, 2);
+ virBufferEscapeString(buf, "<partition>%s</partition>\n",
def->partition);
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</resource>\n");
+ } else {
+ virBufferAddLit(buf, "/>\n");
+ }
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 6f7f96b3dd..a7a6628a36 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2145,9 +2145,20 @@ struct _virDomainPanicDef {
void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights,
int ndevices);
+typedef enum {
+ VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT,
+ VIR_DOMAIN_RESOURCE_REGISTER_NONE,
+ VIR_DOMAIN_RESOURCE_REGISTER_CGROUP,
+ VIR_DOMAIN_RESOURCE_REGISTER_MACHINED,
+ VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD,
+
+ VIR_DOMAIN_RESOURCE_REGISTER_LAST,
+} virDomainResourceRegister;
+
typedef struct _virDomainResourceDef virDomainResourceDef;
typedef virDomainResourceDef *virDomainResourceDefPtr;
struct _virDomainResourceDef {
+ int reg; /* enum virDomainResourceRegister */
char *partition;
};
@@ -3325,6 +3336,7 @@ VIR_ENUM_DECL(virDomainMemorySource)
VIR_ENUM_DECL(virDomainMemoryAllocation)
VIR_ENUM_DECL(virDomainIOMMUModel)
VIR_ENUM_DECL(virDomainShmemModel)
+VIR_ENUM_DECL(virDomainResourceRegister)
/* from libvirt.h */
VIR_ENUM_DECL(virDomainState)
VIR_ENUM_DECL(virDomainNostateReason)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d5c3b9abb5..a0fde65dba 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -489,6 +489,8 @@ virDomainRedirdevBusTypeToString;
virDomainRedirdevDefFind;
virDomainRedirdevDefFree;
virDomainRedirdevDefRemove;
+virDomainResourceRegisterTypeFromString;
+virDomainResourceRegisterTypeToString;
virDomainRNGBackendTypeToString;
virDomainRNGDefFree;
virDomainRNGFind;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 3369801870..7bd479df1b 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -478,6 +478,35 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
return ret;
}
+int virLXCCgroupMode(virDomainResourceRegister reg,
+ virCgroupRegister *cgreg)
+{
+ switch (reg) {
+ case VIR_DOMAIN_RESOURCE_REGISTER_NONE:
+ goto unsupported;
+ case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT:
+ *cgreg = VIR_CGROUP_REGISTER_DEFAULT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED:
+ *cgreg = VIR_CGROUP_REGISTER_MACHINED;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP:
+ *cgreg = VIR_CGROUP_REGISTER_DIRECT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD:
+ default:
+ goto unsupported;
+ }
+
+ return 0;
+
+ unsupported:
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource register '%s' not available"),
+ virDomainResourceRegisterTypeToString(reg));
+ return -1;
+}
+
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
pid_t initpid,
@@ -485,11 +514,15 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
int *nicindexes)
{
virCgroupPtr cgroup = NULL;
+ virCgroupRegister reg;
char *machineName = virLXCDomainGetMachineName(def, 0);
if (!machineName)
goto cleanup;
+ if (virLXCCgroupMode(def->resource->reg, ®) < 0)
+ goto cleanup;
+
if (def->resource->partition[0] != '/') {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Resource partition '%s' must start with
'/'"),
@@ -504,6 +537,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
initpid,
true,
nnicindexes, nicindexes,
+ ®,
def->resource->partition,
-1,
&cgroup) < 0)
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index e85f21c47d..979d6a154b 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -27,6 +27,9 @@
# include "lxc_fuse.h"
# include "virusb.h"
+int virLXCCgroupMode(virDomainResourceRegister reg,
+ virCgroupRegister *cgreg);
+
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
pid_t initpid,
size_t nnicindexes,
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index efd8a69000..24aa0cb0bf 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -1166,6 +1166,7 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm)
return -1;
}
+
/**
* virLXCProcessStart:
* @conn: pointer to connection
@@ -1260,13 +1261,13 @@ int virLXCProcessStart(virConnectPtr conn,
if (VIR_ALLOC(res) < 0)
goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) {
- VIR_FREE(res);
- goto cleanup;
- }
-
vm->def->resource = res;
}
+ if (vm->def->resource->reg != VIR_DOMAIN_RESOURCE_REGISTER_NONE &&
+ !vm->def->resource->partition) {
+ if (VIR_STRDUP(vm->def->resource->partition, "/machine") <
0)
+ goto cleanup;
+ }
if (virAsprintf(&logfile, "%s/%s.log",
cfg->logDir, vm->def->name) < 0)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 19252ea239..5167d7fee1 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -834,6 +834,46 @@ qemuSetupCpuCgroup(virDomainObjPtr vm)
}
+static int qemuGetCgroupMode(virDomainObjPtr vm,
+ virDomainResourceRegister reg,
+ virCgroupRegister *cgreg)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ bool avail = virQEMUDriverIsPrivileged(priv->driver) &&
+ virCgroupAvailable();
+
+ switch (reg) {
+ case VIR_DOMAIN_RESOURCE_REGISTER_NONE:
+ return 0;
+ case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT:
+ if (!avail)
+ return 0;
+ *cgreg = VIR_CGROUP_REGISTER_DEFAULT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED:
+ if (!avail)
+ goto unsupported;
+ *cgreg = VIR_CGROUP_REGISTER_MACHINED;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP:
+ if (!avail)
+ goto unsupported;
+ *cgreg = VIR_CGROUP_REGISTER_DIRECT;
+ break;
+ case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD:
+ default:
+ goto unsupported;
+ }
+
+ return 1;
+
+ unsupported:
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource register '%s' not available"),
+ virDomainResourceRegisterTypeToString(reg));
+ return -1;
+}
+
static int
qemuInitCgroup(virDomainObjPtr vm,
size_t nnicindexes,
@@ -842,11 +882,17 @@ qemuInitCgroup(virDomainObjPtr vm,
int ret = -1;
qemuDomainObjPrivatePtr priv = vm->privateData;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver);
+ virCgroupRegister reg;
+ int rv;
- if (!virQEMUDriverIsPrivileged(priv->driver))
- goto done;
-
- if (!virCgroupAvailable())
+ rv = qemuGetCgroupMode(vm,
+ vm->def->resource ?
+ vm->def->resource->reg :
+ VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT,
+ ®);
+ if (rv < 0)
+ goto cleanup;
+ if (rv == 0)
goto done;
virCgroupFree(&priv->cgroup);
@@ -857,13 +903,12 @@ qemuInitCgroup(virDomainObjPtr vm,
if (VIR_ALLOC(res) < 0)
goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) {
- VIR_FREE(res);
- goto cleanup;
- }
-
vm->def->resource = res;
}
+ if (!vm->def->resource->partition) {
+ if (VIR_STRDUP(vm->def->resource->partition, "/machine") <
0)
+ goto cleanup;
+ }
if (vm->def->resource->partition[0] != '/') {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
@@ -879,6 +924,7 @@ qemuInitCgroup(virDomainObjPtr vm,
vm->pid,
false,
nnicindexes, nicindexes,
+ ®,
vm->def->resource->partition,
cfg->cgroupControllers,
&priv->cgroup) < 0) {
@@ -980,6 +1026,11 @@ qemuConnectCgroup(virDomainObjPtr vm)
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver);
int ret = -1;
+ if (vm->def->resource &&
+ vm->def->resource->reg == VIR_DOMAIN_RESOURCE_REGISTER_NONE) {
+ goto done;
+ }
+
if (!virQEMUDriverIsPrivileged(priv->driver))
goto done;
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 0a31947b0d..07ffb78c78 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1733,6 +1733,7 @@ virCgroupNewMachine(const char *name,
bool isContainer,
size_t nnicindexes,
int *nicindexes,
+ virCgroupRegister *reg,
const char *partition,
int controllers,
virCgroupPtr *group)
@@ -1741,28 +1742,42 @@ virCgroupNewMachine(const char *name,
*group = NULL;
- if ((rv = virCgroupNewMachineSystemd(name,
- drivername,
- uuid,
- rootdir,
- pidleader,
- isContainer,
- nnicindexes,
- nicindexes,
- partition,
- controllers,
- group)) == 0)
- return 0;
+ if (*reg == VIR_CGROUP_REGISTER_DEFAULT ||
+ *reg == VIR_CGROUP_REGISTER_MACHINED) {
+ if ((rv = virCgroupNewMachineSystemd(name,
+ drivername,
+ uuid,
+ rootdir,
+ pidleader,
+ isContainer,
+ nnicindexes,
+ nicindexes,
+ partition,
+ controllers,
+ group)) == 0) {
+ *reg = VIR_CGROUP_REGISTER_MACHINED;
+ return 0;
+ }
- if (rv == -1)
- return -1;
+ if (rv == -1)
+ return -1;
+
+ if (*reg == VIR_CGROUP_REGISTER_MACHINED) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ "%s", _("Systemd machined requested, but not
available"));
+ return -1;
+ }
+ }
- return virCgroupNewMachineManual(name,
- drivername,
- pidleader,
- partition,
- controllers,
- group);
+ rv = virCgroupNewMachineManual(name,
+ drivername,
+ pidleader,
+ partition,
+ controllers,
+ group);
+ if (rv == 0)
+ *reg = VIR_CGROUP_REGISTER_DIRECT;
+ return rv;
}
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index d833927678..63ee1aba5c 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -46,7 +46,8 @@ enum {
VIR_CGROUP_CONTROLLER_LAST
};
-VIR_ENUM_DECL(virCgroupController);
+VIR_ENUM_DECL(virCgroupController)
+
/* Items of this enum are used later in virCgroupNew to create
* bit array stored in int. Like this:
* 1 << VIR_CGROUP_CONTROLLER_CPU
@@ -103,6 +104,12 @@ virCgroupNewDetectMachine(const char *name,
virCgroupPtr *group)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+typedef enum {
+ VIR_CGROUP_REGISTER_DEFAULT,
+ VIR_CGROUP_REGISTER_DIRECT,
+ VIR_CGROUP_REGISTER_MACHINED,
+} virCgroupRegister;
+
int virCgroupNewMachine(const char *name,
const char *drivername,
const unsigned char *uuid,
@@ -111,6 +118,7 @@ int virCgroupNewMachine(const char *name,
bool isContainer,
size_t nnicindexes,
int *nicindexes,
+ virCgroupRegister *reg,
const char *partition,
int controllers,
virCgroupPtr *group)
--
2.14.3