
I know it's a POC type series, but figured I'd take a look primarily at the first 4 patches, learn a bit in the 5th one, and provide some review feedback in order to help move things along... There's perhaps a few adjustments in here that could be made into multiple patches. On 12/20/2017 11:47 AM, Daniel P. Berrange wrote:
Currently the QEMU driver has three ways of setting up cgroups. It either skips them entirely (if non-root), or uses systemd-machined, or uses cgroups directly.
It is further possible to register directly with systemd and bypass machined. We don't support this by systemd-nsspawn does and we ought
s/by/but/ (I think) Still a bit odd to add an option that isn't supported unless there was a plan to add the support when formulating the non POC patches...
to.
This change adds ability to configure the mechanism for registering resources between all these options explicitly. via
s/. via/ via:/ (e.g. remove the '.' and add ':')
<resource register="none|direct|machined|systemd"/>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/conf/domain_conf.c | 42 ++++++++++++++++++++++------- src/conf/domain_conf.h | 12 +++++++++ src/libvirt_private.syms | 2 ++ src/lxc/lxc_cgroup.c | 34 ++++++++++++++++++++++++ src/lxc/lxc_cgroup.h | 3 +++ src/lxc/lxc_process.c | 11 ++++---- src/qemu/qemu_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++------- src/util/vircgroup.c | 55 ++++++++++++++++++++++++-------------- src/util/vircgroup.h | 10 ++++++- 9 files changed, 194 insertions(+), 44 deletions(-)
formatdomain.html.in would need to be augmented to describe the new attribute. domaincommon.rng modified to add the new attribute. A tests either added or existing one adjusted to exhibit the new attribute option(s).
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9a62bc472c..fb8e7a0ec7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -910,6 +910,14 @@ VIR_ENUM_IMPL(virDomainHPTResizing, "required", );
+VIR_ENUM_IMPL(virDomainResourceRegister, + VIR_DOMAIN_RESOURCE_REGISTER_LAST, + "default", + "none", + "cgroup", + "machined", + "systemd"); + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -17698,16 +17706,20 @@ virDomainResourceDefParse(xmlNodePtr node, { virDomainResourceDefPtr def = NULL; xmlNodePtr tmp = ctxt->node; + char *reg;
= NULL;
ctxt->node = node;
if (VIR_ALLOC(def) < 0) goto error;
- /* Find out what type of virtualization to use */ - if (!(def->partition = virXPathString("string(./partition)", ctxt))) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("missing resource partition attribute")); + def->partition = virXPathString("string(./partition)", ctxt);
It seems partition is now optional, although formatdomain seems to have already indicated that... However, that's not a perfect match to domaincommon.rng where only "respartition" is optional. (This type of change could perhaps be it's own patch).
+ + reg = virXMLPropString(node, "register"); + if (reg != NULL && + (def->reg = virDomainResourceRegisterTypeFromString(reg)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Invalid register attribute")); goto error; }
Need a VIR_FREE(reg); since we haven't GO-ified yet in both error and normal paths unless this is rewritten a bit...
@@ -25568,11 +25580,23 @@ static void virDomainResourceDefFormat(virBufferPtr buf, virDomainResourceDefPtr def) { - virBufferAddLit(buf, "<resource>\n"); - virBufferAdjustIndent(buf, 2); - virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); - virBufferAdjustIndent(buf, -2); - virBufferAddLit(buf, "</resource>\n"); + if (def->reg == VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT && + def->partition == NULL) + return; + + virBufferAddLit(buf, "<resource"); + if (def->reg != VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT) + virBufferAsprintf(buf, " register='%s'", virDomainResourceRegisterTypeToString(def->reg)); + + if (def->partition) { + virBufferAddLit(buf, ">\n"); + virBufferAdjustIndent(buf, 2); + virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</resource>\n"); + } else { + virBufferAddLit(buf, "/>\n"); + } }
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 6f7f96b3dd..a7a6628a36 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2145,9 +2145,20 @@ struct _virDomainPanicDef { void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights, int ndevices);
+typedef enum { + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, + VIR_DOMAIN_RESOURCE_REGISTER_NONE, + VIR_DOMAIN_RESOURCE_REGISTER_CGROUP, + VIR_DOMAIN_RESOURCE_REGISTER_MACHINED, + VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD, + + VIR_DOMAIN_RESOURCE_REGISTER_LAST, +} virDomainResourceRegister; + typedef struct _virDomainResourceDef virDomainResourceDef; typedef virDomainResourceDef *virDomainResourceDefPtr; struct _virDomainResourceDef { + int reg; /* enum virDomainResourceRegister */ char *partition; };
@@ -3325,6 +3336,7 @@ VIR_ENUM_DECL(virDomainMemorySource) VIR_ENUM_DECL(virDomainMemoryAllocation) VIR_ENUM_DECL(virDomainIOMMUModel) VIR_ENUM_DECL(virDomainShmemModel) +VIR_ENUM_DECL(virDomainResourceRegister) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d5c3b9abb5..a0fde65dba 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -489,6 +489,8 @@ virDomainRedirdevBusTypeToString; virDomainRedirdevDefFind; virDomainRedirdevDefFree; virDomainRedirdevDefRemove; +virDomainResourceRegisterTypeFromString; +virDomainResourceRegisterTypeToString; virDomainRNGBackendTypeToString; virDomainRNGDefFree; virDomainRNGFind; diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 3369801870..7bd479df1b 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -478,6 +478,35 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, return ret; }
+int virLXCCgroupMode(virDomainResourceRegister reg, + virCgroupRegister *cgreg) +{ + switch (reg) { + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: + goto unsupported; + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: + *cgreg = VIR_CGROUP_REGISTER_MACHINED; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: + *cgreg = VIR_CGROUP_REGISTER_DIRECT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: + default:
Use VIR_DOMAIN_RESOURCE_REGISTER_LAST instead of default
+ goto unsupported; + } + + return 0; + + unsupported: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource register '%s' not available"), + virDomainResourceRegisterTypeToString(reg)); + return -1; +} +
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, pid_t initpid, @@ -485,11 +514,15 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, int *nicindexes) { virCgroupPtr cgroup = NULL; + virCgroupRegister reg; char *machineName = virLXCDomainGetMachineName(def, 0);
if (!machineName) goto cleanup;
+ if (virLXCCgroupMode(def->resource->reg, ®) < 0) + goto cleanup; + if (def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Resource partition '%s' must start with '/'"), @@ -504,6 +537,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, initpid, true, nnicindexes, nicindexes, + ®, def->resource->partition, -1, &cgroup) < 0) diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index e85f21c47d..979d6a154b 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -27,6 +27,9 @@ # include "lxc_fuse.h" # include "virusb.h"
+int virLXCCgroupMode(virDomainResourceRegister reg, + virCgroupRegister *cgreg); + virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, pid_t initpid, size_t nnicindexes, diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index efd8a69000..24aa0cb0bf 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1166,6 +1166,7 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm) return -1; }
+ /** * virLXCProcessStart: * @conn: pointer to connection @@ -1260,13 +1261,13 @@ int virLXCProcessStart(virConnectPtr conn, if (VIR_ALLOC(res) < 0) goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) { - VIR_FREE(res); - goto cleanup; - } - vm->def->resource = res; }
So the above could be shortened to just: if (!vm->def->resource && VIR_ALLOC(vm->def->resource) < 0) goto cleanup;
+ if (vm->def->resource->reg != VIR_DOMAIN_RESOURCE_REGISTER_NONE && + !vm->def->resource->partition) { + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) + goto cleanup; + }
This could be similar too w/r/t single if condition...
if (virAsprintf(&logfile, "%s/%s.log", cfg->logDir, vm->def->name) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 19252ea239..5167d7fee1 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -834,6 +834,46 @@ qemuSetupCpuCgroup(virDomainObjPtr vm) }
There could be a helper here that would be callable by qemuConnectCgroup that would return the @avail value.
+static int qemuGetCgroupMode(virDomainObjPtr vm, + virDomainResourceRegister reg, + virCgroupRegister *cgreg) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + bool avail = virQEMUDriverIsPrivileged(priv->driver) && + virCgroupAvailable(); + + switch (reg) { + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: + return 0; + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: + if (!avail) + return 0; + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: + if (!avail) + goto unsupported; + *cgreg = VIR_CGROUP_REGISTER_MACHINED; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: + if (!avail) + goto unsupported; + *cgreg = VIR_CGROUP_REGISTER_DIRECT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: + default:
Use VIR_DOMAIN_RESOURCE_REGISTER_LAST instead of default
+ goto unsupported; + } + + return 1; + + unsupported: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource register '%s' not available"), + virDomainResourceRegisterTypeToString(reg)); + return -1; +} + static int qemuInitCgroup(virDomainObjPtr vm, size_t nnicindexes, @@ -842,11 +882,17 @@ qemuInitCgroup(virDomainObjPtr vm, int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); + virCgroupRegister reg; + int rv;
- if (!virQEMUDriverIsPrivileged(priv->driver)) - goto done; - - if (!virCgroupAvailable()) + rv = qemuGetCgroupMode(vm, + vm->def->resource ? + vm->def->resource->reg : + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, + ®); + if (rv < 0) + goto cleanup; + if (rv == 0) goto done;
virCgroupFree(&priv->cgroup); @@ -857,13 +903,12 @@ qemuInitCgroup(virDomainObjPtr vm, if (VIR_ALLOC(res) < 0) goto cleanup;
- if (VIR_STRDUP(res->partition, "/machine") < 0) { - VIR_FREE(res); - goto cleanup; - } - vm->def->resource = res; }
Similar to above, we now have: if (!vm->def->resource && VIR_ALLOC(vm->def->resource) < 0) goto cleanup;
+ if (!vm->def->resource->partition) { + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) + goto cleanup; + }
and similar construct if desired.
if (vm->def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -879,6 +924,7 @@ qemuInitCgroup(virDomainObjPtr vm, vm->pid, false, nnicindexes, nicindexes, + ®, vm->def->resource->partition, cfg->cgroupControllers, &priv->cgroup) < 0) { @@ -980,6 +1026,11 @@ qemuConnectCgroup(virDomainObjPtr vm) virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); int ret = -1;
+ if (vm->def->resource && + vm->def->resource->reg == VIR_DOMAIN_RESOURCE_REGISTER_NONE) { + goto done; + } +
The subsequent checks could use the @avail helper mentioned above... John
if (!virQEMUDriverIsPrivileged(priv->driver)) goto done;
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 0a31947b0d..07ffb78c78 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1733,6 +1733,7 @@ virCgroupNewMachine(const char *name, bool isContainer, size_t nnicindexes, int *nicindexes, + virCgroupRegister *reg, const char *partition, int controllers, virCgroupPtr *group) @@ -1741,28 +1742,42 @@ virCgroupNewMachine(const char *name,
*group = NULL;
- if ((rv = virCgroupNewMachineSystemd(name, - drivername, - uuid, - rootdir, - pidleader, - isContainer, - nnicindexes, - nicindexes, - partition, - controllers, - group)) == 0) - return 0; + if (*reg == VIR_CGROUP_REGISTER_DEFAULT || + *reg == VIR_CGROUP_REGISTER_MACHINED) { + if ((rv = virCgroupNewMachineSystemd(name, + drivername, + uuid, + rootdir, + pidleader, + isContainer, + nnicindexes, + nicindexes, + partition, + controllers, + group)) == 0) { + *reg = VIR_CGROUP_REGISTER_MACHINED; + return 0; + }
- if (rv == -1) - return -1; + if (rv == -1) + return -1; + + if (*reg == VIR_CGROUP_REGISTER_MACHINED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Systemd machined requested, but not available")); + return -1; + } + }
- return virCgroupNewMachineManual(name, - drivername, - pidleader, - partition, - controllers, - group); + rv = virCgroupNewMachineManual(name, + drivername, + pidleader, + partition, + controllers, + group); + if (rv == 0) + *reg = VIR_CGROUP_REGISTER_DIRECT; + return rv; }
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index d833927678..63ee1aba5c 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -46,7 +46,8 @@ enum { VIR_CGROUP_CONTROLLER_LAST };
-VIR_ENUM_DECL(virCgroupController); +VIR_ENUM_DECL(virCgroupController) + /* Items of this enum are used later in virCgroupNew to create * bit array stored in int. Like this: * 1 << VIR_CGROUP_CONTROLLER_CPU @@ -103,6 +104,12 @@ virCgroupNewDetectMachine(const char *name, virCgroupPtr *group) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+typedef enum { + VIR_CGROUP_REGISTER_DEFAULT, + VIR_CGROUP_REGISTER_DIRECT, + VIR_CGROUP_REGISTER_MACHINED, +} virCgroupRegister; + int virCgroupNewMachine(const char *name, const char *drivername, const unsigned char *uuid, @@ -111,6 +118,7 @@ int virCgroupNewMachine(const char *name, bool isContainer, size_t nnicindexes, int *nicindexes, + virCgroupRegister *reg, const char *partition, int controllers, virCgroupPtr *group)