[libvirt] [PATCH] Add ability to set rlimits at container boot

--- docs/formatdomain.html.in | 49 +++++++++++++++++++++++ docs/schemas/domaincommon.rng | 89 +++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.c | 92 +++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 33 ++++++++++++++++ src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 32 +++++++++++++++ src/util/virprocess.c | 4 +- src/util/virprocess.h | 2 + 8 files changed, 300 insertions(+), 2 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f8d5f89..5aec51c 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -348,6 +348,55 @@ </pre> + <p> + If you want to set an rlimit of the containter init process instead of + inheriting from the host init, set the <code>rlimits</code> element. You + are able to set any of the rlimits that setrlimits is able to set using + any of the following sub-elements: + </p> + + <dl> + <dt><code>as</code></dt> + <dd>Used to set RLIMIT_AS.</dd> + <dt><code>core</code></dt> + <dd>Used to set RLIMIT_CORE.</dd> + <dt><code>cpu</code></dt> + <dd>Used to set RLIMIT_CPU.</dd> + <dt><code>data</code></dt> + <dd>Used to set RLIMIT_DATA.</dd> + <dt><code>fsize</code></dt> + <dd>Used to set RLIMIT_FSIZE.</dd> + <dt><code>locks</code></dt> + <dd>Used to set RLIMIT_LOCKS.</dd> + <dt><code>memlock</code></dt> + <dd>Used to set RLIMIT_MEMLOCK.</dd> + <dt><code>msgqueue</code></dt> + <dd>Used to set RLIMIT_MSGQUEUE.</dd> + <dt><code>nice</code></dt> + <dd>Used to set RLIMIT_NICE.</dd> + <dt><code>nofile</code></dt> + <dd>Used to set RLIMIT_NOFILE.</dd> + <dt><code>nproc</code></dt> + <dd>Used to set RLIMIT_NPROC.</dd> + <dt><code>rss</code></dt> + <dd>Used to set RLIMIT_RSS.</dd> + <dt><code>rtprio</code></dt> + <dd>Used to set RLIMIT_RTPRIO.</dd> + <dt><code>rttime</code></dt> + <dd>Used to set RLIMIT_RTTIME.</dd> + <dt><code>sigpending</code></dt> + <dd>Used to set RLIMIT_SIGPENDING.</dd> + <dt><code>stack</code></dt> + <dd>Used to set RLIMIT_STACK.</dd> + </dl> + + 
<pre> + <rlimits> + <nofile>10240</nofile> + </rlimits> + </pre> + + <h3><a name="elementsSysinfo">SMBIOS System Information</a></h3> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d467dce..b98f8d5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -59,6 +59,9 @@ <ref name="idmap"/> </optional> <optional> + <ref name="rlimits"/> + </optional> + <optional> <ref name="devices"/> </optional> <zeroOrMore> @@ -570,6 +573,92 @@ </interleave> </element> </define> + <define name="rlimits"> + <element name="rlimits"> + <interleave> + <optional> + <element name="cpu"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="fsize"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="data"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="stack"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="core"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rss"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nproc"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nofile"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="memlock"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="as"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="locks"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="sigpending"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="msgqueue"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nice"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rtprio"> + <ref name='scaledInteger'/> + 
</element> + </optional> + <optional> + <element name="rttime"> + <ref name='scaledInteger'/> + </element> + </optional> + </interleave> + </element> + </define> <!-- Resources usage defines the amount of memory (maximum and possibly current usage) and number of virtual CPUs used by that domain. diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d562e1a..399976e 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -772,6 +772,24 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash") +VIR_ENUM_IMPL(virDomainRLimit, VIR_DOMAIN_RLIMIT_LAST, + "cpu", + "fsize", + "data", + "stack", + "core", + "rss", + "nproc", + "nofile", + "memlock", + "as", + "locks", + "sigpending", + "msgqueue", + "nice", + "rtprio", + "rttime") + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -979,7 +997,40 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root, return -1; } +static virDomainRLimitsPtr +virDomainRLimitParseXML(xmlNodePtr node) +{ + char *c = NULL; + long long val; + virDomainRLimitsPtr def; + + if (VIR_ALLOC(def) < 0) + return NULL; + if (node->type == XML_ELEMENT_NODE) { + c = (char *)xmlNodeGetContent(node); + if (virStrToLong_ll(c, NULL, 10, &val) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("could not parse rlimit value of %s"), + c); + goto error; + } + VIR_FREE(c); + + def->limit = val; + if ((def->resource = virDomainRLimitTypeFromString((const char *)node->name)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("could not determine resource type of '%s'"), + node->name); + goto error; + } + } + return def; + error: + VIR_FREE(c); + VIR_FREE(def); + return NULL; +} static void virDomainObjListDataFree(void *payload, const void *name ATTRIBUTE_UNUSED) @@ -14423,6 +14474,34 @@ virDomainDefParseXML(xmlDocPtr xml, virHashFree(bootHash); + if ((node = virXPathNode("./rlimits[1]", ctxt)) != NULL && (n = 
virXMLChildElementCount(node)) > 0) { + xmlNodePtr cur = node->children; + if (n && VIR_ALLOC_N(def->rlimits, n) < 0) + goto error; + + for (i = 0; i < n; i++) { + if (!(def->rlimits[i] = virDomainRLimitParseXML(cur))) { + for (j = 0; j < i; j++) + VIR_FREE(def->rlimits[j]); + VIR_FREE(def->rlimits); + goto error; + } + def->nrlimits++; + for (j = 0; j < i; j++) { + if (def->rlimits[j]->resource == def->rlimits[i]->resource) { + virReportError(VIR_ERR_XML_ERROR, + _("duplicate rlimit resources '%s'"), + virDomainRLimitTypeToString(def->rlimits[j]->resource)); + for (int k = 0; k < i; k++) + VIR_FREE(def->rlimits[k]); + VIR_FREE(def->rlimits); + goto error; + } + } + cur = cur->next; + } + } + return def; error: @@ -20048,6 +20127,19 @@ virDomainDefFormatInternal(virDomainDefPtr def, goto error; } + if (def->nrlimits > 0) { + virBufferAddLit(buf, "<rlimits>\n"); + virBufferAdjustIndent(buf, 2); + for (n = 0; n < def->nrlimits; n++) { + virBufferAsprintf(buf, "<%s>%lld</%s>\n", + virDomainRLimitTypeToString(def->rlimits[n]->resource), + def->rlimits[n]->limit, + virDomainRLimitTypeToString(def->rlimits[n]->resource)); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</rlimits>\n"); + } + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</domain>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 93f2314..b032202 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1851,6 +1851,27 @@ typedef enum { VIR_DOMAIN_CLOCK_BASIS_LAST } virDomainClockBasis; +typedef enum { + VIR_DOMAIN_RLIMIT_CPU, + VIR_DOMAIN_RLIMIT_FSIZE, + VIR_DOMAIN_RLIMIT_DATA, + VIR_DOMAIN_RLIMIT_STACK, + VIR_DOMAIN_RLIMIT_CORE, + VIR_DOMAIN_RLIMIT_RSS, + VIR_DOMAIN_RLIMIT_NPROC, + VIR_DOMAIN_RLIMIT_NOFILE, + VIR_DOMAIN_RLIMIT_MEMLOCK, + VIR_DOMAIN_RLIMIT_AS, + VIR_DOMAIN_RLIMIT_LOCKS, + VIR_DOMAIN_RLIMIT_SIGPENDING, + VIR_DOMAIN_RLIMIT_MSGQUEUE, + VIR_DOMAIN_RLIMIT_NICE, + VIR_DOMAIN_RLIMIT_RTPRIO, + VIR_DOMAIN_RLIMIT_RTTIME, + + 
VIR_DOMAIN_RLIMIT_LAST +} virDomainRLimit; + typedef struct _virDomainClockDef virDomainClockDef; typedef virDomainClockDef *virDomainClockDefPtr; struct _virDomainClockDef { @@ -2039,6 +2060,14 @@ struct _virDomainPowerManagement { int s4; }; +typedef struct _virDomainRLimits virDomainRLimits; +typedef virDomainRLimits *virDomainRLimitsPtr; + +struct _virDomainRLimits { + int resource; + long long limit; +}; + /* * Guest VM main configuration * @@ -2156,6 +2185,9 @@ struct _virDomainDef { size_t nshmems; virDomainShmemDefPtr *shmems; + size_t nrlimits; + virDomainRLimitsPtr *rlimits; + /* Only 1 */ virDomainWatchdogDefPtr watchdog; virDomainMemballoonDefPtr memballoon; @@ -2844,6 +2876,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainRLimit) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bd7870f..7b71ff1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1896,6 +1896,7 @@ virProcessGetNamespaces; virProcessGetStartTime; virProcessKill; virProcessKillPainfully; +virProcessPrLimit; virProcessRunInMountNamespace; virProcessSetAffinity; virProcessSetMaxFiles; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 8a7c7e8..5c63a1b 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -798,6 +798,35 @@ static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr ctrl) return ret; } +static int virLXCControllerSetupRLimits(virLXCControllerPtr ctrl) +{ + int i, ret = -1; + struct rlimit rlim; + + VIR_DEBUG("Setting up rlimits"); + + VIR_DEBUG("nrlimits = %d", (int)ctrl->def->nrlimits); + VIR_DEBUG("setting limits on process %d", ctrl->initpid); + if (ctrl->def->nrlimits > 0) { + for (i = 0; i < ctrl->def->nrlimits; i++) { + rlim.rlim_max = rlim.rlim_cur = ctrl->def->rlimits[i]->limit; + 
VIR_DEBUG("Setting rlimit %s(%d) on pid %d to %lld", + virDomainRLimitTypeToString(ctrl->def->rlimits[i]->resource), + ctrl->def->rlimits[i]->resource, + ctrl->initpid, + ctrl->def->rlimits[i]->limit); + if (virProcessPrLimit(ctrl->initpid, ctrl->def->rlimits[i]->resource, &rlim) < 0) { + virReportSystemError(errno, "%s", + _("Unable to set rlimit")); + goto cleanup; + } + } + } + ret = 0; + cleanup: + return ret; +} + static void virLXCControllerClientCloseHook(virNetServerClientPtr client) { @@ -2318,6 +2347,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl) if (virLXCControllerSetupCgroupLimits(ctrl) < 0) goto cleanup; + if (virLXCControllerSetupRLimits(ctrl) < 0) + goto cleanup; + if (virLXCControllerSetupUserns(ctrl) < 0) goto cleanup; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d0a1500..d83ae28 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -676,13 +676,13 @@ int virProcessSetNamespaces(size_t nfdlist, } #if HAVE_PRLIMIT -static int +int virProcessPrLimit(pid_t pid, int resource, struct rlimit *rlim) { return prlimit(pid, resource, rlim, NULL); } #elif HAVE_SETRLIMIT -static int +int virProcessPrLimit(pid_t pid ATTRIBUTE_UNUSED, int resource ATTRIBUTE_UNUSED, struct rlimit *rlim ATTRIBUTE_UNUSED) diff --git a/src/util/virprocess.h b/src/util/virprocess.h index bcaede5..c40b41a 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -22,6 +22,7 @@ #ifndef __VIR_PROCESS_H__ # define __VIR_PROCESS_H__ +# include <sys/resource.h> # include <sys/types.h> # include "internal.h" @@ -73,4 +74,5 @@ typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque); +int virProcessPrLimit(pid_t pid, int resource, struct rlimit *rlim); #endif /* __VIR_PROCESS_H__ */ -- 1.9.3 (Apple Git-50)

On Fri, Jan 30, 2015 at 08:53:20AM -0600, Ryan Cleere wrote:
--- docs/formatdomain.html.in | 49 +++++++++++++++++++++++ docs/schemas/domaincommon.rng | 89 +++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.c | 92 +++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 33 ++++++++++++++++ src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 32 +++++++++++++++ src/util/virprocess.c | 4 +- src/util/virprocess.h | 2 + 8 files changed, 300 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f8d5f89..5aec51c 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -348,6 +348,55 @@ </pre>
+ <p> + If you want to set an rlimit of the containter init process instead of + inheriting from the host init, set the <code>rlimits</code> element. You + are able to set any of the rlimits that setrlimits is able to set using + any of the following sub-elements: + </p> + + <dl> + <dt><code>as</code></dt> + <dd>Used to set RLIMIT_AS.</dd> + <dt><code>core</code></dt> + <dd>Used to set RLIMIT_CORE.</dd> + <dt><code>cpu</code></dt> + <dd>Used to set RLIMIT_CPU.</dd> + <dt><code>data</code></dt> + <dd>Used to set RLIMIT_DATA.</dd> + <dt><code>fsize</code></dt> + <dd>Used to set RLIMIT_FSIZE.</dd> + <dt><code>locks</code></dt> + <dd>Used to set RLIMIT_LOCKS.</dd> + <dt><code>memlock</code></dt> + <dd>Used to set RLIMIT_MEMLOCK.</dd> + <dt><code>msgqueue</code></dt> + <dd>Used to set RLIMIT_MSGQUEUE.</dd> + <dt><code>nice</code></dt> + <dd>Used to set RLIMIT_NICE.</dd> + <dt><code>nofile</code></dt> + <dd>Used to set RLIMIT_NOFILE.</dd> + <dt><code>nproc</code></dt> + <dd>Used to set RLIMIT_NPROC.</dd> + <dt><code>rss</code></dt> + <dd>Used to set RLIMIT_RSS.</dd> + <dt><code>rtprio</code></dt> + <dd>Used to set RLIMIT_RTPRIO.</dd> + <dt><code>rttime</code></dt> + <dd>Used to set RLIMIT_RTTIME.</dd> + <dt><code>sigpending</code></dt> + <dd>Used to set RLIMIT_SIGPENDING.</dd> + <dt><code>stack</code></dt> + <dd>Used to set RLIMIT_STACK.</dd> + </dl> + + <pre> + <rlimits> + <nofile>10240</nofile> + </rlimits> + </pre> + + <h3><a name="elementsSysinfo">SMBIOS System Information</a></h3>
<p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d467dce..b98f8d5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -59,6 +59,9 @@ <ref name="idmap"/> </optional> <optional> + <ref name="rlimits"/> + </optional> + <optional> <ref name="devices"/> </optional> <zeroOrMore> @@ -570,6 +573,92 @@ </interleave> </element> </define> + <define name="rlimits"> + <element name="rlimits"> + <interleave> + <optional> + <element name="cpu"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="fsize"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="data"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="stack"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="core"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rss"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nproc"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nofile"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="memlock"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="as"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="locks"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="sigpending"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="msgqueue"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nice"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rtprio"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rttime"> + <ref name='scaledInteger'/> + </element> + </optional> + 
</interleave> + </element> + </define> <!-- Resources usage defines the amount of memory (maximum and possibly current usage) and number of virtual CPUs used by that domain. diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d562e1a..399976e 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -772,6 +772,24 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash")
+VIR_ENUM_IMPL(virDomainRLimit, VIR_DOMAIN_RLIMIT_LAST, + "cpu", + "fsize", + "data", + "stack", + "core", + "rss", + "nproc", + "nofile", + "memlock", + "as", + "locks", + "sigpending", + "msgqueue", + "nice", + "rtprio", + "rttime")
I'm really not a huge fan of this passthrough of arbitrary rlimits in the XML like this. Not least because a number of these limits are actually scheduler tunables and so would logically belong under the existing <cputune> XML element. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

I guess I don't really have an argument for or against removing some of them from <rlimits>. The original patch that I wrote and we use internally only allowed setting of RLIMIT_NOFILE, but when I went to publish it back to this list it was trivial to just make it a generic interface to all of the RLIMIT_* tunables. I don't have a need for them at this time, but I figured someone else might find them useful. But if this list can come up with a set we want included/excluded then the <rlimits> section can be modified accordingly. Although it might be confusing to an operator who is reading the setrlimit(2) manpage and can't understand why they can't set the limit they are interested in. Ryan On Fri, Jan 30, 2015 at 9:02 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
On Fri, Jan 30, 2015 at 08:53:20AM -0600, Ryan Cleere wrote:
--- docs/formatdomain.html.in | 49 +++++++++++++++++++++++ docs/schemas/domaincommon.rng | 89 +++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.c | 92 +++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 33 ++++++++++++++++ src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 32 +++++++++++++++ src/util/virprocess.c | 4 +- src/util/virprocess.h | 2 + 8 files changed, 300 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f8d5f89..5aec51c 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -348,6 +348,55 @@ </pre>
+ <p> + If you want to set an rlimit of the containter init process instead of + inheriting from the host init, set the <code>rlimits</code> element. You + are able to set any of the rlimits that setrlimits is able to set using + any of the following sub-elements: + </p> + + <dl> + <dt><code>as</code></dt> + <dd>Used to set RLIMIT_AS.</dd> + <dt><code>core</code></dt> + <dd>Used to set RLIMIT_CORE.</dd> + <dt><code>cpu</code></dt> + <dd>Used to set RLIMIT_CPU.</dd> + <dt><code>data</code></dt> + <dd>Used to set RLIMIT_DATA.</dd> + <dt><code>fsize</code></dt> + <dd>Used to set RLIMIT_FSIZE.</dd> + <dt><code>locks</code></dt> + <dd>Used to set RLIMIT_LOCKS.</dd> + <dt><code>memlock</code></dt> + <dd>Used to set RLIMIT_MEMLOCK.</dd> + <dt><code>msgqueue</code></dt> + <dd>Used to set RLIMIT_MSGQUEUE.</dd> + <dt><code>nice</code></dt> + <dd>Used to set RLIMIT_NICE.</dd> + <dt><code>nofile</code></dt> + <dd>Used to set RLIMIT_NOFILE.</dd> + <dt><code>nproc</code></dt> + <dd>Used to set RLIMIT_NPROC.</dd> + <dt><code>rss</code></dt> + <dd>Used to set RLIMIT_RSS.</dd> + <dt><code>rtprio</code></dt> + <dd>Used to set RLIMIT_RTPRIO.</dd> + <dt><code>rttime</code></dt> + <dd>Used to set RLIMIT_RTTIME.</dd> + <dt><code>sigpending</code></dt> + <dd>Used to set RLIMIT_SIGPENDING.</dd> + <dt><code>stack</code></dt> + <dd>Used to set RLIMIT_STACK.</dd> + </dl> + + <pre> + <rlimits> + <nofile>10240</nofile> + </rlimits> + </pre> + + <h3><a name="elementsSysinfo">SMBIOS System Information</a></h3>
<p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d467dce..b98f8d5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -59,6 +59,9 @@ <ref name="idmap"/> </optional> <optional> + <ref name="rlimits"/> + </optional> + <optional> <ref name="devices"/> </optional> <zeroOrMore> @@ -570,6 +573,92 @@ </interleave> </element> </define> + <define name="rlimits"> + <element name="rlimits"> + <interleave> + <optional> + <element name="cpu"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="fsize"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="data"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="stack"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="core"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rss"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nproc"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nofile"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="memlock"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="as"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="locks"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="sigpending"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="msgqueue"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="nice"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rtprio"> + <ref name='scaledInteger'/> + </element> + </optional> + <optional> + <element name="rttime"> + <ref name='scaledInteger'/> + </element> + </optional> + 
</interleave> + </element> + </define> <!-- Resources usage defines the amount of memory (maximum and possibly current usage) and number of virtual CPUs used by that domain. diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d562e1a..399976e 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -772,6 +772,24 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash")
+VIR_ENUM_IMPL(virDomainRLimit, VIR_DOMAIN_RLIMIT_LAST, + "cpu", + "fsize", + "data", + "stack", + "core", + "rss", + "nproc", + "nofile", + "memlock", + "as", + "locks", + "sigpending", + "msgqueue", + "nice", + "rtprio", + "rttime")
I'm really not a huge fan of this passthrough of arbitrary rlimits in the XML like this. Not least because a number of these limits are actually scheduler tunables and so would logically belong under the existing <cputune> XML element.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Fri, Jan 30, 2015 at 4:32 PM, Ryan Cleere <rcleere@gmail.com> wrote:
I guess I don't really have an argument for or against removing some of them from <rlimits>. The original patch that I wrote and we use internally only allowed setting of RLIMIT_NOFILE, but when I went to publish it back to this list it was trivial to just make it a generic interface to all of the RLIMIT_* tunables. I don't have a need for them at this time, but I figured someone else might find them useful. But if this list can come up with a set we want included/excluded then the <rlimits> section can be modified accordingly. Although it might be confusing to an operator who is reading the setrlimit(2) manpage and can't understand why they can't set the limit they are interested in.
BTW: This should depend on idmap (user namespaces set up). Without user namespaces root can bypass/reset all these limits. -- Thanks, //richard

Richard, I have to disagree that it should require idmap. It is true that without idmap the container can freely set its own rlimits, but I believe this functionality could be useful to containers that don't run /sbin/init. What I mean by that is application-specific containers could have their limits set without the application having to set them, or even having to write a shim to set them. Ryan On Sun, Feb 22, 2015 at 5:59 PM, Richard Weinberger <richard.weinberger@gmail.com> wrote:
On Fri, Jan 30, 2015 at 4:32 PM, Ryan Cleere <rcleere@gmail.com> wrote:
I guess I don't really have an argument for or against removing some of them from <rlimits>. The original patch that I wrote and we use internally only allowed setting of RLIMIT_NOFILE, but when I went to publish it back to this list it was trivial to just make it a generic interface to all of the RLIMIT_* tunables. I don't have a need for them at this time, but I figured someone else might find them useful. But if this list can come up with a set we want included/excluded then the <rlimits> section can be modified accordingly. Although it might be confusing to an operator who is reading the setrlimit(2) manpage and can't understand why they can't set the limit they are interested in.
BTW: This should depend on idmap (user namespaces set up). Without user namespaces root can bypass/reset all these limits.
-- Thanks, //richard

Ryan, Am 23.02.2015 um 18:37 schrieb Ryan Cleere:
Richard,
I have to disagree that it should require idmap. It is true that without idmap the container can freely set its own rlimits, but I believe this functionality could be useful to containers that don't run /sbin/init. What I mean by that is application-specific containers could have their limits set without the application having to set them, or even having to write a shim to set them.
Sorry, I don't understand. What does running a non-/sbin/init process have to do with that? Without user namespaces root within the container can bypass these limits. Thanks, //richard

Hi Richard, All I am suggesting is that someone may want to run a custom process as their <init> process that may or may not have the ability to set the rlimits. This would just allow them to start in a known state. You are absolutely right that without user namespaces the container could set them to whatever the user wanted. However, I think there also exists the possibility that a user not running user namespaces could use the XML to drop the 'CAP_SYS_RESOURCE' capability and therefore would not be able to set rlimits. But I have not tested this scenario. Ryan On Mon, Feb 23, 2015 at 11:44 AM, Richard Weinberger <richard@nod.at> wrote:
Ryan,
Am 23.02.2015 um 18:37 schrieb Ryan Cleere:
Richard,
I have to disagree that it should require idmap. It is true that without idmap the container can freely set its own rlimits, but I believe this functionality could be useful to containers that don't run /sbin/init. What I mean by that is application-specific containers could have their limits set without the application having to set them, or even having to write a shim to set them.
Sorry, I don't understand. What does running a non-/sbin/init process have to do with that? Without user namespaces root within the container can bypass these limits.
Thanks, //richard
participants (4)
-
Daniel P. Berrange
-
Richard Weinberger
-
Richard Weinberger
-
Ryan Cleere