[libvirt] [PATCH 0/4] Support for integrating cgroups with systemd

From: "Daniel P. Berrange" <berrange@redhat.com> This is a much changed / expanded version of my previous work to create cgroups via systemd. The difference is that this time it actually works :-) I'm not proposing this for merge until after the 1.1.1 release. Daniel P. Berrange (4): Add APIs for formatting systemd slice/scope names Add support for systemd cgroup mount Cope with races while killing processes Enable support for systemd-machined in cgroups creation src/libvirt_private.syms | 2 + src/lxc/lxc_process.c | 10 +- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 270 +++++++++++++++++++++++++++++++++++++++++------ src/util/vircgroup.h | 2 + src/util/virsystemd.c | 96 ++++++++++++++++- src/util/virsystemd.h | 5 + tests/vircgrouptest.c | 9 ++ tests/virsystemdtest.c | 48 +++++++++ 9 files changed, 403 insertions(+), 40 deletions(-) -- 1.8.1.4

From: "Daniel P. Berrange" <berrange@redhat.com> There are some interesting escaping rules to consider when dealing with systemd slice/scope names. Thus it is helpful to have APIs for formatting names Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virsystemd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virsystemd.h | 5 +++ tests/virsystemdtest.c | 48 +++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d9615ea..0247a46 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1935,6 +1935,8 @@ virSysinfoSetup; # util/virsystemd.h virSystemdCreateMachine; +virSystemdMakeScopeName; +virSystemdMakeSliceName; # util/virthread.h diff --git a/src/util/virsystemd.c b/src/util/virsystemd.c index 11d1153..251b846 100644 --- a/src/util/virsystemd.c +++ b/src/util/virsystemd.c @@ -27,9 +27,96 @@ #include "viralloc.h" #include "virutil.h" #include "virlog.h" +#include "virerror.h" #define VIR_FROM_THIS VIR_FROM_SYSTEMD + +static void virSystemdEscapeName(virBufferPtr buf, + const char *name) +{ + static const char hextable[16] = "0123456789abcdef"; + +#define ESCAPE(c) \ + do { \ + virBufferAddChar(buf, '\\'); \ + virBufferAddChar(buf, 'x'); \ + virBufferAddChar(buf, hextable[(c >> 4) & 15]); \ + virBufferAddChar(buf, hextable[c & 15]); \ + } while (0) + +#define VALID_CHARS \ + "0123456789" \ + "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + ":-_.\\" + + if (*name == '.') { + ESCAPE(*name); + name++; + } + + while (*name) { + if (*name == '/') + virBufferAddChar(buf, '-'); + else if (*name == '-' || + *name == '\\' || + !strchr(VALID_CHARS, *name)) + ESCAPE(*name); + else + virBufferAddChar(buf, *name); + name++; + } + +#undef ESCAPE +#undef VALID_CHARS +} + + +char *virSystemdMakeScopeName(const char *name, + const char *drivername, + const char *partition) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + if (*partition == '/') + partition++; + + virSystemdEscapeName(&buf, partition); + virBufferAddChar(&buf, '-'); + virSystemdEscapeName(&buf, drivername); + virBufferAddLit(&buf, "\\x2d"); + virSystemdEscapeName(&buf, name); + virBufferAddLit(&buf, ".scope"); + + if (virBufferError(&buf)) { + virReportOOMError(); + return NULL; + } + + return virBufferContentAndReset(&buf); +} + + +char *virSystemdMakeSliceName(const char *partition) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + if (*partition == '/') + partition++; + + virSystemdEscapeName(&buf, partition); + virBufferAddLit(&buf, ".slice"); + + if (virBufferError(&buf)) { + virReportOOMError(); + return NULL; + } + + return virBufferContentAndReset(&buf); +} + + /** * virSystemdCreateMachine: * @name: driver unique name of the machine @@ -75,8 +162,8 @@ int virSystemdCreateMachine(const char *name, goto cleanup; if (partition) { - if (virAsprintf(&slicename, "%s.slice", partition) < 0) - goto cleanup; + if (!(slicename = virSystemdMakeSliceName(partition))) + goto cleanup; } else { if (VIR_STRDUP(slicename, "") < 0) goto cleanup; diff --git a/src/util/virsystemd.h b/src/util/virsystemd.h index 9ca4e0b..414ae5a 100644 --- a/src/util/virsystemd.h +++ b/src/util/virsystemd.h @@ -24,6 +24,11 @@ # include "internal.h" +char *virSystemdMakeScopeName(const char *name, + const char *drivername, + const char *slicename); +char *virSystemdMakeSliceName(const char *partition); + int virSystemdCreateMachine(const char *name, const char *drivername, bool privileged, diff --git a/tests/virsystemdtest.c b/tests/virsystemdtest.c index bcf3ad3..784c45c 100644 --- a/tests/virsystemdtest.c +++ b/tests/virsystemdtest.c @@ -138,6 +138,38 @@ static int testCreateBadSystemd(const void *opaque ATTRIBUTE_UNUSED) return 0; } + +struct testScopeData { + const char *name; + const char *partition; + const char *expected; +}; + +static int +testScopeName(const void *opaque) +{ + const struct testScopeData *data = opaque; + int ret = -1; + char *actual = NULL; + + if (!(actual = virSystemdMakeScopeName(data->name, + "lxc", + data->partition))) + goto cleanup; + + if (STRNEQ(actual, data->expected)) { + fprintf(stderr, "Expected '%s' but got '%s'\n", + data->expected, actual); + goto cleanup; + } + + ret = 0; + + cleanup: + VIR_FREE(actual); + return ret; +} + static int mymain(void) { @@ -152,6 +184,22 @@ mymain(void) if (virtTestRun("Test create bad systemd ", 1, testCreateBadSystemd, NULL) < 0) ret = -1; +#define TEST_SCOPE(name, partition, unitname) \ + do { \ + struct testScopeData data = { \ + name, partition, unitname \ + }; \ + if (virtTestRun("Test scopename", 1, testScopeName, &data) < 0) \ + ret = -1; \ + } while (0) + + TEST_SCOPE("demo", "/machine", "machine-lxc\\x2ddemo.scope"); + TEST_SCOPE("demo-name", "/machine", "machine-lxc\\x2ddemo\\x2dname.scope"); + TEST_SCOPE("demo!name", "/machine", "machine-lxc\\x2ddemo\\x21name.scope"); + TEST_SCOPE(".demo", "/machine", "machine-lxc\\x2d\\x2edemo.scope"); + TEST_SCOPE("demo", "/machine/eng-dept", "machine-eng\\x2ddept-lxc\\x2ddemo.scope"); + TEST_SCOPE("demo", "/machine/eng-dept/testing!stuff", "machine-eng\\x2ddept-testing\\x21stuff-lxc\\x2ddemo.scope"); + return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 1.8.1.4

On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There are some interesting escaping rules to consider when dealing with systemd slice/scope names. Thus it is helpful to have APIs for formatting names
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virsystemd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virsystemd.h | 5 +++ tests/virsystemdtest.c | 48 +++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 2 deletions(-)
+ +#define VALID_CHARS \ + "0123456789" \ + "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + ":-_.\\"
If you would remove - and \\ from this list...
+ + if (*name == '.') { + ESCAPE(*name); + name++; + } + + while (*name) { + if (*name == '/') + virBufferAddChar(buf, '-'); + else if (*name == '-' || + *name == '\\' || + !strchr(VALID_CHARS, *name))
...then this could be simplified to just !strchr().
+ + +char *virSystemdMakeScopeName(const char *name,
Is it worth using the style: char * virSystemdMakeScopeName(...
+ TEST_SCOPE("demo", "/machine/eng-dept/testing!stuff", "machine-eng\\x2ddept-testing\\x21stuff-lxc\\x2ddemo.scope");
Worth wrapping the long line. ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Tue, Jul 30, 2013 at 12:05:51PM -0600, Eric Blake wrote:
On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There are some interesting escaping rules to consider when dealing with systemd slice/scope names. Thus it is helpful to have APIs for formatting names
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virsystemd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virsystemd.h | 5 +++ tests/virsystemdtest.c | 48 +++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 2 deletions(-)
+ +#define VALID_CHARS \ + "0123456789" \ + "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + ":-_.\\"
If you would remove - and \\ from this list...
This is the set that systemd uses, and we need to match it exactly, otherwise we won't detect the cgroups correctly.
+ + if (*name == '.') { + ESCAPE(*name); + name++; + } + + while (*name) { + if (*name == '/') + virBufferAddChar(buf, '-'); + else if (*name == '-' || + *name == '\\' || + !strchr(VALID_CHARS, *name))
...then this could be simplified to just !strchr().
+ + +char *virSystemdMakeScopeName(const char *name,
Is it worth using the style:
char * virSystemdMakeScopeName(...
+ TEST_SCOPE("demo", "/machine/eng-dept/testing!stuff", "machine-eng\\x2ddept-testing\\x21stuff-lxc\\x2ddemo.scope");
Worth wrapping the long line.
ACK.
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 07/30/2013 12:47 PM, Daniel P. Berrange wrote:
On Tue, Jul 30, 2013 at 12:05:51PM -0600, Eric Blake wrote:
On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There are some interesting escaping rules to consider when dealing with systemd slice/scope names. Thus it is helpful to have APIs for formatting names
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virsystemd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virsystemd.h | 5 +++ tests/virsystemdtest.c | 48 +++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 2 deletions(-)
+ +#define VALID_CHARS \ + "0123456789" \ + "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + ":-_.\\"
If you would remove - and \\ from this list...
This is the set that systemd uses, and we need to match it exactly, otherwise we won't detect the cgroups correctly.
My point is that your only use of VALID_CHARS was in the strchr() to see which characters need escaping. systemd is using the full set for characters present _after_ escaping, whereas you are only using it _before_ escaping, and have to special-case your escaping as a result. But I'm not too concerned about the efficiency to insist on that micro-optimization. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Fri, Jul 26, 2013 at 5:48 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
There are some interesting escaping rules to consider when dealing with systemd slice/scope names. Thus it is helpful to have APIs for formatting names
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virsystemd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virsystemd.h | 5 +++ tests/virsystemdtest.c | 48 +++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d9615ea..0247a46 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1935,6 +1935,8 @@ virSysinfoSetup;
# util/virsystemd.h virSystemdCreateMachine; +virSystemdMakeScopeName; +virSystemdMakeSliceName;
# util/virthread.h diff --git a/src/util/virsystemd.c b/src/util/virsystemd.c index 11d1153..251b846 100644 --- a/src/util/virsystemd.c +++ b/src/util/virsystemd.c @@ -27,9 +27,96 @@ #include "viralloc.h" #include "virutil.h" #include "virlog.h" +#include "virerror.h"
#define VIR_FROM_THIS VIR_FROM_SYSTEMD
+ +static void virSystemdEscapeName(virBufferPtr buf, + const char *name) +{ + static const char hextable[16] = "0123456789abcdef"; + +#define ESCAPE(c) \ + do { \ + virBufferAddChar(buf, '\\'); \ + virBufferAddChar(buf, 'x'); \ + virBufferAddChar(buf, hextable[(c >> 4) & 15]); \ + virBufferAddChar(buf, hextable[c & 15]); \ + } while (0) + +#define VALID_CHARS \ + "0123456789" \ + "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + ":-_.\\" + + if (*name == '.') { + ESCAPE(*name); + name++; + } + + while (*name) { + if (*name == '/') + virBufferAddChar(buf, '-'); + else if (*name == '-' || + *name == '\\' || + !strchr(VALID_CHARS, *name)) + ESCAPE(*name); + else + virBufferAddChar(buf, *name); + name++; + } + +#undef ESCAPE +#undef VALID_CHARS +} + + +char *virSystemdMakeScopeName(const char *name, + const char *drivername, + const char *partition) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + if (*partition == '/') + partition++; + + virSystemdEscapeName(&buf, partition); + virBufferAddChar(&buf, '-'); + virSystemdEscapeName(&buf, drivername); + virBufferAddLit(&buf, "\\x2d");
What is the idea behind this? Now we end up with paths like: /sys/fs/cgroup/memory/machine.slice/machine-lxc\x2dmyfunnycontainer.scope -- Thanks, //richard

On Sun, Dec 15, 2013 at 07:09:19PM +0100, Richard Weinberger wrote:
On Fri, Jul 26, 2013 at 5:48 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
+char *virSystemdMakeScopeName(const char *name, + const char *drivername, + const char *partition) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + if (*partition == '/') + partition++; + + virSystemdEscapeName(&buf, partition); + virBufferAddChar(&buf, '-'); + virSystemdEscapeName(&buf, drivername); + virBufferAddLit(&buf, "\\x2d");
What is the idea behind this? Now we end up with paths like: /sys/fs/cgroup/memory/machine.slice/machine-lxc\x2dmyfunnycontainer.scope
The string prefix 'machine' is a special systemd string. The second part is the libvirt name 'lxc-$containername'. Systemd requires that the '-' be escaped hence we use '\x2d' here. You'll see this in a number of other systemd unit names too. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Am Montag, 16. Dezember 2013, 10:51:01 schrieb Daniel P. Berrange:
On Sun, Dec 15, 2013 at 07:09:19PM +0100, Richard Weinberger wrote:
On Fri, Jul 26, 2013 at 5:48 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
+char *virSystemdMakeScopeName(const char *name, + const char *drivername, + const char *partition) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + if (*partition == '/') + partition++; + + virSystemdEscapeName(&buf, partition); + virBufferAddChar(&buf, '-'); + virSystemdEscapeName(&buf, drivername); + virBufferAddLit(&buf, "\\x2d");
What is the idea behind this? Now we end up with paths like: /sys/fs/cgroup/memory/machine.slice/machine-lxc\x2dmyfunnycontainer.scope
The string prefix 'machine' is a special systemd string. The second part is the libvirt name 'lxc-$containername'. Systemd requires that the '-' be escaped hence we use '\x2d' here. You'll see this in a number of other systemd unit names too.
Srsly? I really hoped that this is a libvirt bug and not something by (systemd-)design. :-\ Thanks, //richard

From: "Daniel P. Berrange" <berrange@redhat.com> Systemd uses a named cgroup mount for tracking processes. Add it as another type of controller, albeit one which we have to special case in a number of places. In particular we must never create/delete directories there, nor add tasks. Essentially the systemd mount is to be considered read-only for libvirt. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------ src/util/vircgroup.h | 1 + tests/vircgrouptest.c | 9 +++++++ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 68ec953..5ff8850 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -57,7 +57,8 @@ VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, "cpu", "cpuacct", "cpuset", "memory", "devices", - "freezer", "blkio", "net_cls", "perf_event"); + "freezer", "blkio", "net_cls", "perf_event", + "name=systemd"); typedef enum { VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */ @@ -115,6 +116,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *tmp; + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (!group->controllers[i].placement) continue; @@ -320,6 +324,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group, if (!group->controllers[i].mountPoint) continue; + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (path[0] == '/') { if (VIR_STRDUP(group->controllers[i].placement, path) < 0) return -1; @@ -375,6 +382,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group, int ret = -1; char *procfile; + VIR_DEBUG("Detecting placement for pid %lld path %s", + (unsigned long long)pid, path); if (pid == -1) { if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0) goto cleanup; @@ -411,6 +420,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group, const char *typestr = virCgroupControllerTypeToString(i); int typelen = strlen(typestr); char *tmp = controllers; + while (tmp) { char *next = strchr(tmp, ','); int len; @@ -427,13 +437,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group, * selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo" */ if (typelen == len && STREQLEN(typestr, tmp, len) && - group->controllers[i].mountPoint != NULL) { - if (virAsprintf(&group->controllers[i].placement, - "%s%s%s", selfpath, - (STREQ(selfpath, "/") || - STREQ(path, "") ? "" : "/"), - path) < 0) - goto cleanup; + group->controllers[i].mountPoint != NULL && + group->controllers[i].placement == NULL) { + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { + if (VIR_STRDUP(group->controllers[i].placement, + selfpath) < 0) + goto cleanup; + } else { + if (virAsprintf(&group->controllers[i].placement, + "%s%s%s", selfpath, + (STREQ(selfpath, "/") || + STREQ(path, "") ? "" : "/"), + path) < 0) + goto cleanup; + } } tmp = next; @@ -524,13 +541,16 @@ static int virCgroupDetect(virCgroupPtr group, return -1; } - if (parent || path[0] == '/') { - if (virCgroupCopyPlacement(group, path, parent) < 0) - return -1; - } else { - if (virCgroupDetectPlacement(group, pid, path) < 0) - return -1; - } + /* In some cases we can copy part of the placement info + * based on the parent cgroup... + */ + if ((parent || path[0] == '/') && + virCgroupCopyPlacement(group, path, parent) < 0) + return -1; + + /* ... but use /proc/cgroups to fill in the rest */ + if (virCgroupDetectPlacement(group, pid, path) < 0) + return -1; /* Check that for every mounted controller, we found our placement */ for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { @@ -822,6 +842,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *path = NULL; + /* We must never mkdir() in systemd's hierachy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { + VIR_DEBUG("Not creating systemd controller group"); + continue; + } + /* Skip over controllers that aren't mounted */ if (!group->controllers[i].mountPoint) { VIR_DEBUG("Skipping unmounted controller %s", @@ -1026,6 +1052,10 @@ int virCgroupRemove(virCgroupPtr group) if (!group->controllers[i].mountPoint) continue; + /* We must never rmdir() in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + /* Don't delete the root group, if we accidentally ended up in it for some reason */ if (STREQ(group->controllers[i].placement, "/")) @@ -1065,6 +1095,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) if (!group->controllers[i].mountPoint) continue; + /* We must never add tasks in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0) goto cleanup; } @@ -1166,6 +1200,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) !dest_group->controllers[i].mountPoint) continue; + /* We must never move tasks in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + /* New threads are created in the same group as their parent; * but if a thread is created after we first read we aren't * aware that it needs to move. Therefore, we must iterate diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 3aaf081..e579f41 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -40,6 +40,7 @@ enum { VIR_CGROUP_CONTROLLER_BLKIO, VIR_CGROUP_CONTROLLER_NET_CLS, VIR_CGROUP_CONTROLLER_PERF_EVENT, + VIR_CGROUP_CONTROLLER_SYSTEMD, VIR_CGROUP_CONTROLLER_LAST }; diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c index 20ac494..4bdd4c9 100644 --- a/tests/vircgrouptest.c +++ b/tests/vircgrouptest.c @@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct", @@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer", [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd", }; const char *links[VIR_CGROUP_CONTROLLER_LAST] = { @@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/", [VIR_CGROUP_CONTROLLER_BLKIO] = "/", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if (virCgroupNewSelf(&cgroup) < 0) { @@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition", @@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) { @@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) { @@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) { @@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc", [VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) { @@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) { -- 1.8.1.4

On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Systemd uses a named cgroup mount for tracking processes. Add it as another type of controller, albeit one which we have to special case in a number of places. In particular we must never create/delete directories there, nor add tasks. Essentially the systemd mount is to be considered read-only for libvirt.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------ src/util/vircgroup.h | 1 + tests/vircgrouptest.c | 9 +++++++ 3 files changed, 63 insertions(+), 15 deletions(-)
@@ -524,13 +541,16 @@ static int virCgroupDetect(virCgroupPtr group, return -1; }
- if (parent || path[0] == '/') { - if (virCgroupCopyPlacement(group, path, parent) < 0) - return -1; - } else { - if (virCgroupDetectPlacement(group, pid, path) < 0) - return -1;
This previously called only one of the two functions...
- } + /* In some cases we can copy part of the placement info + * based on the parent cgroup... + */ + if ((parent || path[0] == '/') && + virCgroupCopyPlacement(group, path, parent) < 0) + return -1; + + /* ... but use /proc/cgroups to fill in the rest */ + if (virCgroupDetectPlacement(group, pid, path) < 0)
...now, if virCgroupCopyPlacement returns 0, it calls both functions. Is that intentional?
@@ -822,6 +842,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *path = NULL;
+ /* We must never mkdir() in systemd's hierachy */
s/hierachy/hierarchy/
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { + VIR_DEBUG("Not creating systemd controller group"); + continue; + } + /* Skip over controllers that aren't mounted */ if (!group->controllers[i].mountPoint) { VIR_DEBUG("Skipping unmounted controller %s", @@ -1026,6 +1052,10 @@ int virCgroupRemove(virCgroupPtr group) if (!group->controllers[i].mountPoint) continue;
+ /* We must never rmdir() in systemd's hiearchy */
s/hiearchy/hierarchy/ (hmm, you copied-and-pasted, but ended up with a different typo between the two comments?)
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + /* Don't delete the root group, if we accidentally ended up in it for some reason */ if (STREQ(group->controllers[i].placement, "/")) @@ -1065,6 +1095,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) if (!group->controllers[i].mountPoint) continue;
+ /* We must never add tasks in systemd's hiearchy */
s/hiearchy/hierarchy/
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0) goto cleanup; } @@ -1166,6 +1200,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) !dest_group->controllers[i].mountPoint) continue;
+ /* We must never move tasks in systemd's hiearchy */
s/hiearchy/hierarchy/ Conditional ACK if you can address my question and fix the typos. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Tue, Jul 30, 2013 at 12:13:58PM -0600, Eric Blake wrote:
On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Systemd uses a named cgroup mount for tracking processes. Add it as another type of controller, albeit one which we have to special case in a number of places. In particular we must never create/delete directories there, nor add tasks. Essentially the systemd mount is to be considered read-only for libvirt.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------ src/util/vircgroup.h | 1 + tests/vircgrouptest.c | 9 +++++++ 3 files changed, 63 insertions(+), 15 deletions(-)
@@ -524,13 +541,16 @@ static int virCgroupDetect(virCgroupPtr group, return -1; }
- if (parent || path[0] == '/') { - if (virCgroupCopyPlacement(group, path, parent) < 0) - return -1; - } else { - if (virCgroupDetectPlacement(group, pid, path) < 0) - return -1;
This previously called only one of the two functions...
- } + /* In some cases we can copy part of the placement info + * based on the parent cgroup... + */ + if ((parent || path[0] == '/') && + virCgroupCopyPlacement(group, path, parent) < 0) + return -1; + + /* ... but use /proc/cgroups to fill in the rest */ + if (virCgroupDetectPlacement(group, pid, path) < 0)
...now, if virCgroupCopyPlacement returns 0, it calls both functions. Is that intentional?
Yes, when we use CopyPlacement it sets up all mounts, except for the systemd mount, which we must always probe for. The DetectPlacement method was changed so that it only fills in data, not already filled in by CopyPlacement. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 07/30/2013 12:49 PM, Daniel P. Berrange wrote:
On Tue, Jul 30, 2013 at 12:13:58PM -0600, Eric Blake wrote:
On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Systemd uses a named cgroup mount for tracking processes. Add it as another type of controller, albeit one which we have to special case in a number of places. In particular we must never create/delete directories there, nor add tasks. Essentially the systemd mount is to be considered read-only for libvirt.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
- } + /* In some cases we can copy part of the placement info + * based on the parent cgroup... + */ + if ((parent || path[0] == '/') && + virCgroupCopyPlacement(group, path, parent) < 0) + return -1; + + /* ... but use /proc/cgroups to fill in the rest */ + if (virCgroupDetectPlacement(group, pid, path) < 0)
...now, if virCgroupCopyPlacement returns 0, it calls both functions. Is that intentional?
Yes, when we use CopyPlacement it sets up all mounts, except for the systemd mount, which we must always probe for. The DetectPlacement method was changed so that it only fills in data, not already filled in by CopyPlacement.
That explains it, then. Probably worth mentioning in the commit message, so the next reader won't miss it like I did. Condition has been met, ACK is now granted with the typo fixes. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> When systemd is involved in managing processes, it may start killing off & tearing down croups associated with the process while we're still doing virCgroupKillPainfully. We must explicitly check for ENOENT and treat it as if we had finished killing processes Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 5ff8850..94f6692 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -2539,6 +2539,12 @@ static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr while (!done) { done = true; if (!(fp = fopen(keypath, "r"))) { + if (errno == ENOENT) { + VIR_DEBUG("No file %s, assuming done", keypath); + killedAny = false; + goto done; + } + virReportSystemError(errno, _("Failed to read %s"), keypath); @@ -2578,6 +2584,7 @@ static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr } } + done: ret = killedAny ? 1 : 0; cleanup: @@ -2647,8 +2654,13 @@ static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHas if (rc == 1) killedAny = true; - VIR_DEBUG("Iterate over children of %s", keypath); + VIR_DEBUG("Iterate over children of %s (killedAny=%d)", keypath, killedAny); if (!(dp = opendir(keypath))) { + if (errno == ENOENT) { + VIR_DEBUG("Path %s does not exist, assuming done", keypath); + killedAny = false; + goto done; + } virReportSystemError(errno, _("Cannot open %s"), keypath); return -1; @@ -2678,6 +2690,7 @@ static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHas virCgroupFree(&subgroup); } + done: ret = killedAny ? 1 : 0; cleanup: -- 1.8.1.4

On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
When systemd is involved in managing processes, it may start killing off & tearing down croups associated with the process while we're still doing virCgroupKillPainfully. We must explicitly check for ENOENT and treat it as if we had finished killing processes
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> Make the virCgroupNewMachine method try to use systemd-machined first. If that fails, then fallback to using the traditional cgroup setup code path. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/lxc/lxc_process.c | 10 +-- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 187 ++++++++++++++++++++++++++++++++++++++++++++----- src/util/vircgroup.h | 1 + src/util/virsystemd.c | 5 +- 5 files changed, 182 insertions(+), 22 deletions(-) diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 247e516..0a28305 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1203,8 +1203,9 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } - if (virCgroupNewDetectMachine(vm->def->name, "lxc", - vm->pid, -1, &priv->cgroup) < 0) + if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, + vm->def->resource->partition, + -1, &priv->cgroup) < 0) goto error; if (!priv->cgroup) { @@ -1411,8 +1412,9 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm, if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) goto error; - if (virCgroupNewDetectMachine(vm->def->name, "lxc", - vm->pid, -1, &priv->cgroup) < 0) + if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, + vm->def->resource->partition, + -1, &priv->cgroup) < 0) goto error; if (!priv->cgroup) { diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 9f6b251..787ddeb 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -707,6 +707,7 @@ qemuConnectCgroup(virQEMUDriverPtr driver, if (virCgroupNewDetectMachine(vm->def->name, "qemu", vm->pid, + vm->def->resource->partition, cfg->cgroupControllers, &priv->cgroup) < 0) goto cleanup; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 94f6692..e857362 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -50,6 +50,7 @@ #include "virhash.h" #include "virhashcode.h" #include "virstring.h" +#include "virsystemd.h" #define CGROUP_MAX_VAL 512 @@ -100,11 +101,13 @@ static bool virCgroupValidateMachineGroup(virCgroupPtr group, const char *name, const char *drivername, + const char *partition, bool stripEmulatorSuffix) { size_t i; bool valid = false; char *partname; + char *scopename; if (virAsprintf(&partname, "%s.libvirt-%s", name, drivername) < 0) @@ -113,6 +116,15 @@ virCgroupValidateMachineGroup(virCgroupPtr group, if (virCgroupPartitionEscape(&partname) < 0) goto cleanup; + if (!partition) + partition = "/machine"; + + if (!(scopename = virSystemdMakeScopeName(name, drivername, partition))) + goto cleanup; + + if (virCgroupPartitionEscape(&scopename) < 0) + goto cleanup; + for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *tmp; @@ -140,9 +152,10 @@ virCgroupValidateMachineGroup(virCgroupPtr group, tmp++; if (STRNEQ(tmp, name) && - STRNEQ(tmp, partname)) { - VIR_DEBUG("Name '%s' for controller '%s' does not match '%s' or '%s'", - tmp, virCgroupControllerTypeToString(i), name, partname); + STRNEQ(tmp, partname) && + STRNEQ(tmp, scopename)) { + VIR_DEBUG("Name '%s' for controller '%s' does not match '%s', '%s' or '%s'", + tmp, virCgroupControllerTypeToString(i), name, partname, scopename); goto cleanup; } } @@ -151,6 +164,7 @@ virCgroupValidateMachineGroup(virCgroupPtr group, cleanup: VIR_FREE(partname); + VIR_FREE(scopename); return valid; } @@ -1638,6 +1652,7 @@ int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, int virCgroupNewDetectMachine(const char *name, const char *drivername, pid_t pid, + const char *partition, int controllers, virCgroupPtr *group) { @@ -1647,7 +1662,7 @@ int virCgroupNewDetectMachine(const char *name, return -1; } - if (!virCgroupValidateMachineGroup(*group, name, drivername, true)) { + if (!virCgroupValidateMachineGroup(*group, name, drivername, partition, true)) { VIR_DEBUG("Failed to validate machine name for '%s' driver '%s'", name, drivername); virCgroupFree(group); @@ -1657,22 +1672,124 @@ int virCgroupNewDetectMachine(const char *name, return 0; } -int virCgroupNewMachine(const char *name, - const char *drivername, - bool privileged ATTRIBUTE_UNUSED, - const unsigned char *uuid ATTRIBUTE_UNUSED, - const char *rootdir ATTRIBUTE_UNUSED, - pid_t pidleader ATTRIBUTE_UNUSED, - bool isContainer ATTRIBUTE_UNUSED, - const char *partition, - int controllers, - virCgroupPtr *group) +/* + * Retujrns 0 on success, -1 on fatal error, -2 on systemd not available + */ +static int +virCgroupNewMachineSystemd(const char *name, + const char *drivername, + bool privileged, + const unsigned char *uuid, + const char *rootdir, + pid_t pidleader, + bool isContainer, + const char *partition, + int controllers, + virCgroupPtr *group) { - virCgroupPtr parent = NULL; int ret = -1; + int rv; + virCgroupPtr init, parent = NULL; + char *path = NULL; + char *offset; + + VIR_DEBUG("Trying to setup machine '%s' via systemd", name); + if ((rv = virSystemdCreateMachine(name, + drivername, + privileged, + uuid, + rootdir, + pidleader, + isContainer, + partition)) < 0) + return rv; + + if (controllers != -1) + controllers |= (1 << VIR_CGROUP_CONTROLLER_SYSTEMD); + + VIR_DEBUG("Detecting systemd placement"); + if (virCgroupNewDetect(pidleader, + controllers, + &init) < 0) + return -1; - *group = NULL; + path = (init)->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement; + (init)->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement = NULL; + virCgroupFree(&init); + + if (!path || STREQ(path, "/") || path[0] != '/') { + VIR_DEBUG("Systemd didn't setup its controller"); + ret = -2; + goto cleanup; + } + + offset = path; + + if (virCgroupNew(pidleader, + "", + NULL, + controllers, + &parent) < 0) + goto cleanup; + + + for (;;) { + virCgroupPtr tmp; + char *t = strchr(offset + 1, '/'); + if (t) + *t = '\0'; + + if (virCgroupNew(pidleader, + path, + parent, + controllers, + &tmp) < 0) + goto cleanup; + + if (virCgroupMakeGroup(parent, tmp, true, VIR_CGROUP_NONE) < 0) { + virCgroupFree(&tmp); + goto cleanup; + } + if (t) { + *t = '/'; + offset = t; + virCgroupFree(&parent); + parent = tmp; + } else { + *group = tmp; + break; + } + } + + if (virCgroupAddTask(*group, pidleader) < 0) { + virErrorPtr saved = virSaveLastError(); + virCgroupRemove(*group); + virCgroupFree(group); + if (saved) { + virSetError(saved); + virFreeError(saved); + } + } + + ret = 0; + cleanup: + virCgroupFree(&parent); + VIR_FREE(path); + return ret; +} +static int +virCgroupNewMachineManual(const char *name, + const char *drivername, + pid_t pidleader, + const char *partition, + int controllers, + virCgroupPtr *group) +{ + virCgroupPtr parent = NULL; + int ret = -1; + + VIR_DEBUG("Fallback to non-systemd setup"); if (virCgroupNewPartition(partition, STREQ(partition, "/machine"), controllers, @@ -1708,6 +1825,44 @@ cleanup: return ret; } +int virCgroupNewMachine(const char *name, + const char *drivername, + bool privileged, + const unsigned char *uuid, + const char *rootdir, + pid_t pidleader, + bool isContainer, + const char *partition, + int controllers, + virCgroupPtr *group) +{ + int rv; + + *group = NULL; + + if ((rv = virCgroupNewMachineSystemd(name, + drivername, + privileged, + uuid, + rootdir, + pidleader, + isContainer, + partition, + controllers, + group)) == 0) + return 0; + + if (rv == -1) + return -1; + + return virCgroupNewMachineManual(name, + drivername, + pidleader, + partition, + controllers, + group); +} + bool virCgroupNewIgnoreError(void) { if (virLastErrorIsSystemErrno(ENXIO) || diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index e579f41..d7ce892 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -83,6 +83,7 @@ int virCgroupNewDetect(pid_t pid, int virCgroupNewDetectMachine(const char *name, const char *drivername, pid_t pid, + const char *partition, int controllers, virCgroupPtr *group); diff --git a/src/util/virsystemd.c b/src/util/virsystemd.c index 251b846..3fd653d 100644 --- a/src/util/virsystemd.c +++ b/src/util/virsystemd.c @@ -225,8 +225,9 @@ int virSystemdCreateMachine(const char *name, iscontainer ? "container" : "vm", (unsigned int)pidleader, rootdir ? rootdir : "", - 1, "Slice", "s", - slicename) < 0) { + 1, + "Slice", "s", slicename + ) < 0) { virErrorPtr err = virGetLastError(); if (err->code == VIR_ERR_DBUS_SERVICE && STREQ(err->str2, "org.freedesktop.DBus.Error.ServiceUnknown")) { -- 1.8.1.4

On 07/26/2013 09:48 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Make the virCgroupNewMachine method try to use systemd-machined first. If that fails, then fallback to using the traditional cgroup setup code path.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/lxc/lxc_process.c | 10 +-- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 187 ++++++++++++++++++++++++++++++++++++++++++++----- src/util/vircgroup.h | 1 + src/util/virsystemd.c | 5 +- 5 files changed, 182 insertions(+), 22 deletions(-)
+/* + * Retujrns 0 on success, -1 on fatal error, -2 on systemd not available + */
s/Retujrns/Returns/
- *group = NULL; + path = (init)->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement; + (init)->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement = NULL;
s/(init)/init/ (looks like you refactored, and had (*init) at one point)
+++ b/src/util/virsystemd.c @@ -225,8 +225,9 @@ int virSystemdCreateMachine(const char *name, iscontainer ? "container" : "vm", (unsigned int)pidleader, rootdir ? rootdir : "", - 1, "Slice", "s", - slicename) < 0) { + 1, + "Slice", "s", slicename + ) < 0) {
Spurious reformatting? ACK with nits fixed. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Fri, Jul 26, 2013 at 04:48:20PM +0100, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
This is a much changed / expanded version of my previous work to create cgroups via systemd. The difference is that this time it actually works :-)
I'm not proposing this for merge until after the 1.1.1 release.
This is now ready for review...
Daniel P. Berrange (4): Add APIs for formatting systemd slice/scope names Add support for systemd cgroup mount Cope with races while killing processes Enable support for systemd-machined in cgroups creation
src/libvirt_private.syms | 2 + src/lxc/lxc_process.c | 10 +- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 270 +++++++++++++++++++++++++++++++++++++++++------ src/util/vircgroup.h | 2 + src/util/virsystemd.c | 96 ++++++++++++++++- src/util/virsystemd.h | 5 + tests/vircgrouptest.c | 9 ++ tests/virsystemdtest.c | 48 +++++++++ 9 files changed, 403 insertions(+), 40 deletions(-)
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (4)
-
Daniel P. Berrange
-
Eric Blake
-
Richard Weinberger
-
Richard Weinberger