[libvirt] [PATCH] Fix launching of VMs on when only logind part of systemd is present

From: "Daniel P. Berrange" <berrange@redhat.com> Debian systems may run the 'systemd-logind' daemon, which causes the /sys/fs/cgroup/systemd mount to be setup, but no other cgroup controllers are created. While the LXC driver considers cgroups to be mandatory, the QEMU driver is supposed to accept them as optional. We detect whether they are present by looking in /proc/mounts for any mounts of type 'cgroups', but this is not sufficient. We need to skip any named mounts (as seen by a name=XXX string in the mount options), so that we only detect actual resource controllers. http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=721979 Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 5 +++- tests/vircgroupmock.c | 40 ++++++++++++++++++++++++--- tests/vircgrouptest.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 4 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index a260356..626bbc6 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -91,7 +91,10 @@ virCgroupAvailable(void) return false; while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup")) { + /* We're looking for at least one 'cgroup' fs mount, + * which is *not* a named mount. */ + if (STREQ(entry.mnt_type, "cgroup") && + !strstr(entry.mnt_opts, "name=")) { ret = true; break; } diff --git a/tests/vircgroupmock.c b/tests/vircgroupmock.c index d83496c..adc1718 100644 --- a/tests/vircgroupmock.c +++ b/tests/vircgroupmock.c @@ -124,6 +124,27 @@ const char *proccgroupsallinone = "devices 6 1 1\n" "blkio 6 1 1\n"; +const char *procmountslogind = + "none /not/really/sys/fs/cgroup tmpfs rw,rootcontext=system_u:object_r:sysfs_t:s0,seclabel,relatime,size=4k,mode=755 0 0\n" + "systemd /not/really/sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,name=systemd 0 0\n"; + +const char *procselfcgroupslogind = + "1:name=systemd:/\n"; + +const char *proccgroupslogind = + "#subsys_name hierarchy num_cgroups enabled\n" + "cpuset 0 1 1\n" + "cpu 0 1 1\n" + "cpuacct 0 1 1\n" + "memory 0 1 0\n" + "devices 0 1 1\n" + "freezer 0 1 1\n" + "net_cls 0 1 1\n" + "blkio 0 1 1\n" + "perf_event 0 1 1\n"; + + + static int make_file(const char *path, const char *name, const char *value) @@ -400,18 +421,25 @@ static void init_sysfs(void) FILE *fopen(const char *path, const char *mode) { const char *mock; - bool allinone = false; + bool allinone = false, logind = false; init_syms(); mock = getenv("VIR_CGROUP_MOCK_MODE"); - if (mock && STREQ(mock, "allinone")) - allinone = true; + if (mock) { + if (STREQ(mock, "allinone")) + allinone = true; + else if (STREQ(mock, "logind")) + logind = true; + } if (STREQ(path, "/proc/mounts")) { if (STREQ(mode, "r")) { if (allinone) return fmemopen((void *)procmountsallinone, strlen(procmountsallinone), mode); + else if (logind) + return fmemopen((void *)procmountslogind, + strlen(procmountslogind), mode); else return fmemopen((void *)procmounts, strlen(procmounts), mode); } else { @@ -424,6 +452,9 @@ FILE *fopen(const char *path, const char *mode) if (allinone) return fmemopen((void *)proccgroupsallinone, strlen(proccgroupsallinone), mode); + else if (logind) + return fmemopen((void *)proccgroupslogind, + strlen(proccgroupslogind), mode); else return fmemopen((void *)proccgroups, strlen(proccgroups), mode); } else { @@ -436,6 +467,9 @@ FILE *fopen(const char *path, const char *mode) if (allinone) return fmemopen((void *)procselfcgroupsallinone, strlen(procselfcgroupsallinone), mode); + else if (logind) + return fmemopen((void *)procselfcgroupslogind, + strlen(procselfcgroupslogind), mode); else return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode); } else { diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c index f12587c..570e061 100644 --- a/tests/vircgrouptest.c +++ b/tests/vircgrouptest.c @@ -109,6 +109,16 @@ const char *mountsAllInOne[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup", [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; +const char *mountsLogind[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd", +}; const char *links[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu", @@ -132,6 +142,17 @@ const char *linksAllInOne[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; +const char *linksLogind[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, +}; + static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED) { @@ -466,6 +487,48 @@ cleanup: } +static int testCgroupNewForSelfLogind(const void *args ATTRIBUTE_UNUSED) +{ + virCgroupPtr cgroup = NULL; + int ret = -1; + const char *placement[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/", + }; + + if (virCgroupNewSelf(&cgroup) < 0) { + fprintf(stderr, "Cannot create cgroup for self\n"); + goto cleanup; + } + + ret = validateCgroup(cgroup, "", mountsLogind, linksLogind, placement); + +cleanup: + virCgroupFree(&cgroup); + return ret; +} + + +static int testCgroupAvailable(const void *args) +{ + bool got = virCgroupAvailable(); + bool want = args == (void*)0x1; + + if (got != want) { + fprintf(stderr, "Expected cgroup %savailable, but state was wrong\n", + want ? "" : "not "); + return -1; + } + + return 0; +} + # define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX" @@ -505,9 +568,21 @@ mymain(void) if (virtTestRun("New cgroup for domain partition escaped", 1, testCgroupNewForPartitionDomainEscaped, NULL) < 0) ret = -1; + if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x1) < 0) + ret = -1; + setenv("VIR_CGROUP_MOCK_MODE", "allinone", 1); if (virtTestRun("New cgroup for self (allinone)", 1, testCgroupNewForSelfAllInOne, NULL) < 0) ret = -1; + if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x1) < 0) + ret = -1; + unsetenv("VIR_CGROUP_MOCK_MODE"); + + setenv("VIR_CGROUP_MOCK_MODE", "logind", 1); + if (virtTestRun("New cgroup for self (logind)", 1, testCgroupNewForSelfLogind, NULL) < 0) + ret = -1; + if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x0) < 0) + ret = -1; unsetenv("VIR_CGROUP_MOCK_MODE"); if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) -- 1.8.3.1

On Wed, Sep 11, 2013 at 1:18 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Debian systems may run the 'systemd-logind' daemon, which causes the /sys/fs/cgroup/systemd mount to be setup, but no other cgroup controllers are created. While the LXC driver considers cgroups to be mandatory, the QEMU driver is supposed to accept them as optional.
We detect whether they are present by looking in /proc/mounts for any mounts of type 'cgroups', but this is not sufficient. We need to skip any named mounts (as seen by a name=XXX string in the mount options), so that we only detect actual resource controllers.
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=721979
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 5 +++- tests/vircgroupmock.c | 40 ++++++++++++++++++++++++--- tests/vircgrouptest.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 4 deletions(-)
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index a260356..626bbc6 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -91,7 +91,10 @@ virCgroupAvailable(void) return false;
while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STREQ(entry.mnt_type, "cgroup")) { + /* We're looking for at least one 'cgroup' fs mount, + * which is *not* a named mount. */ + if (STREQ(entry.mnt_type, "cgroup") && + !strstr(entry.mnt_opts, "name=")) { ret = true; break; } diff --git a/tests/vircgroupmock.c b/tests/vircgroupmock.c index d83496c..adc1718 100644 --- a/tests/vircgroupmock.c +++ b/tests/vircgroupmock.c @@ -124,6 +124,27 @@ const char *proccgroupsallinone = "devices 6 1 1\n" "blkio 6 1 1\n";
+const char *procmountslogind = + "none /not/really/sys/fs/cgroup tmpfs rw,rootcontext=system_u:object_r:sysfs_t:s0,seclabel,relatime,size=4k,mode=755 0 0\n" + "systemd /not/really/sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,name=systemd 0 0\n"; + +const char *procselfcgroupslogind = + "1:name=systemd:/\n"; + +const char *proccgroupslogind = + "#subsys_name hierarchy num_cgroups enabled\n" + "cpuset 0 1 1\n" + "cpu 0 1 1\n" + "cpuacct 0 1 1\n" + "memory 0 1 0\n" + "devices 0 1 1\n" + "freezer 0 1 1\n" + "net_cls 0 1 1\n" + "blkio 0 1 1\n" + "perf_event 0 1 1\n"; + + + static int make_file(const char *path, const char *name, const char *value) @@ -400,18 +421,25 @@ static void init_sysfs(void) FILE *fopen(const char *path, const char *mode) { const char *mock; - bool allinone = false; + bool allinone = false, logind = false; init_syms();
mock = getenv("VIR_CGROUP_MOCK_MODE"); - if (mock && STREQ(mock, "allinone")) - allinone = true; + if (mock) { + if (STREQ(mock, "allinone")) + allinone = true; + else if (STREQ(mock, "logind")) + logind = true; + }
if (STREQ(path, "/proc/mounts")) { if (STREQ(mode, "r")) { if (allinone) return fmemopen((void *)procmountsallinone, strlen(procmountsallinone), mode); + else if (logind) + return fmemopen((void *)procmountslogind, + strlen(procmountslogind), mode); else return fmemopen((void *)procmounts, strlen(procmounts), mode); } else { @@ -424,6 +452,9 @@ FILE *fopen(const char *path, const char *mode) if (allinone) return fmemopen((void *)proccgroupsallinone, strlen(proccgroupsallinone), mode); + else if (logind) + return fmemopen((void *)proccgroupslogind, + strlen(proccgroupslogind), mode); else return fmemopen((void *)proccgroups, strlen(proccgroups), mode); } else { @@ -436,6 +467,9 @@ FILE *fopen(const char *path, const char *mode) if (allinone) return fmemopen((void *)procselfcgroupsallinone, strlen(procselfcgroupsallinone), mode); + else if (logind) + return fmemopen((void *)procselfcgroupslogind, + strlen(procselfcgroupslogind), mode); else return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode); } else { diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c index f12587c..570e061 100644 --- a/tests/vircgrouptest.c +++ b/tests/vircgrouptest.c @@ -109,6 +109,16 @@ const char *mountsAllInOne[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup", [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; +const char *mountsLogind[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd", +};
const char *links[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu", @@ -132,6 +142,17 @@ const char *linksAllInOne[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, };
+const char *linksLogind[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, +}; +
static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED) { @@ -466,6 +487,48 @@ cleanup: }
+static int testCgroupNewForSelfLogind(const void *args ATTRIBUTE_UNUSED) +{ + virCgroupPtr cgroup = NULL; + int ret = -1; + const char *placement[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = NULL, + [VIR_CGROUP_CONTROLLER_CPUACCT] = NULL, + [VIR_CGROUP_CONTROLLER_CPUSET] = NULL, + [VIR_CGROUP_CONTROLLER_MEMORY] = NULL, + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, + [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/", + }; + + if (virCgroupNewSelf(&cgroup) < 0) { + fprintf(stderr, "Cannot create cgroup for self\n"); + goto cleanup; + } + + ret = validateCgroup(cgroup, "", mountsLogind, linksLogind, placement); + +cleanup: + virCgroupFree(&cgroup); + return ret; +} + + +static int testCgroupAvailable(const void *args) +{ + bool got = virCgroupAvailable(); + bool want = args == (void*)0x1; + + if (got != want) { + fprintf(stderr, "Expected cgroup %savailable, but state was wrong\n",
Missing a space between the %s and available.
+ want ? "" : "not "); + return -1; + } + + return 0; +} +
# define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX"
@@ -505,9 +568,21 @@ mymain(void) if (virtTestRun("New cgroup for domain partition escaped", 1, testCgroupNewForPartitionDomainEscaped, NULL) < 0) ret = -1;
+ if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x1) < 0) + ret = -1; + setenv("VIR_CGROUP_MOCK_MODE", "allinone", 1); if (virtTestRun("New cgroup for self (allinone)", 1, testCgroupNewForSelfAllInOne, NULL) < 0) ret = -1; + if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x1) < 0) + ret = -1; + unsetenv("VIR_CGROUP_MOCK_MODE"); + + setenv("VIR_CGROUP_MOCK_MODE", "logind", 1); + if (virtTestRun("New cgroup for self (logind)", 1, testCgroupNewForSelfLogind, NULL) < 0) + ret = -1; + if (virtTestRun("Cgroup available", 1, testCgroupAvailable, (void*)0x0) < 0) + ret = -1; unsetenv("VIR_CGROUP_MOCK_MODE");
if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) -- 1.8.3.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
ACK with the fix in the white space. -- Doug Goldstein

On 09/11/2013 12:54 PM, Doug Goldstein wrote:
+ if (got != want) { + fprintf(stderr, "Expected cgroup %savailable, but state was wrong\n",
Missing a space between the %s and available.
+ want ? "" : "not ");
Intentional; it gives: 'Expected cgroup available' or 'Expected cgroup not available', depending on the expected outcome of the test.
ACK with the fix in the white space.
Actually, looked fine as-is. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (3)
-
Daniel P. Berrange
-
Doug Goldstein
-
Eric Blake