[libvirt] [PATCH 0/5] Fix detecting cgroups at libvirtd restart with QEMU

From: "Daniel P. Berrange" <berrange@redhat.com> The recent refactoring of cgroups broke the ability to detect cgroups for running guests in the QEMU driver during libvirtd startup. This was due to it not considering the existance of the 'emulator' child group, as well as not honouring the 'cgroups_controllers' setting when it was present Daniel P. Berrange (5): Introduce a more convenient virCgroupNewDetectMachine Make virCgroupIsValidMachine static Fix detection of 'emulator' cgroup Add 'controllers' arg to virCgroupNewDetect Skip detecting placement if controller is disabled src/libvirt_private.syms | 2 +- src/lxc/lxc_process.c | 20 +++++++--------- src/qemu/qemu_cgroup.c | 17 ++++---------- src/util/vircgroup.c | 60 +++++++++++++++++++++++++++++++++++++++++------- src/util/vircgroup.h | 12 ++++++---- 5 files changed, 73 insertions(+), 38 deletions(-) -- 1.8.1.4

From: "Daniel P. Berrange" <berrange@redhat.com> Instead of requiring drivers to use a combination of calls to virCgroupNewDetect and virCgroupIsValidMachine, combine the two into virCgroupNewDetectMachine Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_process.c | 20 ++++++++------------ src/qemu/qemu_cgroup.c | 16 ++++------------ src/util/vircgroup.c | 22 ++++++++++++++++++++++ src/util/vircgroup.h | 5 +++++ 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d5ec146..b076e60 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1192,6 +1192,7 @@ virCgroupKillPainfully; virCgroupKillRecursive; virCgroupMoveTask; virCgroupNewDetect; +virCgroupNewDetectMachine; virCgroupNewDomainPartition; virCgroupNewEmulator; virCgroupNewIgnoreError; diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 06ead9f..e632e13 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1189,16 +1189,14 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } - if (virCgroupNewDetect(vm->pid, &priv->cgroup) < 0) + if (virCgroupNewDetectMachine(vm->def->name, "lxc", + vm->pid, &priv->cgroup) < 0) goto error; - if (!virCgroupIsValidMachineGroup(priv->cgroup, - vm->def->name, - "lxc")) { + if (!priv->cgroup) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cgroup name is not valid for machine %s"), + _("No valid cgroup for machine %s"), vm->def->name); - virCgroupFree(&priv->cgroup); goto error; } @@ -1399,16 +1397,14 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm, if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) goto error; - if (virCgroupNewDetect(vm->pid, &priv->cgroup) < 0) + if (virCgroupNewDetectMachine(vm->def->name, "lxc", + vm->pid, &priv->cgroup) < 0) goto error; - if (!virCgroupIsValidMachineGroup(priv->cgroup, - vm->def->name, - "lxc")) { + if (!priv->cgroup) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cgroup name is not valid for machine %s"), + _("No valid cgroup for machine %s"), vm->def->name); - virCgroupFree(&priv->cgroup); goto error; } diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index bca8630..07e901c 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -704,19 +704,11 @@ qemuConnectCgroup(virQEMUDriverPtr driver, virCgroupFree(&priv->cgroup); - if (virCgroupNewDetect(vm->pid, &priv->cgroup) < 0) { - if (virCgroupNewIgnoreError()) - goto done; + if (virCgroupNewDetectMachine(vm->def->name, + "qemu", + vm->pid, + &priv->cgroup) < 0) goto cleanup; - } - - if (!virCgroupIsValidMachineGroup(priv->cgroup, - vm->def->name, - "qemu")) { - VIR_DEBUG("Cgroup name is not valid for machine"); - virCgroupFree(&priv->cgroup); - goto done; - } done: ret = 0; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 87325c0..fe6c314 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1575,6 +1575,28 @@ int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, } #endif +/* + * Returns 0 on success, -1 on fatal error, -2 on no valid cgroup + */ +int virCgroupNewDetectMachine(const char *name, + const char *drivername, + pid_t pid, + virCgroupPtr *group) +{ + if (virCgroupNewDetect(pid, group) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + return -1; + } + + if (!virCgroupIsValidMachineGroup(*group, name, drivername)) { + virCgroupFree(group); + return 0; + } + + return 0; +} + int virCgroupNewMachine(const char *name, const char *drivername, bool privileged ATTRIBUTE_UNUSED, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index e47367c..4f72aa8 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -83,6 +83,11 @@ int virCgroupNewEmulator(virCgroupPtr domain, int virCgroupNewDetect(pid_t pid, virCgroupPtr *group); +int virCgroupNewDetectMachine(const char *name, + const char *drivername, + pid_t pid, + virCgroupPtr *group); + int virCgroupNewMachine(const char *name, const char *drivername, bool privileged, -- 1.8.1.4

On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Instead of requiring drivers to use a combination of calls to virCgroupNewDetect and virCgroupIsValidMachine, combine the two into virCgroupNewDetectMachine
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_process.c | 20 ++++++++------------ src/qemu/qemu_cgroup.c | 16 ++++------------ src/util/vircgroup.c | 22 ++++++++++++++++++++++ src/util/vircgroup.h | 5 +++++ 5 files changed, 40 insertions(+), 24 deletions(-)
@@ -1575,6 +1575,28 @@ int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, } #endif
+/* + * Returns 0 on success, -1 on fatal error, -2 on no valid cgroup + */ +int virCgroupNewDetectMachine(const char *name, + const char *drivername, + pid_t pid, + virCgroupPtr *group) +{ + if (virCgroupNewDetect(pid, group) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + return -1; + } + + if (!virCgroupIsValidMachineGroup(*group, name, drivername)) { + virCgroupFree(group); + return 0;
Huh? This says you are returning success. Also, none of the lxc or qemu callers checked for a -2 return; do you really need the differentiated return type? -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Thu, Jul 25, 2013 at 11:37:12AM -0600, Eric Blake wrote:
On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Instead of requiring drivers to use a combination of calls to virCgroupNewDetect and virCgroupIsValidMachine, combine the two into virCgroupNewDetectMachine
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_process.c | 20 ++++++++------------ src/qemu/qemu_cgroup.c | 16 ++++------------ src/util/vircgroup.c | 22 ++++++++++++++++++++++ src/util/vircgroup.h | 5 +++++ 5 files changed, 40 insertions(+), 24 deletions(-)
@@ -1575,6 +1575,28 @@ int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, } #endif
+/* + * Returns 0 on success, -1 on fatal error, -2 on no valid cgroup + */ +int virCgroupNewDetectMachine(const char *name, + const char *drivername, + pid_t pid, + virCgroupPtr *group) +{ + if (virCgroupNewDetect(pid, group) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + return -1; + } + + if (!virCgroupIsValidMachineGroup(*group, name, drivername)) { + virCgroupFree(group); + return 0;
Huh? This says you are returning success. Also, none of the lxc or qemu callers checked for a -2 return; do you really need the differentiated return type?
Opps the comment is wrong. I originally had it returning -2, but I removed that and just useed '0' and let the caller check if 'group != NULL' instead. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 07/25/2013 12:11 PM, Daniel P. Berrange wrote:
+/* + * Returns 0 on success, -1 on fatal error, -2 on no valid cgroup + */ +int virCgroupNewDetectMachine(const char *name,
+ + if (!virCgroupIsValidMachineGroup(*group, name, drivername)) { + virCgroupFree(group); + return 0;
Huh? This says you are returning success. Also, none of the lxc or qemu callers checked for a -2 return; do you really need the differentiated return type?
Opps the comment is wrong. I originally had it returning -2, but I removed that and just useed '0' and let the caller check if 'group != NULL' instead.
Ah, then ACK with a fixed doc comment that describes the real convention. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> The virCgroupIsValidMachine does not need to be called from outside the cgroups file now, so make it static. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 - src/util/vircgroup.c | 7 ++++--- src/util/vircgroup.h | 5 ----- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index b076e60..d9615ea 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1186,7 +1186,6 @@ virCgroupGetMemSwapHardLimit; virCgroupGetMemSwapUsage; virCgroupHasController; virCgroupIsolateMount; -virCgroupIsValidMachineGroup; virCgroupKill; virCgroupKillPainfully; virCgroupKillRecursive; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index fe6c314..308f1a1 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -95,9 +95,10 @@ bool virCgroupAvailable(void) return ret; } -bool virCgroupIsValidMachineGroup(virCgroupPtr group, - const char *name, - const char *drivername) +static bool +virCgroupIsValidMachineGroup(virCgroupPtr group, + const char *name, + const char *drivername) { size_t i; bool valid = false; diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 4f72aa8..d6222d7 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -48,11 +48,6 @@ VIR_ENUM_DECL(virCgroupController); bool virCgroupAvailable(void); -bool virCgroupIsValidMachineGroup(virCgroupPtr group, - const char *machinename, - const char *drivername); - - int virCgroupNewPartition(const char *path, bool create, int controllers, -- 1.8.1.4

On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
The virCgroupIsValidMachine does not need to be called from outside the cgroups file now, so make it static.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 - src/util/vircgroup.c | 7 ++++--- src/util/vircgroup.h | 5 ----- 3 files changed, 4 insertions(+), 9 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> When a VM has an 'emulator' child cgroup present, we must strip off that suffix when detecting the cgroup for a machine Rename the virCgroupIsValidMachineGroup method to virCgroupValidateMachineGroup to make a bit clearer that this isn't simply a boolean check, it will make changes to the object. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 308f1a1..9065675 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -96,9 +96,10 @@ bool virCgroupAvailable(void) } static bool -virCgroupIsValidMachineGroup(virCgroupPtr group, - const char *name, - const char *drivername) +virCgroupValidateMachineGroup(virCgroupPtr group, + const char *name, + const char *drivername, + bool stripEmulatorSuffix) { size_t i; bool valid = false; @@ -120,12 +121,26 @@ virCgroupIsValidMachineGroup(virCgroupPtr group, tmp = strrchr(group->controllers[i].placement, '/'); if (!tmp) goto cleanup; + + if (stripEmulatorSuffix && + (i == VIR_CGROUP_CONTROLLER_CPU || + i == VIR_CGROUP_CONTROLLER_CPUACCT || + i == VIR_CGROUP_CONTROLLER_CPUSET)) { + if (STREQ(tmp, "/emulator")) + *tmp = '\0'; + tmp = strrchr(group->controllers[i].placement, '/'); + if (!tmp) + goto cleanup; + } + tmp++; if (STRNEQ(tmp, name) && - STRNEQ(tmp, partname)) + STRNEQ(tmp, partname)) { + VIR_DEBUG("Name '%s' does not match '%s' or '%s'", + tmp, name, partname); goto cleanup; - + } } valid = true; @@ -1590,7 +1605,9 @@ int virCgroupNewDetectMachine(const char *name, return -1; } - if (!virCgroupIsValidMachineGroup(*group, name, drivername)) { + if (!virCgroupValidateMachineGroup(*group, name, drivername, true)) { + VIR_DEBUG("Failed to validate machine name for '%s' driver '%s'", + name, drivername); virCgroupFree(group); return 0; } -- 1.8.1.4

On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
When a VM has an 'emulator' child cgroup present, we must strip off that suffix when detecting the cgroup for a machine
Rename the virCgroupIsValidMachineGroup method to virCgroupValidateMachineGroup to make a bit clearer that this isn't simply a boolean check, it will make changes to the object.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> When detecting cgroups we must honour any controllers whitelist the driver may have. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/lxc/lxc_process.c | 4 ++-- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 9 ++++++--- src/util/vircgroup.h | 2 ++ 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index e632e13..1a5686f 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1190,7 +1190,7 @@ int virLXCProcessStart(virConnectPtr conn, } if (virCgroupNewDetectMachine(vm->def->name, "lxc", - vm->pid, &priv->cgroup) < 0) + vm->pid, -1, &priv->cgroup) < 0) goto error; if (!priv->cgroup) { @@ -1398,7 +1398,7 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm, goto error; if (virCgroupNewDetectMachine(vm->def->name, "lxc", - vm->pid, &priv->cgroup) < 0) + vm->pid, -1, &priv->cgroup) < 0) goto error; if (!priv->cgroup) { diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 07e901c..9f6b251 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -707,6 +707,7 @@ qemuConnectCgroup(virQEMUDriverPtr driver, if (virCgroupNewDetectMachine(vm->def->name, "qemu", vm->pid, + cfg->cgroupControllers, &priv->cgroup) < 0) goto cleanup; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 9065675..3818172 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1406,7 +1406,7 @@ int virCgroupNewPartition(const char *path ATTRIBUTE_UNUSED, */ int virCgroupNewSelf(virCgroupPtr *group) { - return virCgroupNewDetect(-1, group); + return virCgroupNewDetect(-1, -1, group); } @@ -1577,12 +1577,14 @@ int virCgroupNewEmulator(virCgroupPtr domain ATTRIBUTE_UNUSED, #if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R int virCgroupNewDetect(pid_t pid, + int controllers, virCgroupPtr *group) { - return virCgroupNew(pid, "", NULL, -1, group); + return virCgroupNew(pid, "", NULL, controllers, group); } #else int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, + int controllers ATTRIBUTE_UNUSED, virCgroupPtr *group ATTRIBUTE_UNUSED) { virReportSystemError(ENXIO, "%s", @@ -1597,9 +1599,10 @@ int virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED, int virCgroupNewDetectMachine(const char *name, const char *drivername, pid_t pid, + int controllers, virCgroupPtr *group) { - if (virCgroupNewDetect(pid, group) < 0) { + if (virCgroupNewDetect(pid, controllers, group) < 0) { if (virCgroupNewIgnoreError()) return 0; return -1; diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index d6222d7..3aaf081 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -76,11 +76,13 @@ int virCgroupNewEmulator(virCgroupPtr domain, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(3); int virCgroupNewDetect(pid_t pid, + int controllers, virCgroupPtr *group); int virCgroupNewDetectMachine(const char *name, const char *drivername, pid_t pid, + int controllers, virCgroupPtr *group); int virCgroupNewMachine(const char *name, -- 1.8.1.4

On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
When detecting cgroups we must honour any controllers whitelist the driver may have.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/lxc/lxc_process.c | 4 ++-- src/qemu/qemu_cgroup.c | 1 + src/util/vircgroup.c | 9 ++++++--- src/util/vircgroup.h | 2 ++ 4 files changed, 11 insertions(+), 5 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

From: "Daniel P. Berrange" <berrange@redhat.com> If the app has provided a whitelist of controllers to be used, we skip detecting its mount point. We still, however, fill in the placement info which later confuses the machine name validation code. Skip detecting placement if the controller mount point is not set Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 3818172..827d894 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -137,8 +137,8 @@ virCgroupValidateMachineGroup(virCgroupPtr group, if (STRNEQ(tmp, name) && STRNEQ(tmp, partname)) { - VIR_DEBUG("Name '%s' does not match '%s' or '%s'", - tmp, name, partname); + VIR_DEBUG("Name '%s' for controller '%s' does not match '%s' or '%s'", + tmp, virCgroupControllerTypeToString(i), name, partname); goto cleanup; } } @@ -426,7 +426,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group, * selfpath=="/libvirt.service" + path="" -> "/libvirt.service" * selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo" */ - if (typelen == len && STREQLEN(typestr, tmp, len)) { + if (typelen == len && STREQLEN(typestr, tmp, len) && + group->controllers[i].mountPoint != NULL) { if (virAsprintf(&group->controllers[i].placement, "%s%s%s", selfpath, (STREQ(selfpath, "/") || -- 1.8.1.4

On 07/25/2013 06:20 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
If the app has provided a whitelist of controllers to be used, we skip detecting its mount point. We still, however, fill in the placement info which later confuses the machine name validation code. Skip detecting placement if the controller mount point is not set
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/util/vircgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
ACK. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (2)
-
Daniel P. Berrange
-
Eric Blake