[libvirt] [PATCH 1/2] cgroup: export virCgroupRemoveRecursively

We will use virCgroupRemoveRecursively to remove cgroup directories in the coming patch. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/vircgroup.c | 4 ++-- src/util/vircgroup.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index dc01bfa..8cc50c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1118,6 +1118,7 @@ virCgroupKillRecursive; virCgroupMounted; virCgroupMoveTask; virCgroupPathOfController; +virCgroupRemoveRecursively; virCgroupRemove; virCgroupSetBlkioDeviceWeight; virCgroupSetBlkioWeight; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 532e704..6998f13 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -686,7 +686,7 @@ cleanup: #endif #if defined _DIRENT_HAVE_D_TYPE -static int virCgroupRemoveRecursively(char *grppath) +int virCgroupRemoveRecursively(char *grppath) { DIR *grpdir; struct dirent *ent; @@ -735,7 +735,7 @@ static int virCgroupRemoveRecursively(char *grppath) return rc; } #else -static int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED) +int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED) { /* Claim no support */ return -ENXIO; diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 2ed6ff9..ea42fa2 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -157,6 +157,7 @@ int virCgroupGetCpusetMems(virCgroupPtr group, char **mems); int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus); int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus); +int virCgroupRemoveRecursively(char *grppath); int virCgroupRemove(virCgroupPtr group); void virCgroupFree(virCgroupPtr *group); -- 1.7.11.7

There are 3 reason we need to rework the cgroupfs mounting in container. 1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start. 2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container. 3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns. This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container. It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_container.c | 107 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 13 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 4d09791..113459b 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -1834,13 +1834,69 @@ cleanup: } +static int lxcContainerMoveCGroups(struct lxcContainerCGroup *mounts, + size_t nmounts, const char *path) +{ + size_t i; + char *dst = NULL; + int ret = -1; + + for (i = 0; i < nmounts; i++) { + if (mounts[i].linkDest) + continue; + + if (virAsprintf(&dst, "/%s/%s", path, mounts[i].dir) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (virFileMakePath(dst) < 0) { + virReportSystemError(errno, + _("Unable to create directory %s"), + dst); + goto cleanup; + } + + VIR_DEBUG("Move cgroupfs from '%s' to '%s'", mounts[i].dir, dst); + + if (mount(mounts[i].dir, dst, NULL, MS_MOVE, NULL) < 0) { + virReportSystemError(errno, + _("Unable to move cgroup from %s to %s"), + mounts[i].dir, dst); + goto cleanup; + } + + VIR_FREE(dst); + } + + ret = 0; +cleanup: + VIR_FREE(dst); + return ret; +} + + +static int lxcContainerUmountCGroups(const char *prefix) +{ + if (lxcContainerUnmountSubtree(prefix, false) < 0) + return -1; + + virCgroupRemoveRecursively((char *)prefix); + return 0; +} + + static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts, size_t nmounts, + const char *prefix, + const char *domain, const char *root, char *sec_mount_options) { size_t i; char *opts = NULL; + char *src = NULL; + int ret = -1; VIR_DEBUG("Mounting cgroups at '%s'", root); @@ -1854,17 +1910,15 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts, if (virAsprintf(&opts, "mode=755,size=65536%s", sec_mount_options) < 0) { virReportOOMError(); - return -1; + goto cleanup; } if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC, opts) < 0) { - VIR_FREE(opts); virReportSystemError(errno, _("Failed to mount %s on %s type %s"), "tmpfs", root, "tmpfs"); - return -1; + goto cleanup; } - VIR_FREE(opts); for (i = 0 ; i < nmounts ; i++) { if (mounts[i].linkDest) { @@ -1877,6 +1931,10 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts, return -1; } } else { + if (virAsprintf(&src, "%s/%s/libvirt/lxc/%s/", + prefix, mounts[i].dir, domain) < 0) + goto cleanup; + VIR_DEBUG("Create mount point '%s'", mounts[i].dir); if (virFileMakePath(mounts[i].dir) < 0) { virReportSystemError(errno, @@ -1885,17 +1943,28 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts, return -1; } - if (mount("cgroup", mounts[i].dir, "cgroup", - 0, mounts[i].dir + strlen(root) + 1) < 0) { - virReportSystemError(errno, - _("Failed to mount cgroup on '%s'"), - mounts[i].dir); - return -1; + if (mount(src, mounts[i].dir, NULL, MS_BIND, NULL) < 0) { + VIR_FREE(src); + if (virAsprintf(&src, "%s/%s/", prefix, mounts[i].dir) < 0) + goto cleanup; + + if (mount(src, mounts[i].dir, NULL, MS_BIND, NULL) < 0) { + virReportSystemError(errno, + _("Failed to bind cgroup '%s' on '%s'"), + src, mounts[i].dir); + goto cleanup; + } } + + VIR_FREE(src); } } + ret = 0; - return 0; +cleanup: + VIR_FREE(opts); + VIR_FREE(src); + return ret; } @@ -1956,7 +2025,7 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, /* Now we can re-mount the cgroups controllers in the * same configuration as before */ - if (lxcContainerMountCGroups(mounts, nmounts, + if (lxcContainerMountCGroups(mounts, nmounts, "/.oldroot/", vmDef->name, cgroupRoot, sec_mount_options) < 0) goto cleanup; @@ -2039,6 +2108,13 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef, if (lxcContainerIdentifyCGroups(&mounts, &nmounts, &cgroupRoot) < 0) goto cleanup; + /* Move cgroupfs to /.oldroot/, otherwise cgroupfs will be + * umounted. when userns is enabled, we can't mount cgroupfs, + * we need to keep the cgroupfs being mounted and then bind the + * old cgroupfs to the new directory of cgroupfs */ + if (lxcContainerMoveCGroups(mounts, nmounts, "/.oldcgroup") < 0) + goto cleanup; + #if WITH_SELINUX /* Some versions of Linux kernel don't let you overmount * the selinux filesystem, so make sure we kill it first @@ -2064,10 +2140,15 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef, /* Now we can re-mount the cgroups controllers in the * same configuration as before */ - if (lxcContainerMountCGroups(mounts, nmounts, + if (lxcContainerMountCGroups(mounts, nmounts, "/.oldcgroup", vmDef->name, cgroupRoot, sec_mount_options) < 0) goto cleanup; + /* After mount cgroupfs, we should umount and remove the + * /.oldroot/ */ + if (lxcContainerUmountCGroups("/.oldcgroup") < 0) + goto cleanup; + VIR_DEBUG("Mounting completed"); ret = 0; -- 1.7.11.7

On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
Hi Daniel, what's your idea about this patch? Thanks

On 2013/03/27 13:26, Gao feng wrote:
On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
Hi Daniel,
what's your idea about this patch?
Ping Again

On Fri, Apr 05, 2013 at 10:16:43AM +0800, Gao feng wrote:
On 2013/03/27 13:26, Gao feng wrote:
On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
what's your idea about this patch?
Ping Again
The pach has the right idea, but it clashes with the refactoring I've done for cgroups and LXC. I'll update your patch to apply ontop of this series: https://www.redhat.com/archives/libvir-list/2013-April/msg00352.html and copy you on the mail when i post it. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Hi Feng, I need run the below steps with libvirt-1.0.3. $ mkdir /cgroup $ mount -t tmpfs cgroup /cgroup $ mkdir /cgroup/{freezer,devices,memory,cpuacct,cpuset} $ mount -t cgroup -ofreezer cgroup /cgroup/freezer $ mount -t cgroup -odevices cgroup /cgroup/devices $ mount -t cgroup -omemory cgroup /cgroup/memory $ mount -t cgroup -ocpuacct cgroup /cgroup/cpuacct $ mount -t cgroup -ocpuset cgroup /cgroup/cpuset What's the new to mount cgroup with libvirt-1.0.4? Best Regards, Olivia
-----Original Message----- From: Daniel P. Berrange [mailto:berrange@redhat.com] Sent: Friday, April 05, 2013 7:29 PM To: Gao feng Cc: libvir-list@redhat.com; Yin Olivia-R63875 Subject: Re: [PATCH 2/2] LXC: rework mounting cgroupfs in container
On Fri, Apr 05, 2013 at 10:16:43AM +0800, Gao feng wrote:
On 2013/03/27 13:26, Gao feng wrote:
On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
what's your idea about this patch?
Ping Again
The pach has the right idea, but it clashes with the refactoring I've done for cgroups and LXC. I'll update your patch to apply ontop of this series:
https://www.redhat.com/archives/libvir-list/2013-April/msg00352.html
and copy you on the mail when i post it.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt- manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk- vnc :|

Hi Yin, On 2013/04/08 11:54, Yin Olivia-R63875 wrote:
Hi Feng,
I need run the below steps with libvirt-1.0.3. $ mkdir /cgroup $ mount -t tmpfs cgroup /cgroup $ mkdir /cgroup/{freezer,devices,memory,cpuacct,cpuset} $ mount -t cgroup -ofreezer cgroup /cgroup/freezer $ mount -t cgroup -odevices cgroup /cgroup/devices $ mount -t cgroup -omemory cgroup /cgroup/memory $ mount -t cgroup -ocpuacct cgroup /cgroup/cpuacct $ mount -t cgroup -ocpuset cgroup /cgroup/cpuset
What's the new to mount cgroup with libvirt-1.0.4?
After this patch being applied,your can almost mount cgroup any way you like :) Thanks, Gao
Best Regards, Olivia
-----Original Message----- From: Daniel P. Berrange [mailto:berrange@redhat.com] Sent: Friday, April 05, 2013 7:29 PM To: Gao feng Cc: libvir-list@redhat.com; Yin Olivia-R63875 Subject: Re: [PATCH 2/2] LXC: rework mounting cgroupfs in container
On Fri, Apr 05, 2013 at 10:16:43AM +0800, Gao feng wrote:
On 2013/03/27 13:26, Gao feng wrote:
On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
what's your idea about this patch?
Ping Again
The pach has the right idea, but it clashes with the refactoring I've done for cgroups and LXC. I'll update your patch to apply ontop of this series:
https://www.redhat.com/archives/libvir-list/2013-April/msg00352.html
and copy you on the mail when i post it.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt- manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk- vnc :|

On 2013/04/05 19:29, Daniel P. Berrange wrote:
On Fri, Apr 05, 2013 at 10:16:43AM +0800, Gao feng wrote:
On 2013/03/27 13:26, Gao feng wrote:
On 2013/03/20 16:14, Gao feng wrote:
There are 3 reason we need to rework the cgroupfs mounting in container.
1, Yin Olivia reported a "failed to mount cgroup" problem, now we given that the name of cgroup mount point is same with the subsystem type, Or libvirt_lxc will fail to start.
2, The cgroup configuration is leaked to the container, even user can change host's cgroup configuration in container.
3, After we enable userns, the cgroupfs is unable to be mounted in uninit-userns.
This patch tries to resolve these 3 problem, uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain of host will be binded to the directory /sys/fs/cgroup/memory of container.
what's your idea about this patch?
Ping Again
The pach has the right idea, but it clashes with the refactoring I've done for cgroups and LXC. I'll update your patch to apply ontop of this series:
https://www.redhat.com/archives/libvir-list/2013-April/msg00352.html
and copy you on the mail when i post it.
Ok,I will wait for your upgrade, Thanks for your work. Gao

On 2013年03月20日 16:14, Gao feng wrote:
We will use virCgroupRemoveRecursively to remove cgroup directories in the coming patch.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/vircgroup.c | 4 ++-- src/util/vircgroup.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index dc01bfa..8cc50c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1118,6 +1118,7 @@ virCgroupKillRecursive; virCgroupMounted; virCgroupMoveTask; virCgroupPathOfController; +virCgroupRemoveRecursively; virCgroupRemove;
Not alphabetically sorted.

On 03/20/2013 07:59 AM, Osier Yang wrote:
On 2013年03月20日 16:14, Gao feng wrote:
We will use virCgroupRemoveRecursively to remove cgroup directories in the coming patch.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/vircgroup.c | 4 ++-- src/util/vircgroup.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index dc01bfa..8cc50c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1118,6 +1118,7 @@ virCgroupKillRecursive; virCgroupMounted; virCgroupMoveTask; virCgroupPathOfController; +virCgroupRemoveRecursively; virCgroupRemove;
Not alphabetically sorted.
But beyond that, it looks okay, so: ACK and pushed, with the sorting fixed. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 2013/03/21 04:19, Eric Blake wrote:
On 03/20/2013 07:59 AM, Osier Yang wrote:
On 2013年03月20日 16:14, Gao feng wrote:
We will use virCgroupRemoveRecursively to remove cgroup directories in the coming patch.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/vircgroup.c | 4 ++-- src/util/vircgroup.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index dc01bfa..8cc50c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1118,6 +1118,7 @@ virCgroupKillRecursive; virCgroupMounted; virCgroupMoveTask; virCgroupPathOfController; +virCgroupRemoveRecursively; virCgroupRemove;
Not alphabetically sorted.
But beyond that, it looks okay, so:
ACK and pushed, with the sorting fixed.
Oh,Appologize for my carelessness... Thanks you guys!
participants (5)
-
Daniel P. Berrange
-
Eric Blake
-
Gao feng
-
Osier Yang
-
Yin Olivia-R63875