There are 3 reason we need to rework the cgroupfs
mounting in container.
1, Yin Olivia reported a "failed to mount cgroup"
problem, now we given that the name of cgroup mount point
is same with the subsystem type, Or libvirt_lxc
will fail to start.
2, The cgroup configuration is leaked to the container,
even user can change host's cgroup configuration in
container.
3, After we enable userns, the cgroupfs is unable to be
mounted in uninit-userns.
This patch tries to resolve these 3 problem,
uses mount --bind to set cgroupfs for container.
It means the directory /sys/fs/cgroup/memory/libvirt/lxc/domain
of host will be binded to the directory /sys/fs/cgroup/memory of
container.
Signed-off-by: Gao feng <gaofeng(a)cn.fujitsu.com>
---
src/lxc/lxc_container.c | 107 ++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 94 insertions(+), 13 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 4d09791..113459b 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1834,13 +1834,69 @@ cleanup:
}
+static int lxcContainerMoveCGroups(struct lxcContainerCGroup *mounts,
+ size_t nmounts, const char *path)
+{
+ size_t i;
+ char *dst = NULL;
+ int ret = -1;
+
+ for (i = 0; i < nmounts; i++) {
+ if (mounts[i].linkDest)
+ continue;
+
+ if (virAsprintf(&dst, "/%s/%s", path, mounts[i].dir) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ if (virFileMakePath(dst) < 0) {
+ virReportSystemError(errno,
+ _("Unable to create directory %s"),
+ dst);
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Move cgroupfs from '%s' to '%s'",
mounts[i].dir, dst);
+
+ if (mount(mounts[i].dir, dst, NULL, MS_MOVE, NULL) < 0) {
+ virReportSystemError(errno,
+ _("Unable to move cgroup from %s to %s"),
+ mounts[i].dir, dst);
+ goto cleanup;
+ }
+
+ VIR_FREE(dst);
+ }
+
+ ret = 0;
+cleanup:
+ VIR_FREE(dst);
+ return ret;
+}
+
+
+static int lxcContainerUmountCGroups(const char *prefix)
+{
+ if (lxcContainerUnmountSubtree(prefix, false) < 0)
+ return -1;
+
+ virCgroupRemoveRecursively((char *)prefix);
+ return 0;
+}
+
+
static int lxcContainerMountCGroups(struct lxcContainerCGroup *mounts,
size_t nmounts,
+ const char *prefix,
+ const char *domain,
const char *root,
char *sec_mount_options)
{
size_t i;
char *opts = NULL;
+ char *src = NULL;
+ int ret = -1;
VIR_DEBUG("Mounting cgroups at '%s'", root);
@@ -1854,17 +1910,15 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup
*mounts,
if (virAsprintf(&opts,
"mode=755,size=65536%s", sec_mount_options) < 0) {
virReportOOMError();
- return -1;
+ goto cleanup;
}
if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC,
opts) < 0) {
- VIR_FREE(opts);
virReportSystemError(errno,
_("Failed to mount %s on %s type %s"),
"tmpfs", root, "tmpfs");
- return -1;
+ goto cleanup;
}
- VIR_FREE(opts);
for (i = 0 ; i < nmounts ; i++) {
if (mounts[i].linkDest) {
@@ -1877,6 +1931,10 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup
*mounts,
return -1;
}
} else {
+ if (virAsprintf(&src, "%s/%s/libvirt/lxc/%s/",
+ prefix, mounts[i].dir, domain) < 0)
+ goto cleanup;
+
VIR_DEBUG("Create mount point '%s'", mounts[i].dir);
if (virFileMakePath(mounts[i].dir) < 0) {
virReportSystemError(errno,
@@ -1885,17 +1943,28 @@ static int lxcContainerMountCGroups(struct lxcContainerCGroup
*mounts,
return -1;
}
- if (mount("cgroup", mounts[i].dir, "cgroup",
- 0, mounts[i].dir + strlen(root) + 1) < 0) {
- virReportSystemError(errno,
- _("Failed to mount cgroup on
'%s'"),
- mounts[i].dir);
- return -1;
+ if (mount(src, mounts[i].dir, NULL, MS_BIND, NULL) < 0) {
+ VIR_FREE(src);
+ if (virAsprintf(&src, "%s/%s/", prefix, mounts[i].dir) <
0)
+ goto cleanup;
+
+ if (mount(src, mounts[i].dir, NULL, MS_BIND, NULL) < 0) {
+ virReportSystemError(errno,
+ _("Failed to bind cgroup '%s' on
'%s'"),
+ src, mounts[i].dir);
+ goto cleanup;
+ }
}
+
+ VIR_FREE(src);
}
}
+ ret = 0;
- return 0;
+cleanup:
+ VIR_FREE(opts);
+ VIR_FREE(src);
+ return ret;
}
@@ -1956,7 +2025,7 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
/* Now we can re-mount the cgroups controllers in the
* same configuration as before */
- if (lxcContainerMountCGroups(mounts, nmounts,
+ if (lxcContainerMountCGroups(mounts, nmounts, "/.oldroot/",
vmDef->name,
cgroupRoot, sec_mount_options) < 0)
goto cleanup;
@@ -2039,6 +2108,13 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef,
if (lxcContainerIdentifyCGroups(&mounts, &nmounts, &cgroupRoot) < 0)
goto cleanup;
+ /* Move cgroupfs to /.oldroot/, otherwise cgroupfs will be
+ * umounted. when userns is enabled, we can't mount cgroupfs,
+ * we need to keep the cgroupfs being mounted and then bind the
+ * old cgroupfs to the new directory of cgroupfs */
+ if (lxcContainerMoveCGroups(mounts, nmounts, "/.oldcgroup") < 0)
+ goto cleanup;
+
#if WITH_SELINUX
/* Some versions of Linux kernel don't let you overmount
* the selinux filesystem, so make sure we kill it first
@@ -2064,10 +2140,15 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef,
/* Now we can re-mount the cgroups controllers in the
* same configuration as before */
- if (lxcContainerMountCGroups(mounts, nmounts,
+ if (lxcContainerMountCGroups(mounts, nmounts, "/.oldcgroup",
vmDef->name,
cgroupRoot, sec_mount_options) < 0)
goto cleanup;
+ /* After mount cgroupfs, we should umount and remove the
+ * /.oldroot/ */
+ if (lxcContainerUmountCGroups("/.oldcgroup") < 0)
+ goto cleanup;
+
VIR_DEBUG("Mounting completed");
ret = 0;
--
1.7.11.7