On 06/14/2013 02:02 AM, Richard Weinberger wrote:
Within a user namespace root can remount these filesysems at any
time rw.
Create these mappings only if we're not playing with user namespaces.
Without user namespace,the root user of container can remount all of the
filesystem too, since he is the root user of host.
The reason we can allow filesystem to be mounted as writable is that
with user namespace we can make sure the root user in container has no rights
to change some sysfs/sysctl configuration that we don't want him to change.
Signed-off-by: Richard Weinberger <richard(a)nod.at>
---
src/lxc/lxc_container.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 4f00420..a003ec8 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -682,8 +682,17 @@ err:
return ret;
}
+static int userns_supported(void)
+{
+ return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
+}
-static int lxcContainerMountBasicFS(void)
+static int userns_required(virDomainDefPtr def)
+{
+ return def->idmap.uidmap && def->idmap.gidmap;
+}
+
+static int lxcContainerMountBasicFS(virDomainDefPtr vmDef)
{
const struct {
const char *src;
@@ -691,6 +700,7 @@ static int lxcContainerMountBasicFS(void)
const char *type;
const char *opts;
int mflags;
+ bool paranoia;
} mnts[] = {
/* When we want to make a bind mount readonly, for unknown reasons,
* it is currently necessary to bind it once, and then remount the
@@ -698,14 +708,14 @@ static int lxcContainerMountBasicFS(void)
* mount point in the main OS becomes readonly too which is not what
* we want. Hence some things have two entries here.
*/
- { "proc", "/proc", "proc", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV },
- { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND },
- { "/proc/sys", "/proc/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
- { "sysfs", "/sys", "sysfs", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV },
- { "sysfs", "/sys", "sysfs", NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
+ { "proc", "/proc", "proc", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true },
+ { "/proc/sys", "/proc/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY, true },
+ { "sysfs", "/sys", "sysfs", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+ { "sysfs", "/sys", "sysfs", NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY, true },
#if WITH_SELINUX
- { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV },
- { SELINUX_MOUNT, SELINUX_MOUNT, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
+ { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+ { SELINUX_MOUNT, SELINUX_MOUNT, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, true
},
#endif
};
int i, rc = -1;
@@ -720,6 +730,10 @@ static int lxcContainerMountBasicFS(void)
srcpath = mnts[i].src;
+ /* Skip ro overlay mounts if we build a userns as root can remount it rw at any
time */
+ if (userns_required(vmDef) && mnts[i].paranoia)
+ continue;
+
/* Skip if mount doesn't exist in source */
if ((srcpath[0] == '/') &&
(access(srcpath, R_OK) < 0))
@@ -1780,7 +1794,7 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
goto cleanup;
/* Mounts the core /proc, /sys, etc filesystems */
- if (lxcContainerMountBasicFS() < 0)
+ if (lxcContainerMountBasicFS(vmDef) < 0)
goto cleanup;
/* Mounts /proc/meminfo etc sysinfo */
@@ -1896,16 +1910,6 @@ static int lxcContainerDropCapabilities(bool keepReboot
ATTRIBUTE_UNUSED)
return 0;
}
-static int userns_supported(void)
-{
- return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0;
-}
-
-static int userns_required(virDomainDefPtr def)
-{
- return def->idmap.uidmap && def->idmap.gidmap;
-}
-
/**
* lxcContainerChild:
* @data: pointer to container arguments