From: "Daniel P. Berrange" <berrange(a)redhat.com>
To allow the container to access /dev and /dev/pts when running under
sVirt, set an explicit SELinux context= mount option on both mounts. Also
set a maximum size on the /dev tmpfs mount to prevent a DoS via memory usage.
* src/lxc/lxc_container.c: Set /dev mount context
* src/lxc/lxc_controller.c: Set /dev/pts mount context
---
src/lxc/lxc_container.c | 75 +++++++++++++++++++++++++++++++++++----------
src/lxc/lxc_controller.c | 43 +++++++++++++++++++++++---
2 files changed, 96 insertions(+), 22 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 4df0b55..3c8e0e3 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -36,6 +36,10 @@
#include <unistd.h>
#include <mntent.h>
+#if HAVE_SELINUX
+# include <selinux/selinux.h>
+#endif
+
/* Yes, we want linux private one, for _syscall2() macro */
#include <linux/unistd.h>
@@ -419,7 +423,6 @@ err:
static int lxcContainerMountBasicFS(const char *srcprefix, bool pivotRoot)
{
const struct {
- bool onlyPivotRoot;
bool needPrefix;
const char *src;
const char *dst;
@@ -433,16 +436,19 @@ static int lxcContainerMountBasicFS(const char *srcprefix, bool
pivotRoot)
* mount point in the main OS becomes readonly too which is not what
* we want. Hence some things have two entries here.
*/
- { true, false, "devfs", "/dev", "tmpfs",
"mode=755", MS_NOSUID },
- { false, false, "proc", "/proc", "proc", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV },
- { false, false, "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND
},
- { false, false, "/proc/sys", "/proc/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
- { false, true, "/sys", "/sys", NULL, NULL, MS_BIND },
- { false, true, "/sys", "/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
- { false, true, "/selinux", "/selinux", NULL, NULL, MS_BIND
},
- { false, true, "/selinux", "/selinux", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
+ { false, "proc", "/proc", "proc", NULL,
MS_NOSUID|MS_NOEXEC|MS_NODEV },
+ { false, "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND },
+ { false, "/proc/sys", "/proc/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
+ { true, "/sys", "/sys", NULL, NULL, MS_BIND },
+ { true, "/sys", "/sys", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
+ { true, "/selinux", "/selinux", NULL, NULL, MS_BIND },
+ { true, "/selinux", "/selinux", NULL, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY },
};
int i, rc = -1;
+ char *opts = NULL;
+#if HAVE_SELINUX
+ security_context_t con;
+#endif
VIR_DEBUG("Mounting basic filesystems %s pivotRoot=%d", NULLSTR(srcprefix),
pivotRoot);
@@ -450,10 +456,8 @@ static int lxcContainerMountBasicFS(const char *srcprefix, bool
pivotRoot)
char *src = NULL;
const char *srcpath = NULL;
- VIR_DEBUG("Consider %s onlyPivotRoot=%d",
- mnts[i].src, mnts[i].onlyPivotRoot);
- if (mnts[i].onlyPivotRoot && !pivotRoot)
- continue;
+ VIR_DEBUG("Process %s -> %s",
+ mnts[i].src, mnts[i].dst);
if (virFileMakePath(mnts[i].dst) < 0) {
virReportSystemError(errno,
@@ -474,8 +478,10 @@ static int lxcContainerMountBasicFS(const char *srcprefix, bool
pivotRoot)
/* Skip if mount doesn't exist in source */
if ((srcpath[0] == '/') &&
- (access(srcpath, R_OK) < 0))
+ (access(srcpath, R_OK) < 0)) {
+ VIR_FREE(src);
continue;
+ }
VIR_DEBUG("Mount %s on %s type=%s flags=%x, opts=%s",
srcpath, mnts[i].dst, mnts[i].type, mnts[i].mflags, mnts[i].opts);
@@ -489,15 +495,50 @@ static int lxcContainerMountBasicFS(const char *srcprefix, bool
pivotRoot)
VIR_FREE(src);
}
+ if (pivotRoot) {
+#if HAVE_SELINUX
+ if (getfilecon("/", &con) < 0 &&
+ errno != ENOTSUP) {
+ virReportSystemError(errno, "%s",
+ _("Failed to query file context on /"));
+ goto cleanup;
+ } else {
+#endif
+ /*
+ * tmpfs is limited to 64kb, since we only have device nodes in there
+ * and don't want to DOS the entire OS RAM usage
+ */
+ if (virAsprintf(&opts, "mode=755,size=65536%%%s%s%s",
+ con ? ",context=\"" : "",
+ con ? (const char *)con : "",
+ con ? "\"" : "") < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+#if HAVE_SELINUX
+ }
+#endif
+
+ VIR_DEBUG("Mount devfs on /dev type=tmpfs flags=%x, opts=%s",
+ MS_NOSUID, opts);
+ if (mount("devfs", "/dev", "tmpfs", MS_NOSUID,
opts) < 0) {
+ virReportSystemError(errno,
+ _("Failed to mount %s on %s type %s"),
+ "devfs", "/dev",
"tmpfs");
+ goto cleanup;
+ }
+ }
+
rc = 0;
cleanup:
VIR_DEBUG("rc=%d", rc);
+ VIR_FREE(opts);
return rc;
}
-static int lxcContainerMountDevFS(virDomainFSDefPtr root)
+static int lxcContainerMountFSDevPTS(virDomainFSDefPtr root)
{
char *devpts = NULL;
int rc = -1;
@@ -1069,8 +1110,8 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
if (lxcContainerMountBasicFS("/.oldroot", true) < 0)
return -1;
- /* Mounts /dev and /dev/pts */
- if (lxcContainerMountDevFS(root) < 0)
+ /* Mounts /dev/pts */
+ if (lxcContainerMountFSDevPTS(root) < 0)
return -1;
/* Populates device nodes in /dev/ */
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 6bc54b7..edd9dca 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -52,6 +52,9 @@
# define NUMA_VERSION1_COMPATIBILITY 1
# include <numa.h>
#endif
+#if HAVE_SELINUX
+# include <selinux/selinux.h>
+#endif
#include "virterror_internal.h"
#include "logging.h"
@@ -1339,6 +1342,10 @@ lxcControllerRun(virDomainDefPtr def,
* marked as shared
*/
if (root) {
+#if HAVE_SELINUX
+ security_context_t con;
+#endif
+ char *opts;
VIR_DEBUG("Setting up private /dev/pts");
if (!virFileExists(root->src)) {
@@ -1373,16 +1380,42 @@ lxcControllerRun(virDomainDefPtr def,
goto cleanup;
}
- /* XXX should we support gid=X for X!=5 for distros which use
- * a different gid for tty? */
- VIR_DEBUG("Mounting 'devpts' on %s", devpts);
- if (mount("devpts", devpts, "devpts", 0,
- "newinstance,ptmxmode=0666,mode=0620,gid=5") < 0) {
+#if HAVE_SELINUX
+ if (getfilecon(root->src, &con) < 0 &&
+ errno != ENOTSUP) {
+ virReportSystemError(errno,
+ _("Failed to query file context on %s"),
+ root->src);
+ goto cleanup;
+ } else {
+#endif
+ /*
+ * tmpfs is limited to 64kb, since we only have device nodes in there
+ * and don't want to DOS the entire OS RAM usage
+ */
+ /* XXX should we support gid=X for X!=5 for distros which use
+ * a different gid for tty? */
+ if (virAsprintf(&opts,
"newinstance,ptmxmode=0666,mode=0620,gid=5%s%s%s",
+ con ? ",context=\"" : "",
+ con ? (const char *)con : "",
+ con ? "\"" : "") < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+#if HAVE_SELINUX
+ }
+#endif
+
+ VIR_DEBUG("Mount devpts on %s type=tmpfs flags=%x, opts=%s",
+ devpts, MS_NOSUID, opts);
+ if (mount("devpts", devpts, "devpts", MS_NOSUID, opts) <
0) {
+ VIR_FREE(opts);
virReportSystemError(errno,
_("Failed to mount devpts on %s"),
devpts);
goto cleanup;
}
+ VIR_FREE(opts);
if (access(devptmx, R_OK) < 0) {
VIR_WARN("Kernel does not support private devpts, using shared
devpts");
--
1.7.7.5