On Fri, Apr 17, 2009 at 09:39:19AM -0500, Serge E. Hallyn wrote:
Quoting Daniel P. Berrange (berrange(a)redhat.com):
> > Calling unshare(CLONE_NEWNS) will not prevent the host OS from
> > seeing the new /dev/pts if / was MS_SHARED. That isn't taken
> > care of anywhere else for this process's namespace, is it?
>
> Yeah, so this is the place where I think we must still have a difference
> in our host setups. I'm testing this patch on a Fedora 11 host, and with
> my current code, the new /dev/pts is not visible in the host.
Well I haven't tested your patch as is, was just looking at the code.
My pivot_root patch did a remount --make-slave, but I think it is only
for the container itself, whereas your patch here affects the driver
so it hasn't yet hit that remount, right?
> So I can only assume this means my host / is *not* MS_SHARED, while
If on your F11 host you look at
cat /proc/self/mountinfo
do the top lines show / as being shared? (Mine does)
No, all the F11 installs I have just show '-' and I can't find any
sign of something in the init process which makes them shared, so
perhaps its some other software you've since installed ?
For sake of testing though I've run 'mount --make-rshared /' on one
of my systems and can now reproduce the behaviour you describe with
the mount appearing for all processes.
> yours is. I'm struggling to find out why this is different because
> I'm testing on an Fedora 11 up2date system.
It's possible this is just something that has been changed since.
> Anyway, would it be sufficiently to add in a call
>
> if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) <
0) {
> virReportSystemError(NULL, errno, "%s",
> _("failed to make root private"));
> goto cleanup;
> }
Maybe the best thing to do would be:
> if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
{
> virReportSystemError(NULL, errno, "%s",
> _("failed to make root slave"));
> goto cleanup;
> }
> if (mount("", "/", NULL, MS_SHARED|MS_REC, NULL) < 0)
{
> virReportSystemError(NULL, errno, "%s",
> _("failed to make root shared"));
> goto cleanup;
> }
So we are making it slave (so it will receive mounts from the host
still), then shared (so the rest of the container will start out
shared). That, or just turn it SLAVE and leave it like that.
This patch attached now just makes it MS_SLAVE. There's no need for the
extra SHARED flag, since the only process libvirt_lxc spawns is the 'init'
process inside the container and that immediately makes its own root
private.
Daniel
diff -r fadb4a5b251a src/domain_conf.c
--- a/src/domain_conf.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.c Mon Apr 20 11:00:58 2009 +0100
@@ -3856,6 +3858,21 @@ const char *virDomainDefDefaultEmulator(
return emulator;
}
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def)
+{
+ int i;
+
+ for (i = 0 ; i < def->nfss ; i++) {
+ if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+ continue;
+
+ if (STREQ(def->fss[i]->dst, "/"))
+ return def->fss[i];
+ }
+
+ return NULL;
+}
+
void virDomainObjLock(virDomainObjPtr obj)
{
diff -r fadb4a5b251a src/domain_conf.h
--- a/src/domain_conf.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/domain_conf.h Mon Apr 20 11:00:58 2009 +0100
@@ -636,6 +636,8 @@ const char *virDomainDefDefaultEmulator(
virDomainDefPtr def,
virCapsPtr caps);
+virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def);
+
void virDomainObjLock(virDomainObjPtr obj);
void virDomainObjUnlock(virDomainObjPtr obj);
diff -r fadb4a5b251a src/libvirt_private.syms
--- a/src/libvirt_private.syms Mon Apr 20 10:53:47 2009 +0100
+++ b/src/libvirt_private.syms Mon Apr 20 11:00:58 2009 +0100
@@ -79,6 +79,7 @@ virDomainDiskQSort;
virDomainFindByID;
virDomainFindByName;
virDomainFindByUUID;
+virDomainGetRootFilesystem;
virDomainGraphicsTypeFromString;
virDomainGraphicsDefFree;
virDomainInputDefFree;
diff -r fadb4a5b251a src/lxc_container.c
--- a/src/lxc_container.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.c Mon Apr 20 11:00:58 2009 +0100
@@ -308,7 +308,7 @@ static int lxcContainerPivotRoot(virDoma
/* Create a tmpfs root since old and new roots must be
* on separate filesystems */
- if (mount("", oldroot, "tmpfs", 0, NULL) < 0) {
+ if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) {
virReportSystemError(NULL, errno,
_("failed to mount empty tmpfs at %s"),
oldroot);
@@ -338,15 +338,9 @@ static int lxcContainerPivotRoot(virDoma
/* Now we chroot into the tmpfs, then pivot into the
* root->src bind-mounted onto '/new' */
- if (chroot(oldroot) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to chroot into tmpfs"));
- goto err;
- }
-
- if (chdir("/new") < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to chdir into /new on tmpfs"));
+ if (chdir(newroot) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to chroot into %s"), newroot);
goto err;
}
@@ -362,12 +356,6 @@ static int lxcContainerPivotRoot(virDoma
if (chdir("/") < 0)
goto err;
- if (umount2(".oldroot", MNT_DETACH) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to lazily unmount old root"));
- goto err;
- }
-
ret = 0;
err:
@@ -377,10 +365,64 @@ err:
return ret;
}
+
+static int lxcContainerMountBasicFS(virDomainFSDefPtr root)
+{
+ const struct {
+ const char *src;
+ const char *dst;
+ const char *type;
+ } mnts[] = {
+ { "/dev", "/dev", "tmpfs" },
+ { "/proc", "/proc", "proc" },
+ { "/sys", "/sys", "sysfs" },
+#if WITH_SELINUX
+ { "none", "/selinux", "selinuxfs" },
+#endif
+ };
+ int i, rc;
+ char *devpts;
+
+ if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0)
{
+ virReportOOMError(NULL);
+ return -1;
+ }
+
+ for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) {
+ if (virFileMakePath(mnts[i].dst) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mkdir %s"),
+ mnts[i].src);
+ return -1;
+ }
+ if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mount %s on %s"),
+ mnts[i].type, mnts[i].type);
+ return -1;
+ }
+ }
+
+ if ((rc = virFileMakePath("/dev/pts") < 0)) {
+ virReportSystemError(NULL, rc, "%s",
+ _("cannot create /dev/pts"));
+ return -1;
+ }
+
+ VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts");
+ if ((rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL)) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to mount /dev/pts in container"));
+ return -1;
+ }
+ VIR_FREE(devpts);
+
+ return 0;
+}
+
static int lxcContainerPopulateDevices(void)
{
int i;
- int rc;
const struct {
int maj;
int min;
@@ -395,33 +437,6 @@ static int lxcContainerPopulateDevices(v
{ LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" },
};
- if ((rc = virFileMakePath("/dev")) < 0) {
- virReportSystemError(NULL, rc, "%s",
- _("cannot create /dev/"));
- return -1;
- }
- if (mount("none", "/dev", "tmpfs", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /dev tmpfs"));
- return -1;
- }
- /* Move old devpts into container, since we have to
- connect to the master ptmx which was opened in
- the parent.
- XXX This sucks, we need to figure out how to get our
- own private devpts for isolation
- */
- if ((rc = virFileMakePath("/dev/pts") < 0)) {
- virReportSystemError(NULL, rc, "%s",
- _("cannot create /dev/pts"));
- return -1;
- }
- if (mount("devpts", "/dev/pts", "devpts", 0, NULL) <
0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /dev/pts in container"));
- return -1;
- }
-
/* Populate /dev/ with a few important bits */
for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
dev_t dev = makedev(devs[i].maj, devs[i].min);
@@ -434,6 +449,23 @@ static int lxcContainerPopulateDevices(v
}
}
+ if (access("/dev/pts/ptmx", W_OK) == 0) {
+ if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to create symlink /dev/ptmx to
/dev/pts/ptmx"));
+ return -1;
+ }
+ } else {
+ dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
+ if (mknod("/dev/ptmx", 0, dev) < 0 ||
+ chmod("/dev/ptmx", 0666)) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to make device /dev/ptmx"));
+ return -1;
+ }
+ }
+
+
return 0;
}
@@ -493,6 +525,7 @@ static int lxcContainerUnmountOldFS(void
return -1;
}
while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) {
+ VIR_DEBUG("Got %s", mntent.mnt_dir);
if (!STRPREFIX(mntent.mnt_dir, "/.oldroot"))
continue;
@@ -513,6 +546,7 @@ static int lxcContainerUnmountOldFS(void
lxcContainerChildMountSort);
for (i = 0 ; i < nmounts ; i++) {
+ VIR_DEBUG("Umount %s", mounts[i]);
if (umount(mounts[i]) < 0) {
virReportSystemError(NULL, errno,
_("failed to unmount '%s'"),
@@ -534,22 +568,23 @@ static int lxcContainerUnmountOldFS(void
static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
virDomainFSDefPtr root)
{
+ /* Gives us a private root, leaving all parent OS mounts on /.oldroot */
if (lxcContainerPivotRoot(root) < 0)
return -1;
- if (virFileMakePath("/proc") < 0 ||
- mount("none", "/proc", "proc", 0, NULL) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to mount /proc"));
+ /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */
+ if (lxcContainerMountBasicFS(root) < 0)
return -1;
- }
+ /* Populates device nodes in /dev/ */
if (lxcContainerPopulateDevices() < 0)
return -1;
+ /* Sets up any non-root mounts from guest config */
if (lxcContainerMountNewFS(vmDef) < 0)
return -1;
+ /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
if (lxcContainerUnmountOldFS() < 0)
return -1;
@@ -595,18 +630,9 @@ static int lxcContainerSetupExtraMounts(
return 0;
}
-static int lxcContainerSetupMounts(virDomainDefPtr vmDef)
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef,
+ virDomainFSDefPtr root)
{
- int i;
- virDomainFSDefPtr root = NULL;
-
- for (i = 0 ; i < vmDef->nfss ; i++) {
- if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT)
- continue;
- if (STREQ(vmDef->fss[i]->dst, "/"))
- root = vmDef->fss[i];
- }
-
if (root)
return lxcContainerSetupPivotRoot(vmDef, root);
else
@@ -630,6 +656,8 @@ static int lxcContainerChild( void *data
lxc_child_argv_t *argv = data;
virDomainDefPtr vmDef = argv->config;
int ttyfd;
+ char *ttyPath;
+ virDomainFSDefPtr root;
if (NULL == vmDef) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -637,16 +665,28 @@ static int lxcContainerChild( void *data
return -1;
}
- if (lxcContainerSetupMounts(vmDef) < 0)
- return -1;
+ root = virDomainGetRootFilesystem(vmDef);
- ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY);
+ if (root) {
+ if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath)
< 0) {
+ virReportOOMError(NULL);
+ return -1;
+ }
+ } else {
+ if (!(ttyPath = strdup(argv->ttyPath))) {
+ virReportOOMError(NULL);
+ return -1;
+ }
+ }
+
+ ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
if (ttyfd < 0) {
virReportSystemError(NULL, errno,
- _("failed to open %s"),
- argv->ttyPath);
+ _("failed to open tty %s"),
+ ttyPath);
return -1;
}
+ VIR_FREE(ttyPath);
if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
close(ttyfd);
@@ -654,6 +694,9 @@ static int lxcContainerChild( void *data
}
close(ttyfd);
+ if (lxcContainerSetupMounts(vmDef, root) < 0)
+ return -1;
+
/* Wait for interface devices to show up */
if (lxcContainerWaitForContinue(argv->monitor) < 0)
return -1;
diff -r fadb4a5b251a src/lxc_container.h
--- a/src/lxc_container.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_container.h Mon Apr 20 11:00:58 2009 +0100
@@ -39,6 +39,7 @@ enum {
#define LXC_DEV_MAJ_TTY 5
#define LXC_DEV_MIN_CONSOLE 1
+#define LXC_DEV_MIN_PTMX 2
#define LXC_DEV_MAJ_PTY 136
diff -r fadb4a5b251a src/lxc_controller.c
--- a/src/lxc_controller.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/lxc_controller.c Mon Apr 20 11:00:58 2009 +0100
@@ -33,6 +33,7 @@
#include <fcntl.h>
#include <signal.h>
#include <getopt.h>
+#include <sys/mount.h>
#include "virterror_internal.h"
#include "logging.h"
@@ -426,6 +427,13 @@ static int lxcControllerCleanupInterface
return 0;
}
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
static int
lxcControllerRun(virDomainDefPtr def,
@@ -440,6 +448,9 @@ lxcControllerRun(virDomainDefPtr def,
int containerPty;
char *containerPtyPath;
pid_t container = -1;
+ virDomainFSDefPtr root;
+ char *devpts = NULL;
+ char *devptmx = NULL;
if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
virReportSystemError(NULL, errno, "%s",
@@ -447,14 +458,91 @@ lxcControllerRun(virDomainDefPtr def,
goto cleanup;
}
- if (virFileOpenTty(&containerPty,
- &containerPtyPath,
- 0) < 0) {
- virReportSystemError(NULL, errno, "%s",
- _("failed to allocate tty"));
- goto cleanup;
+ root = virDomainGetRootFilesystem(def);
+
+ /*
+ * If doing a chroot style setup, we need to prepare
+ * a private /dev/pts for the child now, which they
+ * will later move into position.
+ *
+ * This is complex because 'virsh console' needs to
+ * use /dev/pts from the host OS, and the guest OS
+ * needs to use /dev/pts from the guest.
+ *
+ * This means that we (libvirt_lxc) need to see and
+ * use both /dev/pts instances. We're running in the
+ * host OS context though and don't want to expose
+ * the guest OS /dev/pts there.
+ *
+ * Thus we call unshare(CLONE_NS) so that we can see
+ * the guest's new /dev/pts, without it becoming
+ * visible to the host OS. We also put the root FS
+ * into slave mode, just in case it was currently
+ * marked as shared
+ */
+ if (root) {
+ VIR_DEBUG0("Setting up private /dev/pts");
+ if (unshare(CLONE_NEWNS) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("cannot unshare mount namespace"));
+ goto cleanup;
+ }
+
+ if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to switch root mount into slave
mode"));
+ goto cleanup;
+ }
+
+ if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 ||
+ virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0)
{
+ virReportOOMError(NULL);
+ goto cleanup;
+ }
+
+ if (virFileMakePath(devpts) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to make path %s"),
+ devpts);
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Mouting 'devpts' on %s", devpts);
+ if (mount("devpts", devpts, "devpts", 0,
"newinstance,ptmxmode=0666") < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mount devpts on %s"),
+ devpts);
+ goto cleanup;
+ }
+
+ if (access(devptmx, R_OK) < 0) {
+ VIR_WARN0("kernel does not support private devpts, using shared
devpts");
+ VIR_FREE(devptmx);
+ }
}
+ if (devptmx) {
+ VIR_DEBUG("Opening tty on private %s", devptmx);
+ if (virFileOpenTtyAt(devptmx,
+ &containerPty,
+ &containerPtyPath,
+ 0) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to allocate tty"));
+ goto cleanup;
+ }
+ } else {
+ VIR_DEBUG0("Opening tty on shared /dev/ptmx");
+ if (virFileOpenTty(&containerPty,
+ &containerPtyPath,
+ 0) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to allocate tty"));
+ goto cleanup;
+ }
+ }
+
+
if (lxcSetContainerResources(def) < 0)
goto cleanup;
@@ -476,6 +564,8 @@ lxcControllerRun(virDomainDefPtr def,
rc = lxcControllerMain(monitor, client, appPty, containerPty);
cleanup:
+ VIR_FREE(devptmx);
+ VIR_FREE(devpts);
if (control[0] != -1)
close(control[0]);
if (control[1] != -1)
diff -r fadb4a5b251a src/util.c
--- a/src/util.c Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.c Mon Apr 20 11:00:58 2009 +0100
@@ -1050,14 +1050,25 @@ int virFileBuildPath(const char *dir,
}
-#ifdef __linux__
int virFileOpenTty(int *ttymaster,
char **ttyName,
int rawmode)
{
+ return virFileOpenTtyAt("/dev/ptmx",
+ ttymaster,
+ ttyName,
+ rawmode);
+}
+
+#ifdef __linux__
+int virFileOpenTtyAt(const char *ptmx,
+ int *ttymaster,
+ char **ttyName,
+ int rawmode)
+{
int rc = -1;
- if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
+ if ((*ttymaster = open(ptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
goto cleanup;
if (unlockpt(*ttymaster) < 0)
@@ -1100,9 +1111,10 @@ cleanup:
}
#else
-int virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED,
- char **ttyName ATTRIBUTE_UNUSED,
- int rawmode ATTRIBUTE_UNUSED)
+int virFileOpenTtyAt(const char *ptmx ATTRIBUTE_UNUSED,
+ int *ttymaster ATTRIBUTE_UNUSED,
+ char **ttyName ATTRIBUTE_UNUSED,
+ int rawmode ATTRIBUTE_UNUSED)
{
return -1;
}
diff -r fadb4a5b251a src/util.h
--- a/src/util.h Mon Apr 20 10:53:47 2009 +0100
+++ b/src/util.h Mon Apr 20 11:00:58 2009 +0100
@@ -103,6 +103,10 @@ int virFileBuildPath(const char *dir,
int virFileOpenTty(int *ttymaster,
char **ttyName,
int rawmode);
+int virFileOpenTtyAt(const char *ptmx,
+ int *ttymaster,
+ char **ttyName,
+ int rawmode);
char* virFilePid(const char *dir,
const char *name);
--
|: Red Hat, Engineering, London -o-
http://people.redhat.com/berrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org -o-
http://ovirt.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|