On Mon, Apr 06, 2009 at 02:13:04PM -0500, Serge E. Hallyn wrote:
libvirt/lxc is broken on F11. The pivot_root() call returns
-EINVAL. The below is one way we can fix it. I'm also sending
another patch which takes the simpler approach of using chroot.
However, chroot is trivially escapable (see for instance
http://www.linuxsecurity.com/content/view/117632/49/). I don't
know whether the typical libvirt user would care. If so, then
the below patch is probably the way to go.
We intended to cover 2 use cases for LXC in libvirt
- Resource control / isolation
- Secure separation of applications
For the second case, we really need to use pivot_root(), though
it in itself is not sufficient for security of course. We also
have a TODO item to make use of private devpts mounts in this
bit of code now that's in 2.6.29 kernels.
>From 26cac415771a2d9712af0e1ce60a0bcb41b44665 Mon Sep 17 00:00:00 2001
From: root <root(a)localhost.localdomain>
Date: Sat, 4 Apr 2009 22:49:20 -0400
Subject: [PATCH 1/1] lxc: make the pivot_root more robust.
The libvirt lxc driver uses pivot_root instead of chroot, because
the latter is trivially escapable. However, the pivot_root(2)
system call can fail for several subtle reasons. Depending upon
your distro init sequence you may get lucky and have the old
recipe work, but on a Fedora 11 standard install, for instance,
it will fail.
Do a few more steps to make pivot_root hopefully always
succeed. We mark / as MS_PRIVATE, create an empty tmpfs,
and bind-mount the container root onto /new in that fs.
In this way, we ensure two reasons for pivot_root to fail -
namely old_root->parent being MS_SHARED and old_root and
new_root being on the same fs - won't happen.
Signed-off-by: Serge Hallyn <serue(a)us.ibm.com>
---
src/lxc_container.c | 108 ++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 85 insertions(+), 23 deletions(-)
diff --git a/src/lxc_container.c b/src/lxc_container.c
index 3f17b8d..d3959f6 100644
--- a/src/lxc_container.c
+++ b/src/lxc_container.c
@@ -264,50 +264,113 @@ static int lxcContainerChildMountSort(const void *a, const void
*b)
return strcmp(*sb, *sa);
}
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MNT_DETACH
+#define MNT_DETACH 0x00000002
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE 1<<18
+#endif
+
static int lxcContainerPivotRoot(virDomainFSDefPtr root)
{
int rc;
- char *oldroot;
+ char *oldroot = NULL, *newroot = NULL;
- /* First step is to ensure the new root itself is
- a mount point */
- if (mount(root->src, root->src, NULL, MS_BIND, NULL) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to bind new root %s"),
- root->src);
- return -1;
+ /* root->parent must be private, so make / private. */
+ if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to make root private"));
+ goto err;
}
if (virAsprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
virReportOOMError(NULL);
- return -1;
+ goto err;
}
if ((rc = virFileMakePath(oldroot)) < 0) {
virReportSystemError(NULL, rc,
_("failed to create %s"),
oldroot);
- VIR_FREE(oldroot);
- return -1;
+ goto err;
+ }
+
+ /* Create a tmpfs root since old and new roots must be
+ * on separate filesystems */
+ if (mount("", oldroot, "tmpfs", 0, NULL) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to mount empty tmpfs at %s"),
+ oldroot);
+ goto err;
+ }
+
+ /* Create a directory called 'new' in tmpfs */
+ if (virAsprintf(&newroot, "%s/new", oldroot) < 0) {
+ virReportOOMError(NULL);
+ goto err;
+ }
+
+ if ((rc = virFileMakePath(newroot)) < 0) {
+ virReportSystemError(NULL, rc,
+ _("failed to create %s"),
+ newroot);
+ goto err;
+ }
+
+ /* ... and mount our root onto it */
+ if (mount(root->src, newroot, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ virReportSystemError(NULL, errno,
+ _("failed to bind new root %s into tmpfs"),
+ root->src);
+ goto err;
+ }
+
+ /* Now we chroot into the tmpfs, then pivot into the
+ * root->src bind-mounted onto '/new' */
+ if (chroot(oldroot) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to chroot into tmpfs"));
+ goto err;
+ }
+
+ if (chdir("/new") < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to chdir into /new on tmpfs"));
+ goto err;
}
/* The old root directory will live at /.oldroot after
* this and will soon be unmounted completely */
- if (pivot_root(root->src, oldroot) < 0) {
- virReportSystemError(NULL, errno,
- _("failed to pivot root %s to %s"),
- oldroot, root->src);
- VIR_FREE(oldroot);
- return -1;
+ if (pivot_root(".", ".oldroot") < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to pivot root"));
+ goto err;
}
- VIR_FREE(oldroot);
/* CWD is undefined after pivot_root, so go to / */
- if (chdir("/") < 0) {
- return -1;
+ if (chdir("/") < 0)
+ goto err;
+
+ if (umount2(".oldroot", MNT_DETACH) < 0) {
+ virReportSystemError(NULL, errno, "%s",
+ _("failed to lazily unmount old root"));
+ goto err;
}
+ VIR_FREE(oldroot);
+ VIR_FREE(newroot);
+
return 0;
+
+err:
+ if (oldroot) VIR_FREE(oldroot);
+ if (newroot) VIR_FREE(newroot);
+ return -1;
}
static int lxcContainerPopulateDevices(void)
@@ -349,10 +412,9 @@ static int lxcContainerPopulateDevices(void)
_("cannot create /dev/pts"));
return -1;
}
- if (mount("/.oldroot/dev/pts", "/dev/pts", NULL,
- MS_MOVE, NULL) < 0) {
+ if (mount("devpts", "/dev/pts", "devpts", 0, NULL)
< 0) {
virReportSystemError(NULL, errno, "%s",
- _("failed to move /dev/pts into container"));
+ _("failed to mount /dev/pts in container"));
return -1;
}
ACK
This looks much more robust that my original code
Daniel
--
|: Red Hat, Engineering, London -o-
http://people.redhat.com/berrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org -o-
http://ovirt.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|