On 05/10/2013 06:28 PM, Richard RW. Weinberger wrote:
----- Ursprüngliche Mail -----
> user namespace doesn't allow to create devices in
> uninit userns. We should create devices on host side.
>
> Signed-off-by: Gao feng <gaofeng(a)cn.fujitsu.com>
> ---
> src/lxc/lxc_container.c | 47 +++++++----------------------
> src/lxc/lxc_controller.c | 77
> ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 87 insertions(+), 37 deletions(-)
>
> diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
> index eabaa34..540246f 100644
> --- a/src/lxc/lxc_container.c
> +++ b/src/lxc/lxc_container.c
> @@ -833,22 +833,10 @@ cleanup:
> return ret;
> }
>
> -static int lxcContainerPopulateDevices(char **ttyPaths, size_t
> nttyPaths)
> +static int lxcContainerSetupDevices(char **ttyPaths, size_t
> nttyPaths)
> {
> size_t i;
> const struct {
> - int maj;
> - int min;
> - mode_t mode;
> - const char *path;
> - } devs[] = {
> - { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
> - { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
> - { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
> - { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666,
> "/dev/random" },
> - { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666,
> "/dev/urandom" },
> - };
> - const struct {
> const char *src;
> const char *dst;
> } links[] = {
> @@ -858,18 +846,6 @@ static int lxcContainerPopulateDevices(char
> **ttyPaths, size_t nttyPaths)
> { "/proc/self/fd", "/dev/fd" },
> };
>
> - /* Populate /dev/ with a few important bits */
> - for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
> - dev_t dev = makedev(devs[i].maj, devs[i].min);
> - if (mknod(devs[i].path, S_IFCHR, dev) < 0 ||
> - chmod(devs[i].path, devs[i].mode)) {
> - virReportSystemError(errno,
> - _("Failed to make device %s"),
> - devs[i].path);
> - return -1;
> - }
> - }
> -
> for (i = 0 ; i < ARRAY_CARDINALITY(links) ; i++) {
> if (symlink(links[i].src, links[i].dst) < 0) {
> virReportSystemError(errno,
> @@ -882,22 +858,13 @@ static int lxcContainerPopulateDevices(char
> **ttyPaths, size_t nttyPaths)
> if (access("/dev/pts/ptmx", W_OK) == 0) {
> /* We have private devpts capability, so bind that */
> if (virFileTouch("/dev/ptmx", 0666) < 0)
> - return -1;
> + return -1;
>
> if (mount("/dev/pts/ptmx", "/dev/ptmx",
"ptmx", MS_BIND,
> NULL) < 0) {
> virReportSystemError(errno, "%s",
> _("Failed to bind /dev/pts/ptmx on
> to /dev/ptmx"));
> return -1;
> }
> - } else {
> - /* Legacy devpts, so we need to just use shared one */
> - dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
> - if (mknod("/dev/ptmx", S_IFCHR, dev) < 0 ||
> - chmod("/dev/ptmx", 0666)) {
> - virReportSystemError(errno, "%s",
> - _("Failed to make device
> /dev/ptmx"));
> - return -1;
> - }
> }
>
> for (i = 0 ; i < nttyPaths ; i++) {
> @@ -1825,8 +1792,8 @@ static int
> lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
> if (lxcContainerMountFSDevPTS(vmDef, "/.oldroot") < 0)
> goto cleanup;
>
> - /* Populates device nodes in /dev/ */
> - if (lxcContainerPopulateDevices(ttyPaths, nttyPaths) < 0)
> + /* Sets up device nodes in /dev/ */
> + if (lxcContainerSetupDevices(ttyPaths, nttyPaths) < 0)
> goto cleanup;
>
> /* Sets up any non-root mounts from guest config */
> @@ -2037,6 +2004,12 @@ static int lxcContainerChild(void *data)
> goto cleanup;
> }
>
> + if (lxcContainerWaitForContinue(argv->monitor) < 0) {
> + virReportSystemError(errno, "%s",
> + _("Failed to read the container
> continue message"));
> + goto cleanup;
> + }
> +
> ret = 0;
> cleanup:
> VIR_FREE(ttyPath);
> diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
> index e9b90bf..2072e9a 100644
> --- a/src/lxc/lxc_controller.c
> +++ b/src/lxc/lxc_controller.c
> @@ -1103,6 +1103,73 @@ cleanup:
> }
>
>
> +static int virLXCControllerPopulateDevices(virLXCControllerPtr ctrl)
> +{
> + size_t i;
> + int ret = -1;
> + char *ptmx = NULL;
> + char *path = NULL;
> + const struct {
> + int maj;
> + int min;
> + mode_t mode;
> + const char *path;
> + } devs[] = {
> + { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/dev/null" },
> + { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/dev/zero" },
> + { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/dev/full" },
> + { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666,
> "/dev/random" },
> + { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666,
> "/dev/urandom" },
> + };
> +
> + /* Populate /dev/ with a few important bits */
> + for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
> + if (virAsprintf(&path, "/proc/%llu/root/%s",
> + (unsigned long long)ctrl->initpid,
> + devs[i].path) < 0) {
> + virReportOOMError();
> + goto out;
> + }
> +
> + dev_t dev = makedev(devs[i].maj, devs[i].min);
> + if (mknod(path, S_IFCHR, dev) < 0 ||
> + chmod(path, devs[i].mode)) {
> + virReportSystemError(errno,
> + _("Failed to make device %s"),
> + devs[i].path);
> + goto out;
> + }
> + }
> +
> + if (virAsprintf(&ptmx, "/proc/%llu/root/dev/pts/ptmx",
> + (unsigned long long)ctrl->initpid) < 0) {
> + virReportOOMError();
> + goto out;
> + }
> +
> + if (access(ptmx, W_OK)) {
> + VIR_FREE(path);
> +
> + if (virAsprintf(&path, "/proc/%llu/root/dev/ptmx",
> + (unsigned long long)ctrl->initpid) < 0) {
> + virReportOOMError();
> + goto out;
> + }
> + /* Legacy devpts, so we need to just use shared one */
> + dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX);
> + if (mknod(path, S_IFCHR, dev) < 0 ||
> + chmod(path, 0666)) {
> + virReportSystemError(errno, _("Failed to make device
> %s"), path);
> + goto out;
> + }
> + }
> +
> + ret = 0;
> +out:
> + VIR_FREE(ptmx);
> + VIR_FREE(path);
> + return ret;
> +}
>
> /**
> * virLXCControllerMoveInterfaces
> @@ -1552,6 +1619,16 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
> goto cleanup;
> }
>
> + /* Populate devices for container */
> + if (virLXCControllerPopulateDevices(ctrl) < 0)
> + goto cleanup;
systemd will not be happy with this.
If /dev/ is not already a mountpoint, it will try to mount devtmpfs, which
will fail, and systemd will stop booting.
It would be nice to have:
a) A way to disable virLXCControllerPopulateDevices(), i.e. so that one can bind mount one's own
lxc-/dev to /dev
b) libvirt mounts a tmpfs onto /dev and then creates the device nodes.
Thanks for pointing this out. I will address this problem in the next round of my
patchset.
Thanks,
Gao