[libvirt] [PATCH 0/3] Misc fixes for LXC driver

This series provides some small fixes to the LXC driver which bring it closer to being able to run a full Fedora OS with regular init (upstart) startup process

When getting the driver/domain cgroup it is possible to specify whether it should be auto created. If auto-creation was turned off, libvirt still mistakenly created its own top level cgroup * src/util/cgroup.c: Honour autocreate flag for top level cgroup --- src/util/cgroup.c | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/util/cgroup.c b/src/util/cgroup.c index ad0d595..b4c3353 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -519,7 +519,8 @@ err: } static int virCgroupAppRoot(int privileged, - virCgroupPtr *group) + virCgroupPtr *group, + int create) { virCgroupPtr rootgrp = NULL; int rc; @@ -551,7 +552,7 @@ static int virCgroupAppRoot(int privileged, if (rc != 0) goto cleanup; - rc = virCgroupMakeGroup(rootgrp, *group, 1); + rc = virCgroupMakeGroup(rootgrp, *group, create); cleanup: virCgroupFree(&rootgrp); @@ -638,7 +639,7 @@ int virCgroupForDriver(const char *name, char *path = NULL; virCgroupPtr rootgrp = NULL; - rc = virCgroupAppRoot(privileged, &rootgrp); + rc = virCgroupAppRoot(privileged, &rootgrp, create); if (rc != 0) goto out; -- 1.6.6.1

On Thu, Mar 04, 2010 at 11:27:17AM +0000, Daniel P. Berrange wrote:
When getting the driver/domain cgroup it is possible to specify whether it should be auto created. If auto-creation was turned off, libvirt still mistakenly created its own top level cgroup
* src/util/cgroup.c: Honour autocreate flag for top level cgroup --- src/util/cgroup.c | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/util/cgroup.c b/src/util/cgroup.c index ad0d595..b4c3353 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -519,7 +519,8 @@ err: }
static int virCgroupAppRoot(int privileged, - virCgroupPtr *group) + virCgroupPtr *group, + int create) { virCgroupPtr rootgrp = NULL; int rc; @@ -551,7 +552,7 @@ static int virCgroupAppRoot(int privileged, if (rc != 0) goto cleanup;
- rc = virCgroupMakeGroup(rootgrp, *group, 1); + rc = virCgroupMakeGroup(rootgrp, *group, create);
cleanup: virCgroupFree(&rootgrp); @@ -638,7 +639,7 @@ int virCgroupForDriver(const char *name, char *path = NULL; virCgroupPtr rootgrp = NULL;
- rc = virCgroupAppRoot(privileged, &rootgrp); + rc = virCgroupAppRoot(privileged, &rootgrp, create); if (rc != 0) goto out;
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

When using the 'ns' cgroup controller, the moment a process calls 'unshare(CLONE_NEWNS)', it will be given a private cgroup tree under its current location. This really messages up the LXC controller process, because it ends up creating the containers' cgroup in the wrong place. The fix is fairly easy, just move the cgroup setup before the code which calls unshare(). The 'ns' controller will still create extra undesired cgroups, but they at least won't break libvirt's setup now. The patch also adds a missing cgroups allow rule for /dev/tty device node --- src/lxc/lxc_container.h | 1 + src/lxc/lxc_controller.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index a1dd5a1..9e15642 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -39,6 +39,7 @@ enum { #define LXC_DEV_MIN_URANDOM 9 #define LXC_DEV_MAJ_TTY 5 +#define LXC_DEV_MIN_TTY 0 #define LXC_DEV_MIN_CONSOLE 1 #define LXC_DEV_MIN_PTMX 2 diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 95970cc..525c6cb 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -78,6 +78,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE}, {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; @@ -301,7 +302,7 @@ static int lxcControllerMain(int monitor, fdArray[0].active = 0; fdArray[1].fd = contPty; fdArray[1].active = 0; - + VIR_ERROR("monitor=%d client=%d appPty=%d contPty=%d", monitor,client, appPty, contPty); /* create the epoll fild descriptor */ epollFd = epoll_create(2); if (0 > epollFd) { @@ -516,6 +517,9 @@ lxcControllerRun(virDomainDefPtr def, root = virDomainGetRootFilesystem(def); + if (lxcSetContainerResources(def) < 0) + goto cleanup; + /* * If doing a chroot style setup, we need to prepare * a private /dev/pts for the child now, which they @@ -599,9 +603,6 @@ lxcControllerRun(virDomainDefPtr def, } - if (lxcSetContainerResources(def) < 0) - goto cleanup; - if ((container = lxcContainerStart(def, nveths, veths, -- 1.6.6.1

On Thu, Mar 04, 2010 at 11:27:18AM +0000, Daniel P. Berrange wrote:
When using the 'ns' cgroup controller, the moment a process calls 'unshare(CLONE_NEWNS)', it will be given a private cgroup tree under its current location. This really messages up the LXC controller process, because it ends up creating the containers' cgroup in the wrong place. The fix is fairly easy, just move the cgroup setup before the code which calls unshare(). The 'ns' controller will still create extra undesired cgroups, but they at least won't break libvirt's setup now.
The patch also adds a missing cgroups allow rule for /dev/tty device node --- src/lxc/lxc_container.h | 1 + src/lxc/lxc_controller.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index a1dd5a1..9e15642 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -39,6 +39,7 @@ enum { #define LXC_DEV_MIN_URANDOM 9
#define LXC_DEV_MAJ_TTY 5 +#define LXC_DEV_MIN_TTY 0 #define LXC_DEV_MIN_CONSOLE 1 #define LXC_DEV_MIN_PTMX 2
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 95970cc..525c6cb 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -78,6 +78,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_CONSOLE}, {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; @@ -301,7 +302,7 @@ static int lxcControllerMain(int monitor, fdArray[0].active = 0; fdArray[1].fd = contPty; fdArray[1].active = 0; - + VIR_ERROR("monitor=%d client=%d appPty=%d contPty=%d", monitor,client, appPty, contPty); /* create the epoll fild descriptor */ epollFd = epoll_create(2); if (0 > epollFd) { @@ -516,6 +517,9 @@ lxcControllerRun(virDomainDefPtr def,
root = virDomainGetRootFilesystem(def);
+ if (lxcSetContainerResources(def) < 0) + goto cleanup; + /* * If doing a chroot style setup, we need to prepare * a private /dev/pts for the child now, which they @@ -599,9 +603,6 @@ lxcControllerRun(virDomainDefPtr def, }
- if (lxcSetContainerResources(def) < 0) - goto cleanup; - if ((container = lxcContainerStart(def, nveths, veths,
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Upstart crashes & burns in a heap if $TERM environment variable is missing. Presumably the kernel always sets this when booting init on a real machine, so libvirt should set it for containers too. To make a typical inittab / mingetty setup happier, we need to symlink the primary console /dev/pts/0 to /dev/tty1. Improve logging in certain scenarios to make troubleshooting easier * src/lxc/lxc_container.c: Create /dev/tty1 and set $TERM --- src/lxc/lxc_container.c | 26 ++++++++++++++++++++++---- 1 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index c425154..b1e895d 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -105,8 +105,13 @@ static int lxcContainerExecInit(virDomainDefPtr vmDef) vmDef->os.init, NULL, }; + const char *const envp[] = { + "PATH=/bin:/sbin", + "TERM=linux", + NULL, + }; - return execve(argv[0], (char **)argv, NULL); + return execve(argv[0], (char **)argv,(char**)envp); } /** @@ -488,6 +493,15 @@ static int lxcContainerPopulateDevices(void) } } + /* XXX we should allow multiple consoles per container + * for tty2, tty3, etc, but the domain XML does not + * handle this yet + */ + if (symlink("/dev/pts/0", "/dev/tty1") < 0) { + virReportSystemError(errno, "%s", + _("Failed to symlink /dev/pts/0 to /dev/tty1")); + return -1; + } return 0; } @@ -822,15 +836,19 @@ int lxcContainerStart(virDomainDefPtr def, flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD; - if (userns_supported()) + if (userns_supported()) { + DEBUG0("Enable user namespaces"); flags |= CLONE_NEWUSER; + } - if (def->nets != NULL) + if (def->nets != NULL) { + DEBUG0("Enable network namespaces"); flags |= CLONE_NEWNET; + } pid = clone(lxcContainerChild, stacktop, flags, &args); VIR_FREE(stack); - DEBUG("clone() returned, %d", pid); + DEBUG("clone() completed, new container PID is %d", pid); if (pid < 0) { virReportSystemError(errno, "%s", -- 1.6.6.1

On Thu, Mar 04, 2010 at 11:27:19AM +0000, Daniel P. Berrange wrote:
Upstart crashes & burns in a heap if $TERM environment variable is missing. Presumably the kernel always sets this when booting init on a real machine, so libvirt should set it for containers too.
To make a typical inittab / mingetty setup happier, we need to symlink the primary console /dev/pts/0 to /dev/tty1.
Improve logging in certain scenarios to make troubleshooting easier
* src/lxc/lxc_container.c: Create /dev/tty1 and set $TERM --- src/lxc/lxc_container.c | 26 ++++++++++++++++++++++---- 1 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index c425154..b1e895d 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -105,8 +105,13 @@ static int lxcContainerExecInit(virDomainDefPtr vmDef) vmDef->os.init, NULL, }; + const char *const envp[] = { + "PATH=/bin:/sbin", + "TERM=linux", + NULL, + };
- return execve(argv[0], (char **)argv, NULL); + return execve(argv[0], (char **)argv,(char**)envp); }
/** @@ -488,6 +493,15 @@ static int lxcContainerPopulateDevices(void) } }
+ /* XXX we should allow multiple consoles per container + * for tty2, tty3, etc, but the domain XML does not + * handle this yet + */ + if (symlink("/dev/pts/0", "/dev/tty1") < 0) { + virReportSystemError(errno, "%s", + _("Failed to symlink /dev/pts/0 to /dev/tty1")); + return -1; + }
return 0; } @@ -822,15 +836,19 @@ int lxcContainerStart(virDomainDefPtr def,
flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD;
- if (userns_supported()) + if (userns_supported()) { + DEBUG0("Enable user namespaces"); flags |= CLONE_NEWUSER; + }
- if (def->nets != NULL) + if (def->nets != NULL) { + DEBUG0("Enable network namespaces"); flags |= CLONE_NEWNET; + }
pid = clone(lxcContainerChild, stacktop, flags, &args); VIR_FREE(stack); - DEBUG("clone() returned, %d", pid); + DEBUG("clone() completed, new container PID is %d", pid);
if (pid < 0) { virReportSystemError(errno, "%s",
ACK, looks fine, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/
participants (2)
-
Daniel P. Berrange
-
Daniel Veillard