On Wed, Mar 26, 2008 at 11:20:59PM -0700, Dave Leskovec wrote:
This is a repost of patch four in the last series I posted. It
contains the
start container support. I've made some changes corresponding to Dan B's patch
moving the lxc driver under libvirtd. I removed the isolation forks and cleaned
up the status handling and PID storing.
General comment over the patch is that I prefer to see all function with
an header comment, it's a bit painful, but helps graps the context when an
error arise and someone who don't know the code try to sort it out. That's
not a blocker for commits but better for long term maintainance :-)
[...]
+/* Functions */
+static int lxcExecContainerInit(lxc_vm_def_t *vmDef)
+{
+ int rc = -1;
+ char* execString;
+ int execStringLen = strlen(vmDef->init) + 1 + 5;
+
+ if(NULL == (execString = calloc(execStringLen, sizeof(char)))) {
An error should really be reported here.
+ goto error_out;
+ }
+
+ strcpy(execString, "exec ");
+ strcat(execString, vmDef->init);
Hum, it seems there is an off by one allocation error, you don't allocate
the space for the terminating 0
+ execl("/bin/sh", "sh", "-c",
execString, (char*)NULL);
+ DEBUG("execl failed: %s", strerror(errno));
+
+error_out:
+ exit(rc);
+}
[...]
+static int lxcTtyForward(int fd1, int fd2, int *loopFlag, int
pollmsecs)
+{
+ int rc = -1;
+ int i;
+ char buf[2];
+ struct pollfd fds[2];
+ int numFds = 0;
+
+ if (0 <= fd1) {
+ fds[numFds].fd = fd1;
+ fds[numFds].events = POLLIN;
+ ++numFds;
+ }
+
+ if (0 <= fd2) {
+ fds[numFds].fd = fd2;
+ fds[numFds].events = POLLIN;
+ ++numFds;
+ }
+
+ if (0 == numFds) {
+ DEBUG0("No fds to monitor, return");
+ goto cleanup;
+ }
+
+ while (!(*loopFlag)) {
+ if ((rc = poll(fds, numFds, pollmsecs)) <= 0) {
+ if(*loopFlag) {
+ goto cleanup;
+ }
+
+ if ((0 == rc) || (errno == EINTR) || (errno == EAGAIN)) {
+ continue;
+ }
+
+ DEBUG("poll returned error: %s", strerror(errno));
+ goto cleanup;
+ }
+
+ for (i = 0; i < numFds; ++i) {
+ if (!fds[i].revents) {
+ continue;
+ }
+
+ if (fds[i].revents & POLLIN) {
+ saferead(fds[i].fd, buf, 1);
+ if (1 < numFds) {
+ safewrite(fds[i ^ 1].fd, buf, 1);
+ }
+
So if only one fd is given we discard all data read, and if 2 fds are given
all data from one is forwarded to the other, right ?
+static int exitChildLoop;
+static void lxcExecChildHandler(int sig ATTRIBUTE_UNUSED,
+ siginfo_t *signalInfo,
+ void *context ATTRIBUTE_UNUSED)
+{
+ DEBUG("lxcExecChildHandler signal from %d\n", signalInfo->si_pid);
+
+ if (signalInfo->si_pid == initPid) {
+ exitChildLoop = 1;
+ } else {
+ waitpid(signalInfo->si_pid, NULL, WNOHANG);
+ }
+
+}
+
+static int lxcExecWithTty(lxc_vm_t *vm)
+{
+ int rc = -1;
+ lxc_vm_def_t *vmDef = vm->def;
+ int ttymaster = -1;
+ int ttyslave = -1;
+ struct sigaction sigAction;
+ sigset_t sigMask;
+ int childStatus;
+
+ if (lxcSetupContainerTty(&ttymaster, &ttyslave) < 0) {
+ goto exit_with_error;
+ }
+
+ sigAction.sa_sigaction = lxcExecChildHandler;
+ sigfillset(&sigMask);
+ sigAction.sa_mask = sigMask;
+ sigAction.sa_flags = SA_SIGINFO;
+ if (0 != sigaction(SIGCHLD, &sigAction, NULL)) {
+ DEBUG("sigaction failed: %s\n", strerror(errno));
+ goto exit_with_error;
+ }
+
+ exitChildLoop = 0;
+ if ((initPid = fork()) == 0) {
+ if(lxcSetContainerStdio(ttyslave) < 0) {
+ exitChildLoop = 1;
+ goto exit_with_error;
+ }
+
+ lxcExecContainerInit(vmDef);
+ /* this function will not return. if it fails, it will exit */
+ }
+
+ close(ttyslave);
+ lxcTtyForward(ttymaster, vm->parentTty,
+ &exitChildLoop, 100);
+
+ DEBUG("child waiting on pid %d", initPid);
+ waitpid(initPid, &childStatus, 0);
+ rc = WEXITSTATUS(childStatus);
+ DEBUG("container exited with rc: %d", rc);
+
+exit_with_error:
+ exit(rc);
+}
So this is forked for each container created, right ?
[...]
Index: b/src/lxc_container.h
ok
Index: b/src/lxc_driver.c
===================================================================
--- a/src/lxc_driver.c 2008-03-21 11:46:11.000000000 -0700
+++ b/src/lxc_driver.c 2008-03-24 16:46:48.000000000 -0700
@@ -25,14 +25,17 @@
[...]
+static int lxcStartContainer(virConnectPtr conn,
+ lxc_driver_t* driver,
+ lxc_vm_t *vm)
+{
+ int rc = -1;
+ int flags;
+ int stacksize = getpagesize() * 4;
+ void *stack, *stacktop;
+
+ /* allocate a stack for the container */
+ stack = malloc(stacksize);
+ if (!stack) {
+ lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
+ _("unable to allocate container stack"));
+ goto error_exit;
+ }
+ stacktop = (char*)stack + stacksize;
+
+ flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+
+ vm->pid = clone(lxcChild, stacktop, flags, (void *)vm);
+
+ DEBUG("clone() returned, %d", vm->pid);
+
+ if (vm->pid < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("clone() failed, %s"), strerror(errno));
+ goto error_exit;
+ }
+
+ vm->def->id = vm->pid;
+ lxcSaveConfig(NULL, driver, vm, vm->def);
We should make sure at some point taht there is some kind of locking
mechanism when creating those config files, no ?
+ rc = 0;
+
+error_exit:
+ return rc;
+}
[...]
+static int lxcVmStart(virConnectPtr conn,
+ lxc_driver_t * driver,
+ lxc_vm_t * vm)
+{
+ int rc = -1;
+ lxc_vm_def_t *vmDef = vm->def;
+
+ /* open tty for the container */
+ if(lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) {
+ goto cleanup;
+ }
+
+ rc = lxcStartContainer(conn, driver, vm);
+
+ if(rc == 0) {
+ vm->state = VIR_DOMAIN_RUNNING;
+ driver->ninactivevms--;
+ driver->nactivevms++;
+ }
+
+cleanup:
+ return rc;
+}
[...]
Hum, now that config names are saved based on domain names, wouldn't
it be better to use a name based lookup ? Less precise but more direct
[...]
static int lxcStartup(void)
{
@@ -459,7 +666,7 @@
NULL, /* getCapabilities */
lxcListDomains, /* listDomains */
lxcNumDomains, /* numOfDomains */
- NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */
+ lxcDomainCreateAndStart, /* domainCreateLinux */
lxcDomainLookupByID, /* domainLookupByID */
lxcDomainLookupByUUID, /* domainLookupByUUID */
lxcDomainLookupByName, /* domainLookupByName */
@@ -483,7 +690,7 @@
lxcDomainDumpXML, /* domainDumpXML */
lxcListDefinedDomains, /* listDefinedDomains */
lxcNumDefinedDomains, /* numOfDefinedDomains */
- NULL, /* domainCreate */
+ lxcDomainStart, /* domainCreate */
lxcDomainDefine, /* domainDefineXML */
lxcDomainUndefine, /* domainUndefine */
NULL, /* domainAttachDevice */
Looks fine overall. I wonder if 1 fork/clone per container can't be
reduced to a single process doing the fd processing for all the containers
running. But that's probably harder, more fragile and for little gain.
Daniel
--
Red Hat Virtualization group
http://redhat.com/virtualization/
Daniel Veillard | virtualization library
http://libvirt.org/
veillard(a)redhat.com | libxml GNOME XML XSLT toolkit
http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine
http://rpmfind.net/