[libvirt] PATCH: 0/5: Refactoring LXC & switching to generic domain APIs

The following series of 5 patches perform some general re-factoring of the LXC driver, the relationship between libvirtd & the processes it spawns, and finally switches LXC over to the generic domain XML APIs. This is just basically 1 day's work, so its a little rough around the edges in places, but I can confim that the core functionality is still all operational - by which I mean, start, stop, list, console, & re-start of daemon. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

This is same as the patch I sent yesterday... This patch does some simple re-factoring of the way the TTYs and control socket are handled to reduce the amount of state stored in the lxc_vm_t structure, in preparation for the switchover to the generic domain handling APIs. lxc_conf.c | 1 lxc_conf.h | 12 --- lxc_container.c | 60 +++++++-------- lxc_container.h | 14 +++ lxc_driver.c | 213 ++++++++++++++++---------------------------------------- 5 files changed, 104 insertions(+), 196 deletions(-) Daniel diff -r 63b8398c302e src/lxc_conf.c --- a/src/lxc_conf.c Mon Jul 14 12:18:23 2008 +0100 +++ b/src/lxc_conf.c Tue Jul 15 11:55:48 2008 +0100 @@ -1041,7 +1041,6 @@ void lxcFreeVM(lxc_vm_t *vm) { lxcFreeVMDef(vm->def); - VIR_FREE(vm->containerTty); VIR_FREE(vm); } diff -r 63b8398c302e src/lxc_conf.h --- a/src/lxc_conf.h Mon Jul 14 12:18:23 2008 +0100 +++ b/src/lxc_conf.h Tue Jul 15 11:55:48 2008 +0100 @@ -35,12 +35,6 @@ #define LXC_MAX_XML_LENGTH 16384 #define LXC_MAX_ERROR_LEN 1024 #define LXC_DOMAIN_TYPE "lxc" -#define LXC_PARENT_SOCKET 0 -#define LXC_CONTAINER_SOCKET 1 - -/* messages between parent and container */ -typedef char lxc_message_t; -#define LXC_CONTINUE_MSG 'c' /* types of networks for containers */ enum lxc_net_type { @@ -98,12 +92,6 @@ char configFileBase[PATH_MAX]; char ttyPidFile[PATH_MAX]; - - int parentTty; - int containerTtyFd; - char *containerTty; - - int sockpair[2]; lxc_vm_def_t *def; diff -r 63b8398c302e src/lxc_container.c --- a/src/lxc_container.c Mon Jul 14 12:18:23 2008 +0100 +++ b/src/lxc_container.c Tue Jul 15 11:55:48 2008 +0100 @@ -33,7 +33,6 @@ #include <unistd.h> #include "lxc_container.h" -#include "lxc_conf.h" #include "util.h" #include "memory.h" #include "veth.h" @@ -83,10 +82,11 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcSetContainerStdio(const char *ttyName) +static int lxcSetContainerStdio(const char *ttyPath) { int rc = -1; int ttyfd; + int open_max, i; if (setsid() < 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -94,10 +94,10 @@ goto error_out; } - ttyfd = open(ttyName, O_RDWR|O_NOCTTY); + ttyfd = open(ttyPath, O_RDWR|O_NOCTTY); if (ttyfd < 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("open(%s) failed: %s"), ttyName, strerror(errno)); + _("open(%s) failed: %s"), ttyPath, strerror(errno)); goto error_out; } @@ -107,7 +107,12 @@ goto cleanup; } - close(0); close(1); close(2); + /* Just in case someone forget to set FD_CLOEXEC, explicitly + * close all FDs before executing the container */ + open_max = sysconf (_SC_OPEN_MAX); + for (i = 0; i < open_max; i++) + if (i != ttyfd) + close(i); if (dup2(ttyfd, 0) < 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -144,12 +149,11 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcExecWithTty(lxc_vm_t *vm) +static int lxcExecWithTty(lxc_vm_def_t *vmDef, char *ttyPath) { int rc = -1; - lxc_vm_def_t *vmDef = vm->def; - if(lxcSetContainerStdio(vm->containerTty) < 0) { + if(lxcSetContainerStdio(ttyPath) < 0) { goto exit_with_error; } @@ -161,7 +165,7 @@ /** * lxcWaitForContinue: - * @vm: Pointer to vm structure + * @monitor: monitor FD from parent * * This function will wait for the container continue message from the * parent process. It will send this message on the socket pair stored in @@ -169,31 +173,23 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcWaitForContinue(lxc_vm_t *vm) +static int lxcWaitForContinue(int monitor) { - int rc = -1; lxc_message_t msg; int readLen; - readLen = saferead(vm->sockpair[LXC_CONTAINER_SOCKET], &msg, sizeof(msg)); - if (readLen != sizeof(msg)) { + readLen = saferead(monitor, &msg, sizeof(msg)); + if (readLen != sizeof(msg) || + msg != LXC_CONTINUE_MSG) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("Failed to read the container continue message: %s"), strerror(errno)); - goto error_out; + return -1; } DEBUG0("Received container continue message"); - close(vm->sockpair[LXC_PARENT_SOCKET]); - vm->sockpair[LXC_PARENT_SOCKET] = -1; - close(vm->sockpair[LXC_CONTAINER_SOCKET]); - vm->sockpair[LXC_CONTAINER_SOCKET] = -1; - - rc = 0; - -error_out: - return rc; + return 0; } /** @@ -204,12 +200,12 @@ * * Returns 0 on success or nonzero in case of error */ -static int lxcEnableInterfaces(const lxc_vm_t *vm) +static int lxcEnableInterfaces(const lxc_vm_def_t *def) { int rc = 0; const lxc_net_def_t *net; - for (net = vm->def->nets; net; net = net->next) { + for (net = def->nets; net; net = net->next) { DEBUG("Enabling %s", net->containerVeth); rc = vethInterfaceUpOrDown(net->containerVeth, 1); if (0 != rc) { @@ -218,7 +214,7 @@ } /* enable lo device only if there were other net devices */ - if (vm->def->nets) + if (def->nets) rc = vethInterfaceUpOrDown("lo", 1); error_out: @@ -237,11 +233,11 @@ * * Returns 0 on success or -1 in case of error */ -int lxcChild( void *argv ) +int lxcChild( void *data ) { int rc = -1; - lxc_vm_t *vm = (lxc_vm_t *)argv; - lxc_vm_def_t *vmDef = vm->def; + lxc_child_argv_t *argv = data; + lxc_vm_def_t *vmDef = argv->config; lxc_mount_t *curMount; int i; @@ -278,16 +274,16 @@ } /* Wait for interface devices to show up */ - if (0 != (rc = lxcWaitForContinue(vm))) { + if (0 != (rc = lxcWaitForContinue(argv->monitor))) { goto cleanup; } /* enable interfaces */ - if (0 != (rc = lxcEnableInterfaces(vm))) { + if (0 != (rc = lxcEnableInterfaces(vmDef))) { goto cleanup; } - rc = lxcExecWithTty(vm); + rc = lxcExecWithTty(vmDef, argv->ttyPath); /* this function will only return if an error occured */ cleanup: diff -r 63b8398c302e src/lxc_container.h --- a/src/lxc_container.h Mon Jul 14 12:18:23 2008 +0100 +++ b/src/lxc_container.h Tue Jul 15 11:55:48 2008 +0100 @@ -24,7 +24,21 @@ #ifndef LXC_CONTAINER_H #define LXC_CONTAINER_H +#include "lxc_conf.h" + #ifdef WITH_LXC + +typedef struct __lxc_child_argv lxc_child_argv_t; +struct __lxc_child_argv { + lxc_vm_def_t *config; + int monitor; + char *ttyPath; +}; + +/* messages between parent and container */ +typedef char lxc_message_t; +#define LXC_CONTINUE_MSG 'c' + /* Function declarations */ int lxcChild( void *argv ); diff -r 63b8398c302e src/lxc_driver.c --- a/src/lxc_driver.c Mon Jul 14 12:18:23 2008 +0100 +++ b/src/lxc_driver.c Tue Jul 15 11:55:48 2008 +0100 @@ -561,27 +561,23 @@ /** * lxcSendContainerContinue: - * @vm: pointer to virtual machine structure + * @monitor: FD for communicating with child * * Sends the continue message via the socket pair stored in the vm * structure. * * Returns 0 on success or -1 in case of error */ -static int lxcSendContainerContinue(const lxc_vm_t *vm) +static int lxcSendContainerContinue(virConnectPtr conn, + int monitor) { int rc = -1; lxc_message_t msg = LXC_CONTINUE_MSG; int writeCount = 0; - if (NULL == vm) { - goto error_out; - } - - writeCount = safewrite(vm->sockpair[LXC_PARENT_SOCKET], &msg, - sizeof(msg)); + writeCount = safewrite(monitor, &msg, sizeof(msg)); if (writeCount != sizeof(msg)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, _("unable to send container continue message: %s"), strerror(errno)); goto error_out; @@ -605,12 +601,15 @@ */ static int lxcStartContainer(virConnectPtr conn, lxc_driver_t* driver, - lxc_vm_t *vm) + lxc_vm_t *vm, + int monitor, + char *ttyPath) { int rc = -1; int flags; int stacksize = getpagesize() * 4; char *stack, *stacktop; + lxc_child_argv_t args = { vm->def, monitor, ttyPath }; /* allocate a stack for the container */ if (VIR_ALLOC_N(stack, stacksize) < 0) { @@ -625,7 +624,7 @@ if (vm->def->nets != NULL) flags |= CLONE_NEWNET; - vm->def->id = clone(lxcChild, stacktop, flags, (void *)vm); + vm->def->id = clone(lxcChild, stacktop, flags, &args); DEBUG("clone() returned, %d", vm->def->id); @@ -643,117 +642,9 @@ return rc; } -/** - * lxcPutTtyInRawMode: - * @conn: pointer to connection - * @ttyDev: file descriptor for tty - * - * Sets tty attributes via cfmakeraw() - * - * Returns 0 on success or -1 in case of error - */ -static int lxcPutTtyInRawMode(virConnectPtr conn, int ttyDev) -{ - int rc = -1; - - struct termios ttyAttr; - - if (tcgetattr(ttyDev, &ttyAttr) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "tcgetattr() failed: %s", strerror(errno)); - goto cleanup; - } - - cfmakeraw(&ttyAttr); - - if (tcsetattr(ttyDev, TCSADRAIN, &ttyAttr) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "tcsetattr failed: %s", strerror(errno)); - goto cleanup; - } - - rc = 0; - -cleanup: - return rc; -} /** - * lxcSetupTtyTunnel: - * @conn: pointer to connection - * @vmDef: pointer to virtual machine definition structure - * @ttyDev: pointer to int. On success will be set to fd for master - * end of tty - * - * Opens and configures the parent side tty - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSetupTtyTunnel(virConnectPtr conn, - lxc_vm_def_t *vmDef, - int* ttyDev) -{ - int rc = -1; - char *ptsStr; - - if (0 < strlen(vmDef->tty)) { - *ttyDev = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK); - if (*ttyDev < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "open() tty failed: %s", strerror(errno)); - goto setup_complete; - } - - rc = grantpt(*ttyDev); - if (rc < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "grantpt() failed: %s", strerror(errno)); - goto setup_complete; - } - - rc = unlockpt(*ttyDev); - if (rc < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "unlockpt() failed: %s", strerror(errno)); - goto setup_complete; - } - - /* get the name and print it to stdout */ - ptsStr = ptsname(*ttyDev); - if (ptsStr == NULL) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "ptsname() failed"); - goto setup_complete; - } - /* This value needs to be stored in the container configuration file */ - VIR_FREE(vmDef->tty); - if (!(vmDef->tty = strdup(ptsStr))) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("unable to get storage for vm tty name")); - goto setup_complete; - } - - /* Enter raw mode, so all characters are passed directly to child */ - if (lxcPutTtyInRawMode(conn, *ttyDev) < 0) { - goto setup_complete; - } - - } else { - *ttyDev = -1; - } - - rc = 0; - -setup_complete: - if((0 != rc) && (*ttyDev > 0)) { - close(*ttyDev); - } - - return rc; -} - -/** - * lxcSetupContainerTty: + * lxcOpenTty: * @conn: pointer to connection * @ttymaster: pointer to int. On success, set to fd for master end * @ttyName: On success, will point to string slave end of tty. Caller @@ -763,12 +654,12 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcSetupContainerTty(virConnectPtr conn, - int *ttymaster, - char **ttyName) +static int lxcOpenTty(virConnectPtr conn, + int *ttymaster, + char **ttyName, + int rawmode) { int rc = -1; - char tempTtyName[PATH_MAX]; *ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK); if (*ttymaster < 0) { @@ -783,27 +674,43 @@ goto cleanup; } - if (0 != ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("ptsname_r failed: %s"), strerror(errno)); - goto cleanup; + if (rawmode) { + struct termios ttyAttr; + if (tcgetattr(*ttymaster, &ttyAttr) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + "tcgetattr() failed: %s", strerror(errno)); + goto cleanup; + } + + cfmakeraw(&ttyAttr); + + if (tcsetattr(*ttymaster, TCSADRAIN, &ttyAttr) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + "tcsetattr failed: %s", strerror(errno)); + goto cleanup; + } } - if (VIR_ALLOC_N(*ttyName, strlen(tempTtyName) + 1) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("unable to allocate container name string")); - goto cleanup; + if (ttyName) { + char tempTtyName[PATH_MAX]; + if (0 != ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName))) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("ptsname_r failed: %s"), strerror(errno)); + goto cleanup; + } + + if ((*ttyName = strdup(tempTtyName)) == NULL) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); + goto cleanup; + } } - - strcpy(*ttyName, tempTtyName); rc = 0; cleanup: - if (0 != rc) { - if (-1 != *ttymaster) { - close(*ttymaster); - } + if (rc != 0 && + *ttymaster != -1) { + close(*ttymaster); } return rc; @@ -989,15 +896,18 @@ lxc_vm_t * vm) { int rc = -1; - lxc_vm_def_t *vmDef = vm->def; + int sockpair[2]; + int containerTty, parentTty; + char *containerTtyPath = NULL; /* open parent tty */ - if (lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) { + VIR_FREE(vm->def->tty); + if (lxcOpenTty(conn, &parentTty, &vm->def->tty, 1) < 0) { goto cleanup; } /* open container tty */ - if (lxcSetupContainerTty(conn, &(vm->containerTtyFd), &(vm->containerTty)) < 0) { + if (lxcOpenTty(conn, &containerTty, &containerTtyPath, 0) < 0) { goto cleanup; } @@ -1011,15 +921,15 @@ if (vm->pid == 0) { /* child process calls forward routine */ - lxcTtyForward(vm->parentTty, vm->containerTtyFd); + lxcTtyForward(parentTty, containerTty); } if (lxcStoreTtyPid(driver, vm)) { DEBUG0("unable to store tty pid"); } - close(vm->parentTty); - close(vm->containerTtyFd); + close(parentTty); + close(containerTty); if (0 != (rc = lxcSetupInterfaces(conn, vm))) { goto cleanup; @@ -1027,7 +937,7 @@ /* create a socket pair to send continue message to the container once */ /* we've completed the post clone configuration */ - if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, vm->sockpair)) { + if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, sockpair)) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, _("sockpair failed: %s"), strerror(errno)); goto cleanup; @@ -1035,7 +945,9 @@ /* check this rc */ - rc = lxcStartContainer(conn, driver, vm); + rc = lxcStartContainer(conn, driver, vm, + sockpair[1], + containerTtyPath); if (rc != 0) goto cleanup; @@ -1043,7 +955,7 @@ if (rc != 0) goto cleanup; - rc = lxcSendContainerContinue(vm); + rc = lxcSendContainerContinue(conn, sockpair[0]); if (rc != 0) goto cleanup; @@ -1052,10 +964,9 @@ driver->nactivevms++; cleanup: - close(vm->sockpair[LXC_PARENT_SOCKET]); - vm->sockpair[LXC_PARENT_SOCKET] = -1; - close(vm->sockpair[LXC_CONTAINER_SOCKET]); - vm->sockpair[LXC_CONTAINER_SOCKET] = -1; + close(sockpair[0]); + close(sockpair[1]); + VIR_FREE(containerTtyPath); return rc; } -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

The lxc_driver.c file contains quite a large amount of code, serving two reasonably well separated purposes. First there is the direct implemntation of each of the libvirt driver APIs. Second there is the code to spawn a container and a controller for forwarding I/O to/from the PTYs. This patch attempts to re-arrange the code across files to better reflect the split in functionality. The general idea is thus: - lxc_driver.c: implementation of the libvirt driver APIs - lxc_container.c: code for creating containers - lxc_controller.c: code for managing an active container So this entails the following re-arrangement: - All calls to clone() move into lxc_container.c. In particular there is a lxcContainerAvailable() method to querying capabilities of current kernel, and the lxcContainerStart() method for actually starting a new container. - The I/O forwarding code moves into lxc_controller.c with a lxcControllerMain() function containing the epoll() event loop. The container code previously would pass the 'init' string path for the container's init program to /bin/sh for execution. This is bad because it assumes /bin/sh exists in the container's root, and more seriously does no escaping, so allows arbitrary shell code to run. So this also switches to passing the 'init' string directly to execve(). If we want to support passing args to the container startup program, we need explicit representation of the args in the XML, so we can safely pass them to execve() via the argv[] array. Aside from that, this patch should have no functional change to the way containers run. b/src/lxc_controller.c | 205 +++++++++++++++++++++++++++++ b/src/lxc_controller.h | 33 ++++ src/Makefile.am | 1 src/lxc_container.c | 219 +++++++++++++++++++++++-------- src/lxc_container.h | 19 +- src/lxc_driver.c | 345 ++----------------------------------------------- 6 files changed, 433 insertions(+), 389 deletions(-) Daniel diff -r 985e6f9b7d78 src/Makefile.am --- a/src/Makefile.am Mon Jul 14 17:18:28 2008 +0100 +++ b/src/Makefile.am Mon Jul 14 17:18:49 2008 +0100 @@ -65,6 +65,7 @@ openvz_conf.c openvz_conf.h \ openvz_driver.c openvz_driver.h \ lxc_driver.c lxc_driver.h \ + lxc_controller.c lxc_controller.h \ lxc_conf.c lxc_conf.h \ lxc_container.c lxc_container.h \ veth.c veth.h \ diff -r 985e6f9b7d78 src/lxc_container.c --- a/src/lxc_container.c Mon Jul 14 17:18:28 2008 +0100 +++ b/src/lxc_container.c Mon Jul 14 17:18:49 2008 +0100 @@ -30,6 +30,7 @@ #include <stdlib.h> #include <sys/ioctl.h> #include <sys/mount.h> +#include <sys/wait.h> #include <unistd.h> #include "lxc_container.h" @@ -40,49 +41,69 @@ #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) +/* + * GLibc headers are behind the kernel, so we define these + * constants if they're not present already. + */ + +#ifndef CLONE_NEWPID +#define CLONE_NEWPID 0x20000000 +#endif +#ifndef CLONE_NEWUTS +#define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWIPC +#define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWNET +#define CLONE_NEWNET 0x40000000 /* New network namespace */ +#endif + +/* messages between parent and container */ +typedef char lxc_message_t; +#define LXC_CONTINUE_MSG 'c' + +typedef struct __lxc_child_argv lxc_child_argv_t; +struct __lxc_child_argv { + lxc_vm_def_t *config; + int monitor; + char *ttyPath; +}; + + /** - * lxcExecContainerInit: + * lxcContainerExecInit: * @vmDef: Ptr to vm definition structure * - * Exec the container init string. The container init will replace then + * Exec the container init string. The container init will replace then * be running in the current process * - * Returns 0 on success or -1 in case of error + * Does not return */ -static int lxcExecContainerInit(const lxc_vm_def_t *vmDef) +static int lxcContainerExecInit(const lxc_vm_def_t *vmDef) { - int rc = -1; - char* execString; - size_t execStringLen = strlen(vmDef->init) + 1 + 5; + const char *const argv[] = { + vmDef->init, + NULL, + }; - if (VIR_ALLOC_N(execString, execStringLen) < 0) { - lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, - _("failed to calloc memory for init string: %s"), - strerror(errno)); - goto error_out; - } - - strcpy(execString, "exec "); - strcat(execString, vmDef->init); - - execl("/bin/sh", "sh", "-c", execString, (char*)NULL); - lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, - _("execl failed to exec init: %s"), strerror(errno)); - -error_out: - exit(rc); + return execve(argv[0], (char **)argv, NULL); } /** - * lxcSetContainerStdio: - * @ttyName: Name of tty to set as the container console + * lxcContainerSetStdio: + * @control: the conrol FD + * @ttyPath: Name of tty to set as the container console * * Sets the given tty as the primary conosole for the container as well as * stdout, stdin and stderr. * * Returns 0 on success or -1 in case of error */ -static int lxcSetContainerStdio(const char *ttyPath) +static int lxcContainerSetStdio(int control, const char *ttyPath) { int rc = -1; int ttyfd; @@ -111,7 +132,7 @@ * close all FDs before executing the container */ open_max = sysconf (_SC_OPEN_MAX); for (i = 0; i < open_max; i++) - if (i != ttyfd) + if (i != ttyfd && i != control) close(i); if (dup2(ttyfd, 0) < 0) { @@ -142,30 +163,38 @@ } /** - * lxcExecWithTty: - * @vm: Ptr to vm structure + * lxcContainerSendContinue: + * @monitor: control FD to child * - * Sets container console and stdio and then execs container init + * Sends the continue message via the socket pair stored in the vm + * structure. * * Returns 0 on success or -1 in case of error */ -static int lxcExecWithTty(lxc_vm_def_t *vmDef, char *ttyPath) +int lxcContainerSendContinue(virConnectPtr conn, + int control) { int rc = -1; + lxc_message_t msg = LXC_CONTINUE_MSG; + int writeCount = 0; - if(lxcSetContainerStdio(ttyPath) < 0) { - goto exit_with_error; + writeCount = safewrite(control, &msg, sizeof(msg)); + if (writeCount != sizeof(msg)) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("unable to send container continue message: %s"), + strerror(errno)); + goto error_out; } - lxcExecContainerInit(vmDef); + rc = 0; -exit_with_error: - exit(rc); +error_out: + return rc; } /** - * lxcWaitForContinue: - * @monitor: monitor FD from parent + * lxcContainerWaitForContinue: + * @control: control FD from parent * * This function will wait for the container continue message from the * parent process. It will send this message on the socket pair stored in @@ -173,12 +202,12 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcWaitForContinue(int monitor) +static int lxcContainerWaitForContinue(int control) { lxc_message_t msg; int readLen; - readLen = saferead(monitor, &msg, sizeof(msg)); + readLen = saferead(control, &msg, sizeof(msg)); if (readLen != sizeof(msg) || msg != LXC_CONTINUE_MSG) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -186,6 +215,7 @@ strerror(errno)); return -1; } + close(control); DEBUG0("Received container continue message"); @@ -200,7 +230,7 @@ * * Returns 0 on success or nonzero in case of error */ -static int lxcEnableInterfaces(const lxc_vm_def_t *def) +static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def) { int rc = 0; const lxc_net_def_t *net; @@ -233,7 +263,7 @@ * * Returns 0 on success or -1 in case of error */ -int lxcChild( void *data ) +static int lxcContainerChild( void *data ) { int rc = -1; lxc_child_argv_t *argv = data; @@ -244,7 +274,7 @@ if (NULL == vmDef) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("lxcChild() passed invalid vm definition")); - goto cleanup; + return -1; } /* handle the bind mounts first before doing anything else that may */ @@ -260,7 +290,7 @@ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to mount %s at %s for container: %s"), curMount->source, curMount->target, strerror(errno)); - goto cleanup; + return -1; } } @@ -270,24 +300,105 @@ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to mount /proc for container: %s"), strerror(errno)); - goto cleanup; + return -1; } + if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0) + return -1; + /* Wait for interface devices to show up */ - if (0 != (rc = lxcWaitForContinue(argv->monitor))) { - goto cleanup; + if (lxcContainerWaitForContinue(argv->monitor) < 0) + return -1; + + /* enable interfaces */ + if (lxcContainerEnableInterfaces(vmDef) < 0) + return -1; + + /* this function will only return if an error occured */ + return lxcContainerExecInit(vmDef); +} + +/** + * lxcContainerStart: + * @conn: pointer to connection + * @driver: pointer to driver structure + * @vm: pointer to virtual machine structure + * + * Starts a container process by calling clone() with the namespace flags + * + * Returns PID of container on success or -1 in case of error + */ +int lxcContainerStart(virConnectPtr conn, + lxc_vm_def_t *def, + int control, + char *ttyPath) +{ + pid_t pid; + int flags; + int stacksize = getpagesize() * 4; + char *stack, *stacktop; + lxc_child_argv_t args = { def, control, ttyPath }; + + /* allocate a stack for the container */ + if (VIR_ALLOC_N(stack, stacksize) < 0) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, + _("unable to allocate container stack")); + return -1; + } + stacktop = stack + stacksize; + + flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; + + if (def->nets != NULL) + flags |= CLONE_NEWNET; + + pid = clone(lxcContainerChild, stacktop, flags, &args); + VIR_FREE(stack); + DEBUG("clone() returned, %d", pid); + + if (pid < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("clone() failed, %s"), strerror(errno)); + return -1; } - /* enable interfaces */ - if (0 != (rc = lxcEnableInterfaces(vmDef))) { - goto cleanup; + return pid; +} + +static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED) +{ + _exit(0); +} + +int lxcContainerAvailable(int features) +{ + int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER| + CLONE_NEWIPC|SIGCHLD; + int cpid; + char *childStack; + char *stack; + int childStatus; + + if (features & LXC_CONTAINER_FEATURE_NET) + flags |= CLONE_NEWNET; + + if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) { + DEBUG0("Unable to allocate stack"); + return -1; } - rc = lxcExecWithTty(vmDef, argv->ttyPath); - /* this function will only return if an error occured */ + childStack = stack + (getpagesize() * 4); -cleanup: - return rc; + cpid = clone(lxcContainerDummyChild, childStack, flags, NULL); + VIR_FREE(stack); + if ((0 > cpid) && (EINVAL == errno)) { + DEBUG0("clone call returned EINVAL, container support is not enabled"); + return -1; + } else { + waitpid(cpid, &childStatus, 0); + } + + return 0; } #endif /* WITH_LXC */ diff -r 985e6f9b7d78 src/lxc_container.h --- a/src/lxc_container.h Mon Jul 14 17:18:28 2008 +0100 +++ b/src/lxc_container.h Mon Jul 14 17:18:49 2008 +0100 @@ -28,20 +28,19 @@ #ifdef WITH_LXC -typedef struct __lxc_child_argv lxc_child_argv_t; -struct __lxc_child_argv { - lxc_vm_def_t *config; - int monitor; - char *ttyPath; +enum { + LXC_CONTAINER_FEATURE_NET = (1 << 0), }; -/* messages between parent and container */ -typedef char lxc_message_t; -#define LXC_CONTINUE_MSG 'c' +int lxcContainerSendContinue(virConnectPtr conn, + int control); +int lxcContainerStart(virConnectPtr conn, + lxc_vm_def_t *def, + int control, + char *ttyPath); -/* Function declarations */ -int lxcChild( void *argv ); +int lxcContainerAvailable(int features); #endif /* LXC_DRIVER_H */ diff -r 985e6f9b7d78 src/lxc_controller.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lxc_controller.c Mon Jul 14 17:18:49 2008 +0100 @@ -0,0 +1,205 @@ +/* + * Copyright IBM Corp. 2008 + * + * lxc_controller.c: linux container process controller + * + * Authors: + * David L. Leskovec <dlesko at linux.vnet.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <config.h> + +#ifdef WITH_LXC + +#include <sys/epoll.h> +#include <unistd.h> + +#include "internal.h" +#include "util.h" + +#include "lxc_conf.h" +#include "lxc_controller.h" + + +#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) + +/** + * lxcFdForward: + * @readFd: file descriptor to read + * @writeFd: file desriptor to write + * + * Reads 1 byte of data from readFd and writes to writeFd. + * + * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error + */ +static int lxcFdForward(int readFd, int writeFd) +{ + int rc = -1; + char buf[2]; + + if (1 != (saferead(readFd, buf, 1))) { + if (EAGAIN == errno) { + rc = EAGAIN; + goto cleanup; + } + + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("read of fd %d failed: %s"), readFd, strerror(errno)); + goto cleanup; + } + + if (1 != (safewrite(writeFd, buf, 1))) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("write to fd %d failed: %s"), writeFd, strerror(errno)); + goto cleanup; + } + + rc = 0; + +cleanup: + return rc; +} + +typedef struct _lxcTtyForwardFd_t { + int fd; + int active; +} lxcTtyForwardFd_t; + +/** + * lxcTtyForward: + * @appPty: Open fd for application facing Pty + * @contPty: Open fd for container facing Pty + * + * Forwards traffic between fds. Data read from appPty will be written to contPty + * This process loops forever. + * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP + * events when the user disconnects the virsh console via ctrl-] + * + * Returns 0 on success or -1 in case of error + */ +int lxcControllerMain(int appPty, int contPty) +{ + int rc = -1; + int epollFd; + struct epoll_event epollEvent; + int numEvents; + int numActive = 0; + lxcTtyForwardFd_t fdArray[2]; + int timeout = -1; + int curFdOff = 0; + int writeFdOff = 0; + + fdArray[0].fd = appPty; + fdArray[0].active = 0; + fdArray[1].fd = contPty; + fdArray[1].active = 0; + + /* create the epoll fild descriptor */ + epollFd = epoll_create(2); + if (0 > epollFd) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_create(2) failed: %s"), strerror(errno)); + goto cleanup; + } + + /* add the file descriptors the epoll fd */ + memset(&epollEvent, 0x00, sizeof(epollEvent)); + epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */ + epollEvent.data.fd = appPty; + epollEvent.data.u32 = 0; /* fdArray position */ + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(appPty) failed: %s"), strerror(errno)); + goto cleanup; + } + epollEvent.data.fd = contPty; + epollEvent.data.u32 = 1; /* fdArray position */ + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + + while (1) { + /* if active fd's, return if no events, else wait forever */ + timeout = (numActive > 0) ? 0 : -1; + numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout); + if (0 < numEvents) { + if (epollEvent.events & EPOLLIN) { + curFdOff = epollEvent.data.u32; + if (!fdArray[curFdOff].active) { + fdArray[curFdOff].active = 1; + ++numActive; + } + + } else if (epollEvent.events & EPOLLHUP) { + DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); + continue; + } else { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("error event %d"), epollEvent.events); + goto cleanup; + } + + } else if (0 == numEvents) { + if (2 == numActive) { + /* both fds active, toggle between the two */ + curFdOff ^= 1; + } else { + /* only one active, if current is active, use it, else it */ + /* must be the other one (ie. curFd just went inactive) */ + curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1; + } + + } else { + if (EINTR == errno) { + continue; + } + + /* error */ + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_wait() failed: %s"), strerror(errno)); + goto cleanup; + + } + + if (0 < numActive) { + writeFdOff = curFdOff ^ 1; + rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd); + + if (EAGAIN == rc) { + /* this fd no longer has data, set it as inactive */ + --numActive; + fdArray[curFdOff].active = 0; + } else if (-1 == rc) { + goto cleanup; + } + + } + + } + + rc = 0; + +cleanup: + close(appPty); + close(contPty); + close(epollFd); + return rc; +} + +#endif diff -r 985e6f9b7d78 src/lxc_controller.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lxc_controller.h Mon Jul 14 17:18:49 2008 +0100 @@ -0,0 +1,33 @@ +/* + * Copyright IBM Corp. 2008 + * + * lxc_controller.h: linux container process controller + * + * Authors: + * David L. Leskovec <dlesko at linux.vnet.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LXC_CONTROLLER_H +#define LXC_CONTROLLER_H + +#ifdef WITH_LXC + +int lxcControllerMain(int appPty, int contPty); + +#endif /* WITH_LXC */ + +#endif /* LXC_CONTROLLER_H */ diff -r 985e6f9b7d78 src/lxc_driver.c --- a/src/lxc_driver.c Mon Jul 14 17:18:28 2008 +0100 +++ b/src/lxc_driver.c Mon Jul 14 17:18:49 2008 +0100 @@ -26,7 +26,6 @@ #ifdef WITH_LXC #include <fcntl.h> -#include <sys/epoll.h> #include <sched.h> #include <sys/utsname.h> #include <stdbool.h> @@ -39,6 +38,7 @@ #include "lxc_conf.h" #include "lxc_container.h" #include "lxc_driver.h" +#include "lxc_controller.h" #include "driver.h" #include "internal.h" #include "memory.h" @@ -52,77 +52,19 @@ #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) -/* - * GLibc headers are behind the kernel, so we define these - * constants if they're not present already. - */ - -#ifndef CLONE_NEWPID -#define CLONE_NEWPID 0x20000000 -#endif -#ifndef CLONE_NEWUTS -#define CLONE_NEWUTS 0x04000000 -#endif -#ifndef CLONE_NEWUSER -#define CLONE_NEWUSER 0x10000000 -#endif -#ifndef CLONE_NEWIPC -#define CLONE_NEWIPC 0x08000000 -#endif -#ifndef CLONE_NEWNET -#define CLONE_NEWNET 0x40000000 /* New network namespace */ -#endif static int lxcStartup(void); static int lxcShutdown(void); static lxc_driver_t *lxc_driver = NULL; /* Functions */ -static int lxcDummyChild( void *argv ATTRIBUTE_UNUSED ) -{ - exit(0); -} - -static int lxcCheckContainerSupport(int extra_flags) -{ - int rc = 0; - int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER| - CLONE_NEWIPC|SIGCHLD|extra_flags; - int cpid; - char *childStack; - char *stack; - int childStatus; - - if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) { - DEBUG0("Unable to allocate stack"); - rc = -1; - goto check_complete; - } - - childStack = stack + (getpagesize() * 4); - - cpid = clone(lxcDummyChild, childStack, flags, NULL); - if ((0 > cpid) && (EINVAL == errno)) { - DEBUG0("clone call returned EINVAL, container support is not enabled"); - rc = -1; - } else { - waitpid(cpid, &childStatus, 0); - } - - VIR_FREE(stack); - -check_complete: - return rc; -} static const char *lxcProbe(void) { -#ifdef __linux__ - if (0 == lxcCheckContainerSupport(0)) { - return("lxc:///"); - } -#endif - return(NULL); + if (lxcContainerAvailable(0) < 0) + return NULL; + + return("lxc:///"); } static virDrvOpenStatus lxcOpen(virConnectPtr conn, @@ -559,89 +501,6 @@ return 0; } -/** - * lxcSendContainerContinue: - * @monitor: FD for communicating with child - * - * Sends the continue message via the socket pair stored in the vm - * structure. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSendContainerContinue(virConnectPtr conn, - int monitor) -{ - int rc = -1; - lxc_message_t msg = LXC_CONTINUE_MSG; - int writeCount = 0; - - writeCount = safewrite(monitor, &msg, sizeof(msg)); - if (writeCount != sizeof(msg)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("unable to send container continue message: %s"), - strerror(errno)); - goto error_out; - } - - rc = 0; - -error_out: - return rc; -} - -/** - * lxcStartContainer: - * @conn: pointer to connection - * @driver: pointer to driver structure - * @vm: pointer to virtual machine structure - * - * Starts a container process by calling clone() with the namespace flags - * - * Returns 0 on success or -1 in case of error - */ -static int lxcStartContainer(virConnectPtr conn, - lxc_driver_t* driver, - lxc_vm_t *vm, - int monitor, - char *ttyPath) -{ - int rc = -1; - int flags; - int stacksize = getpagesize() * 4; - char *stack, *stacktop; - lxc_child_argv_t args = { vm->def, monitor, ttyPath }; - - /* allocate a stack for the container */ - if (VIR_ALLOC_N(stack, stacksize) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("unable to allocate container stack")); - goto error_exit; - } - stacktop = stack + stacksize; - - flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; - - if (vm->def->nets != NULL) - flags |= CLONE_NEWNET; - - vm->def->id = clone(lxcChild, stacktop, flags, &args); - - DEBUG("clone() returned, %d", vm->def->id); - - if (vm->def->id < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("clone() failed, %s"), strerror(errno)); - goto error_exit; - } - - lxcSaveConfig(NULL, driver, vm, vm->def); - - rc = 0; - -error_exit: - return rc; -} - /** * lxcOpenTty: @@ -716,170 +575,6 @@ return rc; } -/** - * lxcFdForward: - * @readFd: file descriptor to read - * @writeFd: file desriptor to write - * - * Reads 1 byte of data from readFd and writes to writeFd. - * - * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error - */ -static int lxcFdForward(int readFd, int writeFd) -{ - int rc = -1; - char buf[2]; - - if (1 != (saferead(readFd, buf, 1))) { - if (EAGAIN == errno) { - rc = EAGAIN; - goto cleanup; - } - - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("read of fd %d failed: %s"), readFd, strerror(errno)); - goto cleanup; - } - - if (1 != (safewrite(writeFd, buf, 1))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("write to fd %d failed: %s"), writeFd, strerror(errno)); - goto cleanup; - } - - rc = 0; - -cleanup: - return rc; -} - -typedef struct _lxcTtyForwardFd_t { - int fd; - bool active; -} lxcTtyForwardFd_t; - -/** - * lxcTtyForward: - * @fd1: Open fd - * @fd1: Open fd - * - * Forwards traffic between fds. Data read from fd1 will be written to fd2 - * This process loops forever. - * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP - * events when the user disconnects the virsh console via ctrl-] - * - * Returns 0 on success or -1 in case of error - */ -static int lxcTtyForward(int fd1, int fd2) -{ - int rc = -1; - int epollFd; - struct epoll_event epollEvent; - int numEvents; - int numActive = 0; - lxcTtyForwardFd_t fdArray[2]; - int timeout = -1; - int curFdOff = 0; - int writeFdOff = 0; - - fdArray[0].fd = fd1; - fdArray[0].active = false; - fdArray[1].fd = fd2; - fdArray[1].active = false; - - /* create the epoll fild descriptor */ - epollFd = epoll_create(2); - if (0 > epollFd) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_create(2) failed: %s"), strerror(errno)); - goto cleanup; - } - - /* add the file descriptors the epoll fd */ - memset(&epollEvent, 0x00, sizeof(epollEvent)); - epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */ - epollEvent.data.fd = fd1; - epollEvent.data.u32 = 0; /* fdArray position */ - if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd1, &epollEvent)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_ctl(fd1) failed: %s"), strerror(errno)); - goto cleanup; - } - epollEvent.data.fd = fd2; - epollEvent.data.u32 = 1; /* fdArray position */ - if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd2, &epollEvent)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_ctl(fd2) failed: %s"), strerror(errno)); - goto cleanup; - } - - while (1) { - /* if active fd's, return if no events, else wait forever */ - timeout = (numActive > 0) ? 0 : -1; - numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout); - if (0 < numEvents) { - if (epollEvent.events & EPOLLIN) { - curFdOff = epollEvent.data.u32; - if (!fdArray[curFdOff].active) { - fdArray[curFdOff].active = true; - ++numActive; - } - - } else if (epollEvent.events & EPOLLHUP) { - DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); - continue; - } else { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("error event %d"), epollEvent.events); - goto cleanup; - } - - } else if (0 == numEvents) { - if (2 == numActive) { - /* both fds active, toggle between the two */ - curFdOff ^= 1; - } else { - /* only one active, if current is active, use it, else it */ - /* must be the other one (ie. curFd just went inactive) */ - curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1; - } - - } else { - if (EINTR == errno) { - continue; - } - - /* error */ - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("epoll_wait() failed: %s"), strerror(errno)); - goto cleanup; - - } - - if (0 < numActive) { - writeFdOff = curFdOff ^ 1; - rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd); - - if (EAGAIN == rc) { - /* this fd no longer has data, set it as inactive */ - --numActive; - fdArray[curFdOff].active = false; - } else if (-1 == rc) { - goto cleanup; - } - - } - - } - - rc = 0; - -cleanup: - close(fd1); - close(fd2); - close(epollFd); - exit(rc); -} /** * lxcVmStart: @@ -921,7 +616,7 @@ if (vm->pid == 0) { /* child process calls forward routine */ - lxcTtyForward(parentTty, containerTty); + lxcControllerMain(parentTty, containerTty); } if (lxcStoreTtyPid(driver, vm)) { @@ -945,17 +640,19 @@ /* check this rc */ - rc = lxcStartContainer(conn, driver, vm, - sockpair[1], - containerTtyPath); - if (rc != 0) + vm->def->id = lxcContainerStart(conn, + vm->def, + sockpair[1], + containerTtyPath); + if (vm->def->id == -1) goto cleanup; + lxcSaveConfig(conn, driver, vm, vm->def); rc = lxcMoveInterfacesToNetNs(conn, vm); if (rc != 0) goto cleanup; - rc = lxcSendContainerContinue(conn, sockpair[0]); + rc = lxcContainerSendContinue(conn, sockpair[0]); if (rc != 0) goto cleanup; @@ -1196,16 +893,15 @@ { const char *argv[] = {"ip", "link", "set", "lo", "netns", "-1", NULL}; int ip_rc; - int user_netns = 0; - int kern_netns = 0; - if (virRun(NULL, (char **)argv, &ip_rc) == 0) - user_netns = WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255); + if (virRun(NULL, (char **)argv, &ip_rc) < 0 || + !(WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255))) + return 0; - if (lxcCheckContainerSupport(CLONE_NEWNET) == 0) - kern_netns = 1; + if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0) + return 0; - return kern_netns && user_netns; + return 1; } static int lxcStartup(void) @@ -1222,9 +918,8 @@ } /* Check that this is a container enabled kernel */ - if(0 != lxcCheckContainerSupport(0)) { + if(lxcContainerAvailable(0) < 0) return -1; - } lxc_driver->have_netns = lxcCheckNetNsSupport(); -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Every LXC container has 2 processes, the leader of the actual container and a helper process to do the I/O forwarding. The LXC driver is written to allow libvirtd to be restarted without needing to shutdown the active containers. For this it uses a PID file in /var/run/libvirt/lxc/NAME.pid The driver also uses SIGCHLD to detect when a container has terminated. Here-in lies the problem - if you restart libvirtd, the container process gets re-parented to init, and so libvirtd will never get any further SIGCHLD signals for it. This patch attempts to address this problem by changing the relationship between libvirtd and the container processes. Instad of there being two processes which are siblings, the I/O process becomes the parent of the actual container. So the general idea is: - libvirtd LXC driver spawns a 'controller' process - this immediately double-forks itself into the background, making itself process leader, changing its root directory to /, and redirecting its stdin to /dev/null and its stdout/err to /var/log/libvirt/lxc/NAME.log. - The 'controller' process inherits a UNIX domain socket from libvirtd which has had bind() called against /var/run/libvirt/lxc/NAME.sock - Once it has backgrounded itself, the controller calls accept() on the socket, blocking until a client connects. If this fails for any reason the controller exits. - Immediately after forking the 'controller' process, the libvirtd LXC driver, calls connect() on /var/run/libvirt/lxc/NAME.sock. And does a blocking read of sizeof(pid_t) bytes. - Upon getting a client connection 'controller' writes sizeof(pid_t) bytes containing its PID. Again if this fails, it exits. At this point in time, the libvirtd LXC driver knows what the controller process' PID is. This becomes the 'ID' of the virDomain object associated with this. If anything goes wrong from here-on, the libvirtd LXC driver also now knows what PID it has to kill off. The UNIX socket to the controller process is kept open, and registered with the libvirtd event loop for POLLHUP events. This means the LXC driver can get notification when the controller terminates, without needing to rely on SIGCHLD events. - Now the 'controller' has told libvirtd what its PID is, it goes ahead and starts the real 'container' process. - When the 'container' is up and running, the controller goes into the event loop where it handles I/O from the PTYs. It also keeps an eye out for SIGHUP on the client socket to the libvirtd daemon. - If the client goes away, it'll know it needs to accept() a new client (ie the libvirtd daemon starting up again). Upon a new client connecting it'll do the PID handshake again, so that libvirtd knows what the container ID is again. - When libvirtd starts up, it reads the container configs from /etc/libvirt/lxc and for each one, tries to connect to /var/run/libvirt/lxc/NAME.sock. This lets it figure out which containers are running. Notice that throughout this, libvirtd's LXC driver doesn't need to know the PID of the actual container process - only that of the controller process. Think of the controller as serving the equivalent role of QEMU in context of KVM. QEMU provides the backend device model for KVM. When QEMU dies, the guest domain goes away. Well the controller provides the 'backend' device model for the container. - though in this case it really only the text console backend. When the controller dies, the container goes away. So architecturally the LXC driver is now very closely aligned with the QEMU driver. The main differences are that LXC can handle libvirtd restarts, and that the LXC driver simply forks() the controller. One could imagine these distinctions going away - the QEMU driver can get a restart capability in much the same way as the LXC restart works. We may want to make the controller process a proper standalone binary which can be directly exec'd, rather than just forked off from libvirtd. lxc_conf.c | 194 +------------------- lxc_conf.h | 12 - lxc_container.c | 36 +-- lxc_container.h | 8 lxc_controller.c | 305 +++++++++++++++++++++++++++++-- lxc_controller.h | 11 + lxc_driver.c | 532 ++++++++++++++++++++++--------------------------------- util.c | 53 +++++ util.h | 4 9 files changed, 609 insertions(+), 546 deletions(-) Daniel diff -r e5aefa90abaf src/lxc_conf.c --- a/src/lxc_conf.c Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_conf.c Mon Jul 14 21:03:45 2008 +0100 @@ -833,25 +833,24 @@ strncpy(vm->configFileBase, file, PATH_MAX); vm->configFile[PATH_MAX-1] = '\0'; - if (lxcLoadTtyPid(driver, vm) < 0) { - DEBUG0("failed to load tty pid"); - } - return vm; } int lxcLoadDriverConfig(lxc_driver_t *driver) { /* Set the container configuration directory */ - driver->configDir = strdup(SYSCONF_DIR "/libvirt/lxc"); - if (NULL == driver->configDir) { - lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, "configDir"); - return -1; - } - - driver->stateDir = strdup(LOCAL_STATE_DIR "/run/libvirt/lxc"); + if ((driver->configDir = strdup(SYSCONF_DIR "/libvirt/lxc")) == NULL) + goto no_memory; + if ((driver->stateDir = strdup(LOCAL_STATE_DIR "/run/libvirt/lxc")) == NULL) + goto no_memory; + if ((driver->logDir = strdup(LOCAL_STATE_DIR "/log/libvirt/lxc")) == NULL) + goto no_memory; return 0; + +no_memory: + lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, "configDir"); + return -1; } int lxcLoadContainerConfigFile(lxc_driver_t *driver, @@ -1012,9 +1011,7 @@ curNet = vmdef->nets; while (curNet) { nextNet = curNet->next; - printf("Freeing %s:%s\n", curNet->parentVeth, curNet->containerVeth); VIR_FREE(curNet->parentVeth); - VIR_FREE(curNet->containerVeth); VIR_FREE(curNet->txName); VIR_FREE(curNet); curNet = nextNet; @@ -1106,176 +1103,5 @@ return 0; } -/** - * lxcStoreTtyPid: - * @driver: pointer to driver - * @vm: Ptr to VM - * - * Stores the pid of the tty forward process contained in vm->pid - * LOCAL_STATE_DIR/run/libvirt/lxc/{container_name}.pid - * - * Returns 0 on success or -1 in case of error - */ -int lxcStoreTtyPid(const lxc_driver_t *driver, lxc_vm_t *vm) -{ - int rc = -1; - int fd; - FILE *file = NULL; - - if (vm->ttyPidFile[0] == 0x00) { - if ((rc = virFileMakePath(driver->stateDir))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot create lxc state directory %s: %s"), - driver->stateDir, strerror(rc)); - goto error_out; - } - - if (virFileBuildPath(driver->stateDir, vm->def->name, ".pid", - vm->ttyPidFile, PATH_MAX) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot construct tty pid file path")); - goto error_out; - } - } - - if ((fd = open(vm->ttyPidFile, - O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR)) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot create tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - goto error_out; - } - - if (!(file = fdopen(fd, "w"))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot fdopen tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - - if (close(fd) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to close tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - } - - goto error_out; - } - - if (fprintf(file, "%d", vm->pid) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot write tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - - goto fclose_error_out; - } - - rc = 0; - -fclose_error_out: - if (fclose(file) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to close tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - } - -error_out: - return rc; -} - -/** - * lxcLoadTtyPid: - * @driver: pointer to driver - * @vm: Ptr to VM - * - * Loads the pid of the tty forward process from the pid file. - * LOCAL_STATE_DIR/run/libvirt/lxc/{container_name}.pid - * - * Returns - * > 0 - pid of tty process - * 0 - no tty pid file - * -1 - error - */ -int lxcLoadTtyPid(const lxc_driver_t *driver, lxc_vm_t *vm) -{ - int rc = -1; - FILE *file; - - if (vm->ttyPidFile[0] == 0x00) { - if ((rc = virFileMakePath(driver->stateDir))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot create lxc state directory %s: %s"), - driver->stateDir, strerror(rc)); - goto cleanup; - } - - if (virFileBuildPath(driver->stateDir, vm->def->name, ".pid", - vm->ttyPidFile, PATH_MAX) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot construct tty pid file path")); - goto cleanup; - } - } - - if (!(file = fopen(vm->ttyPidFile, "r"))) { - if (ENOENT == errno) { - rc = 0; - goto cleanup; - } - - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot open tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - goto cleanup; - } - - if (fscanf(file, "%d", &(vm->pid)) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot read tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - goto cleanup; - } - - if (fclose(file) < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to close tty pid file %s: %s"), - vm->ttyPidFile, strerror(errno)); - goto cleanup; - } - - rc = vm->pid; - - cleanup: - return rc; -} - -/** - * lxcDeleteTtyPid: - * @vm: Ptr to VM - * - * Unlinks the tty pid file for the vm - * LOCAL_STATE_DIR/run/libvirt/lxc/{container_name}.pid - * - * Returns on 0 success or -1 in case of error - */ -int lxcDeleteTtyPidFile(const lxc_vm_t *vm) -{ - if (vm->ttyPidFile[0] == 0x00) { - goto no_file; - } - - if (unlink(vm->ttyPidFile) < 0) { - if (errno == ENOENT) { - goto no_file; - } - - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot remove ttyPidFile %s: %s"), vm->ttyPidFile, - strerror(errno)); - return -1; - } - -no_file: - return 0; -} #endif /* WITH_LXC */ diff -r e5aefa90abaf src/lxc_conf.h --- a/src/lxc_conf.h Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_conf.h Mon Jul 14 21:03:45 2008 +0100 @@ -46,7 +46,6 @@ struct __lxc_net_def { int type; char *parentVeth; /* veth device in parent namespace */ - char *containerVeth; /* veth device in container namespace */ char *txName; /* bridge or network name */ lxc_net_def_t *next; @@ -87,11 +86,10 @@ struct __lxc_vm { int pid; int state; + int monitor; char configFile[PATH_MAX]; char configFileBase[PATH_MAX]; - - char ttyPidFile[PATH_MAX]; lxc_vm_def_t *def; @@ -103,8 +101,9 @@ lxc_vm_t *vms; int nactivevms; int ninactivevms; - char* configDir; - char* stateDir; + char *configDir; + char *stateDir; + char *logDir; int have_netns; }; @@ -154,9 +153,6 @@ lxc_driver_t *driver, const char *configFile, const char *name); -int lxcStoreTtyPid(const lxc_driver_t *driver, lxc_vm_t *vm); -int lxcLoadTtyPid(const lxc_driver_t *driver, lxc_vm_t *vm); -int lxcDeleteTtyPidFile(const lxc_vm_t *vm); void lxcError(virConnectPtr conn, virDomainPtr dom, diff -r e5aefa90abaf src/lxc_container.c --- a/src/lxc_container.c Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_container.c Mon Jul 14 21:03:45 2008 +0100 @@ -69,6 +69,8 @@ typedef struct __lxc_child_argv lxc_child_argv_t; struct __lxc_child_argv { lxc_vm_def_t *config; + int nveths; + char **veths; int monitor; char *ttyPath; }; @@ -171,8 +173,7 @@ * * Returns 0 on success or -1 in case of error */ -int lxcContainerSendContinue(virConnectPtr conn, - int control) +int lxcContainerSendContinue(int control) { int rc = -1; lxc_message_t msg = LXC_CONTINUE_MSG; @@ -180,7 +181,7 @@ writeCount = safewrite(control, &msg, sizeof(msg)); if (writeCount != sizeof(msg)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("unable to send container continue message: %s"), strerror(errno)); goto error_out; @@ -230,21 +231,21 @@ * * Returns 0 on success or nonzero in case of error */ -static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def) +static int lxcContainerEnableInterfaces(int nveths, + char **veths) { - int rc = 0; - const lxc_net_def_t *net; + int rc = 0, i; - for (net = def->nets; net; net = net->next) { - DEBUG("Enabling %s", net->containerVeth); - rc = vethInterfaceUpOrDown(net->containerVeth, 1); + for (i = 0 ; i < nveths ; i++) { + DEBUG("Enabling %s", veths[i]); + rc = vethInterfaceUpOrDown(veths[i], 1); if (0 != rc) { goto error_out; } } /* enable lo device only if there were other net devices */ - if (def->nets) + if (veths) rc = vethInterfaceUpOrDown("lo", 1); error_out: @@ -311,7 +312,7 @@ return -1; /* enable interfaces */ - if (lxcContainerEnableInterfaces(vmDef) < 0) + if (lxcContainerEnableInterfaces(argv->nveths, argv->veths) < 0) return -1; /* this function will only return if an error occured */ @@ -320,7 +321,6 @@ /** * lxcContainerStart: - * @conn: pointer to connection * @driver: pointer to driver structure * @vm: pointer to virtual machine structure * @@ -328,8 +328,9 @@ * * Returns PID of container on success or -1 in case of error */ -int lxcContainerStart(virConnectPtr conn, - lxc_vm_def_t *def, +int lxcContainerStart(lxc_vm_def_t *def, + int nveths, + char **veths, int control, char *ttyPath) { @@ -337,12 +338,11 @@ int flags; int stacksize = getpagesize() * 4; char *stack, *stacktop; - lxc_child_argv_t args = { def, control, ttyPath }; + lxc_child_argv_t args = { def, nveths, veths, control, ttyPath }; /* allocate a stack for the container */ if (VIR_ALLOC_N(stack, stacksize) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("unable to allocate container stack")); + lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL); return -1; } stacktop = stack + stacksize; @@ -357,7 +357,7 @@ DEBUG("clone() returned, %d", pid); if (pid < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("clone() failed, %s"), strerror(errno)); return -1; } diff -r e5aefa90abaf src/lxc_container.h --- a/src/lxc_container.h Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_container.h Mon Jul 14 21:03:45 2008 +0100 @@ -32,11 +32,11 @@ LXC_CONTAINER_FEATURE_NET = (1 << 0), }; -int lxcContainerSendContinue(virConnectPtr conn, - int control); +int lxcContainerSendContinue(int control); -int lxcContainerStart(virConnectPtr conn, - lxc_vm_def_t *def, +int lxcContainerStart(lxc_vm_def_t *def, + int nveths, + char **veths, int control, char *ttyPath); diff -r e5aefa90abaf src/lxc_controller.c --- a/src/lxc_controller.c Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_controller.c Mon Jul 14 21:03:45 2008 +0100 @@ -26,16 +26,35 @@ #ifdef WITH_LXC #include <sys/epoll.h> +#include <sys/wait.h> +#include <sys/socket.h> #include <unistd.h> +#include <paths.h> +#include <fcntl.h> #include "internal.h" #include "util.h" #include "lxc_conf.h" +#include "lxc_container.h" #include "lxc_controller.h" +#include "veth.h" +#include "memory.h" +#include "util.h" #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) +#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) + +pid_t lxcControllerPID(int monitor) +{ + pid_t pid; + + if (saferead(monitor, &pid, sizeof(pid)) != sizeof(pid)) + return -1; + + return pid; +} /** * lxcFdForward: @@ -91,7 +110,10 @@ * * Returns 0 on success or -1 in case of error */ -int lxcControllerMain(int appPty, int contPty) +static int lxcControllerMain(int monitor, + int client, + int appPty, + int contPty) { int rc = -1; int epollFd; @@ -120,15 +142,29 @@ memset(&epollEvent, 0x00, sizeof(epollEvent)); epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */ epollEvent.data.fd = appPty; - epollEvent.data.u32 = 0; /* fdArray position */ if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("epoll_ctl(appPty) failed: %s"), strerror(errno)); goto cleanup; } epollEvent.data.fd = contPty; - epollEvent.data.u32 = 1; /* fdArray position */ if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + + epollEvent.events = EPOLLIN; + epollEvent.data.fd = monitor; + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, monitor, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + + epollEvent.events = EPOLLHUP; + epollEvent.data.fd = client; + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("epoll_ctl(contPty) failed: %s"), strerror(errno)); goto cleanup; @@ -138,23 +174,52 @@ /* if active fd's, return if no events, else wait forever */ timeout = (numActive > 0) ? 0 : -1; numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout); - if (0 < numEvents) { - if (epollEvent.events & EPOLLIN) { - curFdOff = epollEvent.data.u32; - if (!fdArray[curFdOff].active) { - fdArray[curFdOff].active = 1; - ++numActive; + if (numEvents > 0) { + fprintf(stderr, "%d %d\n", epollEvent.data.fd, epollEvent.events); + if (epollEvent.data.fd == monitor) { + int fd = accept(monitor, NULL, 0); + pid_t pid = getpid(); + if (client != -1) { + close(fd); + continue; } - - } else if (epollEvent.events & EPOLLHUP) { - DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); - continue; + /* Sync up with libvirtd LXC driver to tell it our PID */ + if (safewrite(fd, &pid, sizeof(pid)) != sizeof(pid)) { + close(fd); + continue; + } + client = fd; + epollEvent.events = EPOLLHUP; + epollEvent.data.fd = client; + if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + } else if (client != -1 && epollEvent.data.fd == client) { + if (0 > epoll_ctl(epollFd, EPOLL_CTL_DEL, client, &epollEvent)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("epoll_ctl(contPty) failed: %s"), strerror(errno)); + goto cleanup; + } + close(client); + client = -1; } else { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("error event %d"), epollEvent.events); - goto cleanup; + if (epollEvent.events & EPOLLIN) { + curFdOff = epollEvent.data.fd == appPty ? 0 : 1; + if (!fdArray[curFdOff].active) { + fdArray[curFdOff].active = 1; + ++numActive; + } + } else if (epollEvent.events & EPOLLHUP) { + DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); + continue; + } else { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("error event %d"), epollEvent.events); + goto cleanup; + } } - } else if (0 == numEvents) { if (2 == numActive) { /* both fds active, toggle between the two */ @@ -202,4 +267,210 @@ return rc; } + + +/** + * lxcControllerMoveInterfaces + * @nveths: number of interfaces + * @veths: interface names + * @container: pid of container + * + * Moves network interfaces into a container's namespace + * + * Returns 0 on success or -1 in case of error + */ +static int lxcControllerMoveInterfaces(int nveths, + char **veths, + pid_t container) +{ + int i; + for (i = 0 ; i < nveths ; i++) + if (moveInterfaceToNetNs(veths[i], container) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to move interface %s to ns %d"), + veths[i], container); + return -1; + } + + return 0; +} + + +/** + * lxcCleanupInterfaces: + * @conn: pointer to connection + * @vm: pointer to virtual machine structure + * + * Cleans up the container interfaces by deleting the veth device pairs. + * + * Returns 0 on success or -1 in case of error + */ +static int lxcControllerCleanupInterfaces(int nveths, + char **veths) +{ + int i; + for (i = 0 ; i < nveths ; i++) + if (vethDelete(veths[i]) < 0) + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to delete veth: %s"), veths[i]); + /* will continue to try to cleanup any other interfaces */ + + return 0; +} + + +static int +lxcControllerRun(lxc_vm_def_t *def, + int nveths, + char **veths, + int monitor, + int client, + int appPty) +{ + int rc = -1; + int control[2] = { -1, -1}; + int containerPty; + char *containerPtyPath; + pid_t container = -1; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("sockpair failed: %s"), strerror(errno)); + goto cleanup; + } + + if (virFileOpenTty(&containerPty, + &containerPtyPath, + 0) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to allocate tty: %s"), strerror(errno)); + goto cleanup; + } + + if ((container = lxcContainerStart(def, + nveths, + veths, + control[1], + containerPtyPath)) < 0) + goto cleanup; + close(control[1]); + control[1] = -1; + + if (lxcControllerMoveInterfaces(nveths, veths, container) < 0) + goto cleanup; + + if (lxcContainerSendContinue(control[0]) < 0) + goto cleanup; + + rc = lxcControllerMain(monitor, client, appPty, containerPty); + +cleanup: + if (control[0] != -1) + close(control[0]); + if (control[1] != -1) + close(control[1]); + VIR_FREE(containerPtyPath); + if (containerPty != -1) + close(containerPty); + + kill(container, SIGTERM); + waitpid(container, NULL, 0); + lxcControllerCleanupInterfaces(nveths, veths); + + return -1; +} + + +int lxcControllerStart(lxc_vm_def_t *def, + int nveths, + char **veths, + int monitor, + int appPty, + const char *logfile) +{ + pid_t pid; + int rc; + int status, logfd, null; + int open_max, i; + int client; + + if ((pid = fork()) < 0) + return -1; + + if (pid > 0) { + /* Original caller waits for first child to exit */ + while (1) { + rc = waitpid(pid, &status, 0); + if (rc < 0) { + if (errno == EINTR) + continue; + return -1; + } + if (rc != pid) + return -1; + if (WIFEXITED(status) && + WEXITSTATUS(status) == 0) + return 0; + else + return -1; + } + } + + /* First child is running here */ + + if (chdir("/") < 0) + _exit(-1); + + if (setsid() < 0) + _exit(-1); + + if ((null = open(_PATH_DEVNULL, O_RDONLY)) < 0) + _exit(-1); + + if ((logfd = open(logfile, O_WRONLY | O_TRUNC)) < 0) + _exit(-1); + + open_max = sysconf (_SC_OPEN_MAX); + for (i = 0; i < open_max; i++) + if (i != appPty && + i != monitor && + i != logfd && + i != null) + close(i); + + if (dup2(null, STDIN_FILENO) < 0 || + dup2(logfd, STDOUT_FILENO) < 0 || + dup2(logfd, STDERR_FILENO) < 0) + _exit(-1); + + close(null); + close(logfd); + + /* Now fork the real controller process */ + if ((pid = fork()) < 0) + _exit(-1); + + if (pid > 0) { + /* First child now exits */ + _exit(0); + } + + /* This is real controller running... */ + pid = getpid(); + + /* Accept initial client which is the libvirtd daemon */ + if ((client = accept(monitor, NULL, 0)) < 0) + _exit(-1); + + /* Sync up with libvirtd LXC driver to tell it our PID */ + if (safewrite(client, &pid, sizeof(pid)) != sizeof(pid)) + _exit(-1); + + /* Controlling libvirtd LXC driver now knows + what our PID is, and is able to cleanup after + us from now on */ + _exit(lxcControllerRun(def, nveths, veths, monitor, client, appPty)); +} + + #endif diff -r e5aefa90abaf src/lxc_controller.h --- a/src/lxc_controller.h Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_controller.h Mon Jul 14 21:03:45 2008 +0100 @@ -26,7 +26,16 @@ #ifdef WITH_LXC -int lxcControllerMain(int appPty, int contPty); +#include "lxc_conf.h" + +pid_t lxcControllerPID(int monitor); + +int lxcControllerStart(lxc_vm_def_t *def, + int nveths, + char **veths, + int monitor, + int appPty, + const char *logfile); #endif /* WITH_LXC */ diff -r e5aefa90abaf src/lxc_driver.c --- a/src/lxc_driver.c Mon Jul 14 17:18:49 2008 +0100 +++ b/src/lxc_driver.c Mon Jul 14 21:03:45 2008 +0100 @@ -31,21 +31,19 @@ #include <stdbool.h> #include <string.h> #include <sys/types.h> -#include <termios.h> +#include <sys/socket.h> +#include <sys/un.h> #include <unistd.h> #include <wait.h> +#include "internal.h" #include "lxc_conf.h" #include "lxc_container.h" #include "lxc_driver.h" #include "lxc_controller.h" -#include "driver.h" -#include "internal.h" #include "memory.h" #include "util.h" -#include "memory.h" #include "bridge.h" -#include "qemu_conf.h" #include "veth.h" /* debug macros */ @@ -284,8 +282,6 @@ vm->configFile[0] = '\0'; - lxcDeleteTtyPidFile(vm); - lxcRemoveInactiveVM(driver, vm); return 0; @@ -339,10 +335,51 @@ return lxcGenerateXML(dom->conn, driver, vm, vm->def); } + +/** + * lxcVmCleanup: + * @vm: Ptr to VM to clean up + * + * waitpid() on the container process. kill and wait the tty process + * This is called by boh lxcDomainDestroy and lxcSigHandler when a + * container exits. + * + * Returns 0 on success or -1 in case of error + */ +static int lxcVMCleanup(lxc_driver_t *driver, lxc_vm_t * vm) +{ + int rc = -1; + int waitRc; + int childStatus = -1; + + while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) && + errno == EINTR); + + if ((waitRc != vm->pid) && (errno != ECHILD)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("waitpid failed to wait for container %d: %d %s"), + vm->pid, waitRc, strerror(errno)); + } + + rc = 0; + + if (WIFEXITED(childStatus)) { + rc = WEXITSTATUS(childStatus); + DEBUG("container exited with rc: %d", rc); + } + + vm->state = VIR_DOMAIN_SHUTOFF; + vm->pid = -1; + vm->def->id = -1; + driver->nactivevms--; + driver->ninactivevms++; + + return rc; +} + /** * lxcSetupInterfaces: - * @conn: pointer to connection - * @vm: pointer to virtual machine structure + * @def: pointer to virtual machine structure * * Sets up the container interfaces by creating the veth device pairs and * attaching the parent end to the appropriate bridge. The container end @@ -351,24 +388,21 @@ * Returns 0 on success or -1 in case of error */ static int lxcSetupInterfaces(virConnectPtr conn, - lxc_vm_t *vm) + lxc_vm_def_t *def, + int *nveths, + char ***veths) { int rc = -1; - lxc_driver_t *driver = conn->privateData; - struct qemud_driver *networkDriver = - (struct qemud_driver *)(conn->networkPrivateData); - lxc_net_def_t *net = vm->def->nets; - char* bridge; + lxc_net_def_t *net; + char *bridge = NULL; char parentVeth[PATH_MAX] = ""; char containerVeth[PATH_MAX] = ""; + brControl *brctl = NULL; - if ((vm->def->nets != NULL) && (driver->have_netns == 0)) { - lxcError(conn, NULL, VIR_ERR_NO_SUPPORT, - _("System lacks NETNS support")); + if (brInit(&brctl) != 0) return -1; - } - for (net = vm->def->nets; net; net = net->next) { + for (net = def->nets; net; net = net->next) { if (LXC_NET_NETWORK == net->type) { virNetworkPtr network = virNetworkLookupByName(conn, net->txName); if (!network) { @@ -378,7 +412,6 @@ bridge = virNetworkGetBridgeName(network); virNetworkFree(network); - } else { bridge = net->txName; } @@ -394,9 +427,6 @@ if (NULL != net->parentVeth) { strcpy(parentVeth, net->parentVeth); } - if (NULL != net->containerVeth) { - strcpy(containerVeth, net->containerVeth); - } DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth); if (0 != (rc = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, @@ -406,24 +436,18 @@ if (NULL == net->parentVeth) { net->parentVeth = strdup(parentVeth); } - if (NULL == net->containerVeth) { - net->containerVeth = strdup(containerVeth); - } + if (VIR_REALLOC_N(*veths, (*nveths)+1) < 0) + goto error_exit; + if (((*veths)[(*nveths)++] = strdup(containerVeth)) == NULL) + goto error_exit; - if ((NULL == net->parentVeth) || (NULL == net->containerVeth)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + if (NULL == net->parentVeth) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to allocate veth names")); goto error_exit; } - if (!(networkDriver->brctl) && (rc = brInit(&(networkDriver->brctl)))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot initialize bridge support: %s"), - strerror(rc)); - goto error_exit; - } - - if (0 != (rc = brAddInterface(networkDriver->brctl, bridge, parentVeth))) { + if (0 != (rc = brAddInterface(brctl, bridge, parentVeth))) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to add %s device to %s: %s"), parentVeth, @@ -433,7 +457,7 @@ } if (0 != (rc = vethInterfaceUpOrDown(parentVeth, 1))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to enable parent ns veth device: %d"), rc); goto error_exit; } @@ -443,136 +467,100 @@ rc = 0; error_exit: + brShutdown(brctl); return rc; } -/** - * lxcMoveInterfacesToNetNs: - * @conn: pointer to connection - * @vm: pointer to virtual machine structure - * - * Starts a container process by calling clone() with the namespace flags - * - * Returns 0 on success or -1 in case of error - */ -static int lxcMoveInterfacesToNetNs(virConnectPtr conn, - const lxc_vm_t *vm) +static int lxcMonitorServer(virConnectPtr conn, + lxc_driver_t * driver, + lxc_vm_t *vm) { - int rc = -1; - lxc_net_def_t *net; + char *sockpath = NULL; + int fd; + struct sockaddr_un addr; - for (net = vm->def->nets; net; net = net->next) { - if (0 != moveInterfaceToNetNs(net->containerVeth, vm->def->id)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to move interface %s to ns %d"), - net->containerVeth, vm->def->id); - goto error_exit; - } + if (asprintf(&sockpath, "%s/%s.sock", + driver->stateDir, vm->def->name) < 0) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); + return -1; } - rc = 0; + if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to create server socket: %s"), + strerror(errno)); + goto error; + } -error_exit: - return rc; + unlink(sockpath); + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path)); + + if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to bind server socket: %s"), + strerror(errno)); + goto error; + } + if (listen(fd, 30 /* backlog */ ) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to listen server socket: %s"), + strerror(errno)); + goto error; + return (-1); + } + + VIR_FREE(sockpath); + return fd; + +error: + VIR_FREE(sockpath); + if (fd != -1) + close(fd); + return -1; } -/** - * lxcCleanupInterfaces: - * @conn: pointer to connection - * @vm: pointer to virtual machine structure - * - * Cleans up the container interfaces by deleting the veth device pairs. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcCleanupInterfaces(const lxc_vm_t *vm) +static int lxcMonitorClient(virConnectPtr conn, + lxc_driver_t * driver, + lxc_vm_t *vm) { - int rc = -1; - lxc_net_def_t *net; + char *sockpath = NULL; + int fd; + struct sockaddr_un addr; - for (net = vm->def->nets; net; net = net->next) { - if (0 != (rc = vethDelete(net->parentVeth))) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to delete veth: %s"), net->parentVeth); - /* will continue to try to cleanup any other interfaces */ - } + if (asprintf(&sockpath, "%s/%s.sock", + driver->stateDir, vm->def->name) < 0) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); + return -1; } - return 0; -} - - -/** - * lxcOpenTty: - * @conn: pointer to connection - * @ttymaster: pointer to int. On success, set to fd for master end - * @ttyName: On success, will point to string slave end of tty. Caller - * must free when done (such as in lxcFreeVM). - * - * Opens and configures container tty. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcOpenTty(virConnectPtr conn, - int *ttymaster, - char **ttyName, - int rawmode) -{ - int rc = -1; - - *ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK); - if (*ttymaster < 0) { + if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("posix_openpt failed: %s"), strerror(errno)); - goto cleanup; + _("failed to create client socket: %s"), + strerror(errno)); + goto error; } - if (unlockpt(*ttymaster) < 0) { + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path)); + + if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("unlockpt failed: %s"), strerror(errno)); - goto cleanup; + _("failed to connect to client socket: %s"), + strerror(errno)); + goto error; } - if (rawmode) { - struct termios ttyAttr; - if (tcgetattr(*ttymaster, &ttyAttr) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "tcgetattr() failed: %s", strerror(errno)); - goto cleanup; - } + VIR_FREE(sockpath); + return fd; - cfmakeraw(&ttyAttr); - - if (tcsetattr(*ttymaster, TCSADRAIN, &ttyAttr) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - "tcsetattr failed: %s", strerror(errno)); - goto cleanup; - } - } - - if (ttyName) { - char tempTtyName[PATH_MAX]; - if (0 != ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("ptsname_r failed: %s"), strerror(errno)); - goto cleanup; - } - - if ((*ttyName = strdup(tempTtyName)) == NULL) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); - goto cleanup; - } - } - - rc = 0; - -cleanup: - if (rc != 0 && - *ttymaster != -1) { - close(*ttymaster); - } - - return rc; +error: + VIR_FREE(sockpath); + if (fd != -1) + close(fd); + return -1; } @@ -590,82 +578,75 @@ lxc_driver_t * driver, lxc_vm_t * vm) { - int rc = -1; - int sockpair[2]; - int containerTty, parentTty; - char *containerTtyPath = NULL; + int rc = -1, i; + int monitor; + int parentTty; + char *logfile = NULL; + pid_t pid; + int nveths = 0; + char **veths = NULL; + + if ((rc = virFileMakePath(driver->logDir))) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("cannot create log directory %s: %s"), + driver->logDir, strerror(rc)); + return -1; + } + + if (asprintf(&logfile, "%s/%s.log", + driver->logDir, vm->def->name) < 0) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); + return -1; + } + + if ((monitor = lxcMonitorServer(conn, driver, vm)) < 0) + goto cleanup; /* open parent tty */ VIR_FREE(vm->def->tty); - if (lxcOpenTty(conn, &parentTty, &vm->def->tty, 1) < 0) { - goto cleanup; + if (virFileOpenTty(&parentTty, &vm->def->tty, 1) < 0) { + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to allocate tty: %s"), + strerror(errno)); } - /* open container tty */ - if (lxcOpenTty(conn, &containerTty, &containerTtyPath, 0) < 0) { - goto cleanup; - } - - /* fork process to handle the tty io forwarding */ - if ((vm->pid = fork()) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("unable to fork tty forwarding process: %s"), - strerror(errno)); - goto cleanup; - } - - if (vm->pid == 0) { - /* child process calls forward routine */ - lxcControllerMain(parentTty, containerTty); - } - - if (lxcStoreTtyPid(driver, vm)) { - DEBUG0("unable to store tty pid"); - } - - close(parentTty); - close(containerTty); - - if (0 != (rc = lxcSetupInterfaces(conn, vm))) { - goto cleanup; - } - - /* create a socket pair to send continue message to the container once */ - /* we've completed the post clone configuration */ - if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, sockpair)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("sockpair failed: %s"), strerror(errno)); - goto cleanup; - } - - /* check this rc */ - - vm->def->id = lxcContainerStart(conn, - vm->def, - sockpair[1], - containerTtyPath); - if (vm->def->id == -1) - goto cleanup; - lxcSaveConfig(conn, driver, vm, vm->def); - - rc = lxcMoveInterfacesToNetNs(conn, vm); - if (rc != 0) + if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) < 0) goto cleanup; - rc = lxcContainerSendContinue(conn, sockpair[0]); - if (rc != 0) + if (lxcControllerStart(vm->def, nveths, veths, + monitor, parentTty, logfile) < 0) goto cleanup; + if ((vm->monitor = lxcMonitorClient(conn, driver, vm)) < 0) + goto cleanup; + + /* Read pid from controller */ + if ((pid = lxcControllerPID(vm->monitor)) < 0) + goto cleanup; + + vm->pid = vm->def->id = pid; vm->state = VIR_DOMAIN_RUNNING; driver->ninactivevms--; driver->nactivevms++; + rc = 0; + cleanup: - close(sockpair[0]); - close(sockpair[1]); - VIR_FREE(containerTtyPath); - - return rc; + for (i = 0 ; i < nveths ; i++) { + if (rc != 0) + vethDelete(veths[i]); + VIR_FREE(veths[i]); + } + if (monitor != -1) + close(monitor); + if (rc != 0 && vm->monitor != -1) { + close(vm->monitor); + vm->monitor = -1; + } + if (parentTty != -1) + close(parentTty); + VIR_FREE(logfile); + return -rc; } /** @@ -773,84 +754,12 @@ vm->state = VIR_DOMAIN_SHUTDOWN; - rc = 0; + rc = lxcVMCleanup(driver, vm); error_out: return rc; } -/** - * lxcVmCleanup: - * @vm: Ptr to VM to clean up - * - * waitpid() on the container process. kill and wait the tty process - * This is called by boh lxcDomainDestroy and lxcSigHandler when a - * container exits. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcVMCleanup(lxc_driver_t *driver, lxc_vm_t * vm) -{ - int rc = -1; - int waitRc; - int childStatus = -1; - - /* if this fails, we'll continue. it will report any errors */ - lxcCleanupInterfaces(vm); - - while (((waitRc = waitpid(vm->def->id, &childStatus, 0)) == -1) && - errno == EINTR); - - if ((waitRc != vm->def->id) && (errno != ECHILD)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("waitpid failed to wait for container %d: %d %s"), - vm->def->id, waitRc, strerror(errno)); - goto kill_tty; - } - - rc = 0; - - if (WIFEXITED(childStatus)) { - rc = WEXITSTATUS(childStatus); - DEBUG("container exited with rc: %d", rc); - } - -kill_tty: - if (2 > vm->pid) { - DEBUG("not killing tty process with pid %d", vm->pid); - goto tty_error_out; - } - - if (0 > (kill(vm->pid, SIGKILL))) { - if (ESRCH != errno) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("sending SIGKILL to tty process failed: %s"), - strerror(errno)); - - goto tty_error_out; - } - } - - while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) && - errno == EINTR); - - if ((waitRc != vm->pid) && (errno != ECHILD)) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("waitpid failed to wait for tty %d: %d %s"), - vm->pid, waitRc, strerror(errno)); - } - -tty_error_out: - vm->state = VIR_DOMAIN_SHUTOFF; - vm->pid = -1; - lxcDeleteTtyPidFile(vm); - vm->def->id = -1; - driver->nactivevms--; - driver->ninactivevms++; - lxcSaveConfig(NULL, driver, vm, vm->def); - - return rc; - } /** * lxcDomainDestroy: @@ -907,6 +816,7 @@ static int lxcStartup(void) { uid_t uid = getuid(); + lxc_vm_t *vm; /* Check that the user is root */ if (0 != uid) { @@ -935,6 +845,30 @@ return -1; } + vm = lxc_driver->vms; + while (vm) { + pid_t pid; + if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) { + vm = vm->next; + continue; + } + + /* Read pid from controller */ + if ((pid = lxcControllerPID(vm->monitor)) < 0) { + close(vm->monitor); + vm->monitor = -1; + vm = vm->next; + continue; + } + + vm->pid = vm->def->id = pid; + vm->state = VIR_DOMAIN_RUNNING; + lxc_driver->ninactivevms--; + lxc_driver->nactivevms++; + + vm = vm->next; + } + return 0; } @@ -942,6 +876,7 @@ { VIR_FREE(driver->configDir); VIR_FREE(driver->stateDir); + VIR_FREE(driver->logDir); VIR_FREE(driver); } @@ -976,37 +911,6 @@ /* Otherwise we're happy to deal with a shutdown */ return 0; -} - -/** - * lxcSigHandler: - * @siginfo: Pointer to siginfo_t structure - * - * Handles signals received by libvirtd. Currently this is used to - * catch SIGCHLD from an exiting container. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSigHandler(siginfo_t *siginfo) -{ - int rc = -1; - lxc_vm_t *vm; - - if (siginfo->si_signo == SIGCHLD) { - vm = lxcFindVMByID(lxc_driver, siginfo->si_pid); - - if (NULL == vm) { - DEBUG("Ignoring SIGCHLD from non-container process %d\n", - siginfo->si_pid); - goto cleanup; - } - - rc = lxcVMCleanup(lxc_driver, vm); - - } - -cleanup: - return rc; } @@ -1079,7 +983,7 @@ lxcShutdown, NULL, /* reload */ lxcActive, - lxcSigHandler + NULL, }; int lxcRegister(void) diff -r e5aefa90abaf src/util.c --- a/src/util.c Mon Jul 14 17:18:49 2008 +0100 +++ b/src/util.c Mon Jul 14 21:03:45 2008 +0100 @@ -37,6 +37,7 @@ #include <sys/wait.h> #endif #include <string.h> +#include <termios.h> #include "c-ctype.h" #ifdef HAVE_PATHS_H @@ -556,6 +557,58 @@ return 0; } + +int virFileOpenTty(int *ttymaster, + char **ttyName, + int rawmode) +{ + int rc = -1; + + if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0) + goto cleanup; + + if (unlockpt(*ttymaster) < 0) + goto cleanup; + + if (grantpt(*ttymaster) < 0) + goto cleanup; + + if (rawmode) { + struct termios ttyAttr; + if (tcgetattr(*ttymaster, &ttyAttr) < 0) + goto cleanup; + + cfmakeraw(&ttyAttr); + + if (tcsetattr(*ttymaster, TCSADRAIN, &ttyAttr) < 0) + goto cleanup; + } + + if (ttyName) { + char tempTtyName[PATH_MAX]; + if (ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName)) < 0) + goto cleanup; + + if ((*ttyName = strdup(tempTtyName)) == NULL) { + errno = ENOMEM; + goto cleanup; + } + } + + rc = 0; + +cleanup: + if (rc != 0 && + *ttymaster != -1) { + close(*ttymaster); + } + + return rc; + +} + + + /* Like strtol, but produce an "int" result, and check more carefully. Return 0 upon success; return -1 to indicate failure. When END_PTR is NULL, the byte after the final valid digit must be NUL. diff -r e5aefa90abaf src/util.h --- a/src/util.h Mon Jul 14 17:18:49 2008 +0100 +++ b/src/util.h Mon Jul 14 21:03:45 2008 +0100 @@ -58,6 +58,10 @@ char *buf, unsigned int buflen); +int virFileOpenTty(int *ttymaster, + char **ttyName, + int rawmode); + int __virStrToLong_i(char const *s, char **end_ptr, -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

DB> +/** DB> + * lxcControllerMoveInterfaces DB> + * @nveths: number of interfaces DB> + * @veths: interface names DB> + * @container: pid of container DB> + * DB> + * Moves network interfaces into a container's namespace DB> + * DB> + * Returns 0 on success or -1 in case of error DB> + */ DB> +static int lxcControllerMoveInterfaces(int nveths, DB> + char **veths, DB> + pid_t container) DB> +{ DB> + int i; DB> + for (i = 0 ; i < nveths ; i++) DB> + if (moveInterfaceToNetNs(veths[i], container) < 0) { DB> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, DB> + _("failed to move interface %s to ns %d"), DB> + veths[i], container); DB> + return -1; DB> + } DB> + DB> + return 0; DB> +} I'm not sure why, but the call to this function causes a failure on my system. I think that it's related to luck somehow, so I don't think it's something that was really introduced by this set, but it prevents me from starting a container with a network interface. I've tracked it down to the actual virRun() of the 'ip' command. If I comment out the line that virRun()'s ip, everything works fine. However, if it actually gets run, the container never receives the continue message. I added a loop in lxcWaitForContinue() that reads the socket character-by-character instead of bailing out if the first character isn't 'c'. The result was a bunch of control characters followed by 'c'. Based on these two facts, I would tend to guess that somehow the exec of ip is causing some terminal stuff (yes, that's my technical term for it) to be written to the socket ahead of the continue message. I changed 'ip' to 'true' in moveInterfaceToNetNs() and it behaves the same (which absolves ip itself). I removed the virRun() and replaced with a fork()..exec() and it behaves the same. I set FD_CLOEXEC on the socket pair, and it behaves the same. I have no idea (or rather, I'm out of ideas) why this is happening. I assume you're not seeing it because you're not playing with network-enabled guests. DB> +int lxcControllerStart(lxc_vm_def_t *def, DB> + int nveths, DB> + char **veths, DB> + int monitor, DB> + int appPty, DB> + const char *logfile) DB> +{ <snip> DB> + if ((logfd = open(logfile, O_WRONLY | O_TRUNC)) < 0) You need O_CREAT here. DB> @@ -590,82 +578,75 @@ <snip> DB> - if (0 != (rc = lxcSetupInterfaces(conn, vm))) { You changed the condition from "nonzero" here... <snip> DB> + if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) < 0) ...to "negative" here, which makes the start process not notice some of the failure paths in that function, and thus will erroneously return success. In general, I think this is an excellent approach, but I don't really like how many (dozens of) failure points there are in the controller and container setup procedure that result in a silent failure. Almost all of them leave libvirtd thinking that the container/controller is running, but in fact, it _exit(1)'d long ago. -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com

On Wed, Jul 16, 2008 at 11:30:53AM -0700, Dan Smith wrote:
DB> +static int lxcControllerMoveInterfaces(int nveths, DB> + char **veths, DB> + pid_t container) DB> +{ DB> + int i; DB> + for (i = 0 ; i < nveths ; i++) DB> + if (moveInterfaceToNetNs(veths[i], container) < 0) { DB> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, DB> + _("failed to move interface %s to ns %d"), DB> + veths[i], container); DB> + return -1; DB> + } DB> + DB> + return 0; DB> +}
I'm not sure why, but the call to this function causes a failure on my system. I think that it's related to luck somehow, so I don't think it's something that was really introduced by this set, but it prevents me from starting a container with a network interface.
I've tracked it down to the actual virRun() of the 'ip' command. If I comment out the line that virRun()'s ip, everything works fine. However, if it actually gets run, the container never receives the continue message.
Yep, as I mentioned on IRC I've not been able to test the network stuff myself yet, since 2.6.26 hangs on my laptop. I'll hopefully be able to get it running on one of my development servers when I'm back in the office next.
I added a loop in lxcWaitForContinue() that reads the socket character-by-character instead of bailing out if the first character isn't 'c'. The result was a bunch of control characters followed by 'c'.
Based on these two facts, I would tend to guess that somehow the exec of ip is causing some terminal stuff (yes, that's my technical term for it) to be written to the socket ahead of the continue message. I changed 'ip' to 'true' in moveInterfaceToNetNs() and it behaves the same (which absolves ip itself). I removed the virRun() and replaced with a fork()..exec() and it behaves the same. I set FD_CLOEXEC on the socket pair, and it behaves the same.
Ok. that's useful info - I'll do some poking around this.
DB> - if (0 != (rc = lxcSetupInterfaces(conn, vm))) {
You changed the condition from "nonzero" here...
Opps.
<snip>
DB> + if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) < 0)
...to "negative" here, which makes the start process not notice some of the failure paths in that function, and thus will erroneously return success.
In general, I think this is an excellent approach, but I don't really like how many (dozens of) failure points there are in the controller and container setup procedure that result in a silent failure. Almost all of them leave libvirtd thinking that the container/controller is running, but in fact, it _exit(1)'d long ago.
Yes, those are bugs. THe series of patches I sent missed out my change to have the libvirtd daemon register the socket connection to the controller with the event loop. Basically libvirtd needs to watch for SIGHUP on its socket to the conttroller so it detects death. The controller also doesn't seem to always exit when the container dies so its probaly another condition i broke in the I/o forwarding loop. The one difference i gues is that the failures are not directly fed back as errors from the virDomainCreate() call. Ultimately I hope this isn't too much of a problem - the really interesting errors are going to be in the container process, and they were never fed back - the errors in the controller process are mostly my coding bugs which should be fixed :-) much Btw, anything fprintf'd in the controller (eg, via lxcError() ) will end up in /var/log/libvirt/lxc/NAME.log Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

The re-architecting of the LXC controller/container process relationship in the previous patch removed the last obstacle to switching over to the generic domain XML routines. So this patch switches the driver over. First the domain_conf.c file gets support for a new type of device, a 'filesystem'. This contains a source, and a mount point. The source can be either an existing directory, or a file or a block device. If it is a directory the source is just bind mounted into the container; for a block device just a regular filesystem mount is done. A file must first be attached to a loop device. LXC only handles directory bind mounts at this time, but it seemed sensible to let the XML parser handle all three options. Then the vast majority of lxc_conf.h/c is deleted - this is all redundant when using the domain_conf.h APIs. Finally, all references to lxc_vm_t are changed to virDomainObj, and lxc_vm_def_t switches to virDomainDef. Finally the LXC driver registers its capabilities data. For this I have chosen an OS type of 'exe', since the 'operating system' we're running in the container is just any plain executable process. domain_conf.c | 178 +++++++++ domain_conf.h | 27 + lxc_conf.c | 1054 +------------------------------------------------------ lxc_conf.h | 121 ------ lxc_container.c | 23 - lxc_container.h | 2 lxc_controller.c | 4 lxc_controller.h | 2 lxc_driver.c | 276 ++++++++------ 9 files changed, 416 insertions(+), 1271 deletions(-) Daniel diff -r 7059fd49ed7e src/domain_conf.c --- a/src/domain_conf.c Mon Jul 14 21:03:46 2008 +0100 +++ b/src/domain_conf.c Mon Jul 14 21:34:06 2008 +0100 @@ -86,6 +86,11 @@ "virtio", "xen") +VIR_ENUM_IMPL(virDomainFS, VIR_DOMAIN_FS_TYPE_LAST, + "mount", + "block", + "file") + VIR_ENUM_IMPL(virDomainNet, VIR_DOMAIN_NET_TYPE_LAST, "user", "ethernet", @@ -230,6 +235,18 @@ VIR_FREE(def->driverType); virDomainDiskDefFree(def->next); + VIR_FREE(def); +} + +void virDomainFSDefFree(virDomainFSDefPtr def) +{ + if (!def) + return; + + VIR_FREE(def->src); + VIR_FREE(def->dst); + + virDomainFSDefFree(def->next); VIR_FREE(def); } @@ -341,6 +358,7 @@ virDomainGraphicsDefFree(def->graphics); virDomainInputDefFree(def->inputs); virDomainDiskDefFree(def->disks); + virDomainFSDefFree(def->fss); virDomainNetDefFree(def->nets); virDomainChrDefFree(def->serials); virDomainChrDefFree(def->parallels); @@ -351,6 +369,7 @@ VIR_FREE(def->os.type); VIR_FREE(def->os.arch); VIR_FREE(def->os.machine); + VIR_FREE(def->os.init); VIR_FREE(def->os.kernel); VIR_FREE(def->os.initrd); VIR_FREE(def->os.cmdline); @@ -610,6 +629,87 @@ error: virDomainDiskDefFree(def); + def = NULL; + goto cleanup; +} + + +/* Parse the XML definition for a disk + * @param node XML nodeset to parse for disk definition + */ +static virDomainFSDefPtr +virDomainFSDefParseXML(virConnectPtr conn, + xmlNodePtr node) { + virDomainFSDefPtr def; + xmlNodePtr cur; + char *type = NULL; + char *source = NULL; + char *target = NULL; + + if (VIR_ALLOC(def) < 0) { + virDomainReportError(conn, VIR_ERR_NO_MEMORY, NULL); + return NULL; + } + + type = virXMLPropString(node, "type"); + if (type) { + if ((def->type = virDomainFSTypeFromString(type)) < 0) { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + _("unknown filesystem type '%s'"), type); + goto error; + } + } else { + def->type = VIR_DOMAIN_FS_TYPE_MOUNT; + } + + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((source == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "source"))) { + + if (def->type == VIR_DOMAIN_FS_TYPE_MOUNT) + source = virXMLPropString(cur, "dir"); + else if (def->type == VIR_DOMAIN_FS_TYPE_FILE) + source = virXMLPropString(cur, "file"); + else if (def->type == VIR_DOMAIN_FS_TYPE_BLOCK) + source = virXMLPropString(cur, "dev"); + } else if ((target == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + target = virXMLPropString(cur, "dir"); + } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { + def->readonly = 1; + } + } + cur = cur->next; + } + + if (source == NULL) { + virDomainReportError(conn, VIR_ERR_NO_SOURCE, + target ? "%s" : NULL, target); + goto error; + } + + if (target == NULL) { + virDomainReportError(conn, VIR_ERR_NO_TARGET, + source ? "%s" : NULL, source); + goto error; + } + + def->src = source; + source = NULL; + def->dst = target; + target = NULL; + +cleanup: + VIR_FREE(type); + VIR_FREE(target); + VIR_FREE(source); + + return def; + + error: + virDomainFSDefFree(def); def = NULL; goto cleanup; } @@ -1344,6 +1444,10 @@ dev->type = VIR_DOMAIN_DEVICE_DISK; if (!(dev->data.disk = virDomainDiskDefParseXML(conn, node))) goto error; + } else if (xmlStrEqual(node->name, BAD_CAST "filesystem")) { + dev->type = VIR_DOMAIN_DEVICE_FS; + if (!(dev->data.fs = virDomainFSDefParseXML(conn, node))) + goto error; } else if (xmlStrEqual(node->name, BAD_CAST "interface")) { dev->type = VIR_DOMAIN_DEVICE_NET; if (!(dev->data.net = virDomainNetDefParseXML(conn, node))) @@ -1554,6 +1658,7 @@ } if (!def->os.bootloader) { + def->os.init = virXPathString(conn, "string(./os/init[1])", ctxt); def->os.kernel = virXPathString(conn, "string(./os/kernel[1])", ctxt); def->os.initrd = virXPathString(conn, "string(./os/initrd[1])", ctxt); def->os.cmdline = virXPathString(conn, "string(./os/cmdline[1])", ctxt); @@ -1603,12 +1708,15 @@ def->os.type, def->os.arch, type); +#if 0 if (!emulator) { virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("unsupported guest type")); goto error; } - if (!(def->emulator = strdup(emulator))) { +#endif + if (emulator && + !(def->emulator = strdup(emulator))) { virDomainReportError(conn, VIR_ERR_NO_MEMORY, NULL); goto error; } @@ -1641,6 +1749,23 @@ ptr = ptr->next; } } + } + VIR_FREE(nodes); + + /* analysis of the filesystems */ + if ((n = virXPathNodeSet(conn, "./devices/filesystem", ctxt, &nodes)) < 0) { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot extract filesystem devices")); + goto error; + } + for (i = n - 1 ; i >= 0 ; i--) { + virDomainFSDefPtr fs = virDomainFSDefParseXML(conn, + nodes[i]); + if (!fs) + goto error; + + fs->next = def->fss; + def->fss = fs; } VIR_FREE(nodes); @@ -2195,6 +2320,46 @@ } static int +virDomainFSDefFormat(virConnectPtr conn, + virBufferPtr buf, + virDomainFSDefPtr def) +{ + const char *type = virDomainFSTypeToString(def->type); + + if (!type) { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + _("unexpected filesystem type %d"), def->type); + return -1; + } + + virBufferVSprintf(buf, + " <filesystem type='%s'>\n", + type); + + if (def->src) { + if (def->type == VIR_DOMAIN_FS_TYPE_MOUNT) + virBufferEscapeString(buf, " <source dir='%s'/>\n", + def->src); + else if (def->type == VIR_DOMAIN_FS_TYPE_BLOCK) + virBufferEscapeString(buf, " <source dev='%s'/>\n", + def->src); + else + virBufferEscapeString(buf, " <source file='%s'/>\n", + def->src); + } + + virBufferVSprintf(buf, " <target dir='%s'/>\n", + def->dst); + + if (def->readonly) + virBufferAddLit(buf, " <readonly/>\n"); + + virBufferAddLit(buf, " </filesystem>\n"); + + return 0; +} + +static int virDomainNetDefFormat(virConnectPtr conn, virBufferPtr buf, virDomainNetDefPtr def) @@ -2472,6 +2637,7 @@ unsigned char *uuid; char uuidstr[VIR_UUID_STRING_BUFLEN]; virDomainDiskDefPtr disk; + virDomainFSDefPtr fs; virDomainNetDefPtr net; virDomainSoundDefPtr sound; virDomainInputDefPtr input; @@ -2541,6 +2707,9 @@ else virBufferVSprintf(&buf, ">%s</type>\n", def->os.type); + if (def->os.init) + virBufferEscapeString(&buf, " <init>%s</init>\n", + def->os.init); if (def->os.loader) virBufferEscapeString(&buf, " <loader>%s</loader>\n", def->os.loader); @@ -2614,6 +2783,13 @@ if (virDomainDiskDefFormat(conn, &buf, disk) < 0) goto cleanup; disk = disk->next; + } + + fs = def->fss; + while (fs) { + if (virDomainFSDefFormat(conn, &buf, fs) < 0) + goto cleanup; + fs = fs->next; } net = def->nets; diff -r 7059fd49ed7e src/domain_conf.h --- a/src/domain_conf.h Mon Jul 14 21:03:46 2008 +0100 +++ b/src/domain_conf.h Mon Jul 14 21:34:06 2008 +0100 @@ -91,6 +91,27 @@ unsigned int shared : 1; virDomainDiskDefPtr next; +}; + + +/* Two types of disk backends */ +enum virDomainFSType { + VIR_DOMAIN_FS_TYPE_MOUNT, + VIR_DOMAIN_FS_TYPE_BLOCK, + VIR_DOMAIN_FS_TYPE_FILE, + + VIR_DOMAIN_FS_TYPE_LAST +}; + +typedef struct _virDomainFSDef virDomainFSDef; +typedef virDomainFSDef *virDomainFSDefPtr; +struct _virDomainFSDef { + int type; + char *src; + char *dst; + unsigned int readonly : 1; + + virDomainFSDefPtr next; }; @@ -262,6 +283,7 @@ /* Flags for the 'type' field in next struct */ enum virDomainDeviceType { VIR_DOMAIN_DEVICE_DISK, + VIR_DOMAIN_DEVICE_FS, VIR_DOMAIN_DEVICE_NET, VIR_DOMAIN_DEVICE_INPUT, VIR_DOMAIN_DEVICE_SOUND, @@ -273,6 +295,7 @@ int type; union { virDomainDiskDefPtr disk; + virDomainFSDefPtr fs; virDomainNetDefPtr net; virDomainInputDefPtr input; virDomainSoundDefPtr sound; @@ -318,6 +341,7 @@ char *machine; int nBootDevs; int bootDevs[VIR_DOMAIN_BOOT_LAST]; + char *init; char *kernel; char *initrd; char *cmdline; @@ -357,6 +381,7 @@ virDomainGraphicsDefPtr graphics; virDomainDiskDefPtr disks; + virDomainFSDefPtr fss; virDomainNetDefPtr nets; virDomainInputDefPtr inputs; virDomainSoundDefPtr sounds; @@ -411,6 +436,7 @@ void virDomainGraphicsDefFree(virDomainGraphicsDefPtr def); void virDomainInputDefFree(virDomainInputDefPtr def); void virDomainDiskDefFree(virDomainDiskDefPtr def); +void virDomainFSDefFree(virDomainFSDefPtr def); void virDomainNetDefFree(virDomainNetDefPtr def); void virDomainChrDefFree(virDomainChrDefPtr def); void virDomainSoundDefFree(virDomainSoundDefPtr def); @@ -481,6 +507,7 @@ VIR_ENUM_DECL(virDomainDisk) VIR_ENUM_DECL(virDomainDiskDevice) VIR_ENUM_DECL(virDomainDiskBus) +VIR_ENUM_DECL(virDomainFS) VIR_ENUM_DECL(virDomainNet) VIR_ENUM_DECL(virDomainChr) VIR_ENUM_DECL(virDomainSoundModel) diff -r 7059fd49ed7e src/lxc_conf.c --- a/src/lxc_conf.c Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_conf.c Mon Jul 14 21:34:06 2008 +0100 @@ -27,22 +27,8 @@ #ifdef WITH_LXC -#include <dirent.h> -#include <errno.h> -#include <fcntl.h> -#include <string.h> -#include <unistd.h> +#include <sys/utsname.h> -#include <libxml/parser.h> -#include <libxml/tree.h> -#include <libxml/uri.h> -#include <libxml/xpath.h> - -#include "buf.h" -#include "util.h" -#include "uuid.h" -#include "xml.h" -#include "memory.h" #include "lxc_conf.h" /* debug macros */ @@ -54,12 +40,12 @@ const char *fmt, ...) { va_list args; - char errorMessage[LXC_MAX_ERROR_LEN]; + char errorMessage[1024]; const char *codeErrorMessage; if (fmt) { va_start(args, fmt); - vsnprintf(errorMessage, LXC_MAX_ERROR_LEN-1, fmt, args); + vsnprintf(errorMessage, sizeof(errorMessage)-1, fmt, args); va_end(args); } else { errorMessage[0] = '\0'; @@ -71,775 +57,51 @@ codeErrorMessage, errorMessage); } -/** - * lxcParseInterfaceXML: - * @conn: pointer to connection - * @nodePtr: pointer to xml node structure - * @vm: pointer to net definition structure to fill in - * - * Parses the XML for a network interface and places the configuration - * in the given structure. - * - * Returns 0 on success or -1 in case of error - */ -static int lxcParseInterfaceXML(virConnectPtr conn, xmlNodePtr nodePtr, - lxc_net_def_t *netDef) +virCapsPtr lxcCapsInit(void) { - int rc = -1; - xmlNodePtr cur; - xmlChar *type = NULL; - xmlChar *parentIfName = NULL; - xmlChar *network = NULL; - xmlChar *bridge = NULL; - xmlChar *macaddr = NULL; + struct utsname utsname; + virCapsPtr caps; + virCapsGuestPtr guest; - netDef->type = LXC_NET_NETWORK; + /* Really, this never fails - look at the man-page. */ + uname (&utsname); - type = xmlGetProp(nodePtr, BAD_CAST "type"); - if (type != NULL) { - if (xmlStrEqual(type, BAD_CAST "network")) { - netDef->type = LXC_NET_NETWORK; - } - else if (xmlStrEqual(type, BAD_CAST "bridge")) { - netDef->type = LXC_NET_BRIDGE; - } - else { - lxcError(conn, NULL, VIR_ERR_XML_ERROR, - _("invalid interface type: %s"), type); - goto error_out; - } - } + if ((caps = virCapabilitiesNew(utsname.machine, + 0, 0)) == NULL) + goto no_memory; - cur = nodePtr->children; - for (cur = nodePtr->children; cur != NULL; cur = cur->next) { - if (cur->type == XML_ELEMENT_NODE) { - DEBUG("cur->name: %s", (char*)(cur->name)); - if ((macaddr == NULL) && - (xmlStrEqual(cur->name, BAD_CAST "mac"))) { - macaddr = xmlGetProp(cur, BAD_CAST "address"); - } else if ((network == NULL) && - (netDef->type == LXC_NET_NETWORK) && - (xmlStrEqual(cur->name, BAD_CAST "source"))) { - network = xmlGetProp(cur, BAD_CAST "network"); - parentIfName = xmlGetProp(cur, BAD_CAST "dev"); - } else if ((bridge == NULL) && - (netDef->type == LXC_NET_BRIDGE) && - (xmlStrEqual(cur->name, BAD_CAST "source"))) { - bridge = xmlGetProp(cur, BAD_CAST "bridge"); - } else if ((parentIfName == NULL) && - (xmlStrEqual(cur->name, BAD_CAST "target"))) { - parentIfName = xmlGetProp(cur, BAD_CAST "dev"); - } - } - } + if ((guest = virCapabilitiesAddGuest(caps, + "exe", + utsname.machine, + sizeof(int) == 4 ? 32 : 8, + NULL, + NULL, + 0, + NULL)) == NULL) + goto no_memory; - if (netDef->type == LXC_NET_NETWORK) { - if (network == NULL) { - lxcError(conn, NULL, VIR_ERR_XML_ERROR, - _("No <source> 'network' attribute specified with <interface type='network'/>")); - goto error_out; - } + if (virCapabilitiesAddGuestDomain(guest, + "lxc", + NULL, + NULL, + 0, + NULL) == NULL) + goto no_memory; - netDef->txName = strdup((char *)network); - if (NULL == netDef->txName) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("No storage for network name")); - goto error_out; - } + return caps; - } else if (netDef->type == LXC_NET_BRIDGE) { - if (bridge == NULL) { - lxcError(conn, NULL, VIR_ERR_XML_ERROR, - _("No <source> 'bridge' attribute specified with <interface type='bridge'/>")); - goto error_out; - } - - netDef->txName = strdup((char *)bridge); - if (NULL == netDef->txName) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("No storage for bridge name")); - goto error_out; - } - } - - if (parentIfName != NULL) { - DEBUG("set netDef->parentVeth: %s", netDef->parentVeth); - netDef->parentVeth = strdup((char *)parentIfName); - if (NULL == netDef->parentVeth) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("No storage for parent veth device name")); - goto error_out; - } - } else { - netDef->parentVeth = NULL; - DEBUG0("set netDef->parentVeth: NULL"); - } - - rc = 0; - -error_out: - xmlFree(macaddr); - xmlFree(network); - xmlFree(bridge); - xmlFree(parentIfName); - - return rc; -} - -/** - * lxcParseDomainInterfaces: - * @conn: pointer to connection - * @nets: on success, points to an list of net def structs - * @contextPtr: pointer to xml context - * - * Parses the domain network interfaces and returns the information in a list - * - * Returns 0 on success or -1 in case of error - */ -static int lxcParseDomainInterfaces(virConnectPtr conn, - lxc_net_def_t **nets, - xmlXPathContextPtr contextPtr) -{ - int rc = -1; - int i; - lxc_net_def_t *netDef; - lxc_net_def_t *prevDef = NULL; - int numNets = 0; - xmlNodePtr *list; - int res; - - DEBUG0("parsing nets"); - - res = virXPathNodeSet(conn, "/domain/devices/interface", contextPtr, &list); - if (res > 0) { - for (i = 0; i < res; ++i) { - netDef = calloc(1, sizeof(lxc_net_def_t)); - if (NULL == netDef) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, - _("No storage for net def structure")); - free(list); - goto parse_complete; - } - - rc = lxcParseInterfaceXML(conn, list[i], netDef); - if (0 > rc) { - DEBUG("failed parsing a net: %d", rc); - - free(netDef); - free(list); - goto parse_complete; - } - - DEBUG0("parsed a net"); - - /* set the linked list pointers */ - numNets++; - netDef->next = NULL; - if (0 == i) { - *nets = netDef; - } else { - prevDef->next = netDef; - } - prevDef = netDef; - } - free(list); - } - - rc = numNets; - -parse_complete: - DEBUG("parsed %d nets", rc); - return rc; -} - -static int lxcParseMountXML(virConnectPtr conn, xmlNodePtr nodePtr, - lxc_mount_t *lxcMount) -{ - xmlChar *fsType = NULL; - xmlNodePtr curNode; - xmlChar *mountSource = NULL; - xmlChar *mountTarget = NULL; - int strLen; - int rc = -1; - - fsType = xmlGetProp(nodePtr, BAD_CAST "type"); - if (NULL == fsType) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("missing filesystem type")); - goto error; - } - - if (xmlStrEqual(fsType, BAD_CAST "mount") == 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid filesystem type")); - goto error; - } - - for (curNode = nodePtr->children; - NULL != curNode; - curNode = curNode->next) { - if (curNode->type != XML_ELEMENT_NODE) { - continue; - } - - if ((mountSource == NULL) && - (xmlStrEqual(curNode->name, BAD_CAST "source"))) { - mountSource = xmlGetProp(curNode, BAD_CAST "dir"); - } else if ((mountTarget == NULL) && - (xmlStrEqual(curNode->name, BAD_CAST "target"))) { - mountTarget = xmlGetProp(curNode, BAD_CAST "dir"); - } - } - - if (mountSource == NULL) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("missing mount source")); - goto error; - } - - strLen = xmlStrlen(mountSource); - if ((strLen > (PATH_MAX-1)) || (0 == strLen)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("empty or invalid mount source")); - goto error; - } - - strncpy(lxcMount->source, (char *)mountSource, strLen); - lxcMount->source[strLen] = '\0'; - - if (mountTarget == NULL) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("missing mount target")); - goto error; - } - - strLen = xmlStrlen(mountTarget); - if ((strLen > (PATH_MAX-1)) || (0 == strLen)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("empty or invalid mount target")); - goto error; - } - - strncpy(lxcMount->target, (char *)mountTarget, strLen); - lxcMount->target[strLen] = '\0'; - - rc = 0; - -error: - xmlFree(fsType); - xmlFree(mountSource); - xmlFree(mountTarget); - - return rc; -} - -static int lxcParseDomainName(virConnectPtr conn, char **name, - xmlXPathContextPtr contextPtr) -{ - char *res; - - res = virXPathString(conn, "string(/domain/name[1])", contextPtr); - if (res == NULL) { - lxcError(conn, NULL, VIR_ERR_NO_NAME, NULL); - return(-1); - } - - *name = res; - return(0); -} - -static int lxcParseDomainUUID(virConnectPtr conn, unsigned char *uuid, - xmlXPathContextPtr contextPtr) -{ - char *res; - - res = virXPathString(conn, "string(/domain/uuid[1])", contextPtr); - if (res == NULL) { - if (virUUIDGenerate(uuid)) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to generate uuid")); - return(-1); - } - } else { - if (virUUIDParse(res, uuid) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid uuid element")); - VIR_FREE(res); - return(-1); - } - VIR_FREE(res); - } - return(0); -} - -static int lxcParseDomainMounts(virConnectPtr conn, - lxc_mount_t **mounts, - xmlXPathContextPtr contextPtr) -{ - int rc = -1; - int i; - lxc_mount_t *mountObj; - lxc_mount_t *prevObj = NULL; - int nmounts = 0; - xmlNodePtr *list; - int res; - - res = virXPathNodeSet(conn, "/domain/devices/filesystem", contextPtr, &list); - if (res > 0) { - for (i = 0; i < res; ++i) { - if (VIR_ALLOC(mountObj) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, "mount"); - goto parse_complete; - } - - rc = lxcParseMountXML(conn, list[i], mountObj); - if (0 > rc) { - VIR_FREE(mountObj); - goto parse_complete; - } - - /* set the linked list pointers */ - nmounts++; - mountObj->next = NULL; - if (0 == i) { - *mounts = mountObj; - } else { - prevObj->next = mountObj; - } - prevObj = mountObj; - } - VIR_FREE(list); - } - - rc = nmounts; - -parse_complete: - return rc; -} - -static int lxcParseDomainInit(virConnectPtr conn, char** init, - xmlXPathContextPtr contextPtr) -{ - char *res; - - res = virXPathString(conn, "string(/domain/os/init[1])", contextPtr); - if (res == NULL) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid or missing init element")); - return(-1); - } - - if (strlen(res) >= PATH_MAX - 1) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("init string too long")); - VIR_FREE(res); - return(-1); - } - - *init = res; - - return(0); -} - - -static int lxcParseDomainTty(virConnectPtr conn, char **tty, xmlXPathContextPtr contextPtr) -{ - char *res; - - res = virXPathString(conn, "string(/domain/devices/console[1]/@tty)", contextPtr); - if (res == NULL) { - /* make sure the tty string is empty */ - *tty = strdup(""); - if (*tty == NULL) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); - return(-1); - } - } else { - *tty = res; - } - - return(0); -} - -static int lxcParseDomainMemory(virConnectPtr conn, int* memory, xmlXPathContextPtr contextPtr) -{ - long res; - int rc; - - rc = virXPathLong(conn, "string(/domain/memory[1])", contextPtr, &res); - if ((rc == -2) || ((rc == 0) && (res <= 0))) { - *memory = -1; - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid memory value")); - } else if (rc < 0) { - /* not an error, default to an invalid value so it's not used */ - *memory = -1; - } else { - *memory = (int) res; - } - return(0); -} - -static lxc_vm_def_t * lxcParseXML(virConnectPtr conn, xmlDocPtr docPtr) -{ - xmlNodePtr rootNodePtr = NULL; - xmlXPathContextPtr contextPtr = NULL; - xmlChar *xmlProp = NULL; - lxc_vm_def_t *containerDef; - - if (VIR_ALLOC(containerDef) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, "containerDef"); - return NULL; - } - - /* Prepare parser / xpath context */ - rootNodePtr = xmlDocGetRootElement(docPtr); - if ((rootNodePtr == NULL) || - (!xmlStrEqual(rootNodePtr->name, BAD_CAST "domain"))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid root element")); - goto error; - } - - contextPtr = xmlXPathNewContext(docPtr); - if (contextPtr == NULL) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, "context"); - goto error; - } - - /* Verify the domain type is linuxcontainer */ - if (!(xmlProp = xmlGetProp(rootNodePtr, BAD_CAST "type"))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("missing domain type")); - goto error; - } - - if (!(xmlStrEqual(xmlProp, BAD_CAST LXC_DOMAIN_TYPE))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid domain type")); - goto error; - } - VIR_FREE(xmlProp); - - if ((xmlProp = xmlGetProp(rootNodePtr, BAD_CAST "id"))) { - if (0 > virStrToLong_i((char*)xmlProp, NULL, 10, &(containerDef->id))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("invalid domain id")); - goto error; - } - - /* verify the container process still exists */ - if (1 != lxcCheckContainerProcess(containerDef)) { - containerDef->id = -1; - } - - } else { - containerDef->id = -1; - } - VIR_FREE(xmlProp); - - if (lxcParseDomainName(conn, &(containerDef->name), contextPtr) < 0) { - goto error; - } - - if (lxcParseDomainInit(conn, &(containerDef->init), contextPtr) < 0) { - goto error; - } - - if (lxcParseDomainUUID(conn, containerDef->uuid, contextPtr) < 0) { - goto error; - } - - containerDef->nmounts = lxcParseDomainMounts(conn, &(containerDef->mounts), - contextPtr); - if (0 > containerDef->nmounts) { - goto error; - } - - containerDef->numNets = lxcParseDomainInterfaces(conn, - &(containerDef->nets), - contextPtr); - if (0 > containerDef->numNets) { - goto error; - } - - if (lxcParseDomainTty(conn, &(containerDef->tty), contextPtr) < 0) { - goto error; - } - - if (lxcParseDomainMemory(conn, &(containerDef->maxMemory), contextPtr) < 0) { - goto error; - } - - xmlXPathFreeContext(contextPtr); - - return containerDef; - - error: - VIR_FREE(xmlProp); - xmlXPathFreeContext(contextPtr); - lxcFreeVMDef(containerDef); - + no_memory: + virCapabilitiesFree(caps); return NULL; } - -lxc_vm_def_t * lxcParseVMDef(virConnectPtr conn, - const char* xmlString, - const char* fileName) -{ - xmlDocPtr xml; - lxc_vm_def_t *containerDef; - - xml = xmlReadDoc(BAD_CAST xmlString, - fileName ? fileName : "domain.xml", - NULL, XML_PARSE_NOENT | - XML_PARSE_NONET | XML_PARSE_NOERROR | - XML_PARSE_NOWARNING); - if (!xml) { - lxcError(conn, NULL, VIR_ERR_XML_ERROR, NULL); - return NULL; - } - - containerDef = lxcParseXML(conn, xml); - - xmlFreeDoc(xml); - - return containerDef; -} - -lxc_vm_t * lxcAssignVMDef(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_def_t *def) -{ - lxc_vm_t *vm = NULL; - - if ((vm = lxcFindVMByName(driver, def->name))) { - if (!lxcIsActiveVM(vm)) { - lxcFreeVMDef(vm->def); - vm->def = def; - } else { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("Can't redefine active VM with name %s"), def->name); - return NULL; - } - - return vm; - } - - if (VIR_ALLOC(vm) < 0) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, "vm"); - return NULL; - } - - vm->pid = -1; - vm->def = def; - vm->next = driver->vms; - - driver->vms = vm; - - if (lxcIsActiveVM(vm)) { - vm->state = VIR_DOMAIN_RUNNING; - driver->nactivevms++; - } else { - vm->state = VIR_DOMAIN_SHUTOFF; - driver->ninactivevms++; - } - - return vm; -} - -/** - * lxcCheckContainerProcess: - * @def: Ptr to VM definition - * - * Checks if the container process (stored at def->id is running - * - * Returns on success or -1 in case of error - * 0 - no process with id vm->def->id - * 1 - container process exists - * -1 - error - */ -int lxcCheckContainerProcess(lxc_vm_def_t *def) -{ - int rc = -1; - - if (1 < def->id) { - if (-1 == kill(def->id, 0)) { - if (ESRCH == errno) { - rc = 0; - DEBUG("pid %d no longer exists", def->id); - goto done; - } - - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("error checking container process: %d %s"), - def->id, strerror(errno)); - goto done; - } - - DEBUG("pid %d still exists", def->id); - rc = 1; - goto done; - } - - rc = 0; - -done: - return rc; -} - -void lxcRemoveInactiveVM(lxc_driver_t *driver, - lxc_vm_t *vm) -{ - lxc_vm_t *prevVm = NULL; - lxc_vm_t *curVm; - - for (curVm = driver->vms; - (curVm != vm) && (NULL != curVm); - curVm = curVm->next) { - prevVm = curVm; - } - - if (curVm) { - if (prevVm) { - prevVm->next = curVm->next; - } else { - driver->vms = curVm->next; - } - - driver->ninactivevms--; - } - - lxcFreeVM(vm); -} - -/* Save a container's config data into a persistent file */ -int lxcSaveConfig(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_t *vm, - lxc_vm_def_t *def) -{ - int rc = -1; - char *xmlDef; - int fd = -1; - int amtToWrite; - - if (!(xmlDef = lxcGenerateXML(conn, driver, vm, def))) { - return -1; - } - - if ((fd = open(vm->configFile, - O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR )) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot create config file %s: %s"), - vm->configFile, strerror(errno)); - goto cleanup; - } - - amtToWrite = strlen(xmlDef); - if (safewrite(fd, xmlDef, amtToWrite) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot write config file %s: %s"), - vm->configFile, strerror(errno)); - goto cleanup; - } - - if (close(fd) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot save config file %s: %s"), - vm->configFile, strerror(errno)); - goto cleanup; - } - - rc = 0; - - cleanup: - if (fd != -1) { - close(fd); - } - - VIR_FREE(xmlDef); - - return rc; -} - -int lxcSaveVMDef(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_t *vm, - lxc_vm_def_t *def) -{ - int rc = -1; - - if (vm->configFile[0] == '\0') { - if ((rc = virFileMakePath(driver->configDir))) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot create config directory %s: %s"), - driver->configDir, strerror(rc)); - goto save_complete; - } - - if (virFileBuildPath(driver->configDir, vm->def->name, ".xml", - vm->configFile, PATH_MAX) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot construct config file path")); - goto save_complete; - } - - strncpy(vm->configFileBase, vm->def->name, PATH_MAX-1); - strncat(vm->configFileBase, ".xml", PATH_MAX - strlen(vm->def->name)-1); - - } - - rc = lxcSaveConfig(conn, driver, vm, def); - -save_complete: - return rc; -} - - - -static lxc_vm_t * lxcLoadConfig(lxc_driver_t *driver, - const char *file, - const char *fullFilePath, - const char *xmlData) -{ - lxc_vm_def_t *containerDef; - lxc_vm_t * vm; - - containerDef = lxcParseVMDef(NULL, xmlData, file); - if (NULL == containerDef) { - DEBUG0("Error parsing container config"); - return NULL; - } - - if (!virFileMatchesNameSuffix(file, containerDef->name, ".xml")) { - DEBUG0("Container name does not match config file name"); - lxcFreeVMDef(containerDef); - return NULL; - } - - vm = lxcAssignVMDef(NULL, driver, containerDef); - if (NULL == vm) { - DEBUG0("Failed to load container config"); - lxcFreeVMDef(containerDef); - return NULL; - } - - strncpy(vm->configFile, fullFilePath, PATH_MAX); - vm->configFile[PATH_MAX-1] = '\0'; - - strncpy(vm->configFileBase, file, PATH_MAX); - vm->configFile[PATH_MAX-1] = '\0'; - - return vm; -} int lxcLoadDriverConfig(lxc_driver_t *driver) { /* Set the container configuration directory */ if ((driver->configDir = strdup(SYSCONF_DIR "/libvirt/lxc")) == NULL) + goto no_memory; + if ((driver->autostartDir = strdup(SYSCONF_DIR "/libvirt/lxc/autostart")) == NULL) goto no_memory; if ((driver->stateDir = strdup(LOCAL_STATE_DIR "/run/libvirt/lxc")) == NULL) goto no_memory; @@ -853,255 +115,5 @@ return -1; } -int lxcLoadContainerConfigFile(lxc_driver_t *driver, - const char *file) -{ - int rc = -1; - char tempPath[PATH_MAX]; - char* xmlData; - - rc = virFileBuildPath(driver->configDir, file, NULL, tempPath, - PATH_MAX); - if (0 > rc) { - DEBUG0("config file name too long"); - goto load_complete; - } - - if ((rc = virFileReadAll(tempPath, LXC_MAX_XML_LENGTH, &xmlData)) < 0) { - goto load_complete; - } - - lxcLoadConfig(driver, file, tempPath, xmlData); - - VIR_FREE(xmlData); - -load_complete: - return rc; -} - -int lxcLoadContainerInfo(lxc_driver_t *driver) -{ - int rc = -1; - DIR *dir; - struct dirent *dirEntry; - - if (!(dir = opendir(driver->configDir))) { - if (ENOENT == errno) { - /* no config dir => no containers */ - rc = 0; - } else { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to open config directory %s: %s"), - driver->configDir, strerror(errno)); - } - - goto load_complete; - } - - while ((dirEntry = readdir(dir))) { - if (dirEntry->d_name[0] == '.') { - continue; - } - - if (!virFileHasSuffix(dirEntry->d_name, ".xml")) { - continue; - } - - lxcLoadContainerConfigFile(driver, dirEntry->d_name); - } - - closedir(dir); - - rc = 0; - -load_complete: - return rc; -} - -/* Generate an XML document describing the vm's configuration */ -char *lxcGenerateXML(virConnectPtr conn, - lxc_driver_t *driver ATTRIBUTE_UNUSED, - lxc_vm_t *vm, - lxc_vm_def_t *def) -{ - virBuffer buf = VIR_BUFFER_INITIALIZER; - unsigned char *uuid; - char uuidstr[VIR_UUID_STRING_BUFLEN]; - lxc_mount_t *mount; - lxc_net_def_t *net; - - if (lxcIsActiveVM(vm)) - virBufferVSprintf(&buf, "<domain type='%s' id='%d'>\n", - LXC_DOMAIN_TYPE, vm->def->id); - else - virBufferVSprintf(&buf, "<domain type='%s'>\n", - LXC_DOMAIN_TYPE); - - virBufferVSprintf(&buf, " <name>%s</name>\n", def->name); - - uuid = def->uuid; - virUUIDFormat(uuid, uuidstr); - virBufferVSprintf(&buf, " <uuid>%s</uuid>\n", uuidstr); - virBufferAddLit(&buf, " <os>\n"); - virBufferVSprintf(&buf, " <init>%s</init>\n", def->init); - virBufferAddLit(&buf, " </os>\n"); - virBufferVSprintf(&buf, " <memory>%d</memory>\n", def->maxMemory); - virBufferAddLit(&buf, " <devices>\n"); - - /* loop adding mounts */ - for (mount = def->mounts; mount; mount = mount->next) { - virBufferAddLit(&buf, " <filesystem type='mount'>\n"); - virBufferVSprintf(&buf, " <source dir='%s'/>\n", - mount->source); - virBufferVSprintf(&buf, " <target dir='%s'/>\n", - mount->target); - virBufferAddLit(&buf, " </filesystem>\n"); - } - - /* loop adding nets */ - for (net = def->nets; net; net = net->next) { - if (net->type == LXC_NET_NETWORK) { - virBufferAddLit(&buf, " <interface type='network'>\n"); - virBufferVSprintf(&buf, " <source network='%s'/>\n", - net->txName); - } else { - virBufferAddLit(&buf, " <interface type='bridge'>\n"); - virBufferVSprintf(&buf, " <source bridge='%s'/>\n", - net->txName); - } - - if (NULL != net->parentVeth) { - virBufferVSprintf(&buf, " <target dev='%s'/>\n", - net->parentVeth); - } - - virBufferAddLit(&buf, " </interface>\n"); - - } - - virBufferVSprintf(&buf, " <console tty='%s'/>\n", def->tty); - virBufferAddLit(&buf, " </devices>\n"); - virBufferAddLit(&buf, "</domain>\n"); - - if (virBufferError(&buf)) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY,_("allocate buffer")); - return NULL; - } - - return virBufferContentAndReset(&buf); -} - -void lxcFreeVMDef(lxc_vm_def_t *vmdef) -{ - lxc_mount_t *curMount; - lxc_mount_t *nextMount; - lxc_net_def_t *curNet; - lxc_net_def_t *nextNet; - - if (vmdef == NULL) - return; - - curMount = vmdef->mounts; - while (curMount) { - nextMount = curMount->next; - VIR_FREE(curMount); - curMount = nextMount; - } - - curNet = vmdef->nets; - while (curNet) { - nextNet = curNet->next; - VIR_FREE(curNet->parentVeth); - VIR_FREE(curNet->txName); - VIR_FREE(curNet); - curNet = nextNet; - } - - VIR_FREE(vmdef->name); - VIR_FREE(vmdef->init); - VIR_FREE(vmdef->tty); - VIR_FREE(vmdef); -} - -void lxcFreeVMs(lxc_vm_t *vms) -{ - lxc_vm_t *curVm = vms; - lxc_vm_t *nextVm; - - while (curVm) { - nextVm = curVm->next; - lxcFreeVM(curVm); - curVm = nextVm; - } -} - -void lxcFreeVM(lxc_vm_t *vm) -{ - lxcFreeVMDef(vm->def); - VIR_FREE(vm); -} - -lxc_vm_t *lxcFindVMByID(const lxc_driver_t *driver, int id) -{ - lxc_vm_t *vm; - - for (vm = driver->vms; vm; vm = vm->next) { - if (lxcIsActiveVM(vm) && (vm->def->id == id)) { - return vm; - } - - } - - return NULL; -} - -lxc_vm_t *lxcFindVMByUUID(const lxc_driver_t *driver, - const unsigned char *uuid) -{ - lxc_vm_t *vm; - - for (vm = driver->vms; vm; vm = vm->next) { - if (!memcmp(vm->def->uuid, uuid, VIR_UUID_BUFLEN)) { - return vm; - } - } - - return NULL; -} - -lxc_vm_t *lxcFindVMByName(const lxc_driver_t *driver, - const char *name) -{ - lxc_vm_t *vm; - - for (vm = driver->vms; vm; vm = vm->next) { - if (STREQ(vm->def->name, name)) { - return vm; - } - } - - return NULL; -} - -int lxcDeleteConfig(virConnectPtr conn, - lxc_driver_t *driver ATTRIBUTE_UNUSED, - const char *configFile, - const char *name) -{ - if (!configFile[0]) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("no config file for %s"), name); - return -1; - } - - if (unlink(configFile) < 0) { - lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, - _("cannot remove config for %s"), name); - return -1; - } - - return 0; -} - #endif /* WITH_LXC */ diff -r 7059fd49ed7e src/lxc_conf.h --- a/src/lxc_conf.h Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_conf.h Mon Jul 14 21:34:06 2008 +0100 @@ -29,130 +29,23 @@ #ifdef WITH_LXC #include "internal.h" - -/* Defines */ -#define LXC_MAX_TTY_NAME 32 -#define LXC_MAX_XML_LENGTH 16384 -#define LXC_MAX_ERROR_LEN 1024 -#define LXC_DOMAIN_TYPE "lxc" - -/* types of networks for containers */ -enum lxc_net_type { - LXC_NET_NETWORK, - LXC_NET_BRIDGE -}; - -typedef struct __lxc_net_def lxc_net_def_t; -struct __lxc_net_def { - int type; - char *parentVeth; /* veth device in parent namespace */ - char *txName; /* bridge or network name */ - - lxc_net_def_t *next; -}; - -typedef struct __lxc_mount lxc_mount_t; -struct __lxc_mount { - char source[PATH_MAX]; /* user's directory */ - char target[PATH_MAX]; - - lxc_mount_t *next; -}; - -typedef struct __lxc_vm_def lxc_vm_def_t; -struct __lxc_vm_def { - unsigned char uuid[VIR_UUID_BUFLEN]; - char* name; - int id; - - /* init command string */ - char *init; - - int maxMemory; - - /* mounts - list of mount structs */ - int nmounts; - lxc_mount_t *mounts; - - /* tty device */ - char *tty; - - /* network devices */ - int numNets; - lxc_net_def_t *nets; -}; - -typedef struct __lxc_vm lxc_vm_t; -struct __lxc_vm { - int pid; - int state; - int monitor; - - char configFile[PATH_MAX]; - char configFileBase[PATH_MAX]; - - lxc_vm_def_t *def; - - lxc_vm_t *next; -}; +#include "domain_conf.h" +#include "capabilities.h" typedef struct __lxc_driver lxc_driver_t; struct __lxc_driver { - lxc_vm_t *vms; - int nactivevms; - int ninactivevms; + virCapsPtr caps; + + virDomainObjPtr domains; char *configDir; + char *autostartDir; char *stateDir; char *logDir; int have_netns; }; -/* Types and structs */ - -/* Inline Functions */ -static inline int lxcIsActiveVM(lxc_vm_t *vm) -{ - return vm->def->id != -1; -} - -/* Function declarations */ -lxc_vm_def_t * lxcParseVMDef(virConnectPtr conn, - const char* xmlString, - const char* fileName); -int lxcSaveVMDef(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_t *vm, - lxc_vm_def_t *def); int lxcLoadDriverConfig(lxc_driver_t *driver); -int lxcSaveConfig(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_t *vm, - lxc_vm_def_t *def); -int lxcLoadContainerInfo(lxc_driver_t *driver); -int lxcLoadContainerConfigFile(lxc_driver_t *driver, - const char *file); -lxc_vm_t * lxcAssignVMDef(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_def_t *def); -char *lxcGenerateXML(virConnectPtr conn, - lxc_driver_t *driver, - lxc_vm_t *vm, - lxc_vm_def_t *def); -lxc_vm_t *lxcFindVMByID(const lxc_driver_t *driver, int id); -lxc_vm_t *lxcFindVMByUUID(const lxc_driver_t *driver, - const unsigned char *uuid); -lxc_vm_t *lxcFindVMByName(const lxc_driver_t *driver, - const char *name); -int lxcCheckContainerProcess(lxc_vm_def_t *vm); -void lxcRemoveInactiveVM(lxc_driver_t *driver, - lxc_vm_t *vm); -void lxcFreeVMs(lxc_vm_t *vms); -void lxcFreeVM(lxc_vm_t *vm); -void lxcFreeVMDef(lxc_vm_def_t *vmdef); -int lxcDeleteConfig(virConnectPtr conn, - lxc_driver_t *driver, - const char *configFile, - const char *name); +virCapsPtr lxcCapsInit(void); void lxcError(virConnectPtr conn, virDomainPtr dom, diff -r 7059fd49ed7e src/lxc_container.c --- a/src/lxc_container.c Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_container.c Mon Jul 14 21:34:06 2008 +0100 @@ -68,7 +68,7 @@ typedef struct __lxc_child_argv lxc_child_argv_t; struct __lxc_child_argv { - lxc_vm_def_t *config; + virDomainDefPtr config; int nveths; char **veths; int monitor; @@ -85,10 +85,10 @@ * * Does not return */ -static int lxcContainerExecInit(const lxc_vm_def_t *vmDef) +static int lxcContainerExecInit(virDomainDefPtr vmDef) { const char *const argv[] = { - vmDef->init, + vmDef->os.init, NULL, }; @@ -268,8 +268,8 @@ { int rc = -1; lxc_child_argv_t *argv = data; - lxc_vm_def_t *vmDef = argv->config; - lxc_mount_t *curMount; + virDomainDefPtr vmDef = argv->config; + virDomainFSDefPtr curMount; int i; if (NULL == vmDef) { @@ -280,17 +280,20 @@ /* handle the bind mounts first before doing anything else that may */ /* then access those mounted dirs */ - curMount = vmDef->mounts; + curMount = vmDef->fss; for (i = 0; curMount; curMount = curMount->next) { - rc = mount(curMount->source, - curMount->target, + // XXX fix + if (curMount->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + rc = mount(curMount->src, + curMount->dst, NULL, MS_BIND, NULL); if (0 != rc) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to mount %s at %s for container: %s"), - curMount->source, curMount->target, strerror(errno)); + curMount->src, curMount->dst, strerror(errno)); return -1; } } @@ -328,7 +331,7 @@ * * Returns PID of container on success or -1 in case of error */ -int lxcContainerStart(lxc_vm_def_t *def, +int lxcContainerStart(virDomainDefPtr def, int nveths, char **veths, int control, diff -r 7059fd49ed7e src/lxc_container.h --- a/src/lxc_container.h Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_container.h Mon Jul 14 21:34:06 2008 +0100 @@ -34,7 +34,7 @@ int lxcContainerSendContinue(int control); -int lxcContainerStart(lxc_vm_def_t *def, +int lxcContainerStart(virDomainDefPtr def, int nveths, char **veths, int control, diff -r 7059fd49ed7e src/lxc_controller.c --- a/src/lxc_controller.c Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_controller.c Mon Jul 14 21:34:06 2008 +0100 @@ -320,7 +320,7 @@ static int -lxcControllerRun(lxc_vm_def_t *def, +lxcControllerRun(virDomainDefPtr def, int nveths, char **veths, int monitor, @@ -381,7 +381,7 @@ } -int lxcControllerStart(lxc_vm_def_t *def, +int lxcControllerStart(virDomainDefPtr def, int nveths, char **veths, int monitor, diff -r 7059fd49ed7e src/lxc_controller.h --- a/src/lxc_controller.h Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_controller.h Mon Jul 14 21:34:06 2008 +0100 @@ -30,7 +30,7 @@ pid_t lxcControllerPID(int monitor); -int lxcControllerStart(lxc_vm_def_t *def, +int lxcControllerStart(virDomainDefPtr def, int nveths, char **veths, int monitor, diff -r 7059fd49ed7e src/lxc_driver.c --- a/src/lxc_driver.c Mon Jul 14 21:03:46 2008 +0100 +++ b/src/lxc_driver.c Mon Jul 14 21:34:06 2008 +0100 @@ -36,7 +36,6 @@ #include <unistd.h> #include <wait.h> -#include "internal.h" #include "lxc_conf.h" #include "lxc_container.h" #include "lxc_driver.h" @@ -108,7 +107,7 @@ int id) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm = lxcFindVMByID(driver, id); + virDomainObjPtr vm = virDomainFindByID(driver->domains, id); virDomainPtr dom; if (!vm) { @@ -128,7 +127,7 @@ const unsigned char *uuid) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm = lxcFindVMByUUID(driver, uuid); + virDomainObjPtr vm = virDomainFindByUUID(driver->domains, uuid); virDomainPtr dom; if (!vm) { @@ -148,7 +147,7 @@ const char *name) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm = lxcFindVMByName(driver, name); + virDomainObjPtr vm = virDomainFindByName(driver->domains, name); virDomainPtr dom; if (!vm) { @@ -164,90 +163,96 @@ return dom; } -static int lxcListDomains(virConnectPtr conn, int *ids, int nids) -{ +static int lxcListDomains(virConnectPtr conn, int *ids, int nids) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm; - int numDoms = 0; - - for (vm = driver->vms; vm && (numDoms < nids); vm = vm->next) { - if (lxcIsActiveVM(vm)) { - ids[numDoms] = vm->def->id; - numDoms++; + virDomainObjPtr vm = driver->domains; + int got = 0; + while (vm && got < nids) { + if (virDomainIsActive(vm)) { + ids[got] = vm->def->id; + got++; } + vm = vm->next; } - - return numDoms; + return got; } - -static int lxcNumDomains(virConnectPtr conn) -{ +static int lxcNumDomains(virConnectPtr conn) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - return driver->nactivevms; + int n = 0; + virDomainObjPtr dom = driver->domains; + while (dom) { + if (virDomainIsActive(dom)) + n++; + dom = dom->next; + } + return n; } static int lxcListDefinedDomains(virConnectPtr conn, - char **const names, int nnames) -{ + char **const names, int nnames) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm; - int numDoms = 0; - int i; - - for (vm = driver->vms; vm && (numDoms < nnames); vm = vm->next) { - if (!lxcIsActiveVM(vm)) { - if (!(names[numDoms] = strdup(vm->def->name))) { - lxcError(conn, NULL, VIR_ERR_NO_MEMORY, "names"); + virDomainObjPtr vm = driver->domains; + int got = 0, i; + while (vm && got < nnames) { + if (!virDomainIsActive(vm)) { + if (!(names[got] = strdup(vm->def->name))) { + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL); goto cleanup; } - - numDoms++; + got++; } - + vm = vm->next; } - - return numDoms; + return got; cleanup: - for (i = 0 ; i < numDoms ; i++) { + for (i = 0 ; i < got ; i++) VIR_FREE(names[i]); - } - return -1; } -static int lxcNumDefinedDomains(virConnectPtr conn) -{ +static int lxcNumDefinedDomains(virConnectPtr conn) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - return driver->ninactivevms; + int n = 0; + virDomainObjPtr dom = driver->domains; + while (dom) { + if (!virDomainIsActive(dom)) + n++; + dom = dom->next; + } + return n; } + + static virDomainPtr lxcDomainDefine(virConnectPtr conn, const char *xml) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_def_t *def; - lxc_vm_t *vm; + virDomainDefPtr def; + virDomainObjPtr vm; virDomainPtr dom; - if (!(def = lxcParseVMDef(conn, xml, NULL))) { + if (!(def = virDomainDefParseString(conn, driver->caps, xml))) return NULL; - } if ((def->nets != NULL) && !(driver->have_netns)) { lxcError(conn, NULL, VIR_ERR_NO_SUPPORT, _("System lacks NETNS support")); - lxcFreeVMDef(def); + virDomainDefFree(def); return NULL; } - if (!(vm = lxcAssignVMDef(conn, driver, def))) { - lxcFreeVMDef(def); + if (!(vm = virDomainAssignDef(conn, &driver->domains, def))) { + virDomainDefFree(def); return NULL; } - if (lxcSaveVMDef(conn, driver, vm, def) < 0) { - lxcRemoveInactiveVM(driver, vm); + if (virDomainSaveConfig(conn, + driver->configDir, + driver->autostartDir, + vm) < 0) { + virDomainRemoveInactive(&driver->domains, vm); return NULL; } @@ -262,7 +267,7 @@ static int lxcDomainUndefine(virDomainPtr dom) { lxc_driver_t *driver = (lxc_driver_t *)dom->conn->privateData; - lxc_vm_t *vm = lxcFindVMByUUID(driver, dom->uuid); + virDomainObjPtr vm = virDomainFindByUUID(driver->domains, dom->uuid); if (!vm) { lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -270,19 +275,18 @@ return -1; } - if (lxcIsActiveVM(vm)) { + if (virDomainIsActive(vm)) { lxcError(dom->conn, dom, VIR_ERR_INTERNAL_ERROR, _("cannot delete active domain")); return -1; } - if (lxcDeleteConfig(dom->conn, driver, vm->configFile, vm->def->name) < 0) { + if (virDomainDeleteConfig(dom->conn, vm) <0) return -1; - } vm->configFile[0] = '\0'; - lxcRemoveInactiveVM(driver, vm); + virDomainRemoveInactive(&driver->domains, vm); return 0; } @@ -291,7 +295,7 @@ virDomainInfoPtr info) { lxc_driver_t *driver = (lxc_driver_t *)dom->conn->privateData; - lxc_vm_t *vm = lxcFindVMByUUID(driver, dom->uuid); + virDomainObjPtr vm = virDomainFindByUUID(driver->domains, dom->uuid); if (!vm) { lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -301,30 +305,23 @@ info->state = vm->state; - if (!lxcIsActiveVM(vm)) { + if (!virDomainIsActive(vm)) { info->cpuTime = 0; } else { info->cpuTime = 0; } - info->maxMem = vm->def->maxMemory; - info->memory = vm->def->maxMemory; + info->maxMem = vm->def->maxmem; + info->memory = vm->def->memory; info->nrVirtCpu = 1; return 0; } -static char *lxcGetOSType(virDomainPtr dom ATTRIBUTE_UNUSED) -{ - /* Linux containers only run on Linux */ - return strdup("linux"); -} - -static char *lxcDomainDumpXML(virDomainPtr dom, - int flags ATTRIBUTE_UNUSED) +static char *lxcGetOSType(virDomainPtr dom) { lxc_driver_t *driver = (lxc_driver_t *)dom->conn->privateData; - lxc_vm_t *vm = lxcFindVMByUUID(driver, dom->uuid); + virDomainObjPtr vm = virDomainFindByUUID(driver->domains, dom->uuid); if (!vm) { lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -332,7 +329,25 @@ return NULL; } - return lxcGenerateXML(dom->conn, driver, vm, vm->def); + return strdup(vm->def->os.type); +} + +static char *lxcDomainDumpXML(virDomainPtr dom, + int flags) +{ + lxc_driver_t *driver = (lxc_driver_t *)dom->conn->privateData; + virDomainObjPtr vm = virDomainFindByUUID(driver->domains, dom->uuid); + + if (!vm) { + lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, + _("no domain with matching uuid")); + return NULL; + } + + return virDomainDefFormat(dom->conn, + (flags & VIR_DOMAIN_XML_INACTIVE) && + vm->newDef ? vm->newDef : vm->def, + flags); } @@ -346,7 +361,7 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcVMCleanup(lxc_driver_t *driver, lxc_vm_t * vm) +static int lxcVMCleanup(virDomainObjPtr vm) { int rc = -1; int waitRc; @@ -371,8 +386,6 @@ vm->state = VIR_DOMAIN_SHUTOFF; vm->pid = -1; vm->def->id = -1; - driver->nactivevms--; - driver->ninactivevms++; return rc; } @@ -388,12 +401,12 @@ * Returns 0 on success or -1 in case of error */ static int lxcSetupInterfaces(virConnectPtr conn, - lxc_vm_def_t *def, + virDomainDefPtr def, int *nveths, char ***veths) { int rc = -1; - lxc_net_def_t *net; + virDomainNetDefPtr net; char *bridge = NULL; char parentVeth[PATH_MAX] = ""; char containerVeth[PATH_MAX] = ""; @@ -403,8 +416,11 @@ return -1; for (net = def->nets; net; net = net->next) { - if (LXC_NET_NETWORK == net->type) { - virNetworkPtr network = virNetworkLookupByName(conn, net->txName); + switch (net->type) { + case VIR_DOMAIN_NET_TYPE_NETWORK: + { + virNetworkPtr network = virNetworkLookupByName(conn, + net->data.network.name); if (!network) { goto error_exit; } @@ -412,8 +428,11 @@ bridge = virNetworkGetBridgeName(network); virNetworkFree(network); - } else { - bridge = net->txName; + break; + } + case VIR_DOMAIN_NET_TYPE_BRIDGE: + bridge = net->data.bridge.brname; + break; } DEBUG("bridge: %s", bridge); @@ -424,8 +443,8 @@ } DEBUG0("calling vethCreate()"); - if (NULL != net->parentVeth) { - strcpy(parentVeth, net->parentVeth); + if (NULL != net->ifname) { + strcpy(parentVeth, net->ifname); } DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth); if (0 != (rc = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) { @@ -433,15 +452,15 @@ _("failed to create veth device pair: %d"), rc); goto error_exit; } - if (NULL == net->parentVeth) { - net->parentVeth = strdup(parentVeth); + if (NULL == net->ifname) { + net->ifname = strdup(parentVeth); } if (VIR_REALLOC_N(*veths, (*nveths)+1) < 0) goto error_exit; if (((*veths)[(*nveths)++] = strdup(containerVeth)) == NULL) goto error_exit; - if (NULL == net->parentVeth) { + if (NULL == net->ifname) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to allocate veth names")); goto error_exit; @@ -473,7 +492,7 @@ static int lxcMonitorServer(virConnectPtr conn, lxc_driver_t * driver, - lxc_vm_t *vm) + virDomainObjPtr vm) { char *sockpath = NULL; int fd; @@ -523,7 +542,7 @@ static int lxcMonitorClient(virConnectPtr conn, lxc_driver_t * driver, - lxc_vm_t *vm) + virDomainObjPtr vm) { char *sockpath = NULL; int fd; @@ -576,11 +595,12 @@ */ static int lxcVmStart(virConnectPtr conn, lxc_driver_t * driver, - lxc_vm_t * vm) + virDomainObjPtr vm) { int rc = -1, i; int monitor; int parentTty; + char *parentTtyPath = NULL; char *logfile = NULL; pid_t pid; int nveths = 0; @@ -603,11 +623,17 @@ goto cleanup; /* open parent tty */ - VIR_FREE(vm->def->tty); - if (virFileOpenTty(&parentTty, &vm->def->tty, 1) < 0) { + if (virFileOpenTty(&parentTty, &parentTtyPath, 1) < 0) { lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, _("failed to allocate tty: %s"), strerror(errno)); + } + if (vm->def->console && + vm->def->console->type == VIR_DOMAIN_CHR_TYPE_PTY) { + VIR_FREE(vm->def->console->data.file.path); + vm->def->console->data.file.path = parentTtyPath; + } else { + VIR_FREE(parentTtyPath); } if (lxcSetupInterfaces(conn, vm->def, &nveths, &veths) < 0) @@ -626,8 +652,6 @@ vm->pid = vm->def->id = pid; vm->state = VIR_DOMAIN_RUNNING; - driver->ninactivevms--; - driver->nactivevms++; rc = 0; @@ -662,7 +686,7 @@ int rc = -1; virConnectPtr conn = dom->conn; lxc_driver_t *driver = (lxc_driver_t *)(conn->privateData); - lxc_vm_t *vm = lxcFindVMByName(driver, dom->name); + virDomainObjPtr vm = virDomainFindByName(driver->domains, dom->name); if (!vm) { lxcError(conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -691,26 +715,20 @@ const char *xml, unsigned int flags ATTRIBUTE_UNUSED) { lxc_driver_t *driver = (lxc_driver_t *)conn->privateData; - lxc_vm_t *vm; - lxc_vm_def_t *def; + virDomainObjPtr vm; + virDomainDefPtr def; virDomainPtr dom = NULL; - if (!(def = lxcParseVMDef(conn, xml, NULL))) { + if (!(def = virDomainDefParseString(conn, driver->caps, xml))) + goto return_point; + + if (!(vm = virDomainAssignDef(conn, &driver->domains, def))) { + virDomainDefFree(def); goto return_point; } - if (!(vm = lxcAssignVMDef(conn, driver, def))) { - lxcFreeVMDef(def); - goto return_point; - } - - if (lxcSaveVMDef(conn, driver, vm, def) < 0) { - lxcRemoveInactiveVM(driver, vm); - return NULL; - } - if (lxcVmStart(conn, driver, vm) < 0) { - lxcRemoveInactiveVM(driver, vm); + virDomainRemoveInactive(&driver->domains, vm); goto return_point; } @@ -735,7 +753,7 @@ { int rc = -1; lxc_driver_t *driver = (lxc_driver_t*)dom->conn->privateData; - lxc_vm_t *vm = lxcFindVMByID(driver, dom->id); + virDomainObjPtr vm = virDomainFindByID(driver->domains, dom->id); if (!vm) { lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -754,7 +772,7 @@ vm->state = VIR_DOMAIN_SHUTDOWN; - rc = lxcVMCleanup(driver, vm); + rc = lxcVMCleanup(vm); error_out: return rc; @@ -773,7 +791,7 @@ { int rc = -1; lxc_driver_t *driver = (lxc_driver_t*)dom->conn->privateData; - lxc_vm_t *vm = lxcFindVMByID(driver, dom->id); + virDomainObjPtr vm = virDomainFindByID(driver->domains, dom->id); if (!vm) { lxcError(dom->conn, dom, VIR_ERR_INVALID_DOMAIN, @@ -792,7 +810,7 @@ vm->state = VIR_DOMAIN_SHUTDOWN; - rc = lxcVMCleanup(driver, vm); + rc = lxcVMCleanup(vm); error_out: return rc; @@ -816,7 +834,7 @@ static int lxcStartup(void) { uid_t uid = getuid(); - lxc_vm_t *vm; + virDomainObjPtr vm; /* Check that the user is root */ if (0 != uid) { @@ -839,13 +857,21 @@ return -1; } - /* Call function to load the container configuration files */ - if (lxcLoadContainerInfo(lxc_driver) < 0) { + if ((lxc_driver->caps = lxcCapsInit()) == NULL) { lxcShutdown(); return -1; } - vm = lxc_driver->vms; + if (virDomainLoadAllConfigs(NULL, + lxc_driver->caps, + &lxc_driver->domains, + lxc_driver->configDir, + lxc_driver->autostartDir) < 0) { + lxcShutdown(); + return -1; + } + + vm = lxc_driver->domains; while (vm) { pid_t pid; if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) { @@ -863,8 +889,6 @@ vm->pid = vm->def->id = pid; vm->state = VIR_DOMAIN_RUNNING; - lxc_driver->ninactivevms--; - lxc_driver->nactivevms++; vm = vm->next; } @@ -875,6 +899,7 @@ static void lxcFreeDriver(lxc_driver_t *driver) { VIR_FREE(driver->configDir); + VIR_FREE(driver->autostartDir); VIR_FREE(driver->stateDir); VIR_FREE(driver->logDir); VIR_FREE(driver); @@ -882,10 +907,15 @@ static int lxcShutdown(void) { + virDomainObjPtr vm; if (lxc_driver == NULL) return(-1); - lxcFreeVMs(lxc_driver->vms); - lxc_driver->vms = NULL; + vm = lxc_driver->domains; + while (vm) { + virDomainObjPtr next = vm->next; + virDomainObjFree(vm); + vm = next; + } lxcFreeDriver(lxc_driver); lxc_driver = NULL; @@ -901,13 +931,17 @@ */ static int lxcActive(void) { + virDomainObjPtr dom; + if (lxc_driver == NULL) return(0); - /* If we've any active networks or guests, then we - * mark this driver as active - */ - if (lxc_driver->nactivevms) - return 1; + + dom = lxc_driver->domains; + while (dom) { + if (virDomainIsActive(dom)) + return 1; + dom = dom->next; + } /* Otherwise we're happy to deal with a shutdown */ return 0; -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Although the LXC driver can cope with libvirtd restarts, there is one small problem relating to the XML format. The XML format as stored in /etc/libvirtd/lxc/NAME.xml is the canonical 'inactive' configuration data. There are a few pieces of data which are filled in at runtime, such as the current /dev/pty/XXX path, and vethXXX inteface name. Previously, the LXC drive would re-write /etc/libvirtd/lxc/NAME.xml to add in this data, but this is not good practice. Even though we tell sysadmins not to change this files directly, many people do, and this obliterates data important to libvirt. The 'controller' process however knows what the live config data is since it has a copy of the virDomainDefPtr object from when it was started up. The controller process stays around even across restarts of the libvirtd daemon. So this patch updates the handshake between the libvirtd LXC driver & controller, so that as well as sending back the PID, it also sends back the live XML configuration. So when starting up, libvirtd can get back a guarenteed accurate XML doc for the current live container config. I'm not entirely happy with this approach though - I'd ideally like to avoid having communications between the libvirtd & the controller process if practical, because it forms an ABI we have to preserve across daemon upgrades. So I'm sort of inclined to change this so that the 'live' XML config and PID are simply kept on disk in the paths /var/run/libvirt/lxc/NAME.{xml,pid} alongside the UNIX socket, and then just keep the UNIX socket connection as a way for getting notification of container shutdown (via POLLHUP). domain_conf.c | 6 +---- lxc_controller.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++----- lxc_controller.h | 1 lxc_driver.c | 20 ++++++++++++++++ 4 files changed, 83 insertions(+), 10 deletions(-) Daniel diff -r 33a641a0a0d9 src/domain_conf.c --- a/src/domain_conf.c Mon Jul 14 21:34:07 2008 +0100 +++ b/src/domain_conf.c Tue Jul 15 11:35:10 2008 +0100 @@ -1078,13 +1078,11 @@ break; case VIR_DOMAIN_CHR_TYPE_PTY: - /* @path attribute is an output only property - pty is auto-allocted */ - break; - case VIR_DOMAIN_CHR_TYPE_DEV: case VIR_DOMAIN_CHR_TYPE_FILE: case VIR_DOMAIN_CHR_TYPE_PIPE: - if (path == NULL) { + if (path == NULL && + def->type != VIR_DOMAIN_CHR_TYPE_PTY) { virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("Missing source path attribute for char device")); goto error; diff -r 33a641a0a0d9 src/lxc_controller.c --- a/src/lxc_controller.c Mon Jul 14 21:34:07 2008 +0100 +++ b/src/lxc_controller.c Tue Jul 15 11:35:10 2008 +0100 @@ -54,6 +54,55 @@ return -1; return pid; +} + +char *lxcControllerLiveConfig(int monitor) +{ + unsigned int len; + char *data; + + if (saferead(monitor, &len, sizeof(len)) != sizeof(len)) + return NULL; + + /* Refuse stupid size allocations */ + if (len > (1024 * 1024)) + return NULL; + + if (VIR_ALLOC_N(data, len+1) < 0) + return NULL; + + if (saferead(monitor, data, len) != len) { + VIR_FREE(data); + return NULL; + } + data[len] = '\0'; + printf("[%s]\n", data); + return data; +} + + +static int lxcControllerSendGreeting(int client, + virDomainDefPtr def) +{ + pid_t pid = getpid(); + char *xml; + int ret = -1; + unsigned int len; + + /* Sync up with libvirtd LXC driver to tell it our PID */ + if (safewrite(client, &pid, sizeof(pid)) != sizeof(pid)) + return -1; + + if (!(xml = virDomainDefFormat(NULL, def, VIR_DOMAIN_XML_SECURE))) + return -1; + + len = strlen(xml); + if (safewrite(client, &len, sizeof(len)) == sizeof(len) && + safewrite(client, xml, len) == len) + ret = 0; + VIR_FREE(xml); + + return ret; } /** @@ -110,7 +159,8 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcControllerMain(int monitor, +static int lxcControllerMain(virDomainDefPtr def, + int monitor, int client, int appPty, int contPty) @@ -178,13 +228,11 @@ fprintf(stderr, "%d %d\n", epollEvent.data.fd, epollEvent.events); if (epollEvent.data.fd == monitor) { int fd = accept(monitor, NULL, 0); - pid_t pid = getpid(); if (client != -1) { close(fd); continue; } - /* Sync up with libvirtd LXC driver to tell it our PID */ - if (safewrite(fd, &pid, sizeof(pid)) != sizeof(pid)) { + if (lxcControllerSendGreeting(fd, def) < 0) { close(fd); continue; } @@ -362,7 +410,11 @@ if (lxcContainerSendContinue(control[0]) < 0) goto cleanup; - rc = lxcControllerMain(monitor, client, appPty, containerPty); + rc = lxcControllerMain(def, + monitor, + client, + appPty, + containerPty); cleanup: if (control[0] != -1) @@ -463,8 +515,10 @@ _exit(-1); /* Sync up with libvirtd LXC driver to tell it our PID */ - if (safewrite(client, &pid, sizeof(pid)) != sizeof(pid)) + if (lxcControllerSendGreeting(client, def) < 0) { + close(client); _exit(-1); + } /* Controlling libvirtd LXC driver now knows what our PID is, and is able to cleanup after diff -r 33a641a0a0d9 src/lxc_controller.h --- a/src/lxc_controller.h Mon Jul 14 21:34:07 2008 +0100 +++ b/src/lxc_controller.h Tue Jul 15 11:35:10 2008 +0100 @@ -29,6 +29,7 @@ #include "lxc_conf.h" pid_t lxcControllerPID(int monitor); +char *lxcControllerLiveConfig(int monitor); int lxcControllerStart(virDomainDefPtr def, int nveths, diff -r 33a641a0a0d9 src/lxc_driver.c --- a/src/lxc_driver.c Mon Jul 14 21:34:07 2008 +0100 +++ b/src/lxc_driver.c Tue Jul 15 11:35:10 2008 +0100 @@ -874,6 +874,8 @@ vm = lxc_driver->domains; while (vm) { pid_t pid; + char *xml; + virDomainDefPtr def; if ((vm->monitor = lxcMonitorClient(NULL, lxc_driver, vm)) < 0) { vm = vm->next; continue; @@ -881,10 +883,28 @@ /* Read pid from controller */ if ((pid = lxcControllerPID(vm->monitor)) < 0) { + printf("Failed to get PID\n"); close(vm->monitor); vm->monitor = -1; vm = vm->next; continue; + } + + /* Fetch and load the live container config */ + if ((xml = lxcControllerLiveConfig(vm->monitor)) == NULL) { + printf("Failed to get config\n"); + /* XXX should we kill off the container if this + happens ? */ + vm = vm->next; + continue; + } + def = virDomainDefParseString(NULL, lxc_driver->caps, xml); + VIR_FREE(xml); + if (def) { + vm->newDef = vm->def; + vm->def = def; + } else { + printf("Failed to pase config\n"); } vm->pid = vm->def->id = pid; -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Daniel P. Berrange schreef:
I'm not entirely happy with this approach though - I'd ideally like to avoid having communications between the libvirtd & the controller process if practical,
Yesterday I was reading into the Xen approach that is taken on this, with respect to defining and undefining domains. Would it be possible to have libvirt(d) handle the defines without xenstore to know? Stefan
participants (3)
-
Dan Smith
-
Daniel P. Berrange
-
Stefan de Konink