"Daniel P. Berrange" <berrange(a)redhat.com> wrote:
...
[ Nice long explanation. ]
It'd be great to put that in the code.
lxc_conf.c | 195 ----------------
lxc_conf.h | 12
lxc_container.c | 39 +--
lxc_container.h | 8
lxc_controller.c | 349 +++++++++++++++++++++++++++-
lxc_controller.h | 12
lxc_driver.c | 662 ++++++++++++++++++++++++++-----------------------------
util.c | 158 +++++++++++++
util.h | 13 +
9 files changed, 870 insertions(+), 578 deletions(-)
...
diff -r 8093fb566748 src/lxc_conf.c
diff -r 8093fb566748 src/lxc_conf.h
--- a/src/lxc_conf.h Fri Aug 01 14:47:33 2008 +0100
+++ b/src/lxc_conf.h Tue Aug 05 12:13:24 2008 +0100
@@ -46,7 +46,6 @@
struct __lxc_net_def {
int type;
char *parentVeth; /* veth device in parent namespace */
- char *containerVeth; /* veth device in container namespace */
char *txName; /* bridge or network name */
lxc_net_def_t *next;
@@ -87,11 +86,10 @@
struct __lxc_vm {
int pid;
int state;
+ int monitor;
I like "monitor_fd" ;-)
diff -r 8093fb566748 src/lxc_container.c
--- a/src/lxc_container.c Fri Aug 01 14:47:33 2008 +0100
+++ b/src/lxc_container.c Tue Aug 05 12:13:24 2008 +0100
@@ -69,6 +69,8 @@
typedef struct __lxc_child_argv lxc_child_argv_t;
struct __lxc_child_argv {
lxc_vm_def_t *config;
+ int nveths;
From the looks of all uses, this can be an unsigned type.
Then
readers won't wonder if negative values are significant.
+ char **veths;
int monitor;
char *ttyPath;
};
@@ -171,8 +173,7 @@
*
* Returns 0 on success or -1 in case of error
*/
-int lxcContainerSendContinue(virConnectPtr conn,
- int control)
+int lxcContainerSendContinue(int control)
I like "control_fd", but hesitated to mention it,
since this is a preexisting name.
{
int rc = -1;
lxc_message_t msg = LXC_CONTINUE_MSG;
@@ -180,7 +181,7 @@
writeCount = safewrite(control, &msg, sizeof(msg));
if (writeCount != sizeof(msg)) {
- lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
_("unable to send container continue message: %s"),
strerror(errno));
goto error_out;
@@ -230,21 +231,21 @@
*
* Returns 0 on success or nonzero in case of error
*/
-static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def)
+static int lxcContainerEnableInterfaces(int nveths,
+ char **veths)
{
- int rc = 0;
- const lxc_net_def_t *net;
+ int rc = 0, i;
Many style guides recommend against putting multiple ","-separated
declarations on the same line...
...
@@ -337,12 +338,11 @@
int flags;
int stacksize = getpagesize() * 4;
char *stack, *stacktop;
- lxc_child_argv_t args = { def, control, ttyPath };
+ lxc_child_argv_t args = { def, nveths, veths, control, ttyPath };
If possible, it'd be nice to make the struct "const".
...
@@ -379,8 +379,9 @@
char *stack;
int childStatus;
- if (features & LXC_CONTAINER_FEATURE_NET)
+ if (features & LXC_CONTAINER_FEATURE_NET) {
flags |= CLONE_NEWNET;
+ }
Since you're making a change like this, I suspect it's worth adding a
sentence+example to HACKING saying that we prefer to use braces even
when there's only one statement. Out of habit, I tend *not* to use
braces in that case, but have no trouble adapting. Codifying it might
help avoid a little churn.
Let me know and I'll be happy to make the change.
diff -r 8093fb566748 src/lxc_controller.c
...
-int lxcControllerMain(int appPty, int contPty)
+static int lxcControllerMain(int monitor,
+ int client,
+ int appPty,
+ int contPty)
{
int rc = -1;
int epollFd;
@@ -120,15 +166,29 @@
memset(&epollEvent, 0x00, sizeof(epollEvent));
epollEvent.events = EPOLLIN|EPOLLET; /* edge triggered */
epollEvent.data.fd = appPty;
- epollEvent.data.u32 = 0; /* fdArray position */
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
_("epoll_ctl(appPty) failed: %s"), strerror(errno));
goto cleanup;
}
epollEvent.data.fd = contPty;
- epollEvent.data.u32 = 1; /* fdArray position */
if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("epoll_ctl(contPty) failed: %s"), strerror(errno));
+ goto cleanup;
+ }
+
+ epollEvent.events = EPOLLIN;
+ epollEvent.data.fd = monitor;
+ if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, monitor, &epollEvent)) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("epoll_ctl(contPty) failed: %s"), strerror(errno));
+ goto cleanup;
+ }
+
+ epollEvent.events = EPOLLHUP;
+ epollEvent.data.fd = client;
+ if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent)) {
lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
_("epoll_ctl(contPty) failed: %s"), strerror(errno));
goto cleanup;
@@ -138,23 +198,46 @@
/* if active fd's, return if no events, else wait forever */
timeout = (numActive > 0) ? 0 : -1;
numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
- if (0 < numEvents) {
- if (epollEvent.events & EPOLLIN) {
- curFdOff = epollEvent.data.u32;
- if (!fdArray[curFdOff].active) {
- fdArray[curFdOff].active = 1;
- ++numActive;
+ if (numEvents > 0) {
+ if (epollEvent.data.fd == monitor) {
+ int fd = accept(monitor, NULL, 0);
+ if (client != -1 || /* Already connected, so kick new one out */
I presume you meant to insert "client = fd;" right after "fd = ...",
(or compare fd != -1, and then set client = fd after the "}"),
since it looks like "client" must be defined after this if/else chain.
+ lxcControllerServerHandshake(client) < 0) {
+ close(fd);
+ continue;
}
-
- } else if (epollEvent.events & EPOLLHUP) {
- DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
- continue;
+ client = fd;
+ epollEvent.events = EPOLLHUP;
+ epollEvent.data.fd = client;
+ if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, client, &epollEvent))
{
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("epoll_ctl(contPty) failed: %s"),
strerror(errno));
+ goto cleanup;
+ }
+ } else if (client != -1 && epollEvent.data.fd == client) {
...
+static int
+lxcControllerRun(const char *stateDir,
+ lxc_vm_def_t *def,
+ int nveths,
+ char **veths,
+ int monitor,
+ int client,
+ int appPty)
+{
+ int rc = -1;
+ int control[2] = { -1, -1};
+ int containerPty;
+ char *containerPtyPath;
+ pid_t container = -1;
+
+ if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("sockpair failed: %s"), strerror(errno));
+ goto cleanup;
+ }
+
+ if (virFileOpenTty(&containerPty,
+ &containerPtyPath,
+ 0) < 0) {
+ lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to allocate tty: %s"), strerror(errno));
+ goto cleanup;
+ }
+
+ if ((container = lxcContainerStart(def,
+ nveths,
+ veths,
+ control[1],
+ containerPtyPath)) < 0)
+ goto cleanup;
+ close(control[1]);
+ control[1] = -1;
+
+ if (lxcControllerMoveInterfaces(nveths, veths, container) < 0)
+ goto cleanup;
+
+ if (lxcContainerSendContinue(control[0]) < 0)
+ goto cleanup;
+
+ rc = lxcControllerMain(monitor, client, appPty, containerPty);
+
+cleanup:
+ if (control[0] != -1)
+ close(control[0]);
+ if (control[1] != -1)
+ close(control[1]);
+ VIR_FREE(containerPtyPath);
+ if (containerPty != -1)
+ close(containerPty);
+
+ kill(container, SIGTERM);
+ waitpid(container, NULL, 0);
It might be worthwhile to detect kill or waitpid failure when rc == 0.
+ lxcControllerCleanupInterfaces(nveths, veths);
+ virFileDeletePid(stateDir, def->name);
+ return -1;
I guess this should be "return rc;".
Otherwise, this function always returns -1
and "rc" is set but never used.
+}
+
+
+int lxcControllerStart(const char *stateDir,
...
diff -r 8093fb566748 src/lxc_driver.c
+/**
+ * lxcVmCleanup:
+ * @vm: Ptr to VM to clean up
+ *
+ * waitpid() on the container process. kill and wait the tty process
+ * This is called by boh lxcDomainDestroy and lxcSigHandler when a
s/boh/both/
+ * container exits.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcVMCleanup(virConnectPtr conn,
+ lxc_driver_t *driver,
+ lxc_vm_t * vm)
+{
+ int rc = -1;
+ int waitRc;
+ int childStatus = -1;
+
+ while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
+ errno == EINTR);
It's easier to recognize this as an empty loop if you give
it a comment and put the semicolon on its own line:
while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
errno == EINTR)
; /* empty */
Or give it an empty brace group:
while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
errno == EINTR) {
/* empty */
}
...
+static int lxcMonitorServer(virConnectPtr conn,
+ lxc_driver_t * driver,
+ lxc_vm_t *vm)
{
- int rc = -1;
- lxc_net_def_t *net;
+ char *sockpath = NULL;
+ int fd;
+ struct sockaddr_un addr;
- for (net = vm->def->nets; net; net = net->next) {
- if (0 != moveInterfaceToNetNs(net->containerVeth, vm->def->id)) {
+ if (asprintf(&sockpath, "%s/%s.sock",
+ driver->stateDir, vm->def->name) < 0) {
+ lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+
+ if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to create server socket: %s"),
It'd be nice to include sockpath in these diagnostics.
+ strerror(errno));
+ goto error;
+ }
+
+ unlink(sockpath);
Report failure other than ENOENT.
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+
+ if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to bind server socket: %s"),
+ strerror(errno));
+ goto error;
+ }
+ if (listen(fd, 30 /* backlog */ ) < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to listen server socket: %s"),
+ strerror(errno));
+ goto error;
+ return (-1);
That return is dead code.
...
+static int lxcMonitorClient(virConnectPtr conn,
+ lxc_driver_t * driver,
+ lxc_vm_t *vm)
+{
+ char *sockpath = NULL;
+ int fd;
+ struct sockaddr_un addr;
+
+ if (asprintf(&sockpath, "%s/%s.sock",
+ driver->stateDir, vm->def->name) < 0) {
+ lxcError(conn, NULL, VIR_ERR_NO_MEMORY, NULL);
+ return -1;
+ }
+
+ if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to create client socket: %s"),
+ strerror(errno));
Likewise, it'd be nice to mention sockpath in these.
+ goto error;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+
+ if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("failed to connect to client socket: %s"),
+ strerror(errno));
+ goto error;
+ }
...
I'm quoting the following snippet here (may be out of order),
because I spotted the problem only after eliding the original.
+ /* And get its pid */
+ if ((rc = virFileReadPid(driver->stateDir, vm->def->name, &vm->pid))
!= 0> ) {
lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
- _("sockpair failed: %s"), strerror(errno));
+ _("Failed to read pid file %s/%s.pid: %s"),
+ driver->stateDir, vm->def->name, strerror(errno));
That should be "rc", not "errno".
Otherwise, diagnosing the virFileReadPid parse error that is intended
to map to EINVAL, we'd use some unrelated errno value.
+ rc = -1;
goto cleanup;
}
int lxcRegister(void)
diff -r 8093fb566748 src/util.c
--- a/src/util.c Fri Aug 01 14:47:33 2008 +0100
+++ b/src/util.c Tue Aug 05 12:13:24 2008 +0100
@@ -37,6 +37,7 @@
#include <sys/wait.h>
#endif
#include <string.h>
+#include <termios.h>
#include "c-ctype.h"
#ifdef HAVE_PATHS_H
@@ -556,6 +557,163 @@
return 0;
}
+
+int virFileOpenTty(int *ttymaster,
+ char **ttyName,
+ int rawmode)
+{
+ int rc = -1;
As you know,
this function is a portability "challenge" ;-) (that's an understatement)
I suppose we'll need to guard this function with #ifdef WITH_LXC.
+ if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) <
0)
+ goto cleanup;
+
+ if (unlockpt(*ttymaster) < 0)
+ goto cleanup;
+
+ if (grantpt(*ttymaster) < 0)
+ goto cleanup;
+
+ if (rawmode) {
+ struct termios ttyAttr;
+ if (tcgetattr(*ttymaster, &ttyAttr) < 0)
+ goto cleanup;
+
+ cfmakeraw(&ttyAttr);
This is glibc-specific, but that's probably ok,
for something that is used only WITH_LXC.
+
+ if (tcsetattr(*ttymaster, TCSADRAIN, &ttyAttr) < 0)
+ goto cleanup;
+ }
+
+ if (ttyName) {
+ char tempTtyName[PATH_MAX];
+ if (ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName)) < 0)
+ goto cleanup;
+
+ if ((*ttyName = strdup(tempTtyName)) == NULL) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ }
+
+ rc = 0;
+
+cleanup:
+ if (rc != 0 &&
+ *ttymaster != -1) {
+ close(*ttymaster);
+ }
+
+ return rc;
+
+}
+
+
+int virFileWritePid(const char *dir,
+ const char *name,
+ pid_t pid)
+{
+ int rc;
+ int fd;
+ FILE *file = NULL;
+ char *pidfile = NULL;
+
+ if ((rc = virFileMakePath(dir))) {
+ goto cleanup;
+ }
+
+ if (asprintf(&pidfile, "%s/%s.pid", dir, name) < 0) {
+ rc = ENOMEM;
+ goto cleanup;
+ }
+
+ if ((fd = open(pidfile,
+ O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR)) < 0) {
+ rc = errno;
+ goto cleanup;
+ }
+
+ if (!(file = fdopen(fd, "w"))) {
+ rc = errno;
+ close(fd);
+ goto cleanup;
+ }
+
+ if (fprintf(file, "%d", pid) < 0) {
+ rc = errno;
+ goto cleanup;
+ }
+
+ rc = 0;
+
+cleanup:
+ if (file &&
+ fclose(file) < 0) {
+ rc = errno;
+ }
+
+ VIR_FREE(pidfile);
+ return rc;
+}
+
+int virFileReadPid(const char *dir,
+ const char *name,
+ pid_t *pid)
+{
+ int rc;
+ FILE *file;
+ char *pidfile = NULL;
+ *pid = 0;
+ if (asprintf(&pidfile, "%s/%s.pid", dir, name) < 0) {
+ rc = ENOMEM;
+ goto cleanup;
+ }
+
+ if (!(file = fopen(pidfile, "r"))) {
+ rc = errno;
+ goto cleanup;
+ }
+
+ if (fscanf(file, "%d", pid) != 1) {
+ rc = EINVAL;
+ goto cleanup;
+ }
[save this for a rainy day]
fscanf is not robust.
It does the job (and is almost always fine),
but fails to diagnose inputs like "938bogus suffix"
and silently wraps e.g., 4294967296 to 0. Then there's
the possibility that pid_t is not the same size as "int".
An alternative would be to read two "tokens" (e.g., with getdelim),
ensuring you get EOF on the second read, convert the first token to
an integer using virStrToLong_ui, then ensure the resulting value is
fits in a pid_t variable.
+ if (fclose(file) < 0) {
+ rc = errno;
+ goto cleanup;
+ }
+
+ rc = 0;
+
+ cleanup:
+ VIR_FREE(pidfile);
+ return rc;
+}
+
+int virFileDeletePid(const char *dir,
+ const char *name)
+{
+ int rc = 0;
+ char *pidfile = NULL;
+
+ if (asprintf(&pidfile, "%s/%s.pid", dir, name) < 0) {
+ rc = errno;
+ goto cleanup;
+ }
+
+ if (unlink(pidfile) < 0 &&
+ errno != ENOENT) {
+ rc = errno;
I'd put the errno test on the previous line.
Otherwise, it looks deceptively like "errno != ..." is
the beginning of a statement. This is part of the rationale
for putting operators like "&&" at the beginning of each
continued line, rather than at the end of the preceding one.
+ }
+
+cleanup:
+ VIR_FREE(pidfile);
+ return rc;
+}
+
+
+
/* Like strtol, but produce an "int" result, and check more carefully.
Return 0 upon success; return -1 to indicate failure.
When END_PTR is NULL, the byte after the final valid digit must be NUL.