# HG changeset patch
# User Dan Smith <danms(a)us.ibm.com>
# Date 1213891185 25200
# Node ID cb780a7b3ad591f1a9392d6528218b3aa2c3483d
# Parent acf369a2543ad52b235ae8541c8ad05670e255bd
[LXC] Add setup/cleanup of container network interfaces
diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_conf.h
--- a/src/lxc_conf.h Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_conf.h Thu Jun 19 08:59:45 2008 -0700
@@ -35,6 +35,12 @@
#define LXC_MAX_XML_LENGTH 16384
#define LXC_MAX_ERROR_LEN 1024
#define LXC_DOMAIN_TYPE "lxc"
+#define LXC_PARENT_SOCKET 0 /* sockpair[] index the parent writes the continue message to */
+#define LXC_CONTAINER_SOCKET 1 /* sockpair[] index the container reads the continue message from */
+
+/* messages between parent and container */
+typedef char lxc_message_t;
+#define LXC_CONTINUE_MSG 'c' /* sent by the parent once post-clone setup is finished */
/* types of networks for containers */
enum lxc_net_type {
@@ -96,6 +102,8 @@
int parentTty;
int containerTtyFd;
char *containerTty;
+
+ int sockpair[2];
lxc_vm_def_t *def;
diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_container.c
--- a/src/lxc_container.c Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_container.c Thu Jun 19 08:59:45 2008 -0700
@@ -36,6 +36,7 @@
#include "lxc_conf.h"
#include "util.h"
#include "memory.h"
+#include "veth.h"
#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
@@ -159,6 +160,74 @@
}
/**
+ * lxcWaitForContinue:
+ * @vm: Pointer to vm structure
+ *
+ * Called in the container process.  Blocks until the parent process sends
+ * the continue message over the socket pair stored in the vm structure;
+ * the parent sends it once it has completed the post clone container
+ * setup.  Both ends of the socket pair are closed before returning,
+ * whether or not the message was received.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcWaitForContinue(lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_message_t msg;
+    int readLen = 0;
+
+    readLen = saferead(vm->sockpair[LXC_CONTAINER_SOCKET], &msg, sizeof(msg));
+    if (readLen != sizeof(msg)) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("Failed to read the container continue message: %s"),
+                 strerror(errno));
+        goto cleanup;
+    }
+
+    DEBUG0("Received container continue message");
+
+    rc = 0;
+
+cleanup:
+    /* the socket pair is single-use, so release it on every path */
+    close(vm->sockpair[LXC_PARENT_SOCKET]);
+    vm->sockpair[LXC_PARENT_SOCKET] = -1;
+    close(vm->sockpair[LXC_CONTAINER_SOCKET]);
+    vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
+
+    return rc;
+}
+
+#ifdef HAVE_NETNS
+/**
+ * lxcEnableInterfaces:
+ * @vm: Pointer to vm structure
+ *
+ * Brings up each container veth device listed in the vm definition,
+ * stopping at the first one that fails, and then brings up the loopback
+ * device.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * vethInterfaceUpOrDown() call
+ */
+static int lxcEnableInterfaces(lxc_vm_t *vm)
+{
+    lxc_net_def_t *iface;
+
+    for (iface = vm->def->nets; iface != NULL; iface = iface->next) {
+        int status;
+
+        DEBUG("Enabling %s", iface->containerVeth);
+        status = vethInterfaceUpOrDown(iface->containerVeth, 1);
+        if (status != 0) {
+            return status;
+        }
+    }
+
+    /* enable lo device */
+    return vethInterfaceUpOrDown("lo", 1);
+}
+#endif /* HAVE_NETNS */
+
+/**
* lxcChild:
* @argv: Pointer to container arguments
*
@@ -210,6 +279,18 @@
goto cleanup;
}
+ /* Wait for interface devices to show up */
+ if (0 != (rc = lxcWaitForContinue(vm))) {
+ goto cleanup;
+ }
+
+#ifdef HAVE_NETNS
+ /* enable interfaces */
+ if (0 != (rc = lxcEnableInterfaces(vm))) {
+ goto cleanup;
+ }
+#endif
+
rc = lxcExecWithTty(vm);
/* this function will only return if an error occured */
diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_driver.c
--- a/src/lxc_driver.c Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_driver.c Thu Jun 19 08:59:45 2008 -0700
@@ -44,6 +44,9 @@
#include "memory.h"
#include "util.h"
#include "memory.h"
+#include "bridge.h"
+#include "qemu_conf.h"
+#include "veth.h"
/* debug macros */
#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
@@ -66,6 +69,9 @@
#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC 0x08000000
#endif
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#endif
static int lxcStartup(void);
static int lxcShutdown(void);
@@ -81,6 +87,9 @@
{
int rc = 0;
int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
+#ifdef HAVE_NETNS
+ CLONE_NEWNET|
+#endif
CLONE_NEWIPC|SIGCHLD;
int cpid;
char *childStack;
@@ -237,6 +246,9 @@
static int lxcNumDomains(virConnectPtr conn)
{
lxc_driver_t *driver = (lxc_driver_t *)conn->privateData;
+
+    DEBUG("driver: %p network: %p", conn->privateData,
+          conn->networkPrivateData);
+
return driver->nactivevms;
}
@@ -384,6 +396,197 @@
return lxcGenerateXML(dom->conn, driver, vm, vm->def);
}
+#ifdef HAVE_NETNS
+/**
+ * lxcSetupInterfaces:
+ * @conn: pointer to connection
+ * @vm: pointer to virtual machine structure
+ *
+ * Sets up the container interfaces by creating the veth device pairs and
+ * attaching the parent end to the appropriate bridge.  The container end
+ * will be moved into the container namespace later, after clone has been
+ * called.  When an interface has no configured device name, the name left
+ * in the buffer by vethCreate() is stored back into its definition.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetupInterfaces(virConnectPtr conn,
+                              lxc_vm_t *vm)
+{
+    int rc = -1;
+    int err = 0;
+    struct qemud_driver *networkDriver =
+        (struct qemud_driver *)(conn->networkPrivateData);
+    lxc_net_def_t *net = NULL;
+    char *ownedBridge = NULL;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        char *bridge = NULL;
+        /* fresh, empty buffers per interface so that names produced for a
+         * previous interface are never handed to vethCreate() again */
+        char parentVeth[PATH_MAX] = "";
+        char containerVeth[PATH_MAX] = "";
+
+        if (LXC_NET_NETWORK == net->type) {
+            virNetworkPtr network = virNetworkLookupByName(conn, net->txName);
+            if (!network) {
+                goto error_exit;
+            }
+
+            /* the returned string belongs to us; it is released below */
+            ownedBridge = virNetworkGetBridgeName(network);
+            bridge = ownedBridge;
+
+            virNetworkFree(network);
+
+        } else {
+            /* borrowed from the definition, must not be freed here */
+            bridge = net->txName;
+        }
+
+        if (NULL == bridge) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to get bridge for interface"));
+            goto error_exit;
+        }
+        DEBUG("bridge: %s", bridge);
+
+        if (NULL != net->parentVeth) {
+            if (strlen(net->parentVeth) >= sizeof(parentVeth)) {
+                lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                         _("veth device name too long: %s"), net->parentVeth);
+                goto error_exit;
+            }
+            strcpy(parentVeth, net->parentVeth);
+        }
+        if (NULL != net->containerVeth) {
+            if (strlen(net->containerVeth) >= sizeof(containerVeth)) {
+                lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                         _("veth device name too long: %s"), net->containerVeth);
+                goto error_exit;
+            }
+            strcpy(containerVeth, net->containerVeth);
+        }
+
+        DEBUG0("calling vethCreate()");
+        DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
+        if (0 != (err = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to create veth device pair: %d"), err);
+            goto error_exit;
+        }
+
+        /* remember the names in use so later stages can find the devices */
+        if (NULL == net->parentVeth) {
+            net->parentVeth = strdup(parentVeth);
+        }
+        if (NULL == net->containerVeth) {
+            net->containerVeth = strdup(containerVeth);
+        }
+        if (NULL == net->parentVeth || NULL == net->containerVeth) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to store veth device names"));
+            goto error_exit;
+        }
+
+        if (NULL == networkDriver) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("network driver is not available"));
+            goto error_exit;
+        }
+
+        if (!(networkDriver->brctl) &&
+            (err = brInit(&(networkDriver->brctl)))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("cannot initialize bridge support: %s"),
+                     strerror(err));
+            goto error_exit;
+        }
+
+        if (0 != (err = brAddInterface(networkDriver->brctl, bridge, parentVeth))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to add %s device to %s: %s"),
+                     parentVeth,
+                     bridge,
+                     strerror(err));
+            goto error_exit;
+        }
+
+        if (0 != (err = vethInterfaceUpOrDown(parentVeth, 1))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to enable parent ns veth device: %d"), err);
+            goto error_exit;
+        }
+
+        free(ownedBridge);
+        ownedBridge = NULL;
+    }
+
+    rc = 0;
+
+error_exit:
+    /* non-NULL only when an error interrupted a network-type interface */
+    free(ownedBridge);
+    return rc;
+}
+
+/**
+ * lxcMoveInterfacesToNetNs:
+ * @conn: pointer to connection
+ * @vm: pointer to virtual machine structure
+ *
+ * Moves the container end of every veth pair in the vm definition into the
+ * network namespace identified by vm->def->id by calling
+ * moveInterfaceToNetNs() for each one.  Stops at the first failure.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcMoveInterfacesToNetNs(virConnectPtr conn,
+                                    lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_net_def_t *net = NULL;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        if (0 != moveInterfaceToNetNs(net->containerVeth, vm->def->id)) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to move interface %s to ns %d"),
+                     net->containerVeth, vm->def->id);
+            goto error_exit;
+        }
+    }
+
+    rc = 0;
+
+error_exit:
+    return rc;
+}
+
+/**
+ * lxcCleanupInterfaces:
+ * @vm: pointer to virtual machine structure
+ *
+ * Cleans up the container interfaces by calling vethDelete() on the parent
+ * end of each veth device pair.  Interfaces without a recorded parent
+ * device name are skipped.  A failure is reported and the remaining
+ * interfaces are still processed.
+ *
+ * Returns 0 if every delete succeeded or -1 if any of them failed
+ */
+static int lxcCleanupInterfaces(lxc_vm_t *vm)
+{
+    int rc = 0;
+    lxc_net_def_t *net = NULL;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        if (NULL == net->parentVeth) {
+            /* no device name was recorded for this interface */
+            continue;
+        }
+
+        if (0 != vethDelete(net->parentVeth)) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to delete veth: %s"), net->parentVeth);
+            /* will continue to try to cleanup any other interfaces */
+            rc = -1;
+        }
+    }
+
+    return rc;
+}
+#endif /* HAVE_NETNS */
+
+/**
+ * lxcSendContainerContinue:
+ * @vm: pointer to virtual machine structure
+ *
+ * Writes the continue message to the parent end of the socket pair held
+ * in the vm structure.  A NULL vm is rejected without writing anything.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSendContainerContinue(lxc_vm_t *vm)
+{
+    lxc_message_t continueMsg = LXC_CONTINUE_MSG;
+    int written;
+
+    if (vm == NULL) {
+        return -1;
+    }
+
+    written = safewrite(vm->sockpair[LXC_PARENT_SOCKET], &continueMsg,
+                        sizeof(continueMsg));
+    if (written != sizeof(continueMsg)) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("unable to send container continue message: %s"),
+                 strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
/**
* lxcStartContainer:
* @conn: pointer to connection
@@ -411,7 +614,11 @@
}
stacktop = stack + stacksize;
- flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+ flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
+#ifdef HAVE_NETNS
+ CLONE_NEWNET|
+#endif
+ CLONE_NEWIPC|SIGCHLD;
vm->def->id = clone(lxcChild, stacktop, flags, (void *)vm);
@@ -809,7 +1016,34 @@
close(vm->parentTty);
close(vm->containerTtyFd);
+#ifdef HAVE_NETNS
+ if (0 != (rc = lxcSetupInterfaces(conn, vm))) {
+ goto cleanup;
+ }
+#endif /* HAVE_NETNS */
+
+ /* create a socket pair to send continue message to the container once */
+ /* we've completed the post clone configuration */
+ if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, vm->sockpair)) {
+ lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("sockpair failed: %s"), strerror(errno));
+ goto cleanup;
+ }
+
+ /* check this rc */
+
rc = lxcStartContainer(conn, driver, vm);
+
+#ifdef HAVE_NETNS
+ rc = lxcMoveInterfacesToNetNs(conn, vm);
+#endif
+
+ rc = lxcSendContainerContinue(vm);
+
+ close(vm->sockpair[LXC_PARENT_SOCKET]);
+ vm->sockpair[LXC_PARENT_SOCKET] = -1;
+ close(vm->sockpair[LXC_CONTAINER_SOCKET]);
+ vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
if (rc == 0) {
vm->state = VIR_DOMAIN_RUNNING;
@@ -948,6 +1182,11 @@
int waitRc;
int childStatus = -1;
+ /* if this fails, we'll continue. it will report any errors */
+#ifdef HAVE_NETNS
+ lxcCleanupInterfaces(vm);
+#endif
+
while (((waitRc = waitpid(vm->def->id, &childStatus, 0)) == -1) &&
errno == EINTR);