On Wed, Jul 01, 2015 at 11:07:07PM +0530, ik.nitk wrote:
This patch adds feature for lxc containers to inherit namespaces.
This is very similar to what
lxc-tools or docker provides. Look for "man lxc-start" and you will find that
you can pass command args as
[ --share-[net|ipc|uts] name|pid ]. Or check out docker networking option in which you
can give --net=container:NAME_or_ID as an option for sharing namespace.
>From this patch you can add extra libvirt option to share namespace in following
way.
<lxc:namespace>
<lxc:sharenet type='netns' value='red'/>
<lxc:shareipc type='pid' value='12345'/>
<lxc:shareuts type='name' value='container1'/>
</lxc:namespace>
---
docs/drvlxc.html.in | 18 +++
docs/schemas/domaincommon.rng | 42 ++++++
src/Makefile.am | 4 +-
src/lxc/lxc_conf.c | 2 +-
src/lxc/lxc_conf.h | 15 +++
src/lxc/lxc_container.c | 236 +++++++++++++++++++++++++++++++++-
src/lxc/lxc_domain.c | 164 ++++++++++++++++++++++-
src/lxc/lxc_domain.h | 1 +
tests/lxcxml2xmldata/lxc-sharenet.xml | 33 +++++
tests/lxcxml2xmltest.c | 1 +
10 files changed, 507 insertions(+), 9 deletions(-)
create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml
diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
index a094bd9..d14d4c7 100644
--- a/docs/drvlxc.html.in
+++ b/docs/drvlxc.html.in
@@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default
can serious
affect the security of the container and the host.
</p>
+<h2><a name="share">Inherit namespaces</a></h2>
+
+<p>
+Libvirt allows you to inherit the namespace from container/process just like lxc tools
+or docker provides to share the network namespace. The following can be used to share
+required namespaces. If we want to share only one then the other namespaces can be
ignored.
+</p>
+<pre>
+<domain type='lxc'
xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'>
+...
+<lxc:namespace>
+ <lxc:sharenet type='netns' value='red'/>
+ <lxc:shareuts type='name' value='container1'/>
+ <lxc:shareipc type='pid' value='12345'/>
+</lxc:namespace>
+</domain>
+</pre>
Could you also document the attributes explicitly, the various 'type'
attribute values and what they expect for the corresponding 'value'
argument. In particular I'm unclear on what the value is when
using type='netns', so its a good idea to be explicit about this.
diff --git a/src/Makefile.am b/src/Makefile.am
index be63e26..ef96a5a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \
-I$(srcdir)/access \
-I$(srcdir)/conf \
$(AM_CFLAGS)
-libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS)
+libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS)
libvirt-lxc.la $(FUSE_LIBS)
What was the $LIBXML_LIBS addition needed for ?
I can see why you added libvirt-lxc.la but I will suggested
changes later to avoid that.
if WITH_BLKID
libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS)
libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS)
@@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD = \
libvirt-net-rpc.la \
libvirt_security_manager.la \
libvirt_conf.la \
+ libvirt.la \
+ libvirt-lxc.la \
Again I want us to avoid this too
libvirt_util.la \
../gnulib/lib/libgnu.la
if WITH_DTRACE_PROBES
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 11e9514..d8362ab 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -27,6 +27,7 @@
#include <config.h>
#include <fcntl.h>
+#include <sched.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
@@ -38,7 +39,6 @@
#include <mntent.h>
#include <sys/reboot.h>
#include <linux/reboot.h>
-
/* Yes, we want linux private one, for _syscall2() macro */
#include <linux/unistd.h>
Try to avoid adding/removing random whitespace in patches. If you
think something warrents a cleanup just send a separate patch
@@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch)
return VIR_ARCH_NONE;
}
+struct ns_info {
+ const char *proc_name;
+ int clone_flag;
We usually use capitalization in struct / type names not
underscores. Also try to always use a prefix to make it
clearer that its libvirt code not some system header.
so eg lxcNSInfo
+}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = {
+ [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net",
CLONE_NEWNET},
+ [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC},
+ [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS}
+};
+
+static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST])
We mostly use capitlization in method names rather than underscores
so eg lxcOpenNS
+{
+ int i, n, rc = 0;
+ virDomainPtr dom = NULL;
+ virConnectPtr conn = NULL;
+ pid_t pid;
+ int nfdlist;
+ int *fdlist;
+ char *path = NULL;
+ char *eptr;
+ for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++)
+ ns_fd[i] = -1;
+
+ if (STREQ_NULLABLE("netns",
lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) {
+ if (virAsprintf(&path, "/var/run/netns/%s",
lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0)
+ return -1;
Interesting - what is responsible for the /var/run/netns/ entries ? Is that
a standardized convention somewhere.
+ ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path,
O_RDONLY);
+ VIR_FREE(path);
+ if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) {
+ virReportSystemError(errno,
+ _("failed to open netns %s"),
lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]);
+ return -1;
+ }
+ }
+ for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) {
+ /* If not yet intialized by above: netns*/
+ if (lxcDef->ns_type[i] && ns_fd[i] == -1) {
+ pid = strtol(lxcDef->ns_val[i], &eptr, 10);
+ if (*eptr != '\0' || pid < 1) {
+ /* check if the domain is running, then set the namespaces
+ * to that container
+ */
+ const char *ns[] = { "user", "ipc", "uts",
"net", "pid", "mnt" };
+ conn = virConnectOpen("lxc:///");
+ if (!conn) {
+ virReportError(virGetLastError()->code,
+ _("unable to get connect to lxc %s"),
lxcDef->ns_val[i]);
+ rc = -1;
+ goto cleanup;
+ }
+ dom = virDomainLookupByName(conn, lxcDef->ns_val[i]);
+ if (!dom) {
+ virReportError(virGetLastError()->code,
+ _("Unable to lookup peer containeri %s"),
+ lxcDef->ns_val[i]);
+ rc = -1;
+ goto cleanup;
+ }
+ if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0)
{
+ virReportError(virGetLastError()->code,
+ _("Unable to open %s"),
lxcDef->ns_val[i]);
+ rc = -1;
+ goto cleanup;
+ }
I really hate the idea of the libvirt_lxc program opening a connection
back to libvirtd using virConnectOpen, as that creates a circular
dependancy. It also risks deadlock, because libvirtd will be holding
locks while starting up the container, and you're calling back into
the driver which may then end up acquiring the same lock.
I think a better approach in general is for libvirtd lxc_process.c
code to be responsible for getting access to all the namespace
file descriptors. We can then pass those pre-opened file descrpitors
down into libvirt_lxc using command line args, in the sme way that
we pass down file descriptors for pre-opened TAP devices.
eg so we end up running
libvirt_lxc --netns 23 --pidns 24 --utsns 25
or something like that.
+ for (n = 0; n < ARRAY_CARDINALITY(ns); n++) {
+ if (STREQ(ns[n], ns_info_local[i].proc_name)) {
+ ns_fd[i] = fdlist[n];
+ } else {
+ if (VIR_CLOSE(fdlist[n]) < 0)
+ VIR_ERROR(_("failed to close fd. ignoring.."));
+ }
+ }
+ if (nfdlist > 0)
+ VIR_FREE(fdlist);
+ } else {
+ if (virAsprintf(&path, "/proc/%d/ns/%s", pid,
ns_info_local[i].proc_name) < 0)
+ return -1;
+ ns_fd[i] = open(path, O_RDONLY);
+ VIR_FREE(path);
+ if (ns_fd[i] < 0) {
+ virReportSystemError(errno,
+ _("failed to open ns %s"), lxcDef->ns_val[i]);
+ return -1;
+ }
+ }
+ }
+ }
+ cleanup:
+ if (dom)
+ virDomainFree(dom);
+ if (conn)
+ virConnectClose(conn);
+ return rc;
+}
Regards,
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|