[libvirt] [PATCH] Inherit namespace feature

This patch adds a feature that lets LXC containers inherit namespaces from another container or process. This is very similar to what lxc-tools and Docker provide. See "man lxc-start": you can pass the command-line arguments [ --share-[net|ipc|uts] name|pid ]. Docker's networking options likewise accept --net=container:NAME_or_ID to share a namespace.
With this patch you can add an extra libvirt option to share namespaces in the following way: <lxc:namespace> <lxc:sharenet type='netns' value='red'/> <lxc:shareipc type='pid' value='12345'/> <lxc:shareuts type='name' value='container1'/> </lxc:namespace>
--- docs/drvlxc.html.in | 18 +++ docs/schemas/domaincommon.rng | 42 ++++++ src/Makefile.am | 4 +- src/lxc/lxc_conf.c | 2 +- src/lxc/lxc_conf.h | 15 +++ src/lxc/lxc_container.c | 236 +++++++++++++++++++++++++++++++++- src/lxc/lxc_domain.c | 164 ++++++++++++++++++++++- src/lxc/lxc_domain.h | 1 + tests/lxcxml2xmldata/lxc-sharenet.xml | 33 +++++ tests/lxcxml2xmltest.c | 1 + 10 files changed, 507 insertions(+), 9 deletions(-) create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index a094bd9..d14d4c7 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default can serious affect the security of the container and the host. </p> +<h2><a name="share">Inherit namespaces</a></h2> + +<p> +Libvirt allows you to inherit the namespace from container/process just like lxc tools +or docker provides to share the network namespace. The following can be used to share +required namespaces. If we want to share only one then the other namespaces can be ignored. +</p> +<pre> +<domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> +... 
+<lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareuts type='name' value='container1'/> + <lxc:shareipc type='pid' value='12345'/> +</lxc:namespace> +</domain> +</pre> + <h2><a name="usage">Container usage / management</a></h2> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 1120003..803b327 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -68,6 +68,9 @@ <ref name='qemucmdline'/> </optional> <optional> + <ref name='lxcsharens'/> + </optional> + <optional> <ref name='keywrap'/> </optional> </interleave> @@ -5012,6 +5015,45 @@ </element> </define> + <!-- + Optional hypervisor extensions in their own namespace: + LXC + --> + <define name="lxcsharens"> + <element name="namespace" ns="http://libvirt.org/schemas/domain/lxc/1.0"> + <zeroOrMore> + <element name="sharenet"> + <attribute name="type"> + <choice> + <value>netns</value> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + <element name="shareipc"> + <attribute name="type"> + <choice> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + <element name="shareuts"> + <attribute name="type"> + <choice> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + </zeroOrMore> + </element> + </define> + <define name="metadata"> <element name="metadata"> <zeroOrMore> diff --git a/src/Makefile.am b/src/Makefile.am index be63e26..ef96a5a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \ -I$(srcdir)/access \ -I$(srcdir)/conf \ $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) libvirt-lxc.la $(FUSE_LIBS) if WITH_BLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) 
libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD = \ libvirt-net-rpc.la \ libvirt_security_manager.la \ libvirt_conf.la \ + libvirt.la \ + libvirt-lxc.la \ libvirt_util.la \ ../gnulib/lib/libgnu.la if WITH_DTRACE_PROBES diff --git a/src/lxc/lxc_conf.c b/src/lxc/lxc_conf.c index c393cb5..96a0f47 100644 --- a/src/lxc/lxc_conf.c +++ b/src/lxc/lxc_conf.c @@ -213,7 +213,7 @@ lxcDomainXMLConfInit(void) { return virDomainXMLOptionNew(&virLXCDriverDomainDefParserConfig, &virLXCDriverPrivateDataCallbacks, - NULL); + &virLXCDriverDomainXMLNamespace); } diff --git a/src/lxc/lxc_conf.h b/src/lxc/lxc_conf.h index 8340b1f..72b1d44 100644 --- a/src/lxc/lxc_conf.h +++ b/src/lxc/lxc_conf.h @@ -67,6 +67,21 @@ struct _virLXCDriverConfig { bool securityRequireConfined; }; + +typedef enum { + VIR_DOMAIN_NAMESPACE_SHARENET = 0, + VIR_DOMAIN_NAMESPACE_SHAREIPC, + VIR_DOMAIN_NAMESPACE_SHAREUTS, + VIR_DOMAIN_NAMESPACE_LAST, +} virDomainNamespace; + +typedef struct _lxcDomainDef lxcDomainDef; +typedef lxcDomainDef *lxcDomainDefPtr; +struct _lxcDomainDef { + char *ns_type[VIR_DOMAIN_NAMESPACE_LAST]; + char *ns_val[VIR_DOMAIN_NAMESPACE_LAST]; +}; + struct _virLXCDriver { virMutex lock; diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 11e9514..d8362ab 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,6 +27,7 @@ #include <config.h> #include <fcntl.h> +#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -38,7 +39,6 @@ #include <mntent.h> #include <sys/reboot.h> #include <linux/reboot.h> - /* Yes, we want linux private one, for _syscall2() macro */ #include <linux/unistd.h> @@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) return VIR_ARCH_NONE; } +struct ns_info { + const char *proc_name; + int clone_flag; +}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = { + [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", 
CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + +static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i, n, rc = 0; + virDomainPtr dom = NULL; + virConnectPtr conn = NULL; + pid_t pid; + int nfdlist; + int *fdlist; + char *path = NULL; + char *eptr; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (STREQ_NULLABLE("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) { + if (virAsprintf(&path, "/var/run/netns/%s", lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0) + return -1; + ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + for (n = 0; n < ARRAY_CARDINALITY(ns); n++) { + if (STREQ(ns[n], ns_info_local[i].proc_name)) { + ns_fd[i] = fdlist[n]; + } else { + if (VIR_CLOSE(fdlist[n]) < 0) + VIR_ERROR(_("failed to close 
fd. ignoring..")); + } + } + if (nfdlist > 0) + VIR_FREE(fdlist); + } else { + if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_info_local[i].proc_name) < 0) + return -1; + ns_fd[i] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[i] < 0) { + virReportSystemError(errno, + _("failed to open ns %s"), lxcDef->ns_val[i]); + return -1; + } + } + } + } + cleanup: + if (dom) + virDomainFree(dom); + if (conn) + virConnectClose(conn); + return rc; +} + + +static void lxcClose_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] > -1) { + if (VIR_CLOSE(ns_fd[i]) < 0) + virReportSystemError(errno, "%s", _("failed to close file")); + ns_fd[i] = -1; + } + } +} + + +/** + * lxcPreserve_ns: + * @ns_fd: array to store current namespace + * @clone_flags: namespaces that need to be preserved + */ +static int lxcPreserve_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST], int clone_flags) +{ + int i, saved_errno; + char *path = NULL; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (!virFileExists("/proc/self/ns")) { + virReportSystemError(errno, "%s", + _("Kernel does not support attach; preserve_ns ignored")); + return -1; + } + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if ((clone_flags & ns_info_local[i].clone_flag) == 0) + continue; + if (virAsprintf(&path, "/proc/self/ns/%s", + ns_info_local[i].proc_name) < 0) + goto error; + ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC); + if (ns_fd[i] < 0) + goto error; + VIR_FREE(path); + } + return 0; + error: + saved_errno = errno; + lxcClose_ns(ns_fd); + errno = saved_errno; + virReportSystemError(errno, _("lxcPreserve_ns failed for '%s'"), path); + VIR_FREE(path); + return -1; +} + +/** + * lxcAttach_ns: + * @ns_fd: array of namespaces to attach + */ +static int lxcAttach_ns(const int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] < 0) + continue; + VIR_DEBUG("Setting into 
namespace\n"); + /* We get EINVAL if new NS is same as the current + * NS, or if the fd namespace doesn't match the + * type passed to setns()'s second param. Since we + * pass 0, we know the EINVAL is harmless + */ + if (setns(ns_fd[i], 0) < 0 && + errno != EINVAL) { + virReportSystemError(errno, _("failed to set namespace '%s'") + , ns_info_local[i].proc_name); + return -1; + } + } + return 0; +} + /** * lxcContainerStart: @@ -2346,9 +2521,13 @@ int lxcContainerStart(virDomainDefPtr def, char **ttyPaths) { pid_t pid; - int cflags; + int cflags, i; int stacksize = getpagesize() * 4; char *stack, *stacktop; + int saved_ns_fd[VIR_DOMAIN_NAMESPACE_LAST]; + int ns_inherit_fd[VIR_DOMAIN_NAMESPACE_LAST]; + int preserve_mask = 0; + lxcDomainDefPtr lxcDef; lxc_child_argv_t args = { .config = def, .securityDriver = securityDriver, @@ -2368,7 +2547,12 @@ int lxcContainerStart(virDomainDefPtr def, stacktop = stack + stacksize; - cflags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD; + lxcDef = def->namespaceData; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + if (lxcDef && lxcDef->ns_type[i]) + preserve_mask |= ns_info_local[i].clone_flag; + + cflags = CLONE_NEWPID|CLONE_NEWNS|SIGCHLD; if (userns_required(def)) { if (userns_supported()) { @@ -2381,10 +2565,43 @@ int lxcContainerStart(virDomainDefPtr def, return -1; } } + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET]) { + if (lxcNeedNetworkNamespace(def)) { + VIR_DEBUG("Enable network namespaces"); + cflags |= CLONE_NEWNET; + } + } else { + VIR_DEBUG("Inheriting a net namespace"); + } + + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHAREIPC]) { + cflags |= CLONE_NEWIPC; + } else { + VIR_DEBUG("Inheriting an IPC namespace"); + } + + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHAREUTS]) { + cflags |= CLONE_NEWUTS; + } else { + VIR_DEBUG("Inheriting a UTS namespace"); + } + + if (lxcDef && lxcPreserve_ns(saved_ns_fd, preserve_mask) < 0) { + 
virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to preserve the namespace")); + return -1; + } - if (lxcNeedNetworkNamespace(def)) { - VIR_DEBUG("Enable network namespaces"); - cflags |= CLONE_NEWNET; + if (lxcDef && lxcOpen_ns(lxcDef, ns_inherit_fd)) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to open the namespace")); + return -1; + } + + if (lxcDef && lxcAttach_ns(ns_inherit_fd) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to attach the namespace")); + return -1; } VIR_DEBUG("Cloning container init process"); @@ -2397,7 +2614,14 @@ int lxcContainerStart(virDomainDefPtr def, _("Failed to run clone container")); return -1; } + if (lxcDef && lxcAttach_ns(saved_ns_fd)) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to restore saved namespaces")); + } + /* clean up */ + if (lxcDef) + lxcClose_ns(ns_inherit_fd); return pid; } diff --git a/src/lxc/lxc_domain.c b/src/lxc/lxc_domain.c index 70606f3..5e63969 100644 --- a/src/lxc/lxc_domain.c +++ b/src/lxc/lxc_domain.c @@ -26,8 +26,14 @@ #include "viralloc.h" #include "virlog.h" #include "virerror.h" +#include <fcntl.h> +#include <libxml/xpathInternals.h> +#include "virstring.h" +#include "virutil.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_LXC +#define LXC_NAMESPACE_HREF "http://libvirt.org/schemas/domain/lxc/1.0" VIR_LOG_INIT("lxc.lxc_domain"); @@ -41,6 +47,163 @@ static void *virLXCDomainObjPrivateAlloc(void) return priv; } +VIR_ENUM_DECL(virDomainNamespace) +VIR_ENUM_IMPL(virDomainNamespace, VIR_DOMAIN_NAMESPACE_LAST, + N_("sharenet"), + N_("shareipc"), + N_("shareuts")) + +static void +lxcDomainDefNamespaceFree(void *nsdata) +{ + int j; + lxcDomainDefPtr lxcDef = nsdata; + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + VIR_FREE(lxcDef->ns_type[j]); + VIR_FREE(lxcDef->ns_val[j]); + } + VIR_FREE(nsdata); +} + +static int +lxcDomainDefNamespaceParse(xmlDocPtr xml ATTRIBUTE_UNUSED, + xmlNodePtr root ATTRIBUTE_UNUSED, + xmlXPathContextPtr ctxt, + void 
**data) +{ + lxcDomainDefPtr lxcDef = NULL; + xmlNodePtr *nodes = NULL; + bool uses_lxc_ns = false; + xmlNodePtr node; + int feature; + int n; + char *tmp = NULL; + size_t i; + + if (xmlXPathRegisterNs(ctxt, BAD_CAST "lxc", BAD_CAST LXC_NAMESPACE_HREF) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to register xml namespace '%s'"), + LXC_NAMESPACE_HREF); + return -1; + } + + if (VIR_ALLOC(lxcDef) < 0) + return -1; + /* Init ns_herit_fd for namespaces */ + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + lxcDef->ns_type[i] = NULL; + lxcDef->ns_val[i] = NULL; + } + + node = ctxt->node; + if ((n = virXPathNodeSet("./lxc:namespace/*", ctxt, &nodes)) < 0) + goto error; + uses_lxc_ns |= n > 0; + + for (i = 0; i < n; i++) { + feature = + virDomainNamespaceTypeFromString((const char *) nodes[i]->name); + if (feature < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unsupported Namespace feature: %s"), + nodes[i]->name); + goto error; + } + + ctxt->node = nodes[i]; + + switch ((virDomainNamespace) feature) { + case VIR_DOMAIN_NAMESPACE_SHARENET: + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + { + tmp = virXMLPropString(nodes[i], "type"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + /* save the tmp so that its needed while writing to xml */ + lxcDef->ns_type[feature] = tmp; + tmp = virXMLPropString(nodes[i], "value"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + lxcDef->ns_val[feature] = tmp; + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + VIR_FREE(nodes); + ctxt->node = node; + if (uses_lxc_ns) + *data = lxcDef; + else + VIR_FREE(lxcDef); + return 0; + error: + VIR_FREE(nodes); + lxcDomainDefNamespaceFree(lxcDef); + return -1; +} + + +static int +lxcDomainDefNamespaceFormatXML(virBufferPtr buf, + void *nsdata) +{ + lxcDomainDefPtr 
lxcDef = nsdata; + size_t j; + + if (!lxcDef) + return 0; + + virBufferAddLit(buf, "<lxc:namespace>\n"); + virBufferAdjustIndent(buf, 2); + + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + switch ((virDomainNamespace) j) { + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + case VIR_DOMAIN_NAMESPACE_SHARENET: + { + if (lxcDef->ns_type[j]) { + virBufferAsprintf(buf, "<lxc:%s type='%s' value='%s'/>\n", + virDomainNamespaceTypeToString(j), + lxcDef->ns_type[j], + lxcDef->ns_val[j]); + } + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</lxc:namespace>\n"); + return 0; +} + +static const char * +lxcDomainDefNamespaceHref(void) +{ + return "xmlns:lxc='" LXC_NAMESPACE_HREF "'"; +} + + +virDomainXMLNamespace virLXCDriverDomainXMLNamespace = { + .parse = lxcDomainDefNamespaceParse, + .free = lxcDomainDefNamespaceFree, + .format = lxcDomainDefNamespaceFormatXML, + .href = lxcDomainDefNamespaceHref, +}; + + static void virLXCDomainObjPrivateFree(void *data) { virLXCDomainObjPrivatePtr priv = data; @@ -77,7 +240,6 @@ virLXCDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, } else { priv->initpid = thepid; } - return 0; } diff --git a/src/lxc/lxc_domain.h b/src/lxc/lxc_domain.h index 751aece..25df999 100644 --- a/src/lxc/lxc_domain.h +++ b/src/lxc/lxc_domain.h @@ -41,6 +41,7 @@ struct _virLXCDomainObjPrivate { virCgroupPtr cgroup; }; +extern virDomainXMLNamespace virLXCDriverDomainXMLNamespace; extern virDomainXMLPrivateDataCallbacks virLXCDriverPrivateDataCallbacks; extern virDomainDefParserConfig virLXCDriverDomainDefParserConfig; diff --git a/tests/lxcxml2xmldata/lxc-sharenet.xml b/tests/lxcxml2xmldata/lxc-sharenet.xml new file mode 100644 index 0000000..a2b8d1b --- /dev/null +++ b/tests/lxcxml2xmldata/lxc-sharenet.xml @@ -0,0 +1,33 @@ +<domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> + <name>jessie</name> + 
<uuid>e21987a5-e98e-9c99-0e35-803e4d9ad1fe</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <resource> + <partition>/machine</partition> + </resource> + <os> + <type arch='x86_64'>exe</type> + <init>/sbin/init</init> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/libexec/libvirt_lxc</emulator> + <filesystem type='mount' accessmode='passthrough'> + <source dir='/mach/jessie'/> + <target dir='/'/> + </filesystem> + <console type='pty'> + <target type='lxc' port='0'/> + </console> + </devices> + <lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareipc type='pid' value='12345'/> + <lxc:shareuts type='name' value='container1'/> + </lxc:namespace> +</domain> diff --git a/tests/lxcxml2xmltest.c b/tests/lxcxml2xmltest.c index 3e00347..8d824b9 100644 --- a/tests/lxcxml2xmltest.c +++ b/tests/lxcxml2xmltest.c @@ -133,6 +133,7 @@ mymain(void) DO_TEST("filesystem-root"); DO_TEST("idmap"); DO_TEST("capabilities"); + DO_TEST("sharenet"); virObjectUnref(caps); virObjectUnref(xmlopt); -- 1.9.1

A gentle reminder for the experts out there: I would appreciate your valuable input on this patch. -imran On Wed, Jul 1, 2015 at 11:07 PM, ik.nitk <ik.nitk@gmail.com> wrote:
This patch adds a feature that lets LXC containers inherit namespaces from another container or process. This is very similar to what lxc-tools and Docker provide. See "man lxc-start": you can pass the command-line arguments [ --share-[net|ipc|uts] name|pid ]. Docker's networking options likewise accept --net=container:NAME_or_ID to share a namespace.
With this patch you can add an extra libvirt option to share namespaces in the following way: <lxc:namespace> <lxc:sharenet type='netns' value='red'/> <lxc:shareipc type='pid' value='12345'/> <lxc:shareuts type='name' value='container1'/> </lxc:namespace>
--- docs/drvlxc.html.in | 18 +++ docs/schemas/domaincommon.rng | 42 ++++++ src/Makefile.am | 4 +- src/lxc/lxc_conf.c | 2 +- src/lxc/lxc_conf.h | 15 +++ src/lxc/lxc_container.c | 236 +++++++++++++++++++++++++++++++++- src/lxc/lxc_domain.c | 164 ++++++++++++++++++++++- src/lxc/lxc_domain.h | 1 + tests/lxcxml2xmldata/lxc-sharenet.xml | 33 +++++ tests/lxcxml2xmltest.c | 1 + 10 files changed, 507 insertions(+), 9 deletions(-) create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml
diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index a094bd9..d14d4c7 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default can serious affect the security of the container and the host. </p>
+<h2><a name="share">Inherit namespaces</a></h2> + +<p> +Libvirt allows you to inherit the namespace from container/process just like lxc tools +or docker provides to share the network namespace. The following can be used to share +required namespaces. If we want to share only one then the other namespaces can be ignored. +</p> +<pre> +<domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> +... +<lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareuts type='name' value='container1'/> + <lxc:shareipc type='pid' value='12345'/> +</lxc:namespace> +</domain> +</pre> + <h2><a name="usage">Container usage / management</a></h2>
<p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 1120003..803b327 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -68,6 +68,9 @@ <ref name='qemucmdline'/> </optional> <optional> + <ref name='lxcsharens'/> + </optional> + <optional> <ref name='keywrap'/> </optional> </interleave> @@ -5012,6 +5015,45 @@ </element> </define>
+ <!-- + Optional hypervisor extensions in their own namespace: + LXC + --> + <define name="lxcsharens"> + <element name="namespace" ns="http://libvirt.org/schemas/domain/lxc/1.0"> + <zeroOrMore> + <element name="sharenet"> + <attribute name="type"> + <choice> + <value>netns</value> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + <element name="shareipc"> + <attribute name="type"> + <choice> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + <element name="shareuts"> + <attribute name="type"> + <choice> + <value>name</value> + <value>pid</value> + </choice> + </attribute> + <attribute name='value'/> + </element> + </zeroOrMore> + </element> + </define> + <define name="metadata"> <element name="metadata"> <zeroOrMore> diff --git a/src/Makefile.am b/src/Makefile.am index be63e26..ef96a5a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \ -I$(srcdir)/access \ -I$(srcdir)/conf \ $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) libvirt-lxc.la $(FUSE_LIBS) if WITH_BLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD = \ libvirt-net-rpc.la \ libvirt_security_manager.la \ libvirt_conf.la \ + libvirt.la \ + libvirt-lxc.la \ libvirt_util.la \ ../gnulib/lib/libgnu.la if WITH_DTRACE_PROBES diff --git a/src/lxc/lxc_conf.c b/src/lxc/lxc_conf.c index c393cb5..96a0f47 100644 --- a/src/lxc/lxc_conf.c +++ b/src/lxc/lxc_conf.c @@ -213,7 +213,7 @@ lxcDomainXMLConfInit(void) { return virDomainXMLOptionNew(&virLXCDriverDomainDefParserConfig, &virLXCDriverPrivateDataCallbacks, - NULL); + &virLXCDriverDomainXMLNamespace); }
diff --git a/src/lxc/lxc_conf.h b/src/lxc/lxc_conf.h index 8340b1f..72b1d44 100644 --- a/src/lxc/lxc_conf.h +++ b/src/lxc/lxc_conf.h @@ -67,6 +67,21 @@ struct _virLXCDriverConfig { bool securityRequireConfined; };
+ +typedef enum { + VIR_DOMAIN_NAMESPACE_SHARENET = 0, + VIR_DOMAIN_NAMESPACE_SHAREIPC, + VIR_DOMAIN_NAMESPACE_SHAREUTS, + VIR_DOMAIN_NAMESPACE_LAST, +} virDomainNamespace; + +typedef struct _lxcDomainDef lxcDomainDef; +typedef lxcDomainDef *lxcDomainDefPtr; +struct _lxcDomainDef { + char *ns_type[VIR_DOMAIN_NAMESPACE_LAST]; + char *ns_val[VIR_DOMAIN_NAMESPACE_LAST]; +}; + struct _virLXCDriver { virMutex lock;
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 11e9514..d8362ab 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,6 +27,7 @@ #include <config.h>
#include <fcntl.h> +#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -38,7 +39,6 @@ #include <mntent.h> #include <sys/reboot.h> #include <linux/reboot.h> - /* Yes, we want linux private one, for _syscall2() macro */ #include <linux/unistd.h>
@@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) return VIR_ARCH_NONE; }
+struct ns_info { + const char *proc_name; + int clone_flag; +}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = { + [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + +static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i, n, rc = 0; + virDomainPtr dom = NULL; + virConnectPtr conn = NULL; + pid_t pid; + int nfdlist; + int *fdlist; + char *path = NULL; + char *eptr; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (STREQ_NULLABLE("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) { + if (virAsprintf(&path, "/var/run/netns/%s", lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0) + return -1; + ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + 
goto cleanup; + } + for (n = 0; n < ARRAY_CARDINALITY(ns); n++) { + if (STREQ(ns[n], ns_info_local[i].proc_name)) { + ns_fd[i] = fdlist[n]; + } else { + if (VIR_CLOSE(fdlist[n]) < 0) + VIR_ERROR(_("failed to close fd. ignoring..")); + } + } + if (nfdlist > 0) + VIR_FREE(fdlist); + } else { + if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_info_local[i].proc_name) < 0) + return -1; + ns_fd[i] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[i] < 0) { + virReportSystemError(errno, + _("failed to open ns %s"), lxcDef->ns_val[i]); + return -1; + } + } + } + } + cleanup: + if (dom) + virDomainFree(dom); + if (conn) + virConnectClose(conn); + return rc; +} + + +static void lxcClose_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] > -1) { + if (VIR_CLOSE(ns_fd[i]) < 0) + virReportSystemError(errno, "%s", _("failed to close file")); + ns_fd[i] = -1; + } + } +} + + +/** + * lxcPreserve_ns: + * @ns_fd: array to store current namespace + * @clone_flags: namespaces that need to be preserved + */ +static int lxcPreserve_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST], int clone_flags) +{ + int i, saved_errno; + char *path = NULL; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (!virFileExists("/proc/self/ns")) { + virReportSystemError(errno, "%s", + _("Kernel does not support attach; preserve_ns ignored")); + return -1; + } + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if ((clone_flags & ns_info_local[i].clone_flag) == 0) + continue; + if (virAsprintf(&path, "/proc/self/ns/%s", + ns_info_local[i].proc_name) < 0) + goto error; + ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC); + if (ns_fd[i] < 0) + goto error; + VIR_FREE(path); + } + return 0; + error: + saved_errno = errno; + lxcClose_ns(ns_fd); + errno = saved_errno; + virReportSystemError(errno, _("lxcPreserve_ns failed for '%s'"), path); + VIR_FREE(path); + return -1; +} + +/** + * lxcAttach_ns: + * @ns_fd: array of 
namespaces to attach + */ +static int lxcAttach_ns(const int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] < 0) + continue; + VIR_DEBUG("Setting into namespace\n"); + /* We get EINVAL if new NS is same as the current + * NS, or if the fd namespace doesn't match the + * type passed to setns()'s second param. Since we + * pass 0, we know the EINVAL is harmless + */ + if (setns(ns_fd[i], 0) < 0 && + errno != EINVAL) { + virReportSystemError(errno, _("failed to set namespace '%s'") + , ns_info_local[i].proc_name); + return -1; + } + } + return 0; +} +
/** * lxcContainerStart: @@ -2346,9 +2521,13 @@ int lxcContainerStart(virDomainDefPtr def, char **ttyPaths) { pid_t pid; - int cflags; + int cflags, i; int stacksize = getpagesize() * 4; char *stack, *stacktop; + int saved_ns_fd[VIR_DOMAIN_NAMESPACE_LAST]; + int ns_inherit_fd[VIR_DOMAIN_NAMESPACE_LAST]; + int preserve_mask = 0; + lxcDomainDefPtr lxcDef; lxc_child_argv_t args = { .config = def, .securityDriver = securityDriver, @@ -2368,7 +2547,12 @@ int lxcContainerStart(virDomainDefPtr def,
stacktop = stack + stacksize;
- cflags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD; + lxcDef = def->namespaceData; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + if (lxcDef && lxcDef->ns_type[i]) + preserve_mask |= ns_info_local[i].clone_flag; + + cflags = CLONE_NEWPID|CLONE_NEWNS|SIGCHLD;
if (userns_required(def)) { if (userns_supported()) { @@ -2381,10 +2565,43 @@ int lxcContainerStart(virDomainDefPtr def, return -1; } } + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET]) { + if (lxcNeedNetworkNamespace(def)) { + VIR_DEBUG("Enable network namespaces"); + cflags |= CLONE_NEWNET; + } + } else { + VIR_DEBUG("Inheriting a net namespace"); + } + + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHAREIPC]) { + cflags |= CLONE_NEWIPC; + } else { + VIR_DEBUG("Inheriting an IPC namespace"); + } + + if (!lxcDef || !lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHAREUTS]) { + cflags |= CLONE_NEWUTS; + } else { + VIR_DEBUG("Inheriting a UTS namespace"); + } + + if (lxcDef && lxcPreserve_ns(saved_ns_fd, preserve_mask) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to preserve the namespace")); + return -1; + }
- if (lxcNeedNetworkNamespace(def)) { - VIR_DEBUG("Enable network namespaces"); - cflags |= CLONE_NEWNET; + if (lxcDef && lxcOpen_ns(lxcDef, ns_inherit_fd)) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to open the namespace")); + return -1; + } + + if (lxcDef && lxcAttach_ns(ns_inherit_fd) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to attach the namespace")); + return -1; }
VIR_DEBUG("Cloning container init process"); @@ -2397,7 +2614,14 @@ int lxcContainerStart(virDomainDefPtr def, _("Failed to run clone container")); return -1; } + if (lxcDef && lxcAttach_ns(saved_ns_fd)) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to restore saved namespaces")); + }
+ /* clean up */ + if (lxcDef) + lxcClose_ns(ns_inherit_fd); return pid; }
diff --git a/src/lxc/lxc_domain.c b/src/lxc/lxc_domain.c index 70606f3..5e63969 100644 --- a/src/lxc/lxc_domain.c +++ b/src/lxc/lxc_domain.c @@ -26,8 +26,14 @@ #include "viralloc.h" #include "virlog.h" #include "virerror.h" +#include <fcntl.h> +#include <libxml/xpathInternals.h> +#include "virstring.h" +#include "virutil.h" +#include "virfile.h"
#define VIR_FROM_THIS VIR_FROM_LXC +#define LXC_NAMESPACE_HREF "http://libvirt.org/schemas/domain/lxc/1.0"
VIR_LOG_INIT("lxc.lxc_domain");
@@ -41,6 +47,163 @@ static void *virLXCDomainObjPrivateAlloc(void) return priv; }
+VIR_ENUM_DECL(virDomainNamespace) +VIR_ENUM_IMPL(virDomainNamespace, VIR_DOMAIN_NAMESPACE_LAST, + N_("sharenet"), + N_("shareipc"), + N_("shareuts")) + +static void +lxcDomainDefNamespaceFree(void *nsdata) +{ + int j; + lxcDomainDefPtr lxcDef = nsdata; + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + VIR_FREE(lxcDef->ns_type[j]); + VIR_FREE(lxcDef->ns_val[j]); + } + VIR_FREE(nsdata); +} + +static int +lxcDomainDefNamespaceParse(xmlDocPtr xml ATTRIBUTE_UNUSED, + xmlNodePtr root ATTRIBUTE_UNUSED, + xmlXPathContextPtr ctxt, + void **data) +{ + lxcDomainDefPtr lxcDef = NULL; + xmlNodePtr *nodes = NULL; + bool uses_lxc_ns = false; + xmlNodePtr node; + int feature; + int n; + char *tmp = NULL; + size_t i; + + if (xmlXPathRegisterNs(ctxt, BAD_CAST "lxc", BAD_CAST LXC_NAMESPACE_HREF) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to register xml namespace '%s'"), + LXC_NAMESPACE_HREF); + return -1; + } + + if (VIR_ALLOC(lxcDef) < 0) + return -1; + /* Init ns_herit_fd for namespaces */ + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + lxcDef->ns_type[i] = NULL; + lxcDef->ns_val[i] = NULL; + } + + node = ctxt->node; + if ((n = virXPathNodeSet("./lxc:namespace/*", ctxt, &nodes)) < 0) + goto error; + uses_lxc_ns |= n > 0; + + for (i = 0; i < n; i++) { + feature = + virDomainNamespaceTypeFromString((const char *) nodes[i]->name); + if (feature < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unsupported Namespace feature: %s"), + nodes[i]->name); + goto error; + } + + ctxt->node = nodes[i]; + + switch ((virDomainNamespace) feature) { + case VIR_DOMAIN_NAMESPACE_SHARENET: + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + { + tmp = virXMLPropString(nodes[i], "type"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + /* save the tmp so that its needed while writing to xml */ + lxcDef->ns_type[feature] = tmp; + tmp = virXMLPropString(nodes[i], 
"value"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + lxcDef->ns_val[feature] = tmp; + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + VIR_FREE(nodes); + ctxt->node = node; + if (uses_lxc_ns) + *data = lxcDef; + else + VIR_FREE(lxcDef); + return 0; + error: + VIR_FREE(nodes); + lxcDomainDefNamespaceFree(lxcDef); + return -1; +} + + +static int +lxcDomainDefNamespaceFormatXML(virBufferPtr buf, + void *nsdata) +{ + lxcDomainDefPtr lxcDef = nsdata; + size_t j; + + if (!lxcDef) + return 0; + + virBufferAddLit(buf, "<lxc:namespace>\n"); + virBufferAdjustIndent(buf, 2); + + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + switch ((virDomainNamespace) j) { + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + case VIR_DOMAIN_NAMESPACE_SHARENET: + { + if (lxcDef->ns_type[j]) { + virBufferAsprintf(buf, "<lxc:%s type='%s' value='%s'/>\n", + virDomainNamespaceTypeToString(j), + lxcDef->ns_type[j], + lxcDef->ns_val[j]); + } + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</lxc:namespace>\n"); + return 0; +} + +static const char * +lxcDomainDefNamespaceHref(void) +{ + return "xmlns:lxc='" LXC_NAMESPACE_HREF "'"; +} + + +virDomainXMLNamespace virLXCDriverDomainXMLNamespace = { + .parse = lxcDomainDefNamespaceParse, + .free = lxcDomainDefNamespaceFree, + .format = lxcDomainDefNamespaceFormatXML, + .href = lxcDomainDefNamespaceHref, +}; + + static void virLXCDomainObjPrivateFree(void *data) { virLXCDomainObjPrivatePtr priv = data; @@ -77,7 +240,6 @@ virLXCDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, } else { priv->initpid = thepid; } - return 0; }
diff --git a/src/lxc/lxc_domain.h b/src/lxc/lxc_domain.h index 751aece..25df999 100644 --- a/src/lxc/lxc_domain.h +++ b/src/lxc/lxc_domain.h @@ -41,6 +41,7 @@ struct _virLXCDomainObjPrivate { virCgroupPtr cgroup; };
+extern virDomainXMLNamespace virLXCDriverDomainXMLNamespace; extern virDomainXMLPrivateDataCallbacks virLXCDriverPrivateDataCallbacks; extern virDomainDefParserConfig virLXCDriverDomainDefParserConfig;
diff --git a/tests/lxcxml2xmldata/lxc-sharenet.xml b/tests/lxcxml2xmldata/lxc-sharenet.xml new file mode 100644 index 0000000..a2b8d1b --- /dev/null +++ b/tests/lxcxml2xmldata/lxc-sharenet.xml @@ -0,0 +1,33 @@ +<domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> + <name>jessie</name> + <uuid>e21987a5-e98e-9c99-0e35-803e4d9ad1fe</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <resource> + <partition>/machine</partition> + </resource> + <os> + <type arch='x86_64'>exe</type> + <init>/sbin/init</init> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/libexec/libvirt_lxc</emulator> + <filesystem type='mount' accessmode='passthrough'> + <source dir='/mach/jessie'/> + <target dir='/'/> + </filesystem> + <console type='pty'> + <target type='lxc' port='0'/> + </console> + </devices> + <lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareipc type='pid' value='12345'/> + <lxc:shareuts type='name' value='container1'/> + </lxc:namespace> +</domain> diff --git a/tests/lxcxml2xmltest.c b/tests/lxcxml2xmltest.c index 3e00347..8d824b9 100644 --- a/tests/lxcxml2xmltest.c +++ b/tests/lxcxml2xmltest.c @@ -133,6 +133,7 @@ mymain(void) DO_TEST("filesystem-root"); DO_TEST("idmap"); DO_TEST("capabilities"); + DO_TEST("sharenet");
virObjectUnref(caps); virObjectUnref(xmlopt); -- 1.9.1

On Wed, Jul 01, 2015 at 11:07:07PM +0530, ik.nitk wrote:
This patch adds feature for lxc containers to inherit namespaces. This is very similar to what lxc-tools or docker provides. Look for "man lxc-start" and you will find that you can pass command args as [ --share-[net|ipc|uts] name|pid ]. Or check out docker networking option in which you can give --net=container:NAME_or_ID as an option for sharing namespace.
From this patch you can add extra libvirt option to share namespace in following way. <lxc:namespace> <lxc:sharenet type='netns' value='red'/> <lxc:shareipc type='pid' value='12345'/> <lxc:shareuts type='name' value='container1'/> </lxc:namespace>
--- docs/drvlxc.html.in | 18 +++ docs/schemas/domaincommon.rng | 42 ++++++ src/Makefile.am | 4 +- src/lxc/lxc_conf.c | 2 +- src/lxc/lxc_conf.h | 15 +++ src/lxc/lxc_container.c | 236 +++++++++++++++++++++++++++++++++- src/lxc/lxc_domain.c | 164 ++++++++++++++++++++++- src/lxc/lxc_domain.h | 1 + tests/lxcxml2xmldata/lxc-sharenet.xml | 33 +++++ tests/lxcxml2xmltest.c | 1 + 10 files changed, 507 insertions(+), 9 deletions(-) create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml
diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index a094bd9..d14d4c7 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default can serious affect the security of the container and the host. </p>
+<h2><a name="share">Inherit namespaces</a></h2> + +<p> +Libvirt allows you to inherit the namespace from container/process just like lxc tools +or docker provides to share the network namespace. The following can be used to share +required namespaces. If we want to share only one then the other namespaces can be ignored. +</p> +<pre> +<domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> +... +<lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareuts type='name' value='container1'/> + <lxc:shareipc type='pid' value='12345'/> +</lxc:namespace> +</domain> +</pre>
Could you also document the attributes explicitly: the various 'type' attribute values and what they expect for the corresponding 'value' argument. In particular I'm unclear on what the value is when using type='netns', so it's a good idea to be explicit about this.
diff --git a/src/Makefile.am b/src/Makefile.am index be63e26..ef96a5a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \ -I$(srcdir)/access \ -I$(srcdir)/conf \ $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) libvirt-lxc.la $(FUSE_LIBS)
What was the $LIBXML_LIBS addition needed for? I can see why you added libvirt-lxc.la, but I will suggest changes later to avoid that.
if WITH_BLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD = \ libvirt-net-rpc.la \ libvirt_security_manager.la \ libvirt_conf.la \ + libvirt.la \ + libvirt-lxc.la \
Again I want us to avoid this too
libvirt_util.la \ ../gnulib/lib/libgnu.la if WITH_DTRACE_PROBES
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 11e9514..d8362ab 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,6 +27,7 @@ #include <config.h>
#include <fcntl.h> +#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -38,7 +39,6 @@ #include <mntent.h> #include <sys/reboot.h> #include <linux/reboot.h> - /* Yes, we want linux private one, for _syscall2() macro */ #include <linux/unistd.h>
Try to avoid adding/removing random whitespace in patches. If you think something warrants a cleanup, just send a separate patch.
@@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) return VIR_ARCH_NONE; }
+struct ns_info { + const char *proc_name; + int clone_flag;
We usually use capitalization in struct / type names, not underscores. Also try to always use a prefix to make it clearer that it's libvirt code, not some system header, e.g. lxcNSInfo.
+}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = {
+ [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + +static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST])
We mostly use capitalization in method names rather than underscores, e.g. lxcOpenNS.
+{ + int i, n, rc = 0; + virDomainPtr dom = NULL; + virConnectPtr conn = NULL; + pid_t pid; + int nfdlist; + int *fdlist; + char *path = NULL; + char *eptr; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (STREQ_NULLABLE("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) { + if (virAsprintf(&path, "/var/run/netns/%s", lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0) + return -1;
Interesting - what is responsible for the /var/run/netns/ entries? Is that a standardized convention somewhere?
+ ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + }
I really hate the idea of the libvirt_lxc program opening a connection back to libvirtd using virConnectOpen, as that creates a circular dependency. It also risks deadlock, because libvirtd will be holding locks while starting up the container, and you're calling back into the driver which may then end up acquiring the same lock. I think a better approach in general is for libvirtd lxc_process.c code to be responsible for getting access to all the namespace file descriptors. We can then pass those pre-opened file descriptors down into libvirt_lxc using command line args, in the same way that we pass down file descriptors for pre-opened TAP devices. e.g. so we end up running libvirt_lxc --netns 23 --pidns 24 --utsns 25 or something like that.
+ for (n = 0; n < ARRAY_CARDINALITY(ns); n++) { + if (STREQ(ns[n], ns_info_local[i].proc_name)) { + ns_fd[i] = fdlist[n]; + } else { + if (VIR_CLOSE(fdlist[n]) < 0) + VIR_ERROR(_("failed to close fd. ignoring..")); + } + } + if (nfdlist > 0) + VIR_FREE(fdlist); + } else { + if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_info_local[i].proc_name) < 0) + return -1; + ns_fd[i] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[i] < 0) { + virReportSystemError(errno, + _("failed to open ns %s"), lxcDef->ns_val[i]); + return -1; + } + } + } + } + cleanup: + if (dom) + virDomainFree(dom); + if (conn) + virConnectClose(conn); + return rc; +}
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Thanks Daniel and Michal again for your valuable inputs. Please check my replies, marked with the text <imran>, to some of your comments; I would appreciate your help on those. BTW: should I post the rework as a new patch, or should I reply to this thread itself? Sorry, I am new to the community and not yet familiar with the etiquette. On Thu, Jul 30, 2015 at 7:36 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
On Wed, Jul 01, 2015 at 11:07:07PM +0530, ik.nitk wrote:
This patch adds feature for lxc containers to inherit namespaces. This is very similar to what lxc-tools or docker provides. Look for "man lxc-start" and you will find that you can pass command args as [ --share-[net|ipc|uts] name|pid ]. Or check out docker networking option in which you can give --net=container:NAME_or_ID as an option for sharing namespace.
From this patch you can add extra libvirt option to share namespace in following way.
<lxc:namespace> <lxc:sharenet type='netns' value='red'/> <lxc:shareipc type='pid' value='12345'/> <lxc:shareuts type='name' value='container1'/> </lxc:namespace>
--- docs/drvlxc.html.in | 18 +++ docs/schemas/domaincommon.rng | 42 ++++++ src/Makefile.am | 4 +- src/lxc/lxc_conf.c | 2 +- src/lxc/lxc_conf.h | 15 +++ src/lxc/lxc_container.c | 236 +++++++++++++++++++++++++++++++++- src/lxc/lxc_domain.c | 164 ++++++++++++++++++++++- src/lxc/lxc_domain.h | 1 + tests/lxcxml2xmldata/lxc-sharenet.xml | 33 +++++ tests/lxcxml2xmltest.c | 1 + 10 files changed, 507 insertions(+), 9 deletions(-) create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml
diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index a094bd9..d14d4c7 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default can serious affect the security of the container and the host. </p>
+<h2><a name="share">Inherit namespaces</a></h2> + +<p> +Libvirt allows you to inherit the namespace from container/process just like lxc tools
+or docker provides to share the network namespace. The following can be used to share +required namespaces. If we want to share only one then the other namespaces can be ignored. +</p> +<pre> +<domain type='lxc' xmlns:lxc=' http://libvirt.org/schemas/domain/lxc/1.0'> +... +<lxc:namespace> + <lxc:sharenet type='netns' value='red'/> + <lxc:shareuts type='name' value='container1'/> + <lxc:shareipc type='pid' value='12345'/> +</lxc:namespace> +</domain> +</pre>
Could you also document the attributes explicitly, the various 'type' attribute values and what they expect for the corresponding 'value' argument. In particular I'm unclear on what the value is when using type='netns', so its a good idea to be explicit about this.
<imran>: netns is generally the name of a network namespace present in /var/run/netns/, so this will be familiar to folks who use the docker netns option. Please check http://man7.org/linux/man-pages/man8/ip-netns.8.html
diff --git a/src/Makefile.am b/src/Makefile.am index be63e26..ef96a5a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \ -I$(srcdir)/access \ -I$(srcdir)/conf \ $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) libvirt-lxc.la $(FUSE_LIBS)
What was the $LIBXML_LIBS addition needed for ?
I can see why you added libvirt-lxc.la, but I will suggest changes later to avoid that.
<imran>: This is required as we are adding new XML options.
if WITH_BLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD = \ libvirt-net-rpc.la \ libvirt_security_manager.la \ libvirt_conf.la \ + libvirt.la \ + libvirt-lxc.la \
Again I want us to avoid this too
libvirt_util.la \ ../gnulib/lib/libgnu.la if WITH_DTRACE_PROBES
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 11e9514..d8362ab 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,6 +27,7 @@ #include <config.h>
#include <fcntl.h> +#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -38,7 +39,6 @@ #include <mntent.h> #include <sys/reboot.h> #include <linux/reboot.h> - /* Yes, we want linux private one, for _syscall2() macro */ #include <linux/unistd.h>
Try to avoid adding/removing random whitespace in patches. If you think something warrants a cleanup, just send a separate patch.
@@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) return VIR_ARCH_NONE; }
+struct ns_info { + const char *proc_name; + int clone_flag;
We usually use capitalization in struct / type names, not underscores. Also try to always use a prefix to make it clearer that it's libvirt code, not some system header, e.g. lxcNSInfo.
<imran>: will edit and re-send.
+}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = {
+ [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + +static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST])
We mostly use capitalization in method names rather than underscores, e.g. lxcOpenNS.
+{ + int i, n, rc = 0; + virDomainPtr dom = NULL; + virConnectPtr conn = NULL; + pid_t pid; + int nfdlist; + int *fdlist; + char *path = NULL; + char *eptr; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (STREQ_NULLABLE("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) { + if (virAsprintf(&path, "/var/run/netns/%s", lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0) + return -1;
Interesting - what is responsible for the /var/run/netns/ entries? Is that a standardized convention somewhere?
<imran>: Yes this is the standard. please check this link
http://man7.org/linux/man-pages/man8/ip-netns.8.html
+ ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + }
I really hate the idea of the libvirt_lxc program opening a connection back to libvirtd using virConnectOpen, as that creates a circular dependency. It also risks deadlock, because libvirtd will be holding locks while starting up the container, and you're calling back into the driver which may then end up acquiring the same lock.
<imran>: This is where I am finding it hard to code. All the driver functions are static, and to access them I might have to change them from static to non-static, which would not be in line with the current design. I don't see a circular dependency with this approach, as the internal connection is just used to get the fds. Please share any approach to handle this; otherwise I hope I can keep the current code.
I think a better approach in general is for libvirtd lxc_process.c code to be responsible for getting access to all the namespace file descriptors. We can then pass those pre-opened file descriptors down into libvirt_lxc using command line args, in the same way that we pass down file descriptors for pre-opened TAP devices.
eg so we end up running
libvirt_lxc --netns 23 --pidns 24 --utsns 25
or something like that.
<imran>: And usability-wise it's easier to provide names instead of fds, because if the shared container goes down the fds won't be valid. With a name, a simple restart automatically gets the new fds again.
+ for (n = 0; n < ARRAY_CARDINALITY(ns); n++) { + if (STREQ(ns[n], ns_info_local[i].proc_name)) { + ns_fd[i] = fdlist[n]; + } else { + if (VIR_CLOSE(fdlist[n]) < 0) + VIR_ERROR(_("failed to close fd. ignoring..")); + } + } + if (nfdlist > 0) + VIR_FREE(fdlist); + } else { + if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_info_local[i].proc_name) < 0) + return -1; + ns_fd[i] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[i] < 0) { + virReportSystemError(errno, + _("failed to open ns %s"), lxcDef->ns_val[i]); + return -1; + } + } + } + } + cleanup: + if (dom) + virDomainFree(dom); + if (conn) + virConnectClose(conn); + return rc; +}
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Mon, Aug 03, 2015 at 10:00:29PM +0530, Imran Khan wrote:
Thanks Daniel and Michal again for your valuable inputs. Please check my reply with text <imran> for some of your comments. And request you to help on those.
BTW: should I post the rework as a new patch, or should I reply to this thread itself? Sorry, I am new to the community and not yet familiar with the etiquette.
Different communities often have different rules on this. For libvirt reply to the comments in the thread, but when posting a new version of a patch post the patch as a new top level thread. ie do *not* use '--in-reply-to' with git send-email
+ ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + }
I really hate the idea of the libvirt_lxc program opening a connection back to libvirtd using virConnectOpen, as that creates a circular dependency. It also risks deadlock, because libvirtd will be holding locks while starting up the container, and you're calling back into the driver which may then end up acquiring the same lock.
<imran>: This is where I am finding it hard to code. All the driver functions are static, and to access them I might have to change them from static to non-static, which would not be in line with the current design. I don't see a circular dependency with this approach, as the internal connection is just used to get the fds. Please share any approach to handle this; otherwise I hope I can keep the current code.
The code you are interested in for virDomainLxcOpenNamespace is in lxc_driver.c, which is static. This, though, simply calls the global virProcessGetNamespaces() method, which is non-static. So when you put the code in lxc_process.c you can directly call virProcessGetNamespaces().
I think a better approach in general is for libvirtd lxc_process.c code to be responsible for getting access to all the namespace file descriptors. We can then pass those pre-opened file descriptors down into libvirt_lxc using command line args, in the same way that we pass down file descriptors for pre-opened TAP devices.
eg so we end up running
libvirt_lxc --netns 23 --pidns 24 --utsns 25
or something like that.
<imran>: And usability-wise it's easier to provide names instead of fds, because if the shared container goes down the fds won't be valid. With a name, a simple restart automatically gets the new fds again.
The libvirt_lxc program is not something that end users ever run. It is launched by libvirtd itself, so we don't need to care about usability in this particular place. The users still use XML which allows names as you have in your patch already. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Thanks Daniel and Michal, I have made the changes suggested by both of you, mainly to avoid creating a new connection to lxc. I am sending another version of the diffs named "[PATCH] Inherit namespace feature 2". One point to mention is that the reason for adding libxml to libvirt_driver_lxc_impl_la_LIBADD is this error I got: /usr/bin/ld: ../src/.libs/libvirt_driver_lxc_impl.a(libvirt_driver_lxc_impl_la-lxc_domain.o): undefined reference to symbol 'xmlXPathRegisterNs@@LIBXML2_2.4.30' and the reason for adding libvirt-lxc is the error below. Otherwise a lot of code would be duplicated to call virProcessGetNamespaces, i.e. in order to get the pid of the domain before getting the namespaces; instead I just use virDomainLxcOpenNamespace. 8) Test driver "lxc" ... 2015-08-08 17:14:45.434+0000: 19513: info : libvirt version: 1.2.17 2015-08-08 17:14:45.434+0000: 19513: error : virDriverLoadModule:73 : failed to load module /home/imran/programming/libvirt/src/.libs/libvirt_driver_lxc.so /home/imran/programming/libvirt/src/.libs/libvirt_driver_lxc.so: undefined symbol: virDomainLxcOpenNamespace FAILED Thanks a lot for your valuable time and experienced comments, -imran On Mon, Aug 3, 2015 at 10:08 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
On Mon, Aug 03, 2015 at 10:00:29PM +0530, Imran Khan wrote:
Thanks Daniel and Michal again for your valuable inputs. Please check my reply with text <imran> for some of your comments. And request you to help on those.
BTW: should I post the rework as a new patch, or should I reply to this thread itself? Sorry, I am new to the community and not yet familiar with the etiquette.
Different communities often have different rules on this. For libvirt reply to the comments in the thread, but when posting a new version of a patch post the patch as a new top level thread.
ie do *not* use '--in-reply-to' with git send-email
+ ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY); + VIR_FREE(path); + if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) { + virReportSystemError(errno, + _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]); + return -1; + } + } + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + /* If not yet intialized by above: netns*/ + if (lxcDef->ns_type[i] && ns_fd[i] == -1) { + pid = strtol(lxcDef->ns_val[i], &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) { + virReportError(virGetLastError()->code, + _("unable to get connect to lxc %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + dom = virDomainLookupByName(conn, lxcDef->ns_val[i]); + if (!dom) { + virReportError(virGetLastError()->code, + _("Unable to lookup peer containeri %s"), + lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + } + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) { + virReportError(virGetLastError()->code, + _("Unable to open %s"), lxcDef->ns_val[i]); + rc = -1; + goto cleanup; + }
I really hate the idea of the libvirt_lxc program opening a connection back to libvirtd using virConnectOpen, as that creates a circular dependency. It also risks deadlock, because libvirtd will be holding locks while starting up the container, and you're calling back into the driver which may then end up acquiring the same lock.
<imran>: This is where I am finding it hard to code. All the driver functions are static, and to access them I might have to change them from static to non-static, which would not be in line with the current design. I don't see a circular dependency with this approach, as the internal connection is just used to get the fds. Please share any approach to handle this; otherwise I hope I can keep the current code.
The code you are interested in for virDomainLxcOpenNamespace is in lxc_driver.c, which is static. This, though, simply calls the global virProcessGetNamespaces() method, which is non-static. So when you put the code in lxc_process.c you can directly call virProcessGetNamespaces().
I think a better approach in general is for libvirtd lxc_process.c code to be responsible for getting access to all the namespace file descriptors. We can then pass those pre-opened file descriptors down into libvirt_lxc using command line args, in the same way that we pass down file descriptors for pre-opened TAP devices.
eg so we end up running
libvirt_lxc --netns 23 --pidns 24 --utsns 25
or something like that.
<imran>: And usability-wise it's easier to provide names instead of fds, because if the shared container goes down the fds won't be valid. With a name, a simple restart automatically gets the new fds again.
The libvirt_lxc program is not something that end users ever run. It is launched by libvirtd itself, so we don't need to care about usability in this particular place. The users still use XML which allows names as you have in your patch already.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
-
Daniel P. Berrange
-
ik.nitk
-
Imran Khan