[libvirt] [PATCH v2 0/2] libvirt_lxc: implement <hostdev caps=net> device isolation in containers

This set of patches implements <hostdev caps=net> interface isolation in containers, allowing a host NIC to be assigned exclusively to a container domain. It works the same way veth devices are moved into a container's network namespace, except that here the devices being moved are actual host interfaces.

This updates the definitions and supporting structures in the XML schema and domain configuration files. Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> --- docs/formatdomain.html.in | 15 +++++++++++++-- docs/schemas/domaincommon.rng | 14 ++++++++++++++ src/conf/domain_conf.c | 28 +++++++++++++++++++++++++++- src/conf/domain_conf.h | 4 ++++ 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index cf382e8..d0654a5 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2326,18 +2326,29 @@ ... </pre> +... +<hostdev mode='capabilities' type='net'> + <source> + <interface>eth0</interface> + </source> +</hostdev> +... + </pre> + <dl> <dt><code>hostdev</code></dt> <dd>The <code>hostdev</code> element is the main container for describing host devices. For block/character device passthrough <code>mode</code> is always "capabilities" and <code>type</code> is "block" for a block - device and "char" for a character device. + device, "char" for a character device and "iface" for a host network + interface. </dd> <dt><code>source</code></dt> <dd>The source element describes the device as seen from the host. For block devices, the path to the block device in the host OS is provided in the nested "block" element, while for character - devices the "char" element is used + devices the "char" element is used. For network interfaces, the + name of the interface is provided in the "iface" element. 
</dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 454ebdb..73e953f 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2956,6 +2956,9 @@ <group> <ref name="hostdevcapsmisc"/> </group> + <group> + <ref name="hostdevcapsnet"/> + </group> </choice> </define> @@ -3016,6 +3019,17 @@ </element> </define> + <define name="hostdevcapsnet"> + <attribute name="type"> + <value>net</value> + </attribute> + <element name="source"> + <element name="iface"> + <ref name="deviceName"/> + </element> + </element> + </define> + <define name="usbproduct"> <element name="vendor"> <attribute name="id"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index cc26f21..025c1d2 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -577,7 +577,8 @@ VIR_ENUM_IMPL(virDomainHostdevSubsys, VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_LAST, VIR_ENUM_IMPL(virDomainHostdevCaps, VIR_DOMAIN_HOSTDEV_CAPS_TYPE_LAST, "storage", - "misc") + "misc", + "net") VIR_ENUM_IMPL(virDomainPciRombarMode, VIR_DOMAIN_PCI_ROMBAR_LAST, @@ -1566,6 +1567,9 @@ void virDomainHostdevDefClear(virDomainHostdevDefPtr def) case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC: VIR_FREE(def->source.caps.u.misc.chardev); break; + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: + VIR_FREE(def->source.caps.u.net.iface); + break; } } } @@ -3440,6 +3444,14 @@ virDomainHostdevDefParseXMLCaps(xmlNodePtr node ATTRIBUTE_UNUSED, goto error; } break; + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: + if (!(def->source.caps.u.net.iface = + virXPathString("string(./source/iface[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing <iface> element in hostdev net device")); + goto error; + } + break; default: virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("address type='%s' not supported in hostdev interfaces"), @@ -8602,6 +8614,14 @@ virDomainHostdevMatchCapsMisc(virDomainHostdevDefPtr a, b->source.caps.u.misc.chardev); } +static int 
+virDomainHostdevMatchCapsNet(virDomainHostdevDefPtr a, + virDomainHostdevDefPtr b) +{ + return STREQ_NULLABLE(a->source.caps.u.net.iface, + b->source.caps.u.net.iface); +} + static int virDomainHostdevMatchCaps(virDomainHostdevDefPtr a, @@ -8615,6 +8635,8 @@ virDomainHostdevMatchCaps(virDomainHostdevDefPtr a, return virDomainHostdevMatchCapsStorage(a, b); case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC: return virDomainHostdevMatchCapsMisc(a, b); + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: + return virDomainHostdevMatchCapsNet(a, b); } return 0; } @@ -13295,6 +13317,10 @@ virDomainHostdevDefFormatCaps(virBufferPtr buf, virBufferEscapeString(buf, "<char>%s</char>\n", def->source.caps.u.misc.chardev); break; + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: + virBufferEscapeString(buf, "<iface>%s</iface>\n", + def->source.caps.u.net.iface); + break; default: virReportError(VIR_ERR_INTERNAL_ERROR, _("unexpected hostdev type %d"), diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index edddf25..c4b686f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -407,6 +407,7 @@ struct _virDomainHostdevSubsys { enum virDomainHostdevCapsType { VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE, VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC, + VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET, VIR_DOMAIN_HOSTDEV_CAPS_TYPE_LAST }; @@ -422,6 +423,9 @@ struct _virDomainHostdevCaps { struct { char *chardev; } misc; + struct { + char *iface; + } net; } u; }; -- 1.7.11.7

On Fri, Apr 05, 2013 at 08:26:39AM -0400, Bogdan Purcareata wrote:
This updates the definitions and supporting structures in the XML schema and domain configuration files.
Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> --- docs/formatdomain.html.in | 15 +++++++++++++-- docs/schemas/domaincommon.rng | 14 ++++++++++++++ src/conf/domain_conf.c | 28 +++++++++++++++++++++++++++- src/conf/domain_conf.h | 4 ++++ 4 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index cf382e8..d0654a5 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2326,18 +2326,29 @@ ... </pre>
+... +<hostdev mode='capabilities' type='net'> + <source> + <interface>eth0</interface> + </source> +</hostdev> +... + </pre> + <dl> <dt><code>hostdev</code></dt> <dd>The <code>hostdev</code> element is the main container for describing host devices. For block/character device passthrough <code>mode</code> is always "capabilities" and <code>type</code> is "block" for a block - device and "char" for a character device. + device, "char" for a character device and "iface" for a host network + interface. </dd> <dt><code>source</code></dt> <dd>The source element describes the device as seen from the host. For block devices, the path to the block device in the host OS is provided in the nested "block" element, while for character - devices the "char" element is used + devices the "char" element is used. For network interfaces, the + name of the interface is provided in the "iface" element. </dd> </dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 454ebdb..73e953f 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2956,6 +2956,9 @@ <group> <ref name="hostdevcapsmisc"/> </group> + <group> + <ref name="hostdevcapsnet"/> + </group> </choice> </define>
@@ -3016,6 +3019,17 @@ </element> </define>
+ <define name="hostdevcapsnet"> + <attribute name="type"> + <value>net</value> + </attribute> + <element name="source"> + <element name="iface">
Oops, I didn't mean for you to change the XML schema to use 'iface' too, only the C variable names. I've put this back to 'interface' (and in the other relevant places too). ACK & merged to GIT Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

This allows a container-type domain to have exclusive access to one of the host's NICs. Wire <hostdev caps=net> with the lxc_controller - when moving the newly created veth devices into a new namespace, also look for any hostdev devices that should be moved. Note: once the container domain has been destroyed, there is no code that moves the interfaces back to the original namespace. This does happen, though, probably due to default cleanup on namespace destruction. Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> --- src/lxc/lxc_container.c | 4 +++- src/lxc/lxc_controller.c | 16 ++++++++++++++++ src/lxc/lxc_hostdev.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 002ba9e..e59bfdf 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -1551,7 +1551,6 @@ cleanup: return ret; } - static int lxcContainerSetupHostdevSubsys(virDomainDefPtr vmDef, virDomainHostdevDefPtr def, const char *dstprefix, @@ -1582,6 +1581,9 @@ static int lxcContainerSetupHostdevCaps(virDomainDefPtr vmDef, case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC: return lxcContainerSetupHostdevCapsMisc(vmDef, def, dstprefix, securityDriver); + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: + return 0; // case is handled in virLXCControllerMoveInterfaces + default: virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Unsupported host device mode %s"), diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index cede445..edd99bf 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -1050,12 +1050,28 @@ cleanup2: static int virLXCControllerMoveInterfaces(virLXCControllerPtr ctrl) { size_t i; + virDomainDefPtr def = ctrl->def; for (i = 0 ; i < ctrl->nveths ; i++) { if (virNetDevSetNamespace(ctrl->veths[i], ctrl->initpid) < 0) return -1; } + for (i = 0; i < def->nhostdevs; i ++) { + virDomainHostdevDefPtr hdev = def->hostdevs[i]; + + if (hdev->mode != VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES) + 
continue; + + virDomainHostdevCaps hdcaps = hdev->source.caps; + + if (hdcaps.type != VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET) + continue; + + if (virNetDevSetNamespace(hdcaps.u.net.iface, ctrl->initpid) < 0) + return -1; + } + return 0; } diff --git a/src/lxc/lxc_hostdev.c b/src/lxc/lxc_hostdev.c index 33b0b60..53a1a31 100644 --- a/src/lxc/lxc_hostdev.c +++ b/src/lxc/lxc_hostdev.c @@ -307,6 +307,7 @@ int virLXCPrepareHostDevices(virLXCDriverPtr driver, switch (dev->source.subsys.type) { case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE: case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC: + case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET: break; default: virReportError(VIR_ERR_CONFIG_UNSUPPORTED, -- 1.7.11.7

On Fri, Apr 05, 2013 at 08:26:40AM -0400, Bogdan Purcareata wrote:
This allows a container-type domain to have exclusive access to one of the host's NICs.
Wire <hostdev caps=net> into the lxc_controller: when moving the newly created veth devices into the new namespace, also look for any hostdev devices that should be moved. Note: once the container domain has been destroyed, there is no code that moves the interfaces back to the original namespace. They do return to the original namespace, though, probably due to default cleanup on namespace destruction.
Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> --- src/lxc/lxc_container.c | 4 +++- src/lxc/lxc_controller.c | 16 ++++++++++++++++ src/lxc/lxc_hostdev.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-)
ACK, but I added this in before pushing to GIT so that we force private net namespace on startup of container & also validate the parser diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 002ba9e..a494363 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2286,6 +2286,22 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) } +static bool +lxcNeedNetworkNamespace(virDomainDefPtr def) +{ + size_t i; + if (def->nets != NULL) + return true; + if (def->features & (1 << VIR_DOMAIN_FEATURE_PRIVNET)) + return true; + for (i = 0 ; i < def->nhostdevs ; i++) { + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES && + def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET) + return true; + } + return false; +} + /** * lxcContainerStart: * @def: pointer to virtual machine structure @@ -2329,8 +2345,7 @@ int lxcContainerStart(virDomainDefPtr def, cflags |= CLONE_NEWUSER; } - if (def->nets != NULL || - (def->features & (1 << VIR_DOMAIN_FEATURE_PRIVNET))) { + if (lxcNeedNetworkNamespace(def)) { VIR_DEBUG("Enable network namespaces"); cflags |= CLONE_NEWNET; } diff --git a/tests/lxcxml2xmldata/lxc-hostdev.xml b/tests/lxcxml2xmldata/lxc-hostdev.xml index b022cc7..befe0db 100644 --- a/tests/lxcxml2xmldata/lxc-hostdev.xml +++ b/tests/lxcxml2xmldata/lxc-hostdev.xml @@ -31,5 +31,10 @@ <char>/dev/tty0</char> </source> </hostdev> + <hostdev mode='capabilities' type='net'> + <source> + <interface>eth0</interface> + </source> + </hostdev> </devices> </domain> Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (2)
-
Bogdan Purcareata
-
Daniel P. Berrange