[libvirt] [PATCH RFC] Add support for QEMU vhost-user feature

Howdy! This patch adds support for the QEMU vhost-user feature to Libvirt. vhost-user is an efficient interface between guests and userspace network stacks (vswitches). Our goal is to have vhost-user supported in the OpenStack Juno release. We hope it is okay to start code review on qemu, Libvirt, and OpenStack in parallel. We understand that code will have to be merged first into QEMU and then into Libvirt and then into OpenStack. References: vhost-user qemu patches: http://lists.gnu.org/archive/html/qemu-devel/2014-05/msg05443.html vhost-user high level description: http://www.virtualopensystems.com/en/solutions/guides/snabbswitch-qemu/ OpenStack feature that this is intended to support (Snabb NFV): http://snabb.co/nfv.html https://blueprints.launchpad.net/neutron/+spec/snabb-nfv-mech-driver Luke Gorrie (1): Add support for QEMU vhost-user feature docs/schemas/domaincommon.rng | 23 ++++++++ src/conf/domain_conf.c | 42 ++++++++++++++ src/conf/domain_conf.h | 5 ++ src/lxc/lxc_process.c | 1 + src/qemu/qemu_command.c | 66 +++++++++++++++++++++- src/uml/uml_conf.c | 5 ++ src/xenxs/xen_sxpr.c | 1 + tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- 8 files changed, 142 insertions(+), 3 deletions(-) -- 1.9.1

vhost-user is a networking backend based on unix domain sockets instead of tap devices and ioctl(). This makes it possible for userspace networking stacks (vswitches) to provide vhost-networking to guests. Signed-off-by: Luke Gorrie <luke@snabb.co> --- docs/schemas/domaincommon.rng | 23 ++++++++ src/conf/domain_conf.c | 42 ++++++++++++++ src/conf/domain_conf.h | 5 ++ src/lxc/lxc_process.c | 1 + src/qemu/qemu_command.c | 66 +++++++++++++++++++++- src/uml/uml_conf.c | 5 ++ src/xenxs/xen_sxpr.c | 1 + tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- 8 files changed, 142 insertions(+), 3 deletions(-) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index af67123..30e7daf 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1966,6 +1966,29 @@ <ref name="interface-options"/> </interleave> </group> + <!-- vhostuser interface: specify unix socket path, socket mode --> + <group> + <attribute name="type"> + <value>vhostuser</value> + </attribute> + <interleave> + <optional> + <element name="socket"> + <attribute name="path"> + <ref name="absFilePath"/> + </attribute> + <attribute name="mode"> + <choice> + <value>server</value> + <value>client</value> + </choice> + </attribute> + <empty/> + </element> + </optional> + <ref name="interface-options"/> + </interleave> + </group> <group> <attribute name="type"> <value>network</value> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f1df092..b47554a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -348,6 +348,7 @@ VIR_ENUM_IMPL(virDomainFSWrpolicy, VIR_DOMAIN_FS_WRPOLICY_LAST, VIR_ENUM_IMPL(virDomainNet, VIR_DOMAIN_NET_TYPE_LAST, "user", "ethernet", + "vhostuser", "server", "client", "mcast", @@ -1333,6 +1334,11 @@ void virDomainNetDefFree(virDomainNetDefPtr def) VIR_FREE(def->data.ethernet.ipaddr); break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + VIR_FREE(def->data.vhostuser.socket); + VIR_FREE(def->data.vhostuser.mode); + break; + case VIR_DOMAIN_NET_TYPE_SERVER: case VIR_DOMAIN_NET_TYPE_CLIENT: case VIR_DOMAIN_NET_TYPE_MCAST: @@ -6579,6 +6585,8 @@ virDomainNetDefParseXML(virDomainXMLOptionPtr xmlopt, char *script = NULL; char *address = NULL; char *port = NULL; + char *vhostuser_socket = NULL; + char *vhostuser_mode = NULL; char *model = NULL; char *backend = NULL; char *txmode = NULL; @@ -6636,6 +6644,11 @@ virDomainNetDefParseXML(virDomainXMLOptionPtr xmlopt, xmlStrEqual(cur->name, BAD_CAST "source")) { dev = virXMLPropString(cur, "dev"); mode = virXMLPropString(cur, "mode"); + } else if (!vhostuser_socket && !vhostuser_mode && + def->type == VIR_DOMAIN_NET_TYPE_VHOSTUSER && + xmlStrEqual(cur->name, BAD_CAST "socket")) { + vhostuser_socket = virXMLPropString(cur, "path"); + vhostuser_mode = virXMLPropString(cur, "mode"); } else if (!def->virtPortProfile && xmlStrEqual(cur->name, BAD_CAST "virtualport")) { if (def->type == VIR_DOMAIN_NET_TYPE_NETWORK) { @@ -6804,6 +6817,25 @@ virDomainNetDefParseXML(virDomainXMLOptionPtr xmlopt, } break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + if (vhostuser_socket == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("No <socket> 'path' attribute " + "specified with <interface type='vhostuser'/>")); + goto error; + } + if (vhostuser_mode == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("No <socket> 'mode' attribute " + "specified with <interface type='vhostuser'/>")); + goto error; + } + def->data.vhostuser.socket = vhostuser_socket; + def->data.vhostuser.mode = vhostuser_mode; + vhostuser_socket = NULL; + vhostuser_mode = NULL; + break; + case VIR_DOMAIN_NET_TYPE_BRIDGE: if (bridge == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -7038,6 +7070,8 @@ virDomainNetDefParseXML(virDomainXMLOptionPtr xmlopt, VIR_FREE(portgroup); VIR_FREE(address); VIR_FREE(port); + VIR_FREE(vhostuser_socket); + VIR_FREE(vhostuser_mode); VIR_FREE(ifname); VIR_FREE(dev); virDomainActualNetDefFree(actual); @@ -15764,6 +15798,14 @@ virDomainNetDefFormat(virBufferPtr buf, def->data.ethernet.ipaddr); break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + virBufferEscapeString(buf, "<socket path='%s'", + def->data.vhostuser.socket); + virBufferEscapeString(buf, " mode='%s'", + def->data.vhostuser.mode); + virBufferAddLit(buf, "/>\n"); + break; + case VIR_DOMAIN_NET_TYPE_BRIDGE: virBufferEscapeString(buf, "<source bridge='%s'/>\n", def->data.bridge.brname); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 8f17c74..b5ea2f6 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -791,6 +791,7 @@ struct _virDomainFSDef { enum virDomainNetType { VIR_DOMAIN_NET_TYPE_USER, VIR_DOMAIN_NET_TYPE_ETHERNET, + VIR_DOMAIN_NET_TYPE_VHOSTUSER, VIR_DOMAIN_NET_TYPE_SERVER, VIR_DOMAIN_NET_TYPE_CLIENT, VIR_DOMAIN_NET_TYPE_MCAST, @@ -877,6 +878,10 @@ struct _virDomainNetDef { char *ipaddr; } ethernet; struct { + char *socket; + char *mode; + } vhostuser; + struct { char *address; int port; } socket; /* any of NET_CLIENT or NET_SERVER or NET_MCAST */ diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 0aef13a..854f65d 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -437,6 +437,7 @@ static int virLXCProcessSetupInterfaces(virConnectPtr conn, case VIR_DOMAIN_NET_TYPE_USER: case VIR_DOMAIN_NET_TYPE_ETHERNET: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: case VIR_DOMAIN_NET_TYPE_SERVER: case VIR_DOMAIN_NET_TYPE_CLIENT: case VIR_DOMAIN_NET_TYPE_MCAST: diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1d5bce6..755bf9d 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7007,6 +7007,62 @@ qemuBuildGraphicsCommandLine(virQEMUDriverConfigPtr cfg, } static int +qemuBuildVhostuserCommandLine(virCommandPtr cmd, + virDomainDefPtr def, + virDomainNetDefPtr net, + virQEMUCapsPtr qemuCaps) +{ + virBuffer buf1 = VIR_BUFFER_INITIALIZER; + virBuffer buf2 = VIR_BUFFER_INITIALIZER; + + char* nic = NULL; + + if (!qemuDomainSupportsNetdev(def, qemuCaps, net)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Netdev support unavailable")); + goto error; + } + + if (!qemuDomainSupportsNicdev(def, qemuCaps, net)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Nicdev support unavailable")); + goto error; + } + + virBufferAsprintf(&buf1, "socket,id=char%s,path=%s%s", + net->info.alias, net->data.vhostuser.socket, + STRCASEEQ(net->data.vhostuser.mode, "server") ? ",server" : ""); + virBufferAsprintf(&buf2, "type=vhost-user,id=host%s,chardev=char%s", + net->info.alias, net->info.alias); + + if (virBufferError(&buf1) || virBufferError(&buf2)) { + virReportOOMError(); + goto error; + } + + virCommandAddArgList(cmd, "-chardev", virBufferContentAndReset(&buf1), + NULL); + virCommandAddArgList(cmd, "-netdev", virBufferContentAndReset(&buf2), + NULL); + + if (!(nic = qemuBuildNicDevStr(def, net, -1, 0, 0, qemuCaps))) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Error generating NIC -device string")); + goto error; + } + + virCommandAddArgList(cmd, "-device", nic, NULL); + + return 0; + +error: + virBufferFreeAndReset(&buf1); + virBufferFreeAndReset(&buf2); + + return -1; +} + +static int qemuBuildInterfaceCommandLine(virCommandPtr cmd, virQEMUDriverPtr driver, virConnectPtr conn, @@ -7029,6 +7085,10 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, int actualType = virDomainNetGetActualType(net); size_t i; + if (actualType == VIR_DOMAIN_NET_TYPE_VHOSTUSER) { + return qemuBuildVhostuserCommandLine(cmd, def, net, qemuCaps); + } + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { /* NET_TYPE_HOSTDEV devices are really hostdev devices, so * their commandlines are constructed with other hostdevs. @@ -7374,8 +7434,10 @@ qemuBuildCommandLine(virConnectPtr conn, def->emulator); goto error; } - virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", - cfg->hugepagePath, NULL); + virCommandAddArg(cmd, "-mem-path"); + virCommandAddArgFormat(cmd, "%s,prealloc=on,share=%s", + cfg->hugepagePath, + def->mem.nosharepages ? "off" : "on"); } if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_MLOCK)) { diff --git a/src/uml/uml_conf.c b/src/uml/uml_conf.c index 464d56d..d33d9b4 100644 --- a/src/uml/uml_conf.c +++ b/src/uml/uml_conf.c @@ -182,6 +182,11 @@ umlBuildCommandLineNet(virConnectPtr conn, } break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("vhostuser networking type not supported")); + goto error; + case VIR_DOMAIN_NET_TYPE_SERVER: virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("TCP server networking type not supported")); diff --git a/src/xenxs/xen_sxpr.c b/src/xenxs/xen_sxpr.c index 29316a4..ca26694 100644 --- a/src/xenxs/xen_sxpr.c +++ b/src/xenxs/xen_sxpr.c @@ -1933,6 +1933,7 @@ xenFormatSxprNet(virConnectPtr conn, virBufferEscapeSexpr(buf, "(ip '%s')", def->data.ethernet.ipaddr); break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: case VIR_DOMAIN_NET_TYPE_USER: case VIR_DOMAIN_NET_TYPE_SERVER: case VIR_DOMAIN_NET_TYPE_CLIENT: diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args index d42d9fc..5f8df71 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args @@ -1,5 +1,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/qemu -S -M \ -pc -m 214 -mem-prealloc -mem-path /dev/hugepages/libvirt/qemu -smp 1 \ +pc -m 214 -mem-path /dev/hugepages/libvirt/qemu,prealloc=on,share=on -smp 1 \ -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ /dev/HostVG/QEMUGuest1 -net none -serial none -parallel none -- 1.9.1

On Wed, May 28, 2014 at 10:46:27AM +0200, Luke Gorrie wrote:
vhost-user is a networking backend based on unix domain sockets instead of tap devices and ioctl(). This makes it possible for userspace networking stacks (vswitches) to provide vhost-networking to guests.
Signed-off-by: Luke Gorrie <luke@snabb.co> --- docs/schemas/domaincommon.rng | 23 ++++++++
Also need to update docs/formatdomain.html.in to describe the new XML syntax
src/conf/domain_conf.c | 42 ++++++++++++++ src/conf/domain_conf.h | 5 ++ src/lxc/lxc_process.c | 1 + src/qemu/qemu_command.c | 66 +++++++++++++++++++++- src/uml/uml_conf.c | 5 ++ src/xenxs/xen_sxpr.c | 1 + tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- 8 files changed, 142 insertions(+), 3 deletions(-)
You need to introduce a new data file in tests/qemuxml2argvdata/ and add it to tests/qemuxml2argvtest.c and tests/qemuxml2xmltest.c to validate correct XML parsing, formatting and QEMU CLI arg generation for the new NIC type.
@@ -6804,6 +6817,25 @@ virDomainNetDefParseXML(virDomainXMLOptionPtr xmlopt, } break;
+ case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + if (vhostuser_socket == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("No <socket> 'path' attribute "
_( should be aligned with the 'VIR_' on the line above - it indented 2 more spaces.
+ "specified with <interface type='vhostuser'/>")); + goto error;
Indentation is wonky for the goto.
+ } + if (vhostuser_mode == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("No <socket> 'mode' attribute "
And here too
+ "specified with <interface type='vhostuser'/>")); + goto error;
And here too
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1d5bce6..755bf9d 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7007,6 +7007,62 @@ qemuBuildGraphicsCommandLine(virQEMUDriverConfigPtr cfg, }
static int +qemuBuildVhostuserCommandLine(virCommandPtr cmd, + virDomainDefPtr def, + virDomainNetDefPtr net, + virQEMUCapsPtr qemuCaps) +{ + virBuffer buf1 = VIR_BUFFER_INITIALIZER; + virBuffer buf2 = VIR_BUFFER_INITIALIZER; + + char* nic = NULL; + + if (!qemuDomainSupportsNetdev(def, qemuCaps, net)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Netdev support unavailable")); + goto error; + } + + if (!qemuDomainSupportsNicdev(def, qemuCaps, net)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Nicdev support unavailable")); + goto error; + } + + virBufferAsprintf(&buf1, "socket,id=char%s,path=%s%s", + net->info.alias, net->data.vhostuser.socket, + STRCASEEQ(net->data.vhostuser.mode, "server") ? ",server" : "");
Again these trailing lines should be aligned with the '&buf1' - eg as is done in the line below:
+ virBufferAsprintf(&buf2, "type=vhost-user,id=host%s,chardev=char%s", + net->info.alias, net->info.alias); + + if (virBufferError(&buf1) || virBufferError(&buf2)) { + virReportOOMError(); + goto error; + } + + virCommandAddArgList(cmd, "-chardev", virBufferContentAndReset(&buf1), + NULL); + virCommandAddArgList(cmd, "-netdev", virBufferContentAndReset(&buf2), + NULL); + + if (!(nic = qemuBuildNicDevStr(def, net, -1, 0, 0, qemuCaps))) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Error generating NIC -device string")); + goto error; + } + + virCommandAddArgList(cmd, "-device", nic, NULL); + + return 0; + +error: + virBufferFreeAndReset(&buf1); + virBufferFreeAndReset(&buf2); + + return -1; +} + +static int qemuBuildInterfaceCommandLine(virCommandPtr cmd, virQEMUDriverPtr driver, virConnectPtr conn, @@ -7029,6 +7085,10 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, int actualType = virDomainNetGetActualType(net); size_t i;
+ if (actualType == VIR_DOMAIN_NET_TYPE_VHOSTUSER) { + return qemuBuildVhostuserCommandLine(cmd, def, net, qemuCaps); + }
No need for '{...}' on single line if()s
+ if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { /* NET_TYPE_HOSTDEV devices are really hostdev devices, so * their commandlines are constructed with other hostdevs. @@ -7374,8 +7434,10 @@ qemuBuildCommandLine(virConnectPtr conn, def->emulator); goto error; } - virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", - cfg->hugepagePath, NULL); + virCommandAddArg(cmd, "-mem-path"); + virCommandAddArgFormat(cmd, "%s,prealloc=on,share=%s", + cfg->hugepagePath, + def->mem.nosharepages ? "off" : "on");
This is totally bogus and should be removed from the patch.
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args index d42d9fc..5f8df71 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args @@ -1,5 +1,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/qemu -S -M \ -pc -m 214 -mem-prealloc -mem-path /dev/hugepages/libvirt/qemu -smp 1 \ +pc -m 214 -mem-path /dev/hugepages/libvirt/qemu,prealloc=on,share=on -smp 1 \ -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ /dev/HostVG/QEMUGuest1 -net none -serial none -parallel none
This change is also bogus. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Wed, May 28, 2014 at 10:46:27AM +0200, Luke Gorrie wrote:
vhost-user is a networking backend based on unix domain sockets instead of tap devices and ioctl(). This makes it possible for userspace networking stacks (vswitches) to provide vhost-networking to guests.
Signed-off-by: Luke Gorrie <luke@snabb.co>
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 8f17c74..b5ea2f6 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -791,6 +791,7 @@ struct _virDomainFSDef { enum virDomainNetType { VIR_DOMAIN_NET_TYPE_USER, VIR_DOMAIN_NET_TYPE_ETHERNET, + VIR_DOMAIN_NET_TYPE_VHOSTUSER, VIR_DOMAIN_NET_TYPE_SERVER, VIR_DOMAIN_NET_TYPE_CLIENT, VIR_DOMAIN_NET_TYPE_MCAST, @@ -877,6 +878,10 @@ struct _virDomainNetDef { char *ipaddr; } ethernet; struct { + char *socket; + char *mode; + } vhostuser; + struct {
So your QEMU command line generation code shows that QEMU is actually allowing any chardev here, not restricted to a UNIX socket. So you should be using a virDomainChrSourceDefPtr chardev; instead of just hardcoding a UNIX socket path + mode.
+ + virCommandAddArgList(cmd, "-chardev", virBufferContentAndReset(&buf1), + NULL); + virCommandAddArgList(cmd, "-netdev", virBufferContentAndReset(&buf2), + NULL);
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 05/28/2014 11:46 AM, Luke Gorrie wrote:
vhost-user is a networking backend based on unix domain sockets instead of tap devices and ioctl(). This makes it possible for userspace networking stacks (vswitches) to provide vhost-networking to guests.
Signed-off-by: Luke Gorrie <luke@snabb.co> --- docs/schemas/domaincommon.rng | 23 ++++++++ src/conf/domain_conf.c | 42 ++++++++++++++ src/conf/domain_conf.h | 5 ++ src/lxc/lxc_process.c | 1 + src/qemu/qemu_command.c | 66 +++++++++++++++++++++- src/uml/uml_conf.c | 5 ++ src/xenxs/xen_sxpr.c | 1 + tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- 8 files changed, 142 insertions(+), 3 deletions(-)
Daniel has already pointed out that you need to supply a patch to update formatdomain.html.in to document this new interface type. I would also recommend a single simple example of the new <interface> type in the commit log, just to make it easy for someone looking for something in the git log (others may disagree with this - anyone?) So, lacking an example, I deduce from the XML schema and code that it would be something like this: <interface type='vhostuser'> <model type='virtio'/> <socket path='xyzzy' mode='server'/> ... </interface> However, the standard for other interface types is: <interface type='blah'> <model type='virtio'/> <source [attributes describing connection to outside world]/> ... </interface> so it would follow precedent much more closely if you did this: <interface type='vhostuser'> <model type='virtio'/> <source path='xyzzy' mode='server'/> ... </interface> Daniel pointed out that qemu actually accepts any generic character device to connect to, and it makes sense to support this by using virDomainChrSourceDefPtr. However, the parse function for virDomainChrSourceDef requires that the type of character device be already parsed and set in the object, implying that it needs to be in the config somewhere outside of the <source> element itself. Related to that, I'm wondering if maybe instead of setting the type of <interface> to the very specific "vhostuser", maybe it could be set to the type of character device ("socket", "tcp", etc), and the fact that vhostuser is used could be specified in exactly the same way that vhost-net is turned on for tap devices - with <driver name="vhost"/>. So for example, you would have this: <interface type='unix'> <!-- for a unix domain socket --> <model type='virtio'/> <driver name='vhost'/> <source path='xyzzy' mode='server'/> ... </interface> This way we would already have a type to set in the virDomainChrSourceDef object before calling virDomainChrSourceDefParseXML(). (we would just have to do some post-processing to make sure that we emit a reasonable error for any ChrSourceDef type that isn't supported by qemu's vhostuser device, and likewise if someone specified one of those types and forgot the "<driver name='vhost'/>"). Finally, it would be very nice to also define libvirt networks with <forward mode="vhostuser"> (or to follow what I said in the previous paragraph: <forward mode='socket'>, with the info for the source contained in attributes/subelements of the <forward> (similar to what has been done for <forward mode='bridge|private|passthrough|hostdev'). This way the specifics of the network connection aren't hardcoded into the domain definitions, and can be changed for all domains by changing them in a single place; also this could make it possible to live migrate from one host that doesn't support vhostuser onto another host that does (and take advantage of this new capability). (I can see one departure here from what is done with tap-based network types though - in their case the <driver name='vhost'/> is specified in the domain's <interface>, *not* in the network, so it is possible to have some domains that use vhost-net and some that don't. In the case of vhostuser, that doesn't really make sense (maybe it shouldn't be done that was for vhost-net either, but that's a separate issue).
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index af67123..30e7daf 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1966,6 +1966,29 @@ <ref name="interface-options"/> </interleave> </group> + <!-- vhostuser interface: specify unix socket path, socket mode --> + <group> + <attribute name="type"> + <value>vhostuser</value> + </attribute> + <interleave> + <optional>
I was going to say that it seems odd to make this optional in the schmema when it really is required in practice, but then I saw that the schemas for all the existing interface types do exactly the same thing...

On Thu, Jun 05, 2014 at 05:09:02PM +0300, Laine Stump wrote:
Daniel pointed out that qemu actually accepts any generic character device to connect to, and it makes sense to support this by using virDomainChrSourceDefPtr. However, the parse function for virDomainChrSourceDef requires that the type of character device be already parsed and set in the object, implying that it needs to be in the config somewhere outside of the <source> element itself.
Related to that, I'm wondering if maybe instead of setting the type of <interface> to the very specific "vhostuser", maybe it could be set to the type of character device ("socket", "tcp", etc), and the fact that vhostuser is used could be specified in exactly the same way that vhost-net is turned on for tap devices - with <driver name="vhost"/>. So for example, you would have this:
<interface type='unix'> <!-- for a unix domain socket --> <model type='virtio'/> <driver name='vhost'/> <source path='xyzzy' mode='server'/> ... </interface>
The problem with this is that some of the chardev 'type' options clash with some of the interface 'type' options, so I think we must keep them separated. I do agree that 'vhostuser' is a somewhat QEMU specific name - at least with 'tap' this was a common terminology across multiple OS and projects. This is really a variant on 'tap' which is avoiding use of kernel devices. To perhaps we should call this new mode 'usertap' ? Or another idea would be to call it 'tapstream' eg <interface type='usertap'> <model type='virtio'/> <driver name='vhost'/> <source type='unix'> ...chardev source schema... </source> </interface> Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 06/05/2014 05:35 PM, Daniel P. Berrange wrote:
On Thu, Jun 05, 2014 at 05:09:02PM +0300, Laine Stump wrote:
Daniel pointed out that qemu actually accepts any generic character device to connect to, and it makes sense to support this by using virDomainChrSourceDefPtr. However, the parse function for virDomainChrSourceDef requires that the type of character device be already parsed and set in the object, implying that it needs to be in the config somewhere outside of the <source> element itself.
Related to that, I'm wondering if maybe instead of setting the type of <interface> to the very specific "vhostuser", maybe it could be set to the type of character device ("socket", "tcp", etc), and the fact that vhostuser is used could be specified in exactly the same way that vhost-net is turned on for tap devices - with <driver name="vhost"/>. So for example, you would have this:
<interface type='unix'> <!-- for a unix domain socket --> <model type='virtio'/> <driver name='vhost'/> <source path='xyzzy' mode='server'/> ... </interface> The problem with this is that some of the chardev 'type' options clash with some of the interface 'type' options, so I think we must keep them separated.
I thought about that when writing my earlier mail, and without actually looking at the list of accepted types for both I assumed it was the case, but figured that the disambiguation of functionality could be handled by the presence/absence of <driver name='vhost'/>. After checking, I see that currently none of the the chardev types actually clash with interface types: virDomainChr: "null", "vc", "pty", "dev", "file", "pipe", "stdio", "udp", "tcp", "unix", "spicevmc", "spiceport", "nmdm" virDomainNet: "user", "ethernet", "server", "client", "mcast", "network", "bridge", "internal", "direct", "hostdev" But this is really just because the two implementations are inconsistent in details with each other, if they were consistent, there *would* be clashes (e.g. what is type='udp' in chardev is nearly identical in function to an interface with type='client' (assuming subordinate options are set correctly)). This means that if we tried to just add the chardev types to the interface types, likely people would confuse one with the other and make a lot of non-working configs. So I think I do agree with you that it would be cleaner to have a single toplevel type, both to avoid the need to translate the same value between the two different enums, and to prevent confusion between nearly identical types, as well potential *real* clashes when we add something to one or the other.
I do agree that 'vhostuser' is a somewhat QEMU specific name - at least with 'tap' this was a common terminology across multiple OS and projects. This is really a variant on 'tap' which is avoiding use of kernel devices. To perhaps we should call this new mode 'usertap' ? Or another idea would be to call it 'tapstream' eg
For some reason I'm more partial to "tapstream". Maybe because it doesn't force any future uses to only be in userspace. But at some level a name is just a name, so...
<interface type='usertap'> <model type='virtio'/> <driver name='vhost'/> <source type='unix'> ...chardev source schema... </source>
If we were to use the existing virDomainChrSourceDefFormat, the <source> element would end up looking like this: <source type='unix'> <source path='xyzzy' mode='listen'/> </source> which is a bit redundant. What if instead, we enhanced the <source> element to allow specifying the type as an attribute of that element itself (at least for this case)? (We did something similar for <address - it can include "type='pci'" in some contexts - see virDevicePCIAddressFormat; it has an arg bool includeTypeInAddr). If we did this (by adding a small bit of code to virDomainChrSourceDef(Format|ParseXML), the source address could be the simpler and easier on the eye: <source type='unix' path='xyzzy' mode='listen'/>

On 06/06/2014 11:49 AM, Laine Stump wrote:
On 06/05/2014 05:35 PM, Daniel P. Berrange wrote:
I do agree that 'vhostuser' is a somewhat QEMU specific name - at least with 'tap' this was a common terminology across multiple OS and projects. This is really a variant on 'tap' which is avoiding use of kernel devices. To perhaps we should call this new mode 'usertap' ? Or another idea would be to call it 'tapstream' eg For some reason I'm more partial to "tapstream". Maybe because it doesn't force any future uses to only be in userspace. But at some level a name is just a name, so...
I think I've changed my opinion. After looking at the high level description of vhost-user, I found that it isn't implemented simply using a stream - the stream is used by qemu to learn the location of a shared memory region, and the shared memory region is what is actually used to send/receive the packets. So it isn't just a stream. Maybe it *is* reasonable to focus on the "this all happens in userspace" aspect, and simply user <interface type='user'> ? This would mean that the xml would look like this: <interface type='user'> <model type='virtio'/> <driver name='vhost'/> <source type='unix' path='xyzzy' mode='connect'/> ... </interface> Currently the "user" interface type has only one backend, which is the usermode networking of qemu. This would give it a 2nd backend, "vhost", which would be implemented by vhost-user. (BTW, I have a question - is only root allowed to connect to the vhost-user socket? Is there any selinux protection of it that needs to be accounted for? If not, then you may want to consider doing this to prevent a rogue (e.g. a qemu guest that exploits some hypothetical exploit in qemu to break out into the qemu process) from wreaking havoc with your network infrastructure. Since libvirt usually starts up qemu processes with a non-privileged uid, if root access is required to open the socket, qemu needs to be able to accept an open file descriptor for this rather than a path, and libvirt should be opening the socket, then passing the fd to qemu.)

On Fri, Jun 06, 2014 at 12:11:57PM +0300, Laine Stump wrote:
On 06/06/2014 11:49 AM, Laine Stump wrote:
On 06/05/2014 05:35 PM, Daniel P. Berrange wrote:
I do agree that 'vhostuser' is a somewhat QEMU specific name - at least with 'tap' this was a common terminology across multiple OS and projects. This is really a variant on 'tap' which is avoiding use of kernel devices. To perhaps we should call this new mode 'usertap' ? Or another idea would be to call it 'tapstream' eg For some reason I'm more partial to "tapstream". Maybe because it doesn't force any future uses to only be in userspace. But at some level a name is just a name, so...
I think I've changed my opinion. After looking at the high level description of vhost-user, I found that it isn't implemented simply using a stream - the stream is used by qemu to learn the location of a shared memory region, and the shared memory region is what is actually used to send/receive the packets. So it isn't just a stream.
Maybe it *is* reasonable to focus on the "this all happens in userspace" aspect, and simply user <interface type='user'> ? This would mean that the xml would look like this:
<interface type='user'> <model type='virtio'/> <driver name='vhost'/> <source type='unix' path='xyzzy' mode='connect'/> ... </interface>
Currently the "user" interface type has only one backend, which is the usermode networking of qemu. This would give it a 2nd backend, "vhost", which would be implemented by vhost-user.
No, this isn't good. type='user' specifically means that we're applying NAT via the SLIRP ethernet munging layer. This is very different to what vhost user is doing. In fact looking at the QEMU code again, it says that this QEMU net backend is in fact sending data over the chardev using the vhost packet format. So in fact I think the choice was right in the first patch - type='vhostuser' is a semantically good thing to use because the data is indeed in a vhost specific format.
(BTW, I have a question - is only root allowed to connect to the vhost-user socket? Is there any selinux protection of it that needs to be accounted for? If not, then you may want to consider doing this to prevent a rogue (e.g. a qemu guest that exploits some hypothetical exploit in qemu to break out into the qemu process) from wreaking havoc with your network infrastructure. Since libvirt usually starts up qemu processes with a non-privileged uid, if root access is required to open the socket, qemu needs to be able to accept an open file descriptor for this rather than a path, and libvirt should be opening the socket, then passing the fd to qemu.)
I think this is all the same as any chardev backend - we need to apply whatever security manager labelling code we already have for chardevs to this new net backend. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
-
Daniel P. Berrange
-
Laine Stump
-
Luke Gorrie