[libvirt] [ 0/3] more useful network ethernet type devices

By default libvirt does not manages ethernet devices, bypass all control to qemu. This series enable creating tap devices via libvirt, assign mac, ip address and routes. Also when use specif linkstate down in xml, phisicaly down tap device. This is very useful in case of using bird, quagga or somethink like this to forward traffic to vm. Main question about peer address assign. How to deal with absent peer address? Now i'm simplify check that peer address is valid ip address, but may be this is wrong. Vasiliy Tolstov (3): autocreate tap device for ethernet network type allow to specify peer to ip element in domain network assign peer address if it provided in network docs/schemas/interface.rng | 6 ++ include/libvirt/libvirt-domain.h | 1 + src/conf/domain_conf.c | 14 +++- src/conf/domain_conf.h | 1 + src/conf/network_conf.c | 26 ++++++- src/conf/network_conf.h | 1 + src/lxc/lxc_container.c | 2 +- src/network/bridge_driver.c | 2 +- src/qemu/qemu_command.c | 38 +++++----- src/qemu/qemu_hotplug.c | 36 +++++++-- src/qemu/qemu_interface.c | 158 +++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_interface.h | 5 ++ src/qemu/qemu_process.c | 6 ++ src/util/virnetdev.c | 43 +++++++---- src/util/virnetdev.h | 1 + 15 files changed, 300 insertions(+), 40 deletions(-) -- 2.7.3

If a user specify network type ethernet, then create it via libvirt and run script if it provided. After this commit user does not need to run external script to create tap device or add root permissions to qemu process. Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- src/qemu/qemu_command.c | 38 ++++++----- src/qemu/qemu_hotplug.c | 36 +++++++++-- src/qemu/qemu_interface.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_interface.h | 5 ++ src/qemu/qemu_process.c | 6 ++ 5 files changed, 221 insertions(+), 22 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 3e7f1fe3ddf1..30eeab79a608 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3220,6 +3220,7 @@ qemuBuildHostNetStr(virDomainNetDefPtr net, case VIR_DOMAIN_NET_TYPE_BRIDGE: case VIR_DOMAIN_NET_TYPE_NETWORK: case VIR_DOMAIN_NET_TYPE_DIRECT: + case VIR_DOMAIN_NET_TYPE_ETHERNET: virBufferAsprintf(&buf, "tap%c", type_sep); /* for one tapfd 'fd=' shall be used, * for more than one 'fds=' is the right choice */ @@ -3237,20 +3238,6 @@ qemuBuildHostNetStr(virDomainNetDefPtr net, is_tap = true; break; - case VIR_DOMAIN_NET_TYPE_ETHERNET: - virBufferAddLit(&buf, "tap"); - if (net->ifname) { - virBufferAsprintf(&buf, "%cifname=%s", type_sep, net->ifname); - type_sep = ','; - } - if (net->script) { - virBufferAsprintf(&buf, "%cscript=%s", type_sep, - net->script); - type_sep = ','; - } - is_tap = true; - break; - case VIR_DOMAIN_NET_TYPE_CLIENT: virBufferAsprintf(&buf, "socket%cconnect=%s:%d", type_sep, @@ -7783,7 +7770,8 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, if (net->driver.virtio.queues > 0 && !(actualType == VIR_DOMAIN_NET_TYPE_NETWORK || actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || - actualType == VIR_DOMAIN_NET_TYPE_DIRECT)) { + actualType == VIR_DOMAIN_NET_TYPE_DIRECT || + actualType == VIR_DOMAIN_NET_TYPE_ETHERNET)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Multiqueue network is not supported for: %s"), virDomainNetTypeToString(actualType)); @@ -7793,7 +7781,8 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, /* and only TAP devices support nwfilter rules */ if (net->filter && !(actualType == VIR_DOMAIN_NET_TYPE_NETWORK || - actualType == VIR_DOMAIN_NET_TYPE_BRIDGE)) { + actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || + actualType == VIR_DOMAIN_NET_TYPE_ETHERNET)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("filterref is not supported for " "network interfaces of type %s"), @@ -7803,7 +7792,8 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, if (net->backend.tap && !(actualType == VIR_DOMAIN_NET_TYPE_NETWORK || - actualType == VIR_DOMAIN_NET_TYPE_BRIDGE)) { + actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || + actualType == VIR_DOMAIN_NET_TYPE_ETHERNET)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Custom tap device path is not supported for: %s"), virDomainNetTypeToString(actualType)); @@ -7841,6 +7831,20 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd, if (qemuInterfaceDirectConnect(def, driver, net, tapfd, tapfdSize, vmop) < 0) goto cleanup; + } else if (actualType == VIR_DOMAIN_NET_TYPE_ETHERNET) { + tapfdSize = net->driver.virtio.queues; + if (!tapfdSize) + tapfdSize = 1; + + if (VIR_ALLOC_N(tapfd, tapfdSize) < 0 || + VIR_ALLOC_N(tapfdName, tapfdSize) < 0) + goto cleanup; + + memset(tapfd, -1, tapfdSize * sizeof(tapfd[0])); + + if (qemuInterfaceEthernetConnect(def, driver, net, + tapfd, tapfdSize) < 0) + goto cleanup; } /* For types whose implementations use a netdev on the host, add diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index b580283d6273..4c224be25632 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -896,7 +896,8 @@ int qemuDomainAttachNetDevice(virConnectPtr conn, if (net->driver.virtio.queues > 0 && !(actualType == VIR_DOMAIN_NET_TYPE_NETWORK || actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || - actualType == VIR_DOMAIN_NET_TYPE_DIRECT)) { + actualType == VIR_DOMAIN_NET_TYPE_DIRECT || + actualType == VIR_DOMAIN_NET_TYPE_ETHERNET)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Multiqueue network is not supported for: %s"), virDomainNetTypeToString(actualType)); @@ -906,7 +907,8 @@ int qemuDomainAttachNetDevice(virConnectPtr conn, /* and only TAP devices support nwfilter rules */ if (net->filter && !(actualType == VIR_DOMAIN_NET_TYPE_NETWORK || - actualType == VIR_DOMAIN_NET_TYPE_BRIDGE)) { + actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || + actualType == VIR_DOMAIN_NET_TYPE_ETHERNET)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("filterref is not supported for " "network interfaces of type %s"), @@ -951,10 +953,19 @@ int qemuDomainAttachNetDevice(virConnectPtr conn, vhostfd, &vhostfdSize) < 0) goto cleanup; } else if (actualType == VIR_DOMAIN_NET_TYPE_ETHERNET) { - vhostfdSize = 1; - if (VIR_ALLOC(vhostfd) < 0) + tapfdSize = vhostfdSize = net->driver.virtio.queues; + if (!tapfdSize) + tapfdSize = vhostfdSize = 1; + if (VIR_ALLOC_N(tapfd, tapfdSize) < 0) goto cleanup; - *vhostfd = -1; + memset(tapfd, -1, sizeof(*tapfd) * tapfdSize); + if (VIR_ALLOC_N(vhostfd, vhostfdSize) < 0) + goto cleanup; + memset(vhostfd, -1, sizeof(*vhostfd) * vhostfdSize); + if (qemuInterfaceEthernetConnect(vm->def, driver, net, + tapfd, tapfdSize) < 0) + goto cleanup; + iface_connected = true; if (qemuInterfaceOpenVhostNet(vm->def, net, priv->qemuCaps, vhostfd, &vhostfdSize) < 0) goto cleanup; @@ -2205,6 +2216,21 @@ int qemuDomainChangeNetLinkState(virQEMUDriverPtr driver, if (ret < 0) goto cleanup; + if (virDomainNetGetActualType(dev) == VIR_DOMAIN_NET_TYPE_ETHERNET) { + switch (linkstate) { + case VIR_DOMAIN_NET_INTERFACE_LINK_STATE_UP: + case VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DEFAULT: + if ((ret = virNetDevSetOnline(dev->ifname, true)) < 0) + goto cleanup; + break; + + case VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN: + if ((ret = virNetDevSetOnline(dev->ifname, false)) < 0) + goto cleanup; + break; + } + } + /* modify the device configuration */ dev->linkstate = linkstate; diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c index 08af13358113..bab0ff5b2006 100644 --- a/src/qemu/qemu_interface.c +++ b/src/qemu/qemu_interface.c @@ -387,6 +387,164 @@ qemuCreateInBridgePortWithHelper(virQEMUDriverConfigPtr cfg, return *tapfd < 0 ? -1 : 0; } +/** + * qemuExecuteEthernetScript: + * @ifname: the interface name + * @script: the script name + * + * This function executes script for new tap device created by libvirt. + * Returns 0 in case of success or -1 on failure + */ +static int +qemuExecuteEthernetScript(const char *ifname, const char *script) +{ + virCommandPtr cmd; + int ret; + + cmd = virCommandNew(script); + virCommandAddArgFormat(cmd, "%s", ifname); + virCommandClearCaps(cmd); +#ifdef CAP_NET_ADMIN + virCommandAllowCap(cmd, CAP_NET_ADMIN); +#endif + virCommandAddEnvPassCommon(cmd); + + ret = virCommandRun(cmd, NULL); + + virCommandFree(cmd); + return ret; +} + +/* qemuInterfaceEthernetConnect: + * @def: the definition of the VM + * @driver: qemu driver data + * @net: pointer to the VM's interface description + * @tapfd: array of file descriptor return value for the new device + * @tapfdsize: number of file descriptors in @tapfd + * + * Called *only* called if actualType is VIR_DOMAIN_NET_TYPE_ETHERNET + * (i.e. if the connection is made with a tap device) + */ +int +qemuInterfaceEthernetConnect(virDomainDefPtr def, + virQEMUDriverPtr driver, + virDomainNetDefPtr net, + int *tapfd, + size_t tapfdSize) +{ + virMacAddr tapmac; + size_t j; + int ret = -1; + unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP; + bool template_ifname = false; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + const char *tunpath = "/dev/net/tun"; + + if (net->backend.tap) { + tunpath = net->backend.tap; + if (!(virQEMUDriverIsPrivileged(driver))) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cannot use custom tap device in session mode")); + goto cleanup; + } + } + + if (!net->ifname || + STRPREFIX(net->ifname, VIR_NET_GENERATED_PREFIX) || + strchr(net->ifname, '%')) { + VIR_FREE(net->ifname); + if (VIR_STRDUP(net->ifname, VIR_NET_GENERATED_PREFIX "%d") < 0) + goto cleanup; + /* avoid exposing vnet%d in getXMLDesc or error outputs */ + template_ifname = true; + } + + if (net->model && STREQ(net->model, "virtio")) + tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR; + + if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, + tap_create_flags) < 0) { + virDomainAuditNetDevice(def, net, tunpath, false); + goto cleanup; + } + + virDomainAuditNetDevice(def, net, tunpath, true); + virMacAddrSet(&tapmac, &net->mac); + tapmac.addr[0] = 0xFE; + + if (virNetDevSetMAC(net->ifname, &tapmac) < 0) + goto cleanup; + + for (j = 0; j < net->nips; j++) { + virDomainNetIpDefPtr ip = net->ips[j]; + unsigned int prefix = (ip->prefix > 0) ? ip->prefix : + VIR_SOCKET_ADDR_DEFAULT_PREFIX; + char *ipStr = virSocketAddrFormat(&ip->address); + + VIR_DEBUG("Adding IP address '%s/%u' to '%s'", + ipStr, ip->prefix, net->ifname); + + if (virNetDevSetIPAddress(net->ifname, &ip->address, &ip->peer, prefix) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, + _("Failed to set IP address '%s' on %s"), + ipStr, net->ifname); + VIR_FREE(ipStr); + goto cleanup; + } + VIR_FREE(ipStr); + } + + if (net->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_UP || + net->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DEFAULT) { + if (virNetDevSetOnline(net->ifname, true) < 0) + goto cleanup; + + /* Set the routes */ + for (j = 0; j < net->nroutes; j++) { + virNetworkRouteDefPtr route = net->routes[j]; + + if (virNetDevAddRoute(net->ifname, + virNetworkRouteDefGetAddress(route), + virNetworkRouteDefGetPrefix(route), + virNetworkRouteDefGetGateway(route), + virNetworkRouteDefGetMetric(route)) < 0) { + goto cleanup; + } + } + } + + if (net->script) { + if (qemuExecuteEthernetScript(net->ifname, net->script) < 0) + goto cleanup; + } + + if (cfg->macFilter && + ebtablesAddForwardAllowIn(driver->ebtables, + net->ifname, + &net->mac) < 0) + goto cleanup; + + if (net->filter && + virDomainConfNWFilterInstantiate(def->uuid, net) < 0) { + goto cleanup; + } + + ret = 0; + + cleanup: + if (ret < 0) { + size_t i; + for (i = 0; i < tapfdSize && tapfd[i] >= 0; i++) + VIR_FORCE_CLOSE(tapfd[i]); + if (template_ifname) + VIR_FREE(net->ifname); + } + virObjectUnref(cfg); + + return ret; +} + + /* qemuInterfaceBridgeConnect: * @def: the definition of the VM * @driver: qemu driver data diff --git a/src/qemu/qemu_interface.h b/src/qemu/qemu_interface.h index bf4dedf84ac9..66ead2aef81c 100644 --- a/src/qemu/qemu_interface.h +++ b/src/qemu/qemu_interface.h @@ -40,6 +40,11 @@ int qemuInterfaceDirectConnect(virDomainDefPtr def, int *tapfd, size_t tapfdSize, virNetDevVPortProfileOp vmop); +int qemuInterfaceEthernetConnect(virDomainDefPtr def, + virQEMUDriverPtr driver, + virDomainNetDefPtr net, + int *tapfd, + size_t tapfdSize); int qemuInterfaceBridgeConnect(virDomainDefPtr def, virQEMUDriverPtr driver, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 14e4629e66f9..ef82581b130c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -5699,6 +5699,12 @@ void qemuProcessStop(virQEMUDriverPtr driver, virDomainNetGetActualVirtPortProfile(net), cfg->stateDir)); break; + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (net->ifname) { + ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap)); + VIR_FREE(net->ifname); + } + break; case VIR_DOMAIN_NET_TYPE_BRIDGE: case VIR_DOMAIN_NET_TYPE_NETWORK: #ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP -- 2.7.3

On Fri, Mar 18, 2016 at 10:19:41PM +0000, Vasiliy Tolstov wrote:
If a user specify network type ethernet, then create it via libvirt and run script if it provided. After this commit user does not need to run external script to create tap device or add root permissions to qemu process.
Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- src/qemu/qemu_command.c | 38 ++++++----- src/qemu/qemu_hotplug.c | 36 +++++++++-- src/qemu/qemu_interface.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_interface.h | 5 ++ src/qemu/qemu_process.c | 6 ++ 5 files changed, 221 insertions(+), 22 deletions(-)
diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c index 08af13358113..bab0ff5b2006 100644 --- a/src/qemu/qemu_interface.c +++ b/src/qemu/qemu_interface.c +int +qemuInterfaceEthernetConnect(virDomainDefPtr def, + virQEMUDriverPtr driver, + virDomainNetDefPtr net, + int *tapfd, + size_t tapfdSize) +{ + virMacAddr tapmac; + size_t j; + int ret = -1; + unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP; + bool template_ifname = false; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + const char *tunpath = "/dev/net/tun"; + + if (net->backend.tap) { + tunpath = net->backend.tap; + if (!(virQEMUDriverIsPrivileged(driver))) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cannot use custom tap device in session mode")); + goto cleanup; + } + } + + if (!net->ifname || + STRPREFIX(net->ifname, VIR_NET_GENERATED_PREFIX) || + strchr(net->ifname, '%')) { + VIR_FREE(net->ifname); + if (VIR_STRDUP(net->ifname, VIR_NET_GENERATED_PREFIX "%d") < 0) + goto cleanup; + /* avoid exposing vnet%d in getXMLDesc or error outputs */ + template_ifname = true; + } + + if (net->model && STREQ(net->model, "virtio")) + tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR; + + if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, + tap_create_flags) < 0) { + virDomainAuditNetDevice(def, net, tunpath, false); + goto cleanup; + } + + virDomainAuditNetDevice(def, net, tunpath, true); + virMacAddrSet(&tapmac, &net->mac); + tapmac.addr[0] = 0xFE; + + if (virNetDevSetMAC(net->ifname, &tapmac) < 0) + goto cleanup; + + for (j = 0; j < net->nips; j++) { + virDomainNetIpDefPtr ip = net->ips[j]; + unsigned int prefix = (ip->prefix > 0) ? ip->prefix : + VIR_SOCKET_ADDR_DEFAULT_PREFIX; + char *ipStr = virSocketAddrFormat(&ip->address); + + VIR_DEBUG("Adding IP address '%s/%u' to '%s'", + ipStr, ip->prefix, net->ifname); + + if (virNetDevSetIPAddress(net->ifname, &ip->address, &ip->peer, prefix) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, + _("Failed to set IP address '%s' on %s"), + ipStr, net->ifname); + VIR_FREE(ipStr); + goto cleanup; + } + VIR_FREE(ipStr); + } + + if (net->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_UP || + net->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DEFAULT) { + if (virNetDevSetOnline(net->ifname, true) < 0) + goto cleanup; + + /* Set the routes */ + for (j = 0; j < net->nroutes; j++) { + virNetworkRouteDefPtr route = net->routes[j]; + + if (virNetDevAddRoute(net->ifname, + virNetworkRouteDefGetAddress(route), + virNetworkRouteDefGetPrefix(route), + virNetworkRouteDefGetGateway(route), + virNetworkRouteDefGetMetric(route)) < 0) { + goto cleanup; + } + } + }
This is still doing alot more work than QEMU would do if it crated the TAP device. In particular the adding of IP addresses & routes is not something QEMU would do - this is left for the specified network script todo. We can have a discussion about whether to support setting of routes and IP's on this device, vs inventing a new type of device for this purpose, but that should be in a separate patch. RAther than have you re-post again, I'll commit just this patch with this IP & route stuff removed now. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

2016-03-23 14:34 GMT+03:00 Daniel P. Berrange <berrange@redhat.com>:
RAther than have you re-post again, I'll commit just this patch with this IP & route stuff removed now.
Ok, no problem with this. If you apply only part, that creates tap devices, this is good start for me. -- Vasiliy Tolstov, e-mail: v.tolstov@selfip.ru

On Wed, Mar 23, 2016 at 02:39:34PM +0300, Vasiliy Tolstov wrote:
2016-03-23 14:34 GMT+03:00 Daniel P. Berrange <berrange@redhat.com>:
RAther than have you re-post again, I'll commit just this patch with this IP & route stuff removed now.
Ok, no problem with this. If you apply only part, that creates tap devices, this is good start for me.
Ok it is merged - just resubmit your code to add IP & route stuff as a separate patch now. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 23.03.2016 13:08, Daniel P. Berrange wrote:
On Wed, Mar 23, 2016 at 02:39:34PM +0300, Vasiliy Tolstov wrote:
2016-03-23 14:34 GMT+03:00 Daniel P. Berrange <berrange@redhat.com>:
RAther than have you re-post again, I'll commit just this patch with this IP & route stuff removed now.
Ok, no problem with this. If you apply only part, that creates tap devices, this is good start for me.
Ok it is merged - just resubmit your code to add IP & route stuff as a separate patch now.
Thing is, this breaks the build. 'make check' is failing now as it really tries to create a tap device - we have used 'ethernet' type of interface in our tests to avoid creating any device on the host. Mocking all of the virNetDev* is going to be verbose. But I'm already working on it. Michal

On Wed, Mar 23, 2016 at 03:22:35PM +0100, Michal Privoznik wrote:
On 23.03.2016 13:08, Daniel P. Berrange wrote:
On Wed, Mar 23, 2016 at 02:39:34PM +0300, Vasiliy Tolstov wrote:
2016-03-23 14:34 GMT+03:00 Daniel P. Berrange <berrange@redhat.com>:
RAther than have you re-post again, I'll commit just this patch with this IP & route stuff removed now.
Ok, no problem with this. If you apply only part, that creates tap devices, this is good start for me.
Ok it is merged - just resubmit your code to add IP & route stuff as a separate patch now.
Thing is, this breaks the build. 'make check' is failing now as it really tries to create a tap device - we have used 'ethernet' type of interface in our tests to avoid creating any device on the host. Mocking all of the virNetDev* is going to be verbose. But I'm already working on it.
Oh damn, sorry I missed that - I mistakenly thought we skipped ethernet devices in the tests, the same way we skip bridge devices and guess I only ran syntax-check rather than check. We could just quickly disable those ethernet tests if virNetDev mocking is going to take some time. It would let us test type=bridge too once we do it. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

this allows user to specify destination address for ip like tunnel Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- docs/schemas/interface.rng | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/schemas/interface.rng b/docs/schemas/interface.rng index 052703ce84aa..b81c38b9d07c 100644 --- a/docs/schemas/interface.rng +++ b/docs/schemas/interface.rng @@ -332,6 +332,9 @@ <optional> <attribute name="prefix"><ref name="ipv4Prefix"/></attribute> </optional> + <optional> + <attribute name="peer"><ref name="ipv4Addr"/></attribute> + </optional> </element> </zeroOrMore> <optional> @@ -361,6 +364,9 @@ <optional> <attribute name="prefix"><ref name="ipv6Prefix"/></attribute> </optional> + <optional> + <attribute name="peer"><ref name="ipv6Addr"/></attribute> + </optional> </element> </zeroOrMore> <optional> -- 2.7.3

On Fri, Mar 18, 2016 at 10:19:42PM +0000, Vasiliy Tolstov wrote:
this allows user to specify destination address for ip like tunnel
Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- docs/schemas/interface.rng | 6 ++++++ 1 file changed, 6 insertions(+)
The RNG schema changes are usually associated with the parser changes. IOW, this patch should contain these changes src/conf/domain_conf.c | 14 ++++++++++++- src/conf/domain_conf.h | 1 + src/conf/network_conf.c | 26 +++++++++++++++++++++++- src/conf/network_conf.h | 1 + from your 3rd patch. The 3rd patch should just contain the code for doing the host OS setup / integration.
diff --git a/docs/schemas/interface.rng b/docs/schemas/interface.rng index 052703ce84aa..b81c38b9d07c 100644 --- a/docs/schemas/interface.rng +++ b/docs/schemas/interface.rng @@ -332,6 +332,9 @@ <optional> <attribute name="prefix"><ref name="ipv4Prefix"/></attribute> </optional> + <optional> + <attribute name="peer"><ref name="ipv4Addr"/></attribute> + </optional> </element> </zeroOrMore> <optional> @@ -361,6 +364,9 @@ <optional> <attribute name="prefix"><ref name="ipv6Prefix"/></attribute> </optional> + <optional> + <attribute name="peer"><ref name="ipv6Addr"/></attribute> + </optional> </element> </zeroOrMore> <optional>
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 03/23/2016 08:03 AM, Daniel P. Berrange wrote:
On Fri, Mar 18, 2016 at 10:19:42PM +0000, Vasiliy Tolstov wrote:
this allows user to specify destination address for ip like tunnel
Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- docs/schemas/interface.rng | 6 ++++++ 1 file changed, 6 insertions(+) The RNG schema changes are usually associated with the parser changes. IOW, this patch should contain these changes
src/conf/domain_conf.c | 14 ++++++++++++- src/conf/domain_conf.h | 1 + src/conf/network_conf.c | 26 +++++++++++++++++++++++- src/conf/network_conf.h | 1 +
from your 3rd patch. The 3rd patch should just contain the code for doing the host OS setup / integration.
Actually interface.rng is only for the virInterface APIs, so it shouldn't be touched at all for any of this. Any change to interface.rng would be paired with a change to interface_conf.c and to the netcf package that virInterface uses as its backend. The RNG for something related to an IP address in the libvirt virtual network xml would be in network.rng or networkcommon.rng, and that for an IP address in the domain would be in domaincommon.rng or networkcommon.rng.

Assign peer address if it provided Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru> --- include/libvirt/libvirt-domain.h | 1 + src/conf/domain_conf.c | 14 ++++++++++++- src/conf/domain_conf.h | 1 + src/conf/network_conf.c | 26 +++++++++++++++++++++++- src/conf/network_conf.h | 1 + src/lxc/lxc_container.c | 2 +- src/network/bridge_driver.c | 2 +- src/util/virnetdev.c | 43 +++++++++++++++++++++++++++------------- src/util/virnetdev.h | 1 + 9 files changed, 73 insertions(+), 18 deletions(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 4ac29cd78816..d2cd45eba253 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -3926,6 +3926,7 @@ typedef virDomainIPAddress *virDomainIPAddressPtr; struct _virDomainInterfaceIPAddress { int type; /* virIPAddrType */ char *addr; /* IP address */ + char *peer; /* IP peer */ unsigned int prefix; /* IP address prefix */ }; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 7d68096f0e55..fa62cda7a290 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -5725,7 +5725,7 @@ virDomainNetIpParseXML(xmlNodePtr node) unsigned int prefixValue = 0; char *familyStr = NULL; int family = AF_UNSPEC; - char *address = NULL; + char *address = NULL, *peer = NULL; if (!(prefixStr = virXMLPropString(node, "prefix")) || (virStrToLong_ui(prefixStr, NULL, 10, &prefixValue) < 0)) { @@ -5739,6 +5739,9 @@ virDomainNetIpParseXML(xmlNodePtr node) goto cleanup; } + if ((peer = virXMLPropString(node, "peer")) == NULL) + VIR_DEBUG("Peer is empty"); + familyStr = virXMLPropString(node, "family"); if (familyStr && STREQ(familyStr, "ipv4")) family = AF_INET; @@ -5756,6 +5759,14 @@ virDomainNetIpParseXML(xmlNodePtr node) address); goto cleanup; } + + if ((peer != NULL) && (virSocketAddrParse(&ip->peer, peer, family) < 0)) { + virReportError(VIR_ERR_INVALID_ARG, + _("Failed to parse IP address: '%s'"), + peer); + goto cleanup; + } + ip->prefix = prefixValue; ret = ip; @@ -5765,6 +5776,7 @@ virDomainNetIpParseXML(xmlNodePtr node) VIR_FREE(prefixStr); VIR_FREE(familyStr); VIR_FREE(address); + VIR_FREE(peer); VIR_FREE(ip); return ret; } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 85c4f554a2e2..0feff47ac99e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -513,6 +513,7 @@ typedef struct _virDomainNetIpDef virDomainNetIpDef; typedef virDomainNetIpDef *virDomainNetIpDefPtr; struct _virDomainNetIpDef { virSocketAddr address; /* ipv4 or ipv6 address */ + virSocketAddr peer; /* ipv4 or ipv6 address of peer */ unsigned int prefix; /* number of 1 bits in the net mask */ }; diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c index 4fb2e2a75b9d..1a0cf7fdcdd2 100644 --- a/src/conf/network_conf.c +++ b/src/conf/network_conf.c @@ -1456,7 +1456,7 @@ virNetworkIPDefParseXML(const char *networkName, */ xmlNodePtr cur, save; - char *address = NULL, *netmask = NULL; + char *address = NULL, *netmask = NULL, *peer = NULL; unsigned long prefix = 0; int prefixRc; int result = -1; @@ -1502,6 +1502,14 @@ virNetworkIPDefParseXML(const char *networkName, else def->prefix = prefix; + peer = virXPathString("string(./@peer)", ctxt); + if (peer && (virSocketAddrParse(&def->peer, peer, AF_UNSPEC) < 0)) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid peer '%s' in network '%s'"), + peer, networkName); + goto cleanup; + } + /* validate address, etc. for each family */ if ((def->family == NULL) || (STREQ(def->family, "ipv4"))) { if (!(VIR_SOCKET_ADDR_IS_FAMILY(&def->address, AF_INET) || @@ -1531,6 +1539,14 @@ virNetworkIPDefParseXML(const char *networkName, prefix, networkName); goto cleanup; } + if (peer) { + if (!VIR_SOCKET_ADDR_IS_FAMILY(&def->peer, AF_INET)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Family 'ipv4' specified for non-IPv4 address '%s' in network '%s'"), + peer, networkName); + goto cleanup; + } + } } else if (STREQ(def->family, "ipv6")) { if (!VIR_SOCKET_ADDR_IS_FAMILY(&def->address, AF_INET6)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -1550,6 +1566,14 @@ virNetworkIPDefParseXML(const char *networkName, prefix, networkName); goto cleanup; } + if (peer) { + if (!VIR_SOCKET_ADDR_IS_FAMILY(&def->peer, AF_INET6)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Family 'ipv6' specified for non-IPv6 address '%s' in network '%s'"), + peer, networkName); + goto cleanup; + } + } } else { virReportError(VIR_ERR_XML_ERROR, _("Unrecognized family '%s' in network '%s'"), diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h index b72257b970a8..9b8e807fda3c 100644 --- a/src/conf/network_conf.h +++ b/src/conf/network_conf.h @@ -152,6 +152,7 @@ struct _virNetworkIpDef { */ unsigned int prefix; /* ipv6 - only prefix allowed */ virSocketAddr netmask; /* ipv4 - either netmask or prefix specified */ + virSocketAddr peer; /* ipv4 or ipv6 peer address */ size_t nranges; /* Zero or more dhcp ranges */ virSocketAddrRangePtr ranges; diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 348bbfbc01fc..a909b660b3ae 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -520,7 +520,7 @@ static int lxcContainerRenameAndEnableInterfaces(virDomainDefPtr vmDef, VIR_DEBUG("Adding IP address '%s/%u' to '%s'", ipStr, ip->prefix, newname); - if (virNetDevSetIPAddress(newname, &ip->address, prefix) < 0) { + if (virNetDevSetIPAddress(newname, &ip->address, NULL, prefix) < 0) { virReportError(VIR_ERR_SYSTEM_ERROR, _("Failed to set IP address '%s' on %s"), ipStr, newname); diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index a09a7e474fc5..b3dbea13b58f 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -1970,7 +1970,7 @@ networkAddAddrToBridge(virNetworkObjPtr network, } if (virNetDevSetIPAddress(network->def->bridge, - &ipdef->address, prefix) < 0) + &ipdef->address, NULL, prefix) < 0) return -1; return 0; diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index aed50f546263..d171f34e6058 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -1039,21 +1039,28 @@ virNetDevCreateNetlinkAddressMessage(int messageType, const char *ifname, virSocketAddr *addr, unsigned int prefix, - virSocketAddr *broadcast) + virSocketAddr *broadcast, + virSocketAddr *peer) { struct nl_msg *nlmsg = NULL; struct ifaddrmsg ifa; unsigned int ifindex; void *addrData = NULL; + void *peerData = NULL; void *broadcastData = NULL; size_t addrDataLen; if (virNetDevGetIPAddressBinary(addr, &addrData, &addrDataLen) < 0) return NULL; - if (broadcast && virNetDevGetIPAddressBinary(broadcast, &broadcastData, - &addrDataLen) < 0) - return NULL; + if (peer && VIR_SOCKET_ADDR_VALID(peer)) { + if (virNetDevGetIPAddressBinary(peer, &peerData, &addrDataLen) < 0) + return NULL; + } else if (broadcast) { + if (virNetDevGetIPAddressBinary(broadcast, &broadcastData, + &addrDataLen) < 0) + return NULL; + } /* Get the interface index */ if ((ifindex = if_nametoindex(ifname)) == 0) @@ -1078,12 +1085,15 @@ virNetDevCreateNetlinkAddressMessage(int messageType, if (nla_put(nlmsg, IFA_LOCAL, addrDataLen, addrData) < 0) goto buffer_too_small; - if (nla_put(nlmsg, IFA_ADDRESS, addrDataLen, addrData) < 0) - goto buffer_too_small; + if (peerData) { + if (nla_put(nlmsg, IFA_ADDRESS, addrDataLen, peerData) < 0) + goto buffer_too_small; + } - if (broadcastData && - nla_put(nlmsg, IFA_BROADCAST, addrDataLen, broadcastData) < 0) - goto buffer_too_small; + if (broadcastData) { + if (nla_put(nlmsg, IFA_BROADCAST, addrDataLen, broadcastData) < 0) + goto buffer_too_small; + } return nlmsg; @@ -1098,6 +1108,7 @@ virNetDevCreateNetlinkAddressMessage(int messageType, * virNetDevSetIPAddress: * @ifname: the interface name * @addr: the IP address (IPv4 or IPv6) + * @peer: The IP address of peer (IPv4 or IPv6) * @prefix: number of 1 bits in the netmask * * Add an IP address to an interface. This function *does not* remove @@ -1108,6 +1119,7 @@ virNetDevCreateNetlinkAddressMessage(int messageType, */ int virNetDevSetIPAddress(const char *ifname, virSocketAddr *addr, + virSocketAddr *peer, unsigned int prefix) { virSocketAddr *broadcast = NULL; @@ -1116,9 +1128,8 @@ int virNetDevSetIPAddress(const char *ifname, struct nlmsghdr *resp = NULL; unsigned int recvbuflen; - /* The caller needs to provide a correct address */ - if (VIR_SOCKET_ADDR_FAMILY(addr) == AF_INET) { + if (VIR_SOCKET_ADDR_FAMILY(addr) == AF_INET && !VIR_SOCKET_ADDR_VALID(peer)) { /* compute a broadcast address if this is IPv4 */ if (VIR_ALLOC(broadcast) < 0) return -1; @@ -1129,7 +1140,7 @@ int virNetDevSetIPAddress(const char *ifname, if (!(nlmsg = virNetDevCreateNetlinkAddressMessage(RTM_NEWADDR, ifname, addr, prefix, - broadcast))) + broadcast, peer))) goto cleanup; if (virNetlinkCommand(nlmsg, &resp, &recvbuflen, 0, 0, @@ -1288,7 +1299,7 @@ int virNetDevClearIPAddress(const char *ifname, if (!(nlmsg = virNetDevCreateNetlinkAddressMessage(RTM_DELADDR, ifname, addr, prefix, - NULL))) + NULL, NULL))) goto cleanup; if (virNetlinkCommand(nlmsg, &resp, &recvbuflen, 0, 0, @@ -1423,10 +1434,11 @@ virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count) int virNetDevSetIPAddress(const char *ifname, virSocketAddr *addr, + virSocketAddr *peer, unsigned int prefix) { virCommandPtr cmd = NULL; - char *addrstr = NULL, *bcaststr = NULL; + char *addrstr = NULL, *bcaststr = NULL, *peerstr = NULL; virSocketAddr broadcast; int ret = -1; @@ -1453,6 +1465,8 @@ int virNetDevSetIPAddress(const char *ifname, cmd = virCommandNew(IP_PATH); virCommandAddArgList(cmd, "addr", "add", NULL); virCommandAddArgFormat(cmd, "%s/%u", addrstr, prefix); + if (peerstr) + virCommandAddArgList(cmd, "peer", peerstr, NULL); if (bcaststr) virCommandAddArgList(cmd, "broadcast", bcaststr, NULL); virCommandAddArgList(cmd, "dev", ifname, NULL); @@ -1465,6 +1479,7 @@ int virNetDevSetIPAddress(const char *ifname, cleanup: VIR_FREE(addrstr); VIR_FREE(bcaststr); + VIR_FREE(peerstr); virCommandFree(cmd); return ret; } diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index e7719d58a410..240fff774d30 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -90,6 +90,7 @@ int virNetDevGetOnline(const char *ifname, int virNetDevSetIPAddress(const char *ifname, virSocketAddr *addr, + virSocketAddr *peer, unsigned int prefix) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; int virNetDevAddRoute(const char *ifname, -- 2.7.3

On Fri, Mar 18, 2016 at 10:19:43PM +0000, Vasiliy Tolstov wrote:
Assign peer address if it provided
[snip]
@@ -1039,21 +1039,28 @@ virNetDevCreateNetlinkAddressMessage(int messageType, const char *ifname, virSocketAddr *addr, unsigned int prefix, - virSocketAddr *broadcast) + virSocketAddr *broadcast, + virSocketAddr *peer) { struct nl_msg *nlmsg = NULL; struct ifaddrmsg ifa; unsigned int ifindex; void *addrData = NULL; + void *peerData = NULL; void *broadcastData = NULL; size_t addrDataLen;
if (virNetDevGetIPAddressBinary(addr, &addrData, &addrDataLen) < 0) return NULL;
- if (broadcast && virNetDevGetIPAddressBinary(broadcast, &broadcastData, - &addrDataLen) < 0) - return NULL; + if (peer && VIR_SOCKET_ADDR_VALID(peer)) { + if (virNetDevGetIPAddressBinary(peer, &peerData, &addrDataLen) < 0) + return NULL; + } else if (broadcast) { + if (virNetDevGetIPAddressBinary(broadcast, &broadcastData, + &addrDataLen) < 0) + return NULL; + }
/* Get the interface index */ if ((ifindex = if_nametoindex(ifname)) == 0) @@ -1078,12 +1085,15 @@ virNetDevCreateNetlinkAddressMessage(int messageType, if (nla_put(nlmsg, IFA_LOCAL, addrDataLen, addrData) < 0) goto buffer_too_small;
- if (nla_put(nlmsg, IFA_ADDRESS, addrDataLen, addrData) < 0) - goto buffer_too_small; + if (peerData) { + if (nla_put(nlmsg, IFA_ADDRESS, addrDataLen, peerData) < 0) + goto buffer_too_small; + }
- if (broadcastData && - nla_put(nlmsg, IFA_BROADCAST, addrDataLen, broadcastData) < 0) - goto buffer_too_small; + if (broadcastData) { + if (nla_put(nlmsg, IFA_BROADCAST, addrDataLen, broadcastData) < 0) + goto buffer_too_small; + }
return nlmsg;
Ok, I took me a while to understand what the point of this is. I eventually found this comment in /usr/include/linux/if_addr.h * IFA_ADDRESS is prefix address, rather than local interface address. * It makes no difference for normally configured broadcast interfaces, * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute. Likewise in ip-address(8) manpage peer ADDRESS the address of the remote endpoint for pointopoint inter‐ faces. Again, the ADDRESS may be followed by a slash and a decimal number, encoding the network prefix length. If a peer address is specified, the local address cannot have a prefix length. The network prefix is associated with the peer rather than with the local address. So, if we accept ip, broader & route info, it makes sense that we also accept a peer address. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

2016-03-23 14:52 GMT+03:00 Daniel P. Berrange <berrange@redhat.com>:
Ok, I took me a while to understand what the point of this is. I eventually found this comment in /usr/include/linux/if_addr.h
* IFA_ADDRESS is prefix address, rather than local interface address. * It makes no difference for normally configured broadcast interfaces, * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute.
Likewise in ip-address(8) manpage
peer ADDRESS the address of the remote endpoint for pointopoint inter‐ faces. Again, the ADDRESS may be followed by a slash and a decimal number, encoding the network prefix length. If a peer address is specified, the local address cannot have a prefix length. The network prefix is associated with the peer rather than with the local address.
So, if we accept ip, broader & route info, it makes sense that we also accept a peer address.
Yes, and broadcast and peer (or remote) mutually exclusive. -- Vasiliy Tolstov, e-mail: v.tolstov@selfip.ru

On Fri, Mar 18, 2016 at 10:19:40PM +0000, Vasiliy Tolstov wrote:
By default libvirt does not manages ethernet devices, bypass all control to qemu. This series enable creating tap devices via libvirt, assign mac, ip address and routes. Also when use specif linkstate down in xml, phisicaly down tap device. This is very useful in case of using bird, quagga or somethink like this to forward traffic to vm.
Ok, so there's two parts for this series really 1. Moving creation of TAP device out of QEMU into libvirt for type=ethernet. This avoids need for QEMU to run with root privileges which is awesome 2. Extending semantics of type=ethernet to allow it support setting of IP address details and routes. My big design question is whether we should be fitting this into the type=ethernet NIC, or whether we should be inventing a new type=routed NIC. Historically we deprecated use of type=ethernet for two reasons, first it required root privileges, second it ran an external script which is a black box from libvirt POV. We've now solved the root privileges problem. The external script is opt in, so we could consider type=ethernet fully supported and merely deprecated/discourage use of the script instead. Functionally if we invented type=routed, it would be identical to type=ethernet, except that it set the IP Address + route info, and did not allow use of an external script. On that basis I'm thinking that this probably a waste of time to invent a new NIC, and instead we should just go down the route you suggest of letting us assign IP address, routes, etc for type=ethernet
Main question about peer address assign. How to deal with absent peer address? Now i'm simplify check that peer address is valid ip address, but may be this is wrong.
IIUC, if peer address is not specified in the XML, then we should just fallback to current behaviour of using the normal IP address. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (4)
-
Daniel P. Berrange
-
Laine Stump
-
Michal Privoznik
-
Vasiliy Tolstov