[libvirt] [PATCH 0/9] Let libvirt manage a bridge's FDB

The idea behind these patches is the following: 1) most virtual machines only have a single MAC address behind each interface, and that MAC address is known by libvirt. 2) If we (i.e. libvirt) manually add an entry to the bridge's forwarding database (fdb) for the MAC address associated with a port on the bridge, we can turn off learning and unicast_flooding for that port. 3) kernels starting with 3.15 (and actually working correctly starting in kernel 3.17) will notice that all of a bridge's ports have flood and learning turned off, and in that case will turn off promiscuous mode on all ports. If all but one of the ports have flood/learning turned off, then promiscuous will be turned off on that port (and left on for all the other ports) 4) When (4) can be done, there is a measurable performance advantage. It can also help security, as it will prevent a guest from doing anything useful if it changes its MAC address. Notes: A) Patches 1-7 are the only ones that I plan to push now, 8 & 9 are just to simplify tangentially-related code, and I've found a problem with these during update of a live system, so I won't be pushing them. You can/should ignore them for now. B) I don't like the name "promiscLinks", and will probably change it (maybe to "fdb='managed|auto'), but want to get the other aspects of these patches reviewed. C) These only work with a fixed MAC address, and no vlan tags set in the guest. Support for both of those will be coming. Laine Stump (9): util: new functions for setting bridge and bridge port attributes util: functions to manage bridge fdb (forwarding database) conf: new network bridge device attribute promiscLinks network: save bridge name in ActualNetDef when actualType==network too network: store network promiscLinks setting in NetDef actual object network: setup bridge devices for promiscLinks='no' qemu: setup tap devices for promiscLinks='no' qemu: always use virDomainNetGetActualBridgeName to get interface's bridge lxc: always use virDomainNetGetActualBridgeName to get interface's bridge docs/formatnetwork.html.in | 36 +- docs/schemas/network.rng | 5 + src/conf/domain_conf.c | 129 +++++--- src/conf/domain_conf.h | 2 + src/conf/network_conf.c | 47 ++- src/conf/network_conf.h | 1 + src/libvirt_private.syms | 9 + src/lxc/lxc_driver.c | 32 +- src/lxc/lxc_process.c | 32 +- src/network/bridge_driver.c | 66 ++++ src/qemu/qemu_command.c | 51 ++- src/qemu/qemu_hotplug.c | 60 +--- src/util/virnetdevbridge.c | 364 ++++++++++++++++++++- src/util/virnetdevbridge.h | 44 ++- tests/networkxml2xmlin/host-bridge-no-flood.xml | 6 + .../nat-network-explicit-flood.xml | 21 ++ tests/networkxml2xmlout/host-bridge-no-flood.xml | 6 + .../nat-network-explicit-flood.xml | 23 ++ tests/networkxml2xmltest.c | 2 + 19 files changed, 726 insertions(+), 210 deletions(-) create mode 100644 tests/networkxml2xmlin/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlin/nat-network-explicit-flood.xml create mode 100644 tests/networkxml2xmlout/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlout/nat-network-explicit-flood.xml -- 1.9.3

These functions all set/get items in the sysfs for a bridge device. --- src/libvirt_private.syms | 6 ++ src/util/virnetdevbridge.c | 236 ++++++++++++++++++++++++++++++++++++++++++++- src/util/virnetdevbridge.h | 28 +++++- 3 files changed, 268 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2647d36..6453826 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1662,9 +1662,15 @@ virNetDevBridgeCreate; virNetDevBridgeDelete; virNetDevBridgeGetSTP; virNetDevBridgeGetSTPDelay; +virNetDevBridgeGetVlanFiltering; +virNetDevBridgePortGetLearning; +virNetDevBridgePortGetUnicastFlood; +virNetDevBridgePortSetLearning; +virNetDevBridgePortSetUnicastFlood; virNetDevBridgeRemovePort; virNetDevBridgeSetSTP; virNetDevBridgeSetSTPDelay; +virNetDevBridgeSetVlanFiltering; # util/virnetdevmacvlan.h diff --git a/src/util/virnetdevbridge.c b/src/util/virnetdevbridge.c index 15434de..9be835c 100644 --- a/src/util/virnetdevbridge.c +++ b/src/util/virnetdevbridge.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2013 Red Hat, Inc. + * Copyright (C) 2007-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -217,8 +217,175 @@ static int virNetDevBridgeGet(const char *brname, VIR_FREE(path); return ret; } + #endif /* __linux__ */ +#if defined(__linux__) +static +int +virNetDevBridgePortSet(const char *brname, + const char *ifname, + const char *paramname, + unsigned long value) +{ + char *path = NULL; + char valuestr[INT_BUFSIZE_BOUND(value)]; + int ret = -1; + + snprintf(valuestr, sizeof(valuestr), "%lu", value); + + if (virAsprintf(&path, "%s/%s/brif/%s/%s", + SYSFS_NET_DIR, brname, ifname, paramname) < 0) + return -1; + + if (!virFileExists(path)) + errno = EINVAL; + else + ret = virFileWriteStr(path, valuestr, 0); + + if (ret < 0) { + virReportSystemError(errno, + _("Unable to set bridge %s port %s %s to %s"), + brname, ifname, paramname, valuestr); + } + + VIR_FREE(path); + return ret; +} + + +static +int +virNetDevBridgePortGet(const char *brname, + const char *ifname, + const char *paramname, + unsigned long *value) +{ + char *path = NULL; + char *valuestr = NULL; + int ret = -1; + + if (virAsprintf(&path, "%s/%s/brif/%s/%s", + SYSFS_NET_DIR, brname, ifname, paramname) < 0) + return -1; + + if (virFileReadAll(path, INT_BUFSIZE_BOUND(unsigned long), &valuestr) < 0) + goto cleanup; + + if (virStrToLong_ul(valuestr, NULL, 10, value) < 0) { + virReportSystemError(EINVAL, + _("Unable to get bridge %s port %s %s"), + brname, ifname, paramname); + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(path); + VIR_FREE(valuestr); + return ret; +} + + +int +virNetDevBridgePortGetLearning(const char *brname, + const char *ifname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgePortGet(brname, ifname, "learning", &value) < 0) + goto cleanup; + + *enable = !!value; + ret = 0; + cleanup: + return ret; +} + + +int +virNetDevBridgePortSetLearning(const char *brname, + const char *ifname, + bool enable) +{ + return virNetDevBridgePortSet(brname, ifname, "learning", enable ? 1 : 0); +} + + +int +virNetDevBridgePortGetUnicastFlood(const char *brname, + const char *ifname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgePortGet(brname, ifname, "unicast_flood", &value) < 0) + goto cleanup; + + *enable = !!value; + ret = 0; + cleanup: + return ret; +} + + +int +virNetDevBridgePortSetUnicastFlood(const char *brname, + const char *ifname, + bool enable) +{ + return virNetDevBridgePortSet(brname, ifname, "unicast_flood", enable ? 1 : 0); +} + + +#else +int +virNetDevBridgePortGetLearning(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge port learning on this platform")); + return -1; +} + + +int +virNetDevBridgePortSetLearning(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool enable) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge port learning on this platform")); + return -1; +} + + +int +virNetDevBridgePortGetUnicastFlood(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge port unicast_flood on this platform")); + return -1; +} + + +int +virNetDevBridgePortSetUnicastFlood(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge port unicast_flood on this platform")); + return -1; +} +#endif + /** * virNetDevBridgeCreate: @@ -682,3 +849,70 @@ int virNetDevBridgeGetSTP(const char *brname, return -1; } #endif + +#if defined(HAVE_STRUCT_IFREQ) && defined(__linux__) +/** + * virNetDevBridgeGetVlanFiltering: + * @brname: the bridge device name + * @enable: true or false + * + * Retrives the vlan_filtering setting for the bridge device @brname + * storing it in @enable. + * + * Returns 0 on success, -1 on error+ + */ +int +virNetDevBridgeGetVlanFiltering(const char *brname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgeGet(brname, "vlan_filtering", &value, -1, NULL) < 0) + goto cleanup; + + *enable = !!value; + ret = 0; + cleanup: + return ret; +} + + +/** + * virNetDevBridgeSetVlanFiltering: + * @brname: the bridge name + * @enable: true or false + * + * Set the bridge vlan_filtering mode + * + * Returns 0 in case of success or -1 on failure + */ + +int +virNetDevBridgeSetVlanFiltering(const char *brname, + bool enable) +{ + return virNetDevBridgeSet(brname, "vlan_filtering", enable ? 1 : 0, -1, NULL); +} + + +#else +int +virNetDevBridgeGetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge vlan_filtering on this platform")); + return -1; +} + + +int +virNetDevBridgeSetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, + bool enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge vlan_filtering on this platform")); + return -1; +} +#endif diff --git a/src/util/virnetdevbridge.h b/src/util/virnetdevbridge.h index 13776d2..8188a2f 100644 --- a/src/util/virnetdevbridge.h +++ b/src/util/virnetdevbridge.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2011 Red Hat, Inc. + * Copyright (C) 2007-2012, 2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -51,4 +51,30 @@ int virNetDevBridgeGetSTP(const char *brname, bool *enable) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgeSetVlanFiltering(const char *brname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgeGetVlanFiltering(const char *brname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + +int virNetDevBridgePortGetLearning(const char *brname, + const char *ifname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3) + ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortSetLearning(const char *brname, + const char *ifname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortGetUnicastFlood(const char *brname, + const char *ifname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3) + ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortSetUnicastFlood(const char *brname, + const char *ifname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + #endif /* __VIR_NETDEV_BRIDGE_H__ */ -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
These functions all set/get items in the sysfs for a bridge device.
Probably could have split up the "virNetDevBridgePort*" from the virNetDevBridgeSetVlanFiltering - if only to make it a bit clearer in the commit message that you're adding functions to manage? the BridgePort settings.
--- src/libvirt_private.syms | 6 ++ src/util/virnetdevbridge.c | 236 ++++++++++++++++++++++++++++++++++++++++++++- src/util/virnetdevbridge.h | 28 +++++- 3 files changed, 268 insertions(+), 2 deletions(-)
Couple of nits follow - not necessary for a v2. ACK w/ the changes John
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2647d36..6453826 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1662,9 +1662,15 @@ virNetDevBridgeCreate; virNetDevBridgeDelete; virNetDevBridgeGetSTP; virNetDevBridgeGetSTPDelay; +virNetDevBridgeGetVlanFiltering; +virNetDevBridgePortGetLearning; +virNetDevBridgePortGetUnicastFlood; +virNetDevBridgePortSetLearning; +virNetDevBridgePortSetUnicastFlood; virNetDevBridgeRemovePort; virNetDevBridgeSetSTP; virNetDevBridgeSetSTPDelay; +virNetDevBridgeSetVlanFiltering;
# util/virnetdevmacvlan.h diff --git a/src/util/virnetdevbridge.c b/src/util/virnetdevbridge.c index 15434de..9be835c 100644 --- a/src/util/virnetdevbridge.c +++ b/src/util/virnetdevbridge.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2013 Red Hat, Inc. + * Copyright (C) 2007-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -217,8 +217,175 @@ static int virNetDevBridgeGet(const char *brname, VIR_FREE(path); return ret; } +
Spurrious? Or evidence of moving things around late in self review?
#endif /* __linux__ */
+#if defined(__linux__) +static +int
place on same line, eg "static int" (vs. 2 lines)
+virNetDevBridgePortSet(const char *brname, + const char *ifname, + const char *paramname, + unsigned long value) +{ + char *path = NULL; + char valuestr[INT_BUFSIZE_BOUND(value)]; + int ret = -1; + + snprintf(valuestr, sizeof(valuestr), "%lu", value); + + if (virAsprintf(&path, "%s/%s/brif/%s/%s", + SYSFS_NET_DIR, brname, ifname, paramname) < 0) + return -1; + + if (!virFileExists(path)) + errno = EINVAL; + else + ret = virFileWriteStr(path, valuestr, 0); + + if (ret < 0) { + virReportSystemError(errno, + _("Unable to set bridge %s port %s %s to %s"), + brname, ifname, paramname, valuestr); + } + + VIR_FREE(path); + return ret; +} + + +static +int
ditto
+virNetDevBridgePortGet(const char *brname, + const char *ifname, + const char *paramname, + unsigned long *value) +{ + char *path = NULL; + char *valuestr = NULL; + int ret = -1; + + if (virAsprintf(&path, "%s/%s/brif/%s/%s", + SYSFS_NET_DIR, brname, ifname, paramname) < 0) + return -1; + + if (virFileReadAll(path, INT_BUFSIZE_BOUND(unsigned long), &valuestr) < 0) + goto cleanup; + + if (virStrToLong_ul(valuestr, NULL, 10, value) < 0) { + virReportSystemError(EINVAL, + _("Unable to get bridge %s port %s %s"), + brname, ifname, paramname); + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(path); + VIR_FREE(valuestr); + return ret; +} + + +int +virNetDevBridgePortGetLearning(const char *brname, + const char *ifname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgePortGet(brname, ifname, "learning", &value) < 0) + goto cleanup; + + *enable = !!value;
Every time I see one of these "!!"... I see virNetDevBridgeGetSTP() uses *enabled = val ? true : false; - that's at least easier to read for me. Your call on which way to go though.
+ ret = 0; + cleanup: + return ret; +} + + +int +virNetDevBridgePortSetLearning(const char *brname, + const char *ifname, + bool enable) +{ + return virNetDevBridgePortSet(brname, ifname, "learning", enable ? 1 : 0); +} + + +int +virNetDevBridgePortGetUnicastFlood(const char *brname, + const char *ifname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgePortGet(brname, ifname, "unicast_flood", &value) < 0) + goto cleanup; + + *enable = !!value; + ret = 0; + cleanup: + return ret; +} + + +int +virNetDevBridgePortSetUnicastFlood(const char *brname, + const char *ifname, + bool enable) +{ + return virNetDevBridgePortSet(brname, ifname, "unicast_flood", enable ? 1 : 0); +} + + +#else +int +virNetDevBridgePortGetLearning(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge port learning on this platform")); + return -1; +} + + +int +virNetDevBridgePortSetLearning(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool enable) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge port learning on this platform")); + return -1; +} + + +int +virNetDevBridgePortGetUnicastFlood(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge port unicast_flood on this platform")); + return -1; +} + + +int +virNetDevBridgePortSetUnicastFlood(const char *brname ATTRIBUTE_UNUSED, + const char *ifname ATTRIBUTE_UNUSED, + bool enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge port unicast_flood on this platform")); + return -1; +} +#endif +
/** * virNetDevBridgeCreate: @@ -682,3 +849,70 @@ int virNetDevBridgeGetSTP(const char *brname, return -1; } #endif + +#if defined(HAVE_STRUCT_IFREQ) && defined(__linux__) +/** + * virNetDevBridgeGetVlanFiltering: + * @brname: the bridge device name + * @enable: true or false + * + * Retrives the vlan_filtering setting for the bridge device @brname
s/Retrives/Retrieves/
+ * storing it in @enable. + * + * Returns 0 on success, -1 on error+
There's an extraneous "+" at the end ^
+ */ +int +virNetDevBridgeGetVlanFiltering(const char *brname, + bool *enable) +{ + int ret = -1; + unsigned long value; + + if (virNetDevBridgeGet(brname, "vlan_filtering", &value, -1, NULL) < 0) + goto cleanup; + + *enable = !!value; + ret = 0; + cleanup: + return ret; +} + + +/** + * virNetDevBridgeSetVlanFiltering: + * @brname: the bridge name + * @enable: true or false + * + * Set the bridge vlan_filtering mode + * + * Returns 0 in case of success or -1 on failure + */ + +int +virNetDevBridgeSetVlanFiltering(const char *brname, + bool enable) +{ + return virNetDevBridgeSet(brname, "vlan_filtering", enable ? 1 : 0, -1, NULL); +} + + +#else +int +virNetDevBridgeGetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, + bool *enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get bridge vlan_filtering on this platform")); + return -1; +} + + +int +virNetDevBridgeSetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, + bool enable ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to set bridge vlan_filtering on this platform")); + return -1; +} +#endif diff --git a/src/util/virnetdevbridge.h b/src/util/virnetdevbridge.h index 13776d2..8188a2f 100644 --- a/src/util/virnetdevbridge.h +++ b/src/util/virnetdevbridge.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2011 Red Hat, Inc. + * Copyright (C) 2007-2012, 2014 Red Hat, Inc.
I forget if we're just allowed to say 2007-2014...
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -51,4 +51,30 @@ int virNetDevBridgeGetSTP(const char *brname, bool *enable) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+int virNetDevBridgeSetVlanFiltering(const char *brname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgeGetVlanFiltering(const char *brname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + +int virNetDevBridgePortGetLearning(const char *brname, + const char *ifname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3) + ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortSetLearning(const char *brname, + const char *ifname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortGetUnicastFlood(const char *brname, + const char *ifname, + bool *enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3) + ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgePortSetUnicastFlood(const char *brname, + const char *ifname, + bool enable) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + #endif /* __VIR_NETDEV_BRIDGE_H__ */

On 11/24/2014 01:36 PM, John Ferlan wrote:
On 11/24/2014 12:48 PM, Laine Stump wrote:
These functions all set/get items in the sysfs for a bridge device.
Probably could have split up the "virNetDevBridgePort*" from the virNetDevBridgeSetVlanFiltering - if only to make it a bit clearer in the commit message that you're adding functions to manage? the BridgePort settings.
+ + *enable = !!value;
Every time I see one of these "!!"...
I see virNetDevBridgeGetSTP() uses *enabled = val ? true : false; - that's at least easier to read for me. Your call on which way to go though.
I'm the opposite color of bikeshed - 'val ? true : false' feels over-verbose, compared to the shorter '!!val'; and we already use the short form elsewhere in the code base so it is somewhat idiomatic. But like John, I don't care about the paint color enough to dictate which way you go.
+++ b/src/util/virnetdevbridge.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2011 Red Hat, Inc. + * Copyright (C) 2007-2012, 2014 Red Hat, Inc.
I forget if we're just allowed to say 2007-2014...
Generally, for Red Hat copyright lines, I only use a range if there is at least one @redhat.com authorship during each year of the range; alas, for this file, it was created during a split in 2011 (so earlier years come from its earlier history in other files) and 2 commits in 2012, but nothing in 2013. I don't think we are in any legal trouble by including 2013, but I wouldn't add it in my own workflow. Other contributors may have different rules for when they add copyright years; I treat those as something I will not modify, whether they are right or wrong (although I do try to point out suspicious use in reviews if I notice it). I'm also starting to be of the opinion that if anyone ever takes this to court, the git log will be of much more use than actual years listed in any given file :) -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

These two functions use netlink RTM_NEWNEIGH and RTM_DELNEIGH messages to add and delete entries from a bridge's fdb. The bridge itself is not referenced in the arguments to the functions, only the name of the device that is attached to the bridge (since a device can only be attached to one bridge at a time, and must be attached for this function to make sense, the kernel easily infers which bridge's fdb is being modified by looking at the device name/index). --- src/libvirt_private.syms | 2 + src/util/virnetdevbridge.c | 128 +++++++++++++++++++++++++++++++++++++++++++++ src/util/virnetdevbridge.h | 16 ++++++ 3 files changed, 146 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6453826..6b6c51b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1660,6 +1660,8 @@ virNetDevBandwidthUpdateRate; virNetDevBridgeAddPort; virNetDevBridgeCreate; virNetDevBridgeDelete; +virNetDevBridgeFDBAdd; +virNetDevBridgeFDBDel; virNetDevBridgeGetSTP; virNetDevBridgeGetSTPDelay; virNetDevBridgeGetVlanFiltering; diff --git a/src/util/virnetdevbridge.c b/src/util/virnetdevbridge.c index 9be835c..a38323b 100644 --- a/src/util/virnetdevbridge.c +++ b/src/util/virnetdevbridge.c @@ -37,6 +37,9 @@ #include <netinet/in.h> #ifdef __linux__ +# if defined(HAVE_LIBNL) +# include "virnetlink.h" +# endif # include <linux/sockios.h> # include <linux/param.h> /* HZ */ # if NETINET_LINUX_WORKAROUND @@ -915,4 +918,129 @@ virNetDevBridgeSetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, _("Unable to set bridge vlan_filtering on this platform")); return -1; } + #endif + + + +#if defined(__linux__) && defined(HAVE_LIBNL) +static int +virNetDevBridgeFDBAddDel(const virMacAddr *mac, const char *ifname, + unsigned int flags, bool isAdd) +{ + int ret = -1; + struct nlmsghdr *resp = NULL; + struct nlmsgerr *err; + unsigned int recvbuflen; + struct nl_msg *nl_msg; + struct ndmsg ndm = { .ndm_family = PF_BRIDGE, .ndm_state = NUD_NOARP }; + + if (virNetDevGetIndex(ifname, &ndm.ndm_ifindex) < 0) + return -1; + + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_ROUTER) + ndm.ndm_flags |= NTF_ROUTER; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_SELF) + ndm.ndm_flags |= NTF_SELF; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_MASTER) + ndm.ndm_flags |= NTF_MASTER; + /* default self (same as iproute2's bridge command */ + if (!(ndm.ndm_flags & (NTF_MASTER | NTF_SELF))) + ndm.ndm_flags |= NTF_SELF; + + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_PERMANENT) + ndm.ndm_state |= NUD_PERMANENT; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) + ndm.ndm_state |= NUD_REACHABLE; + /* default permanent, same as iproute2's bridge command */ + if (!(ndm.ndm_state & (NUD_PERMANENT | NUD_REACHABLE))) + ndm.ndm_state |= NUD_PERMANENT; + + nl_msg = nlmsg_alloc_simple(isAdd ? RTM_NEWNEIGH : RTM_DELNEIGH, + NLM_F_REQUEST | + (isAdd ? (NLM_F_CREATE | NLM_F_EXCL) : 0)); + if (!nl_msg) { + virReportOOMError(); + return -1; + } + + if (nlmsg_append(nl_msg, &ndm, sizeof(ndm), NLMSG_ALIGNTO) < 0) + goto buffer_too_small; + if (nla_put(nl_msg, NDA_LLADDR, VIR_MAC_BUFLEN, mac) < 0) + goto buffer_too_small; + + /* NB: this message can also accept a Destination IP, a port, a + * vlan tag, and a via (see iproute2/bridge/fdb.c:fdb_modify()), + * but those aren't required for our application + */ + + if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, + NETLINK_ROUTE, 0) < 0) { + goto cleanup; + } + if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL) + goto malformed_resp; + + switch (resp->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(resp); + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + goto malformed_resp; + if (err->error) { + virReportSystemError(-err->error, + _("error adding fdb entry for %s"), ifname); + goto cleanup; + } + break; + case NLMSG_DONE: + break; + + default: + goto malformed_resp; + } + + ret = 0; + cleanup: + nlmsg_free(nl_msg); + VIR_FREE(resp); + return ret; + + malformed_resp: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + goto cleanup; + + buffer_too_small: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + goto cleanup; +} + + +#else +static int +virNetDevBridgeFDBAddDel(const virMacAddr *mac, const char *ifname, + unsigned int flags, bool isAdd) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to add/delete fdb entries on this platform")); + return -1; +} + + +#endif + +int +virNetDevBridgeFDBAdd(const virMacAddr *mac, const char *ifname, + unsigned int flags) +{ + return virNetDevBridgeFDBAddDel(mac, ifname, flags, true); +} + + +int +virNetDevBridgeFDBDel(const virMacAddr *mac, const char *ifname, + unsigned int flags) +{ + return virNetDevBridgeFDBAddDel(mac, ifname, flags, false); +} diff --git a/src/util/virnetdevbridge.h b/src/util/virnetdevbridge.h index 8188a2f..da116b9 100644 --- a/src/util/virnetdevbridge.h +++ b/src/util/virnetdevbridge.h @@ -24,6 +24,7 @@ # define __VIR_NETDEV_BRIDGE_H__ # include "internal.h" +# include "virmacaddr.h" int virNetDevBridgeCreate(const char *brname) ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; @@ -77,4 +78,19 @@ int virNetDevBridgePortSetUnicastFlood(const char *brname, bool enable) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +enum { + VIR_NETDEVBRIDGE_FDB_FLAG_ROUTER = (1 << 0), + VIR_NETDEVBRIDGE_FDB_FLAG_SELF = (1 << 1), + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER = (1 << 2), + + VIR_NETDEVBRIDGE_FDB_FLAG_PERMANENT = (1 << 3), + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP = (1 << 4), +}; + +int virNetDevBridgeFDBAdd(const virMacAddr *mac, const char *ifname, + unsigned int flags) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgeFDBDel(const virMacAddr *mac, const char *ifname, + unsigned int flags) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; #endif /* __VIR_NETDEV_BRIDGE_H__ */ -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
These two functions use netlink RTM_NEWNEIGH and RTM_DELNEIGH messages to add and delete entries from a bridge's fdb. The bridge itself is not referenced in the arguments to the functions, only the name of the device that is attached to the bridge (since a device can only be attached to one bridge at a time, and must be attached for this function to make sense, the kernel easily infers which bridge's fdb is being modified by looking at the device name/index).
Perhaps this hunk of comment can be copied into the static function as well...
--- src/libvirt_private.syms | 2 + src/util/virnetdevbridge.c | 128 +++++++++++++++++++++++++++++++++++++++++++++ src/util/virnetdevbridge.h | 16 ++++++ 3 files changed, 146 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6453826..6b6c51b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1660,6 +1660,8 @@ virNetDevBandwidthUpdateRate; virNetDevBridgeAddPort; virNetDevBridgeCreate; virNetDevBridgeDelete; +virNetDevBridgeFDBAdd; +virNetDevBridgeFDBDel; virNetDevBridgeGetSTP; virNetDevBridgeGetSTPDelay; virNetDevBridgeGetVlanFiltering; diff --git a/src/util/virnetdevbridge.c b/src/util/virnetdevbridge.c index 9be835c..a38323b 100644 --- a/src/util/virnetdevbridge.c +++ b/src/util/virnetdevbridge.c @@ -37,6 +37,9 @@ #include <netinet/in.h>
#ifdef __linux__ +# if defined(HAVE_LIBNL) +# include "virnetlink.h" +# endif # include <linux/sockios.h> # include <linux/param.h> /* HZ */ # if NETINET_LINUX_WORKAROUND @@ -915,4 +918,129 @@ virNetDevBridgeSetVlanFiltering(const char *brname ATTRIBUTE_UNUSED, _("Unable to set bridge vlan_filtering on this platform")); return -1; } +
Spurrious?
#endif + + +
Three lines... Just two usually...
+#if defined(__linux__) && defined(HAVE_LIBNL) +static int +virNetDevBridgeFDBAddDel(const virMacAddr *mac, const char *ifname, + unsigned int flags, bool isAdd) +{ + int ret = -1; + struct nlmsghdr *resp = NULL; + struct nlmsgerr *err; + unsigned int recvbuflen; + struct nl_msg *nl_msg; + struct ndmsg ndm = { .ndm_family = PF_BRIDGE, .ndm_state = NUD_NOARP }; + + if (virNetDevGetIndex(ifname, &ndm.ndm_ifindex) < 0) + return -1; + + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_ROUTER) + ndm.ndm_flags |= NTF_ROUTER; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_SELF) + ndm.ndm_flags |= NTF_SELF; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_MASTER) + ndm.ndm_flags |= NTF_MASTER; + /* default self (same as iproute2's bridge command */ + if (!(ndm.ndm_flags & (NTF_MASTER | NTF_SELF))) + ndm.ndm_flags |= NTF_SELF; + + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_PERMANENT) + ndm.ndm_state |= NUD_PERMANENT; + if (flags & VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) + ndm.ndm_state |= NUD_REACHABLE; + /* default permanent, same as iproute2's bridge command */ + if (!(ndm.ndm_state & (NUD_PERMANENT | NUD_REACHABLE))) + ndm.ndm_state |= NUD_PERMANENT; + + nl_msg = nlmsg_alloc_simple(isAdd ? RTM_NEWNEIGH : RTM_DELNEIGH, + NLM_F_REQUEST | + (isAdd ? (NLM_F_CREATE | NLM_F_EXCL) : 0)); + if (!nl_msg) { + virReportOOMError(); + return -1; + } + + if (nlmsg_append(nl_msg, &ndm, sizeof(ndm), NLMSG_ALIGNTO) < 0) + goto buffer_too_small; + if (nla_put(nl_msg, NDA_LLADDR, VIR_MAC_BUFLEN, mac) < 0) + goto buffer_too_small;
So if someone adds the same thing twice, what happens? Does it matter? IOW: Is there any need for us to check a return status here that indicates "duplicate entry" and ignore? Or is there any need for a *FDBGet() function to determine whether what we're about to add already exists? Similar argument of course for delete, but removing something that doesn't exist - what happens? Then course there's the "FDBModify()" type function. We have something, but want to change a setting. [yes, I know nothing about fdb's, but when I see database mentioned all sorts of mgmt functions come to mind]
+ + /* NB: this message can also accept a Destination IP, a port, a + * vlan tag, and a via (see iproute2/bridge/fdb.c:fdb_modify()), + * but those aren't required for our application + */ + + if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, + NETLINK_ROUTE, 0) < 0) { + goto cleanup; + } + if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL) + goto malformed_resp; + + switch (resp->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(resp); + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + goto malformed_resp; + if (err->error) { + virReportSystemError(-err->error, + _("error adding fdb entry for %s"), ifname); + goto cleanup; + } + break; + case NLMSG_DONE: + break; + + default: + goto malformed_resp; + } + + ret = 0; + cleanup: + nlmsg_free(nl_msg); + VIR_FREE(resp); + return ret; + + malformed_resp: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + goto cleanup; + + buffer_too_small: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + goto cleanup; +} + + +#else +static int +virNetDevBridgeFDBAddDel(const virMacAddr *mac, const char *ifname, + unsigned int flags, bool isAdd)
interesting - is because it's static the compiler didn't complain about ATTRIBUTE_UNUSED (1-4)?
+{ + virReportSystemError(ENOSYS, "%s", + _("Unable to add/delete fdb entries on this platform"));
You could have gone with "Unable to %s fdb entries on this platform", isAdd ? "add" : "delete") to really hide that you didn't write separate "Add" and "Del" functions.
+ return -1; +} + + +#endif + +int +virNetDevBridgeFDBAdd(const virMacAddr *mac, const char *ifname, + unsigned int flags) +{ + return virNetDevBridgeFDBAddDel(mac, ifname, flags, true); +} + +
Thinking out loud for the future patches. The 'flags' here - must they match how they were added? Or do they really matter on the delete function? I see no callers to this in any other future patch nor is there much usage of flags for the Add function (only MASTER | TEMP). For what's here - seems fine... ACK with nits for the extraneous lines. Although I still am curious about where things go from here... John
+int +virNetDevBridgeFDBDel(const virMacAddr *mac, const char *ifname, + unsigned int flags) +{ + return virNetDevBridgeFDBAddDel(mac, ifname, flags, false); +} diff --git a/src/util/virnetdevbridge.h b/src/util/virnetdevbridge.h index 8188a2f..da116b9 100644 --- a/src/util/virnetdevbridge.h +++ b/src/util/virnetdevbridge.h @@ -24,6 +24,7 @@ # define __VIR_NETDEV_BRIDGE_H__
# include "internal.h" +# include "virmacaddr.h"
int virNetDevBridgeCreate(const char *brname) ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; @@ -77,4 +78,19 @@ int virNetDevBridgePortSetUnicastFlood(const char *brname, bool enable) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+enum { + VIR_NETDEVBRIDGE_FDB_FLAG_ROUTER = (1 << 0), + VIR_NETDEVBRIDGE_FDB_FLAG_SELF = (1 << 1), + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER = (1 << 2), + + VIR_NETDEVBRIDGE_FDB_FLAG_PERMANENT = (1 << 3), + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP = (1 << 4), +}; + +int virNetDevBridgeFDBAdd(const virMacAddr *mac, const char *ifname, + unsigned int flags) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevBridgeFDBDel(const virMacAddr *mac, const char *ifname, + unsigned int flags) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; #endif /* __VIR_NETDEV_BRIDGE_H__ */

On 11/24/2014 04:23 PM, John Ferlan wrote:
On 11/24/2014 12:48 PM, Laine Stump wrote:
+ if (nlmsg_append(nl_msg, &ndm, sizeof(ndm), NLMSG_ALIGNTO) < 0) + goto buffer_too_small; + if (nla_put(nl_msg, NDA_LLADDR, VIR_MAC_BUFLEN, mac) < 0) + goto buffer_too_small; So if someone adds the same thing twice, what happens? Does it matter? IOW: Is there any need for us to check a return status here that indicates "duplicate entry" and ignore? Or is there any need for a *FDBGet() function to determine whether what we're about to add already exists?
If you try to add something that's already there, or remove something that isn't there, the kernel returns an error. We're only adding these entries immediately after creating a tap device, so the entry will never exist, and we actually never delete anything - it's automatically deleted when the tap device is deleted. In the future when we allow changing MAC address, then we'll need to think about those things, but for now we're safe. The "FDBGet()" type function is considerably more complicated (you basically get a dump of the entire db into a list), and not needed for what I'm implementing now.
Similar argument of course for delete, but removing something that doesn't exist - what happens?
Then course there's the "FDBModify()" type function. We have something, but want to change a setting.
That really never happens. You add them and take them away.
+ return -1; +} + + +#endif + +int +virNetDevBridgeFDBAdd(const virMacAddr *mac, const char *ifname, + unsigned int flags) +{ + return virNetDevBridgeFDBAddDel(mac, ifname, flags, true); +} + + Thinking out loud for the future patches. The 'flags' here - must they match how they were added?
I think they do matter, as it's possible to have two entries with the same MAC address and interface name, but different flags.

The promiscLinks attribute of a network's bridge subelement is used as a single switch to simultaneously turn on/off several options related to a bridge and its attached devices. When this is done, as many of the links to the bridge as possible have promiscuous mode turned off and/or flooding turned on. The result is better performance (because packets aren't being flooded to all ports, and can be dropped earlier when they are of no interest) and better security (since a guest will only receive traffic intended for the guest interface's configured MAC address). The attribute looks like this in the configuration: <network> <name>test</name> <bridge name='br0' promiscLinks='no'/> ... In order to preserve operational backward compatibility, the default for promiscLinks will be considered to be "yes" (although that won't be automatically stored in the XML, to allow for existing installations to automatically take advantage of this performance improvement in the future if we ever decide that it is safe to turn on everywhere). The details of what is turned on/off will be described in a later enabling patch. This patch only adds the config knob, documentation, and test cases. --- docs/formatnetwork.html.in | 36 ++++++++++++++--- docs/schemas/network.rng | 5 +++ src/conf/network_conf.c | 47 +++++++++++++++++----- src/conf/network_conf.h | 1 + tests/networkxml2xmlin/host-bridge-no-flood.xml | 6 +++ .../nat-network-explicit-flood.xml | 21 ++++++++++ tests/networkxml2xmlout/host-bridge-no-flood.xml | 6 +++ .../nat-network-explicit-flood.xml | 23 +++++++++++ tests/networkxml2xmltest.c | 2 + 9 files changed, 130 insertions(+), 17 deletions(-) create mode 100644 tests/networkxml2xmlin/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlin/nat-network-explicit-flood.xml create mode 100644 tests/networkxml2xmlout/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlout/nat-network-explicit-flood.xml diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in index dc438ae..82bdc88 100644 --- a/docs/formatnetwork.html.in +++ b/docs/formatnetwork.html.in @@ -81,7 +81,7 @@ <pre> ... - <bridge name="virbr0" stp="on" delay="5"/> + <bridge name="virbr0" stp="on" delay="5" promiscLinks="no"/> <domain name="example.com"/> <forward mode="nat" dev="eth0"/> ...</pre> @@ -92,18 +92,42 @@ defines the name of a bridge device which will be used to construct the virtual network. The virtual machines will be connected to this bridge device allowing them to talk to each other. The bridge device - may also be connected to the LAN. It is recommended that bridge - device names started with the prefix <code>vir</code>, but the name - <code>virbr0</code> is reserved for the "default" virtual - network. This element should always be provided when defining + may also be connected to the LAN. When defining a new network with a <code><forward></code> mode of + "nat" or "route" (or an isolated network with - no <code><forward></code> element). + no <code><forward></code> element), libvirt will + automatically generate a unique name for the bridge device if + none is given, and this name will be permanently stored in the + network configuration so that that the same name will be used + every time the network is started. For these types of networks + (nat, routed, and isolated), a bridge name beginning with the + prefix "virbr" is recommended (and that is what is + auto-generated), but not enforced. Attribute <code>stp</code> specifies if Spanning Tree Protocol is 'on' or 'off' (default is 'on'). Attribute <code>delay</code> sets the bridge's forward delay value in seconds (default is 0). <span class="since">Since 0.3.0</span> + + <p> + The <code>promiscLinks</code> attribute of the bridge + element is used to tell libvirt whether or not to attempt a + bridge configuration which disables flooding and turns off + promiscuous mode on as many ports as possible (this is done + by turning off each tap device's learning and unicast_flood + settings, and manually managing the bridge's forwarding + databas). It can be set to 'yes' or 'no' (defaults to 'yes' + to preserve backward compatibility). Turning off promiscuous + mode on the links to a bridge can improve performance and + provide better security, but can also cause some networking + setups to stop working (e.g. vlan tagging, multicast) and is + not supported by older kernels. + <span class="since">Since 1.2.11, requires kernel 3.17 or + newer</span> + </p> + + </dd> <dt><code>domain</code></dt> <dd> diff --git a/docs/schemas/network.rng b/docs/schemas/network.rng index 4546f80..5c57165 100644 --- a/docs/schemas/network.rng +++ b/docs/schemas/network.rng @@ -64,6 +64,11 @@ <data type="unsignedLong"/> </attribute> </optional> + <optional> + <attribute name="promiscLinks"> + <ref name="virYesNo"/> + </attribute> + </optional> </element> </optional> diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c index 067334e..d6ecc51 100644 --- a/src/conf/network_conf.c +++ b/src/conf/network_conf.c @@ -2108,6 +2108,18 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) } VIR_FREE(tmp); + tmp = virXPathString("string(./bridge[1]/@promiscLinks)", ctxt); + if (tmp) { + if ((def->promiscLinks + = virTristateBoolTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid promiscLinks setting '%s' " + "in network '%s'"), tmp, def->name); + goto error; + } + VIR_FREE(tmp); + } + tmp = virXPathString("string(./mac[1]/@address)", ctxt); if (tmp) { if (virMacAddrParse(tmp, &def->mac) < 0) { @@ -2290,6 +2302,14 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) def->name); goto error; } + if (def->promiscLinks) { + virReportError(VIR_ERR_XML_ERROR, + _("bridge promiscLinks setting not allowed " + "in %s mode (network '%s')"), + virNetworkForwardTypeToString(def->forward.type), + def->name); + goto error; + } /* fall through to next case */ case VIR_NETWORK_FORWARD_BRIDGE: if (def->delay || stp) { @@ -2783,22 +2803,27 @@ virNetworkDefFormatBuf(virBufferPtr buf, virBufferAddLit(buf, "</forward>\n"); } + if (def->forward.type == VIR_NETWORK_FORWARD_NONE || - def->forward.type == VIR_NETWORK_FORWARD_NAT || - def->forward.type == VIR_NETWORK_FORWARD_ROUTE) { + def->forward.type == VIR_NETWORK_FORWARD_NAT || + def->forward.type == VIR_NETWORK_FORWARD_ROUTE || + def->bridge || def->promiscLinks) { virBufferAddLit(buf, "<bridge"); - if (def->bridge) - virBufferEscapeString(buf, " name='%s'", def->bridge); - virBufferAsprintf(buf, " stp='%s' delay='%ld'/>\n", - def->stp ? "on" : "off", - def->delay); - } else if (def->forward.type == VIR_NETWORK_FORWARD_BRIDGE && - def->bridge) { - virBufferEscapeString(buf, "<bridge name='%s'/>\n", def->bridge); + virBufferEscapeString(buf, " name='%s'", def->bridge); + if (def->forward.type == VIR_NETWORK_FORWARD_NONE || + def->forward.type == VIR_NETWORK_FORWARD_NAT || + def->forward.type == VIR_NETWORK_FORWARD_ROUTE) { + virBufferAsprintf(buf, " stp='%s' delay='%ld'", + def->stp ? "on" : "off", def->delay); + } + if (def->promiscLinks) { + virBufferAsprintf(buf, " promiscLinks='%s'", + virTristateBoolTypeToString(def->promiscLinks)); + } + virBufferAddLit(buf, "/>\n"); } - if (def->mac_specified) { char macaddr[VIR_MAC_STRING_BUFLEN]; virMacAddrFormat(&def->mac, macaddr); diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h index 660cd2d..0d4aa2e 100644 --- a/src/conf/network_conf.h +++ b/src/conf/network_conf.h @@ -231,6 +231,7 @@ struct _virNetworkDef { int connections; /* # of guest interfaces connected to this network */ char *bridge; /* Name of bridge device */ + int promiscLinks; /* enum virTristateBool - default is YES */ char *domain; unsigned long delay; /* Bridge forward delay (ms) */ bool stp; /* Spanning tree protocol */ diff --git a/tests/networkxml2xmlin/host-bridge-no-flood.xml b/tests/networkxml2xmlin/host-bridge-no-flood.xml new file mode 100644 index 0000000..a5378c8 --- /dev/null +++ b/tests/networkxml2xmlin/host-bridge-no-flood.xml @@ -0,0 +1,6 @@ +<network> + <name>host-bridge-net</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid> + <forward mode="bridge"/> + <bridge name="br0" promiscLinks='no'/> +</network> diff --git a/tests/networkxml2xmlin/nat-network-explicit-flood.xml b/tests/networkxml2xmlin/nat-network-explicit-flood.xml new file mode 100644 index 0000000..a0f79ff --- /dev/null +++ b/tests/networkxml2xmlin/nat-network-explicit-flood.xml @@ -0,0 +1,21 @@ +<network> + <name>default</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid> + <bridge name="virbr0" promiscLinks='yes'/> + <forward mode="nat" dev="eth1"/> + <ip address="192.168.122.1" netmask="255.255.255.0"> + <dhcp> + <range start="192.168.122.2" end="192.168.122.254"/> + <host mac="00:16:3e:77:e2:ed" name="a.example.com" ip="192.168.122.10"/> + <host mac="00:16:3e:3e:a9:1a" name="b.example.com" ip="192.168.122.11"/> + </dhcp> + </ip> + <ip family="ipv4" address="192.168.123.1" netmask="255.255.255.0"> + </ip> + <ip family="ipv6" address="2001:db8:ac10:fe01::1" prefix="64"> + </ip> + <ip family="ipv6" address="2001:db8:ac10:fd01::1" prefix="64"> + </ip> + <ip family="ipv4" address="10.24.10.1"> + </ip> +</network> diff --git a/tests/networkxml2xmlout/host-bridge-no-flood.xml b/tests/networkxml2xmlout/host-bridge-no-flood.xml new file mode 100644 index 0000000..f8f1ced --- /dev/null +++ b/tests/networkxml2xmlout/host-bridge-no-flood.xml @@ -0,0 +1,6 @@ +<network> + <name>host-bridge-net</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid> + <forward mode='bridge'/> + <bridge name='br0' promiscLinks='no'/> +</network> diff --git a/tests/networkxml2xmlout/nat-network-explicit-flood.xml b/tests/networkxml2xmlout/nat-network-explicit-flood.xml new file mode 100644 index 0000000..ffca9af --- /dev/null +++ b/tests/networkxml2xmlout/nat-network-explicit-flood.xml @@ -0,0 +1,23 @@ +<network> + <name>default</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid> + <forward dev='eth1' mode='nat'> + <interface dev='eth1'/> + </forward> + <bridge name='virbr0' stp='on' delay='0' promiscLinks='yes'/> + <ip address='192.168.122.1' netmask='255.255.255.0'> + <dhcp> + <range start='192.168.122.2' end='192.168.122.254'/> + <host mac='00:16:3e:77:e2:ed' name='a.example.com' ip='192.168.122.10'/> + <host mac='00:16:3e:3e:a9:1a' name='b.example.com' ip='192.168.122.11'/> + </dhcp> + </ip> + <ip family='ipv4' address='192.168.123.1' netmask='255.255.255.0'> + </ip> + <ip family='ipv6' address='2001:db8:ac10:fe01::1' prefix='64'> + </ip> + <ip family='ipv6' address='2001:db8:ac10:fd01::1' prefix='64'> + </ip> + <ip family='ipv4' address='10.24.10.1'> + </ip> +</network> diff --git a/tests/networkxml2xmltest.c b/tests/networkxml2xmltest.c index 65ac591..34a5211 100644 --- a/tests/networkxml2xmltest.c +++ b/tests/networkxml2xmltest.c @@ -120,6 +120,8 @@ mymain(void) DO_TEST("hostdev"); DO_TEST_FULL("hostdev-pf", VIR_NETWORK_XML_INACTIVE); DO_TEST("passthrough-address-crash"); + DO_TEST("nat-network-explicit-flood"); + DO_TEST("host-bridge-no-flood"); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
The promiscLinks attribute of a network's bridge subelement is used as a single switch to simultaneously turn on/off several options related to a bridge and its attached devices. When this is done, as many of the links to the bridge as possible have promiscuous mode turned off and/or flooding turned on. The result is better performance (because packets aren't being flooded to all ports, and can be dropped earlier when they are of no interest) and better security (since a guest will only receive traffic intended for the guest interface's configured MAC address).
The attribute looks like this in the configuration:
<network> <name>test</name> <bridge name='br0' promiscLinks='no'/> ...
In order to preserve operational backward compatibility, the default for promiscLinks will be considered to be "yes" (although that won't be automatically stored in the XML, to allow for existing installations to automatically take advantage of this performance improvement in the future if we ever decide that it is safe to turn on everywhere).
I'm sure this makes sense if you've been looking at it every day for a while; however, if I'm reading this correctly, the "desired eventual" state is to set promiscLinks to no, but because we don't set it that way today *and* because setting it to no means you have to have a specific kernel (or better) installed, then we have to set this to 'yes' by default. I guess I'm confused by the message that states the default is 'yes', but won't be saved in the XML "to allow existing installations to automatically take advantage of this performance improvement in the future" [assuming they have the right kernel version or later, right?]... Do you see my confusion? Changing the default in the future might not be the best solution given the requirement of a specific kernel which we cannot ever seem to assume... FWIW: My brain works the other way when considering new knobs. By default it's not enabled because of a myriad of reasons, but if you set this attribute to "yes" or "enabled" (or whatever), then this new feature can be taken advantage of. By setting to yes, you've taken responsibility to ensure you have the required config. Because of internal IRC I know you struggled with a name for this and using fdb='managed|auto' probably only means something to someone that understands what the fdb is! So either allow for setting each attribute separately (e.g. promisc={yes|no} flood={yes|no}) or perhaps consider 'portMACFilter={yes|no}' (defaults to 'no'). If setting things separately doesn't make sense (or is too confusing to describe), then one knob is fine. If going with the latter, then I would think it's easier to describe that by setting this attribute to yes, libvirt will turn promiscuous mode (eg learning) and flooding off on each port. The longer attribute name is still less than trustGuestRxFilters. BTW: To me the name implies, hey this is something I want, even though it comes with caveats. The code below looks fine with regard to what you've done and how things are currently named. I went through all of the above because you were looking for a better name! Be careful what you ask for! John
The details of what is turned on/off will be described in a later enabling patch. This patch only adds the config knob, documentation, and test cases. --- docs/formatnetwork.html.in | 36 ++++++++++++++--- docs/schemas/network.rng | 5 +++ src/conf/network_conf.c | 47 +++++++++++++++++----- src/conf/network_conf.h | 1 + tests/networkxml2xmlin/host-bridge-no-flood.xml | 6 +++ .../nat-network-explicit-flood.xml | 21 ++++++++++ tests/networkxml2xmlout/host-bridge-no-flood.xml | 6 +++ .../nat-network-explicit-flood.xml | 23 +++++++++++ tests/networkxml2xmltest.c | 2 + 9 files changed, 130 insertions(+), 17 deletions(-) create mode 100644 tests/networkxml2xmlin/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlin/nat-network-explicit-flood.xml create mode 100644 tests/networkxml2xmlout/host-bridge-no-flood.xml create mode 100644 tests/networkxml2xmlout/nat-network-explicit-flood.xml
diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in index dc438ae..82bdc88 100644 --- a/docs/formatnetwork.html.in +++ b/docs/formatnetwork.html.in @@ -81,7 +81,7 @@
<pre> ... - <bridge name="virbr0" stp="on" delay="5"/> + <bridge name="virbr0" stp="on" delay="5" promiscLinks="no"/> <domain name="example.com"/> <forward mode="nat" dev="eth0"/> ...</pre> @@ -92,18 +92,42 @@ defines the name of a bridge device which will be used to construct the virtual network. The virtual machines will be connected to this bridge device allowing them to talk to each other. The bridge device - may also be connected to the LAN. It is recommended that bridge - device names started with the prefix <code>vir</code>, but the name - <code>virbr0</code> is reserved for the "default" virtual - network. This element should always be provided when defining + may also be connected to the LAN. When defining a new network with a <code><forward></code> mode of + "nat" or "route" (or an isolated network with - no <code><forward></code> element). + no <code><forward></code> element), libvirt will + automatically generate a unique name for the bridge device if + none is given, and this name will be permanently stored in the + network configuration so that that the same name will be used + every time the network is started. For these types of networks + (nat, routed, and isolated), a bridge name beginning with the + prefix "virbr" is recommended (and that is what is + auto-generated), but not enforced. Attribute <code>stp</code> specifies if Spanning Tree Protocol is 'on' or 'off' (default is 'on'). Attribute <code>delay</code> sets the bridge's forward delay value in seconds (default is 0). <span class="since">Since 0.3.0</span> + + <p> + The <code>promiscLinks</code> attribute of the bridge + element is used to tell libvirt whether or not to attempt a + bridge configuration which disables flooding and turns off + promiscuous mode on as many ports as possible (this is done + by turning off each tap device's learning and unicast_flood + settings, and manually managing the bridge's forwarding + databas). It can be set to 'yes' or 'no' (defaults to 'yes' + to preserve backward compatibility). Turning off promiscuous + mode on the links to a bridge can improve performance and + provide better security, but can also cause some networking + setups to stop working (e.g. vlan tagging, multicast) and is + not supported by older kernels. + <span class="since">Since 1.2.11, requires kernel 3.17 or + newer</span> + </p> + + </dd> <dt><code>domain</code></dt> <dd> diff --git a/docs/schemas/network.rng b/docs/schemas/network.rng index 4546f80..5c57165 100644 --- a/docs/schemas/network.rng +++ b/docs/schemas/network.rng @@ -64,6 +64,11 @@ <data type="unsignedLong"/> </attribute> </optional>
You could add an extra line here if you change this - to be like the other optional attributes
+ <optional> + <attribute name="promiscLinks"> + <ref name="virYesNo"/> + </attribute> + </optional>
</element> </optional> diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c index 067334e..d6ecc51 100644 --- a/src/conf/network_conf.c +++ b/src/conf/network_conf.c @@ -2108,6 +2108,18 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) } VIR_FREE(tmp);
+ tmp = virXPathString("string(./bridge[1]/@promiscLinks)", ctxt); + if (tmp) { + if ((def->promiscLinks + = virTristateBoolTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid promiscLinks setting '%s' " + "in network '%s'"), tmp, def->name); + goto error; + } + VIR_FREE(tmp); + } + tmp = virXPathString("string(./mac[1]/@address)", ctxt); if (tmp) { if (virMacAddrParse(tmp, &def->mac) < 0) { @@ -2290,6 +2302,14 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt) def->name); goto error; } + if (def->promiscLinks) { + virReportError(VIR_ERR_XML_ERROR, + _("bridge promiscLinks setting not allowed " + "in %s mode (network '%s')"), + virNetworkForwardTypeToString(def->forward.type), + def->name); + goto error; + } /* fall through to next case */ case VIR_NETWORK_FORWARD_BRIDGE: if (def->delay || stp) { @@ -2783,22 +2803,27 @@ virNetworkDefFormatBuf(virBufferPtr buf, virBufferAddLit(buf, "</forward>\n"); }
+ if (def->forward.type == VIR_NETWORK_FORWARD_NONE || - def->forward.type == VIR_NETWORK_FORWARD_NAT || - def->forward.type == VIR_NETWORK_FORWARD_ROUTE) { + def->forward.type == VIR_NETWORK_FORWARD_NAT || + def->forward.type == VIR_NETWORK_FORWARD_ROUTE || + def->bridge || def->promiscLinks) {
virBufferAddLit(buf, "<bridge"); - if (def->bridge) - virBufferEscapeString(buf, " name='%s'", def->bridge); - virBufferAsprintf(buf, " stp='%s' delay='%ld'/>\n", - def->stp ? "on" : "off", - def->delay); - } else if (def->forward.type == VIR_NETWORK_FORWARD_BRIDGE && - def->bridge) { - virBufferEscapeString(buf, "<bridge name='%s'/>\n", def->bridge); + virBufferEscapeString(buf, " name='%s'", def->bridge); + if (def->forward.type == VIR_NETWORK_FORWARD_NONE || + def->forward.type == VIR_NETWORK_FORWARD_NAT || + def->forward.type == VIR_NETWORK_FORWARD_ROUTE) { + virBufferAsprintf(buf, " stp='%s' delay='%ld'", + def->stp ? "on" : "off", def->delay); + } + if (def->promiscLinks) { + virBufferAsprintf(buf, " promiscLinks='%s'", + virTristateBoolTypeToString(def->promiscLinks)); + } + virBufferAddLit(buf, "/>\n");
ahh - so much clearer!
}
- if (def->mac_specified) { char macaddr[VIR_MAC_STRING_BUFLEN]; virMacAddrFormat(&def->mac, macaddr); diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h index 660cd2d..0d4aa2e 100644 --- a/src/conf/network_conf.h +++ b/src/conf/network_conf.h @@ -231,6 +231,7 @@ struct _virNetworkDef { int connections; /* # of guest interfaces connected to this network */
char *bridge; /* Name of bridge device */ + int promiscLinks; /* enum virTristateBool - default is YES */ char *domain; unsigned long delay; /* Bridge forward delay (ms) */ bool stp; /* Spanning tree protocol */ diff --git a/tests/networkxml2xmlin/host-bridge-no-flood.xml b/tests/networkxml2xmlin/host-bridge-no-flood.xml new file mode 100644 index 0000000..a5378c8 --- /dev/null +++ b/tests/networkxml2xmlin/host-bridge-no-flood.xml @@ -0,0 +1,6 @@ +<network> + <name>host-bridge-net</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid> + <forward mode="bridge"/> + <bridge name="br0" promiscLinks='no'/> +</network> diff --git a/tests/networkxml2xmlin/nat-network-explicit-flood.xml b/tests/networkxml2xmlin/nat-network-explicit-flood.xml new file mode 100644 index 0000000..a0f79ff --- /dev/null +++ b/tests/networkxml2xmlin/nat-network-explicit-flood.xml @@ -0,0 +1,21 @@ +<network> + <name>default</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid> + <bridge name="virbr0" promiscLinks='yes'/> + <forward mode="nat" dev="eth1"/> + <ip address="192.168.122.1" netmask="255.255.255.0"> + <dhcp> + <range start="192.168.122.2" end="192.168.122.254"/> + <host mac="00:16:3e:77:e2:ed" name="a.example.com" ip="192.168.122.10"/> + <host mac="00:16:3e:3e:a9:1a" name="b.example.com" ip="192.168.122.11"/> + </dhcp> + </ip> + <ip family="ipv4" address="192.168.123.1" netmask="255.255.255.0"> + </ip> + <ip family="ipv6" address="2001:db8:ac10:fe01::1" prefix="64"> + </ip> + <ip family="ipv6" address="2001:db8:ac10:fd01::1" prefix="64"> + </ip> + <ip family="ipv4" address="10.24.10.1"> + </ip> +</network> diff --git a/tests/networkxml2xmlout/host-bridge-no-flood.xml b/tests/networkxml2xmlout/host-bridge-no-flood.xml new file mode 100644 index 0000000..f8f1ced --- /dev/null +++ b/tests/networkxml2xmlout/host-bridge-no-flood.xml @@ -0,0 +1,6 @@ +<network> + <name>host-bridge-net</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid> + <forward mode='bridge'/> + <bridge name='br0' promiscLinks='no'/> +</network> diff --git a/tests/networkxml2xmlout/nat-network-explicit-flood.xml b/tests/networkxml2xmlout/nat-network-explicit-flood.xml new file mode 100644 index 0000000..ffca9af --- /dev/null +++ b/tests/networkxml2xmlout/nat-network-explicit-flood.xml @@ -0,0 +1,23 @@ +<network> + <name>default</name> + <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid> + <forward dev='eth1' mode='nat'> + <interface dev='eth1'/> + </forward> + <bridge name='virbr0' stp='on' delay='0' promiscLinks='yes'/> + <ip address='192.168.122.1' netmask='255.255.255.0'> + <dhcp> + <range start='192.168.122.2' end='192.168.122.254'/> + <host mac='00:16:3e:77:e2:ed' name='a.example.com' ip='192.168.122.10'/> + <host mac='00:16:3e:3e:a9:1a' name='b.example.com' ip='192.168.122.11'/> + </dhcp> + </ip> + <ip family='ipv4' address='192.168.123.1' netmask='255.255.255.0'> + </ip> + <ip family='ipv6' address='2001:db8:ac10:fe01::1' prefix='64'> + </ip> + <ip family='ipv6' address='2001:db8:ac10:fd01::1' prefix='64'> + </ip> + <ip family='ipv4' address='10.24.10.1'> + </ip> +</network> diff --git a/tests/networkxml2xmltest.c b/tests/networkxml2xmltest.c index 65ac591..34a5211 100644 --- a/tests/networkxml2xmltest.c +++ b/tests/networkxml2xmltest.c @@ -120,6 +120,8 @@ mymain(void) DO_TEST("hostdev"); DO_TEST_FULL("hostdev-pf", VIR_NETWORK_XML_INACTIVE); DO_TEST("passthrough-address-crash"); + DO_TEST("nat-network-explicit-flood"); + DO_TEST("host-bridge-no-flood");
return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; }

When the actualType of a virDomainNetDef is "network", it means that we are connecting to a libvirt-managed network (routed, natted, or isolated) which does use a bridge device (created by libvirt). In the past we have required drivers such as qemu to call the public API to retrieve the bridge name in this case (even though it is available in the NetDef's ActualNetDef if the actualType is "bridge" (i.e., an externally-created bridge that isn't managed by libvirt). There is no real reason for this difference, and as a matter of fact it complicates things for qemu. Also, there is another bridge-related attribute (promiscLinks) that will need to be available in both cases, so this makes things consistent. Along with making the bridge name available in the internal object, it is also now reported in the <source> element of the <interface> state XML (or the <actual> subelement in the internally-stored format). The one oddity about this change is that usually there is a separate union for every different "type" in a higher level object (e.g. in the case of a virDomainNetDef there are separate "network" and "bridge" members of the union that pivots on the type), but in this case network and bridge types both have exactly the same attributes, so the "bridge" member is used for both type==network and type==bridge. --- src/conf/domain_conf.c | 102 +++++++++++++++++++++++--------------------- src/network/bridge_driver.c | 9 ++++ 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 68eef54..932bb1c 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -1352,6 +1352,7 @@ virDomainActualNetDefFree(virDomainActualNetDefPtr def) switch (def->type) { case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: VIR_FREE(def->data.bridge.brname); break; case VIR_DOMAIN_NET_TYPE_DIRECT: @@ -7074,9 +7075,12 @@ virDomainActualNetDefParseXML(xmlNodePtr node, goto error; } VIR_FREE(class_id); - } else if (actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) { + } + if (actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + actual->type == VIR_DOMAIN_NET_TYPE_NETWORK) { char *brname = virXPathString("string(./source/@bridge)", ctxt); - if (!brname) { + + if (!brname && actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing <source> element with bridge name in " "interface's <actual> element")); @@ -17015,60 +17019,59 @@ virDomainHostdevDefFormatCaps(virBufferPtr buf, static int virDomainActualNetDefContentsFormat(virBufferPtr buf, virDomainNetDefPtr def, - const char *typeStr, bool inSubelement, unsigned int flags) { - const char *mode; - - switch (virDomainNetGetActualType(def)) { - case VIR_DOMAIN_NET_TYPE_BRIDGE: - virBufferEscapeString(buf, "<source bridge='%s'/>\n", - virDomainNetGetActualBridgeName(def)); - break; - - case VIR_DOMAIN_NET_TYPE_DIRECT: - virBufferAddLit(buf, "<source"); - virBufferEscapeString(buf, " dev='%s'", - virDomainNetGetActualDirectDev(def)); + int actualType = virDomainNetGetActualType(def); - mode = virNetDevMacVLanModeTypeToString(virDomainNetGetActualDirectMode(def)); - if (!mode) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("unexpected source mode %d"), - virDomainNetGetActualDirectMode(def)); - return -1; - } - virBufferAsprintf(buf, " mode='%s'/>\n", mode); - break; - - case VIR_DOMAIN_NET_TYPE_HOSTDEV: + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { if (virDomainHostdevDefFormatSubsys(buf, virDomainNetGetActualHostdev(def), flags, true) < 0) { return -1; } - break; - - case VIR_DOMAIN_NET_TYPE_NETWORK: - if (!inSubelement) { - /* the <source> element isn't included in <actual> - * (i.e. when we're putting our output into a subelement - * rather than inline) because the main element has the - * same info already. If we're outputting inline, though, - * we *do* need to output <source>, because the caller - * won't have done it. + } else { + virBufferAddLit(buf, "<source"); + if (def->type == VIR_DOMAIN_NET_TYPE_NETWORK && !inSubelement) { + /* When we're putting our output into the <actual> + * subelement rather than the main <interface>, the + * network name isn't included in the <source> because the + * main interface element's <source> has the same info + * already. If we've been called to output directly into + * the main element's <source> though (the case here - + * "!inSubElement"), we *do* need to output the network + * name, because the caller won't have done it). */ - virBufferEscapeString(buf, "<source network='%s'/>\n", - def->data.network.name); + virBufferEscapeString(buf, " network='%s'", def->data.network.name); } - if (def->data.network.actual && def->data.network.actual->class_id) - virBufferAsprintf(buf, "<class id='%u'/>\n", - def->data.network.actual->class_id); - break; - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("unexpected actual net type %s"), typeStr); - return -1; + if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || + actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { + /* actualType == NETWORK includes the name of the bridge + * that is used by the network, whether we are + * "inSubElement" or not. + */ + virBufferEscapeString(buf, " bridge='%s'", + virDomainNetGetActualBridgeName(def)); + } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) { + const char *mode; + + virBufferEscapeString(buf, " dev='%s'", + virDomainNetGetActualDirectDev(def)); + mode = virNetDevMacVLanModeTypeToString(virDomainNetGetActualDirectMode(def)); + if (!mode) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unexpected source mode %d"), + virDomainNetGetActualDirectMode(def)); + return -1; + } + virBufferAsprintf(buf, " mode='%s'", mode); + } + + virBufferAddLit(buf, "/>\n"); + } + if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT && + def->data.network.actual && def->data.network.actual->class_id) { + virBufferAsprintf(buf, "<class id='%u'/>\n", + def->data.network.actual->class_id); } if (virNetDevVlanFormat(virDomainNetGetActualVlan(def), buf) < 0) @@ -17116,7 +17119,7 @@ virDomainActualNetDefFormat(virBufferPtr buf, virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); - if (virDomainActualNetDefContentsFormat(buf, def, typeStr, true, flags) < 0) + if (virDomainActualNetDefContentsFormat(buf, def, true, flags) < 0) return -1; virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</actual>\n"); @@ -17287,7 +17290,7 @@ virDomainNetDefFormat(virBufferPtr buf, * the standard place... (this is for public reporting of * interface status) */ - if (virDomainActualNetDefContentsFormat(buf, def, typeStr, false, flags) < 0) + if (virDomainActualNetDefContentsFormat(buf, def, false, flags) < 0) return -1; } else { /* ...but if we've asked for the inactive XML (rather than @@ -20638,7 +20641,8 @@ virDomainNetGetActualBridgeName(virDomainNetDefPtr iface) return iface->data.bridge.brname; if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && iface->data.network.actual && - iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) + (iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_NETWORK)) return iface->data.network.actual->data.bridge.brname; return NULL; } diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 6cb421c..92efd7e 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -3771,6 +3771,15 @@ networkAllocateActualDevice(virDomainDefPtr dom, */ iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK; + /* we also store the bridge device + * in iface->data.network.actual->data.bridge for later use + * after the domain's tap device is created (to attach to the + * bridge and set flood/learning mode on the tap device) + */ + if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, + netdef->bridge) < 0) + goto error; + if (networkPlugBandwidth(network, iface) < 0) goto error; -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
When the actualType of a virDomainNetDef is "network", it means that we are connecting to a libvirt-managed network (routed, natted, or isolated) which does use a bridge device (created by libvirt). In the past we have required drivers such as qemu to call the public API to retrieve the bridge name in this case (even though it is available in the NetDef's ActualNetDef if the actualType is "bridge" (i.e., an externally-created bridge that isn't managed by libvirt). There is no real reason for this difference, and as a matter of fact it complicates things for qemu. Also, there is another bridge-related attribute (promiscLinks) that will need to be available in both cases, so this makes things consistent.
Along with making the bridge name available in the internal object, it is also now reported in the <source> element of the <interface> state XML (or the <actual> subelement in the internally-stored format).
The one oddity about this change is that usually there is a separate union for every different "type" in a higher level object (e.g. in the case of a virDomainNetDef there are separate "network" and "bridge" members of the union that pivots on the type), but in this case network and bridge types both have exactly the same attributes, so the "bridge" member is used for both type==network and type==bridge. --- src/conf/domain_conf.c | 102 +++++++++++++++++++++++--------------------- src/network/bridge_driver.c | 9 ++++ 2 files changed, 62 insertions(+), 49 deletions(-)
I'm happy someone understands the comings and goings of actual! Seems reasonable... Is there any concern vis-a-vis migration (or similar guest state saving activities) with respect to having a <source> element in the output for actual that wouldn't have been there before? (if I'm reading the tea leaves correctly - that's what's happening here). ACK in general - nothing jumps out at me other than the display/saving of the <source> John
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 68eef54..932bb1c 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -1352,6 +1352,7 @@ virDomainActualNetDefFree(virDomainActualNetDefPtr def)
switch (def->type) { case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: VIR_FREE(def->data.bridge.brname); break; case VIR_DOMAIN_NET_TYPE_DIRECT: @@ -7074,9 +7075,12 @@ virDomainActualNetDefParseXML(xmlNodePtr node, goto error; } VIR_FREE(class_id); - } else if (actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) { + } + if (actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + actual->type == VIR_DOMAIN_NET_TYPE_NETWORK) { char *brname = virXPathString("string(./source/@bridge)", ctxt); - if (!brname) { + + if (!brname && actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing <source> element with bridge name in " "interface's <actual> element")); @@ -17015,60 +17019,59 @@ virDomainHostdevDefFormatCaps(virBufferPtr buf, static int virDomainActualNetDefContentsFormat(virBufferPtr buf, virDomainNetDefPtr def, - const char *typeStr, bool inSubelement, unsigned int flags) { - const char *mode; - - switch (virDomainNetGetActualType(def)) { - case VIR_DOMAIN_NET_TYPE_BRIDGE: - virBufferEscapeString(buf, "<source bridge='%s'/>\n", - virDomainNetGetActualBridgeName(def)); - break; - - case VIR_DOMAIN_NET_TYPE_DIRECT: - virBufferAddLit(buf, "<source"); - virBufferEscapeString(buf, " dev='%s'", - virDomainNetGetActualDirectDev(def)); + int actualType = virDomainNetGetActualType(def);
- mode = virNetDevMacVLanModeTypeToString(virDomainNetGetActualDirectMode(def)); - if (!mode) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("unexpected source mode %d"), - virDomainNetGetActualDirectMode(def)); - return -1; - } - virBufferAsprintf(buf, " mode='%s'/>\n", mode); - break; - - case VIR_DOMAIN_NET_TYPE_HOSTDEV: + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) { if (virDomainHostdevDefFormatSubsys(buf, virDomainNetGetActualHostdev(def), flags, true) < 0) { return -1; } - break; - - case VIR_DOMAIN_NET_TYPE_NETWORK: - if (!inSubelement) { - /* the <source> element isn't included in <actual> - * (i.e. when we're putting our output into a subelement - * rather than inline) because the main element has the - * same info already. If we're outputting inline, though, - * we *do* need to output <source>, because the caller - * won't have done it. + } else { + virBufferAddLit(buf, "<source"); + if (def->type == VIR_DOMAIN_NET_TYPE_NETWORK && !inSubelement) { + /* When we're putting our output into the <actual> + * subelement rather than the main <interface>, the + * network name isn't included in the <source> because the + * main interface element's <source> has the same info + * already. If we've been called to output directly into + * the main element's <source> though (the case here - + * "!inSubElement"), we *do* need to output the network + * name, because the caller won't have done it). */ - virBufferEscapeString(buf, "<source network='%s'/>\n", - def->data.network.name); + virBufferEscapeString(buf, " network='%s'", def->data.network.name); } - if (def->data.network.actual && def->data.network.actual->class_id) - virBufferAsprintf(buf, "<class id='%u'/>\n", - def->data.network.actual->class_id); - break; - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("unexpected actual net type %s"), typeStr); - return -1; + if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || + actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { + /* actualType == NETWORK includes the name of the bridge + * that is used by the network, whether we are + * "inSubElement" or not. + */ + virBufferEscapeString(buf, " bridge='%s'", + virDomainNetGetActualBridgeName(def)); + } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) { + const char *mode; + + virBufferEscapeString(buf, " dev='%s'", + virDomainNetGetActualDirectDev(def)); + mode = virNetDevMacVLanModeTypeToString(virDomainNetGetActualDirectMode(def)); + if (!mode) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unexpected source mode %d"), + virDomainNetGetActualDirectMode(def)); + return -1; + } + virBufferAsprintf(buf, " mode='%s'", mode); + } + + virBufferAddLit(buf, "/>\n"); + } + if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT && + def->data.network.actual && def->data.network.actual->class_id) { + virBufferAsprintf(buf, "<class id='%u'/>\n", + def->data.network.actual->class_id); }
if (virNetDevVlanFormat(virDomainNetGetActualVlan(def), buf) < 0) @@ -17116,7 +17119,7 @@ virDomainActualNetDefFormat(virBufferPtr buf, virBufferAddLit(buf, ">\n");
virBufferAdjustIndent(buf, 2); - if (virDomainActualNetDefContentsFormat(buf, def, typeStr, true, flags) < 0) + if (virDomainActualNetDefContentsFormat(buf, def, true, flags) < 0) return -1; virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</actual>\n"); @@ -17287,7 +17290,7 @@ virDomainNetDefFormat(virBufferPtr buf, * the standard place... (this is for public reporting of * interface status) */ - if (virDomainActualNetDefContentsFormat(buf, def, typeStr, false, flags) < 0) + if (virDomainActualNetDefContentsFormat(buf, def, false, flags) < 0) return -1; } else { /* ...but if we've asked for the inactive XML (rather than @@ -20638,7 +20641,8 @@ virDomainNetGetActualBridgeName(virDomainNetDefPtr iface) return iface->data.bridge.brname; if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && iface->data.network.actual && - iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE) + (iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_NETWORK)) return iface->data.network.actual->data.bridge.brname; return NULL; } diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 6cb421c..92efd7e 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -3771,6 +3771,15 @@ networkAllocateActualDevice(virDomainDefPtr dom, */ iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK;
+ /* we also store the bridge device + * in iface->data.network.actual->data.bridge for later use + * after the domain's tap device is created (to attach to the + * bridge and set flood/learning mode on the tap device) + */ + if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, + netdef->bridge) < 0) + goto error; + if (networkPlugBandwidth(network, iface) < 0) goto error;

At the time that the network driver allocates a connection to a network, the tap device that will be used hasn't yet been created - that will be done later by qemu (or lxc or whoever) - but if the network has promiscLinks='no', then when we do get around to creating the tap device, we will need to add an entry for it to the network bridge's fdb (forwarding database) *and* turn off learning and unicast_flood for that tap device in the bridge's sysfs settings. This means that qemu needs to know both the bridge name as well as the setting of promiscLinks, so we either need to create a new API to retrieve that info, or just pass it back in the ActualNetDef that is created during networkAllocateActualDevice. We choose the latter method, since it's already done for the bridge device, and it has the side effect of making the information available in domain status. (NB: in the future, I think that the tap device should actually be created by networkAllocateActualDevice(), as that will solve several other problems, but that is a battle for another day, and this information will still be useful outside the network driver) --- src/conf/domain_conf.c | 29 +++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/network/bridge_driver.c | 6 +++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 932bb1c..1317df4 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -6971,6 +6971,7 @@ virDomainActualNetDefParseXML(xmlNodePtr node, char *mode = NULL; char *addrtype = NULL; char *trustGuestRxFilters = NULL; + char *promiscLinks = NULL; if (VIR_ALLOC(actual) < 0) return -1; @@ -7087,6 +7088,16 @@ virDomainActualNetDefParseXML(xmlNodePtr node, goto error; } actual->data.bridge.brname = brname; + promiscLinks = virXPathString("string(./source/@promiscLinks)", ctxt); + if (promiscLinks && + (actual->data.bridge.promiscLinks + = virTristateBoolTypeFromString(promiscLinks)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid promiscLinks setting '%s' " + "in domain interface's <actual> element"), + promiscLinks); + goto error; + } } bandwidth_node = virXPathNode("./bandwidth", ctxt); @@ -7107,6 +7118,7 @@ virDomainActualNetDefParseXML(xmlNodePtr node, VIR_FREE(mode); VIR_FREE(addrtype); VIR_FREE(trustGuestRxFilters); + VIR_FREE(promiscLinks); virDomainActualNetDefFree(actual); ctxt->node = save_ctxt; @@ -17045,12 +17057,18 @@ virDomainActualNetDefContentsFormat(virBufferPtr buf, } if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { + int promiscLinks = virDomainNetGetActualPromiscLinks(def); + /* actualType == NETWORK includes the name of the bridge * that is used by the network, whether we are * "inSubElement" or not. */ virBufferEscapeString(buf, " bridge='%s'", virDomainNetGetActualBridgeName(def)); + if (promiscLinks) { + virBufferAsprintf(buf, " promiscLinks='%s'", + virTristateSwitchTypeToString(promiscLinks)); + } } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) { const char *mode; @@ -20647,6 +20665,17 @@ virDomainNetGetActualBridgeName(virDomainNetDefPtr iface) return NULL; } +int +virDomainNetGetActualPromiscLinks(virDomainNetDefPtr iface) +{ + if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && + iface->data.network.actual && + (iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_NETWORK)) + return iface->data.network.actual->data.bridge.promiscLinks; + return 0; +} + const char * virDomainNetGetActualDirectDev(virDomainNetDefPtr iface) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0a609df..3b8ac54 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -880,6 +880,7 @@ struct _virDomainActualNetDef { union { struct { char *brname; + int promiscLinks; } bridge; struct { char *linkdev; @@ -2533,6 +2534,7 @@ int virDomainGraphicsListenSetNetwork(virDomainGraphicsDefPtr def, int virDomainNetGetActualType(virDomainNetDefPtr iface); const char *virDomainNetGetActualBridgeName(virDomainNetDefPtr iface); +int virDomainNetGetActualPromiscLinks(virDomainNetDefPtr iface); const char *virDomainNetGetActualDirectDev(virDomainNetDefPtr iface); int virDomainNetGetActualDirectMode(virDomainNetDefPtr iface); virDomainHostdevDefPtr virDomainNetGetActualHostdev(virDomainNetDefPtr iface); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6b6c51b..4730781 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -332,6 +332,7 @@ virDomainNetGetActualBridgeName; virDomainNetGetActualDirectDev; virDomainNetGetActualDirectMode; virDomainNetGetActualHostdev; +virDomainNetGetActualPromiscLinks; virDomainNetGetActualTrustGuestRxFilters; virDomainNetGetActualType; virDomainNetGetActualVirtPortProfile; diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 92efd7e..bc8da79 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -3771,7 +3771,7 @@ networkAllocateActualDevice(virDomainDefPtr dom, */ iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK; - /* we also store the bridge device + /* we also store the bridge device and promiscLinks settings * in iface->data.network.actual->data.bridge for later use * after the domain's tap device is created (to attach to the * bridge and set flood/learning mode on the tap device) @@ -3779,6 +3779,8 @@ networkAllocateActualDevice(virDomainDefPtr dom, if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, netdef->bridge) < 0) goto error; + iface->data.network.actual->data.bridge.promiscLinks + = netdef->promiscLinks; if (networkPlugBandwidth(network, iface) < 0) goto error; @@ -3794,6 +3796,8 @@ networkAllocateActualDevice(virDomainDefPtr dom, if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, netdef->bridge) < 0) goto error; + iface->data.network.actual->data.bridge.promiscLinks + = netdef->promiscLinks; /* merge virtualports from interface, network, and portgroup to * arrive at actual virtualport to use -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
At the time that the network driver allocates a connection to a network, the tap device that will be used hasn't yet been created - that will be done later by qemu (or lxc or whoever) - but if the network has promiscLinks='no', then when we do get around to creating the tap device, we will need to add an entry for it to the network bridge's fdb (forwarding database) *and* turn off learning and unicast_flood for that tap device in the bridge's sysfs settings. This means that qemu needs to know both the bridge name as well as the setting of promiscLinks, so we either need to create a new API to retrieve that info, or just pass it back in the ActualNetDef that is created during networkAllocateActualDevice. We choose the latter method, since it's already done for the bridge device, and it has the side effect of making the information available in domain status.
(NB: in the future, I think that the tap device should actually be created by networkAllocateActualDevice(), as that will solve several other problems, but that is a battle for another day, and this information will still be useful outside the network driver) --- src/conf/domain_conf.c | 29 +++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/network/bridge_driver.c | 6 +++++- 4 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 932bb1c..1317df4 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -6971,6 +6971,7 @@ virDomainActualNetDefParseXML(xmlNodePtr node, char *mode = NULL; char *addrtype = NULL; char *trustGuestRxFilters = NULL; + char *promiscLinks = NULL;
if (VIR_ALLOC(actual) < 0) return -1; @@ -7087,6 +7088,16 @@ virDomainActualNetDefParseXML(xmlNodePtr node, goto error; } actual->data.bridge.brname = brname; + promiscLinks = virXPathString("string(./source/@promiscLinks)", ctxt); + if (promiscLinks && + (actual->data.bridge.promiscLinks + = virTristateBoolTypeFromString(promiscLinks)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid promiscLinks setting '%s' " + "in domain interface's <actual> element"), + promiscLinks); + goto error; + } }
bandwidth_node = virXPathNode("./bandwidth", ctxt); @@ -7107,6 +7118,7 @@ virDomainActualNetDefParseXML(xmlNodePtr node, VIR_FREE(mode); VIR_FREE(addrtype); VIR_FREE(trustGuestRxFilters); + VIR_FREE(promiscLinks); virDomainActualNetDefFree(actual);
ctxt->node = save_ctxt; @@ -17045,12 +17057,18 @@ virDomainActualNetDefContentsFormat(virBufferPtr buf, } if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE || actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { + int promiscLinks = virDomainNetGetActualPromiscLinks(def); + /* actualType == NETWORK includes the name of the bridge * that is used by the network, whether we are * "inSubElement" or not. */ virBufferEscapeString(buf, " bridge='%s'", virDomainNetGetActualBridgeName(def)); + if (promiscLinks) { + virBufferAsprintf(buf, " promiscLinks='%s'", + virTristateSwitchTypeToString(promiscLinks)); + } } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) { const char *mode;
@@ -20647,6 +20665,17 @@ virDomainNetGetActualBridgeName(virDomainNetDefPtr iface) return NULL; }
+int +virDomainNetGetActualPromiscLinks(virDomainNetDefPtr iface) +{ + if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && + iface->data.network.actual && + (iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_BRIDGE || + iface->data.network.actual->type == VIR_DOMAIN_NET_TYPE_NETWORK)) + return iface->data.network.actual->data.bridge.promiscLinks; + return 0;
return VIR_TRISTATE_BOOL_YES; ?? of course that could change to _NO if you followed my comment in patch 3...
+} + const char * virDomainNetGetActualDirectDev(virDomainNetDefPtr iface) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0a609df..3b8ac54 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -880,6 +880,7 @@ struct _virDomainActualNetDef { union { struct { char *brname; + int promiscLinks;
/* enum virTristateBool */ Rest looks reasonable, ACK John
} bridge; struct { char *linkdev; @@ -2533,6 +2534,7 @@ int virDomainGraphicsListenSetNetwork(virDomainGraphicsDefPtr def,
int virDomainNetGetActualType(virDomainNetDefPtr iface); const char *virDomainNetGetActualBridgeName(virDomainNetDefPtr iface); +int virDomainNetGetActualPromiscLinks(virDomainNetDefPtr iface); const char *virDomainNetGetActualDirectDev(virDomainNetDefPtr iface); int virDomainNetGetActualDirectMode(virDomainNetDefPtr iface); virDomainHostdevDefPtr virDomainNetGetActualHostdev(virDomainNetDefPtr iface); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6b6c51b..4730781 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -332,6 +332,7 @@ virDomainNetGetActualBridgeName; virDomainNetGetActualDirectDev; virDomainNetGetActualDirectMode; virDomainNetGetActualHostdev; +virDomainNetGetActualPromiscLinks; virDomainNetGetActualTrustGuestRxFilters; virDomainNetGetActualType; virDomainNetGetActualVirtPortProfile; diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 92efd7e..bc8da79 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -3771,7 +3771,7 @@ networkAllocateActualDevice(virDomainDefPtr dom, */ iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK;
- /* we also store the bridge device + /* we also store the bridge device and promiscLinks settings * in iface->data.network.actual->data.bridge for later use * after the domain's tap device is created (to attach to the * bridge and set flood/learning mode on the tap device) @@ -3779,6 +3779,8 @@ networkAllocateActualDevice(virDomainDefPtr dom, if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, netdef->bridge) < 0) goto error; + iface->data.network.actual->data.bridge.promiscLinks + = netdef->promiscLinks;
if (networkPlugBandwidth(network, iface) < 0) goto error; @@ -3794,6 +3796,8 @@ networkAllocateActualDevice(virDomainDefPtr dom, if (VIR_STRDUP(iface->data.network.actual->data.bridge.brname, netdef->bridge) < 0) goto error; + iface->data.network.actual->data.bridge.promiscLinks + = netdef->promiscLinks;
/* merge virtualports from interface, network, and portgroup to * arrive at actual virtualport to use

When the bridge device for a network has promiscLinks='no', the intent is to setup the bridge to opportunistically turn off promiscuous mode and/or flood mode for as many ports/links on the bridge as possible, thus improving performance and security. The setup for the bridge itself is: 1) set the "vlan_filtering" property of the bridge device to 1. 2) If the bridge has a "Dummy" tap device used to set a fixed MAC address on the bridge, turn off learning and unicast_flood on this tap (this is needed even though this tap is never IFF_UP, because the kernel ignores the IFF_UP flag of other devices when using their settings to automatically decide whether or not to turn off promiscuous mode for any attached device). This is done both for libvirt-created/managed bridges, and for bridges that are created by the host system config. There is no attempt to turn vlan_filtering off when destroying the network because in the case of a libvirt-created bridge, the bridge is about to be destroyed anyway, and in the case of a system bridge, if the other devices attached to the bridge could operate properly before destroying libvirt's network object, they will continue to operate properly (this is similar to the way that libvirt will enable ip_forwarding whenever a routed/natted network is started, but will never attempt to disable it if they are stopped). --- src/network/bridge_driver.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index bc8da79..f2564c3 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -1893,6 +1893,28 @@ networkAddAddrToBridge(virNetworkObjPtr network, return 0; } + +static int +networkStartHandlePromiscLinks(virNetworkObjPtr network, const char *macTapIfName) +{ + const char *brname = network->def->bridge; + + /* promiscuous mode won't be turned off unless vlan filtering is enabled. */ + if (brname && + network->def->promiscLinks == VIR_TRISTATE_BOOL_NO) { + if (virNetDevBridgeSetVlanFiltering(brname, true) < 0) + return -1; + if (macTapIfName) { + if (virNetDevBridgePortSetLearning(brname, macTapIfName, false) < 0) + return -1; + if (virNetDevBridgePortSetUnicastFlood(brname, macTapIfName, false) < 0) + return -1; + } + } + return 0; +} + + /* add an IP (static) route to a bridge */ static int networkAddRouteToBridge(virNetworkObjPtr network, @@ -2019,6 +2041,9 @@ networkStartNetworkVirtual(virNetworkObjPtr network) goto err2; } + if (networkStartHandlePromiscLinks(network, macTapIfName) < 0) + goto err2; + /* Bring up the bridge interface */ if (virNetDevSetOnline(network->def->bridge, 1) < 0) goto err2; @@ -2163,6 +2188,27 @@ static int networkShutdownNetworkVirtual(virNetworkObjPtr network) } +static int +networkStartNetworkBridge(virNetworkObjPtr network) +{ + /* put anything here that needs to be done each time a network of + * type BRIDGE, is started. On failure, undo anything you've done, + * and return -1. On success return 0. + */ + return networkStartHandlePromiscLinks(network, NULL); +} + +static +int networkShutdownNetworkBridge(virNetworkObjPtr network ATTRIBUTE_UNUSED) +{ + /* put anything here that needs to be done each time a network of + * type BRIDGE is shutdown. On failure, undo anything you've done, + * and return -1. On success return 0. + */ + return 0; +} + + /* networkCreateInterfacePool: * @netdef: the original NetDef from the network * @@ -2326,6 +2372,10 @@ networkStartNetwork(virNetworkObjPtr network) break; case VIR_NETWORK_FORWARD_BRIDGE: + if (networkStartNetworkBridge(network) < 0) + goto cleanup; + break; + case VIR_NETWORK_FORWARD_PRIVATE: case VIR_NETWORK_FORWARD_VEPA: case VIR_NETWORK_FORWARD_PASSTHROUGH: @@ -2392,6 +2442,9 @@ static int networkShutdownNetwork(virNetworkObjPtr network) break; case VIR_NETWORK_FORWARD_BRIDGE: + ret = networkShutdownNetworkBridge(network); + break; + case VIR_NETWORK_FORWARD_PRIVATE: case VIR_NETWORK_FORWARD_VEPA: case VIR_NETWORK_FORWARD_PASSTHROUGH: -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
When the bridge device for a network has promiscLinks='no', the intent is to setup the bridge to opportunistically turn off promiscuous mode and/or flood mode for as many ports/links on the bridge as possible, thus improving performance and security. The setup for the bridge itself is:
1) set the "vlan_filtering" property of the bridge device to 1. 2) If the bridge has a "Dummy" tap device used to set a fixed MAC address on the bridge, turn off learning and unicast_flood on this tap (this is needed even though this tap is never IFF_UP, because the kernel ignores the IFF_UP flag of other devices when using their settings to automatically decide whether or not to turn off promiscuous mode for any attached device).
This is done both for libvirt-created/managed bridges, and for bridges that are created by the host system config.
There is no attempt to turn vlan_filtering off when destroying the network because in the case of a libvirt-created bridge, the bridge is about to be destroyed anyway, and in the case of a system bridge, if the other devices attached to the bridge could operate properly before destroying libvirt's network object, they will continue to operate properly (this is similar to the way that libvirt will enable ip_forwarding whenever a routed/natted network is started, but will never attempt to disable it if they are stopped). --- src/network/bridge_driver.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index bc8da79..f2564c3 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -1893,6 +1893,28 @@ networkAddAddrToBridge(virNetworkObjPtr network, return 0; }
+ +static int +networkStartHandlePromiscLinks(virNetworkObjPtr network, const char *macTapIfName) +{ + const char *brname = network->def->bridge; + + /* promiscuous mode won't be turned off unless vlan filtering is enabled. */ + if (brname && + network->def->promiscLinks == VIR_TRISTATE_BOOL_NO) { + if (virNetDevBridgeSetVlanFiltering(brname, true) < 0)
Could this return failure if we don't have the right kernel? Then do we really want to kill the whole startup processing? It almost seems as if the failure case here should be some kind of VIR_WARN or VIR_INFO, return 0 and do nothing. I guess what I'm most worried about is the "future" if the decision is to quietly change the default of 'promiscLinks' (or whatever name is used) and we get here from networkStartNetworkBridge which in a libvirtd restart has what effect on something that was running before? You would be correct in pointing out that for the current design and assumptions this is the right course of action. Someone set promisc=no and there was a failure, so we need to fail too.
+ return -1; + if (macTapIfName) { + if (virNetDevBridgePortSetLearning(brname, macTapIfName, false) < 0) + return -1; + if (virNetDevBridgePortSetUnicastFlood(brname, macTapIfName, false) < 0) + return -1;
Ah - although networkStartNetworkBridge says to restore things on failure, since we'll be deleting the bridge if either of these fail so it doesn't matter. The BOOL_NO logic could change if you took my suggestion from patch3 ACK, but this is ultimately where changing the default would be affected and perhaps we need to somehow note that. John
+ } + } + return 0; +} + + /* add an IP (static) route to a bridge */ static int networkAddRouteToBridge(virNetworkObjPtr network, @@ -2019,6 +2041,9 @@ networkStartNetworkVirtual(virNetworkObjPtr network) goto err2; }
+ if (networkStartHandlePromiscLinks(network, macTapIfName) < 0) + goto err2; + /* Bring up the bridge interface */ if (virNetDevSetOnline(network->def->bridge, 1) < 0) goto err2; @@ -2163,6 +2188,27 @@ static int networkShutdownNetworkVirtual(virNetworkObjPtr network) }
+static int +networkStartNetworkBridge(virNetworkObjPtr network) +{ + /* put anything here that needs to be done each time a network of + * type BRIDGE, is started. On failure, undo anything you've done, + * and return -1. On success return 0. + */ + return networkStartHandlePromiscLinks(network, NULL); +} + +static +int networkShutdownNetworkBridge(virNetworkObjPtr network ATTRIBUTE_UNUSED) +{ + /* put anything here that needs to be done each time a network of + * type BRIDGE is shutdown. On failure, undo anything you've done, + * and return -1. On success return 0. + */ + return 0; +} + + /* networkCreateInterfacePool: * @netdef: the original NetDef from the network * @@ -2326,6 +2372,10 @@ networkStartNetwork(virNetworkObjPtr network) break;
case VIR_NETWORK_FORWARD_BRIDGE: + if (networkStartNetworkBridge(network) < 0) + goto cleanup; + break; + case VIR_NETWORK_FORWARD_PRIVATE: case VIR_NETWORK_FORWARD_VEPA: case VIR_NETWORK_FORWARD_PASSTHROUGH: @@ -2392,6 +2442,9 @@ static int networkShutdownNetwork(virNetworkObjPtr network) break;
case VIR_NETWORK_FORWARD_BRIDGE: + ret = networkShutdownNetworkBridge(network); + break; + case VIR_NETWORK_FORWARD_PRIVATE: case VIR_NETWORK_FORWARD_VEPA: case VIR_NETWORK_FORWARD_PASSTHROUGH:

On 11/24/2014 06:22 PM, John Ferlan wrote:
On 11/24/2014 12:48 PM, Laine Stump wrote:
When the bridge device for a network has promiscLinks='no', the intent is to setup the bridge to opportunistically turn off promiscuous mode and/or flood mode for as many ports/links on the bridge as possible, thus improving performance and security. The setup for the bridge itself is:
1) set the "vlan_filtering" property of the bridge device to 1. 2) If the bridge has a "Dummy" tap device used to set a fixed MAC address on the bridge, turn off learning and unicast_flood on this tap (this is needed even though this tap is never IFF_UP, because the kernel ignores the IFF_UP flag of other devices when using their settings to automatically decide whether or not to turn off promiscuous mode for any attached device).
This is done both for libvirt-created/managed bridges, and for bridges that are created by the host system config.
There is no attempt to turn vlan_filtering off when destroying the network because in the case of a libvirt-created bridge, the bridge is about to be destroyed anyway, and in the case of a system bridge, if the other devices attached to the bridge could operate properly before destroying libvirt's network object, they will continue to operate properly (this is similar to the way that libvirt will enable ip_forwarding whenever a routed/natted network is started, but will never attempt to disable it if they are stopped). --- src/network/bridge_driver.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index bc8da79..f2564c3 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -1893,6 +1893,28 @@ networkAddAddrToBridge(virNetworkObjPtr network, return 0; }
+ +static int +networkStartHandlePromiscLinks(virNetworkObjPtr network, const char *macTapIfName) +{ + const char *brname = network->def->bridge; + + /* promiscuous mode won't be turned off unless vlan filtering is enabled. */ + if (brname && + network->def->promiscLinks == VIR_TRISTATE_BOOL_NO) { + if (virNetDevBridgeSetVlanFiltering(brname, true) < 0) Could this return failure if we don't have the right kernel? Then do we really want to kill the whole startup processing?
Yes, and yes. If they've requested it and we can't comply, then we need to fail.
It almost seems as if the failure case here should be some kind of VIR_WARN or VIR_INFO, return 0 and do nothing.
I guess what I'm most worried about is the "future" if the decision is to quietly change the default of 'promiscLinks' (or whatever name is used) and we get here from networkStartNetworkBridge which in a libvirtd restart has what effect on something that was running before?
If we ever do that, it will be done in a "quietly fall back" way. I don't want to build in too much "intelligence" that ends up never getting used (and in the meantime makes the code (more) confusing).
You would be correct in pointing out that for the current design and assumptions this is the right course of action. Someone set promisc=no and there was a failure, so we need to fail too.
+ return -1; + if (macTapIfName) { + if (virNetDevBridgePortSetLearning(brname, macTapIfName, false) < 0) + return -1; + if (virNetDevBridgePortSetUnicastFlood(brname, macTapIfName, false) < 0) + return -1; Ah - although networkStartNetworkBridge says to restore things on failure, since we'll be deleting the bridge if either of these fail so it doesn't matter.
Correct.
The BOOL_NO logic could change if you took my suggestion from patch3
ACK, but this is ultimately where changing the default would be affected and perhaps we need to somehow note that.

In order for the kernel to be able to promiscuous mode on as many ports of a bridge as possible, at most one attached device can have learning and unicast_flood enabled (in practice, this one device is always the physical device that connects the bridge to the real world). If more than one device has those settings enabled, the kernel cannot enable promiscuous mode. Since both settings default to enabled, we need to turn them both off (using virNetDevBridgeSetLearning() and virNetDevBridgeSetUnicastFlood()) for every tap device plugged into a bridge that has promiscLinks='no'. If there is only one device with learning/unicast_flood enabled, then that device has promiscuous mode disabled. If there are *no* devices with learning/unicast_flood enabled (e.g. for a libvirt "route", "nat", or isolated network that has no physical device attached), then all devices will have promiscuous mode disabled. Once we have disabled learning and flooding, any packet that has a destination MAC address not present in the forwarding database (fdb) will be dropped by the bridge, and libvirt is responsible for adding entries to the bridge fdb for the MAC address of each guest interface. That is done with virNetDevBridgeFDBAdd(). None of this has any effect for kernels prior to 3.15 (upstream kernel commit 2796d0c648c940b4796f84384fbcfb0a2399db84 "bridge: Automatically manage port promiscuous mode"). Even after that, until kernel 3.17 (upstream commit 5be5a2df40f005ea7fb7e280e87bbbcfcf1c2fc0 "bridge: Add filtering support for default_pvid") the following workaround is required in order for untagged traffic to pass (vlan tagged traffic will in any case currently not pass with promiscLinks='no') --- src/qemu/qemu_command.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbdef9c..d3d129a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -35,6 +35,7 @@ #include "virerror.h" #include "virfile.h" #include "virnetdev.h" +#include "virnetdevbridge.h" #include "virstring.h" #include "virtime.h" #include "viruuid.h" @@ -350,6 +351,21 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, virDomainAuditNetDevice(def, net, tunpath, false); goto cleanup; } + if (virDomainNetGetActualPromiscLinks(net) == VIR_TRISTATE_BOOL_NO) { + /* In order to make as many as possible of the links to a + * bridge promiscuous/no-flood, we need to turn off + * learning and unicast_flood, and add an fdb entry to the + * bridge for this new device. + */ + if (virNetDevBridgePortSetLearning(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgePortSetUnicastFlood(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgeFDBAdd(&net->mac, net->ifname, + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) + goto cleanup; + } } else { if (qemuCreateInBridgePortWithHelper(cfg, brname, &net->ifname, -- 1.9.3

On 11/24/2014 12:48 PM, Laine Stump wrote:
In order for the kernel to be able to promiscuous mode on as many ports of a bridge as possible, at most one attached device can have learning and unicast_flood enabled (in practice, this one device is always the physical device that connects the bridge to the real world). If more than one device has those settings enabled, the kernel cannot enable promiscuous mode. Since both settings default to enabled, we need to turn them both off (using virNetDevBridgeSetLearning() and virNetDevBridgeSetUnicastFlood()) for every tap device plugged into a bridge that has promiscLinks='no'.
If there is only one device with learning/unicast_flood enabled, then that device has promiscuous mode disabled. If there are *no* devices with learning/unicast_flood enabled (e.g. for a libvirt "route", "nat", or isolated network that has no physical device attached), then all devices will have promiscuous mode disabled.
Once we have disabled learning and flooding, any packet that has a destination MAC address not present in the forwarding database (fdb) will be dropped by the bridge, and libvirt is responsible for adding entries to the bridge fdb for the MAC address of each guest interface. That is done with virNetDevBridgeFDBAdd().
None of this has any effect for kernels prior to 3.15 (upstream kernel commit 2796d0c648c940b4796f84384fbcfb0a2399db84 "bridge: Automatically manage port promiscuous mode"). Even after that, until kernel 3.17 (upstream commit 5be5a2df40f005ea7fb7e280e87bbbcfcf1c2fc0 "bridge: Add filtering support for default_pvid") the following workaround is required in order for untagged traffic to pass (vlan tagged traffic will in any case currently not pass with promiscLinks='no') --- src/qemu/qemu_command.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbdef9c..d3d129a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -35,6 +35,7 @@ #include "virerror.h" #include "virfile.h" #include "virnetdev.h" +#include "virnetdevbridge.h" #include "virstring.h" #include "virtime.h" #include "viruuid.h" @@ -350,6 +351,21 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, virDomainAuditNetDevice(def, net, tunpath, false); goto cleanup; } + if (virDomainNetGetActualPromiscLinks(net) == VIR_TRISTATE_BOOL_NO) { + /* In order to make as many as possible of the links to a + * bridge promiscuous/no-flood, we need to turn off + * learning and unicast_flood, and add an fdb entry to the + * bridge for this new device. + */
In patch 6 we explicitly check vlan filtering being enabled - that's not done here, does it need to be?
+ if (virNetDevBridgePortSetLearning(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgePortSetUnicastFlood(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgeFDBAdd(&net->mac, net->ifname, + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) + goto cleanup;
I'd probably have similar "concerns" as patch 6 with issues that could occur if the default for promiscLinks changed and the error path logic. Although I see now that restart logic doesn't mean much (hey, it's late in the day). ACK to what's here though. Obviously if you changed the name from my patch 3 suggestion, then the BOOL_NO above would change as well. John
+ } } else { if (qemuCreateInBridgePortWithHelper(cfg, brname, &net->ifname,

On 11/24/2014 06:41 PM, John Ferlan wrote:
On 11/24/2014 12:48 PM, Laine Stump wrote:
In order for the kernel to be able to promiscuous mode on as many ports of a bridge as possible, at most one attached device can have learning and unicast_flood enabled (in practice, this one device is always the physical device that connects the bridge to the real world). If more than one device has those settings enabled, the kernel cannot enable promiscuous mode. Since both settings default to enabled, we need to turn them both off (using virNetDevBridgeSetLearning() and virNetDevBridgeSetUnicastFlood()) for every tap device plugged into a bridge that has promiscLinks='no'.
If there is only one device with learning/unicast_flood enabled, then that device has promiscuous mode disabled. If there are *no* devices with learning/unicast_flood enabled (e.g. for a libvirt "route", "nat", or isolated network that has no physical device attached), then all devices will have promiscuous mode disabled.
Once we have disabled learning and flooding, any packet that has a destination MAC address not present in the forwarding database (fdb) will be dropped by the bridge, and libvirt is responsible for adding entries to the bridge fdb for the MAC address of each guest interface. That is done with virNetDevBridgeFDBAdd().
None of this has any effect for kernels prior to 3.15 (upstream kernel commit 2796d0c648c940b4796f84384fbcfb0a2399db84 "bridge: Automatically manage port promiscuous mode"). Even after that, until kernel 3.17 (upstream commit 5be5a2df40f005ea7fb7e280e87bbbcfcf1c2fc0 "bridge: Add filtering support for default_pvid") the following workaround is required in order for untagged traffic to pass (vlan tagged traffic will in any case currently not pass with promiscLinks='no') --- src/qemu/qemu_command.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbdef9c..d3d129a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -35,6 +35,7 @@ #include "virerror.h" #include "virfile.h" #include "virnetdev.h" +#include "virnetdevbridge.h" #include "virstring.h" #include "virtime.h" #include "viruuid.h" @@ -350,6 +351,21 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, virDomainAuditNetDevice(def, net, tunpath, false); goto cleanup; } + if (virDomainNetGetActualPromiscLinks(net) == VIR_TRISTATE_BOOL_NO) { + /* In order to make as many as possible of the links to a + * bridge promiscuous/no-flood, we need to turn off + * learning and unicast_flood, and add an fdb entry to the + * bridge for this new device. + */ In patch 6 we explicitly check vlan filtering being enabled - that's not done here, does it need to be?
No. vlan_filtering is a property of the bridge, while "learning" and "unicast_flood" are properties of the ports of a bridge.
+ if (virNetDevBridgePortSetLearning(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgePortSetUnicastFlood(brname, net->ifname, false) < 0) + goto cleanup; + if (virNetDevBridgeFDBAdd(&net->mac, net->ifname, + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) + goto cleanup; I'd probably have similar "concerns" as patch 6 with issues that could occur if the default for promiscLinks changed and the error path logic. Although I see now that restart logic doesn't mean much (hey, it's late in the day).
ACK to what's here though. Obviously if you changed the name from my patch 3 suggestion, then the BOOL_NO above would change as well.

INFORMATIONAL ONLY - NO NEED TO REVIEW, WILL NOT BE PUSHED qemuNetworkIfaceConnect() used to have a special case for actualType='network' (a network with forward mode of route, nat, or isolated) to call the libvirt public API to retrieve the bridge being used by a network. That is no longer necessary - since all network types that use a bridge and tap device now get the bridge name stored in the ActualNetDef, we can just always use virDomainNetGetActualBridgeName() instead. (an audit of the two callers to qemuNetworkIfaceConnect() confirms that it is never called for any other type of network, so the dead code in the else statement (logging an internal error if it is called for any other type of network) is eliminated in the process.) --- src/qemu/qemu_command.c | 35 +++++------------------------ src/qemu/qemu_hotplug.c | 60 ++++++------------------------------------------- 2 files changed, 13 insertions(+), 82 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index d3d129a..57815fb 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -277,6 +277,11 @@ static int qemuCreateInBridgePortWithHelper(virQEMUDriverConfigPtr cfg, return *tapfd < 0 ? -1 : 0; } +/* qemuNetworkIfaceConnect - *only* called if actualType is + * VIR_DOMAIN_NET_TYPE_NETWORK or VIR_DOMAIN_NET_TYPE_BRIDGE (i.e. if + * the connection is made with a tap device connecting to a bridge + * device) + */ int qemuNetworkIfaceConnect(virDomainDefPtr def, virConnectPtr conn, @@ -290,42 +295,14 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, int ret = -1; unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP; bool template_ifname = false; - int actualType = virDomainNetGetActualType(net); virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); const char *tunpath = "/dev/net/tun"; if (net->backend.tap) tunpath = net->backend.tap; - if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { - bool fail = false; - virErrorPtr errobj; - virNetworkPtr network = virNetworkLookupByName(conn, - net->data.network.name); - if (!network) - return ret; - - if (!(brname = virNetworkGetBridgeName(network))) - fail = true; - - /* Make sure any above failure is preserved */ - errobj = virSaveLastError(); - virNetworkFree(network); - virSetError(errobj); - virFreeError(errobj); - - if (fail) - return ret; - - } else if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) { - if (VIR_STRDUP(brname, virDomainNetGetActualBridgeName(net)) < 0) - return ret; - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Network type %d is not supported"), - virDomainNetGetActualType(net)); + if (VIR_STRDUP(brname, virDomainNetGetActualBridgeName(net)) < 0) return ret; - } if (!net->ifname || STRPREFIX(net->ifname, VIR_NET_GENERATED_PREFIX) || diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index b00fd8f..b530199 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1746,64 +1746,20 @@ static virDomainNetDefPtr *qemuDomainFindNet(virDomainObjPtr vm, return NULL; } -static char * -qemuDomainNetGetBridgeName(virConnectPtr conn, virDomainNetDefPtr net) -{ - char *brname = NULL; - int actualType = virDomainNetGetActualType(net); - - if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) { - const char *tmpbr = virDomainNetGetActualBridgeName(net); - if (!tmpbr) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("interface is missing bridge name")); - goto cleanup; - } - /* we need a copy, not just a pointer to the original */ - if (VIR_STRDUP(brname, tmpbr) < 0) - goto cleanup; - } else if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { - virErrorPtr errobj; - virNetworkPtr network; - - if (!(network = virNetworkLookupByName(conn, net->data.network.name))) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Couldn't find network '%s'"), - net->data.network.name); - goto cleanup; - } - brname = virNetworkGetBridgeName(network); - - /* Make sure any above failure is preserved */ - errobj = virSaveLastError(); - virNetworkFree(network); - virSetError(errobj); - virFreeError(errobj); - - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Interface type %d has no bridge name"), - virDomainNetGetActualType(net)); - } - - cleanup: - return brname; -} static int -qemuDomainChangeNetBridge(virConnectPtr conn, - virDomainObjPtr vm, +qemuDomainChangeNetBridge(virDomainObjPtr vm, virDomainNetDefPtr olddev, virDomainNetDefPtr newdev) { int ret = -1; - char *oldbridge = NULL, *newbridge = NULL; + const char *oldbridge = virDomainNetGetActualBridgeName(olddev); + const char *newbridge = virDomainNetGetActualBridgeName(newdev); - if (!(oldbridge = qemuDomainNetGetBridgeName(conn, olddev))) - goto cleanup; - - if (!(newbridge = qemuDomainNetGetBridgeName(conn, newdev))) + if (!oldbridge || !newbridge) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing bridge name")); goto cleanup; + } VIR_DEBUG("Change bridge for interface %s: %s -> %s", olddev->ifname, oldbridge, newbridge); @@ -1842,8 +1798,6 @@ qemuDomainChangeNetBridge(virConnectPtr conn, /* caller will replace entire olddev with newdev in domain nets list */ ret = 0; cleanup: - VIR_FREE(oldbridge); - VIR_FREE(newbridge); return ret; } @@ -2237,7 +2191,7 @@ qemuDomainChangeNet(virQEMUDriverPtr driver, } if (needBridgeChange) { - if (qemuDomainChangeNetBridge(dom->conn, vm, olddev, newdev) < 0) + if (qemuDomainChangeNetBridge(vm, olddev, newdev) < 0) goto cleanup; /* we successfully switched to the new bridge, and we've * determined that the rest of newdev is equivalent to olddev, -- 1.9.3

INFORMATIONAL ONLY - NO NEED TO REVIEW, WILL NOT BE PUSHED qemuNetworkIfaceConnect() used to have a special case for actualType='network' (a network with forward mode of route, nat, or isolated) to call the libvirt public API to retrieve the bridge being used by a network. That is no longer necessary - since all network types that use a bridge and tap device now get the bridge name stored in the ActualNetDef, we can just always use virDomainNetGetActualBridgeName() instead. --- src/lxc/lxc_driver.c | 32 ++------------------------------ src/lxc/lxc_process.c | 32 +------------------------------- 2 files changed, 3 insertions(+), 61 deletions(-) diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 93db1ee..51c664f 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -4161,7 +4161,8 @@ lxcDomainAttachDeviceNetLive(virConnectPtr conn, actualType = virDomainNetGetActualType(net); switch (actualType) { - case VIR_DOMAIN_NET_TYPE_BRIDGE: { + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: { const char *brname = virDomainNetGetActualBridgeName(net); if (!brname) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -4174,35 +4175,6 @@ lxcDomainAttachDeviceNetLive(virConnectPtr conn, brname))) goto cleanup; } break; - case VIR_DOMAIN_NET_TYPE_NETWORK: { - virNetworkPtr network; - char *brname = NULL; - bool fail = false; - virErrorPtr errobj; - - if (!(network = virNetworkLookupByName(conn, net->data.network.name))) - goto cleanup; - if (!(brname = virNetworkGetBridgeName(network))) - fail = true; - - /* Make sure any above failure is preserved */ - errobj = virSaveLastError(); - virNetworkFree(network); - virSetError(errobj); - virFreeError(errobj); - - if (fail) - goto cleanup; - - if (!(veth = virLXCProcessSetupInterfaceBridged(conn, - vm->def, - net, - brname))) { - VIR_FREE(brname); - goto cleanup; - } - VIR_FREE(brname); - } break; case VIR_DOMAIN_NET_TYPE_DIRECT: { if (!(veth = virLXCProcessSetupInterfaceDirect(conn, vm->def, diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 9208f02..632fc17 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -382,37 +382,7 @@ static int virLXCProcessSetupInterfaces(virConnectPtr conn, type = virDomainNetGetActualType(net); switch (type) { - case VIR_DOMAIN_NET_TYPE_NETWORK: { - virNetworkPtr network; - char *brname = NULL; - bool fail = false; - virErrorPtr errobj; - - if (!(network = virNetworkLookupByName(conn, - net->data.network.name))) - goto cleanup; - if (!(brname = virNetworkGetBridgeName(network))) - fail = true; - - /* Make sure any above failure is preserved */ - errobj = virSaveLastError(); - virNetworkFree(network); - virSetError(errobj); - virFreeError(errobj); - - if (fail) - goto cleanup; - - if (!(veth = virLXCProcessSetupInterfaceBridged(conn, - def, - net, - brname))) { - VIR_FREE(brname); - goto cleanup; - } - VIR_FREE(brname); - break; - } + case VIR_DOMAIN_NET_TYPE_NETWORK: case VIR_DOMAIN_NET_TYPE_BRIDGE: { const char *brname = virDomainNetGetActualBridgeName(net); if (!brname) { -- 1.9.3
participants (3)
-
Eric Blake
-
John Ferlan
-
Laine Stump