This is a fix for commit db488c79173b240459c7754f38c3c6af9b432970
dnsmasq main process exits without waiting for DAD, this is dnsmasq
daemon's task. So we periodically poll the kernel using netlink and
check whether there are any IPv6 addresses assigned to bridge
which have 'tentative' state. After DAD is finished, execution continues.
I guess that is what dnsmasq was assumed to do.
Since the comments in our code imply that dnsmasq should be waiting for
DAD to complete prior to daemonizing, before pushing a fix like this I'd
like to find out from the dnsmasq folks if we are erroneously relying on
nonexistent dnsmasq behavior, or if maybe there is a bug in some version
of dnsmasq.
Simon (or other dnsmasq people) - when dnsmasq is run with "enable-ra",
does it make sure it completes DAD prior to daemonizing? Or does libvirt
need to do this extra polling to assure that DAD has completed? (or
maybe there's some other config parameter we need to add?)
---
Difference to v2: Moved to virnetdev.
src/libvirt_private.syms | 1 +
src/network/bridge_driver.c | 35 +++++++++-
src/util/virnetdev.c | 160 ++++++++++++++++++++++++++++++++++++++++++++
src/util/virnetdev.h | 2 +
4 files changed, 197 insertions(+), 1 deletion(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 0517c24..fa9e1c1 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1776,6 +1776,7 @@ virNetDevSetRcvMulti;
virNetDevSetupControl;
virNetDevSysfsFile;
virNetDevValidateConfig;
+virNetDevWaitDadFinish;
# util/virnetdevbandwidth.h
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
index 3d6721b..2172a3d 100644
--- a/src/network/bridge_driver.c
+++ b/src/network/bridge_driver.c
@@ -2026,6 +2026,33 @@ networkAddRouteToBridge(virNetworkObjPtr network,
}
static int
+networkWaitDadFinish(virNetworkObjPtr network)
+{
+ virNetworkIpDefPtr ipdef;
+ virSocketAddrPtr *addrs = NULL;
+ size_t i;
+ int ret;
+ for (i = 0;
+ (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET6, i));
+ i++) {}
+
+ if (i == 0)
+ return 0;
+ if (VIR_ALLOC_N(addrs, i))
+ return -1;
+
+ for (i = 0;
+ (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET6, i));
+ i++) {
+ addrs[i] = &ipdef->address;
+ }
+
+ ret = virNetDevWaitDadFinish(addrs, i);
+ VIR_FREE(addrs);
+ return ret;
+}
+
+static int
networkStartNetworkVirtual(virNetworkDriverStatePtr driver,
virNetworkObjPtr network)
{
@@ -2159,7 +2186,13 @@ networkStartNetworkVirtual(virNetworkDriverStatePtr driver,
if (v6present && networkStartRadvd(driver, network) < 0)
goto err4;
- /* DAD has happened (dnsmasq waits for it), dnsmasq is now bound to the
+ /* dnsmasq main process does not wait for DAD to complete,
+ * so we need to wait for it ourselves.
+ */
+ if (v6present && networkWaitDadFinish(network) < 0)
+ goto err4;
+
+ /* DAD has happened, dnsmasq is now bound to the
* bridge's IPv6 address, so we can now set the dummy tun down.
*/
if (tapfd >= 0) {
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 1e20789..c81342a 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -96,6 +96,7 @@ VIR_LOG_INIT("util.netdev");
# define FEATURE_BIT_IS_SET(blocks, index, field) \
(FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
#endif
+# define IP_BUF_SIZE 4096
typedef enum {
VIR_MCAST_TYPE_INDEX_TOKEN,
@@ -1219,6 +1220,103 @@ int virNetDevClearIPAddress(const char *ifname,
return ret;
}
+/* return whether there is a known address with 'tentative' flag set */
+static int
+virNetDevParseDadStatus(struct nlmsghdr *nlh, int len,
+ virSocketAddrPtr *addrs, size_t count)
+{
+ struct ifaddrmsg *ifaddrmsg_ptr;
+ unsigned int ifaddrmsg_len;
+ struct rtattr *rtattr_ptr;
+ size_t i;
+ struct in6_addr *addr;
+ for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
+ if (NLMSG_PAYLOAD(nlh, 0) < sizeof(struct ifaddrmsg)) {
+ /* Message without payload is the last one. */
+ break;
+ }
+
+ ifaddrmsg_ptr = (struct ifaddrmsg *)NLMSG_DATA(nlh);
+ if (!(ifaddrmsg_ptr->ifa_flags & IFA_F_TENTATIVE)) {
+ /* Not tentative: we are not interested in this entry. */
+ continue;
+ }
+
+ ifaddrmsg_len = IFA_PAYLOAD(nlh);
+ rtattr_ptr = (struct rtattr *) IFA_RTA(ifaddrmsg_ptr);
+ for (; RTA_OK(rtattr_ptr, ifaddrmsg_len);
+ rtattr_ptr = RTA_NEXT(rtattr_ptr, ifaddrmsg_len)) {
+ if (RTA_PAYLOAD(rtattr_ptr) != sizeof(struct in6_addr)) {
+ /* No address: ignore. */
+ continue;
+ }
+
+ /* We check only known addresses. */
+ for (i = 0; i < count; i++) {
+ addr = &addrs[i]->data.inet6.sin6_addr;
+ if (!memcmp(addr, RTA_DATA(rtattr_ptr),
+ sizeof(struct in6_addr))) {
+ /* We found matching tentative address. */
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/* return after DAD finishes for all known IPv6 addresses or an error */
+int
+virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
+{
+ struct nl_msg *nlmsg = NULL;
+ struct ifaddrmsg ifa;
+ struct nlmsghdr *resp = NULL;
+ unsigned int recvbuflen;
+ int ret = -1, dad = 1;
+
+ if (!(nlmsg = nlmsg_alloc_simple(RTM_GETADDR,
+ NLM_F_REQUEST | NLM_F_DUMP))) {
+ virReportOOMError();
+ return -1;
+ }
+
+ memset(&ifa, 0, sizeof(ifa));
+ /* DAD is for IPv6 adresses only. */
+ ifa.ifa_family = AF_INET6;
+ if (nlmsg_append(nlmsg, &ifa, sizeof(ifa), NLMSG_ALIGNTO) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("allocated netlink buffer is too small"));
+ goto cleanup;
+ }
+
+ /* Periodically query netlink until DAD finishes on all known addresses. */
+ while (dad) {
+ if (virNetlinkCommand(nlmsg, &resp, &recvbuflen, 0, 0,
+ NETLINK_ROUTE, 0) < 0)
+ goto cleanup;
+
+ if (virNetlinkGetErrorCode(resp, recvbuflen) < 0) {
+ virReportError(VIR_ERR_SYSTEM_ERROR, "%s",
+ _("error reading DAD state information"));
+ goto cleanup;
+ }
+
+ /* Parse response. */
+ dad = virNetDevParseDadStatus(resp, recvbuflen, addrs, count);
+ if (dad)
+ usleep(1000 * 10);
+
+ VIR_FREE(resp);
+ }
+ ret = 0;
+
+ cleanup:
+ VIR_FREE(resp);
+ nlmsg_free(nlmsg);
+ return ret;
+}
+
#else /* defined(__linux__) && defined(HAVE_LIBNL) */
int virNetDevSetIPAddress(const char *ifname,
@@ -1338,6 +1436,68 @@ int virNetDevClearIPAddress(const char *ifname,
return ret;
}
+/* return whether there is a known address with 'tentative' flag set */
+static int
+virNetDevParseDadStatus(char *outbuf,
+ virSocketAddrPtr *addrs, size_t count)
+{
+ virSocketAddr sockaddr;
+ size_t i, j;
+ int ret = 0;
+ char **addr_strings = virStringSplit(outbuf, "\n", 0);
+ for (j = 0; addr_strings[j] != NULL; ++j) {
+ if (virSocketParseAddr(strings[j], &sockaddr, AF_INET6) < 0)
+ continue;
+ for (i = 0; i < count; i++) {
+ if (virSocketAddrEqual(addrs[i], &sockaddr)) {
+ ret = 1;
+ goto cleanup;
+ }
+ }
+ }
+
+ cleanup:
+ virStringFreeList(addr_strings);
+ return ret;
+}
+
+/* return after DAD finishes for all known IPv6 addresses or an error */
+int
+virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
+{
+ int ret = -1, dad = 1;
+ char *outbuf = NULL;
+ virCommandPtr cmd = NULL;
+
+# ifdef IP_PATH
+ while (dad) {
+ cmd = virCommandNew(POSIX_SHELL);
+ virCommandAddArgList(cmd, "-c",
+ IP_PATH "-6 addr show tentative | \
+ awk '/inet6/{split(\\$2, a, \\\"/\\\"); \
+ print a[1]}'", NULL);
+ virCommandSetOutputBuffer(cmd, &outbuf);
+ if (virCommandRun(cmd, NULL))
+ goto cleanup;
+ virCommandFree(cmd);
+
+ dad = virNetDevParseDadStatus(outbuf, addrs, count);
+ if (dad)
+ usleep(1000 * 10);
+
+ VIR_FREE(outbuf);
+ }
+ ret = 0;
+# else
+ virReportSystemError(ENOSYS, "%s", _("Unable to check IPv6
DAD"));
+# endif
+
+ cleanup:
+ virCommandFree(cmd);
+ VIR_FREE(outbuf);
+ return ret;
+}
+
#endif /* defined(__linux__) && defined(HAVE_LIBNL) */
/**
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index fff881c..a09b3b2 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -105,6 +105,8 @@ int virNetDevClearIPAddress(const char *ifname,
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
int virNetDevGetIPAddress(const char *ifname, virSocketAddrPtr addr)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+int virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count)
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
int virNetDevSetMAC(const char *ifname,
--
Sincerely,
Maxim Perevedentsev
--
libvir-list mailing list
libvir-list(a)redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list