In this patch I am adding functions that help to iteratively determine
the root physical interface of a given interface. An example would be
that a macvtap device is linked to eth0.100 which in turn is linked to
eth0. Given the name or interface index of the macvtap device that is
linked to eth0.100, eth0 is found by following the links to the end. I
am using now the netlink library to parse the returned netlink messages
and for that I am making additions to configure.ac and the rpm spec file
to check for the netlink and netlink-devel packages respectively. In the
configure.ac the requirement to have the netlink library is dependent on
having macvtap.
The setup of the upcoming VEPA patches requires knowledge over which
interface to run the setup protocol. In the above case the protocol
would need to run over interface eth0 and provide the knowledge of vlan
id 100 in the protocol (see previous patch).
Changes from V1 to V2:
- replaced the constant '256' representing the space for a netlink
message with a constant
- replaced the constant '64' representing the space for a rtattr
structure with a constant
- fixed the spacings that weren't correct
- fixed M4 quoting
Signed-off-by: Stefan Berger <stefanb(a)us.ibm.com>
---
configure.ac | 24 ++++++
libvirt.spec.in | 14 +++
src/Makefile.am | 4 -
src/util/macvtap.c | 197
++++++++++++++++++++++++++++++++++++++++++++++++++---
4 files changed, 229 insertions(+), 10 deletions(-)
Index: libvirt-acl/src/util/macvtap.c
===================================================================
--- libvirt-acl.orig/src/util/macvtap.c
+++ libvirt-acl/src/util/macvtap.c
@@ -41,6 +41,9 @@
# include <linux/rtnetlink.h>
# include <linux/if_tun.h>
+# include <netlink/attr.h>
+# include <netlink/msg.h>
+
# include "util.h"
# include "memory.h"
# include "macvtap.h"
@@ -57,6 +60,9 @@
# define MACVTAP_NAME_PREFIX "macvtap"
# define MACVTAP_NAME_PATTERN "macvtap%d"
+#define MAX_NL_MESSAGE_SIZE 256
+#define MAX_RTATTR_SIZE 64
+
static int nlOpen(void)
{
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
@@ -202,10 +208,10 @@ link_add(const char *type,
int *retry)
{
int rc = 0;
- char nlmsgbuf[256];
+ char nlmsgbuf[MAX_NL_MESSAGE_SIZE] = { 0, };
struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
struct nlmsgerr *err;
- char rtattbuf[64];
+ char rtattbuf[MAX_RTATTR_SIZE];
struct rtattr *rta, *rta1, *li;
struct ifinfomsg i = { .ifi_family = AF_UNSPEC };
int ifindex;
@@ -217,8 +223,6 @@ link_add(const char *type,
*retry = 0;
- memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
-
nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWLINK);
if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i)))
@@ -347,17 +351,15 @@ static int
link_del(const char *name)
{
int rc = 0;
- char nlmsgbuf[256];
+ char nlmsgbuf[MAX_NL_MESSAGE_SIZE] = { 0, };
struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
struct nlmsgerr *err;
- char rtattbuf[64];
+ char rtattbuf[MAX_RTATTR_SIZE];
struct rtattr *rta;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
char *recvbuf = NULL;
int recvbuflen;
- memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
-
nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL,
RTM_DELLINK);
if (!nlAppend(nlm, sizeof(nlmsgbuf), &ifinfo, sizeof(ifinfo)))
@@ -421,6 +423,185 @@ buffer_too_small:
}
+static struct nla_policy ifla_policy[IFLA_MAX + 1] =
+{
+ [IFLA_IFNAME] = { .type = NLA_STRING },
+ [IFLA_LINK] = { .type = NLA_U32 },
+};
+
+
+static int
+link_dump(int ifindex, const char *ifname, struct nlattr **tb,
+ char **recvbuf)
+{
+ int rc = 0;
+ char nlmsgbuf[MAX_NL_MESSAGE_SIZE] = { 0, };
+ struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+ struct nlmsgerr *err;
+ char rtattbuf[MAX_RTATTR_SIZE];
+ struct rtattr *rta;
+ struct ifinfomsg i = {
+ .ifi_family = AF_UNSPEC,
+ .ifi_index = ifindex
+ };
+ int recvbuflen;
+
+ *recvbuf = NULL;
+
+ nlInit(nlm, NLM_F_REQUEST, RTM_GETLINK);
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i)))
+ goto buffer_too_small;
+
+ if (ifindex < 0 && ifname != NULL) {
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME,
+ ifname, strlen(ifname) + 1);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+ }
+
+ if (nlComm(nlm, recvbuf, &recvbuflen) < 0)
+ return -1;
+
+ if (recvbuflen < NLMSG_LENGTH(0) || *recvbuf == NULL)
+ goto malformed_resp;
+
+ resp = (struct nlmsghdr *)*recvbuf;
+
+ switch (resp->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(resp);
+ if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ goto malformed_resp;
+
+ switch (-err->error) {
+ case 0:
+ break;
+
+ default:
+ virReportSystemError(-err->error,
+ _("error dumping %d interface"),
+ ifindex);
+ rc = -1;
+ }
+ break;
+
+ case GENL_ID_CTRL:
+ case NLMSG_DONE:
+ if (nlmsg_parse(resp, sizeof(struct ifinfomsg),
+ tb, IFLA_MAX, ifla_policy)) {
+ goto malformed_resp;
+ }
+ break;
+
+ default:
+ goto malformed_resp;
+ }
+
+ if (rc != 0)
+ VIR_FREE(*recvbuf);
+
+ return rc;
+
+malformed_resp:
+ macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("malformed netlink response message"));
+ VIR_FREE(*recvbuf);
+ return -1;
+
+buffer_too_small:
+ macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("internal buffer is too small"));
+ return -1;
+}
+
+
+/* TODO: move this into interface.c after moving netlink functions into
+ * utils dir
+ */
+/**
+ * ifaceGetNthParent
+ *
+ * @ifindex : the index of the interface or -1 if ifname is given
+ * @ifname : the name of the interface; ignored if ifindex is valid
+ * @nthParent : the nth parent interface to get
+ * @rootifname : pointer to buffer of size IFNAMSIZ
+ * @nth : the nth parent that is actually returned; if for example
eth0.100
+ * was given and the 100th parent is to be returned, then eth0
will
+ * most likely be returned with nth set to 1 since the chain
does
+ * not have more interfaces
+ *
+ * Get the nth parent interface of the given interface. 0 is the
interface
+ * itself.
+ *
+ * Return 0 on success, != 0 otherwise
+ */
+static int
+ifaceGetNthParent(int ifindex, const char *ifname, unsigned int
nthParent,
+ char *rootifname, unsigned int *nth)
+{
+ int rc;
+ struct nlattr *tb[IFLA_MAX + 1];
+ char *recvbuf = NULL;
+ bool end = false;
+ unsigned int i = 0;
+
+ while (!end && i <= nthParent) {
+ rc = link_dump(ifindex, ifname, tb, &recvbuf);
+ if (rc)
+ break;
+
+ if (tb[IFLA_IFNAME]) {
+ if (!virStrcpy(rootifname,
(char*)RTA_DATA(tb[IFLA_IFNAME]),
+ IFNAMSIZ)) {
+ macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("buffer for root interface name is too
small"));
+ VIR_FREE(recvbuf);
+ return 1;
+ }
+ }
+
+ if (tb[IFLA_LINK]) {
+ ifindex = *(int *)RTA_DATA(tb[IFLA_LINK]);
+ ifname = NULL;
+ } else
+ end = true;
+
+ VIR_FREE(recvbuf);
+
+ i++;
+ }
+
+ if (nth)
+ *nth = i - 1;
+
+ return rc;
+}
+
+
+/**
+ * ifaceGetRootIface
+ *
+ * @ifindex : the index of the interface or -1 if ifname is given
+ * @ifname : the name of the interface; ignored if ifindex is valid
+ * @rootifname : pointer to buffer of size IFNAMSIZ
+ *
+ * Get the root interface of a given interface, i.e., if macvtap
+ * is linked to eth0.100, it will return eth0.
+ *
+ * Return 0 on success, != 0 otherwise
+ */
+static int
+ifaceGetRootIface(int ifindex, const char *ifname,
+ char *rootifname)
+{
+ return ifaceGetNthParent(ifindex, ifname, ~0, rootifname, NULL);
+}
+
+
/* Open the macvtap's tap device.
* @ifname: Name of the macvtap interface
* @retries : Number of retries in case udev for example may need to be
Index: libvirt-acl/configure.ac
===================================================================
--- libvirt-acl.orig/configure.ac
+++ libvirt-acl/configure.ac
@@ -42,6 +42,7 @@ HAL_REQUIRED=0.5.0
DEVMAPPER_REQUIRED=1.0.0
LIBCURL_REQUIRED="7.18.0"
LIBPCAP_REQUIRED="1.0.0"
+LIBNL_REQUIRED="1.1"
dnl Checks for C compiler.
AC_PROG_CC
@@ -2005,6 +2006,24 @@ fi
AM_CONDITIONAL([WITH_MACVTAP], [test "$with_macvtap" = "yes"])
+dnl netlink library
+
+LIBNL_CFLAGS=""
+LIBNL_LIBS=""
+
+if test "$with_macvtap" = "yes"; then
+ PKG_CHECK_MODULES([LIBNL], [libnl-1 >= $LIBNL_REQUIRED], [
+ ], [
+ AC_MSG_ERROR([libnl >= $LIBNL_REQUIRED is required for macvtap
support])
+ ])
+fi
+
+AC_SUBST([LIBNL_CFLAGS])
+AC_SUBST([LIBNL_LIBS])
+
+
+
+
# Only COPYING.LIB is under version control, yet COPYING
# is included as part of the distribution tarball.
# Copy one to the other, but only if this is a srcdir-build.
@@ -2183,6 +2202,11 @@ AC_MSG_NOTICE([ pcap: $LIBPCAP_CFLAGS
else
AC_MSG_NOTICE([ pcap: no])
fi
+if test "$with_macvtap" = "yes" ; then
+AC_MSG_NOTICE([ nl: $LIBNL_CFLAGS $LIBNL_LIBS])
+else
+AC_MSG_NOTICE([ nl: no])
+fi
AC_MSG_NOTICE([])
AC_MSG_NOTICE([Test suite])
AC_MSG_NOTICE([])
Index: libvirt-acl/libvirt.spec.in
===================================================================
--- libvirt-acl.orig/libvirt.spec.in
+++ libvirt-acl/libvirt.spec.in
@@ -63,6 +63,7 @@
%define with_yajl 0%{!?_without_yajl:0}
%define with_nwfilter 0%{!?_without_nwfilter:0}
%define with_libpcap 0%{!?_without_libpcap:0}
+%define with_macvtap 0%{!?_without_macvtap:0}
# Non-server/HV driver defaults which are always enabled
%define with_python 0%{!?_without_python:1}
@@ -153,6 +154,11 @@
%if %{with_qemu}
%define with_nwfilter 0%{!?_without_nwfilter:%{server_drivers}}
%define with_libpcap 0%{!?_without_libpcap:%{server_drivers}}
+%define with_macvtap 0%{!?_without_macvtap:%{server_drivers}}
+%endif
+
+%if %{with_macvtap}
+%define with_libnl 1
%endif
# Force QEMU to run as non-root
@@ -282,6 +288,9 @@ BuildRequires: yajl-devel
%if %{with_libpcap}
BuildRequires: libpcap-devel
%endif
+%if %{with_libnl}
+BuildRequires: libnl-devel
+%endif
%if %{with_avahi}
BuildRequires: avahi-devel
%endif
@@ -531,6 +540,10 @@ of recent versions of Linux (and other O
%define _without_libpcap --without-libpcap
%endif
+%if ! %{with_macvtap}
+%define _without_macvtap --without-macvtap
+%endif
+
%configure %{?_without_xen} \
%{?_without_qemu} \
%{?_without_openvz} \
@@ -560,6 +573,7 @@ of recent versions of Linux (and other O
%{?_without_udev} \
%{?_without_yajl} \
%{?_without_libpcap} \
+ %{?_without_macvtap} \
--with-qemu-user=%{qemu_user} \
--with-qemu-group=%{qemu_group} \
--with-init-script=redhat \
Index: libvirt-acl/src/Makefile.am
===================================================================
--- libvirt-acl.orig/src/Makefile.am
+++ libvirt-acl/src/Makefile.am
@@ -973,7 +973,7 @@ libvirt_la_LDFLAGS = $(VERSION_SCRIPT_FL
$(COVERAGE_CFLAGS:-f%=-Wc,-f%) \
$(CYGWIN_EXTRA_LDFLAGS) $(MINGW_EXTRA_LDFLAGS)
libvirt_la_LIBADD += $(LIBXML_LIBS) \
- $(LIBPCAP_LIBS) \
+ $(LIBPCAP_LIBS) $(LIBNL_LIBS) \
$(DRIVER_MODULE_LIBS) \
$(CYGWIN_EXTRA_LIBADD) ../gnulib/lib/libgnu.la
libvirt_la_CFLAGS = $(COVERAGE_CFLAGS) -DIN_LIBVIRT
@@ -1027,7 +1027,7 @@ libvirt_lxc_SOURCES = \
libvirt_lxc_LDFLAGS = $(WARN_CFLAGS) $(COVERAGE_LDCFLAGS)
libvirt_lxc_LDADD = $(CAPNG_LIBS) $(YAJL_LIBS) \
$(LIBXML_LIBS) $(NUMACTL_LIBS) $(LIB_PTHREAD) \
- ../gnulib/lib/libgnu.la
+ $(LIBNL_LIBS) ../gnulib/lib/libgnu.la
libvirt_lxc_CFLAGS = \
$(LIBPARTED_CFLAGS) \
$(NUMACTL_CFLAGS) \