Adding functionality to libvirt that will allow it query the interface
for the availability of switchdev Offloading NIC capabilities
---
configure.ac | 13 ++
docs/formatnode.html.in | 1 +
src/util/virnetdev.c | 204 +++++++++++++++++++++-
src/util/virnetdev.h | 1 +
tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml | 1 +
tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml | 1 +
6 files changed, 220 insertions(+), 1 deletion(-)
diff --git a/configure.ac b/configure.ac
index afacf40..a050b99 100644
--- a/configure.ac
+++ b/configure.ac
@@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then
AC_CHECK_HEADERS([linux/btrfs.h])
fi
+dnl
+dnl check for kernel headers required by devlink
+dnl
+if test "$with_linux" = "yes"; then
+ AC_CHECK_HEADERS([linux/devlink.h])
+ AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX,
DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME,
DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV],
+ [AC_DEFINE([HAVE_DECL_DEVLINK],
+ [1],
+ [whether devlink declarations is available])],
+ [],
+ [[#include <linux/devlink.h>]])
+fi
+
dnl Allow perl/python overrides
AC_PATH_PROGS([PYTHON], [python2 python])
if test -z "$PYTHON"; then
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
index 32451d5..e7b30ea 100644
--- a/docs/formatnode.html.in
+++ b/docs/formatnode.html.in
@@ -227,6 +227,7 @@
<dt><code>rxhash</code></dt><dd>receive-hashing</dd>
<dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
<dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
+
<dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd>
</dl>
</dd>
<dt><code>capability</code></dt>
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 90b7bee..084fb41 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -59,6 +59,10 @@
# include <net/if_dl.h>
#endif
+#if HAVE_DECL_DEVLINK
+# include <linux/devlink.h>
+#endif
+
#ifndef IFNAMSIZ
# define IFNAMSIZ 16
#endif
@@ -95,6 +99,7 @@ VIR_LOG_INIT("util.netdev");
(FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
#endif
+
typedef enum {
VIR_MCAST_TYPE_INDEX_TOKEN,
VIR_MCAST_TYPE_NAME_TOKEN,
@@ -2396,7 +2401,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
"ntuple",
"rxhash",
"rdma",
- "txudptnl")
+ "txudptnl",
+ "switchdev")
#ifdef __linux__
int
@@ -2851,6 +2857,199 @@ int virNetDevGetRxFilter(const char *ifname,
return ret;
}
+
+
+#if HAVE_DECL_DEVLINK
+/**
+ * virNetDevPutExtraHeader
+ * reserve and prepare room for an extra header
+ * This function sets to zero the room that is required to put the extra
+ * header after the initial Netlink header. This function also increases
+ * the nlmsg_len field. You have to invoke mnl_nlmsg_put_header() before
+ * you call this function. This function returns a pointer to the extra
+ * header.
+ *
+ * @nlh: pointer to Netlink header
+ * @size: size of the extra header that we want to put
+ *
+ * Returns pointer to the start of the extended header
+ */
+static void *
+virNetDevPutExtraHeader(struct nlmsghdr *nlh, size_t size)
+{
+ char *ptr = (char *)nlh + nlh->nlmsg_len;
+ size_t len = NLMSG_ALIGN(size);
+ nlh->nlmsg_len += len;
+ memset(ptr, 0, len);
+ return ptr;
+}
+
+
+/**
+ * virNetDevGetFamilyId:
+ * This function supplies the devlink family id
+ *
+ * @family_name: the name of the family to query
+ *
+ * Returns family id or 0 on failure.
+ */
+static int
+virNetDevGetFamilyId(const char *family_name)
+{
+ struct nl_msg *nl_msg = NULL;
+ struct nlmsghdr *resp = NULL;
+ struct genlmsghdr* gmsgh = NULL;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
+ unsigned int recvbuflen;
+ uint32_t family_id = 0;
+
+ if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
+ NLM_F_REQUEST | NLM_F_ACK))) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+ if (!gmsgh)
+ goto cleanup;
+
+ gmsgh->cmd = CTRL_CMD_GETFAMILY;
+ gmsgh->version = DEVLINK_GENL_VERSION;
+
+ if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0)
+ goto buffer_too_small;
+
+ if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0)
< 0)
+ goto cleanup;
+
+ if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0)
+ goto malformed_resp;
+
+ if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
+ goto cleanup;
+
+ family_id = *(int *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]);
+
+ cleanup:
+ nlmsg_free(nl_msg);
+ VIR_FREE(resp);
+ return family_id;
+
+ malformed_resp:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("malformed netlink response message"));
+ goto cleanup;
+
+ buffer_too_small:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("allocated netlink buffer is too small"));
+ goto cleanup;
+}
+
+
+/**
+ * virNetDevSwitchdevFeature
+ * This function checks for the availability of Switchdev feature
+ * and add it to bitmap
+ *
+ * @ifname: name of the interface
+ * @out: add Switchdev feature if exist to bitmap
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+ virBitmapPtr *out)
+{
+ struct nl_msg *nl_msg = NULL;
+ struct nlmsghdr *resp = NULL;
+ unsigned int recvbuflen;
+ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
+ virPCIDevicePtr pci_device_ptr = NULL;
+ struct genlmsghdr* gmsgh = NULL;
+ const char *pci_name;
+ char *pfname = NULL;
+ int func_ret_val = -1;
+ int ret = -1;
+
+ int family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME);
+ if (family_id == 0)
+ goto cleanup;
+
+ func_ret_val = virNetDevIsVirtualFunction(ifname);
+ if (func_ret_val == 1) {
+ if (virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
+ goto cleanup;
+ }
+ else if (func_ret_val < 0)
+ goto cleanup;
+
+ if (!(nl_msg = nlmsg_alloc_simple(family_id,
+ NLM_F_REQUEST | NLM_F_ACK))) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
+ virNetDevGetPCIDevice(ifname);
+ if (!pci_device_ptr)
+ goto cleanup;
+
+ pci_name = virPCIDeviceGetName(pci_device_ptr);
+
+ gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+ if (!gmsgh)
+ goto cleanup;
+
+ gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
+ gmsgh->version = DEVLINK_GENL_VERSION;
+
+ if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1,
"pci") < 0)
+ goto buffer_too_small;
+
+ if (nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0)
+ goto buffer_too_small;
+
+ if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
+ NETLINK_GENERIC, 0) < 0)
+ goto cleanup;
+
+ if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0)
+ goto malformed_resp;
+
+ if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
+ *(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) ==
DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+ ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
+ }
+
+ ret = 0;
+
+ cleanup:
+ nlmsg_free(nl_msg);
+ virPCIDeviceFree(pci_device_ptr);
+ VIR_FREE(resp);
+ return ret;
+
+ malformed_resp:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("malformed netlink response message"));
+ goto cleanup;
+
+ buffer_too_small:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("allocated netlink buffer is too small"));
+ goto cleanup;
+}
+# else
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+ virBitmapPtr *out)
+{
+ return 0;
+}
+# endif
+
+
#if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
/**
@@ -3230,6 +3429,9 @@ virNetDevGetFeatures(const char *ifname,
if (virNetDevRDMAFeature(ifname, out) < 0)
goto cleanup;
+ if (virNetDevSwitchdevFeature(ifname, out) < 0)
+ goto cleanup;
+
ret = 0;
cleanup:
VIR_FORCE_CLOSE(fd);
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 2e9a9c4..8fd6036 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -112,6 +112,7 @@ typedef enum {
VIR_NET_DEV_FEAT_RXHASH,
VIR_NET_DEV_FEAT_RDMA,
VIR_NET_DEV_FEAT_TXUDPTNL,
+ VIR_NET_DEV_FEAT_SWITCHDEV,
VIR_NET_DEV_FEAT_LAST
} virNetDevFeature;
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
index d4c96e8..88252e6 100644
--- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
+++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
@@ -15,6 +15,7 @@
<feature name='rxhash'/>
<feature name='rdma'/>
<feature name='txudptnl'/>
+ <feature name='switchdev'/>
<capability type='80211'/>
</capability>
</device>
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
index 71bf90e..f77dfcc 100644
--- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
+++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
@@ -15,6 +15,7 @@
<feature name='rxhash'/>
<feature name='rdma'/>
<feature name='txudptnl'/>
+ <feature name='switchdev'/>
<capability type='80203'/>
</capability>
</device>
--
2.1.4