Thank you for reviewing John,
I've changed the code according to your comments except I couldn't find an NLMSG_*
macro to replace the code.
I've addressed the rest of your questions below.
-----Original Message-----
From: John Ferlan [mailto:jferlan@redhat.com]
Sent: Wednesday, August 16, 2017 11:28 PM
To: Edan David <edand(a)mellanox.com>; libvir-list(a)redhat.com
Subject: Re: [libvirt] [PATCH] nodedev: add switchdev to NIC capabilities
On 08/02/2017 06:00 AM, Edan David wrote:
Adding functionality to libvirt that will allow it query the
interface
for the availability of switchdev Offloading NIC capabilities
Not my area of expertise, but I'll point out some "basic" stuff...
You really should have considered adding some of what you added to :
https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.re...
into your commit message to describe what you're doing and why
If only one patch, then the details/links or background can go after the following
"---" in order to help the reviewer get some extra details. If you had multiple
patches, then that'd be part of the cover letter
---
configure.ac | 13 ++
docs/formatnode.html.in | 1 +
src/util/virnetdev.c | 204 +++++++++++++++++++++-
src/util/virnetdev.h | 1 +
tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml | 1 +
tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml | 1 +
6 files changed, 220 insertions(+), 1 deletion(-)
Before sending patches, please be sure to run w/ "make check syntax-check" -
this one fails syntax-check in a few places:
preprocessor_indentation
cppi: src/util/virnetdev.c: line 3128: not properly indented
cppi: src/util/virnetdev.c: line 3135: not properly indented
maint.mk: incorrect preprocessor indentation
cfg.mk:684: recipe for target 'sc_preprocessor_indentation' failed
make: *** [sc_preprocessor_indentation] Error 1
diff --git a/configure.ac b/configure.ac index afacf40..a050b99
100644
--- a/configure.ac
+++ b/configure.ac
@@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then
AC_CHECK_HEADERS([linux/btrfs.h]) fi
+dnl
+dnl check for kernel headers required by devlink dnl if test
+"$with_linux" = "yes"; then
+ AC_CHECK_HEADERS([linux/devlink.h])
+ AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX,
DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME,
DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV],
+ [AC_DEFINE([HAVE_DECL_DEVLINK],
+ [1],
+ [whether devlink declarations is available])],
+ [],
+ [[#include <linux/devlink.h>]]) fi
+
dnl Allow perl/python overrides
AC_PATH_PROGS([PYTHON], [python2 python]) if test -z "$PYTHON"; then
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index
32451d5..e7b30ea 100644
--- a/docs/formatnode.html.in
+++ b/docs/formatnode.html.in
@@ -227,6 +227,7 @@
<dt><code>rxhash</code></dt><dd>receive-hashing</dd>
<dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
<dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
+
+
<dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd>
</dl>
</dd>
<dt><code>capability</code></dt> diff --git
a/src/util/virnetdev.c b/src/util/virnetdev.c index 90b7bee..084fb41
100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -59,6 +59,10 @@
# include <net/if_dl.h>
#endif
+#if HAVE_DECL_DEVLINK
+# include <linux/devlink.h>
+#endif
+
#ifndef IFNAMSIZ
# define IFNAMSIZ 16
#endif
@@ -95,6 +99,7 @@ VIR_LOG_INIT("util.netdev");
(FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
#endif
+
Spurious whitespace
typedef enum {
VIR_MCAST_TYPE_INDEX_TOKEN,
VIR_MCAST_TYPE_NAME_TOKEN,
@@ -2396,7 +2401,8 @@ VIR_ENUM_IMPL(virNetDevFeature,
"ntuple",
"rxhash",
"rdma",
- "txudptnl")
+ "txudptnl",
+ "switchdev")
#ifdef __linux__
int
@@ -2851,6 +2857,199 @@ int virNetDevGetRxFilter(const char *ifname,
return ret;
}
+
+
Only 2 lines between functions, not 3
+#if HAVE_DECL_DEVLINK
+/**
+ * virNetDevPutExtraHeader
+ * reserve and prepare room for an extra header
+ * This function sets to zero the room that is required to put the
+extra
+ * header after the initial Netlink header. This function also
+increases
+ * the nlmsg_len field. You have to invoke mnl_nlmsg_put_header()
+before
+ * you call this function. This function returns a pointer to the
+extra
+ * header.
This seems like a cut-n-paste from somewhere - it's not fully formed and certainly
what it says to invoke isn't invoked prior to this call!
+ *
+ * @nlh: pointer to Netlink header
+ * @size: size of the extra header that we want to put
+ *
+ * Returns pointer to the start of the extended header */ static
+void * virNetDevPutExtraHeader(struct nlmsghdr *nlh, size_t size)
use multiple lines for multiple args:
e.g.
virNetDevPutExtraHeader(struct nlmsghdr *nlh,
size_t size)
+{
+ char *ptr = (char *)nlh + nlh->nlmsg_len;
+ size_t len = NLMSG_ALIGN(size);
None of those NLMSG_* macros do the trick?
** I couldn't find such macro, if you know of any replacement I'll be happy to
change this **
+ nlh->nlmsg_len += len;
+ memset(ptr, 0, len);
+ return ptr;
+}
+
+
+/**
+ * virNetDevGetFamilyId:
+ * This function supplies the devlink family id
+ *
+ * @family_name: the name of the family to query
+ *
+ * Returns family id or 0 on failure.
+ */
+static int
+virNetDevGetFamilyId(const char *family_name) {
+ struct nl_msg *nl_msg = NULL;
+ struct nlmsghdr *resp = NULL;
+ struct genlmsghdr* gmsgh = NULL;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, };
+ unsigned int recvbuflen;
+ uint32_t family_id = 0;
uint32_t being returned to an int function.
+
+ if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL,
+ NLM_F_REQUEST | NLM_F_ACK))) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+ if (!gmsgh)
+ goto cleanup;
if (!(gmsgh = virNet*()))
goto cleanup;
+
+ gmsgh->cmd = CTRL_CMD_GETFAMILY;
+ gmsgh->version = DEVLINK_GENL_VERSION;
+
+ if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0)
+ goto buffer_too_small;
Only one place to have the goto - just put the error message inline and goto cleanup;
+
+ if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0)
< 0)
+ goto cleanup;
+
+ if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0)
+ goto malformed_resp;
same here
+
+ if (tb[CTRL_ATTR_FAMILY_ID] == NULL)
+ goto cleanup;
Is this the case where this isn't really supported?
** This should retrieve the family id, and is not related to the switchdev
support/unsupported (we need the family id to query the interface).
The function now returns 0 on failure, I understand that the family id cannot be
0, and 0 value is net only when creating a new family id
So 0 is a valid value for failure, but I'll be happy if someone can confirm
it :) **
I wonder if perhaps this function should return -1 on failures, 0 on unsupported, and some
value on supported. After Moshe Levi added the rdma and txudptnl code (commit id
'ac3ed2085') there was a bug that was reported by testing that I fixed via commit
id '136f17efd'. There may have been something else too, but it's been too long
to remember!
I guess it comes down to the reason why this would be NULL when HAVE_DECL_DEVLINK is set
and whether you really want things like "virsh nodedev-list --cap net" to fail
in that case.
+
+ family_id = *(int *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]);
+
+ cleanup:
+ nlmsg_free(nl_msg);
+ VIR_FREE(resp);
+ return family_id;
+
+ malformed_resp:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("malformed netlink response message"));
+ goto cleanup;
+
+ buffer_too_small:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("allocated netlink buffer is too small"));
+ goto cleanup;
+}
+
+
+/**
+ * virNetDevSwitchdevFeature
+ * This function checks for the availability of Switchdev feature
+ * and add it to bitmap
+ *
+ * @ifname: name of the interface
+ * @out: add Switchdev feature if exist to bitmap
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+ virBitmapPtr *out) {
+ struct nl_msg *nl_msg = NULL;
+ struct nlmsghdr *resp = NULL;
+ unsigned int recvbuflen;
+ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, };
+ virPCIDevicePtr pci_device_ptr = NULL;
+ struct genlmsghdr* gmsgh = NULL;
+ const char *pci_name;
+ char *pfname = NULL;
+ int func_ret_val = -1;
+ int ret = -1;
+
+ int family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME);
+ if (family_id == 0)
+ goto cleanup;
uint32_t family_id;
if ((family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME)) == 0)
return -1
of course depends on whether you decide to use -1, 0, & value type returns, but you
get the general gist.
+
+ func_ret_val = virNetDevIsVirtualFunction(ifname);
+ if (func_ret_val == 1) {
+ if (virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
+ goto cleanup;
+ }
+ else if (func_ret_val < 0)
+ goto cleanup;
if ((is_vf = virNetDevIsVirtualFunction(ifname)) < 0)
return -1
if (is_vf == 1 && virNetDevGetPhysicalFunction(ifname, &pfname) < 0)
return -1;
+
+ if (!(nl_msg = nlmsg_alloc_simple(family_id,
+ NLM_F_REQUEST | NLM_F_ACK))) {
+ virReportOOMError();
+ goto cleanup;
return -1;
+ }
+
+ pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) :
+ virNetDevGetPCIDevice(ifname);
+ if (!pci_device_ptr)
+ goto cleanup;
+
+ pci_name = virPCIDeviceGetName(pci_device_ptr);
+
+ gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr));
+ if (!gmsgh)
+ goto cleanup;
could just be "if (!(gsmsg = virNetDev*()))"
+
+ gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET;
+ gmsgh->version = DEVLINK_GENL_VERSION;
+
+ if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1,
"pci") < 0)
+ goto buffer_too_small;
+
+ if (nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0)
+ goto buffer_too_small;
Not my preferred style to have multiple goto's like this, but I see it's generally
used in this manner for the various virnetdev*.c
BTW: I think it looks better as something like:
if ((nla_put() < 0) ||
(nla_put() < 0)) {
virReportError();
goto cleanup;
+
+ if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
+ NETLINK_GENERIC, 0) < 0)
+ goto cleanup;
+
+ if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) <
0)
+ goto malformed_resp;
For this one, there's no need to have the additional goto as there's only one
place that
+
+ if (tb[DEVLINK_ATTR_ESWITCH_MODE] &&
+ *(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) ==
DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+ ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV));
+ }
+
+ ret = 0;
+
+ cleanup:
+ nlmsg_free(nl_msg);
+ virPCIDeviceFree(pci_device_ptr);
+ VIR_FREE(resp);
+ return ret;
+
+ malformed_resp:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("malformed netlink response message"));
+ goto cleanup;
+
+ buffer_too_small:
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("allocated netlink buffer is too small"));
+ goto cleanup;
+}
+# else
+static int
+virNetDevSwitchdevFeature(const char *ifname,
+ virBitmapPtr *out)
You need to add ATTRIBUTE_UNUSED after each argument like virNetDevGetEthtoolGFeatures
+{
+ return 0;
+}
+# endif
+
+
Since this is called after virNetDevRDMAFeature it could have gone after the hunk that
supports that - IOW: this next hunk. Not a requirement - it's one of those type A code
flow things ;-)
John
#if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
/**
@@ -3230,6 +3429,9 @@ virNetDevGetFeatures(const char *ifname,
if (virNetDevRDMAFeature(ifname, out) < 0)
goto cleanup;
+ if (virNetDevSwitchdevFeature(ifname, out) < 0)
+ goto cleanup;
+
ret = 0;
cleanup:
VIR_FORCE_CLOSE(fd);
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index
2e9a9c4..8fd6036 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -112,6 +112,7 @@ typedef enum {
VIR_NET_DEV_FEAT_RXHASH,
VIR_NET_DEV_FEAT_RDMA,
VIR_NET_DEV_FEAT_TXUDPTNL,
+ VIR_NET_DEV_FEAT_SWITCHDEV,
VIR_NET_DEV_FEAT_LAST
} virNetDevFeature;
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
index d4c96e8..88252e6 100644
--- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
+++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
@@ -15,6 +15,7 @@
<feature name='rxhash'/>
<feature name='rdma'/>
<feature name='txudptnl'/>
+ <feature name='switchdev'/>
<capability type='80211'/>
</capability>
</device>
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
index 71bf90e..f77dfcc 100644
--- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
+++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
@@ -15,6 +15,7 @@
<feature name='rxhash'/>
<feature name='rdma'/>
<feature name='txudptnl'/>
+ <feature name='switchdev'/>
<capability type='80203'/>
</capability>
</device>