On 07/19/2015 06:11 AM, Moshe Levi wrote:
Adding functionality to libvirt that will allow
it query the interface for the availability of RDMA and
tx-udp_tnl-segmentation Offloading NIC capabilities
Here is an example of the feature XML definition:
<device>
<name>net_eth4_90_e2_ba_5e_a5_45</name>
<path>/sys/devices/pci0000:00/0000:00:03.0/0000:08:00.1/net/eth4</path>
<parent>pci_0000_08_00_1</parent>
<capability type='net'>
<interface>eth4</interface>
<address>90:e2:ba:5e:a5:45</address>
<link speed='10000' state='up'/>
<feature name='rx'/>
<feature name='tx'/>
<feature name='sg'/>
<feature name='tso'/>
<feature name='gso'/>
<feature name='gro'/>
<feature name='rxvlan'/>
<feature name='txvlan'/>
<feature name='rxhash'/>
<feature name='rdma'/>
<feature name='txudptnl'/>
<capability type='80203'/>
</capability>
</device>
---
configure.ac | 2 +-
docs/formatnode.html.in | 2 +
src/conf/device_conf.c | 4 +-
src/conf/device_conf.h | 2 +
src/util/virnetdev.c | 141 +++++++++++++++++++--
src/util/virnetdev.h | 1 +
tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml | 2 +
tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml | 2 +
8 files changed, 144 insertions(+), 12 deletions(-)
diff --git a/configure.ac b/configure.ac
index a7f38e8..70b3ef3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -390,7 +390,7 @@ AC_CHECK_TYPE([struct ifreq],
]])
AC_CHECK_DECLS([ETH_FLAG_TXVLAN, ETH_FLAG_NTUPLE, ETH_FLAG_RXHASH, ETH_FLAG_LRO,
- ETHTOOL_GGSO, ETHTOOL_GGRO, ETHTOOL_GFLAGS],
+ ETHTOOL_GGSO, ETHTOOL_GGRO, ETHTOOL_GFLAGS, ETHTOOL_GFEATURES],
[], [], [[#include <linux/ethtool.h>
]])
diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in
index 3ff1bef..ed00af5 100644
--- a/docs/formatnode.html.in
+++ b/docs/formatnode.html.in
@@ -199,6 +199,8 @@
<dt><code>txvlan</code></dt><dd>tx-vlan-offload</dd>
<dt><code>ntuple</code></dt><dd>ntuple-filters</dd>
<dt><code>rxhash</code></dt><dd>receive-hashing</dd>
+
<dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd>
+
<dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd>
</dl>
</dd>
<dt><code>capability</code></dt>
diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
index 98808e2..e7b7957 100644
--- a/src/conf/device_conf.c
+++ b/src/conf/device_conf.c
@@ -51,7 +51,9 @@ VIR_ENUM_IMPL(virNetDevFeature,
"rxvlan",
"txvlan",
"ntuple",
- "rxhash")
+ "rxhash",
+ "rdma",
+ "txudptnl")
int virDevicePCIAddressIsValid(virDevicePCIAddressPtr addr)
{
diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
index 7ea90f6..40a2b3d 100644
--- a/src/conf/device_conf.h
+++ b/src/conf/device_conf.h
@@ -74,6 +74,8 @@ typedef enum {
VIR_NET_DEV_FEAT_TXVLAN,
VIR_NET_DEV_FEAT_NTUPLE,
VIR_NET_DEV_FEAT_RXHASH,
+ VIR_NET_DEV_FEAT_RDMA,
+ VIR_NET_DEV_FEAT_TXUDPTNL,
VIR_NET_DEV_FEAT_LAST
} virNetDevFeature;
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index e4fcd81..0dcb42d 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -87,6 +87,14 @@ VIR_LOG_INIT("util.netdev");
# define VIR_IFF_ALLMULTI 0
#endif
+#define RESOURCE_FILE_LEN 4096
+#define TX_UDP_TNL 25
+#define GFEATURES_SIZE 2
+#define FEATURE_WORD(blocks, index, field) ((blocks)[(index) / 32U].field)
+#define FEATURE_FIELD_FLAG(index) (1U << (index) % 32U)
+#define FEATURE_BIT_IS_SET(blocks, index, field) \
+ (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index))
+
typedef enum {
VIR_MCAST_TYPE_INDEX_TOKEN,
VIR_MCAST_TYPE_NAME_TOKEN,
@@ -1868,7 +1876,6 @@ virNetDevReplaceMacAddress(const char *linkdev,
goto cleanup;
ret = 0;
-
cleanup:
VIR_FREE(path);
return ret;
@@ -2858,9 +2865,9 @@ static int virNetDevGetMulticastTable(const char *ifname,
}
ret = 0;
+
cleanup:
virNetDevMcastListClear(&mcast);
-
return ret;
}
@@ -2943,11 +2950,76 @@ int virNetDevGetRxFilter(const char *ifname,
return ret;
}
+
+/**
+ * virNetDevRDMAFeature
+ * This function checks for the availability of RDMA feature
+ * and add it to bitmap
+ *
+ * @ifname: name of the interface
+ * @out: add RDMA feature if exist to bitmap
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virNetDevRDMAFeature(const char *ifname,
+ virBitmapPtr *out)
+{
+ char *eth_devpath = NULL;
+ char *ib_devpath = NULL;
+ char *eth_res_buf = NULL;
+ char *ib_res_buf = NULL;
+ DIR *dirp = NULL;
+ struct dirent *dp;
+ int ret = -1;
+
+ if (!(dirp = opendir(SYSFS_INFINIBAND_DIR))) {
+ virReportSystemError(errno,
+ _("Failed to opendir path '%s'"),
+ SYSFS_INFINIBAND_DIR);
+ goto cleanup;
+ }
+
+ if (virAsprintf(ð_devpath, SYSFS_NET_DIR "%s/device/resource",
ifname) < 0)
+ goto cleanup;
+ if (!virFileExists(eth_devpath))
+ goto cleanup;
+ if (virFileReadAll(eth_devpath, RESOURCE_FILE_LEN, ð_res_buf) < 0)
+ goto cleanup;
+
+ while (virDirRead(dirp, &dp, SYSFS_INFINIBAND_DIR) > 0) {
+ if (dp->d_name[0] == '.')
+ continue;
+ if (virAsprintf(&ib_devpath, SYSFS_INFINIBAND_DIR
"%s/device/resource", dp->d_name) < 0) {
+ VIR_FREE(ib_devpath);
If virAsprintf fails, then no need to VIR_FREE here
+ continue;
+ }
+ if (virFileReadAll(ib_devpath, RESOURCE_FILE_LEN, &ib_res_buf) < 0) {
+ VIR_FREE(ib_res_buf);
if virFileReadAll fails, ib_devpath should be VIR_FREE()'d
+ continue;
+ }
+ if (STREQ(eth_res_buf, ib_res_buf)) {
+ ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_RDMA));
+ break;
+ }
+ VIR_FREE(ib_res_buf);
+ }
How about if I change the while loop to:
while (virDirRead(dirp, &dp, SYSFS_INFINIBAND_DIR) > 0) {
if (dp->d_name[0] == '.')
continue;
if (virAsprintf(&ib_devpath, SYSFS_INFINIBAND_DIR
"%s/device/resource",
dp->d_name) < 0)
continue;
if (virFileReadAll(ib_devpath, RESOURCE_FILE_LEN, &ib_res_buf) > 0
&&
STREQ(eth_res_buf, ib_res_buf)) {
ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_RDMA));
break;
}
VIR_FREE(ib_devpath);
VIR_FREE(ib_res_buf);
}
NOTE: I considered >= 0 to do the STREQ, but that'd be pointless I think
+ ret = 0;
+
+ cleanup:
+ closedir(dirp);
+ VIR_FREE(eth_devpath);
+ VIR_FREE(ib_devpath);
+ VIR_FREE(eth_res_buf);
+ VIR_FREE(ib_res_buf);
+ return ret;
+}
+
#if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ)
/**
- * virNetDevFeatureAvailable
- * This function checks for the availability of a network device feature
+ * virNetDevSendEthtoolIoctl
+ * This function sends ethtool ioctl request
*
* @ifname: name of the interface
* @cmd: reference to an ethtool command structure
@@ -2955,7 +3027,7 @@ int virNetDevGetRxFilter(const char *ifname,
* Returns 0 on success, -1 on failure.
*/
static int
-virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
+virNetDevSendEthtoolIoctl(const char *ifname, void *cmd)
{
int ret = -1;
int sock = -1;
@@ -2969,9 +3041,9 @@ virNetDevFeatureAvailable(const char *ifname, struct ethtool_value
*cmd)
memset(&ifr, 0, sizeof(ifr));
strcpy(ifr.ifr_name, ifname);
- ifr.ifr_data = (void*) cmd;
-
- if (ioctl(sock, SIOCETHTOOL, &ifr) != 0) {
+ ifr.ifr_data = cmd;
+ ret = ioctl(sock, SIOCETHTOOL, &ifr);
+ if (ret != 0) {
switch (errno) {
case EPERM:
VIR_DEBUG("ethtool ioctl: permission denied");
@@ -2988,11 +3060,51 @@ virNetDevFeatureAvailable(const char *ifname, struct
ethtool_value *cmd)
}
}
- ret = cmd->data > 0 ? 1: 0;
cleanup:
if (sock)
VIR_FORCE_CLOSE(sock);
+ return ret;
+}
+
+/**
+* virNetDevFeatureAvailable
+* This function checks for the availability of a network device feature
+*
+* @ifname: name of the interface
+* @cmd: reference to an ethtool command structure
+*
+* Returns 0 if not found, 1 on success, and -1 on failure.
+*/
+static int
+virNetDevFeatureAvailable(const char *ifname, struct ethtool_value *cmd)
+{
+ int ret = -1;
+
+ cmd = (void*)cmd;
+ if (!virNetDevSendEthtoolIoctl(ifname, cmd))
+ ret = cmd->data > 0 ? 1: 0;
+ return ret;
+}
+
+
+/**
+ * virNetDevGFeatureAvailable
+ * This function checks for the availability of a network device gfeature
+ *
+ * @ifname: name of the interface
+ * @cmd: reference to a gfeatures ethtool command structure
+ *
+ * Returns 0 if not found, 1 on success, and -1 on failure.
+ */
+static int
+virNetDevGFeatureAvailable(const char *ifname, struct ethtool_gfeatures *cmd)
+{
+ int ret = -1;
+
+ cmd = (void*)cmd;
+ if (!virNetDevSendEthtoolIoctl(ifname, cmd))
+ ret = FEATURE_BIT_IS_SET(cmd->features, TX_UDP_TNL, active);
return ret;
}
@@ -3013,7 +3125,7 @@ virNetDevGetFeatures(const char *ifname,
{
size_t i = -1;
struct ethtool_value cmd = { 0 };
-
+ struct ethtool_gfeatures g_cmd = { 0 };
struct elem{
const int cmd;
const virNetDevFeature feat;
@@ -3069,6 +3181,15 @@ virNetDevGetFeatures(const char *ifname,
}
# endif
+# if HAVE_DECL_ETHTOOL_GFEATURES
make syntax-check would have told you this was off by an extra space.
If you're good with those adjustments, I'll push the patch.
Tks -
John
+ g_cmd.cmd = ETHTOOL_GFEATURES;
+ g_cmd.size = GFEATURES_SIZE;
+ if (virNetDevGFeatureAvailable(ifname, &g_cmd))
+ ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_TXUDPTNL));
+# endif
+
+ if (virNetDevRDMAFeature(ifname, out))
+ return -1;
return 0;
}
#else
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 190b70e..fff881c 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -210,6 +210,7 @@ int virNetDevGetRcvAllMulti(const char *ifname, bool *receive)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
# define SYSFS_NET_DIR "/sys/class/net/"
+# define SYSFS_INFINIBAND_DIR "/sys/class/infiniband/"
int virNetDevSysfsFile(char **pf_sysfs_device_link,
const char *ifname,
const char *file)
diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
index 2a34fed..d4c96e8 100644
--- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
+++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml
@@ -13,6 +13,8 @@
<feature name='rxvlan'/>
<feature name='txvlan'/>
<feature name='rxhash'/>
+ <feature name='rdma'/>
+ <feature name='txudptnl'/>
<capability type='80211'/>
</capability>
</device>
diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
index 81d398c..71bf90e 100644
--- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
+++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml
@@ -13,6 +13,8 @@
<feature name='rxvlan'/>
<feature name='txvlan'/>
<feature name='rxhash'/>
+ <feature name='rdma'/>
+ <feature name='txudptnl'/>
<capability type='80203'/>
</capability>
</device>