[PATCH v5 1/2] util: support PCI passthrough net device stats collection

Collect PCI passthrough net device stats from kernel by netlink API. Currently, libvirt can not get PCI passthrough net device stats, run command: #virsh domifstat instance --interface=52:54:00:2d:b2:35 error: Failed to get interface stats instance 52:54:00:2d:b2:35 error: internal error: Interface name not provided The PCI device(usually SR-IOV virtual function device) is detached while it's used in PCI passthrough mode. And we can not parse this device from /proc/net/dev any more. In this patch, libvirt checks net device is VF of not firstly, then query virNetDevVFInterfaceStats(new API). virNetDevVFInterfaceStats gets PF name, then dumps VF stats by netlink, compares with VF index and get stats(suggest by Laine, original version is implemented by zhenwei which parses VFs info of all PFs, compares MAC address until the two MAC addresses match). address until the two MAC addresses match. '#ip -s link show' can get the same result. Instead of parsing the output result, implement this feature by libnl API. Notice that this feature depends on driver of PF. Test on Mellanox ConnectX-4 Lx, it works well. Also test on Intel Corporation 82599ES, it works, but only get 0. (ip-link command get the same result). IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine, just using defined(__linux__) && WITH_LIBNL is enough. Thanks to Laine, Daniel<berrange@redhat.com> & DHB for suggestions. Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 13 ++++++++ src/util/virnetdev.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virnetdev.h | 4 +++ 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 52e9c6313f..ae543589f1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2584,6 +2584,7 @@ virNetDevSetRootQDisc; virNetDevSetupControl; virNetDevSysfsFile; virNetDevValidateConfig; +virNetDevVFInterfaceStats; # util/virnetdevbandwidth.h diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 825bdd9119..519af71fc1 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10132,6 +10132,19 @@ qemuDomainInterfaceStats(virDomainPtr dom, if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) { if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0) goto cleanup; + } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) { + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); + virPCIDeviceAddressPtr vfAddr; + + if (!hostdev) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("hostdev interface missing hostdev data")); + goto cleanup; + } + + vfAddr = &hostdev->source.subsys.u.pci.addr; + if (virNetDevVFInterfaceStats(vfAddr, stats) < 0) + goto cleanup; } else { if (virNetDevTapInterfaceStats(net->ifname, stats, !virDomainNetTypeSharesHostView(net)) < 0) diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index 5c7660dab4..f53e1751b3 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -1514,6 +1514,17 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .maxlen = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .type = NLA_UNSPEC, .maxlen = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_STATS] = { .type = NLA_NESTED }, +}; + + +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = { + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, }; @@ -1651,13 +1662,14 @@ virNetDevSetVfConfig(const char *ifname, int vf, static int virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, - int *vlanid) + int *vlanid, virDomainInterfaceStatsPtr stats) { int rc = -1; struct ifla_vf_mac *vf_mac; struct ifla_vf_vlan *vf_vlan; struct nlattr *tb_vf_info = {NULL, }; struct nlattr *tb_vf[IFLA_VF_MAX+1]; + struct nlattr *tb_vf_stats[IFLA_VF_STATS_MAX+1]; int rem; if (!tb[IFLA_VFINFO_LIST]) { @@ -1693,6 +1705,26 @@ virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, } } + if (stats && tb_vf[IFLA_VF_STATS] && tb_vf[IFLA_VF_MAC]) { + vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]); + if (vf_mac && vf_mac->vf == vf) { + rc = nla_parse_nested(tb_vf_stats, IFLA_VF_STATS_MAX, + tb_vf[IFLA_VF_STATS], + ifla_vfstats_policy); + if (rc < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("error parsing IFLA_VF_STATS")); + return rc; + } + + stats->rx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_BYTES]); + stats->tx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_BYTES]); + stats->rx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_PACKETS]); + stats->tx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_PACKETS]); + rc = 0; + } + } + if (rc == 0) break; } @@ -1714,7 +1746,43 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac, if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0) return -1; - return virNetDevParseVfConfig(tb, vf, mac, vlanid); + return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL); +} + + +/** + * virNetDevVFInterfaceStats: + * @vfAddr: PCI address of a VF + * @stats: returns stats of the VF interface + * + * Get the VF interface from kernel by netlink. + * Returns 0 on success, -1 on failure. + */ +int +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, + virDomainInterfaceStatsPtr stats) +{ + g_autofree void *nlData = NULL; + struct nlattr *tb[IFLA_MAX + 1] = {NULL, }; + g_autofree char *vfSysfsPath = NULL; + g_autofree char *pfname = NULL; + int vf = -1; + + if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0) + return -1; + + if (!virPCIIsVirtualFunction(vfSysfsPath)) { + virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), vfSysfsPath); + return -1; + } + + if (virPCIGetVirtualFunctionInfo(vfSysfsPath, -1, &pfname, &vf) < 0) + return -1; + + if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0) + return -1; + + return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats); } @@ -2330,6 +2398,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED, } +int +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr G_GNUC_UNUSED, + virDomainInterfaceStatsPtr stats G_GNUC_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get VF net device stats on this platform")); + return -1; +} + + #endif /* defined(WITH_LIBNL) */ VIR_ENUM_IMPL(virNetDevIfState, diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index dfef49938f..53e606c61c 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -316,4 +316,8 @@ int virNetDevSetRootQDisc(const char *ifname, const char *qdisc) G_GNUC_NO_INLINE; +int virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, + virDomainInterfaceStatsPtr stats) +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree); -- 2.11.0

Suggested by Laine: virNetDevParseVfConfig has became a multifunctional helper function, rename it to virNetDevParseVfInfo. Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- src/util/virnetdev.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index f53e1751b3..088f35621d 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -1660,9 +1660,15 @@ virNetDevSetVfConfig(const char *ifname, int vf, goto cleanup; } +/** + * virNetDevParseVfInfo: + * Get the VF interface infomation from kernel by netlink, To make netlink + * parsing logic easy to maintain, extending this function to get some new + * data is better than add a new function. + */ static int -virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, - int *vlanid, virDomainInterfaceStatsPtr stats) +virNetDevParseVfInfo(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, + int *vlanid, virDomainInterfaceStatsPtr stats) { int rc = -1; struct ifla_vf_mac *vf_mac; @@ -1746,7 +1752,7 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac, if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0) return -1; - return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL); + return virNetDevParseVfInfo(tb, vf, mac, vlanid, NULL); } @@ -1782,7 +1788,7 @@ virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0) return -1; - return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats); + return virNetDevParseVfInfo(tb, vf, NULL, NULL, stats); } -- 2.11.0

On 10/15/20 7:21 AM, zhenwei pi wrote:
Suggested by Laine: virNetDevParseVfConfig has became a multifunctional helper function, rename it to virNetDevParseVfInfo.
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Reviewed-by: Laine Stump <laine@redhat.com> and pushed. Now I need to figure out what the prize is for answering the bonus question :-)
--- src/util/virnetdev.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index f53e1751b3..088f35621d 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -1660,9 +1660,15 @@ virNetDevSetVfConfig(const char *ifname, int vf, goto cleanup; }
+/** + * virNetDevParseVfInfo: + * Get the VF interface infomation from kernel by netlink, To make netlink + * parsing logic easy to maintain, extending this function to get some new + * data is better than add a new function. + */ static int -virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, - int *vlanid, virDomainInterfaceStatsPtr stats) +virNetDevParseVfInfo(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, + int *vlanid, virDomainInterfaceStatsPtr stats) { int rc = -1; struct ifla_vf_mac *vf_mac; @@ -1746,7 +1752,7 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac, if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0) return -1;
- return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL); + return virNetDevParseVfInfo(tb, vf, mac, vlanid, NULL); }
@@ -1782,7 +1788,7 @@ virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0) return -1;
- return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats); + return virNetDevParseVfInfo(tb, vf, NULL, NULL, stats); }

On 10/15/20 7:21 AM, zhenwei pi wrote:
Collect PCI passthrough net device stats from kernel by netlink API.
Currently, libvirt can not get PCI passthrough net device stats, run command: #virsh domifstat instance --interface=52:54:00:2d:b2:35 error: Failed to get interface stats instance 52:54:00:2d:b2:35 error: internal error: Interface name not provided
The PCI device(usually SR-IOV virtual function device) is detached
(Actually it is *always* an SR-IOV VF - SR-IOV functionality is required to set the MAC address and clan tag of the interface; without that capability, it can only be assigned to the guest as a plain <hostdev>, and in that case libvirt wouldn't even know that it was a network device.)
while it's used in PCI passthrough mode. And we can not parse this device from /proc/net/dev any more.
In this patch, libvirt checks net device is VF of not firstly, then query virNetDevVFInterfaceStats(new API). virNetDevVFInterfaceStats gets PF name, then dumps VF stats by netlink, compares with VF index and get stats(suggest by Laine, original version is implemented by zhenwei which parses VFs info of all PFs, compares MAC address until the two MAC addresses match).
address until the two MAC addresses match. (you forgot to remove this bit - I'll clean it up before pushing)
'#ip -s link show' can get the same result. Instead of parsing the output result, implement this feature by libnl API.
Notice that this feature depends on driver of PF. Test on Mellanox ConnectX-4 Lx, it works well. Also test on Intel Corporation 82599ES, it works, but only get 0. (ip-link command get the same result).
IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine, just using defined(__linux__) && WITH_LIBNL is enough.
(I'll also remove these historical artifacts of the review/revision process)
Thanks to Laine, Daniel<berrange@redhat.com> & DHB for suggestions.
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 13 ++++++++ src/util/virnetdev.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virnetdev.h | 4 +++ 4 files changed, 98 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 52e9c6313f..ae543589f1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2584,6 +2584,7 @@ virNetDevSetRootQDisc; virNetDevSetupControl; virNetDevSysfsFile; virNetDevValidateConfig; +virNetDevVFInterfaceStats;
# util/virnetdevbandwidth.h diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 825bdd9119..519af71fc1 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10132,6 +10132,19 @@ qemuDomainInterfaceStats(virDomainPtr dom, if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) { if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0) goto cleanup; + } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) { + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net); + virPCIDeviceAddressPtr vfAddr; + + if (!hostdev) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("hostdev interface missing hostdev data")); + goto cleanup; + } + + vfAddr = &hostdev->source.subsys.u.pci.addr; + if (virNetDevVFInterfaceStats(vfAddr, stats) < 0) + goto cleanup; } else {
And extra line below goto cleanup would improve readability
if (virNetDevTapInterfaceStats(net->ifname, stats, !virDomainNetTypeSharesHostView(net)) < 0) diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index 5c7660dab4..f53e1751b3 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -1514,6 +1514,17 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .maxlen = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .type = NLA_UNSPEC, .maxlen = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_STATS] = { .type = NLA_NESTED }, +}; + + +static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = { + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, };
@@ -1651,13 +1662,14 @@ virNetDevSetVfConfig(const char *ifname, int vf,
static int virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, - int *vlanid) + int *vlanid, virDomainInterfaceStatsPtr stats) { int rc = -1; struct ifla_vf_mac *vf_mac; struct ifla_vf_vlan *vf_vlan; struct nlattr *tb_vf_info = {NULL, }; struct nlattr *tb_vf[IFLA_VF_MAX+1]; + struct nlattr *tb_vf_stats[IFLA_VF_STATS_MAX+1]; int rem;
if (!tb[IFLA_VFINFO_LIST]) { @@ -1693,6 +1705,26 @@ virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac, } }
+ if (stats && tb_vf[IFLA_VF_STATS] && tb_vf[IFLA_VF_MAC]) { + vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);
Hmm. Looking at this (correct) code made me realize that there has been a bug higher up in this function ever since it was first implemented in 2012 (commit 5095bf06f)! - it has been checking if (mac && tb[IFLA_VF_MAC]) instead of if (mac && tb_vf[IFLA_VF_MAC]) I'll send a patch for that as soon as I'm finished reviewing and pushing your patches.
+ if (vf_mac && vf_mac->vf == vf) { + rc = nla_parse_nested(tb_vf_stats, IFLA_VF_STATS_MAX, + tb_vf[IFLA_VF_STATS], + ifla_vfstats_policy); + if (rc < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("error parsing IFLA_VF_STATS")); + return rc; + } + + stats->rx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_BYTES]); + stats->tx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_BYTES]); + stats->rx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_PACKETS]); + stats->tx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_PACKETS]); + rc = 0; + } + } + if (rc == 0) break; } @@ -1714,7 +1746,43 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac, if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0) return -1;
- return virNetDevParseVfConfig(tb, vf, mac, vlanid); + return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL); +} + + +/** + * virNetDevVFInterfaceStats: + * @vfAddr: PCI address of a VF + * @stats: returns stats of the VF interface + * + * Get the VF interface from kernel by netlink. + * Returns 0 on success, -1 on failure. + */ +int +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, + virDomainInterfaceStatsPtr stats) +{ + g_autofree void *nlData = NULL; + struct nlattr *tb[IFLA_MAX + 1] = {NULL, }; + g_autofree char *vfSysfsPath = NULL; + g_autofree char *pfname = NULL; + int vf = -1; + + if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0) + return -1; + + if (!virPCIIsVirtualFunction(vfSysfsPath)) { + virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), vfSysfsPath); + return -1; + } + + if (virPCIGetVirtualFunctionInfo(vfSysfsPath, -1, &pfname, &vf) < 0) + return -1; + + if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0) + return -1; + + return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats); }
@@ -2330,6 +2398,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED, }
+int +virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr G_GNUC_UNUSED, + virDomainInterfaceStatsPtr stats G_GNUC_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to get VF net device stats on this platform")); + return -1; +} + + #endif /* defined(WITH_LIBNL) */
VIR_ENUM_IMPL(virNetDevIfState, diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index dfef49938f..53e606c61c 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -316,4 +316,8 @@ int virNetDevSetRootQDisc(const char *ifname, const char *qdisc) G_GNUC_NO_INLINE;
+int virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr, + virDomainInterfaceStatsPtr stats) +ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
I'm unable to test on a system that has interface stats, but am assuming that you have :-). This looks fine to me, so: Reviewed-by: Laine Stump <laine@redhat.com> and pushed. Thanks for your contribution!
participants (2)
-
Laine Stump
-
zhenwei pi