On Mon, Nov 15, 2021 at 09:59:23PM +0300, Dmitrii Shcherbakov wrote:
SmartNIC DPUs may not expose some privileged eswitch operations
to the hypervisor hosts. For example, this happens with Bluefield
devices running in the ECPF (default) mode for security reasons. While
VF MAC address programming is possible via an RTM_SETLINK operation,
trying to set a VLAN ID in the same operation will fail with EPERM.
The equivalent ip link commands below provide an illustration:
1. This works:
sudo ip link set enp130s0f0 vf 2 mac de:ad:be:ef:ca:fe
2. Setting (or clearing) a VLAN fails with EPERM:
sudo ip link set enp130s0f0 vf 2 vlan 0
RTNETLINK answers: Operation not permitted
3. This is what Libvirt attempts to do today (when trying to clear a
VF VLAN at the same time as programming a VF MAC).
sudo ip link set enp130s0f0 vf 2 vlan 0 mac de:ad:be:ef:ca:fe
RTNETLINK answers: Operation not permitted
If setting an explicit VLAN ID results in an EPERM, clearing a VLAN
(setting a VLAN ID to 0) can be handled gracefully by ignoring the
EPERM error with the rationale being that if we cannot set this state
in the first place, we cannot clear it either.
Thus, virNetDevSetVfConfig is split into two distinct functions. If
clearing a VLAN ID fails with EPERM, the error is simply ignored.
Both new functions rely virNetDevSendVfSetLinkRequest that implements
common functionality related to formatting a request, sending it and
handling error conditions and returns 0 or an error since in both cases
the payload is either NLMSG_DONE (no error) or NLMSG_ERROR where an
error message is needed by the caller to handle known cases
appropriately. This function allows the conditional code to be unit tested.
An alternative to this could be providing a higher level control plane
mechanism that would provide metadata about a device being remotely
managed in which case Libvirt would avoid trying to set or clear a
VLAN ID. This would be more complicated since other software (like Nova
in the OpenStack case) would have to annotate every guest device with an
attribute indicating whether a device is remotely managed or not based
on operator provided configuration so that Libvirt can act on this and
avoid VLAN programming.
Signed-off-by: Dmitrii Shcherbakov <dmitrii.shcherbakov(a)canonical.com>
---
NEWS.rst | 9 ++
src/libvirt_private.syms | 7 ++
src/util/virnetdev.c | 196 +++++++++++++++++++++-------------
src/util/virnetdevpriv.h | 44 ++++++++
tests/virnetdevtest.c | 222 ++++++++++++++++++++++++++++++++++++++-
5 files changed, 405 insertions(+), 73 deletions(-)
create mode 100644 src/util/virnetdevpriv.h
diff --git a/NEWS.rst b/NEWS.rst
index a71b84c363..d1702d95d5 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -30,6 +30,15 @@ v7.10.0 (unreleased)
Libvirt is now able to report interface information from the guest's
perspective (using guest agent).
+ * virnetdev: Ignore EPERM on attempts to clear VF VLAN ID
+
+ Libvirt will now ignore EPERM errors on attempts to clear a VLAN ID as
+ SmartNIC DPUs do not expose VLAN programming capabilities to the
+ hypervisor host. Libvirt tries to clear a VLAN in the same operation
+ as setting a MAC address for VIR_DOMAIN_NET_TYPE_HOSTDEV devices which
+ is now split into two distinct operations. EPERM errors received while
+ trying to program a non-zero VLAN ID will still cause errors.
+
* **Bug fixes**
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 9ee8fda25f..4b8e3e2b15 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2851,6 +2851,13 @@ virNetDevOpenvswitchSetTimeout;
virNetDevOpenvswitchUpdateVlan;
+#util/virnetdevpriv.h
+virNetDevSendVfSetLinkRequest;
+virNetDevSetVfConfig;
+virNetDevSetVfMac;
+virNetDevSetVfVlan;
+
+
# util/virnetdevtap.h
virNetDevTapAttachBridge;
virNetDevTapCreate;
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 58f7360a0f..5daec532b4 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -19,7 +19,9 @@
#include <config.h>
#include <math.h>
-#include "virnetdev.h"
+#define LIBVIRT_VIRNETDEVPRIV_H_ALLOW
+
+#include "virnetdevpriv.h"
#include "viralloc.h"
#include "virnetlink.h"
#include "virmacaddr.h"
@@ -1527,16 +1529,12 @@ static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1]
= {
[IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
};
-
-static int
-virNetDevSetVfConfig(const char *ifname, int vf,
- const virMacAddr *macaddr, int vlanid,
- bool *allowRetry)
+int
+virNetDevSendVfSetLinkRequest(const char *ifname, int vfInfoType,
+ const void *payload, const size_t payloadLen)
I think it would be desirable for this method to be introduced
in a patch on its own, separate from the patch that then splits
virNetDevSetVfConfig into 2 parts, and separate from one that
adds EPERM handling.
Our general guideline is that refactoring should never be mixed
with functional behaviour changes, as this has often resulted
in surprise regressions that were diguised by the mixing.
{
- int rc = -1;
- char macstr[VIR_MAC_STRING_BUFLEN];
g_autofree struct nlmsghdr *resp = NULL;
- struct nlmsgerr *err;
+ struct nlmsgerr *err = NULL;
unsigned int recvbuflen = 0;
struct nl_msg *nl_msg;
struct nlattr *vfinfolist, *vfinfo;
@@ -1544,9 +1542,11 @@ virNetDevSetVfConfig(const char *ifname, int vf,
.ifi_family = AF_UNSPEC,
.ifi_index = -1,
};
+ int rc = 0;
- if (!macaddr && vlanid < 0)
+ if (payload == NULL || payloadLen == 0) {
return -1;
+ }
nl_msg = virNetlinkMsgNew(RTM_SETLINK, NLM_F_REQUEST);
@@ -1564,30 +1564,8 @@ virNetDevSetVfConfig(const char *ifname, int vf,
if (!(vfinfo = nla_nest_start(nl_msg, IFLA_VF_INFO)))
goto buffer_too_small;
- if (macaddr) {
- struct ifla_vf_mac ifla_vf_mac = {
- .vf = vf,
- .mac = { 0, },
- };
-
- virMacAddrGetRaw(macaddr, ifla_vf_mac.mac);
-
- if (nla_put(nl_msg, IFLA_VF_MAC, sizeof(ifla_vf_mac),
- &ifla_vf_mac) < 0)
- goto buffer_too_small;
- }
-
- if (vlanid >= 0) {
- struct ifla_vf_vlan ifla_vf_vlan = {
- .vf = vf,
- .vlan = vlanid,
- .qos = 0,
- };
-
- if (nla_put(nl_msg, IFLA_VF_VLAN, sizeof(ifla_vf_vlan),
- &ifla_vf_vlan) < 0)
- goto buffer_too_small;
- }
+ if (nla_put(nl_msg, vfInfoType, payloadLen, payload) < 0)
+ goto buffer_too_small;
nla_nest_end(nl_msg, vfinfo);
nla_nest_end(nl_msg, vfinfolist);
@@ -1600,48 +1578,20 @@ virNetDevSetVfConfig(const char *ifname, int vf,
goto malformed_resp;
switch (resp->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(resp);
- if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(resp);
+ if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ goto malformed_resp;
+ rc = err->error;
+ break;
+ case NLMSG_DONE:
+ rc = 0;
+ break;
+ default:
goto malformed_resp;
-
- /* if allowRetry is true and the error was EINVAL, then
- * silently return a failure so the caller can retry with a
- * different MAC address
- */
- if (err->error == -EINVAL && *allowRetry &&
- macaddr && !virMacAddrCmp(macaddr, &zeroMAC)) {
- goto cleanup;
- } else if (err->error) {
- /* other errors are permanent */
- virReportSystemError(-err->error,
- _("Cannot set interface MAC/vlanid to %s/%d
"
- "for ifname %s vf %d"),
- (macaddr
- ? virMacAddrFormat(macaddr, macstr)
- : "(unchanged)"),
- vlanid,
- ifname ? ifname : "(unspecified)",
- vf);
- *allowRetry = false; /* no use retrying */
- goto cleanup;
- }
- break;
-
- case NLMSG_DONE:
- break;
-
- default:
- goto malformed_resp;
}
- rc = 0;
cleanup:
- VIR_DEBUG("RTM_SETLINK %s vf %d MAC=%s vlanid=%d - %s",
- ifname, vf,
- macaddr ? virMacAddrFormat(macaddr, macstr) : "(unchanged)",
- vlanid, rc < 0 ? "Fail" : "Success");
-
nlmsg_free(nl_msg);
return rc;
@@ -1656,6 +1606,108 @@ virNetDevSetVfConfig(const char *ifname, int vf,
goto cleanup;
}
+int
+virNetDevSetVfVlan(const char *ifname, int vf, int vlanid)
+{
+ int rc = -1;
+ int requestError = 0;
+
+ struct ifla_vf_vlan ifla_vf_vlan = {
+ .vf = vf,
+ .vlan = vlanid,
+ .qos = 0,
+ };
+
+ /* VLAN ids 0 and 4095 are reserved per 802.1Q but are valid values. */
+ if (vlanid < 0 || vlanid > 4095)
+ return -1;
+
+ requestError = virNetDevSendVfSetLinkRequest(ifname, IFLA_VF_VLAN,
+ &ifla_vf_vlan,
sizeof(ifla_vf_vlan));
+
+ /* If vlanid is 0 - we are attempting to clear an existing VLAN id.
+ * An EPERM received at this stage is an indicator that the embedded
+ * switch is not exposed to this host and the network driver is not
+ * able to set a VLAN for a VF. */
+ if (requestError == -EPERM && vlanid == 0) {
This metod is taking a plain "int vlanid", but the eventual
caller of this method is taking "virNetDevVlan *vlan".
IOW, the caller can distinguish between two scenarios
- vlan is NULL => set vlanid to 0
- vlan is non-NULL and user specified vlanid of 0
I think it is reasonable to ignore EPERM in the first
scenario for the reasons you describe wrt hardware
driver restrictions.
I'm less sure it is a good idea to ignore EPERM when
it wasn an explicit user configuration request to set
vlanid to 0. It feels like we should be continuing to
report an error if we can't honour an explicit user
request - its a sign the user shouldn't have been
settng the vlan in the first place.
+ rc = 0;
+ } else if (requestError) {
+ virReportSystemError(-requestError,
+ _("Cannot set interface vlanid to %d "
+ "for ifname %s vf %d"),
+ vlanid, ifname ? ifname : "(unspecified)", vf);
+ rc = -1;
+ } else {
+ rc = 0;
+ }
+ VIR_DEBUG("RTM_SETLINK %s vf %d vlanid=%d - %s",
+ ifname, vf,
+ vlanid, rc < 0 ? "Fail" : "Success");
+ return rc;
+}
+
+int
+virNetDevSetVfMac(const char *ifname, int vf,
+ const virMacAddr *macaddr,
+ bool *allowRetry)
+{
+ int rc = 0;
+ int requestError = 0;
+ char macstr[VIR_MAC_STRING_BUFLEN];
+
+ struct ifla_vf_mac ifla_vf_mac = {
+ .vf = vf,
+ .mac = { 0, },
+ };
+
+ if (macaddr == NULL || allowRetry == NULL)
+ return -1;
+
+ virMacAddrGetRaw(macaddr, ifla_vf_mac.mac);
+
+ requestError = virNetDevSendVfSetLinkRequest(ifname, IFLA_VF_MAC,
+ &ifla_vf_mac,
sizeof(ifla_vf_mac));
+ /* if allowRetry is true and the error was EINVAL, then
+ * silently return a failure so the caller can retry with a
+ * different MAC address. */
+ if (requestError == -EINVAL && *allowRetry &&
!virMacAddrCmp(macaddr, &zeroMAC)) {
+ rc = -2;
+ } else if (requestError) {
+ /* other errors are permanent */
+ virReportSystemError(-requestError,
+ _("Cannot set interface MAC to %s "
+ "for ifname %s vf %d"),
+ (macaddr
+ ? virMacAddrFormat(macaddr, macstr)
+ : "(unchanged)"),
+ ifname ? ifname : "(unspecified)",
+ vf);
+ *allowRetry = false; /* no use retrying */
+ rc = -1;
+ } else {
+ rc = 0;
+ }
+ VIR_DEBUG("RTM_SETLINK %s vf %d MAC=%s - %s",
+ ifname, vf,
+ macaddr ? virMacAddrFormat(macaddr, macstr) : "(unchanged)",
+ rc < 0 ? "Fail" : "Success");
+ return rc;
+}
+
+int
+virNetDevSetVfConfig(const char *ifname, int vf,
+ const virMacAddr *macaddr, int vlanid,
+ bool *allowRetry)
+{
+ int rc = 0;
+ if ((rc = virNetDevSetVfMac(ifname, vf, macaddr, allowRetry)) < 0) {
+ return rc;
+ } else if ((rc = virNetDevSetVfVlan(ifname, vf, vlanid)) < 0) {
+ return rc;
+ }
+ return rc;
+}
+
Regards,
Daniel
--
|:
https://berrange.com -o-
https://www.flickr.com/photos/dberrange :|
|:
https://libvirt.org -o-
https://fstop138.berrange.com :|
|:
https://entangle-photo.org -o-
https://www.instagram.com/dberrange :|