Re: [libvirt] [v5 PATCH] add 802.1Qbh handling for port-profiles based on Stefan's previous patches

25 May 2010

Scott Feldman wrote:
...
From: Scott Feldman <scofeldm@cisco.com>
...

diff --git a/configure.ac b/configure.ac
...
-if test "$with_macvtap" = "yes"; then
+if test "$with_macvtap" = "yes" || "$with_virtualport" = "yes"; then
Hi Scott,

The above introduces a syntax error.
Fix it by inserting a "test" after the "||":

  if test "$with_macvtap" = "yes" || test "$with_virtualport" = "yes"; then

...
...
diff --git a/src/util/macvtap.c b/src/util/macvtap.c
...
@@ -159,6 +170,156 @@ err_exit:
 }
+# ifdef IFLA_VF_PORT_MAX
+
+/**
+ * nlCommWaitSuccess:
+ *
+ * @nlmsg: pointer to netlink message
+ * @nl_grousp: the netlink multicast groups to send to
+ * @respbuf: pointer to pointer where response buffer will be allocated
+ * @respbuflen: pointer to integer holding the size of the response buffer
+ *      on return of the function.
+ * @to_usecs: timeout in microseconds to wait for a success message
+ *            to be returned
+ *
+ * Send the given message to the netlink multicast group and receive
+ * responses. Skip responses indicating an error and keep on receiving
+ * responses until a success response is returned.
+ * Returns 0 on success, -1 on error. In case of error, no response
+ * buffer will be returned.
+ */
+static int
+nlCommWaitSuccess(struct nlmsghdr *nlmsg, int nl_groups,
+                  char **respbuf, int *respbuflen, long to_usecs)
The parameters, respbuflen and nl_groups make sense only when
non-negative, so my reflex is to make their types reflect that.
Any reason not to use an unsigned type in those cases?

This "to_usecs" parameter is in the same boat, in that it is
logically non-negative.  I suggest using unsigned long long
for it, since technically (with the two longs in struct timeval)
a portable timeout can be as large as 2^31 * 1000^2 microseconds.
Also, it'd be nice to rename it to "timeout_usecs", since "to_usecs"
made me think of a flag that says whether to convert "to microseconds".
...
+{
+    int rc = 0;
+    struct sockaddr_nl nladdr = {
+            .nl_family = AF_NETLINK,
+            .nl_pid    = getpid(),
+            .nl_groups = nl_groups,
+    };
+    int rcvChunkSize = 1024; // expecting less than that
+    int rcvoffset = 0;
This should be of type size_t.
It is used as an accumulator, and we do add nbytes (of type ssize_t) to it,
and (esp!) use it as an array index, so its type must be unsigned.
Otherwise, overflow could be exploitable.
...
+    ssize_t nbytes;
+    int n;
+    struct timeval tv = {
+        .tv_sec  = to_usecs / MICROSEC_PER_SEC,
+        .tv_usec = to_usecs % MICROSEC_PER_SEC,
+    };
+    fd_set rfds;
At least "n" and "rfds" are used only in the while loop,
so their declarations belong "down" there, too.
...
+    bool gotvalid = false;
Personal preference:
I find that a name like "got_valid" is easier to read than a
variablenamewithnoseparatorbetweenwords.
Same for rcvoffset vs. rcv_offset, etc.
...
+    int fd = nlOpen();
+    static uint32_t seq = 0x1234;
+    uint32_t myseq = seq++;
+    uint32_t mypid = getpid();
+
+    if (fd < 0)
+        return -1;
+
+    nlmsg->nlmsg_pid = mypid;
+    nlmsg->nlmsg_seq = myseq;
+    nlmsg->nlmsg_flags |= NLM_F_ACK;
+
+    nbytes = sendto(fd, (void *)nlmsg, nlmsg->nlmsg_len, 0,
+                    (struct sockaddr *)&nladdr, sizeof(nladdr));
+    if (nbytes < 0) {
+        virReportSystemError(errno,
+                             "%s", _("cannot send to netlink socket"));
+        rc = -1;
+        goto err_exit;
+    }
+
+    while (!gotvalid) {
+        rcvoffset = 0;
+        while (1) {
+            socklen_t addrlen = sizeof(nladdr);
+
+            if (VIR_REALLOC_N(*respbuf, rcvoffset+rcvChunkSize) < 0) {
+                virReportOOMError();
+                rc = -1;
+                goto err_exit;
+            }
+
+            FD_ZERO(&rfds);
+            FD_SET(fd, &rfds);
+
+            n = select(fd + 1, &rfds, NULL, NULL, &tv);
+            if (n == 0) {
That handles the case of an expired timeout.
However, if select fails, it returns -1.
You should diagnose that failure rather than going
ahead and reading from "fd".
...
+                rc = -1;
+                goto err_exit;
+            }
+
+            nbytes = recvfrom(fd, &((*respbuf)[rcvoffset]), rcvChunkSize, 0,
+                              (struct sockaddr *)&nladdr, &addrlen);
+            if (nbytes < 0) {
+                if (errno == EAGAIN || errno == EINTR)
+                    continue;
+                virReportSystemError(errno, "%s",
+                                     _("error receiving from netlink socket"));
+                rc = -1;
+                goto err_exit;
+            }
+            rcvoffset += nbytes;
+            break;
+        }
+        *respbuflen = rcvoffset;
+
+        /* check message for error */
+        if (*respbuflen > NLMSG_LENGTH(0) && *respbuf != NULL) {
+            struct nlmsghdr *resp = (struct nlmsghdr *)*respbuf;
+            struct nlmsgerr *err;
+
+            if (resp->nlmsg_pid != mypid ||
+                resp->nlmsg_seq != myseq)
+                continue;
+
+            /* skip reflected message */
+            if (resp->nlmsg_type & 0x10)
+                continue;
+
+            switch (resp->nlmsg_type) {
+               case NLMSG_ERROR:
+                  err = (struct nlmsgerr *)NLMSG_DATA(resp);
+                  if (resp->nlmsg_len >= NLMSG_LENGTH(sizeof(*err))) {
+                      if (-err->error != EOPNOTSUPP) {
I'm used to seeing this:
                         if (err->error != -EOPNOTSUPP) {
why do you do it the other way?
Such differences are generally discouraged.
...
+                          /* assuming error msg from daemon */
+                          gotvalid = true;
+                          break;
+                      }
+                  }
+                  /* whatever this is, skip it */
+                  VIR_FREE(*respbuf);
+                  *respbuf = NULL;
The above is a dead store, since VIR_FREE has just done that.
...
+                  *respbuflen = 0;
+                  break;
+
+               case NLMSG_DONE:
+                  gotvalid = true;
+                  break;
+
+               default:
+                  VIR_FREE(*respbuf);
+                  *respbuf = NULL;
Likewise.  Remove the dead store.
...
+                  *respbuflen = 0;
+                  break;
+            }
+        }
+    }
+
+err_exit:
+    if (rc == -1) {
+        VIR_FREE(*respbuf);
+        *respbuf = NULL;
Likewise.  Remove the dead store.
...
+        *respbuflen = 0;
+    }
+
+    nlClose(fd);
+    return rc;
+}
+
+# endif
...
...
+static int
+link_dump(int ifindex, struct nlattr **tb, char **recvbuf)
+{
+    int rc = 0;
+    char nlmsgbuf[256] = { 0, };
+    struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+    struct nlmsgerr *err;
+    struct ifinfomsg i = {
+        .ifi_family = AF_UNSPEC,
+        .ifi_index  = ifindex
+    };
When I first read the "sizeof(i)" below I did a double take.
I'd prefer a name that does not make me think of an integer index.
...
+    int recvbuflen;
+
+    *recvbuf = NULL;
+
+    nlInit(nlm, NLM_F_REQUEST, RTM_GETLINK);
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i)))
+        goto buffer_too_small;
+
+    if (nlComm(nlm, recvbuf, &recvbuflen) < 0)
+        return -1;
+
+    if (recvbuflen < NLMSG_LENGTH(0) || *recvbuf == NULL)
+        goto malformed_resp;
+
+    resp = (struct nlmsghdr *)*recvbuf;
+
+    switch (resp->nlmsg_type) {
+    case NLMSG_ERROR:
+        err = (struct nlmsgerr *)NLMSG_DATA(resp);
+        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+            goto malformed_resp;
+
+        switch (-err->error) {
Please remove the unary "-", since it's not used here.
...
+        case 0:
+        break;
That "break;" should be indented.
...
+
+        default:
+            virReportSystemError(-err->error,
+                                 _("error dumping %d interface"),
+                                 ifindex);
+            rc = -1;
+        }
+    break;
Same here.  Indent.
...
+
+    case GENL_ID_CTRL:
+    case NLMSG_DONE:
+        if (nlmsg_parse(resp, sizeof(struct ifinfomsg),
+                        tb, IFLA_MAX, ifla_policy)) {
+            goto malformed_resp;
+        }
+    break;
And here.  Indent the "break".
...
+
+    default:
+        goto malformed_resp;
+    }
+
+    if (rc != 0)
+        VIR_FREE(*recvbuf);
+
+    return rc;
+
+malformed_resp:
+    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("malformed netlink response message"));
+    VIR_FREE(*recvbuf);
+    return -1;
+
+buffer_too_small:
+    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("internal buffer is too small"));
+    return -1;
+}
...
+static int
+doPortProfileOpSetLink(bool multicast,
+                       int ifindex,
+                       const char *profileId,
+                       struct ifla_port_vsi *portVsi,
+                       const unsigned char *instanceId,
+                       const unsigned char *hostUUID,
+                       int32_t vf,
+                       uint8_t op)
+{
+    int rc = 0;
+    char nlmsgbuf[256];
+    struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+    struct nlmsgerr *err;
+    char rtattbuf[64];
Can you use a symbolic constant instead of "64" ?
...
+    struct rtattr *rta, *vfports, *vfport;
+    struct ifinfomsg ifinfo = {
+        .ifi_family = AF_UNSPEC,
+        .ifi_index  = ifindex,
+    };
+    char *recvbuf = NULL;
+    int recvbuflen = 0;
+
+    memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
+
+    nlInit(nlm, NLM_F_REQUEST, RTM_SETLINK);
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), &ifinfo, sizeof(ifinfo)))
+        goto buffer_too_small;
+
+    if (vf == PORT_SELF_VF) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_SELF, NULL, 0);
+    } else {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_VF_PORTS, NULL, 0);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!(vfports = nlAppend(nlm, sizeof(nlmsgbuf),
+                                 rtattbuf, rta->rta_len)))
+            goto buffer_too_small;
+
+        /* beging nesting vfports */
Typo.
s/beging/begin/
...
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_VF_PORT, NULL, 0);
+    }
+
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!(vfport = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+        goto buffer_too_small;
+
+    if (profileId) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_PROFILE,
+                           profileId, strlen(profileId) + 1);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    if (portVsi) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_VSI_TYPE,
+                           portVsi, sizeof(*portVsi));
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    if (instanceId) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_INSTANCE_UUID,
+                           instanceId, VIR_UUID_BUFLEN);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    if (hostUUID) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_HOST_UUID,
+                           hostUUID, VIR_UUID_BUFLEN);
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    if (vf != PORT_SELF_VF) {
+        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_VF,
+                           &vf, sizeof(vf));
+        if (!rta)
+            goto buffer_too_small;
+
+        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+            goto buffer_too_small;
+    }
+
+    rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_REQUEST,
+                       &op, sizeof(op));
+    if (!rta)
+        goto buffer_too_small;
+
+    if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+        goto buffer_too_small;
There is too much duplication in the above 6 clauses.
Can you factor out a tiny helper function to make this more
readable/maintainable?

At the very least, combine the two
"if...goto buffer_too_small" stmts into one:

           if (!rta || !nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
               goto buffer_too_small;
...
+
+    /* end nesting of vport */
+    vfport->rta_len  = (char *)nlm + nlm->nlmsg_len - (char *)vfport;
+
+    if (vf != PORT_SELF_VF) {
+        /* end nesting of vfports */
+        vfports->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)vfports;
+    }
+
+    if (!multicast) {
+        if (nlComm(nlm, &recvbuf, &recvbuflen) < 0)
+            return -1;
+    } else {
+        if (nlCommWaitSuccess(nlm, RTMGRP_LINK, &recvbuf, &recvbuflen,
+                              5 * MICROSEC_PER_SEC) < 0)
+            return -1;
+    }
+
+    if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL)
+        goto malformed_resp;
+
+    resp = (struct nlmsghdr *)recvbuf;
+
+    switch (resp->nlmsg_type) {
+    case NLMSG_ERROR:
+        err = (struct nlmsgerr *)NLMSG_DATA(resp);
+        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+            goto malformed_resp;
+        switch (-err->error) {
+        case 0:
+        break;
Indent the break:
           case 0:
               break;

Actually, since there's only one action here,
just drop the switch in favor of a simple "if" stmt.
(same with the other switch (-err->error) above)
That makes the resulting code shorter by 4 lines per switch stmt.

           if (err->error) {
               virReportSystemError(-err->error,
                   _("error during virtual port configuration of ifindex %d"),
                   ifindex);
               rc = -1;
           }
...
+        default:
+            virReportSystemError(-err->error,
+                _("error during virtual port configuration of ifindex %d"),
+                ifindex);
+            rc = -1;
+        }
+    break;
Indent.
...
+
+    case NLMSG_DONE:
+    break;
Indent.
...
+
+    default:
+        goto malformed_resp;
+    }
+
+    VIR_FREE(recvbuf);
+
+    return rc;
+
+malformed_resp:
+    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("malformed netlink response message"));
+    VIR_FREE(recvbuf);
+    return -1;
+
+buffer_too_small:
+    macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("internal buffer is too small"));
+    return -1;
+}
+
+
+static int
+doPortProfileOpCommon(bool multicast,
+                      int ifindex,
+                      const char *profileId,
+                      struct ifla_port_vsi *portVsi,
+                      const unsigned char *instanceId,
+                      const unsigned char *hostUUID,
+                      int32_t vf,
+                      uint8_t op)
+{
+    int rc;
+    char *recvbuf = NULL;
+    struct nlattr *tb[IFLA_MAX + 1];
+    int repeats = 80;
This can (hence should) be unsigned.
This number is obviously related to the 125000-microsecond
sleep below.  Consider adding a comment saying up to how long
we should wait for a result.
...
+    uint16_t status = 0;
+
+    rc = doPortProfileOpSetLink(multicast,
+                                ifindex,
+                                profileId,
+                                portVsi,
+                                instanceId,
+                                hostUUID,
+                                vf,
+                                op);
+
+    if (rc != 0) {
+        macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                     _("sending of PortProfileRequest failed."));
+        return rc;
+    }
+
+    if (!multicast) {
+        while (--repeats) {
+            rc = link_dump(ifindex, tb, &recvbuf);
+            if (rc)
+                goto err_exit;
+            rc = getPortProfileStatus(tb, vf, &status);
+            if (rc == 0) {
What if getPortProfileStatus returns something other than 0?
Currently, when that happens, this code just ignores it (failure?)
sleeps and retries.
Doesn't failure to get status deserve an error just
as much as when we do get status that indicates a failure?
...
+                if (status == PORT_PROFILE_RESPONSE_SUCCESS ||
+                    status == PORT_VDP_RESPONSE_SUCCESS) {
+                    break;
+                } else if (status == PORT_PROFILE_RESPONSE_INPROGRESS) {
+                    // keep trying...
+                } else {
+                    virReportSystemError(EINVAL,
+                        _("error %d during port-profile setlink on ifindex %d"),
+                        status, ifindex);
+                    rc = 1;
+                    break;
+                }
+            }
+            usleep(125000);
Magic constant?
How did you determine this number?
How often are we expected to hit it?
Please add a comment.
...
+            VIR_FREE(recvbuf);
+        }
+    }
+
+    if (status == PORT_PROFILE_RESPONSE_INPROGRESS) {
+        macvtapError(VIR_ERR_INTERNAL_ERROR, "%s",
+                     _("port-profile setlink timed out"));
+        rc = -ETIMEDOUT;
+    }
+
+err_exit:
+    VIR_FREE(recvbuf);
+
+    return rc;
+}
+
+# endif /* IFLA_PORT_MAX */

    

Re: [libvirt] [v5 PATCH] add 802.1Qbh handling for port-profiles based on Stefan's previous patches

Jim Meyering