Daniel Veillard <veillard@redhat.com> wrote on 02/11/2010 06:08:15 AM:

[...]
> > +int openMacvtapTap(virConnectPtr conn,
> > +                   const char *ifname,
> > +                   const unsigned char *macaddress,
> > +                   const char *linkdev,
> > +                   int mode,
> > +                   char **res_ifname);
> > +
> > +int delMacvtapByMACAddress(virConnectPtr conn,
> > +                           const unsigned char *macaddress,
> > +                           int *hasBusyDev);
> > +
> > +#endif /* WITH_MACVTAP */
> > +
> > +#define MACVTAP_MODE_PRIVATE_STR  "private"
> > +#define MACVTAP_MODE_VEPA_STR     "vepa"
> > +#define MACVTAP_MODE_BRIDGE_STR   "bridge"
> > +
> > +
> > +#endif
>
> minor suggestion: #endif /* __UTIL_MACVTAP_H__ */


Fixed in upcoming patch.

[...]
> > +
> > +#define VIR_FROM_THIS VIR_FROM_NONE
> > +
> > +#define ReportError(conn, code, fmt...)                                  \
> > +        virReportErrorHelper(conn, VIR_FROM_NONE, code, __FILE__,        \
> > +                               __FUNCTION__, __LINE__, fmt)
>
>   Hum, I would suggest to use VIR_FROM_NET instead of VIR_FROM_NONE,
> or add a specific virErrorDomain enum in virterror.h (and associated
> code in virterror.c). I think VIR_FROM_NET is the simplest for now.


Fixed in upcoming patch.

>
> > +#define MACVTAP_NAME_PREFIX   "macvtap"
> > +#define MACVTAP_NAME_PATTERN   "macvtap%d"
> > +
> > +static int nlOpen(virConnectPtr conn)
> > +{
> > +    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> > +    if (fd < 0)
> > +        virReportSystemError(conn, errno,
> > +                             "%s",_("cannot open netlink socket"));
> > +    return fd;
> > +}
> > +
>   the function signature will change in the rebase as others in the code
>   too



True.

>
> > +static void nlClose(int fd)
> > +{
> > +    close(fd);
> > +}
> > +
> > +
> > +static
> > +int nlComm(virConnectPtr conn,
> > +           struct nlmsghdr *nlmsg,
> > +           char *respbuf, int *respbuflen)
> > +{
> > +    int rc = 0;
> > +    struct sockaddr_nl nladdr = {
> > +            .nl_family = AF_NETLINK,
> > +            .nl_pid    = 0,
> > +            .nl_groups = 0,
> > +    };
> > +    char rcvbuf[1024];
> > +    ssize_t nbytes;
> > +    size_t tocopy;
> > +    int fd = nlOpen(conn);
> > +
> > +    if (fd < 0)
> > +        return -1;
> > +
> > +    nlmsg->nlmsg_flags |= NLM_F_ACK;
> > +
> > +    nbytes = sendto(fd, (void *)nlmsg, nlmsg->nlmsg_len, 0,
> > +                    (struct sockaddr *)&nladdr, sizeof(nladdr));
> > +    if (nbytes < 0) {
> > +        virReportSystemError(conn, errno,
> > +                             "%s", _("cannot send to netlink socket"));
> > +        rc = -1;
> > +        goto err_exit;
> > +    }
> > +
> > +    memset(rcvbuf, 0x0, sizeof(rcvbuf));
> > +    while (1) {
> > +        socklen_t addrlen = sizeof(nladdr);
> > +        nbytes = recvfrom(fd, &rcvbuf, sizeof(rcvbuf), 0,
> > +                          (struct sockaddr *)&nladdr, &addrlen);
> > +        if (nbytes < 0) {
> > +            if (errno == EAGAIN || errno == EINTR)
> > +                continue;
> > +            virReportSystemError(conn, errno, "%s",
> > +                                 _("error receiving from netlink socket"));
> > +            rc = -1;
> > +            goto err_exit;
> > +        }
> > +
> > +        tocopy = (nbytes < *respbuflen) ? nbytes : *respbuflen;
> > +        memcpy(respbuf, rcvbuf, tocopy);
> > +        *respbuflen = tocopy;
> > +        break;
> > +    }
> > +
> > +err_exit:
> > +    nlClose(fd);
> > +    return rc;
> > +}
>
>   Hum ... I don't see why we need rcvbuf here, we could make the recvfrom
> directly from respbuf with the respbuflen size, and no need to allocate
> one KB on stack. Also if for some reason one expect a large response
> packet there caller will be in control instead of a fixed size arbitrary
> limit in the function. The only case I could come to justifying this is
> if the caller want to truncate the response or if netlink force using
> a 1KB buffer input size.


In the next patch I'll write into the provided receive buffer.

>
> [...]
> > +static int
> > +link_add(virConnectPtr conn,
> > +         const char *type,
> > +         const unsigned char *macaddress, int macaddrsize,
> > +         const char *ifname,
> > +         const char *srcdev,
> > +         uint32_t macvlan_mode,
> > +         int *retry)
> > +{
> > +    char nlmsgbuf[1024], recvbuf[1024];
> > +    struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
> > +    struct nlmsgerr *err;
> > +    char rtattbuf[256];
> > +    struct rtattr *rta, *rta1, *li;
> > +    struct ifinfomsg i = { .ifi_family = AF_UNSPEC };

[...]

> > +
> > +    if (macvlan_mode > 0) {
> > +        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_DATA,
> > +                           NULL, 0);
> > +        if (!rta)
> > +            goto buffer_too_small;
> > +
> > +        if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf,
> rta->rta_len)))
> > +            goto buffer_too_small;
> > +
> > +        rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_MACVLAN_MODE,
> > +                           &macvlan_mode, sizeof(macvlan_mode));
> > +        if (!rta)
> > +            goto buffer_too_small;
> > +
> > +        if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
> > +            goto buffer_too_small;
> > +
> > +        rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
> > +    }
> > +
> > +    li->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)li;
>
>   I must admit that I'm again a bit surprized by the buffer handling.
> There is a function to append stuff on a buffer, so all this code could
> be changed to use dymanic allocation easilly and not be stuck on a fixed
> buffer size allocated on stack (a couple of buffers even). since we call
> nlComm below, we already have 3KB of stack allocated buffers piled up
> and honnestly none of this is required as far as I understand.


The thing with the netlink messages is that their content needs to be 'nested', meaning that you
add data to a netlink message parameter, depending on availability of parameters in the function,
i.e., test for macvlan_mode > 0, and only later on you can determine how large the size of the
nested information was. Above I am setting the li pointer in the nlAppend() call. Later on only
I set the size of the nested information in the li->rta_len = ... line. So the pointer for li better
not have changed while reallocating the target message buffer for example.

It's true that the size of the buffers is rather generous. I could limit them to around 128 bytes if
that's better. At least that is sufficient.

> > +
> > +    snprintf(path, sizeof(path), "/sys/class/net/%s/ifindex", ifname);
>
>   virBuildPathInternal could be another way, in any case one should
>   check the return value


Checking return value in next patch.

>
> > +    file = fopen(path, "r");
> > +
> > +    if (!file) {
> > +        virReportSystemError(conn, errno,
> > +                             _("cannot open macvtap file %s to determine "
> > +                               "interface index"), path);
> > +        return -1;
> > +    }
> > +
> > +    if (fscanf(file, "%d", &ifindex) != 1) {
> > +        virReportSystemError(conn, errno,
> > +                             "%s",_("cannot determine macvtap's
> tap device "
> > +                             "interface index"));
> > +        fclose(file);
> > +        return -1;
> > +    }
> > +
> > +    fclose(file);
> > +
> > +    snprintf(tapname, sizeof(tapname), "/dev/tap%d", ifindex);
> > +
> > +    while (1) {
> > +        // may need to wait for udev to be done
> > +        tapfd = open(tapname, O_RDWR);
> > +        if (tapfd < 0 && retries--) {
> > +            usleep(20000);
> > +            continue;
> > +        }
> > +        break;
> > +    }
>
>   hum, the function should check retries > 0 argument otherwise this
> could hang the daemon.


Changed this.

>
> > +    if (tapfd < 0)
> > +        virReportSystemError(conn, errno,
> > +                             _("cannot open macvtap tap device %s"),
> > +                             tapname);
> > +
> > +    return tapfd;
> > +}
>
>   okay, I'm not too convinced by the buffer management code, but that
> could be cleaned up in a later pach, so ACK


I'll shrink the sizes of the buffers. Want to be careful about dynamic allocation
of the buffers in this case, though.

   Stefan


>
> Daniel
>
> --
> Daniel Veillard      | libxml Gnome XML XSLT toolkit  
http://xmlsoft.org/
> daniel@veillard.com  | Rpmfind RPM search engine
http://rpmfind.net/
>
http://veillard.com/ | virtualization library  http://libvirt.org/