On Mon, Feb 08, 2010 at 02:37:52PM -0500, Stefan Berger wrote:
This part adds the helper code to setup and tear down macvtap
devices
using direct communication with the device driver via netlink sockets.
Signed-off-by: Stefan Berger <stefanb(a)us.ibm.com>
[...]
Index: libvirt-macvtap/src/util/macvtap.h
===================================================================
--- /dev/null
+++ libvirt-macvtap/src/util/macvtap.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ * Stefan Berger <stefanb(a)us.ibm.com>
+ */
+
+#ifndef __UTIL_MACVTAP_H__
+#define __UTIL_MACVTAP_H__
+
+#include <config.h>
+
+#if defined(WITH_MACVTAP)
+
+#include "internal.h"
+
+int openMacvtapTap(virConnectPtr conn,
+ const char *ifname,
+ const unsigned char *macaddress,
+ const char *linkdev,
+ int mode,
+ char **res_ifname);
+
+int delMacvtapByMACAddress(virConnectPtr conn,
+ const unsigned char *macaddress,
+ int *hasBusyDev);
+
+#endif /* WITH_MACVTAP */
+
+#define MACVTAP_MODE_PRIVATE_STR "private"
+#define MACVTAP_MODE_VEPA_STR "vepa"
+#define MACVTAP_MODE_BRIDGE_STR "bridge"
+
+
+#endif
minor suggestion: #endif /* __UTIL_MACVTAP_H__ */
Index: libvirt-macvtap/src/util/macvtap.c
===================================================================
--- /dev/null
+++ libvirt-macvtap/src/util/macvtap.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ * Stefan Berger <stefanb(a)us.ibm.com>
+ *
+ * Notes:
+ * netlink:
http://lovezutto.googlepages.com/netlink.pdf
+ * iproute2 package
+ *
+ */
+
+#include <config.h>
+
+#if defined(WITH_MACVTAP)
+
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_tun.h>
+
+#include "util.h"
+#include "macvtap.h"
+#include "conf/domain_conf.h"
+#include "virterror_internal.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+#define ReportError(conn, code, fmt...) \
+ virReportErrorHelper(conn, VIR_FROM_NONE, code, __FILE__, \
+ __FUNCTION__, __LINE__, fmt)
Hum, I would suggest to use VIR_FROM_NET instead of VIR_FROM_NONE,
or add a specific virErrorDomain enum in virterror.h (and associated
code in virterror.c). I think VIR_FROM_NET is the simplest for now.
+#define MACVTAP_NAME_PREFIX "macvtap"
+#define MACVTAP_NAME_PATTERN "macvtap%d"
+
+static int nlOpen(virConnectPtr conn)
+{
+ int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0)
+ virReportSystemError(conn, errno,
+ "%s",_("cannot open netlink socket"));
+ return fd;
+}
+
the function signature will change in the rebase as others in the code
too
+static void nlClose(int fd)
+{
+ close(fd);
+}
+
+
+static
+int nlComm(virConnectPtr conn,
+ struct nlmsghdr *nlmsg,
+ char *respbuf, int *respbuflen)
+{
+ int rc = 0;
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ .nl_pid = 0,
+ .nl_groups = 0,
+ };
+ char rcvbuf[1024];
+ ssize_t nbytes;
+ size_t tocopy;
+ int fd = nlOpen(conn);
+
+ if (fd < 0)
+ return -1;
+
+ nlmsg->nlmsg_flags |= NLM_F_ACK;
+
+ nbytes = sendto(fd, (void *)nlmsg, nlmsg->nlmsg_len, 0,
+ (struct sockaddr *)&nladdr, sizeof(nladdr));
+ if (nbytes < 0) {
+ virReportSystemError(conn, errno,
+ "%s", _("cannot send to netlink
socket"));
+ rc = -1;
+ goto err_exit;
+ }
+
+ memset(rcvbuf, 0x0, sizeof(rcvbuf));
+ while (1) {
+ socklen_t addrlen = sizeof(nladdr);
+ nbytes = recvfrom(fd, &rcvbuf, sizeof(rcvbuf), 0,
+ (struct sockaddr *)&nladdr, &addrlen);
+ if (nbytes < 0) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ virReportSystemError(conn, errno, "%s",
+ _("error receiving from netlink socket"));
+ rc = -1;
+ goto err_exit;
+ }
+
+ tocopy = (nbytes < *respbuflen) ? nbytes : *respbuflen;
+ memcpy(respbuf, rcvbuf, tocopy);
+ *respbuflen = tocopy;
+ break;
+ }
+
+err_exit:
+ nlClose(fd);
+ return rc;
+}
Hum ... I don't see why we need rcvbuf here, we could make the recvfrom
directly from respbuf with the respbuflen size, and no need to allocate
one KB on stack. Also if for some reason one expect a large response
packet there caller will be in control instead of a fixed size arbitrary
limit in the function. The only case I could come to justifying this is
if the caller want to truncate the response or if netlink force using
a 1KB buffer input size.
[...]
+static int
+link_add(virConnectPtr conn,
+ const char *type,
+ const unsigned char *macaddress, int macaddrsize,
+ const char *ifname,
+ const char *srcdev,
+ uint32_t macvlan_mode,
+ int *retry)
+{
+ char nlmsgbuf[1024], recvbuf[1024];
+ struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+ struct nlmsgerr *err;
+ char rtattbuf[256];
+ struct rtattr *rta, *rta1, *li;
+ struct ifinfomsg i = { .ifi_family = AF_UNSPEC };
+ int ifindex;
+ int recvbuflen = sizeof(recvbuf);
+
+ if (getIfIndex(conn, srcdev, &ifindex) != 0)
+ return -1;
+
+ *retry = 0;
+
+ memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
+
+ nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL, RTM_NEWLINK);
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i)))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINK,
+ &ifindex, sizeof(ifindex));
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_ADDRESS,
+ macaddress, macaddrsize);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ if (ifname) {
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME,
+ ifname, strlen(ifname) + 1);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+ }
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINKINFO, NULL, 0);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!(li = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_KIND,
+ type, strlen(type));
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ if (macvlan_mode > 0) {
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_DATA,
+ NULL, 0);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_MACVLAN_MODE,
+ &macvlan_mode, sizeof(macvlan_mode));
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
+ }
+
+ li->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)li;
I must admit that I'm again a bit surprized by the buffer handling.
There is a function to append stuff on a buffer, so all this code could
be changed to use dymanic allocation easilly and not be stuck on a fixed
buffer size allocated on stack (a couple of buffers even). since we call
nlComm below, we already have 3KB of stack allocated buffers piled up
and honnestly none of this is required as far as I understand.
+ if (nlComm(conn, nlm, recvbuf, &recvbuflen) < 0)
+ return -1;
+
+ if (recvbuflen < NLMSG_LENGTH(0))
+ goto malformed_resp;
+
+ resp = (struct nlmsghdr *)recvbuf;
+
+ switch (resp->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(resp);
+ if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ goto malformed_resp;
+
+ switch (-err->error) {
+
+ case 0:
+ break;
+
+ case EEXIST:
+ *retry = 1;
+ return -1;
+ break;
+
+ default:
+ virReportSystemError(conn, -err->error,
+ _("error creating %s type of interface"),
+ type);
+ return -1;
+ }
+ break;
+
+ case NLMSG_DONE:
+ break;
+
+ default:
+ goto malformed_resp;
+ }
+
+ return 0;
+
+malformed_resp:
+ ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+ _("malformed netlink response message"));
+ return -1;
+
+buffer_too_small:
+ ReportError(conn, VIR_ERR_INTERNAL_ERROR,
+ _("internal buffer is too small"));
+ return -1;
+}
+
+
+static int
+link_del(virConnectPtr conn,
+ const char *type,
+ const char *name)
+{
+ char nlmsgbuf[1024], recvbuf[1024];
+ struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp;
+ struct nlmsgerr *err;
+ char rtattbuf[256];
+ struct rtattr *rta, *rta1;
+ struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
+ int recvbuflen = sizeof(recvbuf);
+
+ memset(&nlmsgbuf, 0, sizeof(nlmsgbuf));
+
+ nlInit(nlm, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL, RTM_DELLINK);
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), &ifinfo, sizeof(ifinfo)))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_LINKINFO, NULL, 0);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!(rta1 = nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)))
+ goto buffer_too_small;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_INFO_KIND,
+ type, strlen(type));
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ rta1->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)rta1;
+
+ rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME,
+ name, strlen(name)+1);
+ if (!rta)
+ goto buffer_too_small;
+
+ if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len))
+ goto buffer_too_small;
+
+ if (nlComm(conn, nlm, recvbuf, &recvbuflen) < 0)
+ return -1;
same thing
+static
+int openTap(virConnectPtr conn,
+ const char *ifname,
+ int retries)
+{
+ FILE *file;
+ char path[256];
+ int ifindex;
+ char tapname[50];
+ int tapfd;
+
+ snprintf(path, sizeof(path), "/sys/class/net/%s/ifindex", ifname);
virBuildPathInternal could be another way, in any case one should
check the return value
+ file = fopen(path, "r");
+
+ if (!file) {
+ virReportSystemError(conn, errno,
+ _("cannot open macvtap file %s to determine "
+ "interface index"), path);
+ return -1;
+ }
+
+ if (fscanf(file, "%d", &ifindex) != 1) {
+ virReportSystemError(conn, errno,
+ "%s",_("cannot determine macvtap's tap
device "
+ "interface index"));
+ fclose(file);
+ return -1;
+ }
+
+ fclose(file);
+
+ snprintf(tapname, sizeof(tapname), "/dev/tap%d", ifindex);
+
+ while (1) {
+ // may need to wait for udev to be done
+ tapfd = open(tapname, O_RDWR);
+ if (tapfd < 0 && retries--) {
+ usleep(20000);
+ continue;
+ }
+ break;
+ }
hum, the function should check retries > 0 argument otherwise this
could hang the daemon.
+ if (tapfd < 0)
+ virReportSystemError(conn, errno,
+ _("cannot open macvtap tap device %s"),
+ tapname);
+
+ return tapfd;
+}
okay, I'm not too convinced by the buffer management code, but that
could be cleaned up in a later pach, so ACK
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit
http://xmlsoft.org/
daniel(a)veillard.com | Rpmfind RPM search engine
http://rpmfind.net/
http://veillard.com/ | virtualization library
http://libvirt.org/