Hey,
Here's a patch that allows libvirt created guests to use KVM's recent
GSO support in order to increase the throughput achieved with virtio_net
network interfaces.
We shouldn't apply this yet - we need the kernel and kvm TUNGETIFF
patches to be applied first - so this is just intended as an RFC.
See also:
http://marc.info/?l=linux-netdev&m=121863813904363
and:
http://marc.info/?l=kvm&m=121863857305255
Cheers,
Mark.
Subject: [PATCH] kvm/virtio: Set IFF_VNET_HDR when setting up tap fds
IFF_VNET_HDR is a tun/tap flag that allows you to send and receive
large (i.e. GSO) packets and packets with partial checksums. Setting
the flag means that every packet is preceded by the same header which
virtio uses to communicate GSO/csum metadata.
By enabling this flag on the tap fds we create, we greatly increase
the achievable throughput with virtio_net.
However, we need to be careful to only set the flag when a) KVM has
support for this ABI and b) the value of the flag is queryable using
the TUNGETIFF ioctl.
Signed-off-by: Mark McLoughlin <markmc(a)redhat.com>
Index: libvirt/src/bridge.c
===================================================================
--- libvirt.orig/src/bridge.c 2008-08-13 15:40:34.000000000 +0100
+++ libvirt/src/bridge.c 2008-08-13 15:40:53.000000000 +0100
@@ -275,10 +275,52 @@
#endif
/**
+ * brProbeVnetHdr:
+ * @tapfd: a tun/tap file descriptor
+ *
+ * Check whether it is safe to enable the IFF_VNET_HDR flag on the
+ * tap interface.
+ *
+ * Setting IFF_VNET_HDR enables KVM's virtio_net driver to allow
+ * guests to pass larger (GSO) packets, with partial checksums, to
+ * the host. This greatly increases the achievable throughput.
+ *
+ * It is only useful to enable this when we're setting up a virtio
+ * interface. And it is only *safe* to enable it when we know for
+ * sure that a) qemu has support for IFF_VNET_HDR and b) the running
+ * kernel implements the TUNGETIFF ioctl(), which qemu needs to query
+ * the supplied tapfd.
+ *
+ * Returns 1 if it is safe to enable IFF_VNET_HDR on @tapfd, or 0 otherwise.
+ */
+static int
+brProbeVnetHdr(int tapfd)
+{
+#if defined(IFF_VNET_HDR) && defined(TUNGETFEATURES) && defined(TUNGETIFF)
+ unsigned int features;
+ struct ifreq dummy;
+
+ if (ioctl(tapfd, TUNGETFEATURES, &features) != 0)
+ return 0;
+
+ if (!(features & IFF_VNET_HDR))
+ return 0;
+
+ if (ioctl(tapfd, TUNGETIFF, &dummy) != -1 || errno != EBADFD)
+ return 0;
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+/**
* brAddTap:
* @ctl: bridge control pointer
* @bridge: the bridge name
* @ifname: the interface name (or name template)
+ * @vnet_hdr: whether to try enabling IFF_VNET_HDR
* @tapfd: file descriptor return value for the new tap device
*
* This function creates a new tap device on a bridge. @ifname can be either
@@ -292,6 +334,7 @@
brAddTap(brControl *ctl,
const char *bridge,
char **ifname,
+ int vnet_hdr,
int *tapfd)
{
int id, subst, fd;
@@ -307,6 +350,9 @@
if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
return errno;
+ if (vnet_hdr)
+ vnet_hdr = brProbeVnetHdr(fd);
+
do {
struct ifreq try;
int len;
@@ -315,6 +361,11 @@
try.ifr_flags = IFF_TAP|IFF_NO_PI;
+#ifdef IFF_VNET_HDR
+ if (vnet_hdr)
+ try.ifr_flags |= IFF_VNET_HDR;
+#endif
+
if (subst) {
len = snprintf(try.ifr_name, BR_IFNAME_MAXLEN, *ifname, id);
if (len >= BR_IFNAME_MAXLEN) {
Index: libvirt/src/bridge.h
===================================================================
--- libvirt.orig/src/bridge.h 2008-08-13 15:40:34.000000000 +0100
+++ libvirt/src/bridge.h 2008-08-13 15:40:53.000000000 +0100
@@ -61,6 +61,7 @@
int brAddTap (brControl *ctl,
const char *bridge,
char **ifname,
+ int vnet_hdr,
int *tapfd);
int brSetInterfaceUp (brControl *ctl,
Index: libvirt/src/qemu_conf.c
===================================================================
--- libvirt.orig/src/qemu_conf.c 2008-08-13 15:40:34.000000000 +0100
+++ libvirt/src/qemu_conf.c 2008-08-13 15:41:15.000000000 +0100
@@ -474,6 +474,8 @@
*flags |= QEMUD_CMD_FLAG_DRIVE;
if (strstr(help, "boot=on"))
*flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
+ if (strstr(help, "IFF_VNET_HDR"))
+ *flags |= QEMUD_CMD_FLAG_VNET_HDR;
if (ver >= 9000)
*flags |= QEMUD_CMD_FLAG_VNC_COLON;
}
@@ -545,7 +547,8 @@
int **tapfds,
int *ntapfds,
virDomainNetDefPtr net,
- int vlan)
+ int vlan,
+ int vnet_hdr)
{
virNetworkObjPtr network = NULL;
char *brname;
@@ -593,7 +596,7 @@
}
if ((err = brAddTap(driver->brctl, brname,
- &net->ifname, &tapfd))) {
+ &net->ifname, vnet_hdr, &tapfd))) {
if (errno == ENOTSUP) {
/* In this particular case, give a better diagnostic. */
qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -1011,9 +1014,16 @@
case VIR_DOMAIN_NET_TYPE_NETWORK:
case VIR_DOMAIN_NET_TYPE_BRIDGE:
{
- char *tap = qemudNetworkIfaceConnect(conn, driver,
- tapfds, ntapfds,
- net, vlan);
+ char *tap;
+ int vnet_hdr = 0;
+
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_VNET_HDR &&
+ net->model && !strcmp(net->model, "virtio"))
+ vnet_hdr = 1;
+
+ tap = qemudNetworkIfaceConnect(conn, driver,
+ tapfds, ntapfds,
+ net, vlan, vnet_hdr);
if (tap == NULL)
goto error;
ADD_ARG(tap);
Index: libvirt/src/qemu_conf.h
===================================================================
--- libvirt.orig/src/qemu_conf.h 2008-08-13 15:40:34.000000000 +0100
+++ libvirt/src/qemu_conf.h 2008-08-13 15:41:15.000000000 +0100
@@ -47,6 +47,7 @@
QEMUD_CMD_FLAG_DRIVE = (1 << 3),
QEMUD_CMD_FLAG_DRIVE_BOOT = (1 << 4),
QEMUD_CMD_FLAG_NAME = (1 << 5),
+ QEMUD_CMD_FLAG_VNET_HDR = (1 << 6),
};
/* Main driver state */