This is in response to:
https://bugzilla.redhat.com/show_bug.cgi?id=629662
Explanation
qemu's virtio-net-pci driver allows setting the algorithm used for tx
packets to either "bh" or "timer". This is done by adding
",tx=bh" or
",tx=timer" to the "-device virtio-net-pci" commandline option.
'bh' stands for 'bottom half'; when this is set, packet tx is all done
in an iothread in the bottom half of the driver. (In libvirt, this
option is called the more descriptive "iothread".)
'timer' means that tx work is done in qemu, and if there is more tx
data than can be sent at the present time, a timer is set before qemu
moves on to do other things; when the timer fires, another attempt is
made to send more data. (libvirt retains the name "timer" for this
option.)
The resulting difference, according to the qemu developer who added
the option, is:
bh makes tx more asynchronous and reduces latency, but potentially
causes more processor bandwidth contention since the cpu doing the
tx isn't necessarily the cpu where the guest generated the
packets.
Solution
This patch provides a libvirt domain xml knob to change the option on
the qemu commandline, by adding a new attribute "txmode" to the
<driver> element that can be placed inside any <interface> element in
a domain definition. Its use would be something like this:
<interface ...>
...
<model type='virtio'/>
<driver txmode='iothread'/>
...
</interface>
I chose to put this setting as an attribute to <driver> rather than as
a sub-element to <tune> because it is specific to the virtio-net
driver, not something that is generally usable by all network drivers.
(note that this is the same placement as the "driver name=..."
attribute used to choose kernel vs. userland backend for the
virtio-net driver.)
Actually adding the tx=xxx option to the qemu commandline is only done
if the version of qemu being used advertises it in the output of
qemu -device virtio-net-pci,?
If a particular txmode is requested in the XML, and the option isn't
listed in that help output, an UNSUPPORTED_CONFIG error is logged, and
the domain fails to start.
---
Changes from v1:
1) add error log / abort domain startup if option isn't supported by qemu
2) change attribute name from tx_alg to txmode
3) change attribute values from bh|timer to iothread|timer
(The difference in length between full patch and delta diff was small
enough that I decided to just resend the full patch.)
src/conf/domain_conf.c | 26 +++++++++++++++++++++++++-
src/conf/domain_conf.h | 11 +++++++++++
src/qemu/qemu_capabilities.c | 3 +++
src/qemu/qemu_capabilities.h | 1 +
src/qemu/qemu_command.c | 27 +++++++++++++++++++++++++++
5 files changed, 67 insertions(+), 1 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 84b866b..dee05c4 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -198,6 +198,11 @@ VIR_ENUM_IMPL(virDomainNetBackend, VIR_DOMAIN_NET_BACKEND_TYPE_LAST,
"qemu",
"vhost")
+VIR_ENUM_IMPL(virDomainNetVirtioTxMode, VIR_DOMAIN_NET_VIRTIO_TX_MODE_LAST,
+ "default",
+ "iothread",
+ "timer")
+
VIR_ENUM_IMPL(virDomainChrChannelTarget,
VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_LAST,
"guestfwd",
@@ -2477,6 +2482,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
char *port = NULL;
char *model = NULL;
char *backend = NULL;
+ char *txmode = NULL;
char *filter = NULL;
char *internal = NULL;
char *devaddr = NULL;
@@ -2565,6 +2571,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
model = virXMLPropString(cur, "type");
} else if (xmlStrEqual (cur->name, BAD_CAST "driver")) {
backend = virXMLPropString(cur, "name");
+ txmode = virXMLPropString(cur, "txmode");
} else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) {
filter = virXMLPropString(cur, "filter");
VIR_FREE(filterparams);
@@ -2769,6 +2776,18 @@ virDomainNetDefParseXML(virCapsPtr caps,
}
def->driver.virtio.name = name;
}
+ if (txmode != NULL) {
+ int m;
+ if (((m = virDomainNetVirtioTxModeTypeFromString(txmode)) < 0) ||
+ (m == VIR_DOMAIN_NET_VIRTIO_TX_MODE_DEFAULT)) {
+ virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unknown interface <driver
txmode='%s'> "
+ "has been specified"),
+ txmode);
+ goto error;
+ }
+ def->driver.virtio.txmode = m;
+ }
}
if (filter != NULL) {
@@ -2808,6 +2827,7 @@ cleanup:
VIR_FREE(bridge);
VIR_FREE(model);
VIR_FREE(backend);
+ VIR_FREE(txmode);
VIR_FREE(filter);
VIR_FREE(type);
VIR_FREE(internal);
@@ -6808,12 +6828,16 @@ virDomainNetDefFormat(virBufferPtr buf,
virBufferEscapeString(buf, " <model type='%s'/>\n",
def->model);
if (STREQ(def->model, "virtio") &&
- def->driver.virtio.name) {
+ (def->driver.virtio.name || def->driver.virtio.txmode)) {
virBufferAddLit(buf, " <driver");
if (def->driver.virtio.name) {
virBufferVSprintf(buf, " name='%s'",
virDomainNetBackendTypeToString(def->driver.virtio.name));
}
+ if (def->driver.virtio.txmode) {
+ virBufferVSprintf(buf, " txmode='%s'",
+
virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode));
+ }
virBufferAddLit(buf, "/>\n");
}
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 44d0a4b..0fd23e7 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -321,6 +321,15 @@ enum virDomainNetBackendType {
VIR_DOMAIN_NET_BACKEND_TYPE_LAST,
};
+/* the TX algorithm used for virtio interfaces */
+enum virDomainNetVirtioTxModeType {
+ VIR_DOMAIN_NET_VIRTIO_TX_MODE_DEFAULT, /* default for this version of qemu */
+ VIR_DOMAIN_NET_VIRTIO_TX_MODE_IOTHREAD,
+ VIR_DOMAIN_NET_VIRTIO_TX_MODE_TIMER,
+
+ VIR_DOMAIN_NET_VIRTIO_TX_MODE_LAST,
+};
+
/* the mode type for macvtap devices */
enum virDomainNetdevMacvtapType {
VIR_DOMAIN_NETDEV_MACVTAP_MODE_VEPA,
@@ -341,6 +350,7 @@ struct _virDomainNetDef {
union {
struct {
enum virDomainNetBackendType name; /* which driver backend to use */
+ enum virDomainNetVirtioTxModeType txmode;
} virtio;
} driver;
union {
@@ -1367,6 +1377,7 @@ VIR_ENUM_DECL(virDomainFS)
VIR_ENUM_DECL(virDomainFSAccessMode)
VIR_ENUM_DECL(virDomainNet)
VIR_ENUM_DECL(virDomainNetBackend)
+VIR_ENUM_DECL(virDomainNetVirtioTxMode)
VIR_ENUM_DECL(virDomainChrDevice)
VIR_ENUM_DECL(virDomainChrChannelTarget)
VIR_ENUM_DECL(virDomainChrConsoleTarget)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index 0e1f79c..935c669 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -1063,6 +1063,7 @@ qemuCapsExtractDeviceStr(const char *qemu,
"-device", "?",
"-device", "pci-assign,?",
"-device", "virtio-blk-pci,?",
+ "-device", "virtio-net-pci,?",
NULL);
virCommandAddEnvPassCommon(cmd);
/* qemu -help goes to stdout, but qemu -device ? goes to stderr. */
@@ -1104,6 +1105,8 @@ qemuCapsParseDeviceStr(const char *str, unsigned long long *flags)
if (strstr(str, "pci-assign.bootindex"))
*flags |= QEMUD_CMD_FLAG_PCI_BOOTINDEX;
}
+ if (strstr(str, "virtio-net-pci.tx="))
+ *flags |= QEMUD_CMD_FLAG_VIRTIO_TX_ALG;
return 0;
}
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index dd39b3b..c29d914 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -92,6 +92,7 @@ enum qemuCapsFlags {
QEMUD_CMD_FLAG_CCID_PASSTHRU = (1LL << 55), /* -device ccid-card-passthru */
QEMUD_CMD_FLAG_CHARDEV_SPICEVMC = (1LL << 56), /* newer -chardev spicevmc */
QEMUD_CMD_FLAG_DEVICE_SPICEVMC = (1LL << 57), /* older -device spicevmc*/
+ QEMUD_CMD_FLAG_VIRTIO_TX_ALG = (1LL << 58), /* -device virtio-net-pci,tx=string
*/
};
virCapsPtr qemuCapsInit(virCapsPtr old_caps);
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 1b213da..7b49b3c 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1577,16 +1577,43 @@ qemuBuildNicDevStr(virDomainNetDefPtr net,
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
const char *nic;
+ bool usingVirtio = false;
if (!net->model) {
nic = "rtl8139";
} else if (STREQ(net->model, "virtio")) {
nic = "virtio-net-pci";
+ usingVirtio = true;
} else {
nic = net->model;
}
virBufferAdd(&buf, nic, strlen(nic));
+ if (usingVirtio && net->driver.virtio.txmode) {
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_VIRTIO_TX_ALG) {
+ virBufferAddLit(&buf, ",tx=");
+ switch (net->driver.virtio.txmode) {
+ case VIR_DOMAIN_NET_VIRTIO_TX_MODE_IOTHREAD:
+ virBufferAddLit(&buf, "bh");
+ break;
+
+ case VIR_DOMAIN_NET_VIRTIO_TX_MODE_TIMER:
+ virBufferAddLit(&buf, "timer");
+ break;
+ default:
+ /* this should never happen, if it does, we need
+ * to add another case to this switch.
+ */
+ qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("unrecognized virtio-net-pci 'tx'
option"));
+ goto error;
+ }
+ } else {
+ qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("virtio-net-pci 'tx' option not supported in
this QEMU binary"));
+ goto error;
+ }
+ }
if (vlan == -1)
virBufferVSprintf(&buf, ",netdev=host%s", net->info.alias);
else
--
1.7.3.4