For virtio disks and interfaces, qemu allows users to enable or disable
the ioeventfd feature. This means qemu can execute domain code while
another thread waits for I/O events. In some cases this is a win, in
others a loss. This feature is available via the 'asyncio' attribute in
the disk and interface <driver> element. It accepts 'on' and 'off'.
Leaving this attribute out leaves the decision to the hypervisor.
---
This is a rework, as suggested in:
https://www.redhat.com/archives/libvir-list/2011-May/msg01269.html
docs/formatdomain.html.in | 34 ++++++++++++-
docs/schemas/domain.rng | 14 +++++
src/conf/domain_conf.c | 49 ++++++++++++++++++-
src/conf/domain_conf.h | 11 ++++
src/libvirt_private.syms | 2 +
src/qemu/qemu_capabilities.c | 3 +
src/qemu/qemu_capabilities.h | 1 +
src/qemu/qemu_command.c | 23 +++++++++
tests/qemuhelptest.c | 3 +-
.../qemuxml2argv-disk-asyncio.args | 11 ++++
.../qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml | 51 ++++++++++++++++++++
tests/qemuxml2argvtest.c | 3 +
12 files changed, 201 insertions(+), 4 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 98fb2b4..8d23740 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -767,7 +767,7 @@
</disk>
...
<disk type='network'>
- <driver name="qemu" type="raw"
io="threads"/>
+ <driver name="qemu" type="raw" io="threads"
asyncio="on"/>
<source protocol="sheepdog" name="image_name">
<host name="hostname" port="7000"/>
</source>
@@ -851,6 +851,20 @@
policies on I/O; qemu guests support "threads" and
"native". <span class="since">Since
0.8.8</span>
</li>
+ <li>
+ The optional <code>asyncio</code> attribute allows users to
+ set <a
href='https://patchwork.kernel.org/patch/43390/'>
+ domain I/O asynchronous handling</a> for the disk device.
+ The default is left to the discretion of the hypervisor.
+ Accepted values are "on" and "off". Enabling this allows
+ qemu to execute the VM while a separate thread handles I/O.
+ Typically, guests experiencing high system CPU utilization
+ during I/O will benefit from this. On the other hand,
+ on an overloaded host it could increase guest I/O latency.
+ <span class="since">Since 0.9.3 (QEMU and KVM
only)</span>
+ <b>In general you should leave this option alone, unless you
+ are very certain you know what you are doing.</b>
+ </li>
</ul>
</dd>
<dt><code>boot</code></dt>
@@ -1631,7 +1645,7 @@ qemu-kvm -net nic,model=? /dev/null
<source network='default'/>
<target dev='vnet1'/>
<model type='virtio'/>
- <b><driver name='vhost'
txmode='iothread'/></b>
+ <b><driver name='vhost' txmode='iothread'
asyncio='on'/></b>
</interface>
</devices>
...</pre>
@@ -1682,6 +1696,22 @@ qemu-kvm -net nic,model=? /dev/null
<b>In general you should leave this option alone, unless you
are very certain you know what you are doing.</b>
</dd>
+ <dt><code>asyncio</code></dt>
+ <dd>
+ This optional attribute allows users to set
+ <a
href='https://patchwork.kernel.org/patch/43390/'>
+ domain I/O asynchronous handling</a> for the interface device.
+ The default is left to the discretion of the hypervisor.
+ Accepted values are "on" and "off". Enabling this allows
+ qemu to execute the VM while a separate thread handles I/O.
+ Typically, guests experiencing high system CPU utilization
+ during I/O will benefit from this. On the other hand,
+ on an overloaded host it could increase guest I/O latency.
+ <span class="since">Since 0.9.3 (QEMU and KVM
only)</span><br/><br/>
+
+ <b>In general you should leave this option alone, unless you
+ are very certain you know what you are doing.</b>
+ </dd>
</dl>
<h5><a name="elementsNICSTargetOverride">Overriding the target
element</a></h5>
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 0be0371..08b92ed 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -758,6 +758,9 @@
<optional>
<ref name="driverIO"/>
</optional>
+ <optional>
+ <ref name="asyncIO"/>
+ </optional>
<empty/>
</element>
</define>
@@ -797,6 +800,14 @@
</choice>
</attribute>
</define>
+ <define name="asyncIO">
+ <attribute name="asyncio">
+ <choice>
+ <value>on</value>
+ <value>off</value>
+ </choice>
+ </attribute>
+ </define>
<define name="controller">
<element name="controller">
<choice>
@@ -1097,6 +1108,9 @@
</choice>
</attribute>
</optional>
+ <optional>
+ <ref name="asyncIO"/>
+ </optional>
<empty/>
</element>
</optional>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 65d4f89..2b81f2b 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -163,6 +163,11 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST,
"native",
"threads")
+VIR_ENUM_IMPL(virDomainAsyncIo, VIR_DOMAIN_ASYNC_IO_LAST,
+ "default",
+ "on",
+ "off")
+
VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST,
"ide",
"fdc",
@@ -2001,6 +2006,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
char *cachetag = NULL;
char *error_policy = NULL;
char *iotag = NULL;
+ char *asyncio = NULL;
char *devaddr = NULL;
virStorageEncryptionPtr encryption = NULL;
char *serial = NULL;
@@ -2116,6 +2122,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
cachetag = virXMLPropString(cur, "cache");
error_policy = virXMLPropString(cur, "error_policy");
iotag = virXMLPropString(cur, "io");
+ asyncio = virXMLPropString(cur, "asyncio");
} else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) {
def->readonly = 1;
} else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) {
@@ -2252,6 +2259,24 @@ virDomainDiskDefParseXML(virCapsPtr caps,
}
}
+ if (asyncio) {
+ if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO) {
+ virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("disk asyncio mode supported "
+ "only for virtio bus"));
+ goto error;
+ }
+
+ int i;
+ if ((i = virDomainAsyncIoTypeFromString(asyncio)) <= 0) {
+ virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("unknown disk asyncio mode '%s'"),
+ asyncio);
+ goto error;
+ }
+ def->asyncio=i;
+ }
+
if (devaddr) {
if (virDomainParseLegacyDeviceAddress(devaddr,
&def->info.addr.pci) < 0) {
@@ -2314,6 +2339,7 @@ cleanup:
VIR_FREE(cachetag);
VIR_FREE(error_policy);
VIR_FREE(iotag);
+ VIR_FREE(asyncio);
VIR_FREE(devaddr);
VIR_FREE(serial);
virStorageEncryptionFree(encryption);
@@ -2701,6 +2727,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
char *model = NULL;
char *backend = NULL;
char *txmode = NULL;
+ char *asyncio = NULL;
char *filter = NULL;
char *internal = NULL;
char *devaddr = NULL;
@@ -2790,6 +2817,7 @@ virDomainNetDefParseXML(virCapsPtr caps,
} else if (xmlStrEqual (cur->name, BAD_CAST "driver")) {
backend = virXMLPropString(cur, "name");
txmode = virXMLPropString(cur, "txmode");
+ asyncio = virXMLPropString(cur, "asyncio");
} else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) {
filter = virXMLPropString(cur, "filter");
VIR_FREE(filterparams);
@@ -3006,6 +3034,16 @@ virDomainNetDefParseXML(virCapsPtr caps,
}
def->driver.virtio.txmode = m;
}
+ if (asyncio) {
+ int i;
+ if ((i = virDomainAsyncIoTypeFromString(asyncio)) <= 0) {
+ virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("unknown interface asyncio mode
'%s'"),
+ asyncio);
+ goto error;
+ }
+ def->driver.virtio.asyncio = i;
+ }
}
if (filter != NULL) {
@@ -3045,6 +3083,7 @@ cleanup:
VIR_FREE(model);
VIR_FREE(backend);
VIR_FREE(txmode);
+ VIR_FREE(asyncio);
VIR_FREE(filter);
VIR_FREE(type);
VIR_FREE(internal);
@@ -8175,6 +8214,7 @@ virDomainDiskDefFormat(virBufferPtr buf,
const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode);
const char *error_policy =
virDomainDiskErrorPolicyTypeToString(def->error_policy);
const char *iomode = virDomainDiskIoTypeToString(def->iomode);
+ const char *asyncio = virDomainAsyncIoTypeToString(def->asyncio);
if (!type) {
virDomainReportError(VIR_ERR_INTERNAL_ERROR,
@@ -8206,7 +8246,8 @@ virDomainDiskDefFormat(virBufferPtr buf,
" <disk type='%s'
device='%s'>\n",
type, device);
- if (def->driverName || def->driverType || def->cachemode) {
+ if (def->driverName || def->driverType || def->cachemode ||
+ def->asyncio) {
virBufferAsprintf(buf, " <driver");
if (def->driverName)
virBufferAsprintf(buf, " name='%s'", def->driverName);
@@ -8218,6 +8259,8 @@ virDomainDiskDefFormat(virBufferPtr buf,
virBufferAsprintf(buf, " error_policy='%s'",
error_policy);
if (def->iomode)
virBufferAsprintf(buf, " io='%s'", iomode);
+ if (def->asyncio)
+ virBufferAsprintf(buf, " asyncio='%s'", asyncio);
virBufferAsprintf(buf, "/>\n");
}
@@ -8508,6 +8551,10 @@ virDomainNetDefFormat(virBufferPtr buf,
virBufferAsprintf(buf, " txmode='%s'",
virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode));
}
+ if (def->driver.virtio.asyncio) {
+ virBufferAsprintf(buf, " asyncio='%s'",
+
virDomainAsyncIoTypeToString(def->driver.virtio.asyncio));
+ }
virBufferAddLit(buf, "/>\n");
}
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 41c8136..01a98c9 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -206,6 +206,14 @@ enum virDomainDiskIo {
VIR_DOMAIN_DISK_IO_LAST
};
+enum virDomainAsyncIo {
+ VIR_DOMAIN_ASYNC_IO_DEFAULT = 0,
+ VIR_DOMAIN_ASYNC_IO_ON,
+ VIR_DOMAIN_ASYNC_IO_OFF,
+
+ VIR_DOMAIN_ASYNC_IO_LAST
+};
+
/* Stores the virtual disk configuration */
typedef struct _virDomainDiskDef virDomainDiskDef;
typedef virDomainDiskDef *virDomainDiskDefPtr;
@@ -225,6 +233,7 @@ struct _virDomainDiskDef {
int error_policy;
int bootIndex;
int iomode;
+ int asyncio;
unsigned int readonly : 1;
unsigned int shared : 1;
virDomainDeviceInfo info;
@@ -361,6 +370,7 @@ struct _virDomainNetDef {
struct {
enum virDomainNetBackendType name; /* which driver backend to use */
enum virDomainNetVirtioTxModeType txmode;
+ enum virDomainAsyncIo asyncio;
} virtio;
} driver;
union {
@@ -1521,6 +1531,7 @@ VIR_ENUM_DECL(virDomainDiskCache)
VIR_ENUM_DECL(virDomainDiskErrorPolicy)
VIR_ENUM_DECL(virDomainDiskProtocol)
VIR_ENUM_DECL(virDomainDiskIo)
+VIR_ENUM_DECL(virDomainAsyncIo)
VIR_ENUM_DECL(virDomainController)
VIR_ENUM_DECL(virDomainControllerModel)
VIR_ENUM_DECL(virDomainFS)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 737cd31..f3a44f6 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -205,6 +205,8 @@ dnsmasqSave;
virDiskNameToBusDeviceIndex;
virDiskNameToIndex;
virDomainAssignDef;
+virDomainAsyncIoTypeFromString;
+virDomainAsyncIoTypeToString;
virDomainChrConsoleTargetTypeFromString;
virDomainChrConsoleTargetTypeToString;
virDomainChrDefForeach;
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index 28c89b5..ad62a07 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -121,6 +121,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST,
"device-qxl-vga",
"pci-multifunction", /* 60 */
+ "virtio-blk-pci.ioeventfd",
);
struct qemu_feature_flags {
@@ -1207,6 +1208,8 @@ qemuCapsParseDeviceStr(const char *str, virBitmapPtr flags)
qemuCapsSet(flags, QEMU_CAPS_VIRTIO_TX_ALG);
if (strstr(str, "name \"qxl-vga\""))
qemuCapsSet(flags, QEMU_CAPS_DEVICE_QXL_VGA);
+ if (strstr(str, "virtio-blk-pci.ioeventfd"))
+ qemuCapsSet(flags, QEMU_CAPS_VIRTIO_IOEVENTFD);
return 0;
}
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index e6d2fa3..0b9c8be 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -96,6 +96,7 @@ enum qemuCapsFlags {
QEMU_CAPS_VIRTIO_TX_ALG = 58, /* -device virtio-net-pci,tx=string */
QEMU_CAPS_DEVICE_QXL_VGA = 59, /* Is the primary and vga campatible qxl device
named qxl-vga? */
QEMU_CAPS_PCI_MULTIFUNCTION = 60, /* -device multifunction=on|off */
+ QEMU_CAPS_VIRTIO_IOEVENTFD = 61, /* IOeventFD feature:
virtio-{net|blk}-pci.ioeventfd=on/off */
QEMU_CAPS_LAST, /* this must always be the last item */
};
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index cb81354..8273e7e 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1287,6 +1287,26 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
return 0;
}
+static int
+qemuBuildAsyncIoStr(virBufferPtr buf,
+ enum virDomainAsyncIo use,
+ virBitmapPtr qemuCaps)
+{
+ if (qemuCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_IOEVENTFD)) {
+ switch (use) {
+ case VIR_DOMAIN_ASYNC_IO_ON:
+ case VIR_DOMAIN_ASYNC_IO_OFF:
+ virBufferAsprintf(buf, ",ioeventfd=%s",
+ virDomainAsyncIoTypeToString(use));
+ break;
+ default:
+ /* In other cases (_DEFAULT, _LAST) we don't
+ * want to add anything */
+ break;
+ }
+ }
+ return 0;
+}
#define QEMU_SERIAL_PARAM_ACCEPTED_CHARS \
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
@@ -1552,6 +1572,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk,
break;
case VIR_DOMAIN_DISK_BUS_VIRTIO:
virBufferAddLit(&opt, "virtio-blk-pci");
+ qemuBuildAsyncIoStr(&opt, disk->asyncio, qemuCaps);
qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps);
break;
case VIR_DOMAIN_DISK_BUS_USB:
@@ -1774,6 +1795,8 @@ qemuBuildNicDevStr(virDomainNetDefPtr net,
goto error;
}
}
+ if (usingVirtio)
+ qemuBuildAsyncIoStr(&buf, net->driver.virtio.asyncio, qemuCaps);
if (vlan == -1)
virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias);
else
diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c
index 327a0c7..119e771 100644
--- a/tests/qemuhelptest.c
+++ b/tests/qemuhelptest.c
@@ -475,7 +475,8 @@ mymain(void)
QEMU_CAPS_CCID_PASSTHRU,
QEMU_CAPS_CHARDEV_SPICEVMC,
QEMU_CAPS_DEVICE_QXL_VGA,
- QEMU_CAPS_VIRTIO_TX_ALG);
+ QEMU_CAPS_VIRTIO_TX_ALG,
+ QEMU_CAPS_VIRTIO_IOEVENTFD);
return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args
b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args
new file mode 100644
index 0000000..c512f15
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args
@@ -0,0 +1,11 @@
+LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \
+/usr/bin/qemu -S -M pc-0.13 -m 1024 -smp 1 -nodefaults \
+-monitor unix:/tmp/test-monitor,server,nowait -no-acpi \
+-boot dc -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \
+-drive file=/var/lib/libvirt/images/f14.img,if=none,id=drive-virtio-disk0 \
+-device
virtio-blk-pci,ioeventfd=on,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 \
+-drive
file=/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso,if=none,media=cdrom,id=drive-ide0-1-0
\
+-device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 \
+-device
virtio-net-pci,tx=bh,ioeventfd=off,vlan=0,id=net0,mac=52:54:00:e5:48:58,bus=pci.0,addr=0x3
\
+-net user,vlan=0,name=hostnet0 -serial pty -usb -vnc 127.0.0.1:-809 -std-vga \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml
b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml
new file mode 100644
index 0000000..5a16bd1
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml
@@ -0,0 +1,51 @@
+<domain type='qemu'>
+ <name>test</name>
+ <memory>1048576</memory>
+ <vcpu>1</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc-0.13'>hvm</type>
+ <boot dev='cdrom'/>
+ <boot dev='hd'/>
+ <bootmenu enable='yes'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>restart</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu</emulator>
+ <disk type='file' device='disk'>
+ <driver name='qemu' type='qcow2' asyncio='on'/>
+ <source file='/var/lib/libvirt/images/f14.img'/>
+ <target dev='vda' bus='virtio'/>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x04' function='0x0'/>
+ </disk>
+ <disk type='file' device='cdrom'>
+ <driver name='qemu' type='raw'/>
+ <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
+ <target dev='hdc' bus='ide'/>
+ <readonly/>
+ <address type='drive' controller='0' bus='1'
unit='0'/>
+ </disk>
+ <interface type='user'>
+ <mac address='52:54:00:e5:48:58'/>
+ <model type='virtio'/>
+ <driver name='vhost' txmode='iothread'
asyncio='off'/>
+ </interface>
+ <controller type='virtio-serial' index='0'>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x06' function='0x0'/>
+ </controller>
+ <serial type='pty'>
+ <target port='0'/>
+ </serial>
+ <console type='pty'>
+ <target type='serial' port='0'/>
+ </console>
+ <graphics type='vnc' port='5091' autoport='no'
listen='127.0.0.1'/>
+ <video>
+ <model type='vga' vram='9216' heads='1'/>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x02' function='0x0'/>
+ </video>
+ </devices>
+</domain>
+
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index b8fd468..489025f 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -333,6 +333,9 @@ mymain(void)
DO_TEST("disk-aio", false,
QEMU_CAPS_DRIVE, QEMU_CAPS_DRIVE_AIO,
QEMU_CAPS_DRIVE_CACHE_V2, QEMU_CAPS_DRIVE_FORMAT);
+ DO_TEST("disk-asyncio", false,
+ QEMU_CAPS_DRIVE, QEMU_CAPS_VIRTIO_IOEVENTFD,
+ QEMU_CAPS_VIRTIO_TX_ALG, QEMU_CAPS_DEVICE);
DO_TEST("graphics-vnc", false, NONE);
DO_TEST("graphics-vnc-socket", false, NONE);
--
1.7.5.rc3