[libvirt] [PATCH v2] Support for qemu aio drive option

Revised patch against libvirt 0.7.6 to support qemu's aio option. qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance. This patch exposes this functionality through libvirt. Thanks a lot to Eric Blake and Matthias Bolte for their comments. --- docs/formatdomain.html.in | 4 +++- docs/schemas/domain.rng | 13 +++++++++++-- src/conf/domain_conf.c | 23 +++++++++++++++++++++++ src/conf/domain_conf.h | 10 ++++++++++ src/qemu/qemu_conf.c | 14 ++++++++++++++ src/qemu/qemu_conf.h | 2 ++ 6 files changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index ce49f7d..ec85ef3 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -478,7 +478,9 @@ attribute is the primary backend driver name, while the optional <code>type</code> attribute provides the sub-type. The optional <code>cache</code> attribute controls the cache mechanism, possible values are "default", "none", - "writethrough" and "writeback". <span class="since">Since 0.1.8</span> + "writethrough" and "writeback". Specific to KVM guests is the optional <code>aio</code> + attribute which controls what storage api is used for io operations, its possible + values are "threads" and "native". <span class="since">Since 0.1.8; "aio" attribute since 0.7.7</span> </dd> <dt><code>encryption</code></dt> <dd>If present, specifies how the volume is encrypted. See diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index bb6d00d..5d2dafe 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -499,8 +499,9 @@ <ref name="driverCache"/> </group> </choice> - <empty/> - </element> + <optional> + <ref name="driverAIO"/> + </optional> </define> <define name="driverFormat"> <attribute name="name"> @@ -521,6 +522,14 @@ </choice> </attribute> </define> + <define name="driverAIO"> + <attribute name="aio"> + <choice> + <value>threads</value> + <value>native</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 766993c..8bee7b8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -120,6 +120,11 @@ VIR_ENUM_IMPL(virDomainDiskCache, VIR_DOMAIN_DISK_CACHE_LAST, "writethrough", "writeback") +VIR_ENUM_IMPL(virDomainDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", "fdc", @@ -1228,6 +1233,7 @@ virDomainDiskDefParseXML(virConnectPtr conn, char *target = NULL; char *bus = NULL; char *cachetag = NULL; + char *aiotag = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -1293,6 +1299,7 @@ virDomainDiskDefParseXML(virConnectPtr conn, driverName = virXMLPropString(cur, "name"); driverType = virXMLPropString(cur, "type"); cachetag = virXMLPropString(cur, "cache"); + aiotag = virXMLPropString(cur, "aio"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -1409,6 +1416,13 @@ virDomainDiskDefParseXML(virConnectPtr conn, goto error; } + if (aiotag && + (def->aiomode = virDomainDiskAIOTypeFromString(aiotag)) < 0) { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + _("unknown disk aio mode '%s'"), aiotag); + goto error; + } + if (devaddr) { if (sscanf(devaddr, "%x:%x:%x", &def->info.addr.pci.domain, @@ -1450,6 +1464,7 @@ cleanup: VIR_FREE(driverType); VIR_FREE(driverName); VIR_FREE(cachetag); + VIR_FREE(aiotag); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -4565,6 +4580,7 @@ virDomainDiskDefFormat(virConnectPtr conn, const char *device = virDomainDiskDeviceTypeToString(def->device); const char *bus = virDomainDiskBusTypeToString(def->bus); const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); + const char *aiomode = virDomainDiskAIOTypeToString(def->aiomode); if (!type) { virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, @@ -4586,6 +4602,11 @@ virDomainDiskDefFormat(virConnectPtr conn, _("unexpected disk cache mode %d"), def->cachemode); return -1; } + if (!aiomode) { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + _("unexpected disk aio mode %d"), def->aiomode); + return -1; + } virBufferVSprintf(buf, " <disk type='%s' device='%s'>\n", @@ -4597,6 +4618,8 @@ virDomainDiskDefFormat(virConnectPtr conn, virBufferVSprintf(buf, " type='%s'", def->driverType); if (def->cachemode) virBufferVSprintf(buf, " cache='%s'", cachemode); + if (def->aiomode) + virBufferVSprintf(buf, " aio='%s'", aiomode); virBufferVSprintf(buf, "/>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0b79e88..07805a6 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -141,6 +141,14 @@ enum virDomainDiskCache { VIR_DOMAIN_DISK_CACHE_LAST }; +enum virDomainDiskAIO { + VIR_DOMAIN_DISK_AIO_DEFAULT, + VIR_DOMAIN_DISK_AIO_NATIVE, + VIR_DOMAIN_DISK_AIO_THREADS, + + VIR_DOMAIN_DISK_AIO_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -154,6 +162,7 @@ struct _virDomainDiskDef { char *driverType; char *serial; int cachemode; + int aiomode; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -897,6 +906,7 @@ VIR_ENUM_DECL(virDomainDisk) VIR_ENUM_DECL(virDomainDiskDevice) VIR_ENUM_DECL(virDomainDiskBus) VIR_ENUM_DECL(virDomainDiskCache) +VIR_ENUM_DECL(virDomainDiskAIO) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainFS) VIR_ENUM_DECL(virDomainNet) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 3d83a8f..fd3a670 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -83,6 +83,12 @@ VIR_ENUM_IMPL(qemuDiskCacheV2, VIR_DOMAIN_DISK_CACHE_LAST, "writethrough", "writeback"); +VIR_ENUM_DECL(qemuDiskAIO) +VIR_ENUM_IMPL(qemuDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads"); + VIR_ENUM_DECL(qemuVideo) VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, @@ -1137,6 +1143,8 @@ static unsigned int qemudComputeCmdFlags(const char *help, flags |= QEMUD_CMD_FLAG_DRIVE_CACHE_V2; if (strstr(help, "format=")) flags |= QEMUD_CMD_FLAG_DRIVE_FORMAT; + if (strstr(help, "aio=threads|native")) + flags |= QEMUD_CMD_FLAG_DRIVE_AIO; } if (strstr(help, "-vga") && !strstr(help, "-std-vga")) flags |= QEMUD_CMD_FLAG_VGA; @@ -2340,6 +2348,12 @@ qemuBuildDriveStr(virDomainDiskDefPtr disk, virBufferAddLit(&opt, ",cache=off"); } + if (disk->aiomode && (qemuCmdFlags & QEMUD_CMD_FLAG_DRIVE_AIO)) { + const char * mode = qemuDiskAIOTypeToString(disk->aiomode); + + virBufferVSprintf(&opt, ",aio=%s", mode); + } + if (virBufferError(&opt)) { virReportOOMError(NULL); goto error; diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 101f187..780ae6e 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -82,6 +82,8 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_SDL = (1 << 27), /* Is the new -sdl arg available */ QEMUD_CMD_FLAG_SMP_TOPOLOGY = (1 << 28), /* Is sockets=s,cores=c,threads=t available for -smp? */ QEMUD_CMD_FLAG_NETDEV = (1 << 29), /* The -netdev flag & netdev_add/remove monitor commands */ + + QEMUD_CMD_FLAG_DRIVE_AIO = (1 << 30), /* Is -drive aio= avail */ }; /* Main driver state */ -- 1.7.0.4

On 04/21/2010 08:28 AM, Matthias Dahl wrote:
Revised patch against libvirt 0.7.6 to support qemu's aio option.
It's hard to tell from your comments whether you built the patch against the latest git (0.8.0+).
+++ b/docs/formatdomain.html.in @@ -478,7 +478,9 @@ attribute is the primary backend driver name, while the optional <code>type</code> attribute provides the sub-type. The optional <code>cache</code> attribute controls the cache mechanism, possible values are "default", "none", - "writethrough" and "writeback". <span class="since">Since 0.1.8</span> + "writethrough" and "writeback". Specific to KVM guests is the optional <code>aio</code> + attribute which controls what storage api is used for io operations, its possible + values are "threads" and "native". <span class="since">Since 0.1.8; "aio" attribute since 0.7.7</span>
Which means you would want to s/0.7.7/0.8.1/.
</define> + <define name="driverAIO"> + <attribute name="aio"> + <choice> + <value>threads</value> + <value>native</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> ... diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 766993c..8bee7b8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -120,6 +120,11 @@ VIR_ENUM_IMPL(virDomainDiskCache, VIR_DOMAIN_DISK_CACHE_LAST, "writethrough", "writeback")
+VIR_ENUM_IMPL(virDomainDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads")
Why the use of "default" in the enum, but not in the XML? Is it a third possibility, or is it an alias for 'threads', in which case 'threads' should be listed first? But you are getting closer :) -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 04/21/2010 09:19 AM, Eric Blake wrote:
On 04/21/2010 08:28 AM, Matthias Dahl wrote:
Revised patch against libvirt 0.7.6 to support qemu's aio option.
It's hard to tell from your comments whether you built the patch against the latest git (0.8.0+).
This is now https://bugzilla.redhat.com/show_bug.cgi?id=591703, and now that 0.8.1 is out the door, I'm going to take another look at it in the near future. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

From: Matthias Dahl <mdvirt@designassembly.de> qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance. Thanks a lot to Eric Blake and Matthias Bolte for their comments. Red Hat Bugzilla #591703 Signed-off-by: Eric Blake <eblake@redhat.com> --- I've rebased Matthias' patch against the latest git, and discovered a bug in qemu_conf in the process (separate patch sent for that). I also discovered that formatdomain.html.in is incomplete; it lacks mention of at least the recent error_policy option. But I ran out of time to make a separate patch to bring it all up to date tonight. docs/formatdomain.html.in | 5 ++++- docs/schemas/domain.rng | 11 +++++++++++ src/conf/domain_conf.c | 23 +++++++++++++++++++++++ src/conf/domain_conf.h | 10 ++++++++++ src/qemu/qemu_conf.c | 15 ++++++++++++++- src/qemu/qemu_conf.h | 1 + tests/qemuhelptest.c | 6 ++++-- 7 files changed, 67 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index a23663a..5c178d8 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -520,7 +520,10 @@ attribute is the primary backend driver name, while the optional <code>type</code> attribute provides the sub-type. The optional <code>cache</code> attribute controls the cache mechanism, possible values are "default", "none", - "writethrough" and "writeback". <span class="since">Since 0.1.8</span> + "writethrough" and "writeback". Specific to KVM guests is the optional <code>aio</code> + attribute which controls what storage api is used for io operations, its possible + values are "threads" and "native". <span class="since">Since + 0.1.8; <code>aio</code> attribute since 0.8.2</span> </dd> <dt><code>encryption</code></dt> <dd>If present, specifies how the volume is encrypted. See diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 56b6705..acbbf9f 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -602,6 +602,9 @@ <optional> <ref name="driverErrorPolicy"/> </optional> + <optional> + <ref name="driverAIO"/> + </optional> <empty/> </element> </define> @@ -633,6 +636,14 @@ </choice> </attribute> </define> + <define name="driverAIO"> + <attribute name="aio"> + <choice> + <value>threads</value> + <value>native</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <choice> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 3e45f79..118585a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -132,6 +132,11 @@ VIR_ENUM_IMPL(virDomainDiskErrorPolicy, VIR_DOMAIN_DISK_ERROR_POLICY_LAST, "ignore", "enospace") +VIR_ENUM_IMPL(virDomainDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", "fdc", @@ -1378,6 +1383,7 @@ virDomainDiskDefParseXML(xmlNodePtr node, char *bus = NULL; char *cachetag = NULL; char *error_policy = NULL; + char *aiotag = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -1444,6 +1450,7 @@ virDomainDiskDefParseXML(xmlNodePtr node, driverType = virXMLPropString(cur, "type"); cachetag = virXMLPropString(cur, "cache"); error_policy = virXMLPropString(cur, "error_policy"); + aiotag = virXMLPropString(cur, "aio"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -1567,6 +1574,13 @@ virDomainDiskDefParseXML(xmlNodePtr node, goto error; } + if (aiotag && + (def->aiomode = virDomainDiskAIOTypeFromString(aiotag)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + _("unknown disk aio mode '%s'"), aiotag); + goto error; + } + if (devaddr) { if (virDomainParseLegacyDeviceAddress(devaddr, &def->info.addr.pci) < 0) { @@ -1608,6 +1622,7 @@ cleanup: VIR_FREE(driverName); VIR_FREE(cachetag); VIR_FREE(error_policy); + VIR_FREE(aiotag); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -4887,6 +4902,7 @@ virDomainDiskDefFormat(virBufferPtr buf, const char *bus = virDomainDiskBusTypeToString(def->bus); const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy); + const char *aiomode = virDomainDiskAIOTypeToString(def->aiomode); if (!type) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -4908,6 +4924,11 @@ virDomainDiskDefFormat(virBufferPtr buf, _("unexpected disk cache mode %d"), def->cachemode); return -1; } + if (!aiomode) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + _("unexpected disk aio mode %d"), def->aiomode); + return -1; + } virBufferVSprintf(buf, " <disk type='%s' device='%s'>\n", @@ -4923,6 +4944,8 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferVSprintf(buf, " cache='%s'", cachemode); if (def->error_policy) virBufferVSprintf(buf, " error_policy='%s'", error_policy); + if (def->aiomode) + virBufferVSprintf(buf, " aio='%s'", aiomode); virBufferVSprintf(buf, "/>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fadc8bd..1eec1d2 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -161,6 +161,14 @@ enum virDomainDiskErrorPolicy { VIR_DOMAIN_DISK_ERROR_POLICY_LAST }; +enum virDomainDiskAIO { + VIR_DOMAIN_DISK_AIO_DEFAULT, + VIR_DOMAIN_DISK_AIO_NATIVE, + VIR_DOMAIN_DISK_AIO_THREADS, + + VIR_DOMAIN_DISK_AIO_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -175,6 +183,7 @@ struct _virDomainDiskDef { char *serial; int cachemode; int error_policy; + int aiomode; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -1069,6 +1078,7 @@ VIR_ENUM_DECL(virDomainDiskDevice) VIR_ENUM_DECL(virDomainDiskBus) VIR_ENUM_DECL(virDomainDiskCache) VIR_ENUM_DECL(virDomainDiskErrorPolicy) +VIR_ENUM_DECL(virDomainDiskAIO) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainFS) VIR_ENUM_DECL(virDomainNet) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..87998c1 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1,7 +1,7 @@ /* * qemu_conf.c: QEMU configuration management * - * Copyright (C) 2006, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright (C) 2006-2010 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -85,6 +85,12 @@ VIR_ENUM_IMPL(qemuDiskCacheV2, VIR_DOMAIN_DISK_CACHE_LAST, "writethrough", "writeback"); +VIR_ENUM_DECL(qemuDiskAIO) +VIR_ENUM_IMPL(qemuDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads"); + VIR_ENUM_DECL(qemuVideo) VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, @@ -1150,6 +1156,8 @@ static unsigned long long qemudComputeCmdFlags(const char *help, flags |= QEMUD_CMD_FLAG_DRIVE_CACHE_V2; if (strstr(help, "format=")) flags |= QEMUD_CMD_FLAG_DRIVE_FORMAT; + if (strstr(help, "aio=threads|native")) + flags |= QEMUD_CMD_FLAG_DRIVE_AIO; } if (strstr(help, "-vga") && !strstr(help, "-std-vga")) flags |= QEMUD_CMD_FLAG_VGA; @@ -2518,6 +2526,11 @@ qemuBuildDriveStr(virDomainDiskDefPtr disk, } } + if (disk->aiomode && (qemuCmdFlags & QEMUD_CMD_FLAG_DRIVE_AIO)) { + virBufferVSprintf(&opt, ",aio=%s", + qemuDiskAIOTypeToString(disk->aiomode)); + } + if (virBufferError(&opt)) { virReportOOMError(); goto error; diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..44043b4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_DRIVE_AIO = (1LL << 36), /* -drive aio= supported */ }; /* Main driver state */ diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c index 517a8fe..a00c743 100644 --- a/tests/qemuhelptest.c +++ b/tests/qemuhelptest.c @@ -244,7 +244,8 @@ mymain(int argc, char **argv) QEMUD_CMD_FLAG_DEVICE | QEMUD_CMD_FLAG_SMP_TOPOLOGY | QEMUD_CMD_FLAG_RTC | - QEMUD_CMD_FLAG_NO_HPET, + QEMUD_CMD_FLAG_NO_HPET | + QEMUD_CMD_FLAG_DRIVE_AIO, 12001, 0, 0); DO_TEST("qemu-kvm-0.12.3", QEMUD_CMD_FLAG_VNC_COLON | @@ -274,7 +275,8 @@ mymain(int argc, char **argv) QEMUD_CMD_FLAG_VNET_HOST | QEMUD_CMD_FLAG_NO_HPET | QEMUD_CMD_FLAG_NO_KVM_PIT | - QEMUD_CMD_FLAG_TDF, + QEMUD_CMD_FLAG_TDF | + QEMUD_CMD_FLAG_DRIVE_AIO, 12003, 1, 0); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; -- 1.7.0.1

In looking for other examples of XML additions, I noticed commit 8bf6799 was the last of a four-patch series for adding timer support. I'm reposting this into a split series, but still leaving attribution to Matthias, since the text of the result is still very close to his original submission. [PATCHv4 1/4] qemu aio: add XML parsing [PATCHv4 2/4] qemu aio: add aio attribute to schema [PATCHv4 3/4] qemu aio: parse aio support from qemu -help [PATCHv4 4/4] qemu aio: enable support

From: Matthias Dahl <mdvirt@designassembly.de> Allows aio={threads|native} as an optional attribute to <driver>. Signed-off-by: Eric Blake <eblake@redhat.com> --- Changes from v3: export the private symbol break a single patch into a 4-patch series, along the lines of what was done in commit 8bf6799 when adding timer support src/conf/domain_conf.c | 23 +++++++++++++++++++++++ src/conf/domain_conf.h | 10 ++++++++++ src/libvirt_private.syms | 3 +++ 3 files changed, 36 insertions(+), 0 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 3e45f79..118585a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -132,6 +132,11 @@ VIR_ENUM_IMPL(virDomainDiskErrorPolicy, VIR_DOMAIN_DISK_ERROR_POLICY_LAST, "ignore", "enospace") +VIR_ENUM_IMPL(virDomainDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", "fdc", @@ -1378,6 +1383,7 @@ virDomainDiskDefParseXML(xmlNodePtr node, char *bus = NULL; char *cachetag = NULL; char *error_policy = NULL; + char *aiotag = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -1444,6 +1450,7 @@ virDomainDiskDefParseXML(xmlNodePtr node, driverType = virXMLPropString(cur, "type"); cachetag = virXMLPropString(cur, "cache"); error_policy = virXMLPropString(cur, "error_policy"); + aiotag = virXMLPropString(cur, "aio"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -1567,6 +1574,13 @@ virDomainDiskDefParseXML(xmlNodePtr node, goto error; } + if (aiotag && + (def->aiomode = virDomainDiskAIOTypeFromString(aiotag)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + _("unknown disk aio mode '%s'"), aiotag); + goto error; + } + if (devaddr) { if (virDomainParseLegacyDeviceAddress(devaddr, &def->info.addr.pci) < 0) { @@ -1608,6 +1622,7 @@ cleanup: VIR_FREE(driverName); VIR_FREE(cachetag); VIR_FREE(error_policy); + VIR_FREE(aiotag); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -4887,6 +4902,7 @@ virDomainDiskDefFormat(virBufferPtr buf, const char *bus = virDomainDiskBusTypeToString(def->bus); const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy); + const char *aiomode = virDomainDiskAIOTypeToString(def->aiomode); if (!type) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -4908,6 +4924,11 @@ virDomainDiskDefFormat(virBufferPtr buf, _("unexpected disk cache mode %d"), def->cachemode); return -1; } + if (!aiomode) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + _("unexpected disk aio mode %d"), def->aiomode); + return -1; + } virBufferVSprintf(buf, " <disk type='%s' device='%s'>\n", @@ -4923,6 +4944,8 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferVSprintf(buf, " cache='%s'", cachemode); if (def->error_policy) virBufferVSprintf(buf, " error_policy='%s'", error_policy); + if (def->aiomode) + virBufferVSprintf(buf, " aio='%s'", aiomode); virBufferVSprintf(buf, "/>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fadc8bd..1eec1d2 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -161,6 +161,14 @@ enum virDomainDiskErrorPolicy { VIR_DOMAIN_DISK_ERROR_POLICY_LAST }; +enum virDomainDiskAIO { + VIR_DOMAIN_DISK_AIO_DEFAULT, + VIR_DOMAIN_DISK_AIO_NATIVE, + VIR_DOMAIN_DISK_AIO_THREADS, + + VIR_DOMAIN_DISK_AIO_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -175,6 +183,7 @@ struct _virDomainDiskDef { char *serial; int cachemode; int error_policy; + int aiomode; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -1069,6 +1078,7 @@ VIR_ENUM_DECL(virDomainDiskDevice) VIR_ENUM_DECL(virDomainDiskBus) VIR_ENUM_DECL(virDomainDiskCache) VIR_ENUM_DECL(virDomainDiskErrorPolicy) +VIR_ENUM_DECL(virDomainDiskAIO) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainFS) VIR_ENUM_DECL(virDomainNet) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 09f3da1..f1555d8 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -198,6 +198,9 @@ virDomainDeviceInfoIterate; virDomainClockOffsetTypeToString; virDomainClockOffsetTypeFromString; virDomainDiskErrorPolicyTypeToString; +virDomainDiskErrorPolicyTypeFromString; +virDomainDiskAIOToString; +virDomainDiskAIOFromString; virDomainTimerNameTypeToString; virDomainTimerNameTypeFromString; virDomainTimerTrackTypeToString; -- 1.7.0.1

From: Matthias Dahl <mdvirt@designassembly.de> aio is an optional attribute of <driver> for a given disk. Signed-off-by: Eric Blake <eblake@redhat.com> --- docs/formatdomain.html.in | 5 ++++- docs/schemas/domain.rng | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index be0446d..6d6641b 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -586,7 +586,10 @@ attribute is the primary backend driver name, while the optional <code>type</code> attribute provides the sub-type. The optional <code>cache</code> attribute controls the cache mechanism, possible values are "default", "none", - "writethrough" and "writeback". <span class="since">Since 0.1.8</span> + "writethrough" and "writeback". Specific to KVM guests is the optional <code>aio</code> + attribute which controls what storage api is used for io operations, its possible + values are "threads" and "native". <span class="since">Since + 0.1.8; <code>aio</code> attribute since 0.8.2</span> </dd> <dt><code>encryption</code></dt> <dd>If present, specifies how the volume is encrypted. See diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 56b6705..acbbf9f 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -602,6 +602,9 @@ <optional> <ref name="driverErrorPolicy"/> </optional> + <optional> + <ref name="driverAIO"/> + </optional> <empty/> </element> </define> @@ -633,6 +636,14 @@ </choice> </attribute> </define> + <define name="driverAIO"> + <attribute name="aio"> + <choice> + <value>threads</value> + <value>native</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <choice> -- 1.7.0.1

From: Matthias Dahl <mdvirt@designassembly.de> Signed-off-by: Eric Blake <eblake@redhat.com> --- I'm still not sure why this code includes "default" in its list of enum options, as the XML does not allow it. But since that was copying from other elements, like qemuDiskCacheV2, which also have a "default" listing not present in XML... src/qemu/qemu_conf.c | 10 +++++++++- src/qemu/qemu_conf.h | 1 + tests/qemuhelptest.c | 6 ++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..9f84988 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1,7 +1,7 @@ /* * qemu_conf.c: QEMU configuration management * - * Copyright (C) 2006, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright (C) 2006-2010 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -85,6 +85,12 @@ VIR_ENUM_IMPL(qemuDiskCacheV2, VIR_DOMAIN_DISK_CACHE_LAST, "writethrough", "writeback"); +VIR_ENUM_DECL(qemuDiskAIO) +VIR_ENUM_IMPL(qemuDiskAIO, VIR_DOMAIN_DISK_AIO_LAST, + "default", + "native", + "threads"); + VIR_ENUM_DECL(qemuVideo) VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, @@ -1150,6 +1156,8 @@ static unsigned long long qemudComputeCmdFlags(const char *help, flags |= QEMUD_CMD_FLAG_DRIVE_CACHE_V2; if (strstr(help, "format=")) flags |= QEMUD_CMD_FLAG_DRIVE_FORMAT; + if (strstr(help, "aio=threads|native")) + flags |= QEMUD_CMD_FLAG_DRIVE_AIO; } if (strstr(help, "-vga") && !strstr(help, "-std-vga")) flags |= QEMUD_CMD_FLAG_VGA; diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..44043b4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_DRIVE_AIO = (1LL << 36), /* -drive aio= supported */ }; /* Main driver state */ diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c index 517a8fe..a00c743 100644 --- a/tests/qemuhelptest.c +++ b/tests/qemuhelptest.c @@ -244,7 +244,8 @@ mymain(int argc, char **argv) QEMUD_CMD_FLAG_DEVICE | QEMUD_CMD_FLAG_SMP_TOPOLOGY | QEMUD_CMD_FLAG_RTC | - QEMUD_CMD_FLAG_NO_HPET, + QEMUD_CMD_FLAG_NO_HPET | + QEMUD_CMD_FLAG_DRIVE_AIO, 12001, 0, 0); DO_TEST("qemu-kvm-0.12.3", QEMUD_CMD_FLAG_VNC_COLON | @@ -274,7 +275,8 @@ mymain(int argc, char **argv) QEMUD_CMD_FLAG_VNET_HOST | QEMUD_CMD_FLAG_NO_HPET | QEMUD_CMD_FLAG_NO_KVM_PIT | - QEMUD_CMD_FLAG_TDF, + QEMUD_CMD_FLAG_TDF | + QEMUD_CMD_FLAG_DRIVE_AIO, 12003, 1, 0); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; -- 1.7.0.1

From: Matthias Dahl <mdvirt@designassembly.de> qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance. Thanks a lot to Eric Blake and Matthias Bolte for their comments. Red Hat Bugzilla #591703 Signed-off-by: Eric Blake <eblake@redhat.com> --- This is so simple, maybe it's worth squashing into 3/4? src/qemu/qemu_conf.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 9f84988..87998c1 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -2526,6 +2526,11 @@ qemuBuildDriveStr(virDomainDiskDefPtr disk, } } + if (disk->aiomode && (qemuCmdFlags & QEMUD_CMD_FLAG_DRIVE_AIO)) { + virBufferVSprintf(&opt, ",aio=%s", + qemuDiskAIOTypeToString(disk->aiomode)); + } + if (virBufferError(&opt)) { virReportOOMError(); goto error; -- 1.7.0.1

On 05/12/2010 11:26 PM, Eric Blake wrote:
From: Matthias Dahl <mdvirt@designassembly.de>
qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance.
Thanks a lot to Eric Blake and Matthias Bolte for their comments.
(sorry, I reviewed the one big patch instead of the split up patches. It seems to be the same except for the addition of the symbols to libvirt_private.syms) The implementation looks perfectly reasonable. I'm just concerned that the concept of what we are doing is too qemu specific, though. Basically, I think what we are trying to model here is the concept of an I/O backend implementation, correct? Should we maybe change this to be "<iobackend type='%s'/>", and then have available enums like: aiothreads aionative ... That way, for other hypervisors that do something different (like VirtualBox, which just has AIO on/off), we can have additional enums to describe their behavior. Even further, if a given hypervisor wanted to do something like "Direct I/O" for the I/O backend (as an example), we could also use this element to specify that. What do you think? -- Chris Lalancette

[adding Matthias on cc] On 05/17/2010 03:07 PM, Chris Lalancette wrote:
On 05/12/2010 11:26 PM, Eric Blake wrote:
From: Matthias Dahl <mdvirt@designassembly.de>
qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance.
The implementation looks perfectly reasonable. I'm just concerned that the concept of what we are doing is too qemu specific, though. Basically, I think what we are trying to model here is the concept of an I/O backend implementation, correct? Should we maybe change this to be "<iobackend type='%s'/>", and then have available enums like:
aiothreads aionative ...
That way, for other hypervisors that do something different (like VirtualBox, which just has AIO on/off), we can have additional enums to describe their behavior. Even further, if a given hypervisor wanted to do something like "Direct I/O" for the I/O backend (as an example), we could also use this element to specify that. What do you think?
Indeed, having a more-generic attribute "type", where only a subset of known enum values are appropriate per a given hypervisor, makes sense to me. After all, that's how we treated the <clock> addition in 0.8.0; not all HV's support all the <timer name="foo"> timers. That is a bigger rewrite, though, and while we could still complete it in time for 0.8.2, it becomes much tougher to get it complete this week. If we really want it now, then I can shepherd those changes through, but I need feedback from other list members first on whether the proposed <iobackend> makes more sense. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 05/17/2010 03:31 PM, Eric Blake wrote:
[adding Matthias on cc]
Reviving an old thread, as it is something I plan on working in the near future.
On 05/17/2010 03:07 PM, Chris Lalancette wrote:
On 05/12/2010 11:26 PM, Eric Blake wrote:
From: Matthias Dahl <mdvirt@designassembly.de>
qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance.
The implementation looks perfectly reasonable. I'm just concerned that the concept of what we are doing is too qemu specific, though. Basically, I think what we are trying to model here is the concept of an I/O backend implementation, correct? Should we maybe change this to be "<iobackend type='%s'/>", and then have available enums like:
aiothreads aionative ...
That way, for other hypervisors that do something different (like VirtualBox, which just has AIO on/off), we can have additional enums to describe their behavior. Even further, if a given hypervisor wanted to do something like "Direct I/O" for the I/O backend (as an example), we could also use this element to specify that. What do you think?
Indeed, having a more-generic attribute "type", where only a subset of known enum values are appropriate per a given hypervisor, makes sense to me.
I'm thinking that the following XML changes make the most sense for this (note that the choice of aio mode is per-disk), where the change is the addition of the new <iobackend> element: <domain ...> <devices> <disk type='block' device='disk'> <driver name='qemu' type='qcow2' cache='none'/> <source dev='...'/> <target dev='hda' bus='ide'/> <iobackend type='aiothreads'/> </disk> </devices> </domain> If <iobackend> is missing, it is the same as type='default'; type can be 'default' (whatever qemu prefers, which may differ over qemu versions), 'aiothreads' (use ,aio=threads in qemu -drive argument), or 'aionative' (use ,aio=native in qemu -drive argument); where we can add other types later if needed. Does this deserve a new <iobackend> element, or should it merely be a new optional attribute of the existing <driver> element, as in: <driver name='qemu' type='qcow2' cache='none' iobackend='aiothreads'/> -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 01/03/11 - 04:43:06PM, Eric Blake wrote:
On 05/17/2010 03:31 PM, Eric Blake wrote:
[adding Matthias on cc]
Reviving an old thread, as it is something I plan on working in the near future.
On 05/17/2010 03:07 PM, Chris Lalancette wrote:
On 05/12/2010 11:26 PM, Eric Blake wrote:
From: Matthias Dahl <mdvirt@designassembly.de>
qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance.
The implementation looks perfectly reasonable. I'm just concerned that the concept of what we are doing is too qemu specific, though. Basically, I think what we are trying to model here is the concept of an I/O backend implementation, correct? Should we maybe change this to be "<iobackend type='%s'/>", and then have available enums like:
aiothreads aionative ...
That way, for other hypervisors that do something different (like VirtualBox, which just has AIO on/off), we can have additional enums to describe their behavior. Even further, if a given hypervisor wanted to do something like "Direct I/O" for the I/O backend (as an example), we could also use this element to specify that. What do you think?
Indeed, having a more-generic attribute "type", where only a subset of known enum values are appropriate per a given hypervisor, makes sense to me.
I'm thinking that the following XML changes make the most sense for this (note that the choice of aio mode is per-disk), where the change is the addition of the new <iobackend> element:
<domain ...> <devices> <disk type='block' device='disk'> <driver name='qemu' type='qcow2' cache='none'/> <source dev='...'/> <target dev='hda' bus='ide'/> <iobackend type='aiothreads'/> </disk> </devices> </domain>
If <iobackend> is missing, it is the same as type='default'; type can be 'default' (whatever qemu prefers, which may differ over qemu versions), 'aiothreads' (use ,aio=threads in qemu -drive argument), or 'aionative' (use ,aio=native in qemu -drive argument); where we can add other types later if needed.
Does this deserve a new <iobackend> element, or should it merely be a new optional attribute of the existing <driver> element, as in:
<driver name='qemu' type='qcow2' cache='none' iobackend='aiothreads'/>
I slightly prefer making it a new attribute of <driver>. The only reason I can see to make it a new <iobackend> element is if we think it will get significantly more complex in the future (necessitating new XML elements). Do we think that? -- Chris Lalancette

On Tue, Jan 04, 2011 at 08:12:21AM -0500, Chris Lalancette wrote:
On 01/03/11 - 04:43:06PM, Eric Blake wrote:
On 05/17/2010 03:31 PM, Eric Blake wrote:
[adding Matthias on cc]
Reviving an old thread, as it is something I plan on working in the near future.
On 05/17/2010 03:07 PM, Chris Lalancette wrote:
On 05/12/2010 11:26 PM, Eric Blake wrote:
From: Matthias Dahl <mdvirt@designassembly.de>
qemu allows the user to choose what io storage api should be used, either the default (threads) or native (linux aio) which in the latter case can result in better performance.
The implementation looks perfectly reasonable. I'm just concerned that the concept of what we are doing is too qemu specific, though. Basically, I think what we are trying to model here is the concept of an I/O backend implementation, correct? Should we maybe change this to be "<iobackend type='%s'/>", and then have available enums like:
aiothreads aionative ...
That way, for other hypervisors that do something different (like VirtualBox, which just has AIO on/off), we can have additional enums to describe their behavior. Even further, if a given hypervisor wanted to do something like "Direct I/O" for the I/O backend (as an example), we could also use this element to specify that. What do you think?
Indeed, having a more-generic attribute "type", where only a subset of known enum values are appropriate per a given hypervisor, makes sense to me.
I'm thinking that the following XML changes make the most sense for this (note that the choice of aio mode is per-disk), where the change is the addition of the new <iobackend> element:
<domain ...> <devices> <disk type='block' device='disk'> <driver name='qemu' type='qcow2' cache='none'/> <source dev='...'/> <target dev='hda' bus='ide'/> <iobackend type='aiothreads'/> </disk> </devices> </domain>
If <iobackend> is missing, it is the same as type='default'; type can be 'default' (whatever qemu prefers, which may differ over qemu versions), 'aiothreads' (use ,aio=threads in qemu -drive argument), or 'aionative' (use ,aio=native in qemu -drive argument); where we can add other types later if needed.
Does this deserve a new <iobackend> element, or should it merely be a new optional attribute of the existing <driver> element, as in:
<driver name='qemu' type='qcow2' cache='none' iobackend='aiothreads'/>
I slightly prefer making it a new attribute of <driver>. The only reason I can see to make it a new <iobackend> element is if we think it will get significantly more complex in the future (necessitating new XML elements). Do we think that?
Yep, the whole '<driver>' element is really specifying an I/O backend, so I don't think it makes sense to add another <iobackend> element, nor is it particularly great as an attribute name. It should be an attribute on <driver> though Daniel

On 01/04/2011 09:33 AM, Daniel P. Berrange wrote:
Does this deserve a new <iobackend> element, or should it merely be a new optional attribute of the existing <driver> element, as in:
<driver name='qemu' type='qcow2' cache='none' iobackend='aiothreads'/>
I slightly prefer making it a new attribute of <driver>. The only reason I can see to make it a new <iobackend> element is if we think it will get significantly more complex in the future (necessitating new XML elements). Do we think that?
Yep, the whole '<driver>' element is really specifying an I/O backend, so I don't think it makes sense to add another <iobackend> element, nor is it particularly great as an attribute name. It should be an attribute on <driver> though
Any better ideas for an attribute name? The complaint was that Matthias' original proposal of naming the attribute 'aio' (as in <driver aio='threads'/>) felt a bit too specific to qemu's implementation; yet iobackend='aiothreads' is long. Maybe just io? <driver io='aiothreads'/> where io maps to qemu's ,aio=threads designation, but leaves us flexibility of adding other attribute values unrelated to the name aio? -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (4)
-
Chris Lalancette
-
Daniel P. Berrange
-
Eric Blake
-
Matthias Dahl