From: Lei Li <lilei(a)linux.vnet.ibm.com>
Enable per-disk block I/O throttling in the domain XML, as the
first per-disk I/O tuning parameter.
Signed-off-by: Lei Li <lilei(a)linux.vnet.ibm.com>
Signed-off-by: Zhi Yong Wu <wuzhy(a)linux.vnet.ibm.com>
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/formatdomain.html.in | 39 +++++++++++++
docs/schemas/domaincommon.rng | 122 +++++++++++++++++++++++++++++------------
src/conf/domain_conf.c | 90 ++++++++++++++++++++++++++++++-
src/conf/domain_conf.h | 14 +++++
4 files changed, 228 insertions(+), 37 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 61123ac..f45d0ce 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -922,6 +922,11 @@
<driver name="tap" type="aio"
cache="default"/>
<source file='/var/lib/xen/images/fv0'/
startupPolicy='optional'>
<target dev='hda' bus='ide'/>
+ <iotune>
+ <total_bytes_sec>10000000</total_bytes_sec>
+ <read_iops_sec>400000</read_iops_sec>
+ <write_iops_sec>100000</write_iops_sec>
+ </iotune>
<boot order='2'/>
<encryption type='...'>
...
@@ -1039,6 +1044,40 @@
<span class="since">Since 0.0.3; <code>bus</code>
attribute since 0.4.3;
"usb" attribute value since after 0.4.4; "sata" attribute
value since
0.9.7</span></dd>
+ <dt><code>iotune</code></dt>
+ <dd>The optional <code>iotune</code> element provides
+ additional per-device I/O tuning, with values that can vary
+ for each device (contrast this to
+ the <a href="#elementsBlockTuning"><code>&lt;blkiotune&gt;</code></a>
+ element, which applies globally to the domain). Currently,
+ the only tuning available is block I/O throttling for qemu.
+ This element has optional sub-elements; any sub-element not
+ specified or given with a value of 0 implies no
+ limit. <span class="since">Since 0.9.8</span>
+ <dl>
+ <dt><code>total_bytes_sec</code></dt>
+ <dd>The optional <code>total_bytes_sec</code> element is the
+ total throughput limit in bytes per second. This cannot
+ appear with <code>read_bytes_sec</code>
+ or <code>write_bytes_sec</code>.</dd>
+ <dt><code>read_bytes_sec</code></dt>
+ <dd>The optional <code>read_bytes_sec</code> element is the
+ read throughput limit in bytes per second.</dd>
+ <dt><code>write_bytes_sec</code></dt>
+ <dd>The optional <code>write_bytes_sec</code> element is the
+ write throughput limit in bytes per second.</dd>
+ <dt><code>total_iops_sec</code></dt>
+ <dd>The optional <code>total_iops_sec</code> element is the
+ limit on total I/O operations per second. This cannot
+ appear with <code>read_iops_sec</code>
+ or <code>write_iops_sec</code>.</dd>
+ <dt><code>read_iops_sec</code></dt>
+ <dd>The optional <code>read_iops_sec</code> element is the
+ limit on read I/O operations per second.</dd>
+ <dt><code>write_iops_sec</code></dt>
+ <dd>The optional <code>write_iops_sec</code> element is the
+ limit on write I/O operations per second.</dd>
+ </dl></dd>
<dt><code>driver</code></dt>
<dd>
The optional driver element allows specifying further details
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 8968ee6..bb6d94d 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -599,42 +599,47 @@
</element>
</define>
<define name="diskspec">
- <optional>
- <ref name="driver"/>
- </optional>
- <optional>
- <ref name="diskAuth"/>
- </optional>
- <ref name="target"/>
- <optional>
- <ref name="deviceBoot"/>
- </optional>
- <optional>
- <element name="readonly">
- <empty/>
- </element>
- </optional>
- <optional>
- <element name="shareable">
- <empty/>
- </element>
- </optional>
- <optional>
- <element name="transient">
- <empty/>
- </element>
- </optional>
- <optional>
- <element name="serial">
- <ref name="diskSerial"/>
- </element>
- </optional>
- <optional>
- <ref name="encryption"/>
- </optional>
- <optional>
- <ref name="address"/>
- </optional>
+ <interleave>
+ <optional>
+ <ref name="driver"/>
+ </optional>
+ <optional>
+ <ref name="diskAuth"/>
+ </optional>
+ <ref name="target"/>
+ <optional>
+ <ref name="deviceBoot"/>
+ </optional>
+ <optional>
+ <element name="readonly">
+ <empty/>
+ </element>
+ </optional>
+ <optional>
+ <element name="shareable">
+ <empty/>
+ </element>
+ </optional>
+ <optional>
+ <element name="transient">
+ <empty/>
+ </element>
+ </optional>
+ <optional>
+ <element name="serial">
+ <ref name="diskSerial"/>
+ </element>
+ </optional>
+ <optional>
+ <ref name="encryption"/>
+ </optional>
+ <optional>
+ <ref name="diskIoTune"/>
+ </optional>
+ <optional>
+ <ref name="address"/>
+ </optional>
+ </interleave>
</define>
<define name="snapshot">
<attribute name="snapshot">
@@ -2596,6 +2601,51 @@
</element>
</define>
+  <define name="diskIoTune">
+    <!-- Per-disk I/O throttling limits; every sub-element is optional and
+         the sub-elements may appear in any order. -->
+    <element name="iotune">
+      <interleave>
+        <!-- Throughput: either one total limit, or read/write limits. -->
+        <choice>
+          <element name="total_bytes_sec">
+            <data type="unsignedLong"/>
+          </element>
+          <interleave>
+            <optional>
+              <element name="read_bytes_sec">
+                <data type="unsignedLong"/>
+              </element>
+            </optional>
+            <optional>
+              <element name="write_bytes_sec">
+                <data type="unsignedLong"/>
+              </element>
+            </optional>
+          </interleave>
+        </choice>
+        <!-- Operations: either one total limit, or read/write limits. -->
+        <choice>
+          <element name="total_iops_sec">
+            <data type="unsignedLong"/>
+          </element>
+          <interleave>
+            <optional>
+              <element name="read_iops_sec">
+                <data type="unsignedLong"/>
+              </element>
+            </optional>
+            <optional>
+              <element name="write_iops_sec">
+                <data type="unsignedLong"/>
+              </element>
+            </optional>
+          </interleave>
+        </choice>
+      </interleave>
+    </element>
+  </define>
+
<!--
Optional hypervisor extensions in their own namespace:
QEmu
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index d365cee..a2702c5 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2394,6 +2394,7 @@ cleanup:
static virDomainDiskDefPtr
virDomainDiskDefParseXML(virCapsPtr caps,
xmlNodePtr node,
+ xmlXPathContextPtr ctxt,
virBitmapPtr bootMap,
unsigned int flags)
{
@@ -2594,6 +2595,50 @@ virDomainDiskDefParseXML(virCapsPtr caps,
}
child = child->next;
}
+        } else if (xmlStrEqual(cur->name, BAD_CAST "iotune")) {
+            /* Look up the limits relative to this disk's own <iotune> node;
+             * any lookup that fails leaves that limit at 0 (no limit). */
+            xmlNodePtr iotune_saved_node = ctxt->node;
+            ctxt->node = cur;
+            if (virXPathULongLong("string(./total_bytes_sec)", ctxt,
+                                  &def->blkdeviotune.total_bytes_sec) < 0) {
+                def->blkdeviotune.total_bytes_sec = 0;
+            }
+            if (virXPathULongLong("string(./read_bytes_sec)", ctxt,
+                                  &def->blkdeviotune.read_bytes_sec) < 0) {
+                def->blkdeviotune.read_bytes_sec = 0;
+            }
+            if (virXPathULongLong("string(./write_bytes_sec)", ctxt,
+                                  &def->blkdeviotune.write_bytes_sec) < 0) {
+                def->blkdeviotune.write_bytes_sec = 0;
+            }
+            if (virXPathULongLong("string(./total_iops_sec)", ctxt,
+                                  &def->blkdeviotune.total_iops_sec) < 0) {
+                def->blkdeviotune.total_iops_sec = 0;
+            }
+            if (virXPathULongLong("string(./read_iops_sec)", ctxt,
+                                  &def->blkdeviotune.read_iops_sec) < 0) {
+                def->blkdeviotune.read_iops_sec = 0;
+            }
+            if (virXPathULongLong("string(./write_iops_sec)", ctxt,
+                                  &def->blkdeviotune.write_iops_sec) < 0) {
+                def->blkdeviotune.write_iops_sec = 0;
+            }
+            ctxt->node = iotune_saved_node; /* restore before any goto error */
+            if (def->blkdeviotune.total_bytes_sec &&
+                (def->blkdeviotune.read_bytes_sec ||
+                 def->blkdeviotune.write_bytes_sec)) {
+                virDomainReportError(VIR_ERR_XML_ERROR,
+                                     _("total and read/write bytes_sec cannot be set at the same time"));
+                goto error;
+            }
+            if (def->blkdeviotune.total_iops_sec &&
+                (def->blkdeviotune.read_iops_sec ||
+                 def->blkdeviotune.write_iops_sec)) {
+                virDomainReportError(VIR_ERR_XML_ERROR,
+                                     _("total and read/write iops_sec cannot be set at the same time"));
+                goto error;
+            }
} else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) {
def->readonly = 1;
} else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) {
@@ -6078,7 +6123,7 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,
if (xmlStrEqual(node->name, BAD_CAST "disk")) {
dev->type = VIR_DOMAIN_DEVICE_DISK;
- if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node,
+ if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, ctxt,
NULL, flags)))
goto error;
} else if (xmlStrEqual(node->name, BAD_CAST "lease")) {
@@ -7148,6 +7193,7 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
for (i = 0 ; i < n ; i++) {
virDomainDiskDefPtr disk = virDomainDiskDefParseXML(caps,
nodes[i],
+ ctxt,
bootMap,
flags);
if (!disk)
@@ -9589,6 +9635,48 @@ virDomainDiskDefFormat(virBufferPtr buf,
virBufferAsprintf(buf, " <target dev='%s'
bus='%s'/>\n",
def->dst, bus);
+    /* Per-disk I/O throttling: emit <iotune> only when at least one limit
+     * is non-zero, and inside it only the non-zero limits (0 means no
+     * limit).  Every element is written on its own line. */
+    if (def->blkdeviotune.total_bytes_sec ||
+        def->blkdeviotune.read_bytes_sec ||
+        def->blkdeviotune.write_bytes_sec ||
+        def->blkdeviotune.total_iops_sec ||
+        def->blkdeviotune.read_iops_sec ||
+        def->blkdeviotune.write_iops_sec) {
+        virBufferAddLit(buf, " <iotune>\n");
+        if (def->blkdeviotune.total_bytes_sec) {
+            virBufferAsprintf(buf, " <total_bytes_sec>%llu</total_bytes_sec>\n",
+                              def->blkdeviotune.total_bytes_sec);
+        }
+
+        if (def->blkdeviotune.read_bytes_sec) {
+            virBufferAsprintf(buf, " <read_bytes_sec>%llu</read_bytes_sec>\n",
+                              def->blkdeviotune.read_bytes_sec);
+        }
+
+        if (def->blkdeviotune.write_bytes_sec) {
+            virBufferAsprintf(buf, " <write_bytes_sec>%llu</write_bytes_sec>\n",
+                              def->blkdeviotune.write_bytes_sec);
+        }
+
+        if (def->blkdeviotune.total_iops_sec) {
+            virBufferAsprintf(buf, " <total_iops_sec>%llu</total_iops_sec>\n",
+                              def->blkdeviotune.total_iops_sec);
+        }
+
+        if (def->blkdeviotune.read_iops_sec) {
+            virBufferAsprintf(buf, " <read_iops_sec>%llu</read_iops_sec>\n",
+                              def->blkdeviotune.read_iops_sec);
+        }
+
+        if (def->blkdeviotune.write_iops_sec) {
+            virBufferAsprintf(buf, " <write_iops_sec>%llu</write_iops_sec>\n",
+                              def->blkdeviotune.write_iops_sec);
+        }
+        virBufferAddLit(buf, " </iotune>\n");
+    }
+
if (def->bootIndex)
virBufferAsprintf(buf, " <boot order='%d'/>\n",
def->bootIndex);
if (def->readonly)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 7511178..ff6921a 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -313,6 +313,17 @@ enum virDomainDiskSecretType {
VIR_DOMAIN_DISK_SECRET_TYPE_LAST
};
+typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo;
+struct _virDomainBlockIoTuneInfo { /* per-disk I/O throttling limits; 0 means no limit */
+ unsigned long long total_bytes_sec; /* total throughput limit, bytes per second */
+ unsigned long long read_bytes_sec; /* read throughput limit, bytes per second */
+ unsigned long long write_bytes_sec; /* write throughput limit, bytes per second */
+ unsigned long long total_iops_sec; /* total I/O operations per second */
+ unsigned long long read_iops_sec; /* read I/O operations per second */
+ unsigned long long write_iops_sec; /* write I/O operations per second */
+};
+typedef virDomainBlockIoTuneInfo *virDomainBlockIoTuneInfoPtr;
+
/* Stores the virtual disk configuration */
typedef struct _virDomainDiskDef virDomainDiskDef;
typedef virDomainDiskDef *virDomainDiskDefPtr;
@@ -335,6 +346,9 @@ struct _virDomainDiskDef {
} auth;
char *driverName;
char *driverType;
+
+ virDomainBlockIoTuneInfo blkdeviotune;
+
char *serial;
int cachemode;
int error_policy; /* enum virDomainDiskErrorPolicy */
--
1.7.7.3