In order for QEMU vCPU (and other) threads to run with a real-time scheduler,
libvirt needs to set the scheduler itself so that QEMU does not have to run
privileged.
Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
docs/formatdomain.html.in | 13 +++
docs/schemas/domaincommon.rng | 33 ++++++
src/conf/domain_conf.c | 137 +++++++++++++++++++++++-
src/conf/domain_conf.h | 24 +++++
tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 +
5 files changed, 210 insertions(+), 1 deletion(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index d8c31e0..60dad76 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -550,6 +550,8 @@
<quota>-1</quota>
<emulator_period>1000000</emulator_period>
<emulator_quota>-1</emulator_quota>
+ <vcpusched vcpu='0' scheduler='fifo' priority='1'/>
+ <iothreadsched iothread='0' scheduler='idle'/>
</cputune>
...
</domain>
@@ -656,6 +658,17 @@
<span class="since">Only QEMU driver support since
0.10.0</span>
</dd>
+ <dt><code>vcpusched</code> and <code>iothreadsched</code></dt>
+ <dd>
+ The optional <code>vcpusched</code> and <code>iothreadsched</code>
+ elements specify the scheduler (values <code>batch</code>,
+ <code>idle</code>, <code>fifo</code>, <code>rr</code> and
+ <code>other</code>, which is the default and is ignored) for the
+ vCPU or IOThread thread selected by the mandatory <code>vcpu</code>
+ or <code>iothread</code> attribute, respectively. For real-time
+ schedulers (<code>fifo</code>, <code>rr</code>), a priority must be
+ specified as well. The value range for the priority depends on the
+ host kernel.
+ <span class="since">Since 1.2.12</span>
+ </dd>
+
</dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index d250e6f..6653737 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -815,10 +815,43 @@
</attribute>
</element>
</zeroOrMore>
+ <zeroOrMore>
+ <element name="vcpusched">
+ <attribute name="vcpu">
+ <ref name="vcpuid"/>
+ </attribute>
+ <ref name="schedparam"/>
+ </element>
+ </zeroOrMore>
+ <zeroOrMore>
+ <element name="iothreadsched">
+ <attribute name="iothread">
+ <ref name="unsignedInt"/>
+ </attribute>
+ <ref name="schedparam"/>
+ </element>
+ </zeroOrMore>
</interleave>
</element>
</define>
+ <define name="schedparam"> <!-- scheduler attributes shared by vcpusched and iothreadsched -->
+ <attribute name="scheduler">
+ <choice>
+ <value>other</value>
+ <value>batch</value>
+ <value>idle</value>
+ <value>fifo</value>
+ <value>rr</value>
+ </choice>
+ </attribute>
+ <optional>
+ <attribute name="priority">
+ <ref name="unsignedShort"/>
+ </attribute>
+ </optional>
+ </define>
+
<!-- All the NUMA related tunables would go in the numatune -->
<define name="numatune">
<element name="numatune">
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 96d80a9..16adbf1 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -795,6 +795,13 @@ VIR_ENUM_IMPL(virDomainLoader,
"rom",
"pflash")
+VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, /* names follow the enum order in domain_conf.h */
+ "other", /* default */
+ "batch",
+ "idle",
+ "fifo",
+ "rr")
+
/* Internal mapping: subset of block job types that can be present in
* <mirror> XML (remaining types are not two-phase). */
VIR_ENUM_DECL(virDomainBlockJob)
@@ -2260,6 +2267,9 @@ void virDomainDefFree(virDomainDefPtr def)
virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin,
def->cputune.niothreadspin);
+ VIR_FREE(def->cputune.vcpusched);
+ VIR_FREE(def->cputune.iothreadsched);
+
virDomainNumatuneFree(def->numatune);
virSysinfoDefFree(def->sysinfo);
@@ -12653,6 +12663,65 @@ virDomainLoaderDefParseXML(xmlNodePtr node,
return ret;
}
+/**
+ * virDomainThreadSchedParse:
+ * @node: <vcpusched> or <iothreadsched> XML node to parse
+ * @maxid: exclusive upper bound for the thread id
+ * @name: name of the attribute that holds the thread id
+ * @sp: scheduler parameters to fill in
+ *
+ * Parses the thread id from attribute @name, the "scheduler" attribute
+ * and, for the fifo and rr schedulers, the mandatory "priority"
+ * attribute.  When "scheduler" is absent, the scheduler and priority
+ * stored in @sp are left untouched.
+ *
+ * Returns 0 on success, -1 (with an error reported) on failure.
+ */
+static int
+virDomainThreadSchedParse(xmlNodePtr node,
+                          unsigned int maxid,
+                          const char *name,
+                          virDomainThreadSchedParamPtr sp)
+{
+    char *tmp = NULL;
+    int sched = 0;
+
+    tmp = virXMLPropString(node, name);
+    if (!tmp) {
+        virReportError(VIR_ERR_XML_ERROR,
+                       _("missing attribute '%s'"), name);
+        goto error;
+    }
+
+    /* This helper is shared by vCPUs and IOThreads, so report the
+     * attribute that was actually parsed rather than a fixed "vcpu". */
+    if (virStrToLong_uip(tmp, NULL, 10, &sp->id) < 0 ||
+        sp->id >= maxid) {
+        virReportError(VIR_ERR_XML_ERROR,
+                       _("%s must be a non-negative integer smaller than "
+                         "%u, not '%s'"),
+                       name, maxid, tmp);
+        goto error;
+    }
+    VIR_FREE(tmp);
+
+    tmp = virXMLPropString(node, "scheduler");
+    if (tmp) {
+        if ((sched = virDomainThreadSchedTypeFromString(tmp)) < 0) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("invalid scheduler '%s'"), tmp);
+            goto error;
+        }
+        sp->scheduler = sched;
+        VIR_FREE(tmp);
+
+        /* Only the real-time schedulers take a priority. */
+        if (sp->scheduler == VIR_DOMAIN_THREAD_SCHED_FIFO ||
+            sp->scheduler == VIR_DOMAIN_THREAD_SCHED_RR) {
+            tmp = virXMLPropString(node, "priority");
+            if (!tmp) {
+                virReportError(VIR_ERR_XML_ERROR, "%s",
+                               _("missing scheduler priority"));
+                goto error;
+            }
+            if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) {
+                virReportError(VIR_ERR_XML_ERROR,
+                               _("invalid value '%s' for attribute priority"),
+                               tmp);
+                goto error;
+            }
+            VIR_FREE(tmp);
+        }
+    }
+
+    return 0;
+
+ error:
+    VIR_FREE(tmp);
+    return -1;
+}
+
+
static virDomainDefPtr
virDomainDefParseXML(xmlDocPtr xml,
xmlNodePtr root,
@@ -13201,6 +13270,51 @@ virDomainDefParseXML(xmlDocPtr xml,
}
VIR_FREE(nodes);
+    /* Scheduler settings for vCPU threads; at most maxvcpus elements. */
+    if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("cannot extract vcpusched nodes"));
+        goto error;
+    }
+    if (n) {
+        if (n > def->maxvcpus) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("too many vcpusched nodes in cputune"));
+            goto error;
+        }
+
+        if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0)
+            goto error;
+        def->cputune.nvcpusched = n;
+
+        for (i = 0; i < def->cputune.nvcpusched; i++) {
+            if (virDomainThreadSchedParse(nodes[i], def->maxvcpus, "vcpu",
+                                          &def->cputune.vcpusched[i]) < 0)
+                goto error;
+        }
+    }
+    /* Release the node list before the next query overwrites the pointer. */
+    VIR_FREE(nodes);
+
+    /* Scheduler settings for IOThreads; at most def->iothreads elements. */
+    if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("cannot extract iothreadsched nodes"));
+        goto error;
+    }
+    if (n) {
+        if (n > def->iothreads) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("too many iothreadsched nodes in cputune"));
+            goto error;
+        }
+
+        if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0)
+            goto error;
+        def->cputune.niothreadsched = n;
+
+        for (i = 0; i < def->cputune.niothreadsched; i++) {
+            /* NOTE(review): a bound of iothreads + 1 accepts ids from 0 up
+             * to and including def->iothreads - confirm the intended id
+             * base for IOThreads. */
+            if (virDomainThreadSchedParse(nodes[i], def->iothreads + 1,
+                                          "iothread",
+                                          &def->cputune.iothreadsched[i]) < 0)
+                goto error;
+        }
+    }
+    VIR_FREE(nodes);
/* analysis of cpu handling */
if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
@@ -19519,7 +19633,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
def->cputune.period || def->cputune.quota ||
def->cputune.emulatorpin ||
def->cputune.emulator_period || def->cputune.emulator_quota ||
- def->cputune.niothreadspin) {
+ def->cputune.niothreadspin ||
+ def->cputune.vcpusched || def->cputune.iothreadsched) {
virBufferAddLit(buf, "<cputune>\n");
cputune = true;
}
@@ -19592,6 +19707,26 @@ virDomainDefFormatInternal(virDomainDefPtr def,
VIR_FREE(cpumask);
}
+    /* Emit one <vcpusched> element per configured vCPU scheduler entry. */
+    for (i = 0; i < def->cputune.nvcpusched; i++) {
+        virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i];
+
+        /* sp->id is an unsigned int, so it has to be formatted with %u. */
+        virBufferAsprintf(buf, "<vcpusched vcpu='%u' scheduler='%s'",
+                          sp->id,
+                          virDomainThreadSchedTypeToString(sp->scheduler));
+        /* fifo and rr cannot be parsed back without a priority, so emit
+         * it for them even when it is 0. */
+        if (sp->priority ||
+            sp->scheduler == VIR_DOMAIN_THREAD_SCHED_FIFO ||
+            sp->scheduler == VIR_DOMAIN_THREAD_SCHED_RR)
+            virBufferAsprintf(buf, " priority='%d'", sp->priority);
+        virBufferAddLit(buf, "/>\n");
+    }
+
+    /* Emit one <iothreadsched> element per configured IOThread entry. */
+    for (i = 0; i < def->cputune.niothreadsched; i++) {
+        virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i];
+
+        virBufferAsprintf(buf, "<iothreadsched iothread='%u' scheduler='%s'",
+                          sp->id,
+                          virDomainThreadSchedTypeToString(sp->scheduler));
+        /* Same round-trip rule as for <vcpusched>. */
+        if (sp->priority ||
+            sp->scheduler == VIR_DOMAIN_THREAD_SCHED_FIFO ||
+            sp->scheduler == VIR_DOMAIN_THREAD_SCHED_RR)
+            virBufferAsprintf(buf, " priority='%d'", sp->priority);
+        virBufferAddLit(buf, "/>\n");
+    }
+
+
virBufferAdjustIndent(buf, -2);
if (cputune)
virBufferAddLit(buf, "</cputune>\n");
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 5cc42d1..636e2d1 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1815,6 +1815,24 @@ typedef enum {
VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST
} virDomainCpuPlacementMode;
+typedef enum { /* scheduler classes accepted by the "scheduler" attribute */
+ VIR_DOMAIN_THREAD_SCHED_OTHER = 0, /* "other", the default */
+ VIR_DOMAIN_THREAD_SCHED_BATCH, /* "batch" */
+ VIR_DOMAIN_THREAD_SCHED_IDLE, /* "idle" */
+ VIR_DOMAIN_THREAD_SCHED_FIFO, /* "fifo"; the parser requires a priority */
+ VIR_DOMAIN_THREAD_SCHED_RR, /* "rr"; the parser requires a priority */
+
+ VIR_DOMAIN_THREAD_SCHED_LAST
+} virDomainThreadSched;
+
+typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam;
+typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr;
+struct _virDomainThreadSchedParam { /* scheduler settings of one vCPU or IOThread */
+ unsigned int id; /* value of the vcpu= or iothread= attribute */
+ virDomainThreadSched scheduler; /* value of the scheduler= attribute */
+ int priority; /* priority= attribute; parsed only for fifo and rr */
+};
+
typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef;
typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr;
struct _virDomainTimerCatchupDef {
@@ -2002,6 +2020,11 @@ struct _virDomainCputune {
virDomainVcpuPinDefPtr emulatorpin;
size_t niothreadspin;
virDomainVcpuPinDefPtr *iothreadspin;
+
+ size_t nvcpusched;
+ virDomainThreadSchedParamPtr vcpusched;
+ size_t niothreadsched;
+ virDomainThreadSchedParamPtr iothreadsched;
};
typedef struct _virDomainBlkiotune virDomainBlkiotune;
@@ -2807,6 +2830,7 @@ VIR_ENUM_DECL(virDomainRNGModel)
VIR_ENUM_DECL(virDomainRNGBackend)
VIR_ENUM_DECL(virDomainTPMModel)
VIR_ENUM_DECL(virDomainTPMBackend)
+VIR_ENUM_DECL(virDomainThreadSched)
/* from libvirt.h */
VIR_ENUM_DECL(virDomainState)
VIR_ENUM_DECL(virDomainNostateReason)
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
index 813d201..243092b 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
@@ -4,6 +4,7 @@
<memory unit='KiB'>219136</memory>
<currentMemory unit='KiB'>219136</currentMemory>
<vcpu placement='static'>2</vcpu>
+ <iothreads>1</iothreads>
<cputune>
<shares>2048</shares>
<period>1000000</period>
@@ -11,6 +12,9 @@
<vcpupin vcpu='0' cpuset='0'/>
<vcpupin vcpu='1' cpuset='1'/>
<emulatorpin cpuset='1'/>
+ <vcpusched vcpu='0' scheduler='fifo' priority='1'/>
+ <vcpusched vcpu='1' scheduler='batch'/>
+ <iothreadsched iothread='0' scheduler='idle'/>
</cputune>
<os>
<type arch='i686' machine='pc'>hvm</type>
--
2.2.1