On 10.02.2015 16:35, Martin Kletzander wrote:
In order for QEMU vCPU (and other) threads to run with RT scheduler,
libvirt needs to take care of that so QEMU doesn't have to run privileged.
Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
docs/formatdomain.html.in | 16 ++
docs/schemas/domaincommon.rng | 44 +++++
src/conf/domain_conf.c | 183 ++++++++++++++++++++-
src/conf/domain_conf.h | 24 +++
.../qemuxml2argv-cputune-iothreadsched-toomuch.xml | 38 +++++
.../qemuxml2argv-cputune-iothreadsched.xml | 39 +++++
.../qemuxml2argv-cputune-vcpusched-overlap.xml | 38 +++++
tests/qemuxml2argvtest.c | 2 +
.../qemuxml2xmlout-cputune-iothreadsched.xml | 39 +++++
tests/qemuxml2xmltest.c | 1 +
10 files changed, 423 insertions(+), 1 deletion(-)
create mode 100644
tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml
create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index d8144ea..3381dfe 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -550,6 +550,8 @@
<quota>-1</quota>
<emulator_period>1000000</emulator_period>
<emulator_quota>-1</emulator_quota>
+ <vcpusched vcpus='0-4,^3' scheduler='fifo'
priority='1'/>
+ <iothreadsched iothreads='2' scheduler='batch'/>
</cputune>
...
</domain>
@@ -652,6 +654,20 @@
<span class="since">Only QEMU driver support since
0.10.0</span>
</dd>
+ <dt><code>vcpusched</code> and
<code>iothreadsched</code></dt>
+ <dd>
+ The optional <code>vcpusched</code> elements specifie the scheduler
s/specifie/specifies/
And maybe s/scheduler/scheduler type/?
+ (values <code>batch</code>,
<code>idle</code>, <code>fifo</code>,
+ <code>rr</code>) for particular vCPU/IOThread threads (based on
+ <code>vcpus</code> and <code>iothreads</code>, leaving
out
+ <code>vcpus</code>/<code>iothreads</code> sets the
default).
+ For real-time schedulers (<code>fifo</code>,
<code>rr</code>),
+ priority must be specified as well (and is ignored for
+ non-real-time ones). The value range for the priority depends
+ on the host kernel (usually 1-99).
+ <span class="since">Since 1.2.12</span>
+ </dd>
+
</dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index d467dce..98766dc 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -815,10 +815,54 @@
</attribute>
</element>
</zeroOrMore>
+ <zeroOrMore>
+ <element name="vcpusched">
+ <optional>
+ <attribute name="vcpus">
+ <ref name='cpuset'/>
+ </attribute>
+ </optional>
+ <ref name="schedparam"/>
+ </element>
+ </zeroOrMore>
+ <zeroOrMore>
+ <element name="iothreadsched">
+ <optional>
+ <attribute name="iothreads">
+ <ref name='cpuset'/>
+ </attribute>
+ </optional>
+ <ref name="schedparam"/>
+ </element>
+ </zeroOrMore>
</interleave>
</element>
</define>
+ <define name="schedparam">
+ <choice>
+ <group>
+ <attribute name="scheduler">
+ <choice>
+ <value>batch</value>
+ <value>idle</value>
+ </choice>
+ </attribute>
+ </group>
+ <group>
+ <attribute name="scheduler">
+ <choice>
+ <value>fifo</value>
+ <value>rr</value>
+ </choice>
+ </attribute>
+ <attribute name="priority">
+ <ref name="unsignedShort"/>
+ </attribute>
+ </group>
+ </choice>
+ </define>
+
Have we returned to the rule where each new XML extension requires ACK
from at least one of Daniels?
<!-- All the NUMA related tunables would go in the numatune
-->
<define name="numatune">
<element name="numatune">
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 77319dc..3fb68b3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -772,6 +772,13 @@ VIR_ENUM_IMPL(virDomainLoader,
"rom",
"pflash")
+VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST,
+ "other", /* default */
+ "batch",
+ "idle",
+ "fifo",
+ "rr")
+
/* Internal mapping: subset of block job types that can be present in
* <mirror> XML (remaining types are not two-phase). */
VIR_ENUM_DECL(virDomainBlockJob)
@@ -2234,6 +2241,14 @@ void virDomainDefFree(virDomainDefPtr def)
virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin,
def->cputune.niothreadspin);
+ for (i = 0; i < def->cputune.nvcpusched; i++)
+ virBitmapFree(def->cputune.vcpusched[i].ids);
+ VIR_FREE(def->cputune.vcpusched);
+
+ for (i = 0; i < def->cputune.niothreadsched; i++)
+ virBitmapFree(def->cputune.iothreadsched[i].ids);
+ VIR_FREE(def->cputune.iothreadsched);
+
virDomainNumatuneFree(def->numatune);
virSysinfoDefFree(def->sysinfo);
@@ -12595,6 +12610,70 @@ virDomainLoaderDefParseXML(xmlNodePtr node,
return ret;
}
+static int
+virDomainThreadSchedParse(xmlNodePtr node,
+ unsigned int minid,
+ unsigned int maxid,
+ const char *name,
+ virDomainThreadSchedParamPtr sp)
+{
+ char *tmp = NULL;
+ int sched = 0;
+
+ tmp = virXMLPropString(node, name);
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Missing attribute '%s' in element
'%sched'"),
+ name, name);
+ goto error;
+ }
+
+ if (!virBitmapParse(tmp, 0, &sp->ids,
+ VIR_DOMAIN_CPUMASK_LEN) ||
+ virBitmapIsAllClear(sp->ids) ||
+ virBitmapNextSetBit(sp->ids, -1) < minid ||
+ virBitmapLastSetBit(sp->ids) > maxid) {
+
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid value of '%s': %s"),
+ name, tmp);
+ goto error;
+ }
+ VIR_FREE(tmp);
+
+ tmp = virXMLPropString(node, "scheduler");
+ if (tmp) {
+ if ((sched = virDomainThreadSchedTypeFromString(tmp)) <= 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid scheduler attribute: '%s'"),
+ tmp);
+ goto error;
+ }
+ sp->scheduler = sched;
+
+ VIR_FREE(tmp);
+ if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) {
+ tmp = virXMLPropString(node, "priority");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing scheduler priority"));
+ goto error;
+ }
+ if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Invalid value for element priority"));
+ goto error;
+ }
+ VIR_FREE(tmp);
+ }
+ }
+
+ return 0;
+
+ error:
+ VIR_FREE(tmp);
+ return -1;
+}
static virDomainDefPtr
virDomainDefParseXML(xmlDocPtr xml,
@@ -13139,6 +13218,77 @@ virDomainDefParseXML(xmlDocPtr xml,
}
VIR_FREE(nodes);
+ if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0)
{
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot extract vcpusched nodes"));
+ goto error;
+ }
+ if (n) {
+ if (n > def->maxvcpus) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("too many vcpusched nodes in cputune"));
+ goto error;
+ }
+
+ if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0)
+ goto error;
+ def->cputune.nvcpusched = n;
+
+ for (i = 0; i < def->cputune.nvcpusched; i++) {
+ if (virDomainThreadSchedParse(nodes[i],
+ 0, def->maxvcpus - 1,
+ "vcpus",
+ &def->cputune.vcpusched[i]) < 0)
+ goto error;
+
+ for (j = 0; j < i; j++) {
+ if (virBitmapOverlaps(def->cputune.vcpusched[i].ids,
+ def->cputune.vcpusched[j].ids)) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("vcpusched attributes 'vcpus' "
+ "must not overlap"));
+ goto error;
+ }
+ }
+ }
+ }
+ VIR_FREE(nodes);
+
+ if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes))
< 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot extract iothreadsched nodes"));
+ goto error;
+ }
+ if (n) {
+ if (n > def->iothreads) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("too many iothreadsched nodes in cputune"));
+ goto error;
+ }
+
+ if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0)
+ goto error;
+ def->cputune.niothreadsched = n;
+
+ for (i = 0; i < def->cputune.niothreadsched; i++) {
+ if (virDomainThreadSchedParse(nodes[i],
+ 1, def->iothreads,
+ "iothreads",
+ &def->cputune.iothreadsched[i]) <
0)
+ goto error;
+
+ for (j = 0; j < i; j++) {
+ if (virBitmapOverlaps(def->cputune.iothreadsched[i].ids,
+ def->cputune.iothreadsched[j].ids)) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("iothreadsched attributes 'iothreads'
"
+ "must not overlap"));
+ goto error;
+ }
+ }
+ }
+ }
+ VIR_FREE(nodes);
Once parsed, do we also need a check in virDomainDefCheckABIStability()?
Or can the sched parameters change on migration? I guess they can, but
seems like you dig more into this area than me recently.
/* analysis of cpu handling */
if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
@@ -19434,7 +19584,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
def->cputune.period || def->cputune.quota ||
def->cputune.emulatorpin ||
def->cputune.emulator_period || def->cputune.emulator_quota ||
- def->cputune.niothreadspin) {
+ def->cputune.niothreadspin ||
+ def->cputune.vcpusched || def->cputune.iothreadsched) {
virBufferAddLit(buf, "<cputune>\n");
cputune = true;
}
@@ -19503,6 +19654,36 @@ virDomainDefFormatInternal(virDomainDefPtr def,
VIR_FREE(cpumask);
}
+ for (i = 0; i < def->cputune.nvcpusched; i++) {
+ virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i];
+ char *ids = NULL;
+
+ if (!(ids = virBitmapFormat(sp->ids)))
+ goto error;
+ virBufferAsprintf(buf, "<vcpusched vcpus='%s'
scheduler='%s'",
+ ids, virDomainThreadSchedTypeToString(sp->scheduler));
+ VIR_FREE(ids);
+
+ if (sp->priority)
+ virBufferAsprintf(buf, " priority='%d'",
sp->priority);
+ virBufferAddLit(buf, "/>\n");
+ }
+
+ for (i = 0; i < def->cputune.niothreadsched; i++) {
+ virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i];
+ char *ids = NULL;
+
+ if (!(ids = virBitmapFormat(sp->ids)))
+ goto error;
+ virBufferAsprintf(buf, "<iothreadsched iothreads='%s'
scheduler='%s'",
+ ids, virDomainThreadSchedTypeToString(sp->scheduler));
+ VIR_FREE(ids);
+
+ if (sp->priority)
+ virBufferAsprintf(buf, " priority='%d'",
sp->priority);
+ virBufferAddLit(buf, "/>\n");
+ }
+
virBufferAdjustIndent(buf, -2);
if (cputune)
virBufferAddLit(buf, "</cputune>\n");
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 93f2314..6be70bf 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1810,6 +1810,24 @@ typedef enum {
VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST
} virDomainCpuPlacementMode;
+typedef enum {
+ VIR_DOMAIN_THREAD_SCHED_OTHER = 0,
+ VIR_DOMAIN_THREAD_SCHED_BATCH,
+ VIR_DOMAIN_THREAD_SCHED_IDLE,
+ VIR_DOMAIN_THREAD_SCHED_FIFO,
+ VIR_DOMAIN_THREAD_SCHED_RR,
+
+ VIR_DOMAIN_THREAD_SCHED_LAST
+} virDomainThreadSched;
+
+typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam;
+typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr;
+struct _virDomainThreadSchedParam {
+ virBitmapPtr ids;
+ virDomainThreadSched scheduler;
+ int priority;
+};
+
typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef;
typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr;
struct _virDomainTimerCatchupDef {
@@ -1997,6 +2015,11 @@ struct _virDomainCputune {
virDomainVcpuPinDefPtr emulatorpin;
size_t niothreadspin;
virDomainVcpuPinDefPtr *iothreadspin;
+
+ size_t nvcpusched;
+ virDomainThreadSchedParamPtr vcpusched;
+ size_t niothreadsched;
+ virDomainThreadSchedParamPtr iothreadsched;
};
typedef struct _virDomainBlkiotune virDomainBlkiotune;
@@ -2844,6 +2867,7 @@ VIR_ENUM_DECL(virDomainRNGModel)
VIR_ENUM_DECL(virDomainRNGBackend)
VIR_ENUM_DECL(virDomainTPMModel)
VIR_ENUM_DECL(virDomainTPMBackend)
+VIR_ENUM_DECL(virDomainThreadSched)
Yet another ENUM_DECL without corresponding libvirt_private.syms change.
/* from libvirt.h */
VIR_ENUM_DECL(virDomainState)
VIR_ENUM_DECL(virDomainNostateReason)
ACK with all of that fixed.
Michal