[libvirt] [PATCH 0/3] Support for per-thread scheduling settings

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 Martin Kletzander (3): util: Add virProcessSetScheduler() function for scheduler settings docs, schema, conf: Add support for setting scheduler parameters of guest threads qemu: Add support for setting vCPU and I/O thread scheduler setting docs/formatdomain.html.in | 13 +++ docs/schemas/domaincommon.rng | 33 ++++++ src/conf/domain_conf.c | 137 +++++++++++++++++++++++- src/conf/domain_conf.h | 24 +++++ src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 7 +- src/qemu/qemu_process.c | 76 ++++++++++++- src/qemu/qemu_process.h | 5 +- src/util/virprocess.c | 46 +++++++- src/util/virprocess.h | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 + 11 files changed, 345 insertions(+), 7 deletions(-) -- 2.2.1

This function uses sched_setscheduler() function so it works with processes and threads as well (even threads not created by us, which is what we'll need in the future). Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 6 +++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fb5d003..de3476e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1882,6 +1882,7 @@ virProcessSetMaxFiles; virProcessSetMaxMemLock; virProcessSetMaxProcesses; virProcessSetNamespaces; +virProcessSetScheduler; virProcessTranslateStatus; virProcessWait; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d0a1500..85da5e8 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1,7 +1,7 @@ /* * virprocess.c: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -32,7 +32,6 @@ # include <sys/time.h> # include <sys/resource.h> #endif -#include <sched.h> #if defined(__FreeBSD__) || HAVE_BSD_CPU_AFFINITY # include <sys/param.h> @@ -1052,3 +1051,46 @@ virProcessExitWithStatus(int status) } exit(value); } + +int +virProcessSetScheduler(pid_t pid, int policy, int priority) +{ + struct sched_param param = {0}; + + VIR_DEBUG("pid=%d, policy=%d, priority=%u", pid, policy, priority); + + if (policy == SCHED_FIFO || policy == SCHED_RR) { + int min = 0; + int max = 0; + + if ((min = sched_get_priority_min(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get minimum scheduler priority value")); + return -1; + } + + if ((max = sched_get_priority_max(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get maximum scheduler priority value")); + return -1; + } + + if (priority < min || priority > max) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler priority %d out of range [%d, %d]"), + priority, min, max); + return -1; + } + + param.sched_priority = priority; + } + + if (sched_setscheduler(pid, policy, ¶m) < 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index bcaede5..34b15a4 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -1,7 +1,7 @@ /* * virprocess.h: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -23,6 +23,7 @@ # define __VIR_PROCESS_H__ # include <sys/types.h> +# include <sched.h> # include "internal.h" # include "virbitmap.h" @@ -73,4 +74,7 @@ typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque); + +int virProcessSetScheduler(pid_t pid, int policy, int priority); + #endif /* __VIR_PROCESS_H__ */ -- 2.2.1

On 01/13/2015 02:57 AM, Martin Kletzander wrote:
This function uses sched_setscheduler() function so it works with processes and threads as well (even threads not created by us, which is what we'll need in the future).
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 6 +++++- 3 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fb5d003..de3476e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1882,6 +1882,7 @@ virProcessSetMaxFiles; virProcessSetMaxMemLock; virProcessSetMaxProcesses; virProcessSetNamespaces; +virProcessSetScheduler; virProcessTranslateStatus; virProcessWait;
diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d0a1500..85da5e8 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1,7 +1,7 @@ /* * virprocess.c: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -32,7 +32,6 @@ # include <sys/time.h> # include <sys/resource.h> #endif -#include <sched.h>
hmm.. moving to virprocess.h....
#if defined(__FreeBSD__) || HAVE_BSD_CPU_AFFINITY # include <sys/param.h> @@ -1052,3 +1051,46 @@ virProcessExitWithStatus(int status) } exit(value); } + +int +virProcessSetScheduler(pid_t pid, int policy, int priority) +{ + struct sched_param param = {0}; + + VIR_DEBUG("pid=%d, policy=%d, priority=%u", pid, policy, priority); + + if (policy == SCHED_FIFO || policy == SCHED_RR) { + int min = 0; + int max = 0; + + if ((min = sched_get_priority_min(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get minimum scheduler priority value")); + return -1; + } + + if ((max = sched_get_priority_max(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get maximum scheduler priority value")); + return -1; + } + + if (priority < min || priority > max) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler priority %d out of range [%d, %d]"), + priority, min, max); + return -1; + } + + param.sched_priority = priority; + } + + if (sched_setscheduler(pid, policy, ¶m) < 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index bcaede5..34b15a4 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -1,7 +1,7 @@ /* * virprocess.h: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -23,6 +23,7 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# include <sched.h>
Does moving this into here work for other build platforms? Is it required - it doesn't seem so given all that changes doesn't include anything sched.h specific. John
# include "internal.h" # include "virbitmap.h" @@ -73,4 +74,7 @@ typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque); + +int virProcessSetScheduler(pid_t pid, int policy, int priority); + #endif /* __VIR_PROCESS_H__ */

On Mon, Jan 19, 2015 at 10:12:34PM -0500, John Ferlan wrote:
On 01/13/2015 02:57 AM, Martin Kletzander wrote:
This function uses sched_setscheduler() function so it works with processes and threads as well (even threads not created by us, which is what we'll need in the future).
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 6 +++++- 3 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fb5d003..de3476e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1882,6 +1882,7 @@ virProcessSetMaxFiles; virProcessSetMaxMemLock; virProcessSetMaxProcesses; virProcessSetNamespaces; +virProcessSetScheduler; virProcessTranslateStatus; virProcessWait;
diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d0a1500..85da5e8 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1,7 +1,7 @@ /* * virprocess.c: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -32,7 +32,6 @@ # include <sys/time.h> # include <sys/resource.h> #endif -#include <sched.h>
hmm.. moving to virprocess.h....
#if defined(__FreeBSD__) || HAVE_BSD_CPU_AFFINITY # include <sys/param.h> @@ -1052,3 +1051,46 @@ virProcessExitWithStatus(int status) } exit(value); } + +int +virProcessSetScheduler(pid_t pid, int policy, int priority) +{ + struct sched_param param = {0}; + + VIR_DEBUG("pid=%d, policy=%d, priority=%u", pid, policy, priority); + + if (policy == SCHED_FIFO || policy == SCHED_RR) { + int min = 0; + int max = 0; + + if ((min = sched_get_priority_min(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get minimum scheduler priority value")); + return -1; + } + + if ((max = sched_get_priority_max(policy)) < 0) { + virReportSystemError(errno, "%s", + _("Cannot get maximum scheduler priority value")); + return -1; + } + + if (priority < min || priority > max) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler priority %d out of range [%d, %d]"), + priority, min, max); + return -1; + } + + param.sched_priority = priority; + } + + if (sched_setscheduler(pid, policy, ¶m) < 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index bcaede5..34b15a4 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -1,7 +1,7 @@ /* * virprocess.h: interaction with processes * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -23,6 +23,7 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# include <sched.h>
Does moving this into here work for other build platforms? Is it required - it doesn't seem so given all that changes doesn't include anything sched.h specific.
I moved this so that whoever includes this file has SCHED_* macros automatically available. Unfortunately, this breaks the build on mingw, you're right. I'll try a different approach. Or maybe just guarding this include by ifndef WIN32 would work since we can't (and won't) change the scheduler on windows. Otherwise the translation needs to be done in util/ and the enum has to be renamed and moved here as well so that this not depend on conf/.
John
# include "internal.h" # include "virbitmap.h" @@ -73,4 +74,7 @@ typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque); + +int virProcessSetScheduler(pid_t pid, int policy, int priority); + #endif /* __VIR_PROCESS_H__ */

In order for QEMU vCPU (and other) threads to run with RT scheduler, libvirt needs to take care of that so QEMU doesn't have to run privileged. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 13 +++ docs/schemas/domaincommon.rng | 33 ++++++ src/conf/domain_conf.c | 137 +++++++++++++++++++++++- src/conf/domain_conf.h | 24 +++++ tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 + 5 files changed, 210 insertions(+), 1 deletion(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index d8c31e0..60dad76 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -550,6 +550,8 @@ <quota>-1</quota> <emulator_period>1000000</emulator_period> <emulator_quota>-1</emulator_quota> + <vcpusched scheduler='fifo' priority='1'/> + <iothreadsched scheduler='idle'/> </cputune> ... </domain> @@ -656,6 +658,17 @@ <span class="since">Only QEMU driver support since 0.10.0</span> </dd> + <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> + <dd> + The optional <code>vcpusched</code> element specifies the scheduler + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, + <code>rr</code> and <code>other</code>, which is default and ignored) + for all vCPU/IOThread threads. For real-time schedulers + (<code>fifo</code>, <code>rr</code>), priority can be specified as + well. The value range for the priority depends on the host kernel. + <span class="since">Since 1.2.12</span> + </dd> + </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d250e6f..6653737 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -815,10 +815,43 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="vcpusched"> + <attribute name="vcpu"> + <ref name="vcpuid"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> + <zeroOrMore> + <element name="iothreadsched"> + <attribute name="iothread"> + <ref name="unsignedInt"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> </interleave> </element> </define> + <define name="schedparam"> + <attribute name="scheduler"> + <choice> + <value>other</value> + <value>batch</value> + <value>idle</value> + <value>fifo</value> + <value>rr</value> + </choice> + </attribute> + <optional> + <attribute name="priority"> + <ref name="unsignedShort"/> + </attribute> + </optional> + </define> + <!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 96d80a9..16adbf1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -795,6 +795,13 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash") +VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, + "other", /* default */ + "batch", + "idle", + "fifo", + "rr") + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -2260,6 +2267,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, def->cputune.niothreadspin); + VIR_FREE(def->cputune.vcpusched); + VIR_FREE(def->cputune.iothreadsched); + virDomainNumatuneFree(def->numatune); virSysinfoDefFree(def->sysinfo); @@ -12653,6 +12663,65 @@ virDomainLoaderDefParseXML(xmlNodePtr node, return ret; } +static int +virDomainThreadSchedParse(xmlNodePtr node, + unsigned int maxid, + const char *name, + virDomainThreadSchedParamPtr sp) +{ + char *tmp = NULL; + int sched = 0; + + tmp = virXMLPropString(node, name); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid value for attribute %s"), name); + goto error; + } + + if (virStrToLong_uip(tmp, NULL, 10, &sp->id) < 0 || + sp->id >= maxid) { + virReportError(VIR_ERR_XML_ERROR, + _("vcpu must be positive integer smaller than " + "maximum of vcpus, not '%s'"), + tmp); + goto error; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "scheduler"); + if (tmp) { + if ((sched = virDomainThreadSchedTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("invalid vcpu scheduler: '%s'"), tmp); + goto error; + } + sp->scheduler = sched; + + VIR_FREE(tmp); + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) { + tmp = virXMLPropString(node, "priority"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing scheduler priority")); + goto error; + } + if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("invalid value for element priority")); + goto error; + } + VIR_FREE(tmp); + } + } + + return 0; + + error: + VIR_FREE(tmp); + return -1; +} + static virDomainDefPtr virDomainDefParseXML(xmlDocPtr xml, xmlNodePtr root, @@ -13201,6 +13270,51 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract vcpusched nodes")); + goto error; + } + if (n) { + if (n > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many vcpusched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) + goto error; + def->cputune.nvcpusched = n; + + for (i = 0; i < def->cputune.nvcpusched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->maxvcpus, "vcpu", + &def->cputune.vcpusched[i]) < 0) + goto error; + } + } + + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract iothreadsched nodes")); + goto error; + } + if (n) { + if (n > def->iothreads) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many iothreadsched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) + goto error; + def->cputune.niothreadsched = n; + + for (i = 0; i < def->cputune.niothreadsched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->iothreads + 1, "iothread", + &def->cputune.iothreadsched[i]) < 0) + goto error; + } + } /* analysis of cpu handling */ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { @@ -19519,7 +19633,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota || def->cputune.emulatorpin || def->cputune.emulator_period || def->cputune.emulator_quota || - def->cputune.niothreadspin) { + def->cputune.niothreadspin || + def->cputune.vcpusched || def->cputune.iothreadsched) { virBufferAddLit(buf, "<cputune>\n"); cputune = true; } @@ -19592,6 +19707,26 @@ virDomainDefFormatInternal(virDomainDefPtr def, VIR_FREE(cpumask); } + for (i = 0; i < def->cputune.nvcpusched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; + virBufferAsprintf(buf, "<vcpusched vcpu='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + + for (i = 0; i < def->cputune.niothreadsched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; + virBufferAsprintf(buf, "<iothreadsched iothread='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + virBufferAdjustIndent(buf, -2); if (cputune) virBufferAddLit(buf, "</cputune>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5cc42d1..636e2d1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1815,6 +1815,24 @@ typedef enum { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST } virDomainCpuPlacementMode; +typedef enum { + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, + VIR_DOMAIN_THREAD_SCHED_BATCH, + VIR_DOMAIN_THREAD_SCHED_IDLE, + VIR_DOMAIN_THREAD_SCHED_FIFO, + VIR_DOMAIN_THREAD_SCHED_RR, + + VIR_DOMAIN_THREAD_SCHED_LAST +} virDomainThreadSched; + +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; +struct _virDomainThreadSchedParam { + unsigned int id; + virDomainThreadSched scheduler; + int priority; +}; + typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -2002,6 +2020,11 @@ struct _virDomainCputune { virDomainVcpuPinDefPtr emulatorpin; size_t niothreadspin; virDomainVcpuPinDefPtr *iothreadspin; + + size_t nvcpusched; + virDomainThreadSchedParamPtr vcpusched; + size_t niothreadsched; + virDomainThreadSchedParamPtr iothreadsched; }; typedef struct _virDomainBlkiotune virDomainBlkiotune; @@ -2807,6 +2830,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainThreadSched) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml index 813d201..243092b 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml @@ -4,6 +4,7 @@ <memory unit='KiB'>219136</memory> <currentMemory unit='KiB'>219136</currentMemory> <vcpu placement='static'>2</vcpu> + <iothreads>1</iothreads> <cputune> <shares>2048</shares> <period>1000000</period> @@ -11,6 +12,9 @@ <vcpupin vcpu='0' cpuset='0'/> <vcpupin vcpu='1' cpuset='1'/> <emulatorpin cpuset='1'/> + <vcpusched vcpu='0' scheduler='fifo' priority='1'/> + <vcpusched vcpu='1' scheduler='batch'/> + <iothreadsched iothread='0' scheduler='idle'/> </cputune> <os> <type arch='i686' machine='pc'>hvm</type> -- 2.2.1

On 01/13/2015 02:57 AM, Martin Kletzander wrote:
In order for QEMU vCPU (and other) threads to run with RT scheduler, libvirt needs to take care of that so QEMU doesn't have to run privileged.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 13 +++ docs/schemas/domaincommon.rng | 33 ++++++ src/conf/domain_conf.c | 137 +++++++++++++++++++++++- src/conf/domain_conf.h | 24 +++++ tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 + 5 files changed, 210 insertions(+), 1 deletion(-)
You'll need to rebase - I couldn't get this to apply... So review is strictly visual and memory based.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index d8c31e0..60dad76 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -550,6 +550,8 @@ <quota>-1</quota> <emulator_period>1000000</emulator_period> <emulator_quota>-1</emulator_quota> + <vcpusched scheduler='fifo' priority='1'/> + <iothreadsched scheduler='idle'/> </cputune> ... </domain> @@ -656,6 +658,17 @@ <span class="since">Only QEMU driver support since 0.10.0</span> </dd>
+ <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> + <dd> + The optional <code>vcpusched</code> element specifies the scheduler + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, + <code>rr</code> and <code>other</code>, which is default and ignored) + for all vCPU/IOThread threads. For real-time schedulers + (<code>fifo</code>, <code>rr</code>), priority can be specified as + well. The value range for the priority depends on the host kernel. + <span class="since">Since 1.2.12</span> + </dd> +
Why would someone use batch or idle? They're not described. Upon first reading, I assumed one setting was good for *all* vcpu's and iothread's; however, later on there's a 'vcpuid' and 'iothreadid' defined in the rng file. Does it even make sense to have 2 vCPUs using 'fifo' while another 2 are using 'ff'? If not, then why allow choosing the id? (it's the "for all vCPU/IOThread threads" language that gave me the impression). If you did desire different models, then why not model after the 'cpuset' with respect to being able to choose a range. If someone has 16 vCPU's are they going to be happy having to make 16 entries or just one that indicates 0-15. You should be able to borrow/reuse existing code for that parsing cpuset... Would listing priority with batch/idle result in error or be ignored? Testing will try it and they will flag it unless you say it'll be ignored. I wouldn't list "other" - it's confusing. I don't think other XML lists other. Why would someone list "other" - they would only have this if they wanted one of the models.
</dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d250e6f..6653737 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -815,10 +815,43 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="vcpusched"> + <attribute name="vcpu"> + <ref name="vcpuid"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> + <zeroOrMore> + <element name="iothreadsched"> + <attribute name="iothread"> + <ref name="unsignedInt"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore>
so essentially : <vcpusched vcpuid=n scheduler='string' priority=n/> and <iothreadsched iothread=n scheduler='string' priority=n/>
</interleave> </element> </define>
+ <define name="schedparam"> + <attribute name="scheduler"> + <choice> + <value>other</value> + <value>batch</value> + <value>idle</value> + <value>fifo</value> + <value>rr</value> + </choice> + </attribute> + <optional> + <attribute name="priority"> + <ref name="unsignedShort"/> + </attribute> + </optional> + </define> +
leaving other here is OK... could also go with "none"
<!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 96d80a9..16adbf1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -795,6 +795,13 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash")
+VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, + "other", /* default */ + "batch", + "idle", + "fifo", + "rr") +
other a/k/a none
/* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -2260,6 +2267,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, def->cputune.niothreadspin);
+ VIR_FREE(def->cputune.vcpusched); + VIR_FREE(def->cputune.iothreadsched); + virDomainNumatuneFree(def->numatune);
virSysinfoDefFree(def->sysinfo); @@ -12653,6 +12663,65 @@ virDomainLoaderDefParseXML(xmlNodePtr node, return ret; }
+static int +virDomainThreadSchedParse(xmlNodePtr node, + unsigned int maxid, + const char *name, + virDomainThreadSchedParamPtr sp) +{ + char *tmp = NULL; + int sched = 0; + + tmp = virXMLPropString(node, name); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid value for attribute %s"), name); + goto error; + } + + if (virStrToLong_uip(tmp, NULL, 10, &sp->id) < 0 || + sp->id >= maxid) { + virReportError(VIR_ERR_XML_ERROR, + _("vcpu must be positive integer smaller than " + "maximum of vcpus, not '%s'"), + tmp);
Hmm.... this is challenging... vcpus are numbered 0..n-1; however, iothreads are numbered 1..n Use the 'name' instead of 'vcpu' _("Invalid %ssched value '%s'", name, tmp). However, I think the range check should be in the caller. Then again, why are we allowing someone to choose per cpu?
+ goto error; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "scheduler"); + if (tmp) { + if ((sched = virDomainThreadSchedTypeFromString(tmp)) < 0) {
Why accept "other"? If you choose to not accept "other", then this would be <= 0... That way on output the only way it gets written is if it's been set to something.
+ virReportError(VIR_ERR_INTERNAL_ERROR, + _("invalid vcpu scheduler: '%s'"), tmp);
change 'vcpu scheduler: '%s' to %ssched scheduler='%s' value, where the first %s is the 'name' argument.
+ goto error; + } + sp->scheduler = sched; + + VIR_FREE(tmp); + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) {
Oh - this is dangerous... Today you've set them up that way, but if some day something is added after RR that doesn't support priority, then there'll be an error. Be specific here.
+ tmp = virXMLPropString(node, "priority"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing scheduler priority")); + goto error; + }
Priority is listed as optional in the rng, but this implies it must be provided. Also you'll need to document that it's only parsed for fifo/rr...
+ if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("invalid value for element priority")); + goto error; + } + VIR_FREE(tmp); + } + } + + return 0; + + error: + VIR_FREE(tmp); + return -1; +} + static virDomainDefPtr virDomainDefParseXML(xmlDocPtr xml, xmlNodePtr root, @@ -13201,6 +13270,51 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes);
+ if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract vcpusched nodes")); + goto error; + } + if (n) { + if (n > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many vcpusched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) + goto error; + def->cputune.nvcpusched = n; + + for (i = 0; i < def->cputune.nvcpusched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->maxvcpus, "vcpu", + &def->cputune.vcpusched[i]) < 0) + goto error; + }
linear algorithm, but not linear number to vcpusched... Also, could follow the 'cpuset' algorithm...
+ }
VIR_FREE(nodes); ?
+ + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract iothreadsched nodes")); + goto error; + } + if (n) { + if (n > def->iothreads) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many iothreadsched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) + goto error; + def->cputune.niothreadsched = n; + + for (i = 0; i < def->cputune.niothreadsched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->iothreads + 1, "iothread", + &def->cputune.iothreadsched[i]) < 0) + goto error; + } + }
VIR_FREE(nodes); ?
/* analysis of cpu handling */ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { @@ -19519,7 +19633,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota || def->cputune.emulatorpin || def->cputune.emulator_period || def->cputune.emulator_quota || - def->cputune.niothreadspin) { + def->cputune.niothreadspin || + def->cputune.vcpusched || def->cputune.iothreadsched) { virBufferAddLit(buf, "<cputune>\n"); cputune = true; } @@ -19592,6 +19707,26 @@ virDomainDefFormatInternal(virDomainDefPtr def, VIR_FREE(cpumask); }
+ for (i = 0; i < def->cputune.nvcpusched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; + virBufferAsprintf(buf, "<vcpusched vcpu='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + + for (i = 0; i < def->cputune.niothreadsched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; + virBufferAsprintf(buf, "<iothreadsched iothread='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + virBufferAdjustIndent(buf, -2); if (cputune) virBufferAddLit(buf, "</cputune>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5cc42d1..636e2d1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1815,6 +1815,24 @@ typedef enum { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST } virDomainCpuPlacementMode;
+typedef enum { + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, + VIR_DOMAIN_THREAD_SCHED_BATCH, + VIR_DOMAIN_THREAD_SCHED_IDLE, + VIR_DOMAIN_THREAD_SCHED_FIFO, + VIR_DOMAIN_THREAD_SCHED_RR, + + VIR_DOMAIN_THREAD_SCHED_LAST +} virDomainThreadSched; + +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; +struct _virDomainThreadSchedParam { + unsigned int id; + virDomainThreadSched scheduler; + int priority; +}; + typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -2002,6 +2020,11 @@ struct _virDomainCputune { virDomainVcpuPinDefPtr emulatorpin; size_t niothreadspin; virDomainVcpuPinDefPtr *iothreadspin; + + size_t nvcpusched; + virDomainThreadSchedParamPtr vcpusched; + size_t niothreadsched; + virDomainThreadSchedParamPtr iothreadsched; };
typedef struct _virDomainBlkiotune virDomainBlkiotune; @@ -2807,6 +2830,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainThreadSched) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml index 813d201..243092b 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml @@ -4,6 +4,7 @@ <memory unit='KiB'>219136</memory> <currentMemory unit='KiB'>219136</currentMemory> <vcpu placement='static'>2</vcpu> + <iothreads>1</iothreads> <cputune> <shares>2048</shares> <period>1000000</period> @@ -11,6 +12,9 @@ <vcpupin vcpu='0' cpuset='0'/> <vcpupin vcpu='1' cpuset='1'/> <emulatorpin cpuset='1'/> + <vcpusched vcpu='0' scheduler='fifo' priority='1'/> + <vcpusched vcpu='1' scheduler='batch'/> + <iothreadsched iothread='0' scheduler='idle'/>
iothread='0' - no such beast. 1..n read the iothreadpin description
</cputune> <os> <type arch='i686' machine='pc'>hvm</type>
I think this should be a new file - eg qemuxml2argv-cpusched.xml with the accompanying qemuxml2argv-cpusched.args file... That way you're testing the results with and without your new XML Since priority is listed as optional for 'fifo' and 'rr', you'll also need one 'cpusched' with priority defined and one without it defined using 'fifo'/'rr'. So that's either two more files or more vcpus in your XML. John

On Mon, Jan 19, 2015 at 10:23:22PM -0500, John Ferlan wrote:
On 01/13/2015 02:57 AM, Martin Kletzander wrote:
In order for QEMU vCPU (and other) threads to run with RT scheduler, libvirt needs to take care of that so QEMU doesn't have to run privileged.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 13 +++ docs/schemas/domaincommon.rng | 33 ++++++ src/conf/domain_conf.c | 137 +++++++++++++++++++++++- src/conf/domain_conf.h | 24 +++++ tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 + 5 files changed, 210 insertions(+), 1 deletion(-)
You'll need to rebase - I couldn't get this to apply... So review is strictly visual and memory based.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index d8c31e0..60dad76 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -550,6 +550,8 @@ <quota>-1</quota> <emulator_period>1000000</emulator_period> <emulator_quota>-1</emulator_quota> + <vcpusched scheduler='fifo' priority='1'/> + <iothreadsched scheduler='idle'/> </cputune> ... </domain> @@ -656,6 +658,17 @@ <span class="since">Only QEMU driver support since 0.10.0</span> </dd>
+ <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> + <dd> + The optional <code>vcpusched</code> element specifies the scheduler + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, + <code>rr</code> and <code>other</code>, which is default and ignored) + for all vCPU/IOThread threads. For real-time schedulers + (<code>fifo</code>, <code>rr</code>), priority can be specified as + well. The value range for the priority depends on the host kernel. + <span class="since">Since 1.2.12</span> + </dd> +
Why would someone use batch or idle? They're not described.
I just took all the options form sched_setscheduler() and made them available. But I have no problem removing some of them as we can always add them later on.
Upon first reading, I assumed one setting was good for *all* vcpu's and iothread's; however, later on there's a 'vcpuid' and 'iothreadid' defined in the rng file. Does it even make sense to have 2 vCPUs using 'fifo' while another 2 are using 'ff'? If not, then why allow choosing the id? (it's the "for all vCPU/IOThread threads" language that gave me the impression).
Yes, that's my fault. What you understood fine (and I described) was a first version. Then, before posting it, I enhanced that based on response from QEMU guys who say that someone might need per-thread specifics.
If you did desire different models, then why not model after the 'cpuset' with respect to being able to choose a range. If someone has 16 vCPU's are they going to be happy having to make 16 entries or just one that indicates 0-15. You should be able to borrow/reuse existing code for that parsing cpuset...
Would listing priority with batch/idle result in error or be ignored? Testing will try it and they will flag it unless you say it'll be ignored.
It would be ignored. I went with all-optional approach, so it's easiest to use. And that takes me to the fact that one thing is still missing from this. I'd suggest (myself) to make the vcpu/iothread ID also optional and that one element without that particular ID would be default for all threads not specified in other elements. I'm just not sure that goes fine with our current approach (or rather all our current approaches as we're *so* inconsistent).
I wouldn't list "other" - it's confusing. I don't think other XML lists other. Why would someone list "other" - they would only have this if they wanted one of the models.
</dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d250e6f..6653737 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -815,10 +815,43 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="vcpusched"> + <attribute name="vcpu"> + <ref name="vcpuid"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> + <zeroOrMore> + <element name="iothreadsched"> + <attribute name="iothread"> + <ref name="unsignedInt"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore>
so essentially :
<vcpusched vcpuid=n scheduler='string' priority=n/>
and
<iothreadsched iothread=n scheduler='string' priority=n/>
</interleave> </element> </define>
+ <define name="schedparam"> + <attribute name="scheduler"> + <choice> + <value>other</value> + <value>batch</value> + <value>idle</value> + <value>fifo</value> + <value>rr</value> + </choice> + </attribute> + <optional> + <attribute name="priority"> + <ref name="unsignedShort"/> + </attribute> + </optional> + </define> +
leaving other here is OK... could also go with "none"
I took other as SCHED_OTHER from sched_setscheduler(). If I'd add "none", I'd add it before "other" and "other" would then mean that the user wants to specifically call sched_setscheduler() with SCHED_OTHER. That might be useful if you want to use some policy for all vCPUs except one particular one.
<!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 96d80a9..16adbf1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -795,6 +795,13 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash")
+VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, + "other", /* default */ + "batch", + "idle", + "fifo", + "rr") +
other a/k/a none
/* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -2260,6 +2267,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, def->cputune.niothreadspin);
+ VIR_FREE(def->cputune.vcpusched); + VIR_FREE(def->cputune.iothreadsched); + virDomainNumatuneFree(def->numatune);
virSysinfoDefFree(def->sysinfo); @@ -12653,6 +12663,65 @@ virDomainLoaderDefParseXML(xmlNodePtr node, return ret; }
+static int +virDomainThreadSchedParse(xmlNodePtr node, + unsigned int maxid, + const char *name, + virDomainThreadSchedParamPtr sp) +{ + char *tmp = NULL; + int sched = 0; + + tmp = virXMLPropString(node, name); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid value for attribute %s"), name); + goto error; + } + + if (virStrToLong_uip(tmp, NULL, 10, &sp->id) < 0 || + sp->id >= maxid) { + virReportError(VIR_ERR_XML_ERROR, + _("vcpu must be positive integer smaller than " + "maximum of vcpus, not '%s'"), + tmp);
Hmm.... this is challenging... vcpus are numbered 0..n-1; however, iothreads are numbered 1..n
Yes, but I remember you had a reason for that.
Use the 'name' instead of 'vcpu' _("Invalid %ssched value '%s'", name, tmp).
That should be OK for translators. Or at least I hope so.
However, I think the range check should be in the caller. Then again, why are we allowing someone to choose per cpu?
+ goto error; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "scheduler"); + if (tmp) { + if ((sched = virDomainThreadSchedTypeFromString(tmp)) < 0) {
Why accept "other"? If you choose to not accept "other", then this would be <= 0... That way on output the only way it gets written is if it's been set to something.
+ virReportError(VIR_ERR_INTERNAL_ERROR, + _("invalid vcpu scheduler: '%s'"), tmp);
change 'vcpu scheduler: '%s' to %ssched scheduler='%s' value, where the first %s is the 'name' argument.
+ goto error; + } + sp->scheduler = sched; + + VIR_FREE(tmp); + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) {
Oh - this is dangerous... Today you've set them up that way, but if some day something is added after RR that doesn't support priority, then there'll be an error. Be specific here.
OK, that'll be better.
+ tmp = virXMLPropString(node, "priority"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing scheduler priority")); + goto error; + }
Priority is listed as optional in the rng, but this implies it must be provided. Also you'll need to document that it's only parsed for fifo/rr...
+ if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("invalid value for element priority")); + goto error; + } + VIR_FREE(tmp); + } + } + + return 0; + + error: + VIR_FREE(tmp); + return -1; +} + static virDomainDefPtr virDomainDefParseXML(xmlDocPtr xml, xmlNodePtr root, @@ -13201,6 +13270,51 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes);
+ if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract vcpusched nodes")); + goto error; + } + if (n) { + if (n > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many vcpusched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) + goto error; + def->cputune.nvcpusched = n; + + for (i = 0; i < def->cputune.nvcpusched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->maxvcpus, "vcpu", + &def->cputune.vcpusched[i]) < 0) + goto error; + }
linear algorithm, but not linear number to vcpusched... Also, could follow the 'cpuset' algorithm...
My idea would be to allocate the parameters per each cpu and just fill it in like I did with memnode. And for the cpuset=, I was under the impression that it would just confuse people. Look at the vcpupin and iothreadpin, we don't put it there... But I'm not against that.
+ }
VIR_FREE(nodes); ?
Yes!
+ + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract iothreadsched nodes")); + goto error; + } + if (n) { + if (n > def->iothreads) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many iothreadsched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) + goto error; + def->cputune.niothreadsched = n; + + for (i = 0; i < def->cputune.niothreadsched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->iothreads + 1, "iothread", + &def->cputune.iothreadsched[i]) < 0) + goto error; + } + }
VIR_FREE(nodes); ?
And yes.
/* analysis of cpu handling */ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { @@ -19519,7 +19633,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota || def->cputune.emulatorpin || def->cputune.emulator_period || def->cputune.emulator_quota || - def->cputune.niothreadspin) { + def->cputune.niothreadspin || + def->cputune.vcpusched || def->cputune.iothreadsched) { virBufferAddLit(buf, "<cputune>\n"); cputune = true; } @@ -19592,6 +19707,26 @@ virDomainDefFormatInternal(virDomainDefPtr def, VIR_FREE(cpumask); }
+ for (i = 0; i < def->cputune.nvcpusched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; + virBufferAsprintf(buf, "<vcpusched vcpu='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + + for (i = 0; i < def->cputune.niothreadsched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; + virBufferAsprintf(buf, "<iothreadsched iothread='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + virBufferAdjustIndent(buf, -2); if (cputune) virBufferAddLit(buf, "</cputune>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5cc42d1..636e2d1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1815,6 +1815,24 @@ typedef enum { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST } virDomainCpuPlacementMode;
+typedef enum { + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, + VIR_DOMAIN_THREAD_SCHED_BATCH, + VIR_DOMAIN_THREAD_SCHED_IDLE, + VIR_DOMAIN_THREAD_SCHED_FIFO, + VIR_DOMAIN_THREAD_SCHED_RR, + + VIR_DOMAIN_THREAD_SCHED_LAST +} virDomainThreadSched; + +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; +struct _virDomainThreadSchedParam { + unsigned int id; + virDomainThreadSched scheduler; + int priority; +}; + typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -2002,6 +2020,11 @@ struct _virDomainCputune { virDomainVcpuPinDefPtr emulatorpin; size_t niothreadspin; virDomainVcpuPinDefPtr *iothreadspin; + + size_t nvcpusched; + virDomainThreadSchedParamPtr vcpusched; + size_t niothreadsched; + virDomainThreadSchedParamPtr iothreadsched; };
typedef struct _virDomainBlkiotune virDomainBlkiotune; @@ -2807,6 +2830,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainThreadSched) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml index 813d201..243092b 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml @@ -4,6 +4,7 @@ <memory unit='KiB'>219136</memory> <currentMemory unit='KiB'>219136</currentMemory> <vcpu placement='static'>2</vcpu> + <iothreads>1</iothreads> <cputune> <shares>2048</shares> <period>1000000</period> @@ -11,6 +12,9 @@ <vcpupin vcpu='0' cpuset='0'/> <vcpupin vcpu='1' cpuset='1'/> <emulatorpin cpuset='1'/> + <vcpusched vcpu='0' scheduler='fifo' priority='1'/> + <vcpusched vcpu='1' scheduler='batch'/> + <iothreadsched iothread='0' scheduler='idle'/>
iothread='0' - no such beast. 1..n
read the iothreadpin description
Ouch, what a beast it had become...
</cputune> <os> <type arch='i686' machine='pc'>hvm</type>
I think this should be a new file - eg qemuxml2argv-cpusched.xml with the accompanying qemuxml2argv-cpusched.args file... That way you're testing the results with and without your new XML
Since priority is listed as optional for 'fifo' and 'rr', you'll also need one 'cpusched' with priority defined and one without it defined using 'fifo'/'rr'. So that's either two more files or more vcpus in your XML.
OK, I'll do that. Thanks for the review, Martin

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++- src/qemu/qemu_process.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_process.h | 5 +++- 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index cdf4173..fdfd4e9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,7 +1,7 @@ /* * qemu_driver.c: core driver methods for managing qemu guests * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -4491,6 +4491,11 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver, } virCgroupFree(&cgroup_vcpu); + + if (qemuProcessSetSchedParams(i, cpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + goto cleanup; } } else { for (i = oldvcpus - 1; i >= nvcpus; i--) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c18204b..0d69c43 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1,7 +1,7 @@ /* * qemu_process.c: QEMU process management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -2574,6 +2574,76 @@ qemuProcessSetIOThreadsAffinity(virDomainObjPtr vm) return ret; } +/* Set Scheduler parameters for vCPU or I/O threads. */ +int +qemuProcessSetSchedParams(int id, + pid_t pid, + size_t nsp, + virDomainThreadSchedParamPtr sp) +{ + int sched = 0; + size_t i = 0; + virDomainThreadSchedParamPtr s = NULL; + + for (i = 0; i < nsp; i++) { + if (id != sp[i].id) + continue; + + s = sp + i; + break; + } + + if (!s) + return 0; + + switch (s->scheduler) { + case VIR_DOMAIN_THREAD_SCHED_BATCH: + sched = SCHED_BATCH; + break; + + case VIR_DOMAIN_THREAD_SCHED_IDLE: + sched = SCHED_IDLE; + break; + + case VIR_DOMAIN_THREAD_SCHED_FIFO: + sched = SCHED_FIFO; + break; + + case VIR_DOMAIN_THREAD_SCHED_RR: + sched = SCHED_RR; + break; + + case VIR_DOMAIN_THREAD_SCHED_OTHER: + case VIR_DOMAIN_THREAD_SCHED_LAST: + return 0; + } + + return virProcessSetScheduler(pid, sched, s->priority); +} + +static int +qemuProcessSetSchedulers(virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; + + for (i = 0; i < vm->def->vcpus; i++) { + if (qemuProcessSetSchedParams(i, priv->vcpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + return -1; + } + + for (i = 0; i < vm->def->iothreads; i++) { + if (qemuProcessSetSchedParams(i, priv->iothreadpids[i], + vm->def->cputune.niothreadsched, + vm->def->cputune.iothreadsched) < 0) + return -1; + } + + return 0; +} + static int qemuProcessInitPasswords(virConnectPtr conn, virQEMUDriverPtr driver, @@ -4750,6 +4820,10 @@ int qemuProcessStart(virConnectPtr conn, if (qemuProcessSetIOThreadsAffinity(vm) < 0) goto cleanup; + VIR_DEBUG("Setting scheduler parameters"); + if (qemuProcessSetSchedulers(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting any required VM passwords"); if (qemuProcessInitPasswords(conn, driver, vm, asyncJob) < 0) goto cleanup; diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 5948ea4..2e1d393 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -1,7 +1,7 @@ /* * qemu_process.h: QEMU process management * - * Copyright (C) 2006-2012 Red Hat, Inc. + * Copyright (C) 2006-2012, 2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -104,4 +104,7 @@ virBitmapPtr qemuPrepareCpumap(virQEMUDriverPtr driver, int qemuProcessReadLog(int fd, char *buf, int buflen, int off, bool skipchar); +int qemuProcessSetSchedParams(int id, pid_t pid, size_t nsp, + virDomainThreadSchedParamPtr sp); + #endif /* __QEMU_PROCESS_H__ */ -- 2.2.1

On Tue, Jan 13, 2015 at 08:57:04AM +0100, Martin Kletzander wrote:
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++- src/qemu/qemu_process.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_process.h | 5 +++- 3 files changed, 85 insertions(+), 3 deletions(-)
Ehm, consider the following diff squashed in O:-) diff --git i/src/qemu/qemu_process.c w/src/qemu/qemu_process.c index 0d69c43..1702474 100644 --- i/src/qemu/qemu_process.c +++ w/src/qemu/qemu_process.c @@ -2627,14 +2627,14 @@ qemuProcessSetSchedulers(virDomainObjPtr vm) qemuDomainObjPrivatePtr priv = vm->privateData; size_t i = 0; - for (i = 0; i < vm->def->vcpus; i++) { + for (i = 0; i < priv->nvcpupids; i++) { if (qemuProcessSetSchedParams(i, priv->vcpupids[i], vm->def->cputune.nvcpusched, vm->def->cputune.vcpusched) < 0) return -1; } - for (i = 0; i < vm->def->iothreads; i++) { + for (i = 0; i < priv->niothreadpids; i++) { if (qemuProcessSetSchedParams(i, priv->iothreadpids[i], vm->def->cputune.niothreadsched, vm->def->cputune.iothreadsched) < 0) --

On 01/13/2015 02:57 AM, Martin Kletzander wrote:
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Could use a bit more details here.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++- src/qemu/qemu_process.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_process.h | 5 +++- 3 files changed, 85 insertions(+), 3 deletions(-)
No change to qemu_hotplug necessary??
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index cdf4173..fdfd4e9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,7 +1,7 @@ /* * qemu_driver.c: core driver methods for managing qemu guests * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -4491,6 +4491,11 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver, }
virCgroupFree(&cgroup_vcpu); + + if (qemuProcessSetSchedParams(i, cpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + goto cleanup; } } else { for (i = oldvcpus - 1; i >= nvcpus; i--) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c18204b..0d69c43 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1,7 +1,7 @@ /* * qemu_process.c: QEMU process management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -2574,6 +2574,76 @@ qemuProcessSetIOThreadsAffinity(virDomainObjPtr vm) return ret; }
+/* Set Scheduler parameters for vCPU or I/O threads. */ +int +qemuProcessSetSchedParams(int id, + pid_t pid, + size_t nsp, + virDomainThreadSchedParamPtr sp) +{ + int sched = 0; + size_t i = 0; + virDomainThreadSchedParamPtr s = NULL; + + for (i = 0; i < nsp; i++) { + if (id != sp[i].id) + continue; + + s = sp + i; + break; + }
Ug. I think this would be easier with the bitmap like cpuset. I'll wait for your feedback on 2/3... Do you plan to add a new command 'vcpusched' (like vcpupin). It's something that's desired for IOThreads, so perhaps one of us gets to reuse code :-) John
+ + if (!s) + return 0; + + switch (s->scheduler) { + case VIR_DOMAIN_THREAD_SCHED_BATCH: + sched = SCHED_BATCH; + break; + + case VIR_DOMAIN_THREAD_SCHED_IDLE: + sched = SCHED_IDLE; + break; + + case VIR_DOMAIN_THREAD_SCHED_FIFO: + sched = SCHED_FIFO; + break; + + case VIR_DOMAIN_THREAD_SCHED_RR: + sched = SCHED_RR; + break; + + case VIR_DOMAIN_THREAD_SCHED_OTHER: + case VIR_DOMAIN_THREAD_SCHED_LAST: + return 0; + } + + return virProcessSetScheduler(pid, sched, s->priority); +} + +static int +qemuProcessSetSchedulers(virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; + + for (i = 0; i < vm->def->vcpus; i++) { + if (qemuProcessSetSchedParams(i, priv->vcpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + return -1; + } + + for (i = 0; i < vm->def->iothreads; i++) { + if (qemuProcessSetSchedParams(i, priv->iothreadpids[i], + vm->def->cputune.niothreadsched, + vm->def->cputune.iothreadsched) < 0) + return -1; + } + + return 0; +} + static int qemuProcessInitPasswords(virConnectPtr conn, virQEMUDriverPtr driver, @@ -4750,6 +4820,10 @@ int qemuProcessStart(virConnectPtr conn, if (qemuProcessSetIOThreadsAffinity(vm) < 0) goto cleanup;
+ VIR_DEBUG("Setting scheduler parameters"); + if (qemuProcessSetSchedulers(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting any required VM passwords"); if (qemuProcessInitPasswords(conn, driver, vm, asyncJob) < 0) goto cleanup; diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 5948ea4..2e1d393 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -1,7 +1,7 @@ /* * qemu_process.h: QEMU process management * - * Copyright (C) 2006-2012 Red Hat, Inc. + * Copyright (C) 2006-2012, 2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -104,4 +104,7 @@ virBitmapPtr qemuPrepareCpumap(virQEMUDriverPtr driver,
int qemuProcessReadLog(int fd, char *buf, int buflen, int off, bool skipchar);
+int qemuProcessSetSchedParams(int id, pid_t pid, size_t nsp, + virDomainThreadSchedParamPtr sp); + #endif /* __QEMU_PROCESS_H__ */

On Mon, Jan 19, 2015 at 10:33:52PM -0500, John Ferlan wrote:
On 01/13/2015 02:57 AM, Martin Kletzander wrote:
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986
Could use a bit more details here.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++- src/qemu/qemu_process.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_process.h | 5 +++- 3 files changed, 85 insertions(+), 3 deletions(-)
No change to qemu_hotplug necessary??
There's nothing needed there, IIUC.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index cdf4173..fdfd4e9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,7 +1,7 @@ /* * qemu_driver.c: core driver methods for managing qemu guests * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -4491,6 +4491,11 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver, }
virCgroupFree(&cgroup_vcpu); + + if (qemuProcessSetSchedParams(i, cpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + goto cleanup; } } else { for (i = oldvcpus - 1; i >= nvcpus; i--) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c18204b..0d69c43 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1,7 +1,7 @@ /* * qemu_process.c: QEMU process management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -2574,6 +2574,76 @@ qemuProcessSetIOThreadsAffinity(virDomainObjPtr vm) return ret; }
+/* Set Scheduler parameters for vCPU or I/O threads. */ +int +qemuProcessSetSchedParams(int id, + pid_t pid, + size_t nsp, + virDomainThreadSchedParamPtr sp) +{ + int sched = 0; + size_t i = 0; + virDomainThreadSchedParamPtr s = NULL; + + for (i = 0; i < nsp; i++) { + if (id != sp[i].id) + continue; + + s = sp + i; + break; + }
Ug. I think this would be easier with the bitmap like cpuset. I'll wait for your feedback on 2/3...
Yes, this will look way better in v2.
Do you plan to add a new command 'vcpusched' (like vcpupin). It's something that's desired for IOThreads, so perhaps one of us gets to reuse code :-)
I'm not sure anyone will use that, but it there is demand, then I have no problem adding that as well.
John
+ + if (!s) + return 0; + + switch (s->scheduler) { + case VIR_DOMAIN_THREAD_SCHED_BATCH: + sched = SCHED_BATCH; + break; + + case VIR_DOMAIN_THREAD_SCHED_IDLE: + sched = SCHED_IDLE; + break; + + case VIR_DOMAIN_THREAD_SCHED_FIFO: + sched = SCHED_FIFO; + break; + + case VIR_DOMAIN_THREAD_SCHED_RR: + sched = SCHED_RR; + break; + + case VIR_DOMAIN_THREAD_SCHED_OTHER: + case VIR_DOMAIN_THREAD_SCHED_LAST: + return 0; + } + + return virProcessSetScheduler(pid, sched, s->priority); +} + +static int +qemuProcessSetSchedulers(virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; + + for (i = 0; i < vm->def->vcpus; i++) { + if (qemuProcessSetSchedParams(i, priv->vcpupids[i], + vm->def->cputune.nvcpusched, + vm->def->cputune.vcpusched) < 0) + return -1; + } + + for (i = 0; i < vm->def->iothreads; i++) { + if (qemuProcessSetSchedParams(i, priv->iothreadpids[i], + vm->def->cputune.niothreadsched, + vm->def->cputune.iothreadsched) < 0) + return -1; + } + + return 0; +} + static int qemuProcessInitPasswords(virConnectPtr conn, virQEMUDriverPtr driver, @@ -4750,6 +4820,10 @@ int qemuProcessStart(virConnectPtr conn, if (qemuProcessSetIOThreadsAffinity(vm) < 0) goto cleanup;
+ VIR_DEBUG("Setting scheduler parameters"); + if (qemuProcessSetSchedulers(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting any required VM passwords"); if (qemuProcessInitPasswords(conn, driver, vm, asyncJob) < 0) goto cleanup; diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 5948ea4..2e1d393 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -1,7 +1,7 @@ /* * qemu_process.h: QEMU process management * - * Copyright (C) 2006-2012 Red Hat, Inc. + * Copyright (C) 2006-2012, 2015 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -104,4 +104,7 @@ virBitmapPtr qemuPrepareCpumap(virQEMUDriverPtr driver,
int qemuProcessReadLog(int fd, char *buf, int buflen, int off, bool skipchar);
+int qemuProcessSetSchedParams(int id, pid_t pid, size_t nsp, + virDomainThreadSchedParamPtr sp); + #endif /* __QEMU_PROCESS_H__ */

On 01/20/2015 09:48 AM, Martin Kletzander wrote: <...snip...>
Do you plan to add a new command 'vcpusched' (like vcpupin). It's something that's desired for IOThreads, so perhaps one of us gets to reuse code :-)
I'm not sure anyone will use that, but it there is demand, then I have no problem adding that as well.
With this implementation someone can only edit the XML in order to enable/add; whereas, a 'virsh vcpusched {scheduler} [vcpuid] [priority]' would be much more simple... and of course enables a python script API. Someone with 1-4 vCPU's probably doesn't care. Someone with more probably starts caring when they've cut-n-pasted too much. Rome wasn't built in a day, so not required for this pass, but something to consider. John
participants (2)
-
John Ferlan
-
Martin Kletzander