[libvirt] [PATCH 0/6] Add deadline scheduler

The policy SCHED_DEADLINE is available since kernel 3.14 (and most likely backported to older RT_PREEMPT kernels). It is safer to use than fifo or round robin policies due to only limiting part of cpu time for the RT process, leading to lack of lockups of the host. The series adds new vcpusched/iothreadsched called 'deadline' that activates SCHED_DEADLINE for given process. As the scheduler is linux specific, extra implementation was required - it is not possible to use sched_setscheduler, sched_setattr syscall must be used. Martin Polednik (6): conf: add entries for deadline scheduler util: allow virProcessSetScheduler to set SCHED_DEADLINE virDomainFormatSchedDef: factor out subset code conf: add deadline scheduler schema: add deadline scheduler docs: mention deadline scheduler in vcpusched docs/formatdomain.html.in | 15 ++-- docs/schemas/domaincommon.rng | 16 +++++ src/conf/domain_conf.c | 154 +++++++++++++++++++++++++++++++++++------- src/conf/domain_conf.h | 3 + src/qemu/qemu_process.c | 3 +- src/util/virprocess.c | 82 +++++++++++++++++++++- src/util/virprocess.h | 29 +++++++- 7 files changed, 268 insertions(+), 34 deletions(-) -- 2.8.1

Deadline scheduler, or SCHED_DEADLINE, is a new realtime scheduler added to Linux 3.14. In order to support it as a possible value in vcpusched/iothreadsched, we have to add it to scheduler related structures. --- src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 3 +++ src/util/virprocess.c | 12 +++++++++++- src/util/virprocess.h | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a233c0c..ca0fdfd 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23125,6 +23125,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, case VIR_PROC_POLICY_NONE: case VIR_PROC_POLICY_BATCH: case VIR_PROC_POLICY_IDLE: + case VIR_PROC_POLICY_DEADLINE: case VIR_PROC_POLICY_LAST: currentMap = schedMap; break; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 541b600..67b9e3e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1840,6 +1840,9 @@ typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; struct _virDomainThreadSchedParam { virProcessSchedPolicy policy; int priority; + unsigned long long runtime; + unsigned long long deadline; + unsigned long long period; }; typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 718c4a2..d68800b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -34,6 +34,12 @@ #endif #if HAVE_SCHED_SETSCHEDULER # include <sched.h> +# ifdef __linux__ +# include <linux/sched.h> +# endif +# ifndef SCHED_DEADLINE +# define SCHED_DEADLINE 6 +# endif #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || HAVE_BSD_CPU_AFFINITY @@ -111,7 +117,8 @@ VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, "batch", "idle", "fifo", - "rr"); + "rr", + "deadline"); /** * virProcessTranslateStatus: @@ -1204,6 +1211,9 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) case VIR_PROC_POLICY_RR: return SCHED_RR; + case VIR_PROC_POLICY_DEADLINE: + return SCHED_DEADLINE; + case VIR_PROC_POLICY_LAST: /* nada */ break; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 04e9802..1eac3e6 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -34,6 +34,7 @@ typedef enum { VIR_PROC_POLICY_IDLE, VIR_PROC_POLICY_FIFO, VIR_PROC_POLICY_RR, + VIR_PROC_POLICY_DEADLINE, VIR_PROC_POLICY_LAST } virProcessSchedPolicy; -- 2.8.1

As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period. In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm, /* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup; ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process"); #ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif +# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif + # ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h> static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0; + if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; + + ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__ # include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif # include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy; +# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif + VIR_ENUM_DECL(virProcessSchedPolicy); char * @@ -93,6 +116,9 @@ int virProcessRunInMountNamespace(pid_t pid, int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority); + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period); #endif /* __VIR_PROCESS_H__ */ -- 2.8.1

On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif
+# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif +
At least ARMs, POWERs and i386 should be defined from the start so that tests in CI don't fail right away. And other platforms should error out right away. But honestly, I'd drop this compatibility code. What you're adding here is not a bleeding-edge feature.
# ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif
VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0;
+ if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; +
C99 initialization is forbidden in libvirt (but we have no syntax-check rule for it), move it to the top of the code block.
+ ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif
# include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy;
+# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif +
I don't like this redefinition. Not only because it uses __uXX instead of uint##_t. I think we should have a configure.ac check for HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error out if it's not available. If we're doing that, we can safely do the same thing with SCHED_DEADLINE and not have to carry this code on for decades. Otherwise I like it.
VIR_ENUM_DECL(virProcessSchedPolicy);
char * @@ -93,6 +116,9 @@ int virProcessRunInMountNamespace(pid_t pid,
int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority); + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period);
#endif /* __VIR_PROCESS_H__ */ -- 2.8.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 10/11/16 16:21 +0100, Martin Kletzander wrote:
On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif
+# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif +
At least ARMs, POWERs and i386 should be defined from the start so that tests in CI don't fail right away. And other platforms should error out right away. But honestly, I'd drop this compatibility code. What you're adding here is not a bleeding-edge feature.
It maybe makes sense to virReportSystemError in case it's not defined -- although the feature isn't cutting edge, older kernels (such as 3.10 on CentOS) do not have the syscall (or the number) unless they're rebases with RT_PREEMPT patch.
# ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif
VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0;
+ if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; +
C99 initialization is forbidden in libvirt (but we have no syntax-check rule for it), move it to the top of the code block.
+ ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif
# include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy;
+# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif +
I don't like this redefinition. Not only because it uses __uXX instead of uint##_t. I think we should have a configure.ac check for HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error out if it's not available. If we're doing that, we can safely do the same thing with SCHED_DEADLINE and not have to carry this code on for decades.
The struct doesn't seem to be present in (g)libc or any other library. I'm afraid we can't therefore simply error out. As for the types, these should be fixable.
Otherwise I like it.
VIR_ENUM_DECL(virProcessSchedPolicy);
char * @@ -93,6 +116,9 @@ int virProcessRunInMountNamespace(pid_t pid,
int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority); + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period);
#endif /* __VIR_PROCESS_H__ */ -- 2.8.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On Tue, Nov 15, 2016 at 03:51:22PM +0100, Martin Polednik wrote:
On 10/11/16 16:21 +0100, Martin Kletzander wrote:
On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif
+# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif +
At least ARMs, POWERs and i386 should be defined from the start so that tests in CI don't fail right away. And other platforms should error out right away. But honestly, I'd drop this compatibility code. What you're adding here is not a bleeding-edge feature.
It maybe makes sense to virReportSystemError in case it's not defined -- although the feature isn't cutting edge, older kernels (such as 3.10 on CentOS) do not have the syscall (or the number) unless they're rebases with RT_PREEMPT patch.
# ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif
VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0;
+ if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; +
C99 initialization is forbidden in libvirt (but we have no syntax-check rule for it), move it to the top of the code block.
+ ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif
# include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy;
+# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif +
I don't like this redefinition. Not only because it uses __uXX instead of uint##_t. I think we should have a configure.ac check for HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error out if it's not available. If we're doing that, we can safely do the same thing with SCHED_DEADLINE and not have to carry this code on for decades.
The struct doesn't seem to be present in (g)libc or any other library. I'm afraid we can't therefore simply error out.
Can we find out why it isn't present in glibc. Is it simply that it is too new ? If so, IMHO desirable to just wait for glibc to support it and make libvirt code conditional on the new version. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

On 15/11/16 15:02 +0000, Daniel P. Berrange wrote:
On Tue, Nov 15, 2016 at 03:51:22PM +0100, Martin Polednik wrote:
On 10/11/16 16:21 +0100, Martin Kletzander wrote:
On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif
+# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif +
At least ARMs, POWERs and i386 should be defined from the start so that tests in CI don't fail right away. And other platforms should error out right away. But honestly, I'd drop this compatibility code. What you're adding here is not a bleeding-edge feature.
It maybe makes sense to virReportSystemError in case it's not defined -- although the feature isn't cutting edge, older kernels (such as 3.10 on CentOS) do not have the syscall (or the number) unless they're rebases with RT_PREEMPT patch.
# ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif
VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0;
+ if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; +
C99 initialization is forbidden in libvirt (but we have no syntax-check rule for it), move it to the top of the code block.
+ ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif
# include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy;
+# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif +
I don't like this redefinition. Not only because it uses __uXX instead of uint##_t. I think we should have a configure.ac check for HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error out if it's not available. If we're doing that, we can safely do the same thing with SCHED_DEADLINE and not have to carry this code on for decades.
The struct doesn't seem to be present in (g)libc or any other library. I'm afraid we can't therefore simply error out.
Can we find out why it isn't present in glibc. Is it simply that it is too new ? If so, IMHO desirable to just wait for glibc to support it and make libvirt code conditional on the new version.
My feeling is the novelty combined with limited uses in anything but RT applications. That being said, I was unable to find anything mentioning sched_attr and glibc together. I'm slightly afraid of the time for the whole glibc->libvirt->management layer dance.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

On Tue, Nov 15, 2016 at 04:26:58PM +0100, Martin Polednik wrote:
On 15/11/16 15:02 +0000, Daniel P. Berrange wrote:
On Tue, Nov 15, 2016 at 03:51:22PM +0100, Martin Polednik wrote:
On 10/11/16 16:21 +0100, Martin Kletzander wrote:
On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-- src/util/virprocess.h | 28 +++++++++++++++++++- 3 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1b67aee..91a635c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index d68800b..cd1cc9b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -70,6 +70,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process"); # endif # endif
+# ifndef __NR_sched_setattr +# if defined(__x86_64__) +# define __NR_sched_setattr 314 +# endif +# endif +
At least ARMs, POWERs and i386 should be defined from the start so that tests in CI don't fail right away. And other platforms should error out right away. But honestly, I'd drop this compatibility code. What you're adding here is not a bleeding-edge feature.
It maybe makes sense to virReportSystemError in case it's not defined -- although the feature isn't cutting edge, older kernels (such as 3.10 on CentOS) do not have the syscall (or the number) unless they're rebases with RT_PREEMPT patch.
virReportSystemError() will error out at runtime, which is anyway the only proper thing to do when the kernel doesn't support it. But this redefinition makes it available when *building* on older kernels. Whoever does that (older distros) can handle that themselves (downstream) if required.
# ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) _("Namespaces are not supported on this platform.")); return -1; } + +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} #endif
VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, @@ -1225,7 +1247,10 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; int pol = virProcessSchedTranslatePolicy(policy); @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid, if (!policy) return 0;
+ if (pol == SCHED_DEADLINE) { + int ret = 0; + + if (runtime < 1024 || deadline < 1024 || period < 24) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), runtime, deadline, period); + return -1; + } + + if (!(runtime <= deadline && deadline <= period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + runtime, deadline, period); + return -1; + } + + struct sched_attr attrs = { + .size = sizeof(attrs), + .sched_policy = SCHED_DEADLINE, + .sched_flags = 0, + .sched_nice = 0, + .sched_priority = 0, + .sched_runtime = runtime, + .sched_deadline = deadline, + .sched_period = period, + }; +
C99 initialization is forbidden in libvirt (but we have no syntax-check rule for it), move it to the top of the code block.
+ ret = sched_setattr(pid, &attrs, 0); + if (ret != 0) { + virReportSystemError(errno, + _("Cannot set scheduler parameters for pid %d"), + pid); + return -1; + } + + return 0; + } + if (pol == SCHED_FIFO || pol == SCHED_RR) { int min = 0; int max = 0; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..e6914e7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -23,6 +23,9 @@ # define __VIR_PROCESS_H__
# include <sys/types.h> +# ifdef __linux__ +# include <linux/types.h> +# endif
# include "internal.h" # include "virbitmap.h" @@ -39,6 +42,26 @@ typedef enum { VIR_PROC_POLICY_LAST } virProcessSchedPolicy;
+# ifdef __linux__ +struct sched_attr { + __u32 size; + + __u32 sched_policy; + __u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + __s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + __u32 sched_priority; + + /* SCHED_DEADLINE (nsec) */ + __u64 sched_runtime; + __u64 sched_deadline; + __u64 sched_period; +}; +# endif +
I don't like this redefinition. Not only because it uses __uXX instead of uint##_t. I think we should have a configure.ac check for HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error out if it's not available. If we're doing that, we can safely do the same thing with SCHED_DEADLINE and not have to carry this code on for decades.
The struct doesn't seem to be present in (g)libc or any other library. I'm afraid we can't therefore simply error out.
Can we find out why it isn't present in glibc. Is it simply that it is too new ? If so, IMHO desirable to just wait for glibc to support it and make libvirt code conditional on the new version.
My feeling is the novelty combined with limited uses in anything but RT applications. That being said, I was unable to find anything mentioning sched_attr and glibc together.
It's not that new, it's just that it's linux-specific, not POSIX and the only added value compared to sched_setscheduler() is the sched_deadline addition. So most probably nobody bothered. As I see, whoever uses that (I only found strace and chrt from what I have installed) redefine it themselves according to the sched_setattr(2) man page. So we'll probably have to do that ourselves anyway. Also fixing it in glibc would make it work only on glibc platforms, but we're now nicely compatible with other libc implementations (I mean I don't recall anyone describing any new problem on the list recently).
I'm slightly afraid of the time for the whole glibc->libvirt->management layer dance.
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

The code within the function is too specific for priority attribute of RT schedulers. To allow addition of schedulers that group by different properties, we factor out the logic to calculate cpu subset. Instead of comparing by priority, the new code accepts comparator for the 2 sched structs. --- src/conf/domain_conf.c | 77 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ca0fdfd..0ed755c 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23058,6 +23058,47 @@ virDomainDefHasCapabilitiesFeatures(virDomainDefPtr def) return false; } +static bool +virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + bool ret = false; + ret = (baseSched->priority == sched->priority); + + return ret; +} + +static virDomainThreadSchedParamPtr +virDomainSchedSubsetCharacteristic(virDomainDefPtr def, + virBitmapPtr schedMap, + virBitmapPtr subsetMap, + virDomainThreadSchedParamPtr (*func)(virDomainDefPtr, unsigned int), + bool (*comparator)(virDomainThreadSchedParamPtr, + virDomainThreadSchedParamPtr)) +{ + ssize_t nextidx; + virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched = NULL; + + virBitmapClearAll(subsetMap); + + /* we need to find a subset of vCPUs with the given scheduler + * that share the priority */ + nextidx = virBitmapNextSetBit(schedMap, -1); + if (!(sched = func(def, nextidx))) + return NULL; + + baseSched = sched; + ignore_value(virBitmapSetBit(subsetMap, nextidx)); + + while ((nextidx = virBitmapNextSetBit(schedMap, nextidx)) > -1) { + sched = func(def, nextidx); + if (sched && comparator(baseSched, sched)) + ignore_value(virBitmapSetBit(subsetMap, nextidx)); + } + + return baseSched; +} /** * virDomainFormatSchedDef: @@ -23082,8 +23123,9 @@ virDomainFormatSchedDef(virDomainDefPtr def, virBitmapPtr resourceMap) { virBitmapPtr schedMap = NULL; - virBitmapPtr prioMap = NULL; + virBitmapPtr subsetMap = NULL; virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched; char *tmp = NULL; ssize_t next; size_t i; @@ -23098,7 +23140,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, */ if (!(schedMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN)) || - !(prioMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) + !(subsetMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) goto cleanup; for (i = VIR_PROC_POLICY_NONE + 1; i < VIR_PROC_POLICY_LAST; i++) { @@ -23117,9 +23159,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, * have them */ while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; - ssize_t nextprio; bool hasPriority = false; - int priority = 0; + baseSched = NULL; switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: @@ -23132,25 +23173,17 @@ virDomainFormatSchedDef(virDomainDefPtr def, case VIR_PROC_POLICY_FIFO: case VIR_PROC_POLICY_RR: - virBitmapClearAll(prioMap); hasPriority = true; - /* we need to find a subset of vCPUs with the given scheduler - * that share the priority */ - nextprio = virBitmapNextSetBit(schedMap, -1); - if (!(sched = func(def, nextprio))) + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + subsetMap, + func, + virDomainSchedPriorityComparator); + if (baseSched == NULL) goto cleanup; - priority = sched->priority; - ignore_value(virBitmapSetBit(prioMap, nextprio)); - - while ((nextprio = virBitmapNextSetBit(schedMap, nextprio)) > -1) { - sched = func(def, nextprio); - if (sched && sched->priority == priority) - ignore_value(virBitmapSetBit(prioMap, nextprio)); - } - - currentMap = prioMap; + currentMap = subsetMap; break; } @@ -23164,8 +23197,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, virProcessSchedPolicyTypeToString(i)); VIR_FREE(tmp); - if (hasPriority) - virBufferAsprintf(buf, " priority='%d'", priority); + if (hasPriority && baseSched != NULL) + virBufferAsprintf(buf, " priority='%d'", baseSched->priority); virBufferAddLit(buf, "/>\n"); @@ -23178,7 +23211,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, cleanup: virBitmapFree(schedMap); - virBitmapFree(prioMap); + virBitmapFree(subsetMap); return ret; } -- 2.8.1

On Mon, Nov 07, 2016 at 10:01:14AM +0100, Martin Polednik wrote:
The code within the function is too specific for priority attribute of RT schedulers. To allow addition of schedulers that group by different properties, we factor out the logic to calculate cpu subset. Instead of comparing by priority, the new code accepts comparator for the 2 sched structs. --- src/conf/domain_conf.c | 77 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 22 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ca0fdfd..0ed755c 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23058,6 +23058,47 @@ virDomainDefHasCapabilitiesFeatures(virDomainDefPtr def) return false; }
+static bool +virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + bool ret = false; + ret = (baseSched->priority == sched->priority); + + return ret;
Just return (baseSched->priority == sched->priority); all else is not needed here.
+} + +static virDomainThreadSchedParamPtr +virDomainSchedSubsetCharacteristic(virDomainDefPtr def, + virBitmapPtr schedMap, + virBitmapPtr subsetMap, + virDomainThreadSchedParamPtr (*func)(virDomainDefPtr, unsigned int), + bool (*comparator)(virDomainThreadSchedParamPtr, + virDomainThreadSchedParamPtr)) +{ + ssize_t nextidx; + virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched = NULL; + + virBitmapClearAll(subsetMap); + + /* we need to find a subset of vCPUs with the given scheduler + * that share the priority */ + nextidx = virBitmapNextSetBit(schedMap, -1); + if (!(sched = func(def, nextidx))) + return NULL; + + baseSched = sched; + ignore_value(virBitmapSetBit(subsetMap, nextidx)); + + while ((nextidx = virBitmapNextSetBit(schedMap, nextidx)) > -1) { + sched = func(def, nextidx); + if (sched && comparator(baseSched, sched)) + ignore_value(virBitmapSetBit(subsetMap, nextidx)); + } + + return baseSched; +}
/** * virDomainFormatSchedDef: @@ -23082,8 +23123,9 @@ virDomainFormatSchedDef(virDomainDefPtr def, virBitmapPtr resourceMap) { virBitmapPtr schedMap = NULL; - virBitmapPtr prioMap = NULL; + virBitmapPtr subsetMap = NULL; virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched; char *tmp = NULL; ssize_t next; size_t i; @@ -23098,7 +23140,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, */
if (!(schedMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN)) || - !(prioMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) + !(subsetMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) goto cleanup;
for (i = VIR_PROC_POLICY_NONE + 1; i < VIR_PROC_POLICY_LAST; i++) { @@ -23117,9 +23159,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, * have them */ while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; - ssize_t nextprio; bool hasPriority = false; - int priority = 0; + baseSched = NULL;
switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: @@ -23132,25 +23173,17 @@ virDomainFormatSchedDef(virDomainDefPtr def,
case VIR_PROC_POLICY_FIFO: case VIR_PROC_POLICY_RR: - virBitmapClearAll(prioMap); hasPriority = true;
- /* we need to find a subset of vCPUs with the given scheduler - * that share the priority */ - nextprio = virBitmapNextSetBit(schedMap, -1); - if (!(sched = func(def, nextprio))) + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + subsetMap, + func, + virDomainSchedPriorityComparator); + if (baseSched == NULL) goto cleanup;
- priority = sched->priority; - ignore_value(virBitmapSetBit(prioMap, nextprio)); - - while ((nextprio = virBitmapNextSetBit(schedMap, nextprio)) > -1) { - sched = func(def, nextprio); - if (sched && sched->priority == priority) - ignore_value(virBitmapSetBit(prioMap, nextprio)); - } - - currentMap = prioMap; + currentMap = subsetMap; break;
This looks like it was factored out but with the combination of renaming the variable with the same patch makes it weird to review.
@@ -23164,8 +23197,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, virProcessSchedPolicyTypeToString(i)); VIR_FREE(tmp);
- if (hasPriority) - virBufferAsprintf(buf, " priority='%d'", priority); + if (hasPriority && baseSched != NULL) + virBufferAsprintf(buf, " priority='%d'", baseSched->priority);
virBufferAddLit(buf, "/>\n");
@@ -23178,7 +23211,7 @@ virDomainFormatSchedDef(virDomainDefPtr def,
cleanup: virBitmapFree(schedMap); - virBitmapFree(prioMap); + virBitmapFree(subsetMap); return ret; }
-- 2.8.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

As the code for changing task scheduler is now able to choose deadline scheduler, we can update domain configuration to parse the scheduler. The subset of vcpus or iothreads where the scheduler is configured can be calculated by simply adding comparator based on SCHED_DEADLINE attributes (runtime, deadline, period) and correctly passing it within virDomainFormatSchedDef. --- src/conf/domain_conf.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 0ed755c..a4eb2e1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -15710,7 +15710,10 @@ static virBitmapPtr virDomainSchedulerParse(xmlNodePtr node, const char *name, virProcessSchedPolicy *policy, - int *priority) + int *priority, + unsigned long long *runtime, + unsigned long long *deadline, + unsigned long long *period) { virBitmapPtr ret = NULL; char *tmp = NULL; @@ -15766,6 +15769,42 @@ virDomainSchedulerParse(xmlNodePtr node, VIR_FREE(tmp); } + if (pol == VIR_PROC_POLICY_DEADLINE) { + if (!(tmp = virXMLPropString(node, "runtime"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler runtime")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, runtime) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element runtime")); + goto error; + } + + if (!(tmp = virXMLPropString(node, "deadline"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler deadline")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, deadline) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element deadline")); + goto error; + } + + if (!(tmp = virXMLPropString(node, "period"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler period")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, period) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element period")); + goto error; + } + VIR_FREE(tmp); + } + return ret; error: @@ -15786,9 +15825,13 @@ virDomainThreadSchedParseHelper(xmlNodePtr node, virDomainThreadSchedParamPtr sched; virProcessSchedPolicy policy; int priority; + unsigned long long runtime; + unsigned long long deadline; + unsigned long long period; int ret = -1; - if (!(map = virDomainSchedulerParse(node, name, &policy, &priority))) + if (!(map = virDomainSchedulerParse(node, name, &policy, &priority, + &runtime, &deadline, &period))) goto cleanup; while ((next = virBitmapNextSetBit(map, next)) > -1) { @@ -15804,6 +15847,9 @@ virDomainThreadSchedParseHelper(xmlNodePtr node, sched->policy = policy; sched->priority = priority; + sched->runtime = runtime; + sched->deadline = deadline; + sched->period = period; } ret = 0; @@ -23068,6 +23114,18 @@ virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, return ret; } +static bool +virDomainSchedDeadlineComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + bool ret = false; + ret = (baseSched->runtime == sched->priority && + baseSched->deadline == sched->deadline && + baseSched->period == sched->period); + + return ret; +} + static virDomainThreadSchedParamPtr virDomainSchedSubsetCharacteristic(virDomainDefPtr def, virBitmapPtr schedMap, @@ -23160,13 +23218,13 @@ virDomainFormatSchedDef(virDomainDefPtr def, while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; bool hasPriority = false; + bool isDeadline = false; baseSched = NULL; switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: case VIR_PROC_POLICY_BATCH: case VIR_PROC_POLICY_IDLE: - case VIR_PROC_POLICY_DEADLINE: case VIR_PROC_POLICY_LAST: currentMap = schedMap; break; @@ -23185,6 +23243,19 @@ virDomainFormatSchedDef(virDomainDefPtr def, currentMap = subsetMap; break; + case VIR_PROC_POLICY_DEADLINE: + isDeadline = true; + + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + subsetMap, + func, + virDomainSchedDeadlineComparator); + if (baseSched == NULL) + goto cleanup; + + currentMap = subsetMap; + break; } /* now we have the complete group */ @@ -23199,6 +23270,9 @@ virDomainFormatSchedDef(virDomainDefPtr def, if (hasPriority && baseSched != NULL) virBufferAsprintf(buf, " priority='%d'", baseSched->priority); + if (isDeadline && baseSched != NULL) + virBufferAsprintf(buf, " runtime='%llu' deadline='%llu' period='%llu'", + baseSched->runtime, baseSched->deadline, baseSched->period); virBufferAddLit(buf, "/>\n"); -- 2.8.1

Can be used in the same way as other schedulers, only requiring additional parameters. --- docs/schemas/domaincommon.rng | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 19d45fd..1461d34 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -851,6 +851,22 @@ <ref name="unsignedShort"/> </attribute> </group> + <group> + <attribute name="scheduler"> + <choice> + <value>deadline</value> + </choice> + </attribute> + <attribute name="runtime"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="deadline"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="period"> + <ref name="unsignedLong"/> + </attribute> + </group> </choice> </define> -- 2.8.1

--- docs/formatdomain.html.in | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 11b3330..1442190 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -799,12 +799,12 @@ <dd> The optional <code>vcpusched</code> elements specifies the scheduler type (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, - <code>rr</code>) for particular vCPU/IOThread threads (based on - <code>vcpus</code> and <code>iothreads</code>, leaving out - <code>vcpus</code>/<code>iothreads</code> sets the default). Valid - <code>vcpus</code> values start at 0 through one less than the - number of vCPU's defined for the domain. Valid <code>iothreads</code> - values are described in the <code>iothreadids</code> + <code>rr</code>, <code>deadline</code>) for particular vCPU/IOThread + threads (based on <code>vcpus</code> and <code>iothreads</code>, + leaving out <code>vcpus</code>/<code>iothreads</code> sets the + default). Valid <code>vcpus</code> values start at 0 through one less + than the number of vCPU's defined for the domain. Valid + <code>iothreads</code> values are described in the <code>iothreadids</code> <a href="#elementsIOThreadsAllocation"><code>description</code></a>. If no <code>iothreadids</code> are defined, then libvirt numbers IOThreads from 1 to the number of <code>iothreads</code> available @@ -812,6 +812,9 @@ <code>rr</code>), priority must be specified as well (and is ignored for non-real-time ones). The value range for the priority depends on the host kernel (usually 1-99). + For deadline real-time scheduler, additional parameters runtime, + deadline and period must be specified. The value of these parameters is + specified in nanoseconds, where minimum is 1024 (1 usec). <span class="since">Since 1.2.13</span> </dd> -- 2.8.1

On Mon, Nov 07, 2016 at 10:01:11AM +0100, Martin Polednik wrote:
The policy SCHED_DEADLINE is available since kernel 3.14 (and most likely backported to older RT_PREEMPT kernels). It is safer to use than fifo or round robin policies due to only limiting part of cpu time for the RT process, leading to lack of lockups of the host.
The series adds new vcpusched/iothreadsched called 'deadline' that activates SCHED_DEADLINE for given process. As the scheduler is linux specific, extra implementation was required - it is not possible to use sched_setscheduler, sched_setattr syscall must be used.
Martin Polednik (6): conf: add entries for deadline scheduler util: allow virProcessSetScheduler to set SCHED_DEADLINE virDomainFormatSchedDef: factor out subset code
conf: add deadline scheduler schema: add deadline scheduler docs: mention deadline scheduler in vcpusched
These three should be merged together. Also you should error out with unsupported values (e.g. deadline < runtime) in virDomainDefPostParse() so that users know it right away. That way we can change it later, but not when everything is acceptable now. Otherwise looks fine from a quick look. Have a nice day, Martin

The policy SCHED_DEADLINE is available since kernel 3.14 (and most likely backported to older RT_PREEMPT kernels). It is safer to use than fifo or round robin policies due to only limiting part of cpu time for the RT process, leading to lack of lockups of the host. The series adds new vcpusched/iothreadsched called 'deadline' that activates SCHED_DEADLINE for given process. As the scheduler is linux specific, extra implementation was required - it is not possible to use sched_setscheduler, sched_setattr syscall must be used. Changes in v2: * using newly merged SCHED_* facilities * split renaming and refactoring in virDomainFormatSchedDef * squashed conf additions * moved error handling to *PostParse Martin Polednik (5): conf: add entries for deadline scheduler util: allow virProcessSetScheduler to set SCHED_DEADLINE virDomainFormatSchedDef: factor out subset code virDomainFormatSchedDef: rename prioMap and nextprio conf: add deadline scheduler docs/formatdomain.html.in | 15 ++-- docs/schemas/domaincommon.rng | 16 ++++ src/conf/domain_conf.c | 179 ++++++++++++++++++++++++++++++++++++------ src/conf/domain_conf.h | 3 + src/qemu/qemu_process.c | 3 +- src/util/virprocess.c | 50 ++++++++++-- src/util/virprocess.h | 26 +++++- 7 files changed, 256 insertions(+), 36 deletions(-) -- 2.8.1

Deadline scheduler, or SCHED_DEADLINE, is a new realtime scheduler added to Linux 3.14. In order to support it as a possible value in vcpusched/iothreadsched, we have to add it to scheduler related structures. --- src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 3 +++ src/util/virprocess.c | 13 ++++++++++++- src/util/virprocess.h | 1 + 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 6e008e2..2dd5e6f 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23132,6 +23132,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, case VIR_PROC_POLICY_NONE: case VIR_PROC_POLICY_BATCH: case VIR_PROC_POLICY_IDLE: + case VIR_PROC_POLICY_DEADLINE: case VIR_PROC_POLICY_LAST: currentMap = schedMap; break; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 541b600..67b9e3e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1840,6 +1840,9 @@ typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; struct _virDomainThreadSchedParam { virProcessSchedPolicy policy; int priority; + unsigned long long runtime; + unsigned long long deadline; + unsigned long long period; }; typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 3cacc89..9b4a555 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -34,6 +34,9 @@ #endif #if HAVE_SCHED_SETSCHEDULER # include <sched.h> +# ifdef __linux__ +# include <linux/sched.h> +# endif #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || HAVE_BSD_CPU_AFFINITY @@ -111,7 +114,8 @@ VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST, "batch", "idle", "fifo", - "rr"); + "rr", + "deadline"); /** * virProcessTranslateStatus: @@ -1212,6 +1216,13 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) case VIR_PROC_POLICY_RR: return SCHED_RR; + case VIR_PROC_POLICY_DEADLINE: +# ifdef SCHED_DEADLINE + return SCHED_DEADLINE; +# else + return -1; +# endif + case VIR_PROC_POLICY_LAST: /* nada */ break; diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 04e9802..1eac3e6 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -34,6 +34,7 @@ typedef enum { VIR_PROC_POLICY_IDLE, VIR_PROC_POLICY_FIFO, VIR_PROC_POLICY_RR, + VIR_PROC_POLICY_DEADLINE, VIR_PROC_POLICY_LAST } virProcessSchedPolicy; -- 2.8.1

As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period. In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 37 +++++++++++++++++++++++++++++++++---- src/util/virprocess.h | 25 ++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 3552a31..e984ccd 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm, /* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup; ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 9b4a555..acf4d6b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -67,6 +67,7 @@ VIR_LOG_INIT("util.process"); #ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -87,10 +88,8 @@ VIR_LOG_INIT("util.process"); # define __NR_setns 339 # endif # endif - # ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h> static inline int setns(int fd, int nstype) { @@ -100,6 +99,24 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +# if defined(SCHED_DEADLINE) && defined(__NR_sched_setattr) +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} +# else +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} +# endif #else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -1234,10 +1251,16 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; + struct sched_attr attrs = { sizeof(attrs), SCHED_DEADLINE, 0, 0, 0, + runtime, deadline, period }; int pol = virProcessSchedTranslatePolicy(policy); + int ret = 0; VIR_DEBUG("pid=%lld, policy=%d, priority=%u", (long long) pid, policy, priority); @@ -1280,7 +1303,13 @@ virProcessSetScheduler(pid_t pid, param.sched_priority = priority; } - if (sched_setscheduler(pid, pol, ¶m) < 0) { + if (pol == SCHED_DEADLINE) { + ret = sched_setattr(pid, &attrs, 0); + } else { + ret = sched_setscheduler(pid, pol, ¶m); + } + + if (ret < 0) { virReportSystemError(errno, _("Cannot set scheduler parameters for pid %lld"), (long long) pid); diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..02522b4 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -41,6 +41,26 @@ typedef enum { VIR_ENUM_DECL(virProcessSchedPolicy); +# ifdef SCHED_DEADLINE +struct sched_attr { + uint32_t size; + + uint32_t sched_policy; + uint64_t sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + uint32_t sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + uint32_t sched_priority; + + /* SCHED_DEADLINE (nsec) */ + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; +# endif + char * virProcessTranslateStatus(int status); @@ -93,6 +113,9 @@ int virProcessRunInMountNamespace(pid_t pid, int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority); + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period); #endif /* __VIR_PROCESS_H__ */ -- 2.8.1

On Mon, Nov 21, 2016 at 01:56:05PM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 37 +++++++++++++++++++++++++++++++++---- src/util/virprocess.h | 25 ++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 3552a31..e984ccd 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
/* Set scheduler type and priority. */ if (sched && - virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + virProcessSetScheduler(pid, sched->policy, sched->priority, + sched->runtime, sched->deadline, sched->period) < 0) goto cleanup;
ret = 0; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 9b4a555..acf4d6b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -67,6 +67,7 @@ VIR_LOG_INIT("util.process");
#ifdef __linux__ +# include <sys/syscall.h> /* * Workaround older glibc. While kernel may support the setns * syscall, the glibc wrapper might not exist. If that's the @@ -87,10 +88,8 @@ VIR_LOG_INIT("util.process"); # define __NR_setns 339 # endif # endif - # ifndef HAVE_SETNS # if defined(__NR_setns) -# include <sys/syscall.h>
static inline int setns(int fd, int nstype) { @@ -100,6 +99,24 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +# if defined(SCHED_DEADLINE) && defined(__NR_sched_setattr) +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} +# else +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} +# endif
As I said, no need to do that, just use AC_CHECK_FUNCS_ONCE we already have in configure.ac. Don't redefine it, just error out if it's not available.
#else /* !__linux__ */ static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) { @@ -1234,10 +1251,16 @@ virProcessSchedTranslatePolicy(virProcessSchedPolicy policy) int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority) + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period) { struct sched_param param = {0}; + struct sched_attr attrs = { sizeof(attrs), SCHED_DEADLINE, 0, 0, 0, + runtime, deadline, period }; int pol = virProcessSchedTranslatePolicy(policy); + int ret = 0;
VIR_DEBUG("pid=%lld, policy=%d, priority=%u", (long long) pid, policy, priority); @@ -1280,7 +1303,13 @@ virProcessSetScheduler(pid_t pid, param.sched_priority = priority; }
- if (sched_setscheduler(pid, pol, ¶m) < 0) { + if (pol == SCHED_DEADLINE) { + ret = sched_setattr(pid, &attrs, 0); + } else { + ret = sched_setscheduler(pid, pol, ¶m); + } + + if (ret < 0) { virReportSystemError(errno, _("Cannot set scheduler parameters for pid %lld"), (long long) pid); diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 1eac3e6..02522b4 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -41,6 +41,26 @@ typedef enum {
VIR_ENUM_DECL(virProcessSchedPolicy);
+# ifdef SCHED_DEADLINE +struct sched_attr { + uint32_t size; + + uint32_t sched_policy; + uint64_t sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + uint32_t sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + uint32_t sched_priority; + + /* SCHED_DEADLINE (nsec) */ + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; +# endif +
There's no need to expose this in a header file, move it to virprocess.c.
char * virProcessTranslateStatus(int status);
@@ -93,6 +113,9 @@ int virProcessRunInMountNamespace(pid_t pid,
int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, - int priority); + int priority, + unsigned long long runtime, + unsigned long long deadline, + unsigned long long period);
#endif /* __VIR_PROCESS_H__ */ -- 2.8.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On Mon, Nov 21, 2016 at 02:23:21PM +0100, Martin Kletzander wrote:
On Mon, Nov 21, 2016 at 01:56:05PM +0100, Martin Polednik wrote:
As sched_deadline is linux specific, it is not set through sched_setscheduler but rather the sched_setattr syscall. Additionally, the scheduler has new set of parameters: runtime, deadline and period.
In this part of the series, we extend virProcessSetScheduler to accommodate the additional parameters and use sched_setattr syscall to set SCHED_DEADLINE. Another small addition is sched_attr struct, which is required for sched_setattr and not exposed from the kernel or libraries. --- src/qemu/qemu_process.c | 3 ++- src/util/virprocess.c | 37 +++++++++++++++++++++++++++++++++---- src/util/virprocess.h | 25 ++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 9b4a555..acf4d6b 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -100,6 +99,24 @@ static inline int setns(int fd, int nstype) # error Please determine the syscall number for setns on your architecture # endif # endif + +# if defined(SCHED_DEADLINE) && defined(__NR_sched_setattr) +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + return syscall(__NR_sched_setattr, pid, attr, flags); +} +# else +static inline int sched_setattr(pid_t pid, + const struct sched_attr *attr, + unsigned int flags) +{ + virReportSystemError(ENOSYS, "%s", + _("Deadline scheduler is not supported on this platform.")); + return -1; +} +# endif
As I said, no need to do that, just use AC_CHECK_FUNCS_ONCE we already have in configure.ac. Don't redefine it, just error out if it's not available.
OK, my bad, it *is* needed, even according to Documentation/scheduler/sched-deadline.txt Sorry for the confusion.

The code within the function is too specific for priority attribute of RT schedulers. To allow addition of schedulers that group by different properties, we factor out the logic to calculate cpu subset. Instead of comparing by priority, the new code accepts comparator for the 2 sched structs. --- src/conf/domain_conf.c | 66 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 2dd5e6f..fe5a754 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23065,6 +23065,44 @@ virDomainDefHasCapabilitiesFeatures(virDomainDefPtr def) return false; } +static bool +virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + return (baseSched->priority == sched->priority); +} + +static virDomainThreadSchedParamPtr +virDomainSchedSubsetCharacteristic(virDomainDefPtr def, + virBitmapPtr schedMap, + virBitmapPtr prioMap, + virDomainThreadSchedParamPtr (*func)(virDomainDefPtr, unsigned int), + bool (*comparator)(virDomainThreadSchedParamPtr, + virDomainThreadSchedParamPtr)) +{ + ssize_t nextprio; + virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched = NULL; + + virBitmapClearAll(prioMap); + + /* we need to find a subset of vCPUs with the given scheduler + * that share the priority */ + nextprio = virBitmapNextSetBit(schedMap, -1); + if (!(sched = func(def, nextprio))) + return NULL; + + baseSched = sched; + ignore_value(virBitmapSetBit(prioMap, nextprio)); + + while ((nextprio = virBitmapNextSetBit(schedMap, nextprio)) > -1) { + sched = func(def, nextprio); + if (sched && comparator(baseSched, sched)) + ignore_value(virBitmapSetBit(prioMap, nextprio)); + } + + return baseSched; +} /** * virDomainFormatSchedDef: @@ -23091,6 +23129,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, virBitmapPtr schedMap = NULL; virBitmapPtr prioMap = NULL; virDomainThreadSchedParamPtr sched; + virDomainThreadSchedParamPtr baseSched; char *tmp = NULL; ssize_t next; size_t i; @@ -23124,9 +23163,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, * have them */ while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; - ssize_t nextprio; bool hasPriority = false; - int priority = 0; + baseSched = NULL; switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: @@ -23139,24 +23177,16 @@ virDomainFormatSchedDef(virDomainDefPtr def, case VIR_PROC_POLICY_FIFO: case VIR_PROC_POLICY_RR: - virBitmapClearAll(prioMap); hasPriority = true; - /* we need to find a subset of vCPUs with the given scheduler - * that share the priority */ - nextprio = virBitmapNextSetBit(schedMap, -1); - if (!(sched = func(def, nextprio))) + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + prioMap, + func, + virDomainSchedPriorityComparator); + if (baseSched == NULL) goto cleanup; - priority = sched->priority; - ignore_value(virBitmapSetBit(prioMap, nextprio)); - - while ((nextprio = virBitmapNextSetBit(schedMap, nextprio)) > -1) { - sched = func(def, nextprio); - if (sched && sched->priority == priority) - ignore_value(virBitmapSetBit(prioMap, nextprio)); - } - currentMap = prioMap; break; } @@ -23171,8 +23201,8 @@ virDomainFormatSchedDef(virDomainDefPtr def, virProcessSchedPolicyTypeToString(i)); VIR_FREE(tmp); - if (hasPriority) - virBufferAsprintf(buf, " priority='%d'", priority); + if (hasPriority && baseSched != NULL) + virBufferAsprintf(buf, " priority='%d'", baseSched->priority); virBufferAddLit(buf, "/>\n"); -- 2.8.1

Since the code is now generalized beyond just vcpusched priority, it's better to rename the variables that indicated direct relationship with priority: prioMap and nextprio. --- src/conf/domain_conf.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index fe5a754..9ec23be 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -23075,30 +23075,30 @@ virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, static virDomainThreadSchedParamPtr virDomainSchedSubsetCharacteristic(virDomainDefPtr def, virBitmapPtr schedMap, - virBitmapPtr prioMap, + virBitmapPtr subsetMap, virDomainThreadSchedParamPtr (*func)(virDomainDefPtr, unsigned int), bool (*comparator)(virDomainThreadSchedParamPtr, virDomainThreadSchedParamPtr)) { - ssize_t nextprio; + ssize_t nextidx; virDomainThreadSchedParamPtr sched; virDomainThreadSchedParamPtr baseSched = NULL; - virBitmapClearAll(prioMap); + virBitmapClearAll(subsetMap); /* we need to find a subset of vCPUs with the given scheduler * that share the priority */ - nextprio = virBitmapNextSetBit(schedMap, -1); - if (!(sched = func(def, nextprio))) + nextidx = virBitmapNextSetBit(schedMap, -1); + if (!(sched = func(def, nextidx))) return NULL; baseSched = sched; - ignore_value(virBitmapSetBit(prioMap, nextprio)); + ignore_value(virBitmapSetBit(subsetMap, nextidx)); - while ((nextprio = virBitmapNextSetBit(schedMap, nextprio)) > -1) { - sched = func(def, nextprio); + while ((nextidx = virBitmapNextSetBit(schedMap, nextidx)) > -1) { + sched = func(def, nextidx); if (sched && comparator(baseSched, sched)) - ignore_value(virBitmapSetBit(prioMap, nextprio)); + ignore_value(virBitmapSetBit(subsetMap, nextidx)); } return baseSched; @@ -23127,7 +23127,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, virBitmapPtr resourceMap) { virBitmapPtr schedMap = NULL; - virBitmapPtr prioMap = NULL; + virBitmapPtr subsetMap = NULL; virDomainThreadSchedParamPtr sched; virDomainThreadSchedParamPtr baseSched; char *tmp = NULL; @@ -23144,7 +23144,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, */ if (!(schedMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN)) || - !(prioMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) + !(subsetMap = virBitmapNew(VIR_DOMAIN_CPUMASK_LEN))) goto cleanup; for (i = VIR_PROC_POLICY_NONE + 1; i < VIR_PROC_POLICY_LAST; i++) { @@ -23181,13 +23181,13 @@ virDomainFormatSchedDef(virDomainDefPtr def, baseSched = virDomainSchedSubsetCharacteristic(def, schedMap, - prioMap, + subsetMap, func, virDomainSchedPriorityComparator); if (baseSched == NULL) goto cleanup; - currentMap = prioMap; + currentMap = subsetMap; break; } @@ -23215,7 +23215,7 @@ virDomainFormatSchedDef(virDomainDefPtr def, cleanup: virBitmapFree(schedMap); - virBitmapFree(prioMap); + virBitmapFree(subsetMap); return ret; } -- 2.8.1

As the code for changing task scheduler is now able to choose deadline scheduler, we can update domain configuration to parse the scheduler. --- docs/formatdomain.html.in | 15 +++--- docs/schemas/domaincommon.rng | 16 +++++++ src/conf/domain_conf.c | 108 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 9 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 4e40aa1..2f654f9 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -799,12 +799,12 @@ <dd> The optional <code>vcpusched</code> elements specifies the scheduler type (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, - <code>rr</code>) for particular vCPU/IOThread threads (based on - <code>vcpus</code> and <code>iothreads</code>, leaving out - <code>vcpus</code>/<code>iothreads</code> sets the default). Valid - <code>vcpus</code> values start at 0 through one less than the - number of vCPU's defined for the domain. Valid <code>iothreads</code> - values are described in the <code>iothreadids</code> + <code>rr</code>, <code>deadline</code>) for particular vCPU/IOThread + threads (based on <code>vcpus</code> and <code>iothreads</code>, + leaving out <code>vcpus</code>/<code>iothreads</code> sets the + default). Valid <code>vcpus</code> values start at 0 through one less + than the number of vCPU's defined for the domain. Valid + <code>iothreads</code> values are described in the <code>iothreadids</code> <a href="#elementsIOThreadsAllocation"><code>description</code></a>. If no <code>iothreadids</code> are defined, then libvirt numbers IOThreads from 1 to the number of <code>iothreads</code> available @@ -812,6 +812,9 @@ <code>rr</code>), priority must be specified as well (and is ignored for non-real-time ones). The value range for the priority depends on the host kernel (usually 1-99). + For deadline real-time scheduler, additional parameters runtime, + deadline and period must be specified. The value of these parameters is + specified in nanoseconds, where minimum is 1024 (1 usec). <span class="since">Since 1.2.13</span> </dd> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 19d45fd..1461d34 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -851,6 +851,22 @@ <ref name="unsignedShort"/> </attribute> </group> + <group> + <attribute name="scheduler"> + <choice> + <value>deadline</value> + </choice> + </attribute> + <attribute name="runtime"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="deadline"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="period"> + <ref name="unsignedLong"/> + </attribute> + </group> </choice> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9ec23be..342745e 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4518,11 +4518,13 @@ static int virDomainVcpuDefPostParse(virDomainDefPtr def) { virDomainVcpuDefPtr vcpu; + virDomainThreadSchedParamPtr sched; size_t maxvcpus = virDomainDefGetVcpusMax(def); size_t i; for (i = 0; i < maxvcpus; i++) { vcpu = virDomainDefGetVcpu(def, i); + sched = &vcpu->sched; /* impossible but some compilers don't like it */ if (!vcpu) @@ -4549,6 +4551,35 @@ virDomainVcpuDefPostParse(virDomainDefPtr def) case VIR_TRISTATE_BOOL_LAST: break; } + + switch (sched->policy) { + case VIR_PROC_POLICY_NONE: + case VIR_PROC_POLICY_BATCH: + case VIR_PROC_POLICY_IDLE: + case VIR_PROC_POLICY_FIFO: + case VIR_PROC_POLICY_RR: + break; + case VIR_PROC_POLICY_DEADLINE: + if (sched->runtime < 1024 || sched->deadline < 1024 || sched->period < 1024) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), + sched->runtime, sched->deadline, sched->period); + return -1; + } + + if (!(sched->runtime <= sched->deadline && sched->deadline <= sched->period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + sched->runtime, sched->deadline, sched->period); + return -1; + } + break; + case VIR_PROC_POLICY_LAST: + break; + } } return 0; @@ -15717,7 +15748,10 @@ static virBitmapPtr virDomainSchedulerParse(xmlNodePtr node, const char *name, virProcessSchedPolicy *policy, - int *priority) + int *priority, + unsigned long long *runtime, + unsigned long long *deadline, + unsigned long long *period) { virBitmapPtr ret = NULL; char *tmp = NULL; @@ -15773,6 +15807,42 @@ virDomainSchedulerParse(xmlNodePtr node, VIR_FREE(tmp); } + if (pol == VIR_PROC_POLICY_DEADLINE) { + if (!(tmp = virXMLPropString(node, "runtime"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler runtime")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, runtime) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element runtime")); + goto error; + } + + if (!(tmp = virXMLPropString(node, "deadline"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler deadline")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, deadline) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element deadline")); + goto error; + } + + if (!(tmp = virXMLPropString(node, "period"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler period")); + goto error; + } + if (virStrToLong_ull(tmp, NULL, 10, period) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element period")); + goto error; + } + VIR_FREE(tmp); + } + return ret; error: @@ -15793,9 +15863,13 @@ virDomainThreadSchedParseHelper(xmlNodePtr node, virDomainThreadSchedParamPtr sched; virProcessSchedPolicy policy; int priority; + unsigned long long runtime; + unsigned long long deadline; + unsigned long long period; int ret = -1; - if (!(map = virDomainSchedulerParse(node, name, &policy, &priority))) + if (!(map = virDomainSchedulerParse(node, name, &policy, &priority, + &runtime, &deadline, &period))) goto cleanup; while ((next = virBitmapNextSetBit(map, next)) > -1) { @@ -15811,6 +15885,9 @@ virDomainThreadSchedParseHelper(xmlNodePtr node, sched->policy = policy; sched->priority = priority; + sched->runtime = runtime; + sched->deadline = deadline; + sched->period = period; } ret = 0; @@ -23072,6 +23149,15 @@ virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, return (baseSched->priority == sched->priority); } +static bool +virDomainSchedDeadlineComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + return (baseSched->runtime == sched->priority && + baseSched->deadline == sched->deadline && + baseSched->period == sched->period); +} + static virDomainThreadSchedParamPtr virDomainSchedSubsetCharacteristic(virDomainDefPtr def, virBitmapPtr schedMap, @@ -23164,13 +23250,13 @@ virDomainFormatSchedDef(virDomainDefPtr def, while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; bool hasPriority = false; + bool isDeadline = false; baseSched = NULL; switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: case VIR_PROC_POLICY_BATCH: case VIR_PROC_POLICY_IDLE: - case VIR_PROC_POLICY_DEADLINE: case VIR_PROC_POLICY_LAST: currentMap = schedMap; break; @@ -23189,6 +23275,19 @@ virDomainFormatSchedDef(virDomainDefPtr def, currentMap = subsetMap; break; + case VIR_PROC_POLICY_DEADLINE: + isDeadline = true; + + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + subsetMap, + func, + virDomainSchedDeadlineComparator); + if (baseSched == NULL) + goto cleanup; + + currentMap = subsetMap; + break; } /* now we have the complete group */ @@ -23203,6 +23302,9 @@ virDomainFormatSchedDef(virDomainDefPtr def, if (hasPriority && baseSched != NULL) virBufferAsprintf(buf, " priority='%d'", baseSched->priority); + if (isDeadline && baseSched != NULL) + virBufferAsprintf(buf, " runtime='%llu' deadline='%llu' period='%llu'", + baseSched->runtime, baseSched->deadline, baseSched->period); virBufferAddLit(buf, "/>\n"); -- 2.8.1

On Mon, Nov 21, 2016 at 01:56:08PM +0100, Martin Polednik wrote:
As the code for changing task scheduler is now able to choose deadline scheduler, we can update domain configuration to parse the scheduler. --- docs/formatdomain.html.in | 15 +++--- docs/schemas/domaincommon.rng | 16 +++++++ src/conf/domain_conf.c | 108 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 9 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 4e40aa1..2f654f9 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -799,12 +799,12 @@ <dd> The optional <code>vcpusched</code> elements specifies the scheduler type (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, - <code>rr</code>) for particular vCPU/IOThread threads (based on - <code>vcpus</code> and <code>iothreads</code>, leaving out - <code>vcpus</code>/<code>iothreads</code> sets the default). Valid - <code>vcpus</code> values start at 0 through one less than the - number of vCPU's defined for the domain. Valid <code>iothreads</code> - values are described in the <code>iothreadids</code> + <code>rr</code>, <code>deadline</code>) for particular vCPU/IOThread + threads (based on <code>vcpus</code> and <code>iothreads</code>, + leaving out <code>vcpus</code>/<code>iothreads</code> sets the + default). Valid <code>vcpus</code> values start at 0 through one less + than the number of vCPU's defined for the domain. Valid + <code>iothreads</code> values are described in the <code>iothreadids</code> <a href="#elementsIOThreadsAllocation"><code>description</code></a>. If no <code>iothreadids</code> are defined, then libvirt numbers IOThreads from 1 to the number of <code>iothreads</code> available @@ -812,6 +812,9 @@ <code>rr</code>), priority must be specified as well (and is ignored for non-real-time ones). The value range for the priority depends on the host kernel (usually 1-99). + For deadline real-time scheduler, additional parameters runtime, + deadline and period must be specified. The value of these parameters is + specified in nanoseconds, where minimum is 1024 (1 usec). <span class="since">Since 1.2.13</span> </dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 19d45fd..1461d34 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -851,6 +851,22 @@ <ref name="unsignedShort"/> </attribute> </group> + <group> + <attribute name="scheduler"> + <choice> + <value>deadline</value> + </choice> + </attribute> + <attribute name="runtime"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="deadline"> + <ref name="unsignedLong"/> + </attribute> + <attribute name="period"> + <ref name="unsignedLong"/> + </attribute> + </group> </choice> </define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9ec23be..342745e 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4518,11 +4518,13 @@ static int virDomainVcpuDefPostParse(virDomainDefPtr def) { virDomainVcpuDefPtr vcpu; + virDomainThreadSchedParamPtr sched; size_t maxvcpus = virDomainDefGetVcpusMax(def); size_t i;
for (i = 0; i < maxvcpus; i++) { vcpu = virDomainDefGetVcpu(def, i); + sched = &vcpu->sched;
/* impossible but some compilers don't like it */ if (!vcpu) @@ -4549,6 +4551,35 @@ virDomainVcpuDefPostParse(virDomainDefPtr def) case VIR_TRISTATE_BOOL_LAST: break; } + + switch (sched->policy) { + case VIR_PROC_POLICY_NONE: + case VIR_PROC_POLICY_BATCH: + case VIR_PROC_POLICY_IDLE: + case VIR_PROC_POLICY_FIFO: + case VIR_PROC_POLICY_RR: + break; + case VIR_PROC_POLICY_DEADLINE: + if (sched->runtime < 1024 || sched->deadline < 1024 || sched->period < 1024) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler runtime, deadline and period must be " + "higher or equal to 1024 (1 us) (runtime(%llu), " + "deadline(%llu), period(%llu))"), + sched->runtime, sched->deadline, sched->period); + return -1; + } + + if (!(sched->runtime <= sched->deadline && sched->deadline <= sched->period)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Scheduler configuration does not satisfy " + "(runtime(%llu) <= deadline(%llu) <= period(%llu))"), + sched->runtime, sched->deadline, sched->period); + return -1; + } + break; + case VIR_PROC_POLICY_LAST: + break; + } }
The indentation is totally off in this hunk ^^
@@ -23072,6 +23149,15 @@ virDomainSchedPriorityComparator(virDomainThreadSchedParamPtr baseSched, return (baseSched->priority == sched->priority); }
+static bool +virDomainSchedDeadlineComparator(virDomainThreadSchedParamPtr baseSched, + virDomainThreadSchedParamPtr sched) +{ + return (baseSched->runtime == sched->priority && + baseSched->deadline == sched->deadline && + baseSched->period == sched->period); +} + static virDomainThreadSchedParamPtr virDomainSchedSubsetCharacteristic(virDomainDefPtr def, virBitmapPtr schedMap, @@ -23164,13 +23250,13 @@ virDomainFormatSchedDef(virDomainDefPtr def, while (!virBitmapIsAllClear(schedMap)) { virBitmapPtr currentMap = NULL; bool hasPriority = false; + bool isDeadline = false; baseSched = NULL;
switch ((virProcessSchedPolicy) i) { case VIR_PROC_POLICY_NONE: case VIR_PROC_POLICY_BATCH: case VIR_PROC_POLICY_IDLE: - case VIR_PROC_POLICY_DEADLINE: case VIR_PROC_POLICY_LAST: currentMap = schedMap; break; @@ -23189,6 +23275,19 @@ virDomainFormatSchedDef(virDomainDefPtr def,
currentMap = subsetMap; break; + case VIR_PROC_POLICY_DEADLINE: + isDeadline = true; + + baseSched = virDomainSchedSubsetCharacteristic(def, + schedMap, + subsetMap, + func, + virDomainSchedDeadlineComparator); + if (baseSched == NULL) + goto cleanup; + + currentMap = subsetMap; + break; }
/* now we have the complete group */ @@ -23203,6 +23302,9 @@ virDomainFormatSchedDef(virDomainDefPtr def,
if (hasPriority && baseSched != NULL) virBufferAsprintf(buf, " priority='%d'", baseSched->priority); + if (isDeadline && baseSched != NULL) + virBufferAsprintf(buf, " runtime='%llu' deadline='%llu' period='%llu'", + baseSched->runtime, baseSched->deadline, baseSched->period);
virBufferAddLit(buf, "/>\n");
And this (plus patches 3/5 and 4/5) is overly complicated due to how the code is trying to merge the same specifications. I'll propose a patch to remove that unnecessary complexity in a while. Rest seems fine.
-- 2.8.1
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
participants (3)
-
Daniel P. Berrange
-
Martin Kletzander
-
Martin Polednik