On Tue, Nov 15, 2016 at 04:26:58PM +0100, Martin Polednik wrote:
On 15/11/16 15:02 +0000, Daniel P. Berrange wrote:
>On Tue, Nov 15, 2016 at 03:51:22PM +0100, Martin Polednik wrote:
>> On 10/11/16 16:21 +0100, Martin Kletzander wrote:
>> > On Mon, Nov 07, 2016 at 10:01:13AM +0100, Martin Polednik wrote:
>> > > As sched_deadline is linux specific, it is not set through
>> > > sched_setscheduler but rather the sched_setattr syscall. Additionally,
>> > > the scheduler has new set of parameters: runtime, deadline and period.
>> > >
>> > > In this part of the series, we extend virProcessSetScheduler to
>> > > accommodate the additional parameters and use sched_setattr syscall to
>> > > set SCHED_DEADLINE. Another small addition is sched_attr struct, which
>> > > is required for sched_setattr and not exposed from the kernel or
>> > > libraries.
>> > > ---
>> > > src/qemu/qemu_process.c | 3 ++-
>> > > src/util/virprocess.c | 70
+++++++++++++++++++++++++++++++++++++++++++++++--
>> > > src/util/virprocess.h | 28 +++++++++++++++++++-
>> > > 3 files changed, 97 insertions(+), 4 deletions(-)
>> > >
>> > > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> > > index 1b67aee..91a635c 100644
>> > > --- a/src/qemu/qemu_process.c
>> > > +++ b/src/qemu/qemu_process.c
>> > > @@ -2395,7 +2395,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
>> > >
>> > > /* Set scheduler type and priority. */
>> > > if (sched &&
>> > > - virProcessSetScheduler(pid, sched->policy,
sched->priority) < 0)
>> > > + virProcessSetScheduler(pid, sched->policy,
sched->priority,
>> > > + sched->runtime, sched->deadline,
sched->period) < 0)
>> > > goto cleanup;
>> > >
>> > > ret = 0;
>> > > diff --git a/src/util/virprocess.c b/src/util/virprocess.c
>> > > index d68800b..cd1cc9b 100644
>> > > --- a/src/util/virprocess.c
>> > > +++ b/src/util/virprocess.c
>> > > @@ -70,6 +70,7 @@
>> > > VIR_LOG_INIT("util.process");
>> > >
>> > > #ifdef __linux__
>> > > +# include <sys/syscall.h>
>> > > /*
>> > > * Workaround older glibc. While kernel may support the setns
>> > > * syscall, the glibc wrapper might not exist. If that's the
>> > > @@ -91,9 +92,14 @@ VIR_LOG_INIT("util.process");
>> > > # endif
>> > > # endif
>> > >
>> > > +# ifndef __NR_sched_setattr
>> > > +# if defined(__x86_64__)
>> > > +# define __NR_sched_setattr 314
>> > > +# endif
>> > > +# endif
>> > > +
>> >
>> > At least ARMs, POWERs and i386 should be defined from the start so that
>> > tests in CI don't fail right away. And other platforms should error
out
>> > right away. But honestly, I'd drop this compatibility code. What
>> > you're adding here is not a bleeding-edge feature.
>>
>> It maybe makes sense to virReportSystemError in case it's not defined
>> -- although the feature isn't cutting edge, older kernels (such as
>> 3.10 on CentOS) do not have the syscall (or the number)
>> unless they're rebases with RT_PREEMPT patch.
>>
virReportSystemError() will error out at runtime, which is anyway the
only proper thing to do when the kernel doesn't support it. But this
redefinition makes it available when *building* on older kernels.
Whoever does that (older distros) can handle that themselves
(downstream) if required.
>> > > # ifndef HAVE_SETNS
>> > > # if defined(__NR_setns)
>> > > -# include <sys/syscall.h>
>> > >
>> > > static inline int setns(int fd, int nstype)
>> > > {
>> > > @@ -103,6 +109,13 @@ static inline int setns(int fd, int nstype)
>> > > # error Please determine the syscall number for setns on your
architecture
>> > > # endif
>> > > # endif
>> > > +
>> > > +static inline int sched_setattr(pid_t pid,
>> > > + const struct sched_attr *attr,
>> > > + unsigned int flags)
>> > > +{
>> > > + return syscall(__NR_sched_setattr, pid, attr, flags);
>> > > +}
>> > > #else /* !__linux__ */
>> > > static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype
ATTRIBUTE_UNUSED)
>> > > {
>> > > @@ -110,6 +123,15 @@ static inline int setns(int fd ATTRIBUTE_UNUSED,
int nstype ATTRIBUTE_UNUSED)
>> > > _("Namespaces are not supported on this
platform."));
>> > > return -1;
>> > > }
>> > > +
>> > > +static inline int sched_setattr(pid_t pid,
>> > > + const struct sched_attr *attr,
>> > > + unsigned int flags)
>> > > +{
>> > > + virReportSystemError(ENOSYS, "%s",
>> > > + _("Deadline scheduler is not supported
on this platform."));
>> > > + return -1;
>> > > +}
>> > > #endif
>> > >
>> > > VIR_ENUM_IMPL(virProcessSchedPolicy, VIR_PROC_POLICY_LAST,
>> > > @@ -1225,7 +1247,10 @@
virProcessSchedTranslatePolicy(virProcessSchedPolicy policy)
>> > > int
>> > > virProcessSetScheduler(pid_t pid,
>> > > virProcessSchedPolicy policy,
>> > > - int priority)
>> > > + int priority,
>> > > + unsigned long long runtime,
>> > > + unsigned long long deadline,
>> > > + unsigned long long period)
>> > > {
>> > > struct sched_param param = {0};
>> > > int pol = virProcessSchedTranslatePolicy(policy);
>> > > @@ -1235,6 +1260,47 @@ virProcessSetScheduler(pid_t pid,
>> > > if (!policy)
>> > > return 0;
>> > >
>> > > + if (pol == SCHED_DEADLINE) {
>> > > + int ret = 0;
>> > > +
>> > > + if (runtime < 1024 || deadline < 1024 || period < 24)
{
>> > > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> > > + _("Scheduler runtime, deadline and period
must be "
>> > > + "higher or equal to 1024 (1 us)
(runtime(%llu), "
>> > > + "deadline(%llu), period(%llu))"),
runtime, deadline, period);
>> > > + return -1;
>> > > + }
>> > > +
>> > > + if (!(runtime <= deadline && deadline <=
period)) {
>> > > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> > > + _("Scheduler configuration does not
satisfy "
>> > > + "(runtime(%llu) <= deadline(%llu)
<= period(%llu))"),
>> > > + runtime, deadline, period);
>> > > + return -1;
>> > > + }
>> > > +
>> > > + struct sched_attr attrs = {
>> > > + .size = sizeof(attrs),
>> > > + .sched_policy = SCHED_DEADLINE,
>> > > + .sched_flags = 0,
>> > > + .sched_nice = 0,
>> > > + .sched_priority = 0,
>> > > + .sched_runtime = runtime,
>> > > + .sched_deadline = deadline,
>> > > + .sched_period = period,
>> > > + };
>> > > +
>> >
>> > C99 initialization is forbidden in libvirt (but we have no syntax-check
>> > rule for it), move it to the top of the code block.
>> >
>> > > + ret = sched_setattr(pid, &attrs, 0);
>> > > + if (ret != 0) {
>> > > + virReportSystemError(errno,
>> > > + _("Cannot set scheduler
parameters for pid %d"),
>> > > + pid);
>> > > + return -1;
>> > > + }
>> > > +
>> > > + return 0;
>> > > + }
>> > > +
>> > > if (pol == SCHED_FIFO || pol == SCHED_RR) {
>> > > int min = 0;
>> > > int max = 0;
>> > > diff --git a/src/util/virprocess.h b/src/util/virprocess.h
>> > > index 1eac3e6..e6914e7 100644
>> > > --- a/src/util/virprocess.h
>> > > +++ b/src/util/virprocess.h
>> > > @@ -23,6 +23,9 @@
>> > > # define __VIR_PROCESS_H__
>> > >
>> > > # include <sys/types.h>
>> > > +# ifdef __linux__
>> > > +# include <linux/types.h>
>> > > +# endif
>> > >
>> > > # include "internal.h"
>> > > # include "virbitmap.h"
>> > > @@ -39,6 +42,26 @@ typedef enum {
>> > > VIR_PROC_POLICY_LAST
>> > > } virProcessSchedPolicy;
>> > >
>> > > +# ifdef __linux__
>> > > +struct sched_attr {
>> > > + __u32 size;
>> > > +
>> > > + __u32 sched_policy;
>> > > + __u64 sched_flags;
>> > > +
>> > > + /* SCHED_NORMAL, SCHED_BATCH */
>> > > + __s32 sched_nice;
>> > > +
>> > > + /* SCHED_FIFO, SCHED_RR */
>> > > + __u32 sched_priority;
>> > > +
>> > > + /* SCHED_DEADLINE (nsec) */
>> > > + __u64 sched_runtime;
>> > > + __u64 sched_deadline;
>> > > + __u64 sched_period;
>> > > +};
>> > > +# endif
>> > > +
>> >
>> > I don't like this redefinition. Not only because it uses __uXX instead
>> > of uint##_t. I think we should have a configure.ac check for
>> > HAVE_STRUCT_SCHED_ATTR (as we do with some other structs) and just error
>> > out if it's not available. If we're doing that, we can safely do
the
>> > same thing with SCHED_DEADLINE and not have to carry this code on for
>> > decades.
>>
>> The struct doesn't seem to be present in (g)libc or any other
>> library. I'm afraid we can't therefore simply error out.
>
>Can we find out why it isn't present in glibc. Is it simply that it
>is too new ? If so, IMHO desirable to just wait for glibc to support
>it and make libvirt code conditional on the new version.
My feeling is the novelty combined with limited uses in anything but
RT applications. That being said, I was unable to find anything
mentioning sched_attr and glibc together.
It's not that new, it's just that it's linux-specific, not POSIX and the
only added value compared to sched_setscheduler() is the sched_deadline
addition. So most probably nobody bothered. As I see, whoever uses
that (I only found strace and chrt from what I have installed) redefine
it themselves according to the sched_setattr(2) man page. So we'll
probably have to do that ourselves anyway.
Also fixing it in glibc would make it work only on glibc platforms, but
we're now nicely compatible with other libc implementations (I mean I
don't recall anyone describing any new problem on the list recently).