[PATCH v4 0/1] qemu: add per-vcpu delay stats

Fixed since v3: - Report error if cannot read stats. - Don't fail if cannot read stats. - Add documentation. - Drop unnecessary checks. - Don't delete unrelated whitespace. - Make sure schedstat file is closed before returning. Fixed since v2: - Close schedstat file after use. Fixes since v1: - Collect per-vcpu stats. - Coding style errors - Use glib functions. Aleksei Zakharov (1): qemu: add per-vcpu delay stats docs/manpages/virsh.rst | 4 ++++ src/libvirt-domain.c | 4 ++++ src/qemu/qemu_driver.c | 44 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) -- 2.17.1

This patch adds delay time (steal time inside guest) to libvirt domain per-vcpu stats. Delay time is an important performance metric. It is a consequence of the overloaded CPU. Knowledge of the delay time of a virtual machine helps to understand if it is affected and estimate the impact. As a result, it is possible to react exactly when needed and rebalance the load between hosts. This is used by cloud providers to provide quality of service, especially when the CPU is oversubscribed. It's more convenient to work with this metric in a context of a libvirt domain. Any monitoring software may use this information. Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru> --- docs/manpages/virsh.rst | 4 ++++ src/libvirt-domain.c | 4 ++++ src/qemu/qemu_driver.c | 44 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/docs/manpages/virsh.rst b/docs/manpages/virsh.rst index e3afa48f7b..a2b83ddfaa 100644 --- a/docs/manpages/virsh.rst +++ b/docs/manpages/virsh.rst @@ -2295,6 +2295,10 @@ When selecting the *--state* group the following fields are returned: * ``vcpu.<num>.halted`` - virtual CPU <num> is halted: yes or no (may indicate the processor is idle or even disabled, depending on the architecture) +* ``vcpu.<num>.delay`` - time the vCPU <num> thread was enqueued by the + host scheduler, but was waiting in the queue instead of running. + Exposed to the VM as a steal time. + *--interface* returns: diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index 4af0166872..e54d11e513 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11693,6 +11693,10 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * "vcpu.<num>.halted" - virtual CPU <num> is halted, may indicate the * processor is idle or even disabled, depending * on the architecture) + * "vcpu.<num>.delay" - time the vCPU <num> thread was enqueued by the + * host scheduler, but was waiting in the queue + * instead of running. Exposed to the VM as a steal + * time. * * VIR_DOMAIN_STATS_INTERFACE: * Return network interface statistics (from domain point of view). diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b9bbdf8d48..9ec3c8fce7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1332,6 +1332,34 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) { return virCapabilitiesFormatXML(caps); } +static int +qemuGetSchedstatDelay(unsigned long long *cpudelay, + pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + unsigned long long oncpu = 0; + g_autofree FILE *schedstat = NULL; + + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/schedstat", (int)pid); + + schedstat = fopen(proc, "r"); + if (!schedstat) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to open schedstat file at '%s'"), + proc); + } + if (fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse schedstat info at '%s'"), + proc); + } + + VIR_FORCE_FCLOSE(schedstat); + return 0; +} static int qemuGetSchedInfo(unsigned long long *cpuWait, @@ -1470,6 +1498,7 @@ static int qemuDomainHelperGetVcpus(virDomainObjPtr vm, virVcpuInfoPtr info, unsigned long long *cpuwait, + unsigned long long *cpudelay, int maxinfo, unsigned char *cpumaps, int maplen) @@ -1529,6 +1558,11 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm, return -1; } + if (cpudelay) { + if (qemuGetSchedstatDelay(&(cpudelay[ncpuinfo]), vm->pid, vcpupid) < 0) + return -1; + } + ncpuinfo++; } @@ -4873,7 +4907,7 @@ qemuDomainGetVcpus(virDomainPtr dom, goto cleanup; } - ret = qemuDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen); + ret = qemuDomainHelperGetVcpus(vm, info, NULL, NULL, maxinfo, cpumaps, maplen); cleanup: virDomainObjEndAPI(&vm); @@ -17934,6 +17968,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, int ret = -1; virVcpuInfoPtr cpuinfo = NULL; g_autofree unsigned long long *cpuwait = NULL; + g_autofree unsigned long long *cpudelay = NULL; if (virTypedParamListAddUInt(params, virDomainDefGetVcpus(dom->def), "vcpu.current") < 0) @@ -17945,6 +17980,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, cpuinfo = g_new0(virVcpuInfo, virDomainDefGetVcpus(dom->def)); cpuwait = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def)); + cpudelay = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def)); if (HAVE_JOB(privflags) && virDomainObjIsActive(dom) && qemuDomainRefreshVcpuHalted(driver, dom, QEMU_ASYNC_JOB_NONE) < 0) { @@ -17953,7 +17989,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, virResetLastError(); } - if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait, + if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait, cpudelay, virDomainDefGetVcpus(dom->def), NULL, 0) < 0) { virResetLastError(); @@ -17978,6 +18014,10 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, "vcpu.%u.wait", cpuinfo[i].number) < 0) goto cleanup; + if (virTypedParamListAddULLong(params, cpudelay[i], + "vcpu.%u.delay", cpuinfo[i].number) < 0) + goto cleanup; + /* state below is extracted from the individual vcpu structs */ if (!(vcpu = virDomainDefGetVcpu(dom->def, cpuinfo[i].number))) continue; -- 2.17.1

On 2/19/21 5:08 PM, Aleksei Zakharov wrote:
This patch adds delay time (steal time inside guest) to libvirt domain per-vcpu stats. Delay time is an important performance metric. It is a consequence of the overloaded CPU. Knowledge of the delay time of a virtual machine helps to understand if it is affected and estimate the impact.
As a result, it is possible to react exactly when needed and rebalance the load between hosts. This is used by cloud providers to provide quality of service, especially when the CPU is oversubscribed.
It's more convenient to work with this metric in a context of a libvirt domain. Any monitoring software may use this information.
Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru> --- docs/manpages/virsh.rst | 4 ++++ src/libvirt-domain.c | 4 ++++ src/qemu/qemu_driver.c | 44 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/docs/manpages/virsh.rst b/docs/manpages/virsh.rst index e3afa48f7b..a2b83ddfaa 100644 --- a/docs/manpages/virsh.rst +++ b/docs/manpages/virsh.rst @@ -2295,6 +2295,10 @@ When selecting the *--state* group the following fields are returned: * ``vcpu.<num>.halted`` - virtual CPU <num> is halted: yes or no (may indicate the processor is idle or even disabled, depending on the architecture) +* ``vcpu.<num>.delay`` - time the vCPU <num> thread was enqueued by the + host scheduler, but was waiting in the queue instead of running. + Exposed to the VM as a steal time. +
*--interface* returns: diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index 4af0166872..e54d11e513 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11693,6 +11693,10 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * "vcpu.<num>.halted" - virtual CPU <num> is halted, may indicate the * processor is idle or even disabled, depending * on the architecture) + * "vcpu.<num>.delay" - time the vCPU <num> thread was enqueued by the + * host scheduler, but was waiting in the queue + * instead of running. Exposed to the VM as a steal + * time. * * VIR_DOMAIN_STATS_INTERFACE: * Return network interface statistics (from domain point of view). diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b9bbdf8d48..9ec3c8fce7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1332,6 +1332,34 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) { return virCapabilitiesFormatXML(caps); }
+static int +qemuGetSchedstatDelay(unsigned long long *cpudelay, + pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + unsigned long long oncpu = 0; + g_autofree FILE *schedstat = NULL;
So, the most common practice in the project is to declare the file fds with VIR_AUTOCLOSE to avoid having to close the fd at the end. There are a lot of examples in src/util/virnetdev.c. This is the first time I see 'g_autofree FILE *'.
+ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/schedstat", (int)pid); + + schedstat = fopen(proc, "r"); + if (!schedstat) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to open schedstat file at '%s'"), + proc); + } + if (fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse schedstat info at '%s'"), + proc); + } + + VIR_FORCE_FCLOSE(schedstat);
That said, reading the change log for the patch I see that someone already mentioned the needed to close the FD in v1 and you fixed it in v2, so I'll take that this code, albeit unusual to me, works. If it doesn't, it's a rather trivial amend that the maintainer can do before pushing. Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
+ return 0; +}
static int qemuGetSchedInfo(unsigned long long *cpuWait, @@ -1470,6 +1498,7 @@ static int qemuDomainHelperGetVcpus(virDomainObjPtr vm, virVcpuInfoPtr info, unsigned long long *cpuwait, + unsigned long long *cpudelay, int maxinfo, unsigned char *cpumaps, int maplen) @@ -1529,6 +1558,11 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm, return -1; }
+ if (cpudelay) { + if (qemuGetSchedstatDelay(&(cpudelay[ncpuinfo]), vm->pid, vcpupid) < 0) + return -1; + } + ncpuinfo++; }
@@ -4873,7 +4907,7 @@ qemuDomainGetVcpus(virDomainPtr dom, goto cleanup; }
- ret = qemuDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen); + ret = qemuDomainHelperGetVcpus(vm, info, NULL, NULL, maxinfo, cpumaps, maplen);
cleanup: virDomainObjEndAPI(&vm); @@ -17934,6 +17968,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, int ret = -1; virVcpuInfoPtr cpuinfo = NULL; g_autofree unsigned long long *cpuwait = NULL; + g_autofree unsigned long long *cpudelay = NULL;
if (virTypedParamListAddUInt(params, virDomainDefGetVcpus(dom->def), "vcpu.current") < 0) @@ -17945,6 +17980,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
cpuinfo = g_new0(virVcpuInfo, virDomainDefGetVcpus(dom->def)); cpuwait = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def)); + cpudelay = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def));
if (HAVE_JOB(privflags) && virDomainObjIsActive(dom) && qemuDomainRefreshVcpuHalted(driver, dom, QEMU_ASYNC_JOB_NONE) < 0) { @@ -17953,7 +17989,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, virResetLastError(); }
- if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait, + if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait, cpudelay, virDomainDefGetVcpus(dom->def), NULL, 0) < 0) { virResetLastError(); @@ -17978,6 +18014,10 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver, "vcpu.%u.wait", cpuinfo[i].number) < 0) goto cleanup;
+ if (virTypedParamListAddULLong(params, cpudelay[i], + "vcpu.%u.delay", cpuinfo[i].number) < 0) + goto cleanup; + /* state below is extracted from the individual vcpu structs */ if (!(vcpu = virDomainDefGetVcpu(dom->def, cpuinfo[i].number))) continue;

On 2/19/21 9:08 PM, Aleksei Zakharov wrote:
This patch adds delay time (steal time inside guest) to libvirt domain per-vcpu stats. Delay time is an important performance metric. It is a consequence of the overloaded CPU. Knowledge of the delay time of a virtual machine helps to understand if it is affected and estimate the impact.
As a result, it is possible to react exactly when needed and rebalance the load between hosts. This is used by cloud providers to provide quality of service, especially when the CPU is oversubscribed.
It's more convenient to work with this metric in a context of a libvirt domain. Any monitoring software may use this information.
Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru> --- docs/manpages/virsh.rst | 4 ++++ src/libvirt-domain.c | 4 ++++ src/qemu/qemu_driver.c | 44 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/docs/manpages/virsh.rst b/docs/manpages/virsh.rst index e3afa48f7b..a2b83ddfaa 100644 --- a/docs/manpages/virsh.rst +++ b/docs/manpages/virsh.rst @@ -2295,6 +2295,10 @@ When selecting the *--state* group the following fields are returned: * ``vcpu.<num>.halted`` - virtual CPU <num> is halted: yes or no (may indicate the processor is idle or even disabled, depending on the architecture) +* ``vcpu.<num>.delay`` - time the vCPU <num> thread was enqueued by the + host scheduler, but was waiting in the queue instead of running. + Exposed to the VM as a steal time. +
*--interface* returns: diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index 4af0166872..e54d11e513 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11693,6 +11693,10 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * "vcpu.<num>.halted" - virtual CPU <num> is halted, may indicate the * processor is idle or even disabled, depending * on the architecture) + * "vcpu.<num>.delay" - time the vCPU <num> thread was enqueued by the + * host scheduler, but was waiting in the queue + * instead of running. Exposed to the VM as a steal + * time. * * VIR_DOMAIN_STATS_INTERFACE: * Return network interface statistics (from domain point of view). diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b9bbdf8d48..9ec3c8fce7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1332,6 +1332,34 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) { return virCapabilitiesFormatXML(caps); }
+static int +qemuGetSchedstatDelay(unsigned long long *cpudelay, + pid_t pid, pid_t tid) +{
I know you took inspiration in existing code, but our standards have moved since that code was written, a bit.
+ g_autofree char *proc = NULL; + unsigned long long oncpu = 0; + g_autofree FILE *schedstat = NULL; + + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/schedstat", (int)pid); + + schedstat = fopen(proc, "r"); + if (!schedstat) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to open schedstat file at '%s'"), + proc);
I'd expect this to be an error. Except for the case when the file doesn't exist.
+ } + if (fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse schedstat info at '%s'"), + proc);
And this has to be error always.
+ } + + VIR_FORCE_FCLOSE(schedstat); + return 0;
I'm fixing this function a bit.
+}
The rest looks okay. I'm pushing it. Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Congratulations on your first libvirt contribution! Michal

вт, 9 мар. 2021 г. в 15:35, Michal Privoznik <mprivozn@redhat.com>:
This patch adds delay time (steal time inside guest) to libvirt domain per-vcpu stats. Delay time is an important performance metric. It is a consequence of the overloaded CPU. Knowledge of the delay time of a virtual machine helps to understand if it is affected and estimate the impact.
As a result, it is possible to react exactly when needed and rebalance the load between hosts. This is used by cloud providers to provide quality of service, especially when the CPU is oversubscribed.
It's more convenient to work with this metric in a context of a libvirt domain. Any monitoring software may use this information.
Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru> --- docs/manpages/virsh.rst | 4 ++++ src/libvirt-domain.c | 4 ++++ src/qemu/qemu_driver.c | 44 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/docs/manpages/virsh.rst b/docs/manpages/virsh.rst index e3afa48f7b..a2b83ddfaa 100644 --- a/docs/manpages/virsh.rst +++ b/docs/manpages/virsh.rst @@ -2295,6 +2295,10 @@ When selecting the *--state* group the following fields are returned: * ``vcpu.<num>.halted`` - virtual CPU <num> is halted: yes or no (may indicate the processor is idle or even disabled, depending on the architecture) +* ``vcpu.<num>.delay`` - time the vCPU <num> thread was enqueued by the + host scheduler, but was waiting in the queue instead of running. + Exposed to the VM as a steal time. +
*--interface* returns: diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index 4af0166872..e54d11e513 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11693,6 +11693,10 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * "vcpu.<num>.halted" - virtual CPU <num> is halted, may indicate
* processor is idle or even disabled, depending * on the architecture) + * "vcpu.<num>.delay" - time the vCPU <num> thread was enqueued by
On 2/19/21 9:08 PM, Aleksei Zakharov wrote: the the
+ * host scheduler, but was waiting in the queue + * instead of running. Exposed to the VM as a steal + * time. * * VIR_DOMAIN_STATS_INTERFACE: * Return network interface statistics (from domain point of view). diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b9bbdf8d48..9ec3c8fce7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1332,6 +1332,34 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) { return virCapabilitiesFormatXML(caps); }
+static int +qemuGetSchedstatDelay(unsigned long long *cpudelay, + pid_t pid, pid_t tid) +{
I know you took inspiration in existing code, but our standards have moved since that code was written, a bit.
+ g_autofree char *proc = NULL; + unsigned long long oncpu = 0; + g_autofree FILE *schedstat = NULL; + + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/schedstat", (int)pid); + + schedstat = fopen(proc, "r"); + if (!schedstat) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to open schedstat file at '%s'"), + proc);
I'd expect this to be an error. Except for the case when the file doesn't exist.
+ } + if (fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse schedstat info at '%s'"), + proc);
And this has to be error always.
+ } + + VIR_FORCE_FCLOSE(schedstat); + return 0;
I'm fixing this function a bit.
+}
The rest looks okay. I'm pushing it.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Congratulations on your first libvirt contribution!
Michal
Awesome, thank you!
-- Best Regards, Aleksei Zakharov
participants (3)
-
Aleksei Zakharov
-
Daniel Henrique Barboza
-
Michal Privoznik