[libvirt] [PATCH 0/2] physical cpu usages of virtual cpus

See patch 1 and patch 4 for the purpose of this series. patches 1,2 are for libvirt. patch 3 is for ocaml-libvirt. patch 4 is for virt-top. Hu Tao (2): Add a new flag VIR_DOMAIN_CPU_STATS_F_VCPU to virDomainGetCPUStats Adds support to VIR_DOMAIN_CPU_STATS_F_VCPU in qemu_driver. include/libvirt/libvirt.h.in | 10 +++ src/qemu/qemu_driver.c | 152 +++++++++++++++++++++++++++++++++++++----- src/util/cgroup.c | 4 +- tools/virsh.c | 17 +++-- 4 files changed, 160 insertions(+), 23 deletions(-)

Currently virDomainGetCPUStats gets total cpu usage, which consists of: 1. vcpu usage: the physical cpu time consumed by virtual cpu(s) of domain 2. hypervisor: `total cpu usage' - `vcpu usage' The flag VIR_DOMAIN_CPU_STATS_F_VCPU is for getting vcpu usages. --- include/libvirt/libvirt.h.in | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 499dcd4..94c6abd 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -1355,6 +1355,16 @@ int virDomainGetState (virDomainPtr domain, */ #define VIR_DOMAIN_CPU_STATS_SYSTEMTIME "system_time" +typedef enum { + /* virDomainModificationImpact and virTypedParameterFlags go + here. */ + + /* Additionally, these flags may be bitwise-OR'd in. These + flags should not override those of virDomainModificationImpact + and virTypedParameterFlags */ + VIR_DOMAIN_CPU_STATS_F_VCPU = 1 << 3, /* get vcpu stats */ +} virDomainGetCPUStatsFlags; + int virDomainGetCPUStats(virDomainPtr domain, virTypedParameterPtr params, unsigned int nparams, -- 1.7.1

On 04/18/2012 05:14 AM, Hu Tao wrote:
Currently virDomainGetCPUStats gets total cpu usage, which consists of:
1. vcpu usage: the physical cpu time consumed by virtual cpu(s) of domain 2. hypervisor: `total cpu usage' - `vcpu usage'
The flag VIR_DOMAIN_CPU_STATS_F_VCPU is for getting vcpu usages. --- include/libvirt/libvirt.h.in | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-)
Missing documentation of what the new flag does in src/libvirt.c.
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 499dcd4..94c6abd 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -1355,6 +1355,16 @@ int virDomainGetState (virDomainPtr domain, */ #define VIR_DOMAIN_CPU_STATS_SYSTEMTIME "system_time"
+typedef enum { + /* virDomainModificationImpact and virTypedParameterFlags go + here. */
Why reserve space for virDomainModificationImpact? CPU stats are only available for a running VM, so there is no point in reserving a flag to query the stats of an inactive persistent domain. But I do agree that we must not conflict with virTypedParameterFlags.
+ + /* Additionally, these flags may be bitwise-OR'd in. These + flags should not override those of virDomainModificationImpact + and virTypedParameterFlags */ + VIR_DOMAIN_CPU_STATS_F_VCPU = 1 << 3, /* get vcpu stats */ +} virDomainGetCPUStatsFlags; + int virDomainGetCPUStats(virDomainPtr domain, virTypedParameterPtr params, unsigned int nparams,
-- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

--- src/qemu/qemu_driver.c | 152 ++++++++++++++++++++++++++++++++++++++++++----- src/util/cgroup.c | 4 +- tools/virsh.c | 17 ++++-- 3 files changed, 150 insertions(+), 23 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0d3b0bd..165b5f3 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12377,19 +12377,110 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group, return nparams; } +/* get the cpu time from cpuacct cgroup group, saving + cpu time value in cpu_time. caller is responsible + for freeing memory allocated for cpu_time. + return 0 on success, -1 otherwise */ +static int getVcpuPercpuStats(virCgroupPtr group, + unsigned long long **cpu_time, + unsigned int *num) +{ + int ret = -1; + unsigned long long *ptime = NULL; + char *buf = NULL; + char *pos; + unsigned long long tmp; + + if (virCgroupGetCpuacctPercpuUsage(group, &buf)) + goto error; + + pos = buf; + *num = 0; + while (virStrToLong_ull(pos, &pos, 10, &tmp) == 0) + (*num)++; + + if (*num > 0) { + int i; + + if (VIR_ALLOC_N(ptime, *num) < 0) + goto error; + + pos = buf; + for (i = 0; i < *num; i++) + virStrToLong_ull(pos, &pos, 10, ptime + i); + *cpu_time = ptime; + ret = 0; + } +error: + return ret; +} + +static int getSumVcpuPercpuStats(virCgroupPtr group, + unsigned int nvcpu, + unsigned long long **sum_cpu_time, + unsigned int *num) +{ + unsigned long long *cpu_time[nvcpu]; + unsigned int ncpu_time[nvcpu]; + unsigned int max = 0; + unsigned long long *tmp = NULL; + virCgroupPtr group_vcpu = NULL; + int ret = -1; + int i, j; + + for (i = 0; i < nvcpu; i++) { + ret = virCgroupForVcpu(group, i, &group_vcpu, 0); + if (ret < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cpuacct parse error")); + goto error; + } + ret = getVcpuPercpuStats(group_vcpu, + &cpu_time[i], + &ncpu_time[i]); + if (ret < 0) + goto error; + if (max < ncpu_time[i]) + max = ncpu_time[i]; + } + + if (max > 0) { + if (VIR_ALLOC_N(tmp, max) < 0) + goto error; + + memset(tmp, 0, 
sizeof(*tmp) * max); + for (i = 0; i < nvcpu; i++) { + for (j = 0; j < ncpu_time[i]; j++) + tmp[j] += cpu_time[i][j]; + } + *sum_cpu_time = tmp; + *num = max; + ret = 0; + } + + for (i = 0; i < nvcpu; i++) + VIR_FREE(cpu_time[i]); + +error: + return ret; +} + static int qemuDomainGetPercpuStats(virDomainPtr domain, + virDomainObjPtr vm, virCgroupPtr group, virTypedParameterPtr params, unsigned int nparams, int start_cpu, - unsigned int ncpus) + unsigned int ncpus, + unsigned int flags) { char *map = NULL; int rv = -1; int i, max_id; char *pos; char *buf = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; virTypedParameterPtr ent; int param_idx; @@ -12425,22 +12516,48 @@ qemuDomainGetPercpuStats(virDomainPtr domain, if (max_id - start_cpu > ncpus - 1) max_id = start_cpu + ncpus - 1; - for (i = 0; i <= max_id; i++) { + if (flags & VIR_DOMAIN_CPU_STATS_F_VCPU) { + unsigned long long *sum_cpu_time; unsigned long long cpu_time; + unsigned int n; - if (!map[i]) { - cpu_time = 0; - } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cpuacct parse error")); - goto cleanup; + getSumVcpuPercpuStats(group, + priv->nvcpupids, + &sum_cpu_time, + &n); + + for (i = 0; i <= max_id && i < n; i++) { + if (i < start_cpu) + continue; + + if (!map[i]) + cpu_time = 0; + else + cpu_time = sum_cpu_time[i]; + if (virTypedParameterAssign(¶ms[(i - start_cpu) * nparams + param_idx], + VIR_DOMAIN_CPU_STATS_CPUTIME, + VIR_TYPED_PARAM_ULLONG, + cpu_time) < 0) + goto cleanup; + } + } else { + for (i = 0; i <= max_id; i++) { + unsigned long long cpu_time; + + if (!map[i]) { + cpu_time = 0; + } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cpuacct parse error")); + goto cleanup; + } + if (i < start_cpu) + continue; + ent = ¶ms[ (i - start_cpu) * nparams + param_idx]; + if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME, + VIR_TYPED_PARAM_ULLONG, cpu_time) < 0) + goto 
cleanup; } - if (i < start_cpu) - continue; - ent = ¶ms[ (i - start_cpu) * nparams + param_idx]; - if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME, - VIR_TYPED_PARAM_ULLONG, cpu_time) < 0) - goto cleanup; } rv = param_idx + 1; cleanup: @@ -12464,7 +12581,8 @@ qemuDomainGetCPUStats(virDomainPtr domain, int ret = -1; bool isActive; - virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY, -1); + virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY | + VIR_DOMAIN_CPU_STATS_F_VCPU, -1); qemuDriverLock(driver); @@ -12497,8 +12615,8 @@ qemuDomainGetCPUStats(virDomainPtr domain, if (start_cpu == -1) ret = qemuDomainGetTotalcpuStats(group, params, nparams); else - ret = qemuDomainGetPercpuStats(domain, group, params, nparams, - start_cpu, ncpus); + ret = qemuDomainGetPercpuStats(domain, vm, group, params, nparams, + start_cpu, ncpus, flags); cleanup: virCgroupFree(&group); if (vm) diff --git a/src/util/cgroup.c b/src/util/cgroup.c index ad49bc2..5b32881 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -530,7 +530,9 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group, continue; /* We need to control cpu bandwidth for each vcpu now */ - if ((flags & VIR_CGROUP_VCPU) && (i != VIR_CGROUP_CONTROLLER_CPU)) { + if ((flags & VIR_CGROUP_VCPU) && + (i != VIR_CGROUP_CONTROLLER_CPU && + i != VIR_CGROUP_CONTROLLER_CPUACCT)) { /* treat it as unmounted and we can use virCgroupAddTask */ VIR_FREE(group->controllers[i].mountPoint); continue; diff --git a/tools/virsh.c b/tools/virsh.c index 5009b6b..c952dde 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -5563,6 +5563,7 @@ static const vshCmdOptDef opts_cpu_stats[] = { {"total", VSH_OT_BOOL, 0, N_("Show total statistics only")}, {"start", VSH_OT_INT, 0, N_("Show statistics from this CPU")}, {"count", VSH_OT_INT, 0, N_("Number of shown CPUs at most")}, + {"vcpu", VSH_OT_BOOL, 0, N_("Show vcpu statistics")}, {NULL, 0, 0, NULL}, }; @@ -5573,6 +5574,7 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd) virTypedParameterPtr 
params = NULL; int i, j, pos, max_id, cpu = -1, show_count = -1, nparams; bool show_total = false, show_per_cpu = false; + unsigned int flags = 0; if (!vshConnectionUsability(ctl, ctl->conn)) return false; @@ -5580,6 +5582,11 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return false; + ; + if (vshCommandOptBool(cmd, "vcpu")) { + flags |= VIR_DOMAIN_CPU_STATS_F_VCPU; + } + show_total = vshCommandOptBool(cmd, "total"); if (vshCommandOptInt(cmd, "start", &cpu) > 0) show_per_cpu = true; @@ -5600,13 +5607,13 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd) cpu = 0; /* get number of cpus on the node */ - if ((max_id = virDomainGetCPUStats(dom, NULL, 0, 0, 0, 0)) < 0) + if ((max_id = virDomainGetCPUStats(dom, NULL, 0, 0, 0, flags)) < 0) goto failed_stats; if (show_count < 0 || show_count > max_id) show_count = max_id; /* get percpu information */ - if ((nparams = virDomainGetCPUStats(dom, NULL, 0, 0, 1, 0)) < 0) + if ((nparams = virDomainGetCPUStats(dom, NULL, 0, 0, 1, flags)) < 0) goto failed_stats; if (!nparams) { @@ -5620,7 +5627,7 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd) while (show_count) { int ncpus = MIN(show_count, 128); - if (virDomainGetCPUStats(dom, params, nparams, cpu, ncpus, 0) < 0) + if (virDomainGetCPUStats(dom, params, nparams, cpu, ncpus, flags) < 0) goto failed_stats; for (i = 0; i < ncpus; i++) { @@ -5654,7 +5661,7 @@ do_show_total: goto cleanup; /* get supported num of parameter for total statistics */ - if ((nparams = virDomainGetCPUStats(dom, NULL, 0, -1, 1, 0)) < 0) + if ((nparams = virDomainGetCPUStats(dom, NULL, 0, -1, 1, flags)) < 0) goto failed_stats; if (!nparams) { @@ -5666,7 +5673,7 @@ do_show_total: goto failed_params; /* passing start_cpu == -1 gives us domain's total status */ - if ((nparams = virDomainGetCPUStats(dom, params, nparams, -1, 1, 0)) < 0) + if ((nparams = virDomainGetCPUStats(dom, params, nparams, -1, 1, flags)) < 0) goto failed_stats; vshPrint(ctl, 
_("Total:\n")); -- 1.7.1

On 04/18/2012 05:14 AM, Hu Tao wrote:
--- src/qemu/qemu_driver.c | 152 ++++++++++++++++++++++++++++++++++++++++++----- src/util/cgroup.c | 4 +- tools/virsh.c | 17 ++++-- 3 files changed, 150 insertions(+), 23 deletions(-)
Commit message is too sparse. You are mixing a lot of things in one patch; personally, I would have moved the virsh.c change into patch 1, where you are defining the new API, leaving just the cgroup and qemu_driver changes as a single patch to implement the API. What does the _F_ in VIR_DOMAIN_CPU_STATS_F_VCPU stand for?
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0d3b0bd..165b5f3 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12377,19 +12377,110 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group, return nparams; }
+/* get the cpu time from cpuacct cgroup group, saving + cpu time value in cpu_time. caller is responsible + for freeing memory allocated for cpu_time. + return 0 on success, -1 otherwise */ +static int getVcpuPercpuStats(virCgroupPtr group, + unsigned long long **cpu_time, + unsigned int *num) +{ + int ret = -1; + unsigned long long *ptime = NULL; + char *buf = NULL; + char *pos; + unsigned long long tmp; + + if (virCgroupGetCpuacctPercpuUsage(group, &buf)) + goto error; + + pos = buf; + *num = 0; + while (virStrToLong_ull(pos, &pos, 10, &tmp) == 0) + (*num)++; + + if (*num > 0) { + int i; + + if (VIR_ALLOC_N(ptime, *num) < 0) + goto error;
Missing a virReportOOMError().
+ + pos = buf; + for (i = 0; i < *num; i++) + virStrToLong_ull(pos, &pos, 10, ptime + i);
No error checking?
+ *cpu_time = ptime; + ret = 0; + } +error: + return ret;
Leaks buf. How does the caller know how many entries were allocated into *cpu_time?
+} + +static int getSumVcpuPercpuStats(virCgroupPtr group, + unsigned int nvcpu, + unsigned long long **sum_cpu_time, + unsigned int *num) +{ + unsigned long long *cpu_time[nvcpu];
I'm not sure whether use of nvcpu as an array length in a local variable declaration is portable.
+ unsigned int ncpu_time[nvcpu]; + unsigned int max = 0; + unsigned long long *tmp = NULL; + virCgroupPtr group_vcpu = NULL; + int ret = -1; + int i, j; + + for (i = 0; i < nvcpu; i++) { + ret = virCgroupForVcpu(group, i, &group_vcpu, 0); + if (ret < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cpuacct parse error")); + goto error; + } + ret = getVcpuPercpuStats(group_vcpu, + &cpu_time[i], + &ncpu_time[i]); + if (ret < 0) + goto error; + if (max < ncpu_time[i]) + max = ncpu_time[i]; + } + + if (max > 0) { + if (VIR_ALLOC_N(tmp, max) < 0) + goto error;
If this allocation fails...
+ + memset(tmp, 0, sizeof(*tmp) * max);
Useless memset. VIR_ALLOC_N already guarantees zero-initialization.
+ for (i = 0; i < nvcpu; i++) { + for (j = 0; j < ncpu_time[i]; j++) + tmp[j] += cpu_time[i][j]; + } + *sum_cpu_time = tmp; + *num = max; + ret = 0; + } + + for (i = 0; i < nvcpu; i++) + VIR_FREE(cpu_time[i]); + +error: + return ret;
...then this leaks each cpu_time[i]. You need to move the error: label up by two lines.
+} + static int qemuDomainGetPercpuStats(virDomainPtr domain, + virDomainObjPtr vm, virCgroupPtr group, virTypedParameterPtr params, unsigned int nparams, int start_cpu, - unsigned int ncpus) + unsigned int ncpus, + unsigned int flags) { char *map = NULL; int rv = -1; int i, max_id; char *pos; char *buf = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; virTypedParameterPtr ent; int param_idx;
@@ -12425,22 +12516,48 @@ qemuDomainGetPercpuStats(virDomainPtr domain, if (max_id - start_cpu > ncpus - 1) max_id = start_cpu + ncpus - 1;
- for (i = 0; i <= max_id; i++) { + if (flags & VIR_DOMAIN_CPU_STATS_F_VCPU) { + unsigned long long *sum_cpu_time; unsigned long long cpu_time; + unsigned int n;
- if (!map[i]) { - cpu_time = 0; - } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cpuacct parse error")); - goto cleanup; + getSumVcpuPercpuStats(group, + priv->nvcpupids, + &sum_cpu_time, + &n);
No error checking?
+ + for (i = 0; i <= max_id && i < n; i++) {
Is max_id appropriate? Is it possible to simulate a VM with more vcpus than the host has physical CPUs? On the other hand, it is certainly possible to have fewer vcpus to the guest than there are physical cpus. Are we sure that things are lining up correctly? For example, suppose I have 8 CPUs, but hot-unplug cpu 1. Then the kernel gives us only 7 numbers, but we correctly expand those seven numbers into our return of [cpu0, 0, cpu2, cpu3, cpu4, cpu5, cpu6, cpu7]. But if I am then running a guest with only 2 vcpus, and pinning that guest to run on just physical cpus 4-6, what numbers am I looking at in the cpuacct cgroup? Is it a case where the kernel will only show 4 numbers, because the cgroup is pinned to a subset of online processors? And how many values is our API returning - just 2 because the guest has just 2 vcpus? Does that mean that our return value is effectively the 2 element array, where each element is a sum of three values, as in [vcpu0 on cpu4 + vcpu0 on cpu5 + vcpu0 on cpu6, vcpu1 on cpu4 + vcpu1 on cpu5 + vcpu1 on cpu6]? That's why I think you need more documentation, to say _what_ cpuacct information you are grabbing, and _how_ that information will be consolidated into the return value, before I can even review whether your statistics gathering looks like it is matching that documentation.
@@ -5580,6 +5582,11 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return false;
+ ;
What's this line doing?
+ if (vshCommandOptBool(cmd, "vcpu")) { + flags |= VIR_DOMAIN_CPU_STATS_F_VCPU; + } + show_total = vshCommandOptBool(cmd, "total"); if (vshCommandOptInt(cmd, "start", &cpu) > 0) show_per_cpu = true;
-- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Apr 18, 2012 at 08:43:42AM -0600, Eric Blake wrote:
On 04/18/2012 05:14 AM, Hu Tao wrote:
--- src/qemu/qemu_driver.c | 152 ++++++++++++++++++++++++++++++++++++++++++----- src/util/cgroup.c | 4 +- tools/virsh.c | 17 ++++-- 3 files changed, 150 insertions(+), 23 deletions(-)
Commit message is too sparse. You are mixing a lot of things in one patch; personally, I would have moved the virsh.c change into patch 1, where you are defining the new API, leaving just the cgroup and qemu_driver changes as a single patch to implement the API.
What does the _F_ in VIR_DOMAIN_CPU_STATS_F_VCPU stand for?
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0d3b0bd..165b5f3 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12377,19 +12377,110 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group, return nparams; }
+/* get the cpu time from cpuacct cgroup group, saving + cpu time value in cpu_time. caller is responsible + for freeing memory allocated for cpu_time. + return 0 on success, -1 otherwise */ +static int getVcpuPercpuStats(virCgroupPtr group, + unsigned long long **cpu_time, + unsigned int *num) +{ + int ret = -1; + unsigned long long *ptime = NULL; + char *buf = NULL; + char *pos; + unsigned long long tmp; + + if (virCgroupGetCpuacctPercpuUsage(group, &buf)) + goto error; + + pos = buf; + *num = 0; + while (virStrToLong_ull(pos, &pos, 10, &tmp) == 0) + (*num)++; + + if (*num > 0) { + int i; + + if (VIR_ALLOC_N(ptime, *num) < 0) + goto error;
Missing a virReportOOMError().
+ + pos = buf; + for (i = 0; i < *num; i++) + virStrToLong_ull(pos, &pos, 10, ptime + i);
No error checking?
+ *cpu_time = ptime; + ret = 0; + } +error: + return ret;
Leaks buf. How does the caller know how many entries were allocated into *cpu_time?
The number is returned by parameter num.
+} + +static int getSumVcpuPercpuStats(virCgroupPtr group, + unsigned int nvcpu, + unsigned long long **sum_cpu_time, + unsigned int *num) +{ + unsigned long long *cpu_time[nvcpu];
I'm not sure whether use of nvcpu as an array length in a local variable declaration is portable.
+ unsigned int ncpu_time[nvcpu]; + unsigned int max = 0; + unsigned long long *tmp = NULL; + virCgroupPtr group_vcpu = NULL; + int ret = -1; + int i, j; + + for (i = 0; i < nvcpu; i++) { + ret = virCgroupForVcpu(group, i, &group_vcpu, 0); + if (ret < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cpuacct parse error")); + goto error; + } + ret = getVcpuPercpuStats(group_vcpu, + &cpu_time[i], + &ncpu_time[i]); + if (ret < 0) + goto error; + if (max < ncpu_time[i]) + max = ncpu_time[i]; + } + + if (max > 0) { + if (VIR_ALLOC_N(tmp, max) < 0) + goto error;
If this allocation fails...
+ + memset(tmp, 0, sizeof(*tmp) * max);
Useless memset. VIR_ALLOC_N already guarantees zero-initialization.
+ for (i = 0; i < nvcpu; i++) { + for (j = 0; j < ncpu_time[i]; j++) + tmp[j] += cpu_time[i][j]; + } + *sum_cpu_time = tmp; + *num = max; + ret = 0; + } + + for (i = 0; i < nvcpu; i++) + VIR_FREE(cpu_time[i]); + +error: + return ret;
...then this leaks each cpu_time[i]. You need to move the error: label up by two lines.
+} + static int qemuDomainGetPercpuStats(virDomainPtr domain, + virDomainObjPtr vm, virCgroupPtr group, virTypedParameterPtr params, unsigned int nparams, int start_cpu, - unsigned int ncpus) + unsigned int ncpus, + unsigned int flags) { char *map = NULL; int rv = -1; int i, max_id; char *pos; char *buf = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; virTypedParameterPtr ent; int param_idx;
@@ -12425,22 +12516,48 @@ qemuDomainGetPercpuStats(virDomainPtr domain, if (max_id - start_cpu > ncpus - 1) max_id = start_cpu + ncpus - 1;
- for (i = 0; i <= max_id; i++) { + if (flags & VIR_DOMAIN_CPU_STATS_F_VCPU) { + unsigned long long *sum_cpu_time; unsigned long long cpu_time; + unsigned int n;
- if (!map[i]) { - cpu_time = 0; - } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cpuacct parse error")); - goto cleanup; + getSumVcpuPercpuStats(group, + priv->nvcpupids, + &sum_cpu_time, + &n);
No error checking?
+ + for (i = 0; i <= max_id && i < n; i++) {
Is max_id appropriate? Is it possible to simulate a VM with more vcpus than the host has physical CPUs? On the other hand, it is certainly possible to have fewer vcpus to the guest than there are physical cpus. Are we sure that things are lining up correctly?
We are getting physical CPU usages (still from host perspective), but consumed by vcpus, no matter how many vcpus there are in the domain.
For example, suppose I have 8 CPUs, but hot-unplug cpu 1. Then the kernel gives us only 7 numbers, but we correctly expand those seven numbers into our return of [cpu0, 0, cpu2, cpu3, cpu4, cpu5, cpu6, cpu7]. But if I am then running a guest with only 2 vcpus, and pinning that guest to run on just physical cpus 4-6, what numbers am I looking at in the cpuacct cgroup? Is it a case where the kernel will only show 4 numbers, because the cgroup is pinned to a subset of online
you mean 3 numbers 'cause guest is pinned to pcpus 4-6?
processors? And how many values is our API returning - just 2 because the guest has just 2 vcpus? Does that mean that our return value is
No, but 3 values are returned, see below...
effectively the 2 element array, where each element is a sum of three values, as in [vcpu0 on cpu4 + vcpu0 on cpu5 + vcpu0 on cpu6, vcpu1 on cpu4 + vcpu1 on cpu5 + vcpu1 on cpu6]?
...a 3-element array, where each element is a sum of 2 values: [vcpu0/cpu4 + vcpu1/cpu4, vcpu0/cpu5 + vcpu1/cpu5, vcpu0/cpu6 + vcpu1/cpu6].
That's why I think you need more documentation, to say _what_ cpuacct information you are grabbing, and _how_ that information will be consolidated into the return value, before I can even review whether your statistics gathering looks like it is matching that documentation.
I'll send a v2 with those functions documented in detail. And thanks for your review. -- Thanks, Hu Tao

--- examples/get_cpu_stats.ml | 2 +- libvirt/libvirt.ml | 2 +- libvirt/libvirt.mli | 2 +- libvirt/libvirt_c_oneoffs.c | 9 +++++---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/get_cpu_stats.ml b/examples/get_cpu_stats.ml index 79d5c3c..3541720 100644 --- a/examples/get_cpu_stats.ml +++ b/examples/get_cpu_stats.ml @@ -25,7 +25,7 @@ let () = let stats = let dom = D.lookup_by_name conn domname in - D.get_cpu_stats dom nr_pcpus in + D.get_cpu_stats dom nr_pcpus 0 in Array.iteri ( fun n params -> diff --git a/libvirt/libvirt.ml b/libvirt/libvirt.ml index 53c5bb4..7a32071 100644 --- a/libvirt/libvirt.ml +++ b/libvirt/libvirt.ml @@ -417,7 +417,7 @@ struct external set_vcpus : [>`W] t -> int -> unit = "ocaml_libvirt_domain_set_vcpus" external pin_vcpu : [>`W] t -> int -> string -> unit = "ocaml_libvirt_domain_pin_vcpu" external get_vcpus : [>`R] t -> int -> int -> int * vcpu_info array * string = "ocaml_libvirt_domain_get_vcpus" - external get_cpu_stats : [>`R] t -> int -> typed_param list array = "ocaml_libvirt_domain_get_cpu_stats" + external get_cpu_stats : [>`R] t -> int -> int -> typed_param list array = "ocaml_libvirt_domain_get_cpu_stats" external get_max_vcpus : [>`R] t -> int = "ocaml_libvirt_domain_get_max_vcpus" external attach_device : [>`W] t -> xml -> unit = "ocaml_libvirt_domain_attach_device" external detach_device : [>`W] t -> xml -> unit = "ocaml_libvirt_domain_detach_device" diff --git a/libvirt/libvirt.mli b/libvirt/libvirt.mli index 0913a63..9782406 100644 --- a/libvirt/libvirt.mli +++ b/libvirt/libvirt.mli @@ -559,7 +559,7 @@ sig for a domain. See the libvirt documentation for details of the array and bitmap returned from this function. *) - val get_cpu_stats : [>`R] t -> int -> typed_param list array + val get_cpu_stats : [>`R] t -> int -> int -> typed_param list array (** [get_pcpu_stats dom nr_pcpu] returns the physical CPU stats for a domain. See the libvirt documentation for details. 
*) diff --git a/libvirt/libvirt_c_oneoffs.c b/libvirt/libvirt_c_oneoffs.c index 3d42b73..135b934 100644 --- a/libvirt/libvirt_c_oneoffs.c +++ b/libvirt/libvirt_c_oneoffs.c @@ -532,20 +532,21 @@ extern int virDomainGetCPUStats (virDomainPtr domain, #endif CAMLprim value -ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv) +ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv, value flagsv) { #ifdef HAVE_VIRDOMAINGETCPUSTATS - CAMLparam2 (domv, nr_pcpusv); + CAMLparam3 (domv, nr_pcpusv, flagsv); CAMLlocal5 (cpustats, param_head, param_node, typed_param, typed_param_value); CAMLlocal1 (v); virDomainPtr dom = Domain_val (domv); virConnectPtr conn = Connect_domv (domv); int nr_pcpus = Int_val (nr_pcpusv); + int flags = Int_val (flagsv); virTypedParameterPtr params; int r, cpu, ncpus, nparams, i, j, pos; /* get percpu information */ - NONBLOCKING (nparams = virDomainGetCPUStats(dom, NULL, 0, 0, 1, 0)); + NONBLOCKING (nparams = virDomainGetCPUStats(dom, NULL, 0, 0, 1, flags)); CHECK_ERROR (nparams < 0, conn, "virDomainGetCPUStats"); if ((params = malloc(sizeof(*params) * nparams * 128)) == NULL) @@ -556,7 +557,7 @@ ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv) while (cpu < nr_pcpus) { ncpus = nr_pcpus - cpu > 128 ? 128 : nr_pcpus - cpu; - NONBLOCKING (r = virDomainGetCPUStats(dom, params, nparams, cpu, ncpus, 0)); + NONBLOCKING (r = virDomainGetCPUStats(dom, params, nparams, cpu, ncpus, flags)); CHECK_ERROR (r < 0, conn, "virDomainGetCPUStats"); for (i = 0; i < ncpus; i++) { -- 1.7.1

On 04/18/2012 05:14 AM, Hu Tao wrote:
--- examples/get_cpu_stats.ml | 2 +- libvirt/libvirt.ml | 2 +- libvirt/libvirt.mli | 2 +- libvirt/libvirt_c_oneoffs.c | 9 +++++---- 4 files changed, 8 insertions(+), 7 deletions(-)
+++ b/libvirt/libvirt_c_oneoffs.c @@ -532,20 +532,21 @@ extern int virDomainGetCPUStats (virDomainPtr domain, #endif
CAMLprim value -ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv) +ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv, value flagsv)
This adds flags support to the per-cpu half of the libvirt API, but you are still missing ocaml bindings for the portion of the libvirt API that accesses the domain total stats. Also, I never understood why the caller has to know how many cpus they are passing in advance - shouldn't the bindings be smart enough to return an appropriately sized array that covers all possible cpus without the caller having to pre-specify that sizing? For comparison, in the python bindings, we expressed things as: if (!PyArg_ParseTuple(args, (char *)"OOi:virDomainGetCPUStats", &pyobj_domain, &totalbool, &flags)) so that a user passes in the domain; a choice of whether they want total stats (pass true to get a 1-element array back, corresponding to the C code passing start_cpu of -1), or per-cpu stats (pass false to get an n-element array back, with each element mapping to a cpu, and with the array sized according to all cpus available); and finally a flags parameter. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Apr 18, 2012 at 09:18:45AM -0600, Eric Blake wrote:
On 04/18/2012 05:14 AM, Hu Tao wrote:
--- examples/get_cpu_stats.ml | 2 +- libvirt/libvirt.ml | 2 +- libvirt/libvirt.mli | 2 +- libvirt/libvirt_c_oneoffs.c | 9 +++++---- 4 files changed, 8 insertions(+), 7 deletions(-)
+++ b/libvirt/libvirt_c_oneoffs.c @@ -532,20 +532,21 @@ extern int virDomainGetCPUStats (virDomainPtr domain, #endif
CAMLprim value -ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv) +ocaml_libvirt_domain_get_cpu_stats (value domv, value nr_pcpusv, value flagsv)
This adds flags support to the per-cpu half of the libvirt API, but you are still missing ocaml bindings for the portion of the libvirt API that accesses the domain total stats. Also, I never understood why the caller has to know how many cpus they are passing in advance - shouldn't the bindings be smart enough to return an appropriately sized array that covers all possible cpus without the caller having to pre-specify that sizing?
For comparison, in the python bindings, we expressed things as:
if (!PyArg_ParseTuple(args, (char *)"OOi:virDomainGetCPUStats", &pyobj_domain, &totalbool, &flags))
so that a user passes in the domain; a choice of whether they want total stats (pass true to get a 1-element array back, corresponding to the C code passing start_cpu of -1), or per-cpu stats (pass false to get an n-element array back, with each element mapping to a cpu, and with the array sized according to all cpus available); and finally a flags parameter.
It's certainly unusual, and I agree that we should go with the model used by the Python bindings. FWIW it's probably because 'nr_pcpus' happens to be available conveniently as a value in virt-top at the point when 'get_cpu_stats' is called. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org

Before this patch, `virt-top -1' shows total cpu usages which euqal to `vcpu usages' + `hypervisor usages'. This patch adds another column for domains showing `vcpu usages'. An example is: PHYCPU %CPU example_domain 0 10.4 10.4 0.8 1 1.6 1.6 1.4 2 2.6 2.6 2.6 3 0.0 0.0 0.1 --- virt-top/virt_top.ml | 62 +++++++++++++++++++++++++++++++++++++------------- 1 files changed, 46 insertions(+), 16 deletions(-) diff --git a/virt-top/virt_top.ml b/virt-top/virt_top.ml index e2fe554..4591208 100644 --- a/virt-top/virt_top.ml +++ b/virt-top/virt_top.ml @@ -448,6 +448,7 @@ let collect, clear_pcpu_display_data = (* Save pcpu_usages structures across redraws too (only for pCPU display). *) let last_pcpu_usages = Hashtbl.create 13 in + let last_vcpu_usages = Hashtbl.create 13 in let clear_pcpu_display_data () = (* Clear out pcpu_usages used by PCPUDisplay display_mode @@ -652,7 +653,7 @@ let collect, clear_pcpu_display_data = (try let domid = rd.rd_domid in let maplen = C.cpumaplen nr_pcpus in - let cpu_stats = D.get_cpu_stats rd.rd_dom nr_pcpus in + let cpu_stats = D.get_cpu_stats rd.rd_dom nr_pcpus 0 in let rec find_usages_from_stats = function | ("cpu_time", D.TypedFieldUInt64 usages) :: _ -> usages | _ :: params -> find_usages_from_stats params @@ -669,11 +670,20 @@ let collect, clear_pcpu_display_data = (* Update last_pcpu_usages. 
*) Hashtbl.replace last_pcpu_usages domid pcpu_usages; - (match prev_pcpu_usages with - | Some prev_pcpu_usages + (* vcpu usages *) + let vcpu_stats = D.get_cpu_stats rd.rd_dom nr_pcpus 8 in + let vcpu_usages = Array.map find_usages_from_stats vcpu_stats in + let prev_vcpu_usages = + try Some (Hashtbl.find last_vcpu_usages domid) + with Not_found -> None in + Hashtbl.replace last_vcpu_usages domid vcpu_usages; + + (match prev_pcpu_usages, prev_vcpu_usages with + | Some prev_pcpu_usages, Some prev_vcpu_usages when Array.length prev_pcpu_usages = Array.length pcpu_usages -> - Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, - prev_pcpu_usages, cpumaps, maplen) + Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, + prev_pcpu_usages, vcpu_usages, prev_vcpu_usages, + cpumaps, maplen) | _ -> None (* ignore missing / unequal length prev_vcpu_infos *) ); with @@ -691,13 +701,24 @@ let collect, clear_pcpu_display_data = List.iteri ( fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, - prev_pcpu_usages, cpumaps, maplen) -> + prev_pcpu_usages, vcpu_usages, prev_vcpu_usages, + cpumaps, maplen) -> (* Which pCPUs can this dom run on? *) for p = 0 to Array.length pcpu_usages - 1 do pcpus.(p).(di) <- pcpu_usages.(p) -^ prev_pcpu_usages.(p) - done + done ) doms; + let vcpus = Array.make_matrix nr_pcpus nr_doms 0L in + List.iteri ( + fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, + prev_pcpu_usages, vcpu_usages, prev_vcpu_usages, + cpumaps, maplen) -> + for p = 0 to Array.length vcpu_usages - 1 do + vcpus.(p).(di) <- vcpu_usages.(p) -^ prev_vcpu_usages.(p) + done + ) doms; + (* Sum the CPU time used by each pCPU, for the %CPU column. 
*) let pcpus_cpu_time = Array.map ( fun row -> @@ -709,7 +730,7 @@ let collect, clear_pcpu_display_data = Int64.to_float !cpu_time ) pcpus in - Some (doms, pcpus, pcpus_cpu_time) + Some (doms, pcpus, vcpus, pcpus_cpu_time) ) else None in @@ -913,7 +934,7 @@ let redraw = loop domains_lineno doms | PCPUDisplay -> (*---------- Showing physical CPUs ----------*) - let doms, pcpus, pcpus_cpu_time = + let doms, pcpus, vcpus, pcpus_cpu_time = match pcpu_display with | Some p -> p | None -> failwith "internal error: no pcpu_display data" in @@ -922,9 +943,9 @@ let redraw = let dom_names = String.concat "" ( List.map ( - fun (_, name, _, _, _, _, _, _) -> + fun (_, name, _, _, _, _, _, _, _, _) -> let len = String.length name in - let width = max (len+1) 7 in + let width = max (len+1) 12 in pad width name ) doms ) in @@ -941,18 +962,27 @@ let redraw = addch ' '; List.iteri ( - fun di (domid, name, _, _, _, _, _, _) -> + fun di (domid, name, _, _, _, _, _, _, _, _) -> let t = pcpus.(p).(di) in + let tv = vcpus.(p).(di) in let len = String.length name in - let width = max (len+1) 7 in - let str = + let width = max (len+1) 12 in + let str_pcpu = if t <= 0L then "" else ( let t = Int64.to_float t in let percent = 100. *. t /. total_cpu_per_pcpu in - sprintf "%s " (Show.percent percent) + sprintf "%s" (Show.percent percent) ) in - addstr (pad width str); + let str_vcpu = + if tv <= 0L then "" + else ( + let tv = Int64.to_float tv in + let percent = 100. *. tv /. total_cpu_per_pcpu in + sprintf "%s" (Show.percent percent) + ) in + let str = sprintf "%s %s" str_pcpu str_vcpu in + addstr (pad width str); () ) doms ) pcpus; -- 1.7.1
participants (3)
-
Eric Blake
-
Hu Tao
-
Richard W.M. Jones