Re: [libvirt] [PATCH 2/2] Adds support to VIR_DOMAIN_CPU_STATS_F_VCPU in qemu_driver.

18 Apr 2012

On 04/18/2012 05:14 AM, Hu Tao wrote:
...
---
 src/qemu/qemu_driver.c |  152 ++++++++++++++++++++++++++++++++++++++++++-----
 src/util/cgroup.c      |    4 +-
 tools/virsh.c          |   17 ++++--
 3 files changed, 150 insertions(+), 23 deletions(-)
Commit message is too sparse.  You are mixing a lot of things in one
patch; personally, I would have moved the virsh.c change into patch 1,
where you are defining the new API, leaving just the cgroup and
qemu_driver changes as a single patch to implement the API.

What does the _F_ in VIR_DOMAIN_CPU_STATS_F_VCPU stand for?
...

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 0d3b0bd..165b5f3 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -12377,19 +12377,110 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group,
     return nparams;
 }
+/* get the cpu time from cpuacct cgroup group, saving
+   cpu time value in cpu_time. caller is responsible
+   for freeing memory allocated for cpu_time.
+   return 0 on success, -1 otherwise */
+static int getVcpuPercpuStats(virCgroupPtr group,
+                              unsigned long long **cpu_time,
+                              unsigned int *num)
+{
+    int ret = -1;
+    unsigned long long *ptime = NULL;
+    char *buf = NULL;
+    char *pos;
+    unsigned long long tmp;
+
+    if (virCgroupGetCpuacctPercpuUsage(group, &buf))
+        goto error;
+
+    pos = buf;
+    *num = 0;
+    while (virStrToLong_ull(pos, &pos, 10, &tmp) == 0)
+        (*num)++;
+
+    if (*num > 0) {
+        int i;
+
+        if (VIR_ALLOC_N(ptime, *num) < 0)
+            goto error;
Missing a virReportOOMError().
...
+
+        pos = buf;
+        for (i = 0; i < *num; i++)
+            virStrToLong_ull(pos, &pos, 10, ptime + i);
No error checking?
...
+        *cpu_time = ptime;
+        ret = 0;
+    }
+error:
+    return ret;
Leaks buf.  How does the caller know how many entries were allocated
into *cpu_time?
...
+}
+
+static int getSumVcpuPercpuStats(virCgroupPtr group,
+                                 unsigned int nvcpu,
+                                 unsigned long long **sum_cpu_time,
+                                 unsigned int *num)
+{
+    unsigned long long *cpu_time[nvcpu];
I'm not sure whether use of nvcpu as an array length in a local variable
declaration is portable.
...
+    unsigned int ncpu_time[nvcpu];
+    unsigned int max = 0;
+    unsigned long long *tmp = NULL;
+    virCgroupPtr group_vcpu = NULL;
+    int ret = -1;
+    int i, j;
+
+    for (i = 0; i < nvcpu; i++) {
+        ret = virCgroupForVcpu(group, i, &group_vcpu, 0);
+        if (ret < 0) {
+            qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                            _("cpuacct parse error"));
+            goto error;
+        }
+        ret = getVcpuPercpuStats(group_vcpu,
+                                 &cpu_time[i],
+                                 &ncpu_time[i]);
+        if (ret < 0)
+            goto error;
+        if (max < ncpu_time[i])
+            max = ncpu_time[i];
+    }
+
+    if (max > 0) {
+        if (VIR_ALLOC_N(tmp, max) < 0)
+            goto error;
If this allocation fails...
...
+
+        memset(tmp, 0, sizeof(*tmp) * max);
Useless memset.  VIR_ALLOC_N already guarantees zero-initialization.
...
+        for (i = 0; i < nvcpu; i++) {
+            for (j = 0; j < ncpu_time[i]; j++)
+                tmp[j] += cpu_time[i][j];
+        }
+        *sum_cpu_time = tmp;
+        *num = max;
+        ret = 0;
+    }
+
+    for (i = 0; i < nvcpu; i++)
+        VIR_FREE(cpu_time[i]);
+
+error:
+    return ret;
...then this leaks each cpu_time[i].  You need to move the error: label
up by two lines.
...
+}
+
 static int
 qemuDomainGetPercpuStats(virDomainPtr domain,
+                         virDomainObjPtr vm,
                          virCgroupPtr group,
                          virTypedParameterPtr params,
                          unsigned int nparams,
                          int start_cpu,
-                         unsigned int ncpus)
+                         unsigned int ncpus,
+                         unsigned int flags)
 {
     char *map = NULL;
     int rv = -1;
     int i, max_id;
     char *pos;
     char *buf = NULL;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
     virTypedParameterPtr ent;
     int param_idx;
@@ -12425,22 +12516,48 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
     if (max_id - start_cpu > ncpus - 1)
         max_id = start_cpu + ncpus - 1;
-    for (i = 0; i <= max_id; i++) {
+    if (flags & VIR_DOMAIN_CPU_STATS_F_VCPU) {
+        unsigned long long *sum_cpu_time;
         unsigned long long cpu_time;
+        unsigned int n;
-        if (!map[i]) {
-            cpu_time = 0;
-        } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
-            qemuReportError(VIR_ERR_INTERNAL_ERROR,
-                            _("cpuacct parse error"));
-            goto cleanup;
+        getSumVcpuPercpuStats(group,
+                              priv->nvcpupids,
+                              &sum_cpu_time,
+                              &n);
No error checking?
...
+
+        for (i = 0; i <= max_id && i < n; i++) {
Is max_id appropriate?  Is it possible to simulate a VM with more vcpus
than the host has physical CPUs?  On the other hand, it is certainly
possible to have fewer vcpus to the guest than there are physical cpus.
 Are we sure that things are lining up correctly?

For example, suppose I have 8 CPUs, but hot-unplug cpu 1.  Then the
kernel gives us only 7 numbers, but we correctly expand those three
numbers into our return of [cpu0, 0, cpu2, cpu3, cpu4, cpu5, cpu6,
cpu7].  But if I am then running a guest with only 2 vcpus, and pinning
that guest to run on just physical cpus 4-6, what numbers am I looking
at in the cppuacct cgroup?  Is it a case where the kernel will only show
4 numbers, because the cgroup is pinned to a subset of online
processors?  And how many values is our API returning - just 2 because
the guest has just 2 vcpus?  Does that mean that our return value is
effectively the 2 element array, where each element is a sum of three
values, as in [vcpu0 on cpu4 + vcpu0 on cpu5 + vcpu0 on cpu6, vcpu1 on
cpu4 + vcpu1 on cpu5 + vcpu1 on cpu6]?

That's why I think you need more documentation, to say _what_ cpuacct
information you are grabbing, and _how_ that information will be
consolidated into the return value, before I can even review whether
your statistics gathering looks like it is matching that documentation.
...
@@ -5580,6 +5582,11 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd)
     if (!(dom = vshCommandOptDomain(ctl, cmd, NULL)))
         return false;
+    ;
What's this line doing?
...
+    if (vshCommandOptBool(cmd, "vcpu")) {
+        flags |= VIR_DOMAIN_CPU_STATS_F_VCPU;
+    }
+
     show_total = vshCommandOptBool(cmd, "total");
     if (vshCommandOptInt(cmd, "start", &cpu) > 0)
         show_per_cpu = true;
-- 
Eric Blake   eblake@redhat.com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org