From: Hu Tao <hutao(a)cn.fujitsu.com>
This involves setting the cpuacct cgroup to a per-vcpu granularity,
as well as summing each vcpu's accounting into a common array.
Now that we are reading more than one cgroup file, we double-check
that cpus weren't hot-plugged between reads, which would otherwise
invalidate our summing.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
Changes from v4: rewrite the qemu code to use fewer malloc calls,
and fix some logic bugs
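
For reviewers who want to see the new field from the client side: with this
series applied, the second per-cpu parameter is visible through the existing
virDomainGetCPUStats() API. A minimal sketch of the documented query sequence
follows (connection URI and domain name are placeholders, and error handling
is abbreviated):

/* cpu-stats.c: print per-pcpu cpu_time and vcpu_time for one domain.
 * Build: cc cpu-stats.c -o cpu-stats $(pkg-config --cflags --libs libvirt) */
#include <stdio.h>
#include <stdlib.h>
#include <libvirt/libvirt.h>

int main(void)
{
    virConnectPtr conn = virConnectOpen("qemu:///system"); /* placeholder URI */
    virDomainPtr dom = conn ? virDomainLookupByName(conn, "demo") : NULL;
    virTypedParameterPtr params = NULL;
    int ncpus, nparams, i, j;

    if (!dom)
        goto error;

    /* Ask how many cpus there are, and how many fields each cpu reports;
     * with this patch, nparams should be 2 (cpu_time and vcpu_time). */
    ncpus = virDomainGetCPUStats(dom, NULL, 0, 0, 0, 0);
    nparams = virDomainGetCPUStats(dom, NULL, 0, 0, 1, 0);
    if (ncpus < 0 || nparams < 0)
        goto error;

    params = calloc(ncpus * nparams, sizeof(*params));
    if (!params)
        goto error;

    if (virDomainGetCPUStats(dom, params, nparams, 0, ncpus, 0) < 0)
        goto error;

    for (i = 0; i < ncpus; i++)
        for (j = 0; j < nparams; j++) {
            virTypedParameterPtr p = &params[i * nparams + j];
            printf("cpu%d %s %llu\n", i, p->field, p->value.ul);
        }

    free(params);
    virDomainFree(dom);
    virConnectClose(conn);
    return 0;

error:
    fprintf(stderr, "failed to collect cpu stats\n");
    free(params);
    if (dom)
        virDomainFree(dom);
    if (conn)
        virConnectClose(conn);
    return 1;
}
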
src/qemu/qemu_driver.c | 123 ++++++++++++++++++++++++++++++++++++++++++++----
src/util/cgroup.c | 4 +-
2 files changed, 117 insertions(+), 10 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 0fd7de1..f6d0985 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -104,7 +104,7 @@
#define QEMU_NB_NUMA_PARAM 2
#define QEMU_NB_TOTAL_CPU_STAT_PARAM 3
-#define QEMU_NB_PER_CPU_STAT_PARAM 1
+#define QEMU_NB_PER_CPU_STAT_PARAM 2
#if HAVE_LINUX_KVM_H
# include <linux/kvm.h>
@@ -12563,8 +12563,69 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group,
return nparams;
}
+/* This function gets, for each pcpu, the sum of the cpu time consumed by all vcpus.
+ * For example, if there are 4 physical cpus, and 2 vcpus in a domain,
+ * then for each vcpu, the cpuacct.usage_percpu looks like this:
+ * t0 t1 t2 t3
+ * and we have 2 groups of such data:
+ * v\p 0 1 2 3
+ * 0 t00 t01 t02 t03
+ * 1 t10 t11 t12 t13
+ * For each pcpu, the sum is the cpu time consumed on it by all vcpus:
+ * s0 = t00 + t10
+ * s1 = t01 + t11
+ * s2 = t02 + t12
+ * s3 = t03 + t13
+ */
+static int
+getSumVcpuPercpuStats(virCgroupPtr group,
+ unsigned int nvcpu,
+ unsigned long long *sum_cpu_time,
+ unsigned int num)
+{
+ int ret = -1;
+ int i;
+ char *buf = NULL;
+ virCgroupPtr group_vcpu = NULL;
+
+ for (i = 0; i < nvcpu; i++) {
+ char *pos;
+ unsigned long long tmp;
+ int j;
+
+ if (virCgroupForVcpu(group, i, &group_vcpu, 0) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("error accessing cgroup cpuacct for vcpu"));
+ goto cleanup;
+ }
+
+ if (virCgroupGetCpuacctPercpuUsage(group_vcpu, &buf) < 0)
+ goto cleanup;
+
+ pos = buf;
+ for (j = 0; j < num; j++) {
+ if (virStrToLong_ull(pos, &pos, 10, &tmp) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("cpuacct parse error"));
+ goto cleanup;
+ }
+ sum_cpu_time[j] += tmp;
+ }
+
+ virCgroupFree(&group_vcpu);
+ VIR_FREE(buf);
+ }
+
+ ret = 0;
+cleanup:
+ virCgroupFree(&group_vcpu);
+ VIR_FREE(buf);
+ return ret;
+}
+
static int
qemuDomainGetPercpuStats(virDomainPtr domain,
+ virDomainObjPtr vm,
virCgroupPtr group,
virTypedParameterPtr params,
unsigned int nparams,
@@ -12572,20 +12633,24 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
unsigned int ncpus)
{
char *map = NULL;
+ char *map2 = NULL;
int rv = -1;
int i, max_id;
char *pos;
char *buf = NULL;
+ unsigned long long *sum_cpu_time = NULL;
+ unsigned long long *sum_cpu_pos;
+ unsigned int n = 0;
+ qemuDomainObjPrivatePtr priv = vm->privateData;
virTypedParameterPtr ent;
int param_idx;
+ unsigned long long cpu_time;
/* return the number of supported params */
if (nparams == 0 && ncpus != 0)
- return QEMU_NB_PER_CPU_STAT_PARAM; /* only cpu_time is supported */
+ return QEMU_NB_PER_CPU_STAT_PARAM;
- /* return percpu cputime in index 0 */
- param_idx = 0;
- /* to parse account file, we need "present" cpu map */
+ /* To parse the cpuacct file, we need the "present" cpu map. */
map = nodeGetCPUmap(domain->conn, &max_id, "present");
if (!map)
return rv;
@@ -12608,30 +12673,70 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
pos = buf;
memset(params, 0, nparams * ncpus);
+ /* return percpu cputime in index 0 */
+ param_idx = 0;
+
if (max_id - start_cpu > ncpus - 1)
max_id = start_cpu + ncpus - 1;
for (i = 0; i <= max_id; i++) {
- unsigned long long cpu_time;
-
if (!map[i]) {
cpu_time = 0;
} else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
qemuReportError(VIR_ERR_INTERNAL_ERROR,
_("cpuacct parse error"));
goto cleanup;
+ } else {
+ n++;
}
if (i < start_cpu)
continue;
- ent = &params[ (i - start_cpu) * nparams + param_idx];
+ ent = &params[(i - start_cpu) * nparams + param_idx];
if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME,
VIR_TYPED_PARAM_ULLONG, cpu_time) < 0)
goto cleanup;
}
+
+ /* return percpu vcputime in index 1 */
+ if (++param_idx >= nparams) {
+ rv = nparams;
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC_N(sum_cpu_time, n) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ if (getSumVcpuPercpuStats(group, priv->nvcpupids, sum_cpu_time, n) < 0)
+ goto cleanup;
+
+ /* Check that the mapping of present cpus didn't change mid-parse. */
+ map2 = nodeGetCPUmap(domain->conn, &max_id, "present");
+ if (!map2 || memcmp(map, map2, VIR_DOMAIN_CPUMASK_LEN) != 0)
+ goto cleanup;
+ if (max_id - start_cpu > ncpus - 1)
+ max_id = start_cpu + ncpus - 1;
+
+ sum_cpu_pos = sum_cpu_time;
+ for (i = 0; i <= max_id; i++) {
+ if (!map[i])
+ cpu_time = 0;
+ else
+ cpu_time = *(sum_cpu_pos++);
+ if (i < start_cpu)
+ continue;
+ if (virTypedParameterAssign(&params[(i - start_cpu) * nparams +
+ param_idx],
+ VIR_DOMAIN_CPU_STATS_VCPUTIME,
+ VIR_TYPED_PARAM_ULLONG,
+ cpu_time) < 0)
+ goto cleanup;
+ }
+
rv = param_idx + 1;
cleanup:
+ VIR_FREE(sum_cpu_time);
VIR_FREE(buf);
VIR_FREE(map);
+ VIR_FREE(map2);
return rv;
}
@@ -12683,7 +12788,7 @@ qemuDomainGetCPUStats(virDomainPtr domain,
if (start_cpu == -1)
ret = qemuDomainGetTotalcpuStats(group, params, nparams);
else
- ret = qemuDomainGetPercpuStats(domain, group, params, nparams,
+ ret = qemuDomainGetPercpuStats(domain, vm, group, params, nparams,
start_cpu, ncpus);
cleanup:
virCgroupFree(&group);
diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index ad49bc2..5b32881 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -530,7 +530,9 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
continue;
/* We need to control cpu bandwidth for each vcpu now */
- if ((flags & VIR_CGROUP_VCPU) && (i != VIR_CGROUP_CONTROLLER_CPU)) {
+ if ((flags & VIR_CGROUP_VCPU) &&
+ (i != VIR_CGROUP_CONTROLLER_CPU &&
+ i != VIR_CGROUP_CONTROLLER_CPUACCT)) {
/* treat it as unmounted and we can use virCgroupAddTask */
VIR_FREE(group->controllers[i].mountPoint);
continue;
--
1.7.7.6