From: Tang Chen <tangchen(a)cn.fujitsu.com>
vcpu thread pinning is currently implemented with sched_setaffinity() and
is not controlled by cgroups. This patch does the following:
1) enable the cpuset cgroup controller
2) reflect the pinning of all vcpu threads into the cpuset cgroup
Signed-off-by: Tang Chen <tangchen(a)cn.fujitsu.com>
Signed-off-by: Hu Tao <hutao(a)cn.fujitsu.com>
---
src/libvirt_private.syms | 2 ++
src/qemu/qemu_cgroup.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_cgroup.h | 2 ++
src/qemu/qemu_driver.c | 44 ++++++++++++++++++++++++++++++++++++--------
src/util/cgroup.c | 35 ++++++++++++++++++++++++++++++++++-
src/util/cgroup.h | 3 +++
6 files changed, 121 insertions(+), 9 deletions(-)
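
For reviewers, the intended call sequence on the live-pinning path is
roughly the sketch below (illustrative only, error handling trimmed; the
helper names are the ones used or introduced by this patch):

    /* 1) record the new pin in vm->def first, since qemuSetupCgroupVcpuPin reads it */
    if (virDomainVcpuPinAdd(vm->def, cpumap, maplen, vcpu) < 0)
        goto cleanup;

    /* 2) mirror the pin into cpuset.cpus of the per-vcpu cgroup, if the controller is active */
    if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET) &&
        virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup_dom, 0) == 0 &&
        virCgroupForVcpu(cgroup_dom, vcpu, &cgroup_vcpu, 0) == 0 &&
        qemuSetupCgroupVcpuPin(cgroup_vcpu, vm->def, vcpu) < 0)
        goto cleanup;

    /* 3) sched_setaffinity() is still applied, so pinning keeps working
     *    even when the cpuset controller is not mounted */
    if (virProcessInfoSetAffinity(priv->vcpupids[vcpu], cpumap, maplen, maxcpu) < 0)
        goto cleanup;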
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 0511a0f..ae7f124 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -82,6 +82,7 @@ virCgroupGetCpuShares;
virCgroupGetCpuacctPercpuUsage;
virCgroupGetCpuacctStat;
virCgroupGetCpuacctUsage;
+virCgroupGetCpusetCpus;
virCgroupGetCpusetMems;
virCgroupGetFreezerState;
virCgroupGetMemSwapHardLimit;
@@ -100,6 +101,7 @@ virCgroupSetBlkioWeight;
virCgroupSetCpuCfsPeriod;
virCgroupSetCpuCfsQuota;
virCgroupSetCpuShares;
+virCgroupSetCpusetCpus;
virCgroupSetCpusetMems;
virCgroupSetFreezerState;
virCgroupSetMemSwapHardLimit;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 9400efe..ceca4de 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -479,11 +479,49 @@ cleanup:
return -1;
}
+int qemuSetupCgroupVcpuPin(virCgroupPtr cgroup, virDomainDefPtr def,
+ int vcpuid)
+{
+ int i, rc = 0;
+ char *new_cpus = NULL;
+
+ if (vcpuid < 0 || vcpuid >= def->vcpus) {
+ virReportSystemError(EINVAL,
+ _("invalid vcpuid: %d"), vcpuid);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < def->cputune.nvcpupin; i++) {
+ if (vcpuid == def->cputune.vcpupin[i]->vcpuid) {
+ new_cpus = virDomainCpuSetFormat(def->cputune.vcpupin[i]->cpumask,
+ VIR_DOMAIN_CPUMASK_LEN);
+ if (!new_cpus) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to convert cpu mask"));
+ rc = -1;
+ goto cleanup;
+ }
+ rc = virCgroupSetCpusetCpus(cgroup, new_cpus);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ "%s",
+ _("Unable to set cpuset.cpus"));
+ goto cleanup;
+ }
+ }
+ }
+
+cleanup:
+ VIR_FREE(new_cpus);
+ return rc;
+}
+
int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
{
virCgroupPtr cgroup = NULL;
virCgroupPtr cgroup_vcpu = NULL;
qemuDomainObjPrivatePtr priv = vm->privateData;
+ virDomainDefPtr def = vm->def;
int rc;
unsigned int i;
unsigned long long period = vm->def->cputune.period;
@@ -555,6 +593,12 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
}
}
+ /* Set vcpupin in cgroup if vcpupin xml is provided */
+ if (def->cputune.nvcpupin &&
+ qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET) &&
+ qemuSetupCgroupVcpuPin(cgroup_vcpu, def, i) < 0)
+ goto cleanup;
+
virCgroupFree(&cgroup_vcpu);
}
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3380ee2..62ec953 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -53,6 +53,8 @@ int qemuSetupCgroup(struct qemud_driver *driver,
int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup,
unsigned long long period,
long long quota);
+int qemuSetupCgroupVcpuPin(virCgroupPtr cgroup, virDomainDefPtr def,
+ int vcpuid);
int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
virDomainObjPtr vm);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 270e4dd..2da13a4 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -3613,6 +3613,8 @@ qemudDomainPinVcpuFlags(virDomainPtr dom,
struct qemud_driver *driver = dom->conn->privateData;
virDomainObjPtr vm;
virDomainDefPtr persistentDef = NULL;
+ virCgroupPtr cgroup_dom = NULL;
+ virCgroupPtr cgroup_vcpu = NULL;
int maxcpu, hostcpus;
virNodeInfo nodeinfo;
int ret = -1;
@@ -3667,9 +3669,38 @@ qemudDomainPinVcpuFlags(virDomainPtr dom,
if (flags & VIR_DOMAIN_AFFECT_LIVE) {
if (priv->vcpupids != NULL) {
+ /* Add config to vm->def first, because qemuSetupCgroupVcpuPin needs it. */
+ if (virDomainVcpuPinAdd(vm->def, cpumap, maplen, vcpu) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to update or add vcpupin xml of "
+ "a running domain"));
+ goto cleanup;
+ }
+
+ /* Configure the corresponding cpuset cgroup before setting affinity. */
+ if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET)) {
+ if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup_dom, 0) == 0 &&
+ virCgroupForVcpu(cgroup_dom, vcpu, &cgroup_vcpu, 0) == 0 &&
+ qemuSetupCgroupVcpuPin(cgroup_vcpu, vm->def, vcpu) < 0) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("failed to set cpuset.cpus in cgroup"
+ " for vcpu %d"), vcpu);
+ goto cleanup;
+ }
+ } else {
+ /* Continue anyway: even if the cpuset cgroup is not active,
+ * we can still use virProcessInfoSetAffinity.
+ */
+ VIR_WARN("cpuset cgroup is not active");
+ }
+
if (virProcessInfoSetAffinity(priv->vcpupids[vcpu],
- cpumap, maplen, maxcpu) < 0)
+ cpumap, maplen, maxcpu) < 0) {
+ virReportError(VIR_ERR_SYSTEM_ERROR,
+ _("failed to set cpu affinity for vcpu %d"),
+ vcpu);
goto cleanup;
+ }
} else {
virReportError(VIR_ERR_OPERATION_INVALID,
"%s", _("cpu affinity is not
supported"));
@@ -3683,13 +3714,6 @@ qemudDomainPinVcpuFlags(virDomainPtr dom,
"a running domain"));
goto cleanup;
}
- } else {
- if (virDomainVcpuPinAdd(vm->def, cpumap, maplen, vcpu) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("failed to update or add vcpupin xml of "
- "a running domain"));
- goto cleanup;
- }
}
if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
@@ -3721,6 +3745,10 @@ qemudDomainPinVcpuFlags(virDomainPtr dom,
ret = 0;
cleanup:
+ if (cgroup_vcpu)
+ virCgroupFree(&cgroup_vcpu);
+ if (cgroup_dom)
+ virCgroupFree(&cgroup_dom);
if (vm)
virDomainObjUnlock(vm);
return ret;
diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index 23837fe..c35828c 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -544,7 +544,8 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
/* We need to control cpu bandwidth for each vcpu now */
if ((flags & VIR_CGROUP_VCPU) &&
(i != VIR_CGROUP_CONTROLLER_CPU &&
- i != VIR_CGROUP_CONTROLLER_CPUACCT)) {
+ i != VIR_CGROUP_CONTROLLER_CPUACCT &&
+ i != VIR_CGROUP_CONTROLLER_CPUSET)) {
/* treat it as unmounted and we can use virCgroupAddTask */
VIR_FREE(group->controllers[i].mountPoint);
continue;
@@ -1405,6 +1406,38 @@ int virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
}
/**
+ * virCgroupSetCpusetCpus:
+ *
+ * @group: The cgroup to set cpuset.cpus for
+ * @cpus: the cpus to set
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus)
+{
+ return virCgroupSetValueStr(group,
+ VIR_CGROUP_CONTROLLER_CPUSET,
+ "cpuset.cpus",
+ cpus);
+}
+
+/**
+ * virCgroupGetCpusetCpus:
+ *
+ * @group: The cgroup to get cpuset.cpus for
+ * @cpus: Pointer to the returned cpuset.cpus string
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus)
+{
+ return virCgroupGetValueStr(group,
+ VIR_CGROUP_CONTROLLER_CPUSET,
+ "cpuset.cpus",
+ cpus);
+}
+
+/**
* virCgroupDenyAllDevices:
*
* @group: The cgroup to deny all permissions, for all devices
diff --git a/src/util/cgroup.h b/src/util/cgroup.h
index 29c52c1..1409df9 100644
--- a/src/util/cgroup.h
+++ b/src/util/cgroup.h
@@ -151,6 +151,9 @@ int virCgroupGetFreezerState(virCgroupPtr group, char **state);
int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems);
int virCgroupGetCpusetMems(virCgroupPtr group, char **mems);
+int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus);
+int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus);
+
int virCgroupRemove(virCgroupPtr group);
void virCgroupFree(virCgroupPtr *group);
--
1.7.10.2