Instead of setting the value of cpuset.mems once when the domain starts
and then re-calculating the value every time we need to change the child
cgroup values, leave the parent cgroup alone and set the child data
every time a new cgroup is created. We don't leave any tasks in the
parent group anyway. This simplifies both current and future code.
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
src/qemu/qemu_cgroup.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_driver.c | 59 +++++++++++++++-----------------------------
2 files changed, 85 insertions(+), 41 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index a0370bc..1e383c4 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -35,6 +35,7 @@
#include "virstring.h"
#include "virfile.h"
#include "virtypedparam.h"
+#include "virnuma.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -629,8 +630,7 @@ qemuSetupCpusetMems(virDomainObjPtr vm)
if (mem_mask)
if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
- virCgroupSetCpusetMems(cgroup_temp, mem_mask) < 0 ||
- virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
+ virCgroupSetCpusetMems(cgroup_temp, mem_mask) < 0)
goto cleanup;
ret = 0;
@@ -785,6 +785,39 @@ qemuInitCgroup(virQEMUDriverPtr driver,
return ret;
}
+/**
+ * qemuRestoreCgroupState:
+ * @vm: domain object whose private data holds the cgroup to adjust
+ *
+ * Best-effort helper: formats the nodeset returned by
+ * virNumaGetHostNodeset() and writes it as cpuset.mems of the domain's
+ * cgroup, but only when virCgroupHasEmptyTasks() reports that the cgroup
+ * has no tasks in the cpuset controller.
+ *
+ * Failures are never propagated; the last error is reset and a debug
+ * message is logged instead.
+ */
+static void
+qemuRestoreCgroupState(virDomainObjPtr vm)
+{
+    /* Must start as NULL: the cleanup path frees it even when we bail
+     * out before it has been assigned. */
+    char *mem_mask = NULL;
+    int empty = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virBitmapPtr all_nodes;
+
+    if (!(all_nodes = virNumaGetHostNodeset()))
+        goto error;
+
+    if (!(mem_mask = virBitmapFormat(all_nodes)))
+        goto error;
+
+    /* Negative means the query failed, zero means the cgroup still has
+     * tasks; in both cases leave the cgroup untouched. */
+    if ((empty = virCgroupHasEmptyTasks(priv->cgroup,
+                                        VIR_CGROUP_CONTROLLER_CPUSET)) <= 0)
+        goto error;
+
+    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
+        goto error;
+
+ cleanup:
+    VIR_FREE(mem_mask);
+    virBitmapFree(all_nodes);
+    return;
+
+ error:
+    virResetLastError();
+    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
+    goto cleanup;
+}
int
qemuConnectCgroup(virQEMUDriverPtr driver,
@@ -812,6 +845,8 @@ qemuConnectCgroup(virQEMUDriverPtr driver,
&priv->cgroup) < 0)
goto cleanup;
+ qemuRestoreCgroupState(vm);
+
done:
ret = 0;
cleanup:
@@ -961,6 +996,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
size_t i, j;
unsigned long long period = vm->def->cputune.period;
long long quota = vm->def->cputune.quota;
+ char *mem_mask = NULL;
if ((period || quota) &&
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
@@ -992,6 +1028,13 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
return 0;
}
+ if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
+ VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
+ priv->autoNodeset,
+ &mem_mask, -1) < 0)
+ goto cleanup;
+
for (i = 0; i < priv->nvcpupids; i++) {
if (virCgroupNewVcpu(priv->cgroup, i, true, &cgroup_vcpu) < 0)
goto cleanup;
@@ -1000,6 +1043,10 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
if (virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]) < 0)
goto cleanup;
+ if (mem_mask &&
+ virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
+ goto cleanup;
+
if (period || quota) {
if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
goto cleanup;
@@ -1025,6 +1072,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
virCgroupFree(&cgroup_vcpu);
}
+ VIR_FREE(mem_mask);
return 0;
@@ -1033,6 +1081,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm)
virCgroupRemove(cgroup_vcpu);
virCgroupFree(&cgroup_vcpu);
}
+ VIR_FREE(mem_mask);
return -1;
}
@@ -1121,6 +1170,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
size_t i, j;
unsigned long long period = vm->def->cputune.period;
long long quota = vm->def->cputune.quota;
+ char *mem_mask = NULL;
if ((period || quota) &&
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
@@ -1149,6 +1199,13 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
return 0;
}
+ if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
+ VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
+ priv->autoNodeset,
+ &mem_mask, -1) < 0)
+ goto cleanup;
+
for (i = 0; i < priv->niothreadpids; i++) {
/* IOThreads are numbered 1..n, although the array is 0..n-1,
* so we will account for that here
@@ -1166,6 +1223,10 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
goto cleanup;
}
+ if (mem_mask &&
+ virCgroupSetCpusetMems(cgroup_iothread, mem_mask) < 0)
+ goto cleanup;
+
/* Set iothreadpin in cgroup if iothreadpin xml is provided */
if (virCgroupHasController(priv->cgroup,
VIR_CGROUP_CONTROLLER_CPUSET)) {
@@ -1188,6 +1249,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
virCgroupFree(&cgroup_iothread);
}
+ VIR_FREE(mem_mask);
return 0;
@@ -1196,6 +1258,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
virCgroupRemove(cgroup_iothread);
virCgroupFree(&cgroup_iothread);
}
+ VIR_FREE(mem_mask);
return -1;
}
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index df3ba6d..bfa62c4 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4427,6 +4427,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
pid_t *cpupids = NULL;
int ncpupids;
virCgroupPtr cgroup_vcpu = NULL;
+ char *mem_mask = NULL;
qemuDomainObjEnterMonitor(driver, vm);
@@ -4490,6 +4491,13 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
goto cleanup;
}
+ if (virDomainNumatuneGetMode(vm->def->numatune, -1) ==
+ VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numatune,
+ priv->autoNodeset,
+ &mem_mask, -1) < 0)
+ goto cleanup;
+
if (nvcpus > oldvcpus) {
for (i = oldvcpus; i < nvcpus; i++) {
if (priv->cgroup) {
@@ -4498,6 +4506,10 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
if (virCgroupNewVcpu(priv->cgroup, i, true, &cgroup_vcpu) < 0)
goto cleanup;
+ if (mem_mask &&
+ virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
+ goto cleanup;
+
/* Add vcpu thread to the cgroup */
rv = virCgroupAddTask(cgroup_vcpu, cpupids[i]);
if (rv < 0) {
@@ -4507,6 +4519,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
virCgroupRemove(cgroup_vcpu);
goto cleanup;
}
+
}
/* Inherit def->cpuset */
@@ -4579,6 +4592,7 @@ static int qemuDomainHotplugVcpus(virQEMUDriverPtr driver,
qemuDomainObjExitMonitor(driver, vm);
vm->def->vcpus = vcpus;
VIR_FREE(cpupids);
+ VIR_FREE(mem_mask);
virDomainAuditVcpu(vm, oldvcpus, nvcpus, "update", rc == 1);
if (cgroup_vcpu)
virCgroupFree(&cgroup_vcpu);
@@ -9215,11 +9229,9 @@ qemuDomainGetMemoryParameters(virDomainPtr dom,
static int
qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
- virCapsPtr caps,
virBitmapPtr nodeset)
{
virCgroupPtr cgroup_temp = NULL;
- virBitmapPtr temp_nodeset = NULL;
qemuDomainObjPrivatePtr priv = vm->privateData;
char *nodeset_str = NULL;
size_t i = 0;
@@ -9233,39 +9245,15 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
goto cleanup;
}
- /* Get existing nodeset values */
- if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
- virBitmapParse(nodeset_str, 0, &temp_nodeset,
- VIR_DOMAIN_CPUMASK_LEN) < 0)
- goto cleanup;
- VIR_FREE(nodeset_str);
-
- for (i = 0; i < caps->host.nnumaCell; i++) {
- bool result;
- virCapsHostNUMACellPtr cell = caps->host.numaCell[i];
- if (virBitmapGetBit(nodeset, cell->num, &result) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Failed to get cpuset bit values"));
- goto cleanup;
- }
- if (result && (virBitmapSetBit(temp_nodeset, cell->num) < 0)) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Failed to set temporary cpuset bit values"));
- goto cleanup;
- }
- }
-
- if (!(nodeset_str = virBitmapFormat(temp_nodeset)))
- goto cleanup;
-
- if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
- goto cleanup;
- VIR_FREE(nodeset_str);
-
/* Ensure the cpuset string is formatted before passing to cgroup */
if (!(nodeset_str = virBitmapFormat(nodeset)))
goto cleanup;
+ if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
+ virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0)
+ goto cleanup;
+ virCgroupFree(&cgroup_temp);
+
for (i = 0; i < priv->nvcpupids; i++) {
if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 ||
virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0)
@@ -9273,11 +9261,6 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
virCgroupFree(&cgroup_temp);
}
- if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
- virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0 ||
- virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0)
- goto cleanup;
-
for (i = 0; i < priv->niothreadpids; i++) {
if (virCgroupNewIOThread(priv->cgroup, i + 1, false,
&cgroup_temp) < 0 ||
@@ -9286,11 +9269,9 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
virCgroupFree(&cgroup_temp);
}
-
ret = 0;
cleanup:
VIR_FREE(nodeset_str);
- virBitmapFree(temp_nodeset);
virCgroupFree(&cgroup_temp);
return ret;
@@ -9392,7 +9373,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom,
}
if (nodeset &&
- qemuDomainSetNumaParamsLive(vm, caps, nodeset) < 0)
+ qemuDomainSetNumaParamsLive(vm, nodeset) < 0)
goto endjob;
if (virDomainNumatuneSet(&vm->def->numatune,
--
2.2.0