[PATCH 0/4] Restrictive numatune mode related fixes

*** BLURB HERE *** Michal Prívozník (4): lib: Don't short circuit around virDomainCgroupSetupVcpuBW() hypervisor: Drop dead code in virDomainCgroupSetupGlobalCpuCgroup() ch: Explicitly forbid live changing nodeset for strict numatune lib: Set up cpuset controller for restrictive numatune src/ch/ch_driver.c | 2 +- src/ch/ch_process.c | 6 +++--- src/hypervisor/domain_cgroup.c | 25 ++----------------------- src/hypervisor/domain_cgroup.h | 3 +-- src/lxc/lxc_controller.c | 3 ++- src/qemu/qemu_driver.c | 12 ------------ src/qemu/qemu_process.c | 11 ++++++----- 7 files changed, 15 insertions(+), 47 deletions(-) -- 2.35.1

The virDomainCgroupSetupVcpuBW() is a NOP if both period and quota to set are zero. There's no need to check in all the callers for this special case. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/hypervisor/domain_cgroup.c | 6 ++---- src/qemu/qemu_driver.c | 12 ------------ src/qemu/qemu_process.c | 3 +-- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c index fc0ad284c8..920ec8c895 100644 --- a/src/hypervisor/domain_cgroup.c +++ b/src/hypervisor/domain_cgroup.c @@ -603,10 +603,8 @@ virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm, autoNodeset, &mem_mask, -1) < 0) return -1; - if (period || quota) { - if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) - return -1; - } + if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) + return -1; return 0; } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 77012eb527..547af4384f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8923,9 +8923,6 @@ static int qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period, long long quota) { - if (period == 0 && quota == 0) - return 0; - if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) return -1; @@ -9104,9 +9101,6 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup, { size_t i; - if (period == 0 && quota == 0) - return 0; - if (!qemuDomainHasVcpuPids(vm)) return 0; @@ -9135,9 +9129,6 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup, { g_autoptr(virCgroup) cgroup_emulator = NULL; - if (period == 0 && quota == 0) - return 0; - if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, false, &cgroup_emulator) < 0) return -1; @@ -9155,9 +9146,6 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup, { size_t i; - if (period == 0 && quota == 0) - return 0; - if (!vm->def->niothreadids) return 0; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index f110e4f3dd..f3eb742406 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2699,8 +2699,7 @@ qemuProcessSetupPid(virDomainObj *vm, } - if ((period || quota) && - virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) + if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) goto cleanup; /* Move the thread to the sub dir */ -- 2.35.1

Since its introduction in v1.3.2-43-gef1fa55e46 there is a dead code in virDomainCgroupSetupGlobalCpuCgroup() (well, qemuSetupGlobalCpuCgroup() back then). The code formats NUMA nodeset but never sets it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/ch/ch_process.c | 3 +-- src/hypervisor/domain_cgroup.c | 21 +-------------------- src/hypervisor/domain_cgroup.h | 3 +-- src/qemu/qemu_process.c | 2 +- 4 files changed, 4 insertions(+), 25 deletions(-) diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 00d94ddcbe..785e4f8769 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -526,8 +526,7 @@ virCHProcessStart(virCHDriver *driver, VIR_DEBUG("Setting global CPU cgroup (if required)"); if (virDomainCgroupSetupGlobalCpuCgroup(vm, - priv->cgroup, - priv->autoNodeset) < 0) + priv->cgroup) < 0) goto cleanup; VIR_DEBUG("Setting vCPU tuning/settings"); diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c index 920ec8c895..5af88155bc 100644 --- a/src/hypervisor/domain_cgroup.c +++ b/src/hypervisor/domain_cgroup.c @@ -572,13 +572,10 @@ virDomainCgroupSetupCpusetCpus(virCgroup *cgroup, int virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm, - virCgroup *cgroup, - virBitmap *autoNodeset) + virCgroup *cgroup) { unsigned long long period = vm->def->cputune.global_period; long long quota = vm->def->cputune.global_quota; - g_autofree char *mem_mask = NULL; - virDomainNumatuneMemMode mem_mode; if ((period || quota) && !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { @@ -587,22 +584,6 @@ virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm, return -1; } - /* - * If CPU cgroup controller is not initialized here, then we need - * neither period nor quota settings. And if CPUSET controller is - * not initialized either, then there's nothing to do anyway. - */ - if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) && - !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - - if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - virDomainNumatuneMaybeFormatNodeset(vm->def->numa, - autoNodeset, &mem_mask, -1) < 0) - return -1; - if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) return -1; diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h index 20893e1b46..f8d261a080 100644 --- a/src/hypervisor/domain_cgroup.h +++ b/src/hypervisor/domain_cgroup.h @@ -98,8 +98,7 @@ virDomainCgroupSetupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask); int virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm, - virCgroup *cgroup, - virBitmap *autoNodeset); + virCgroup *cgroup); int virDomainCgroupRemoveCgroup(virDomainObj *vm, virCgroup *cgroup, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index f3eb742406..bd13cc5103 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -7642,7 +7642,7 @@ qemuProcessLaunch(virConnectPtr conn, goto cleanup; VIR_DEBUG("Setting global CPU cgroup (if required)"); - if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) + if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup) < 0) goto cleanup; VIR_DEBUG("Setting vCPU tuning/settings"); -- 2.35.1

This is similar to v7.10.0-354-g06f405c627 except this time it fixes CH driver. With strict numatune we can't guarantee that all memory is moved to new location. Therefore, let's forbid moving memory in that case. However, allow it for restrictive mode, which is documented to be best effort. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/ch/ch_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 2fe7aba9d0..34ce0c1a0c 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -1519,7 +1519,7 @@ chDomainSetNumaParamsLive(virDomainObj *vm, size_t i = 0; if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) == 0 && - mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("change of nodeset for running domain requires strict numa mode")); return -1; -- 2.35.1

The aim of 'restrictive' numatune mode is to rely solely on CGroups to have QEMU running on configured NUMA nodes. However, we were never setting the cpuset controller when a domain was starting up. We are doing so only when virDomainSetNumaParameters() is called (aka live pinning). This is obviously wrong. Fortunately, fix is simple as 'restrictive' is similar to 'strict' - every location where VIR_DOMAIN_NUMATUNE_MEM_STRICT occurs can be audited and VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE case can be added. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2070380 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/ch/ch_process.c | 3 ++- src/lxc/lxc_controller.c | 3 ++- src/qemu/qemu_process.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 785e4f8769..977082d585 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -254,7 +254,8 @@ virCHProcessSetupPid(virDomainObj *vm, virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + (mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT || + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 677fa5a4fb..d936f34793 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -811,7 +811,8 @@ static int virLXCControllerSetupResourceLimits(virLXCController *ctrl) virDomainNumatuneMemMode mode; if (virDomainNumatuneGetMode(ctrl->def->numa, -1, &mode) == 0) { - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if ((mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT || + mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) && virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { /* Use virNuma* API iff necessary. Once set and child is exec()-ed, * there's no way for us to change it. Rely on cgroups (if available diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index bd13cc5103..c4b349632e 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2650,7 +2650,8 @@ qemuProcessSetupPid(virDomainObj *vm, virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + (mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT || + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) @@ -3166,7 +3167,8 @@ static int qemuProcessHook(void *data) goto cleanup; if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) { - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if ((mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT || + mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) && h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) && virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { /* Use virNuma* API iff necessary. Once set and child is exec()-ed, -- 2.35.1

On a Friday in 2022, Michal Privoznik wrote:
*** BLURB HERE ***
Michal Prívozník (4): lib: Don't short circuit around virDomainCgroupSetupVcpuBW() hypervisor: Drop dead code in virDomainCgroupSetupGlobalCpuCgroup() ch: Explicitly forbid live changing nodeset for strict numatune lib: Set up cpuset controller for restrictive numatune
src/ch/ch_driver.c | 2 +- src/ch/ch_process.c | 6 +++--- src/hypervisor/domain_cgroup.c | 25 ++----------------------- src/hypervisor/domain_cgroup.h | 3 +-- src/lxc/lxc_controller.c | 3 ++- src/qemu/qemu_driver.c | 12 ------------ src/qemu/qemu_process.c | 11 ++++++----- 7 files changed, 15 insertions(+), 47 deletions(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano
participants (2)
-
Ján Tomko
-
Michal Privoznik