[libvirt] [PATCH 0/2] qemu/cgroup: Make live numatune setting usable

We didn't set the migrating memory flag to 1 and consequently any changes to numa memory tuning didn't take effect. Let's fix that. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1198497 Martin Kletzander (2): cgroup: Add accessors for cpuset.memory_migrate qemu: Migrate memory on numatune change src/libvirt_private.syms | 2 ++ src/qemu/qemu_cgroup.c | 36 +++++++++++++++++++++++++++++++++++- src/util/vircgroup.c | 42 +++++++++++++++++++++++++++++++++++++++++- src/util/vircgroup.h | 5 ++++- 4 files changed, 82 insertions(+), 3 deletions(-) -- 2.3.1

Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/vircgroup.c | 42 +++++++++++++++++++++++++++++++++++++++++- src/util/vircgroup.h | 5 ++++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e0d5459..f3d8517 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1130,6 +1130,7 @@ virCgroupGetCpuacctUsage; virCgroupGetCpuCfsPeriod; virCgroupGetCpuCfsQuota; virCgroupGetCpusetCpus; +virCgroupGetCpusetMemoryMigrate; virCgroupGetCpusetMems; virCgroupGetCpuShares; virCgroupGetDevicePermsString; @@ -1170,6 +1171,7 @@ virCgroupSetBlkioWeight; virCgroupSetCpuCfsPeriod; virCgroupSetCpuCfsQuota; virCgroupSetCpusetCpus; +virCgroupSetCpusetMemoryMigrate; virCgroupSetCpusetMems; virCgroupSetCpuShares; virCgroupSetFreezerState; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 6957e81..5d3de10 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1,7 +1,7 @@ /* * vircgroup.c: methods for managing control cgroups * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * Copyright IBM Corp. 2008 * * This library is free software; you can redistribute it and/or @@ -872,6 +872,7 @@ virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) const char *inherit_values[] = { "cpuset.cpus", "cpuset.mems", + "cpuset.memory_migrate", }; VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path); @@ -2671,6 +2672,45 @@ virCgroupGetCpusetMems(virCgroupPtr group, char **mems) /** + * virCgroupSetCpusetMemoryMigrate: + * + * @group: The cgroup to set cpuset.memory_migrate for + * @migrate: Whether to migrate the memory on change or not + * + * Returns: 0 on success + */ +int +virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + migrate ? "1" : "0"); +} + + +/** + * virCgroupGetCpusetMemoryMigrate: + * + * @group: The cgroup to get cpuset.memory_migrate for + * @migrate: Migration setting + * + * Returns: 0 on success + */ +int +virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) +{ + unsigned long long value = 0; + int ret = virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + &value); + *migrate = !!value; + return ret; +} + + +/** * virCgroupSetCpusetCpus: * * @group: The cgroup to set cpuset.cpus for diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 9f984e7..bfb3a9b 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -1,7 +1,7 @@ /* * vircgroup.h: methods for managing control cgroups * - * Copyright (C) 2011-2014 Red Hat, Inc. + * Copyright (C) 2011-2015 Red Hat, Inc. * Copyright IBM Corp. 2008 * * This library is free software; you can redistribute it and/or @@ -251,6 +251,9 @@ int virCgroupGetFreezerState(virCgroupPtr group, char **state); int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems); int virCgroupGetCpusetMems(virCgroupPtr group, char **mems); +int virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate); +int virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate); + int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus); int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus); -- 2.3.1

On 03/11/2015 08:39 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/vircgroup.c | 42 +++++++++++++++++++++++++++++++++++++++++- src/util/vircgroup.h | 5 ++++- 3 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e0d5459..f3d8517 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1130,6 +1130,7 @@ virCgroupGetCpuacctUsage; virCgroupGetCpuCfsPeriod; virCgroupGetCpuCfsQuota; virCgroupGetCpusetCpus; +virCgroupGetCpusetMemoryMigrate; virCgroupGetCpusetMems; virCgroupGetCpuShares; virCgroupGetDevicePermsString; @@ -1170,6 +1171,7 @@ virCgroupSetBlkioWeight; virCgroupSetCpuCfsPeriod; virCgroupSetCpuCfsQuota; virCgroupSetCpusetCpus; +virCgroupSetCpusetMemoryMigrate; virCgroupSetCpusetMems; virCgroupSetCpuShares; virCgroupSetFreezerState; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 6957e81..5d3de10 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1,7 +1,7 @@ /* * vircgroup.c: methods for managing control cgroups * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * Copyright IBM Corp. 2008 * * This library is free software; you can redistribute it and/or @@ -872,6 +872,7 @@ virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) const char *inherit_values[] = { "cpuset.cpus", "cpuset.mems", + "cpuset.memory_migrate", };
VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path); @@ -2671,6 +2672,45 @@ virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
/** + * virCgroupSetCpusetMemoryMigrate: + * + * @group: The cgroup to set cpuset.memory_migrate for + * @migrate: Whether to migrate the memory on change or not + * + * Returns: 0 on success + */ +int +virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + migrate ? "1" : "0"); +}
are my eyes deceiving me?... boolean here (1 or 0)
+ + +/** + * virCgroupGetCpusetMemoryMigrate: + * + * @group: The cgroup to get cpuset.memory_migrate for + * @migrate: Migration setting + * + * Returns: 0 on success + */ +int +virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) +{ + unsigned long long value = 0; + int ret = virCgroupGetValueU64(group,
But we're getting a U64 here? for what's documented as "contains a flag (0 or 1)
+ VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + &value); + *migrate = !!value; + return ret; +}
Also there's no callers to this? I see that vircgrouptest.c exists and there are a few API's called there - of course none for the virCgroupGetCpuset*... I think this is "ACKable" with perhaps usage of the right API to get at the value. John
+ + +/** * virCgroupSetCpusetCpus: * * @group: The cgroup to set cpuset.cpus for diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 9f984e7..bfb3a9b 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -1,7 +1,7 @@ /* * vircgroup.h: methods for managing control cgroups * - * Copyright (C) 2011-2014 Red Hat, Inc. + * Copyright (C) 2011-2015 Red Hat, Inc. * Copyright IBM Corp. 2008 * * This library is free software; you can redistribute it and/or @@ -251,6 +251,9 @@ int virCgroupGetFreezerState(virCgroupPtr group, char **state); int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems); int virCgroupGetCpusetMems(virCgroupPtr group, char **mems);
+int virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate); +int virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate); + int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus); int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus);

On 03/19/2015 05:11 PM, John Ferlan wrote:
+int +virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + migrate ? "1" : "0"); +}
are my eyes deceiving me?... boolean here (1 or 0)
No, the kernel really works this way - it takes the human-readable string "1" or "0" rather than the C byte '\1' or '\0', so this is the lowest level in the stack where we convert our convenient bool for everyone above us into the string literal the kernel cgroup interface expects.
+int +virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) +{ + unsigned long long value = 0; + int ret = virCgroupGetValueU64(group,
But we're getting a U64 here? for what's documented as "contains a flag (0 or 1)
+ VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + &value); + *migrate = !!value;
And here, this is one easy way to turn the kernel's string back into a bool (it might also be possible to read as a string, and then do *str=='1', rather than going through an intermediate integer conversion, but I'm not sure it is worth the micro-optimization). -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Thu, Mar 19, 2015 at 05:25:28PM -0600, Eric Blake wrote:
On 03/19/2015 05:11 PM, John Ferlan wrote:
+int +virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + migrate ? "1" : "0"); +}
are my eyes deceiving me?... boolean here (1 or 0)
No, the kernel really works this way - it takes the human-readable string "1" or "0" rather than the C byte '\1' or '\0', so this is the lowest level in the stack where we convert our convenient bool for everyone above us into the string literal the kernel cgroup interface expects.
Yes, exactly. I could've also used SetValueU64(), but that formats that value which is unnecessary overhead.
+int +virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) +{ + unsigned long long value = 0; + int ret = virCgroupGetValueU64(group,
But we're getting a U64 here? for what's documented as "contains a flag (0 or 1)
+ VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.memory_migrate", + &value); + *migrate = !!value;
And here, this is one easy way to turn the kernel's string back into a bool (it might also be possible to read as a string, and then do *str=='1', rather than going through an intermediate integer conversion, but I'm not sure it is worth the micro-optimization).
With one exception. If kernel changes the possible values to something else (let's say they'll add "2" for some special new meaning like "move aggressively"), this code will still work.
-- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 03/11/2015 08:39 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/vircgroup.c | 42 +++++++++++++++++++++++++++++++++++++++++- src/util/vircgroup.h | 5 ++++- 3 files changed, 47 insertions(+), 2 deletions(-)
OK - so with all that I've learned on this - it's an ACK... Whether I remember this the next time it comes up is anyone's guess ;-) Also, since it's been declared a bool already, if the kernel changes to allowing a 2 that I believe would be considered breaking things the kernel shouldn't break... John

On Fri, Mar 20, 2015 at 07:08:45AM -0400, John Ferlan wrote:
On 03/11/2015 08:39 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/vircgroup.c | 42 +++++++++++++++++++++++++++++++++++++++++- src/util/vircgroup.h | 5 ++++- 3 files changed, 47 insertions(+), 2 deletions(-)
OK - so with all that I've learned on this - it's an ACK... Whether I remember this the next time it comes up is anyone's guess ;-)
Also, since it's been declared a bool already, if the kernel changes to allowing a 2 that I believe would be considered breaking things the kernel shouldn't break...
"things kernel shouldn't break" :-D You made my weekend, thanks ;) I fixed the plus-one error in the second patch and pushed it. Thanks for the review.

We've never set the cpuset.memory_migrate value to anything, keeping it on default. However, we allow changing cpuset.mems on live domain. That setting, however, don't have any consequence on a domain unless it's going to allocate new memory. I managed to make 'virsh numatune' move all the memory to any node I wanted even without disabling libnuma's numa_set_membind(), so this should be safe to use with it as well. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1198497 Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_cgroup.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 2b5a182..976a8c4 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1,7 +1,7 @@ /* * qemu_cgroup.c: QEMU cgroup management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -652,6 +652,9 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0; + if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) + return -1; + if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) { @@ -792,9 +795,12 @@ static void qemuRestoreCgroupState(virDomainObjPtr vm) { char *mem_mask = NULL; + char *nodeset = NULL; int empty = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; virBitmapPtr all_nodes; + virCgroupPtr cgroup_temp = NULL; if (!(all_nodes = virNumaGetHostNodeset())) goto error; @@ -809,9 +815,37 @@ qemuRestoreCgroupState(virDomainObjPtr vm) if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) goto error; + for (i = 0; i < priv->nvcpupids; i++) { + if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + for (i = 0; i < priv->niothreadpids; i++) { + if (virCgroupNewIOThread(priv->cgroup, i, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + cleanup: VIR_FREE(mem_mask); + VIR_FREE(nodeset); virBitmapFree(all_nodes); + virCgroupFree(&cgroup_temp); return; error: -- 2.3.1

On 03/11/2015 08:39 AM, Martin Kletzander wrote:
We've never set the cpuset.memory_migrate value to anything, keeping it on default. However, we allow changing cpuset.mems on live domain. That setting, however, don't have any consequence on a domain unless it's going to allocate new memory.
I managed to make 'virsh numatune' move all the memory to any node I wanted even without disabling libnuma's numa_set_membind(), so this should be safe to use with it as well.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1198497
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_cgroup.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 2b5a182..976a8c4 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1,7 +1,7 @@ /* * qemu_cgroup.c: QEMU cgroup management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -652,6 +652,9 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0;
+ if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) + return -1; +
Is this OK to set even if NUMA isn't enabled? I didn't follow the callers all the way back and this isn't something I know a lot about, so I'm curious. I do see from the man page it's available since Linux 2.6.16, so one would hope/think that we're OK in assuming we can access it... Is it possible that someone wouldn't want to migrate the memory? That is should this be a configurable attribute?
if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) {
@@ -792,9 +795,12 @@ static void qemuRestoreCgroupState(virDomainObjPtr vm) { char *mem_mask = NULL; + char *nodeset = NULL; int empty = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; virBitmapPtr all_nodes; + virCgroupPtr cgroup_temp = NULL;
if (!(all_nodes = virNumaGetHostNodeset())) goto error; @@ -809,9 +815,37 @@ qemuRestoreCgroupState(virDomainObjPtr vm)
So this is the path for currently running guests to be adjusted on livirtd restart after install, right?
if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) goto error;
+ for (i = 0; i < priv->nvcpupids; i++) { + if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + for (i = 0; i < priv->niothreadpids; i++) { + if (virCgroupNewIOThread(priv->cgroup, i, false, &cgroup_temp) < 0 ||
cgroup iothread id's are 1..n, so I believe this should be 'i + 1' simple enough to test by adding <iothreads>2</iothreads> to your domain definition and making sure this works... Beyond that - seems reasonable to me John
+ virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + cleanup: VIR_FREE(mem_mask); + VIR_FREE(nodeset); virBitmapFree(all_nodes); + virCgroupFree(&cgroup_temp); return;
error:

On Thu, Mar 19, 2015 at 07:30:26PM -0400, John Ferlan wrote:
On 03/11/2015 08:39 AM, Martin Kletzander wrote:
We've never set the cpuset.memory_migrate value to anything, keeping it on default. However, we allow changing cpuset.mems on live domain. That setting, however, don't have any consequence on a domain unless it's going to allocate new memory.
I managed to make 'virsh numatune' move all the memory to any node I wanted even without disabling libnuma's numa_set_membind(), so this should be safe to use with it as well.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1198497
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_cgroup.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 2b5a182..976a8c4 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1,7 +1,7 @@ /* * qemu_cgroup.c: QEMU cgroup management * - * Copyright (C) 2006-2014 Red Hat, Inc. + * Copyright (C) 2006-2015 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -652,6 +652,9 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0;
+ if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) + return -1; +
Is this OK to set even if NUMA isn't enabled? I didn't follow the callers all the way back and this isn't something I know a lot about, so I'm curious. I do see from the man page it's available since Linux 2.6.16, so one would hope/think that we're OK in assuming we can access it...
This has nothing to do with guest's NUMA, but with host's. If the host is not a NUMA machine, then this should have no effect.
Is it possible that someone wouldn't want to migrate the memory? That is should this be a configurable attribute?
Well, by default there is nothing to migrate. Migration can happen if someone changes the setting of cpuset.mems (that controls on what NUMA nodes should threads' memory reside). Without memory_migrate that has effect only on new allocations. So if you have a guest that already allocated all of its memory and management application wants to move this guest to some other NUMA node nothing happens. On the other hand, if you set memory_migrate and don't set cgroup.mems, nothing happens (that's why I'm reading and setting the same values for all sub-cgroups).
if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) {
@@ -792,9 +795,12 @@ static void qemuRestoreCgroupState(virDomainObjPtr vm) { char *mem_mask = NULL; + char *nodeset = NULL; int empty = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; virBitmapPtr all_nodes; + virCgroupPtr cgroup_temp = NULL;
if (!(all_nodes = virNumaGetHostNodeset())) goto error; @@ -809,9 +815,37 @@ qemuRestoreCgroupState(virDomainObjPtr vm)
So this is the path for currently running guests to be adjusted on livirtd restart after install, right?
if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) goto error;
+ for (i = 0; i < priv->nvcpupids; i++) { + if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + for (i = 0; i < priv->niothreadpids; i++) { + if (virCgroupNewIOThread(priv->cgroup, i, false, &cgroup_temp) < 0 ||
cgroup iothread id's are 1..n, so I believe this should be 'i + 1'
Yes, once again I felt for the contraption in this. That shall not happen henceforth!
simple enough to test by adding <iothreads>2</iothreads> to your domain definition and making sure this works...
I remember trying with <iothreads>1</iothreads>, there must have been no allocation as I only checked that the memory moved -- my bad.
Beyond that - seems reasonable to me
John
+ virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + cleanup: VIR_FREE(mem_mask); + VIR_FREE(nodeset); virBitmapFree(all_nodes); + virCgroupFree(&cgroup_temp); return;
error:

On 03/20/2015 04:53 AM, Martin Kletzander wrote:
On Thu, Mar 19, 2015 at 07:30:26PM -0400, John Ferlan wrote:
<...snip...>
@@ -792,9 +795,12 @@ static void qemuRestoreCgroupState(virDomainObjPtr vm) { char *mem_mask = NULL; + char *nodeset = NULL; int empty = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + size_t i = 0; virBitmapPtr all_nodes; + virCgroupPtr cgroup_temp = NULL;
if (!(all_nodes = virNumaGetHostNodeset())) goto error; @@ -809,9 +815,37 @@ qemuRestoreCgroupState(virDomainObjPtr vm)
So this is the path for currently running guests to be adjusted on livirtd restart after install, right?
if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) goto error;
+ for (i = 0; i < priv->nvcpupids; i++) { + if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + virCgroupFree(&cgroup_temp); + } + + for (i = 0; i < priv->niothreadpids; i++) { + if (virCgroupNewIOThread(priv->cgroup, i, false, &cgroup_temp) < 0 ||
cgroup iothread id's are 1..n, so I believe this should be 'i + 1'
Yes, once again I felt for the contraption in this. That shall not happen henceforth!
I usually use cscope in order to find all callers to help remind me since I know my memory isn't as good as it once was... I assume you've made the change and so that it's official this is your ACK... John
simple enough to test by adding <iothreads>2</iothreads> to your domain definition and making sure this works...
I remember trying with <iothreads>1</iothreads>, there must have been no allocation as I only checked that the memory moved -- my bad.
Beyond that - seems reasonable to me
John
participants (3)
-
Eric Blake
-
John Ferlan
-
Martin Kletzander