On Wed, Feb 27, 2013 at 04:09:36PM +0800, Gao feng wrote:
Allow lxc using the advisory nodeset from querying numad.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_controller.c | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 7 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 15aa334..c6e7bbf 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -409,7 +409,8 @@ cleanup: }
#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask) { nodemask_t mask; int mode = -1; @@ -418,9 +419,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) int i = 0; int maxnode = 0; bool warned = false; - - if (!ctrl->def->numatune.memory.nodemask) + virDomainNumatuneDef numatune = ctrl->def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { return 0; + }
VIR_DEBUG("Setting NUMA memory policy");
@@ -435,7 +449,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) /* Convert nodemask to NUMA bitmask. */ nodemask_zero(&mask); i = -1; - while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) { + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { if (i > NUMA_NUM_NODES) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Host cannot support NUMA node %d"), i); @@ -488,7 +502,8 @@ cleanup: return ret; } #else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (ctrl->def->numatune.memory.nodemask) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -560,13 +575,38 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { - + virBitmapPtr nodemask = NULL; + char *nodeset; if (virLXCControllerSetupCpuAffinity(ctrl) < 0) return -1;
- if (virLXCControllerSetupNUMAPolicy(ctrl) < 0) + /* Get the advisory nodeset from numad if 'placement' of + * either <vcpu> or <numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virDomainGetNumadAdvice(ctrl->def);
Currently the 'def->vcpus' data has no effect in the LXC driver, since it has no concept of CPUs. With this change applied, the container is now going to be bind to exactly def->vcpus number of host CPUs. I'm not saying this is bad - it is actually a good thing I believe. You should document this behaviour in the GIT comment message though.
+ if (!nodeset) + return -1; + + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + + if (virBitmapParse(nodeset, 0, &nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(nodeset); + return -1; + } + VIR_FREE(nodeset); + } + + if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) { + virBitmapFree(nodemask); return -1; + }
+ virBitmapFree(nodemask); return virLXCCgroupSetup(ctrl->def); }
This method could do with a 'cleanup:' block so you don't duplicate the VIR_FREE(nodeset) and virBitmapFree(nodemask) calls everywhere. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|