On 2013年03月01日 14:52, Gao feng wrote:
Allow lxc using the advisory nodeset from querying numad,
this means if user doesn't specify the numa nodes that
the lxc domain should assign to, libvirt will automatically
bind the lxc domain to the advisory nodeset which queried from
numad.
Signed-off-by: Gao feng<gaofeng(a)cn.fujitsu.com>
---
src/lxc/lxc_controller.c | 84 ++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 74 insertions(+), 10 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 15aa334..b6c1fe8 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -69,6 +69,7 @@
#include "nodeinfo.h"
#include "virrandom.h"
#include "virprocess.h"
+#include "virnuma.h"
#include "rpc/virnetserver.h"
#define VIR_FROM_THIS VIR_FROM_LXC
@@ -409,7 +410,8 @@ cleanup:
}
#if WITH_NUMACTL
-static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl)
+static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl,
+ virBitmapPtr nodemask)
{
nodemask_t mask;
int mode = -1;
@@ -418,9 +420,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr
ctrl)
int i = 0;
int maxnode = 0;
bool warned = false;
-
- if (!ctrl->def->numatune.memory.nodemask)
+ virDomainNumatuneDef numatune = ctrl->def->numatune;
+ virBitmapPtr tmp_nodemask = NULL;
+
+ if (numatune.memory.placement_mode ==
+ VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) {
+ if (!numatune.memory.nodemask)
+ return 0;
+ VIR_DEBUG("Set NUMA memory policy with specified nodeset");
+ tmp_nodemask = numatune.memory.nodemask;
+ } else if (numatune.memory.placement_mode ==
+ VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) {
+ VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad");
+ tmp_nodemask = nodemask;
+ } else {
return 0;
+ }
VIR_DEBUG("Setting NUMA memory policy");
@@ -435,7 +450,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl)
/* Convert nodemask to NUMA bitmask. */
nodemask_zero(&mask);
i = -1;
- while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i))>=
0) {
+ while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) {
if (i> NUMA_NUM_NODES) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("Host cannot support NUMA node %d"), i);
@@ -488,7 +503,8 @@ cleanup:
return ret;
}
#else
-static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl)
+static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl,
+ virBitmapPtr nodemask ATTRIBUTE_UNUSED)
{
if (ctrl->def->numatune.memory.nodemask) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
@@ -549,6 +565,40 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr
ctrl)
}
+static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl,
+ virBitmapPtr *mask)
+{
+ virBitmapPtr nodemask = NULL;
+ char *nodeset;
+ int ret = -1;
+
+ /* Get the advisory nodeset from numad if 'placement' of
+ * either<vcpu> or<numatune> is 'auto'.
+ */
+ if ((ctrl->def->placement_mode ==
+ VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) ||
+ (ctrl->def->numatune.memory.placement_mode ==
+ VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) {
+ nodeset = virGetNumadAdvice(ctrl->def->vcpus,
+ ctrl->def->mem.cur_balloon);
+ if (!nodeset)
+ goto cleanup;
+
+ VIR_DEBUG("Nodeset returned from numad: %s", nodeset);
+
+ ret = virBitmapParse(nodeset, 0,&nodemask, VIR_DOMAIN_CPUMASK_LEN);
+ if (ret< 0)
+ goto cleanup;
+ }
+ ret = 0;
+ *mask = nodemask;
+
+cleanup:
+ VIR_FREE(nodeset);
+ return ret;
+}
+
+
/**
* virLXCControllerSetupResourceLimits
* @ctrl: the controller state
@@ -560,14 +610,28 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr
ctrl)
*/
static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl)
{
+ virBitmapPtr nodemask = NULL;
+ int ret;
int ret = -1;
- if (virLXCControllerSetupCpuAffinity(ctrl)< 0)
- return -1;
+ ret = virLXCControllerGetNumadAdvice(ctrl,&nodemask);
+ if (ret< 0)
+ goto cleanup;
And thus this can be simplified as:
if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0)
goto cleanup;
- if (virLXCControllerSetupNUMAPolicy(ctrl)< 0)
- return -1;
+ ret = virLXCControllerSetupCpuAffinity(ctrl);
+ if (ret< 0)
+ goto cleanup;
Likewise.
+
+ ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask);
+ if (ret< 0)
+ goto cleanup;
Likewise. And I'd like keep this together with GetNumadAdvice. I.E.
if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0)
goto cleanup;
if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0)
goto cleanup;
Or even simpler:
if (virLXCControllerSetupNUMAPolicy(ctrl, &nodemask) < 0 ||
virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0)
goto cleanup;
Instead of having SetupNumaPolicy between them.
- return virLXCCgroupSetup(ctrl->def);
+ ret = virLXCCgroupSetup(ctrl->def);
+ if (ret< 0)
+ goto cleanup;
Likewise.
But with setting "ret = 0" here.
ret = 0;
BTW, another disvantage of your methods (using ret across) is the return
value for virLXCControllerSetupResourceLimits is depended on the
sub-functions, which may return values other than 0/-1. It might not be
the result you want.
+
+cleanup:
+ virBitmapFree(nodemask);
+ return ret;
}
Others are simply copy & paste from qemu driver. Looks good. This needs
a v2.
Osier