[libvirt] [PATCH 0/4] Add cpuset cgroup support for LXC

This patchset intend to add cpuset cgroup support for LXC. in order to don't create too many redundant codes, this patchset also rename some functions and structure. Gao feng (4): rename qemuGetNumadAdvice to virGetNumadAdvice LXC: allow uses advisory nodeset from querying numad remove the redundant codes LXC: add cpuset cgroup support for lxc po/POTFILES.in | 1 + src/Makefile.am | 1 + src/conf/domain_conf.h | 23 +------ src/libvirt_private.syms | 4 ++ src/lxc/lxc_cgroup.c | 57 +++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 156 +++++++++++++++--------------------------- src/qemu/qemu_process.c | 154 +---------------------------------------- src/util/virnuma.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 52 ++++++++++++++ 10 files changed, 348 insertions(+), 276 deletions(-) create mode 100644 src/util/virnuma.c create mode 100644 src/util/virnuma.h -- 1.7.11.7

qemuGetNumadAdvice will be used by LXC driver,rename it to virGetNumaAdvice and move it to virnuma.c Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- po/POTFILES.in | 1 + src/Makefile.am | 1 + src/libvirt_private.syms | 3 +++ src/qemu/qemu_process.c | 33 ++------------------------ src/util/virnuma.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 28 ++++++++++++++++++++++ 6 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 src/util/virnuma.c create mode 100644 src/util/virnuma.h diff --git a/po/POTFILES.in b/po/POTFILES.in index bd2c02e..ee8ff86 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -164,6 +164,7 @@ src/util/virnetdevtap.c src/util/virnetdevvportprofile.c src/util/virnetlink.c src/util/virnodesuspend.c +src/util/virnuma.c src/util/virobject.c src/util/virpci.c src/util/virpidfile.c diff --git a/src/Makefile.am b/src/Makefile.am index c1659a4..21eb84a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -103,6 +103,7 @@ UTIL_SOURCES = \ util/virnetdevvportprofile.h util/virnetdevvportprofile.c \ util/virnetlink.c util/virnetlink.h \ util/virnodesuspend.c util/virnodesuspend.h \ + util/virnuma.c util/virnuma.h \ util/virobject.c util/virobject.h \ util/virpci.c util/virpci.h \ util/virpidfile.c util/virpidfile.h \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ed46479..6aee6fa 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1565,6 +1565,9 @@ nodeSuspendForDuration; virNodeSuspendGetTargetMask; +# util/virnuma.h +virGetNumadAdvice; + # util/virobject.h virClassForObject; virClassForObjectLockable; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index db95d6e..20d41e3 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -70,6 +70,7 @@ #include "virnetdevtap.h" #include "virbitmap.h" #include "viratomic.h" +#include "virnuma.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -1981,36 +1982,6 @@ qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, } #endif -#if HAVE_NUMAD -static char * -qemuGetNumadAdvice(virDomainDefPtr def) -{ - virCommandPtr cmd = NULL; - char *output = NULL; - - cmd = virCommandNewArgList(NUMAD, "-w", NULL); - virCommandAddArgFormat(cmd, "%d:%llu", def->vcpus, - VIR_DIV_UP(def->mem.cur_balloon, 1024)); - - virCommandSetOutputBuffer(cmd, &output); - - if (virCommandRun(cmd, NULL) < 0) - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Failed to query numad for the " - "advisory nodeset")); - - virCommandFree(cmd); - return output; -} -#else -static char * -qemuGetNumadAdvice(virDomainDefPtr def ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("numad is not available on this host")); - return NULL; -} -#endif /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise @@ -3721,7 +3692,7 @@ int qemuProcessStart(virConnectPtr conn, VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || (vm->def->numatune.memory.placement_mode == VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { - nodeset = qemuGetNumadAdvice(vm->def); + nodeset = virGetNumadAdvice(vm->def->vcpus, vm->def->mem.cur_balloon); if (!nodeset) goto cleanup; diff --git a/src/util/virnuma.c b/src/util/virnuma.c new file mode 100644 index 0000000..37931fe --- /dev/null +++ b/src/util/virnuma.c @@ -0,0 +1,60 @@ +/* + * virnuma.c: helper APIS for managing numa + * + * Copyright (C) 2011-2013 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + */ + +#include <config.h> + +#include "virnuma.h" +#include "vircommand.h" +#include "virerror.h" + +#define VIR_FROM_THIS VIR_FROM_NONE + +#if HAVE_NUMAD +char *virGetNumadAdvice(unsigned short vcpus, + unsigned long long balloon) +{ + virCommandPtr cmd = NULL; + char *output = NULL; + + cmd = virCommandNewArgList(NUMAD, "-w", NULL); + virCommandAddArgFormat(cmd, "%d:%llu", vcpus, + VIR_DIV_UP(balloon, 1024)); + + virCommandSetOutputBuffer(cmd, &output); + + if (virCommandRun(cmd, NULL) < 0) + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to query numad for the " + "advisory nodeset")); + + virCommandFree(cmd); + return output; +} +#else +char * +virGetNumadAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, + unsigned long long balloon ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("numad is not available on this host")); + return NULL; +} +#endif diff --git a/src/util/virnuma.h b/src/util/virnuma.h new file mode 100644 index 0000000..b9046c2 --- /dev/null +++ b/src/util/virnuma.h @@ -0,0 +1,28 @@ +/* + * virnuma.h: helper APIS for managing numa + * + * Copyright (C) 2011-2013 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + */ + +#ifndef __VIR_NUMA_H__ +# define __VIR_NUMA_H__ + +char *virGetNumadAdvice(unsigned short vcups, + unsigned long long balloon); + +#endif /* __VIR_NUMA_H__ */ -- 1.7.11.7

On 2013年03月01日 14:52, Gao feng wrote:
qemuGetNumadAdvice will be used by LXC driver,rename it to virGetNumaAdvice and move it to virnuma.c
s/virGetNumaAdvice/virGetNumadAdvice/,
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- po/POTFILES.in | 1 + src/Makefile.am | 1 + src/libvirt_private.syms | 3 +++ src/qemu/qemu_process.c | 33 ++------------------------ src/util/virnuma.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 28 ++++++++++++++++++++++ 6 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 src/util/virnuma.c create mode 100644 src/util/virnuma.h
diff --git a/po/POTFILES.in b/po/POTFILES.in index bd2c02e..ee8ff86 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -164,6 +164,7 @@ src/util/virnetdevtap.c src/util/virnetdevvportprofile.c src/util/virnetlink.c src/util/virnodesuspend.c +src/util/virnuma.c src/util/virobject.c src/util/virpci.c src/util/virpidfile.c diff --git a/src/Makefile.am b/src/Makefile.am index c1659a4..21eb84a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -103,6 +103,7 @@ UTIL_SOURCES = \ util/virnetdevvportprofile.h util/virnetdevvportprofile.c \ util/virnetlink.c util/virnetlink.h \ util/virnodesuspend.c util/virnodesuspend.h \ + util/virnuma.c util/virnuma.h \
Please use tab to align the "\".
util/virobject.c util/virobject.h \ util/virpci.c util/virpci.h \ util/virpidfile.c util/virpidfile.h \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ed46479..6aee6fa 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1565,6 +1565,9 @@ nodeSuspendForDuration; virNodeSuspendGetTargetMask;
+# util/virnuma.h +virGetNumadAdvice; + # util/virobject.h virClassForObject; virClassForObjectLockable; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index db95d6e..20d41e3 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -70,6 +70,7 @@ #include "virnetdevtap.h" #include "virbitmap.h" #include "viratomic.h" +#include "virnuma.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -1981,36 +1982,6 @@ qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, } #endif
-#if HAVE_NUMAD -static char * -qemuGetNumadAdvice(virDomainDefPtr def) -{ - virCommandPtr cmd = NULL; - char *output = NULL; - - cmd = virCommandNewArgList(NUMAD, "-w", NULL); - virCommandAddArgFormat(cmd, "%d:%llu", def->vcpus, - VIR_DIV_UP(def->mem.cur_balloon, 1024)); - - virCommandSetOutputBuffer(cmd,&output); - - if (virCommandRun(cmd, NULL)< 0) - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Failed to query numad for the " - "advisory nodeset")); - - virCommandFree(cmd); - return output; -} -#else -static char * -qemuGetNumadAdvice(virDomainDefPtr def ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("numad is not available on this host")); - return NULL; -} -#endif
/* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise @@ -3721,7 +3692,7 @@ int qemuProcessStart(virConnectPtr conn, VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || (vm->def->numatune.memory.placement_mode == VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { - nodeset = qemuGetNumadAdvice(vm->def); + nodeset = virGetNumadAdvice(vm->def->vcpus, vm->def->mem.cur_balloon); if (!nodeset) goto cleanup;
diff --git a/src/util/virnuma.c b/src/util/virnuma.c new file mode 100644 index 0000000..37931fe --- /dev/null +++ b/src/util/virnuma.c @@ -0,0 +1,60 @@ +/* + * virnuma.c: helper APIS for managing numa
s/APIS/APIs/,
+ * + * Copyright (C) 2011-2013 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + *<http://www.gnu.org/licenses/>. + * + */ + +#include<config.h> + +#include "virnuma.h" +#include "vircommand.h" +#include "virerror.h" + +#define VIR_FROM_THIS VIR_FROM_NONE + +#if HAVE_NUMAD +char *virGetNumadAdvice(unsigned short vcpus, + unsigned long long balloon)
char * virGetNumadAdvice(unsigned short vcpus, unsigned long long balloon)
+{ + virCommandPtr cmd = NULL; + char *output = NULL; + + cmd = virCommandNewArgList(NUMAD, "-w", NULL); + virCommandAddArgFormat(cmd, "%d:%llu", vcpus, + VIR_DIV_UP(balloon, 1024)); + + virCommandSetOutputBuffer(cmd,&output); + + if (virCommandRun(cmd, NULL)< 0) + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to query numad for the " + "advisory nodeset")); + + virCommandFree(cmd); + return output; +} +#else +char * +virGetNumadAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, + unsigned long long balloon ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("numad is not available on this host")); + return NULL; +} +#endif diff --git a/src/util/virnuma.h b/src/util/virnuma.h new file mode 100644 index 0000000..b9046c2 --- /dev/null +++ b/src/util/virnuma.h @@ -0,0 +1,28 @@ +/* + * virnuma.h: helper APIS for managing numa
s/APIS/APIs/,
+ * + * Copyright (C) 2011-2013 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + *<http://www.gnu.org/licenses/>. + * + */ + +#ifndef __VIR_NUMA_H__ +# define __VIR_NUMA_H__ + +char *virGetNumadAdvice(unsigned short vcups, + unsigned long long balloon); + +#endif /* __VIR_NUMA_H__ */
ACK with the small nits fixed. It's good to have virnuma.{h,c} for numa stuffs, I guess now we can move many codes into it. Osier

On Fri, Mar 15, 2013 at 04:13:30PM +0800, Osier Yang wrote:
On 2013年03月01日 14:52, Gao feng wrote:
qemuGetNumadAdvice will be used by LXC driver,rename it to virGetNumaAdvice and move it to virnuma.c
s/virGetNumaAdvice/virGetNumadAdvice/,
diff --git a/src/util/virnuma.h b/src/util/virnuma.h new file mode 100644 index 0000000..b9046c2 --- /dev/null +++ b/src/util/virnuma.h + +char *virGetNumadAdvice(unsigned short vcups, + unsigned long long balloon); + +#endif /* __VIR_NUMA_H__ */
ACK with the small nits fixed. It's good to have virnuma.{h,c} for numa stuffs, I guess now we can move many codes into it.
Function names should always aim to match the filename. So i'd rename this to virNumaGetAutoPlacementAdvice() Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Allow lxc using the advisory nodeset from querying numad, this means if user doesn't specify the numa nodes that the lxc domain should assign to, libvirt will automatically bind the lxc domain to the advisory nodeset which queried from numad. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_controller.c | 84 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 10 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 15aa334..b6c1fe8 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -69,6 +69,7 @@ #include "nodeinfo.h" #include "virrandom.h" #include "virprocess.h" +#include "virnuma.h" #include "rpc/virnetserver.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -409,7 +410,8 @@ cleanup: } #if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask) { nodemask_t mask; int mode = -1; @@ -418,9 +420,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) int i = 0; int maxnode = 0; bool warned = false; - - if (!ctrl->def->numatune.memory.nodemask) + virDomainNumatuneDef numatune = ctrl->def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { return 0; + } VIR_DEBUG("Setting NUMA memory policy"); @@ -435,7 +450,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) /* Convert nodemask to NUMA bitmask. */ nodemask_zero(&mask); i = -1; - while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) { + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { if (i > NUMA_NUM_NODES) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Host cannot support NUMA node %d"), i); @@ -488,7 +503,8 @@ cleanup: return ret; } #else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (ctrl->def->numatune.memory.nodemask) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -549,6 +565,40 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) } +static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, + virBitmapPtr *mask) +{ + virBitmapPtr nodemask = NULL; + char *nodeset; + int ret = -1; + + /* Get the advisory nodeset from numad if 'placement' of + * either <vcpu> or <numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virGetNumadAdvice(ctrl->def->vcpus, + ctrl->def->mem.cur_balloon); + if (!nodeset) + goto cleanup; + + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + + ret = virBitmapParse(nodeset, 0, &nodemask, VIR_DOMAIN_CPUMASK_LEN); + if (ret < 0) + goto cleanup; + } + ret = 0; + *mask = nodemask; + +cleanup: + VIR_FREE(nodeset); + return ret; +} + + /** * virLXCControllerSetupResourceLimits * @ctrl: the controller state @@ -560,14 +610,28 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { + virBitmapPtr nodemask = NULL; + int ret; - if (virLXCControllerSetupCpuAffinity(ctrl) < 0) - return -1; + ret = virLXCControllerGetNumadAdvice(ctrl, &nodemask); + if (ret < 0) + goto cleanup; - if (virLXCControllerSetupNUMAPolicy(ctrl) < 0) - return -1; + ret = virLXCControllerSetupCpuAffinity(ctrl); + if (ret < 0) + goto cleanup; + + ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask); + if (ret < 0) + goto cleanup; - return virLXCCgroupSetup(ctrl->def); + ret = virLXCCgroupSetup(ctrl->def); + if (ret < 0) + goto cleanup; + +cleanup: + virBitmapFree(nodemask); + return ret; } -- 1.7.11.7

On 2013年03月01日 14:52, Gao feng wrote:
Allow lxc using the advisory nodeset from querying numad, this means if user doesn't specify the numa nodes that the lxc domain should assign to, libvirt will automatically bind the lxc domain to the advisory nodeset which queried from numad.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/lxc/lxc_controller.c | 84 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 10 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 15aa334..b6c1fe8 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -69,6 +69,7 @@ #include "nodeinfo.h" #include "virrandom.h" #include "virprocess.h" +#include "virnuma.h" #include "rpc/virnetserver.h"
#define VIR_FROM_THIS VIR_FROM_LXC @@ -409,7 +410,8 @@ cleanup: }
#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask) { nodemask_t mask; int mode = -1; @@ -418,9 +420,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) int i = 0; int maxnode = 0; bool warned = false; - - if (!ctrl->def->numatune.memory.nodemask) + virDomainNumatuneDef numatune = ctrl->def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { return 0; + }
VIR_DEBUG("Setting NUMA memory policy");
@@ -435,7 +450,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) /* Convert nodemask to NUMA bitmask. */ nodemask_zero(&mask); i = -1; - while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i))>= 0) { + while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { if (i> NUMA_NUM_NODES) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Host cannot support NUMA node %d"), i); @@ -488,7 +503,8 @@ cleanup: return ret; } #else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (ctrl->def->numatune.memory.nodemask) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -549,6 +565,40 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) }
+static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, + virBitmapPtr *mask) +{ + virBitmapPtr nodemask = NULL; + char *nodeset; + int ret = -1; + + /* Get the advisory nodeset from numad if 'placement' of + * either<vcpu> or<numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virGetNumadAdvice(ctrl->def->vcpus, + ctrl->def->mem.cur_balloon); + if (!nodeset) + goto cleanup; + + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + + ret = virBitmapParse(nodeset, 0,&nodemask, VIR_DOMAIN_CPUMASK_LEN); + if (ret< 0) + goto cleanup; + } + ret = 0; + *mask = nodemask; + +cleanup: + VIR_FREE(nodeset); + return ret; +} + + /** * virLXCControllerSetupResourceLimits * @ctrl: the controller state @@ -560,14 +610,28 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { + virBitmapPtr nodemask = NULL; + int ret;
int ret = -1;
- if (virLXCControllerSetupCpuAffinity(ctrl)< 0) - return -1; + ret = virLXCControllerGetNumadAdvice(ctrl,&nodemask); + if (ret< 0) + goto cleanup;
And thus this can be simplified as: if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0) goto cleanup;
- if (virLXCControllerSetupNUMAPolicy(ctrl)< 0) - return -1; + ret = virLXCControllerSetupCpuAffinity(ctrl); + if (ret< 0) + goto cleanup;
Likewise.
+ + ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask); + if (ret< 0) + goto cleanup;
Likewise. And I'd like keep this together with GetNumadAdvice. I.E. if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0) goto cleanup; if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) goto cleanup; Or even simpler: if (virLXCControllerSetupNUMAPolicy(ctrl, &nodemask) < 0 || virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) goto cleanup; Instead of having SetupNumaPolicy between them.
- return virLXCCgroupSetup(ctrl->def); + ret = virLXCCgroupSetup(ctrl->def); + if (ret< 0) + goto cleanup;
Likewise. But with setting "ret = 0" here. ret = 0; BTW, another disvantage of your methods (using ret across) is the return value for virLXCControllerSetupResourceLimits is depended on the sub-functions, which may return values other than 0/-1. It might not be the result you want.
+ +cleanup: + virBitmapFree(nodemask); + return ret; }
Others are simply copy & paste from qemu driver. Looks good. This needs a v2. Osier

Intend to reduce the redundant code,use virSetupNumaMemoryPolicy to replace virLXCControllerSetupNUMAPolicy and qemuProcessInitNumaMemoryPolicy. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.h | 23 +-------- src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 114 +------------------------------------------- src/qemu/qemu_process.c | 121 +---------------------------------------------- src/util/virnuma.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 24 ++++++++++ 6 files changed, 143 insertions(+), 254 deletions(-) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5828ae2..2a8dff3 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -47,6 +47,7 @@ # include "device_conf.h" # include "virbitmap.h" # include "virstoragefile.h" +# include "virnuma.h" /* forward declarations of all device types, required by * virDomainDeviceDef @@ -1589,14 +1590,6 @@ enum virDomainCpuPlacementMode { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST }; -enum virDomainNumatuneMemPlacementMode { - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, - - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST -}; - typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -1685,18 +1678,6 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def, int nvcpupin, int vcpu); -typedef struct _virDomainNumatuneDef virDomainNumatuneDef; -typedef virDomainNumatuneDef *virDomainNumatuneDefPtr; -struct _virDomainNumatuneDef { - struct { - virBitmapPtr nodemask; - int mode; - int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ - } memory; - - /* Future NUMA tuning related stuff should go here. */ -}; - typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight; typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr; struct _virBlkioDeviceWeight { @@ -1784,7 +1765,7 @@ struct _virDomainDef { virDomainVcpuPinDefPtr emulatorpin; } cputune; - virDomainNumatuneDef numatune; + virNumaTuneParams numatune; /* These 3 are based on virDomainLifeCycleAction enum flags */ int onReboot; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6aee6fa..56c466a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1567,6 +1567,7 @@ virNodeSuspendGetTargetMask; # util/virnuma.h virGetNumadAdvice; +virSetupNumaMemoryPolicy; # util/virobject.h virClassForObject; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index b6c1fe8..3db0a88 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -46,11 +46,6 @@ # include <cap-ng.h> #endif -#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include <numa.h> -#endif - #include "virerror.h" #include "virlog.h" #include "virutil.h" @@ -409,113 +404,6 @@ cleanup: return ret; } -#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = ctrl->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - VIR_DEBUG("Setting NUMA memory policy"); - - if (numa_available() < 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { - if (i > NUMA_NUM_NODES) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i > maxnode && !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = ctrl->def->numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i < NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unable to set NUMA policy %s"), - virDomainNumatuneMemModeTypeToString(mode)); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (ctrl->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("NUMA policy is not available on this platform")); - return -1; - } - - return 0; -} -#endif - /* * To be run while still single threaded @@ -621,7 +509,7 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret < 0) goto cleanup; - ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret < 0) goto cleanup; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 20d41e3..2fa85aa 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -45,11 +45,6 @@ #include "qemu_bridge_filter.h" #include "qemu_migration.h" -#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include <numa.h> -#endif - #include "datatypes.h" #include "virlog.h" #include "virerror.h" @@ -1869,120 +1864,6 @@ qemuProcessDetectVcpuPIDs(virQEMUDriverPtr driver, } -/* - * Set NUMA memory policy for qemu process, to be run between - * fork/exec of QEMU only. - */ -#if WITH_NUMACTL -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = vm->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - if (numa_available() < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { - if (i > NUMA_NUM_NODES) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i > maxnode && !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i < NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - /* XXX: Shouldn't go here, as we already do checking when - * parsing domain XML. - */ - virReportError(VIR_ERR_XML_ERROR, - "%s", _("Invalid mode for memory NUMA tuning.")); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (vm->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("libvirt is compiled without NUMA tuning support")); - - return -1; - } - - return 0; -} -#endif - - /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise * just return a new allocated bitmap. @@ -2732,7 +2613,7 @@ static int qemuProcessHook(void *data) qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask) < 0) goto cleanup; - if (qemuProcessInitNumaMemoryPolicy(h->vm, h->nodemask) < 0) + if (virSetupNumaMemoryPolicy(h->vm->def->numatune, h->nodemask) < 0) goto cleanup; ret = 0; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 37931fe..75dcede 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -21,9 +21,15 @@ #include <config.h> +#if WITH_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" +#include "virlog.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -58,3 +64,111 @@ virGetNumadAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, return NULL; } #endif + +#if WITH_NUMACTL +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { + return 0; + } + + if (numa_available() < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + i = -1; + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { + if (i > NUMA_NUM_NODES) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + + mode = numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + /* XXX: Shouldn't go here, as we already do checking when + * parsing domain XML. + */ + virReportError(VIR_ERR_XML_ERROR, + "%s", _("Invalid mode for memory NUMA tuning.")); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) +{ + if (numatune.memory.nodemask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libvirt is compiled without NUMA tuning support")); + + return -1; + } + + return 0; +} +#endif diff --git a/src/util/virnuma.h b/src/util/virnuma.h index b9046c2..8d9f14d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -22,7 +22,31 @@ #ifndef __VIR_NUMA_H__ # define __VIR_NUMA_H__ +#include "virbitmap.h" + +enum virDomainNumatuneMemPlacementMode { + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST +}; + +typedef struct _virNumaTuneParams virNumaTuneParams; +typedef virNumaTuneParams *virNumaTuneParamsPtr; +struct _virNumaTuneParams { + struct { + virBitmapPtr nodemask; + int mode; + int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ + } memory; + + /* Future NUMA tuning related stuff should go here. */ +}; + char *virGetNumadAdvice(unsigned short vcups, unsigned long long balloon); +int virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask); #endif /* __VIR_NUMA_H__ */ -- 1.7.11.7

On 2013年03月01日 14:52, Gao feng wrote:
Intend to reduce the redundant code,use virSetupNumaMemoryPolicy to replace virLXCControllerSetupNUMAPolicy and qemuProcessInitNumaMemoryPolicy.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.h | 23 +-------- src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 114 +------------------------------------------- src/qemu/qemu_process.c | 121 +---------------------------------------------- src/util/virnuma.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 24 ++++++++++ 6 files changed, 143 insertions(+), 254 deletions(-)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5828ae2..2a8dff3 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -47,6 +47,7 @@ # include "device_conf.h" # include "virbitmap.h" # include "virstoragefile.h" +# include "virnuma.h"
/* forward declarations of all device types, required by * virDomainDeviceDef @@ -1589,14 +1590,6 @@ enum virDomainCpuPlacementMode { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST };
-enum virDomainNumatuneMemPlacementMode { - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, - - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST -}; -
Given that you move this into virnuma.h, VIR_ENUM_DECL and VIR_ENUM_IMPL also need to be moved. And I don't see changes on things like this: virDomainNumatuneMemPlacementModeTypeFromString in domain_conf.c, I bet the domain conf parsing and formating are now broken with this patch applied.
typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -1685,18 +1678,6 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def, int nvcpupin, int vcpu);
-typedef struct _virDomainNumatuneDef virDomainNumatuneDef; -typedef virDomainNumatuneDef *virDomainNumatuneDefPtr; -struct _virDomainNumatuneDef { - struct { - virBitmapPtr nodemask; - int mode; - int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ - } memory; - - /* Future NUMA tuning related stuff should go here. */ -}; - typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight; typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr; struct _virBlkioDeviceWeight { @@ -1784,7 +1765,7 @@ struct _virDomainDef { virDomainVcpuPinDefPtr emulatorpin; } cputune;
- virDomainNumatuneDef numatune; + virNumaTuneParams numatune;
A bad new name, why not virNumatuneDef? the new name can be confused, because we use params for other meaning in the project.
/* These 3 are based on virDomainLifeCycleAction enum flags */ int onReboot; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6aee6fa..56c466a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1567,6 +1567,7 @@ virNodeSuspendGetTargetMask;
# util/virnuma.h virGetNumadAdvice; +virSetupNumaMemoryPolicy;
Generally we want to use virNuma As the prefix for the helpers. This applies to virGetNumadAdvice too (I didn't realized it when reviewing 1/4).
# util/virobject.h virClassForObject; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index b6c1fe8..3db0a88 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -46,11 +46,6 @@ # include<cap-ng.h> #endif
-#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include<numa.h> -#endif - #include "virerror.h" #include "virlog.h" #include "virutil.h" @@ -409,113 +404,6 @@ cleanup: return ret; }
-#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = ctrl->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - VIR_DEBUG("Setting NUMA memory policy"); - - if (numa_available()< 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { - if (i> NUMA_NUM_NODES) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i> maxnode&& !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = ctrl->def->numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i< NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unable to set NUMA policy %s"), - virDomainNumatuneMemModeTypeToString(mode)); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (ctrl->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("NUMA policy is not available on this platform")); - return -1; - } - - return 0; -} -#endif -
/* * To be run while still single threaded @@ -621,7 +509,7 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret< 0) goto cleanup;
- ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret< 0) goto cleanup;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 20d41e3..2fa85aa 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -45,11 +45,6 @@ #include "qemu_bridge_filter.h" #include "qemu_migration.h"
-#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include<numa.h> -#endif - #include "datatypes.h" #include "virlog.h" #include "virerror.h" @@ -1869,120 +1864,6 @@ qemuProcessDetectVcpuPIDs(virQEMUDriverPtr driver, }
-/* - * Set NUMA memory policy for qemu process, to be run between - * fork/exec of QEMU only. - */ -#if WITH_NUMACTL -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = vm->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - if (numa_available()< 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { - if (i> NUMA_NUM_NODES) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i> maxnode&& !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i< NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - /* XXX: Shouldn't go here, as we already do checking when - * parsing domain XML. - */ - virReportError(VIR_ERR_XML_ERROR, - "%s", _("Invalid mode for memory NUMA tuning.")); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (vm->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("libvirt is compiled without NUMA tuning support")); - - return -1; - } - - return 0; -} -#endif - - /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise * just return a new allocated bitmap. @@ -2732,7 +2613,7 @@ static int qemuProcessHook(void *data) qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask)< 0) goto cleanup;
- if (qemuProcessInitNumaMemoryPolicy(h->vm, h->nodemask)< 0) + if (virSetupNumaMemoryPolicy(h->vm->def->numatune, h->nodemask)< 0) goto cleanup;
ret = 0; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 37931fe..75dcede 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -21,9 +21,15 @@
#include<config.h>
+#if WITH_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include<numa.h> +#endif + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" +#include "virlog.h"
#define VIR_FROM_THIS VIR_FROM_NONE
@@ -58,3 +64,111 @@ virGetNumadAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, return NULL; } #endif + +#if WITH_NUMACTL +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { + return 0; + } + + if (numa_available()< 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + i = -1; + while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { + if (i> NUMA_NUM_NODES) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i> maxnode&& !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + + mode = numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i< NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + /* XXX: Shouldn't go here, as we already do checking when + * parsing domain XML. + */ + virReportError(VIR_ERR_XML_ERROR, + "%s", _("Invalid mode for memory NUMA tuning.")); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) +{ + if (numatune.memory.nodemask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libvirt is compiled without NUMA tuning support")); + + return -1; + } + + return 0; +} +#endif diff --git a/src/util/virnuma.h b/src/util/virnuma.h index b9046c2..8d9f14d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -22,7 +22,31 @@ #ifndef __VIR_NUMA_H__ # define __VIR_NUMA_H__
+#include "virbitmap.h" + +enum virDomainNumatuneMemPlacementMode { + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST +}; + +typedef struct _virNumaTuneParams virNumaTuneParams; +typedef virNumaTuneParams *virNumaTuneParamsPtr; +struct _virNumaTuneParams { + struct { + virBitmapPtr nodemask; + int mode; + int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ + } memory; + + /* Future NUMA tuning related stuff should go here. */ +}; +
Except the pointed out nits, others are simply code moving, looks good to me. This needs a v2 too. Osier

On Fri, Mar 15, 2013 at 05:03:47PM +0800, Osier Yang wrote:
diff --git a/src/util/virnuma.h b/src/util/virnuma.h index b9046c2..8d9f14d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -22,7 +22,31 @@ #ifndef __VIR_NUMA_H__ # define __VIR_NUMA_H__
+#include "virbitmap.h" + +enum virDomainNumatuneMemPlacementMode { + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST +};
This needs renaming to enum virNumaMemPlacementMode and the constants to VIR_NUMA_MEM_PLACEMENT_MODE*
+ +typedef struct _virNumaTuneParams virNumaTuneParams; +typedef virNumaTuneParams *virNumaTuneParamsPtr; +struct _virNumaTuneParams { + struct { + virBitmapPtr nodemask; + int mode; + int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ + } memory; + + /* Future NUMA tuning related stuff should go here. */ +}; +
Except the pointed out nits, others are simply code moving, looks good to me. This needs a v2 too.
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 2013/03/15 17:41, Daniel P. Berrange wrote:
On Fri, Mar 15, 2013 at 05:03:47PM +0800, Osier Yang wrote:
diff --git a/src/util/virnuma.h b/src/util/virnuma.h index b9046c2..8d9f14d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -22,7 +22,31 @@ #ifndef __VIR_NUMA_H__ # define __VIR_NUMA_H__
+#include "virbitmap.h" + +enum virDomainNumatuneMemPlacementMode { + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST +};
This needs renaming to
enum virNumaMemPlacementMode
and the constants to
VIR_NUMA_MEM_PLACEMENT_MODE*
Get it! Thanks for your review!

On 2013/03/15 17:03, Osier Yang wrote:
On 2013年03月01日 14:52, Gao feng wrote:
Intend to reduce the redundant code,use virSetupNumaMemoryPolicy to replace virLXCControllerSetupNUMAPolicy and qemuProcessInitNumaMemoryPolicy.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.h | 23 +-------- src/libvirt_private.syms | 1 + src/lxc/lxc_controller.c | 114 +------------------------------------------- src/qemu/qemu_process.c | 121 +---------------------------------------------- src/util/virnuma.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 24 ++++++++++ 6 files changed, 143 insertions(+), 254 deletions(-)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5828ae2..2a8dff3 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -47,6 +47,7 @@ # include "device_conf.h" # include "virbitmap.h" # include "virstoragefile.h" +# include "virnuma.h"
/* forward declarations of all device types, required by * virDomainDeviceDef @@ -1589,14 +1590,6 @@ enum virDomainCpuPlacementMode { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST };
-enum virDomainNumatuneMemPlacementMode { - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, - - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST -}; -
Given that you move this into virnuma.h, VIR_ENUM_DECL and VIR_ENUM_IMPL also need to be moved. And I don't see changes on things like this:
virDomainNumatuneMemPlacementModeTypeFromString
in domain_conf.c, I bet the domain conf parsing and formating are now broken with this patch applied.
Yes, I totally miss this, thanks for pointing it out, will fix it :)
typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -1685,18 +1678,6 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def, int nvcpupin, int vcpu);
-typedef struct _virDomainNumatuneDef virDomainNumatuneDef; -typedef virDomainNumatuneDef *virDomainNumatuneDefPtr; -struct _virDomainNumatuneDef { - struct { - virBitmapPtr nodemask; - int mode; - int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ - } memory; - - /* Future NUMA tuning related stuff should go here. */ -}; - typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight; typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr; struct _virBlkioDeviceWeight { @@ -1784,7 +1765,7 @@ struct _virDomainDef { virDomainVcpuPinDefPtr emulatorpin; } cputune;
- virDomainNumatuneDef numatune; + virNumaTuneParams numatune;
A bad new name, why not virNumatuneDef? the new name can be confused, because we use params for other meaning in the project.
Ok, I prefer virNumatuneDef.
/* These 3 are based on virDomainLifeCycleAction enum flags */ int onReboot; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6aee6fa..56c466a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1567,6 +1567,7 @@ virNodeSuspendGetTargetMask;
# util/virnuma.h virGetNumadAdvice; +virSetupNumaMemoryPolicy;
Generally we want to use virNuma As the prefix for the helpers. This applies to virGetNumadAdvice too (I didn't realized it when reviewing 1/4).
Get it :)
# util/virobject.h virClassForObject; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index b6c1fe8..3db0a88 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -46,11 +46,6 @@ # include<cap-ng.h> #endif
-#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include<numa.h> -#endif - #include "virerror.h" #include "virlog.h" #include "virutil.h" @@ -409,113 +404,6 @@ cleanup: return ret; }
-#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = ctrl->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - VIR_DEBUG("Setting NUMA memory policy"); - - if (numa_available()< 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { - if (i> NUMA_NUM_NODES) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i> maxnode&& !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = ctrl->def->numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i< NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unable to set NUMA policy %s"), - virDomainNumatuneMemModeTypeToString(mode)); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (ctrl->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("NUMA policy is not available on this platform")); - return -1; - } - - return 0; -} -#endif -
/* * To be run while still single threaded @@ -621,7 +509,7 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret< 0) goto cleanup;
- ret = virLXCControllerSetupNUMAPolicy(ctrl, nodemask); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret< 0) goto cleanup;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 20d41e3..2fa85aa 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -45,11 +45,6 @@ #include "qemu_bridge_filter.h" #include "qemu_migration.h"
-#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include<numa.h> -#endif - #include "datatypes.h" #include "virlog.h" #include "virerror.h" @@ -1869,120 +1864,6 @@ qemuProcessDetectVcpuPIDs(virQEMUDriverPtr driver, }
-/* - * Set NUMA memory policy for qemu process, to be run between - * fork/exec of QEMU only. - */ -#if WITH_NUMACTL -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = vm->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - if (numa_available()< 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { - if (i> NUMA_NUM_NODES) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i> maxnode&& !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i< NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - /* XXX: Shouldn't go here, as we already do checking when - * parsing domain XML. - */ - virReportError(VIR_ERR_XML_ERROR, - "%s", _("Invalid mode for memory NUMA tuning.")); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (vm->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("libvirt is compiled without NUMA tuning support")); - - return -1; - } - - return 0; -} -#endif - - /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise * just return a new allocated bitmap. @@ -2732,7 +2613,7 @@ static int qemuProcessHook(void *data) qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask)< 0) goto cleanup;
- if (qemuProcessInitNumaMemoryPolicy(h->vm, h->nodemask)< 0) + if (virSetupNumaMemoryPolicy(h->vm->def->numatune, h->nodemask)< 0) goto cleanup;
ret = 0; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 37931fe..75dcede 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -21,9 +21,15 @@
#include<config.h>
+#if WITH_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include<numa.h> +#endif + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" +#include "virlog.h"
#define VIR_FROM_THIS VIR_FROM_NONE
@@ -58,3 +64,111 @@ virGetNumadAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, return NULL; } #endif + +#if WITH_NUMACTL +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { + return 0; + } + + if (numa_available()< 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + i = -1; + while ((i = virBitmapNextSetBit(tmp_nodemask, i))>= 0) { + if (i> NUMA_NUM_NODES) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i> maxnode&& !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + + mode = numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i< NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + /* XXX: Shouldn't go here, as we already do checking when + * parsing domain XML. + */ + virReportError(VIR_ERR_XML_ERROR, + "%s", _("Invalid mode for memory NUMA tuning.")); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +int +virSetupNumaMemoryPolicy(virNumaTuneParams numatune, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) +{ + if (numatune.memory.nodemask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libvirt is compiled without NUMA tuning support")); + + return -1; + } + + return 0; +} +#endif diff --git a/src/util/virnuma.h b/src/util/virnuma.h index b9046c2..8d9f14d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -22,7 +22,31 @@ #ifndef __VIR_NUMA_H__ # define __VIR_NUMA_H__
+#include "virbitmap.h" + +enum virDomainNumatuneMemPlacementMode { + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_LAST +}; + +typedef struct _virNumaTuneParams virNumaTuneParams; +typedef virNumaTuneParams *virNumaTuneParamsPtr; +struct _virNumaTuneParams { + struct { + virBitmapPtr nodemask; + int mode; + int placement_mode; /* enum virDomainNumatuneMemPlacementMode */ + } memory; + + /* Future NUMA tuning related stuff should go here. */ +}; +
Except the pointed out nits, others are simply code moving, looks good to me. This needs a v2 too.
Thanks for your review. will send a v2 patch Thanks.

This patch adds cpuset cgroup support for LXC. also set cpuset cgroup before setting cpu affinity and numa policy. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 6 ++--- 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index a075335..f94b914 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -68,6 +68,58 @@ cleanup: } +static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, + virCgroupPtr cgroup, + virBitmapPtr nodemask) +{ + int rc = 0; + char *mask = NULL; + + if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && + def->cpumask) { + mask = virBitmapFormat(def->cpumask); + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpumask")); + return -1; + } + + rc = virCgroupSetCpusetCpus(cgroup, mask); + VIR_FREE(mask); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.cpus")); + } + } + + if ((def->numatune.memory.nodemask || + (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) && + def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) + mask = virBitmapFormat(nodemask); + else + mask = virBitmapFormat(def->numatune.memory.nodemask); + + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert memory nodemask")); + return -1; + } + + rc = virCgroupSetCpusetMems(cgroup, mask); + VIR_FREE(mask); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.mems")); + } + } + + return rc; +} + + static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def, virCgroupPtr cgroup) { @@ -472,7 +524,7 @@ cleanup: } -int virLXCCgroupSetup(virDomainDefPtr def) +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask) { virCgroupPtr driver = NULL; virCgroupPtr cgroup = NULL; @@ -497,6 +549,9 @@ int virLXCCgroupSetup(virDomainDefPtr def) if (virLXCCgroupSetupCpuTune(def, cgroup) < 0) goto cleanup; + if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0) + goto cleanup; + if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0) goto cleanup; diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index fff554b..29f21d6 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -26,7 +26,7 @@ # include "lxc_fuse.h" # include "virusb.h" -int virLXCCgroupSetup(virDomainDefPtr def); +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask); int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo); int diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 3db0a88..75e2fe4 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -505,15 +505,15 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret < 0) goto cleanup; - ret = virLXCControllerSetupCpuAffinity(ctrl); + ret = virLXCCgroupSetup(ctrl->def, nodemask); if (ret < 0) goto cleanup; - ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); + ret = virLXCControllerSetupCpuAffinity(ctrl); if (ret < 0) goto cleanup; - ret = virLXCCgroupSetup(ctrl->def); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret < 0) goto cleanup; -- 1.7.11.7

On 2013年03月01日 14:52, Gao feng wrote:
This patch adds cpuset cgroup support for LXC. also set cpuset cgroup before setting cpu affinity and numa policy.
Any special reason to move lxcSetupCgroup before the CPU affinity and NUMA setttings?
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 6 ++--- 3 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index a075335..f94b914 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -68,6 +68,58 @@ cleanup: }
+static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, + virCgroupPtr cgroup, + virBitmapPtr nodemask) +{ + int rc = 0; + char *mask = NULL; + + if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO&& + def->cpumask) { + mask = virBitmapFormat(def->cpumask); + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpumask")); + return -1; + } + + rc = virCgroupSetCpusetCpus(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.cpus")); + } + } + + if ((def->numatune.memory.nodemask || + (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO))&& + def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) + mask = virBitmapFormat(nodemask); + else + mask = virBitmapFormat(def->numatune.memory.nodemask); + + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert memory nodemask")); + return -1; + } + + rc = virCgroupSetCpusetMems(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.mems")); + } + } + + return rc; +} + + static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def, virCgroupPtr cgroup) { @@ -472,7 +524,7 @@ cleanup: }
-int virLXCCgroupSetup(virDomainDefPtr def) +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask) { virCgroupPtr driver = NULL; virCgroupPtr cgroup = NULL; @@ -497,6 +549,9 @@ int virLXCCgroupSetup(virDomainDefPtr def) if (virLXCCgroupSetupCpuTune(def, cgroup)< 0) goto cleanup;
+ if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask)< 0) + goto cleanup; + if (virLXCCgroupSetupBlkioTune(def, cgroup)< 0) goto cleanup;
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index fff554b..29f21d6 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -26,7 +26,7 @@ # include "lxc_fuse.h" # include "virusb.h"
-int virLXCCgroupSetup(virDomainDefPtr def); +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask); int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo);
int diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 3db0a88..75e2fe4 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -505,15 +505,15 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret< 0) goto cleanup;
- ret = virLXCControllerSetupCpuAffinity(ctrl); + ret = virLXCCgroupSetup(ctrl->def, nodemask); if (ret< 0) goto cleanup;
- ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); + ret = virLXCControllerSetupCpuAffinity(ctrl); if (ret< 0) goto cleanup;
- ret = virLXCCgroupSetup(ctrl->def); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret< 0) goto cleanup;
Looks good & ACK if there is a reasonble response on the question, but this needs to be rebased for comments in 1/4.

On 2013/03/15 17:24, Osier Yang wrote:
On 2013年03月01日 14:52, Gao feng wrote:
This patch adds cpuset cgroup support for LXC. also set cpuset cgroup before setting cpu affinity and numa policy.
Any special reason to move lxcSetupCgroup before the CPU affinity and NUMA setttings?
Through it has no functional difference which step is done first. But consider sched_setaffinity will be affected by cpuset cgroup, I think its better to set cpuset cgroup before we set CPU affinity. I will add this reason into changelog.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 6 ++--- 3 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index a075335..f94b914 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -68,6 +68,58 @@ cleanup: }
+static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, + virCgroupPtr cgroup, + virBitmapPtr nodemask) +{ + int rc = 0; + char *mask = NULL; + + if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO&& + def->cpumask) { + mask = virBitmapFormat(def->cpumask); + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpumask")); + return -1; + } + + rc = virCgroupSetCpusetCpus(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.cpus")); + } + } + + if ((def->numatune.memory.nodemask || + (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO))&& + def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) + mask = virBitmapFormat(nodemask); + else + mask = virBitmapFormat(def->numatune.memory.nodemask); + + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert memory nodemask")); + return -1; + } + + rc = virCgroupSetCpusetMems(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.mems")); + } + } + + return rc; +} + + static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def, virCgroupPtr cgroup) { @@ -472,7 +524,7 @@ cleanup: }
-int virLXCCgroupSetup(virDomainDefPtr def) +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask) { virCgroupPtr driver = NULL; virCgroupPtr cgroup = NULL; @@ -497,6 +549,9 @@ int virLXCCgroupSetup(virDomainDefPtr def) if (virLXCCgroupSetupCpuTune(def, cgroup)< 0) goto cleanup;
+ if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask)< 0) + goto cleanup; + if (virLXCCgroupSetupBlkioTune(def, cgroup)< 0) goto cleanup;
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index fff554b..29f21d6 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -26,7 +26,7 @@ # include "lxc_fuse.h" # include "virusb.h"
-int virLXCCgroupSetup(virDomainDefPtr def); +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask); int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo);
int diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 3db0a88..75e2fe4 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -505,15 +505,15 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret< 0) goto cleanup;
- ret = virLXCControllerSetupCpuAffinity(ctrl); + ret = virLXCCgroupSetup(ctrl->def, nodemask); if (ret< 0) goto cleanup;
- ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); + ret = virLXCControllerSetupCpuAffinity(ctrl); if (ret< 0) goto cleanup;
- ret = virLXCCgroupSetup(ctrl->def); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret< 0) goto cleanup;
Looks good & ACK if there is a reasonble response on the question, but this needs to be rebased for comments in 1/4.
Yes,of course, thanks for your comments again.

On 2013年03月18日 10:27, Gao feng wrote:
On 2013/03/15 17:24, Osier Yang wrote:
On 2013年03月01日 14:52, Gao feng wrote:
This patch adds cpuset cgroup support for LXC. also set cpuset cgroup before setting cpu affinity and numa policy.
Any special reason to move lxcSetupCgroup before the CPU affinity and NUMA setttings?
Through it has no functional difference which step is done first. But consider sched_setaffinity will be affected by cpuset cgroup,
Not sure if this solve the affection. As sched_setaffinity after the cgroup setting might fail? But this is the history problem, and we don't have a way yet.
I think its better to set cpuset cgroup before we set CPU affinity.
I will add this reason into changelog.
Signed-off-by: Gao feng<gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 6 ++--- 3 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index a075335..f94b914 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -68,6 +68,58 @@ cleanup: }
+static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, + virCgroupPtr cgroup, + virBitmapPtr nodemask) +{ + int rc = 0; + char *mask = NULL; + + if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO&& + def->cpumask) { + mask = virBitmapFormat(def->cpumask); + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpumask")); + return -1; + } + + rc = virCgroupSetCpusetCpus(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.cpus")); + } + } + + if ((def->numatune.memory.nodemask || + (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO))&& + def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) + mask = virBitmapFormat(nodemask); + else + mask = virBitmapFormat(def->numatune.memory.nodemask); + + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert memory nodemask")); + return -1; + } + + rc = virCgroupSetCpusetMems(cgroup, mask); + VIR_FREE(mask); + if (rc< 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.mems")); + } + } + + return rc; +} + + static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def, virCgroupPtr cgroup) { @@ -472,7 +524,7 @@ cleanup: }
-int virLXCCgroupSetup(virDomainDefPtr def) +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask) { virCgroupPtr driver = NULL; virCgroupPtr cgroup = NULL; @@ -497,6 +549,9 @@ int virLXCCgroupSetup(virDomainDefPtr def) if (virLXCCgroupSetupCpuTune(def, cgroup)< 0) goto cleanup;
+ if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask)< 0) + goto cleanup; + if (virLXCCgroupSetupBlkioTune(def, cgroup)< 0) goto cleanup;
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index fff554b..29f21d6 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -26,7 +26,7 @@ # include "lxc_fuse.h" # include "virusb.h"
-int virLXCCgroupSetup(virDomainDefPtr def); +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask); int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo);
int diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 3db0a88..75e2fe4 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -505,15 +505,15 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) if (ret< 0) goto cleanup;
- ret = virLXCControllerSetupCpuAffinity(ctrl); + ret = virLXCCgroupSetup(ctrl->def, nodemask); if (ret< 0) goto cleanup;
- ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); + ret = virLXCControllerSetupCpuAffinity(ctrl); if (ret< 0) goto cleanup;
- ret = virLXCCgroupSetup(ctrl->def); + ret = virSetupNumaMemoryPolicy(ctrl->def->numatune, nodemask); if (ret< 0) goto cleanup;
Looks good& ACK if there is a reasonble response on the question, but this needs to be rebased for comments in 1/4.
Yes,of course, thanks for your comments again.

On 2013/03/18 11:19, Osier Yang wrote:
On 2013年03月18日 10:27, Gao feng wrote:
On 2013/03/15 17:24, Osier Yang wrote:
On 2013年03月01日 14:52, Gao feng wrote:
This patch adds cpuset cgroup support for LXC. also set cpuset cgroup before setting cpu affinity and numa policy.
Any special reason to move lxcSetupCgroup before the CPU affinity and NUMA setttings?
Through it has no functional difference which step is done first. But consider sched_setaffinity will be affected by cpuset cgroup,
Not sure if this solve the affection. As sched_setaffinity after the cgroup setting might fail? But this is the history problem, and we don't have a way yet.
Yes, set cpu cgroup before sched_setaffinity may cause libvirt lxc start failed. I will drop this change. Thanks!

On 2013/03/01 14:52, Gao feng wrote:
This patchset intend to add cpuset cgroup support for LXC. in order to don't create too many redundant codes, this patchset also rename some functions and structure.
Ping
Gao feng (4): rename qemuGetNumadAdvice to virGetNumadAdvice LXC: allow uses advisory nodeset from querying numad remove the redundant codes LXC: add cpuset cgroup support for lxc
po/POTFILES.in | 1 + src/Makefile.am | 1 + src/conf/domain_conf.h | 23 +------ src/libvirt_private.syms | 4 ++ src/lxc/lxc_cgroup.c | 57 +++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 156 +++++++++++++++--------------------------- src/qemu/qemu_process.c | 154 +---------------------------------------- src/util/virnuma.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 52 ++++++++++++++ 10 files changed, 348 insertions(+), 276 deletions(-) create mode 100644 src/util/virnuma.c create mode 100644 src/util/virnuma.h

On 2013/03/01 14:52, Gao feng wrote:
This patchset intend to add cpuset cgroup support for LXC. in order to don't create too many redundant codes, this patchset also rename some functions and structure.
Gao feng (4): rename qemuGetNumadAdvice to virGetNumadAdvice LXC: allow uses advisory nodeset from querying numad remove the redundant codes LXC: add cpuset cgroup support for lxc
Can anybody give me an ACK or some comments? Thanks!
po/POTFILES.in | 1 + src/Makefile.am | 1 + src/conf/domain_conf.h | 23 +------ src/libvirt_private.syms | 4 ++ src/lxc/lxc_cgroup.c | 57 +++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 156 +++++++++++++++--------------------------- src/qemu/qemu_process.c | 154 +---------------------------------------- src/util/virnuma.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 52 ++++++++++++++ 10 files changed, 348 insertions(+), 276 deletions(-) create mode 100644 src/util/virnuma.c create mode 100644 src/util/virnuma.h
participants (3)
-
Daniel P. Berrange
-
Gao feng
-
Osier Yang