[libvirt] [PATCH 0/4] Add cpuset support for LXC

This patchset intend to add cpuset support for LXC. in order to don't create too many redundant codes, this patchset also rename some functions. Gao feng (4): rename qemuGetNumadAdvice to virDomainGetNumadAdvice LXC: allow uses advisory nodeset from querying numad remove the redundant codes LXC: add cpuset support for lxc src/conf/domain_conf.c | 144 ++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 4 ++ src/libvirt_private.syms | 3 +- src/lxc/lxc_cgroup.c | 58 ++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 117 +++++++++-------------------------- src/qemu/qemu_process.c | 158 ++--------------------------------------------- 7 files changed, 242 insertions(+), 244 deletions(-) -- 1.7.11.7

qemuGetNumadAdvice will be used by LXC driver,rename it to virDomainGetNumaAdvice and move it to domain_conf.c Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/qemu/qemu_process.c | 32 +------------------------------- 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f2887c6..e4ebdd5 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -40,6 +40,7 @@ #include "viruuid.h" #include "virutil.h" #include "virbuffer.h" +#include "vircommand.h" #include "virlog.h" #include "nwfilter_conf.h" #include "virstoragefile.h" @@ -16342,3 +16343,33 @@ no_memory: virSecurityDeviceLabelDefFree(seclabel); return NULL; } + + +#if HAVE_NUMAD +char *virDomainGetNumadAdvice(virDomainDefPtr def) +{ + virCommandPtr cmd = NULL; + char *output = NULL; + + cmd = virCommandNewArgList(NUMAD, "-w", NULL); + virCommandAddArgFormat(cmd, "%d:%llu", def->vcpus, + VIR_DIV_UP(def->mem.cur_balloon, 1024)); + + virCommandSetOutputBuffer(cmd, &output); + + if (virCommandRun(cmd, NULL) < 0) + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to query numad for the " + "advisory nodeset")); + + virCommandFree(cmd); + return output; +} +#else +char *virDomainGetNumadAdvice(virDomainDefPtr def ATTRIBUTE_UNUSED) +{ + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("numad is not available on this host")); + return NULL; +} +#endif diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4ffa4aa..59061e4 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2248,6 +2248,8 @@ virDomainDiskDefAddSecurityLabelDef(virDomainDiskDefPtr def, const char *model); typedef const char* (*virEventActionToStringFunc)(int type); typedef int (*virEventActionFromStringFunc)(const char *type); +char *virDomainGetNumadAdvice(virDomainDefPtr def); + VIR_ENUM_DECL(virDomainTaint) VIR_ENUM_DECL(virDomainVirt) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f399871..18e6cf7 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -341,6 +341,7 @@ virDomainWatchdogActionTypeFromString; virDomainWatchdogActionTypeToString; virDomainWatchdogModelTypeFromString; virDomainWatchdogModelTypeToString; +virDomainGetNumadAdvice; # conf/domain_event.h diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 476e3ed..6103874 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1981,36 +1981,6 @@ qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, } #endif -#if HAVE_NUMAD -static char * -qemuGetNumadAdvice(virDomainDefPtr def) -{ - virCommandPtr cmd = NULL; - char *output = NULL; - - cmd = virCommandNewArgList(NUMAD, "-w", NULL); - virCommandAddArgFormat(cmd, "%d:%llu", def->vcpus, - VIR_DIV_UP(def->mem.cur_balloon, 1024)); - - virCommandSetOutputBuffer(cmd, &output); - - if (virCommandRun(cmd, NULL) < 0) - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Failed to query numad for the " - "advisory nodeset")); - - virCommandFree(cmd); - return output; -} -#else -static char * -qemuGetNumadAdvice(virDomainDefPtr def ATTRIBUTE_UNUSED) -{ - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("numad is not available on this host")); - return NULL; -} -#endif /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise @@ -3717,7 +3687,7 @@ int qemuProcessStart(virConnectPtr conn, VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || (vm->def->numatune.memory.placement_mode == VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { - nodeset = qemuGetNumadAdvice(vm->def); + nodeset = virDomainGetNumadAdvice(vm->def); if (!nodeset) goto cleanup; -- 1.7.11.7

On Wed, Feb 27, 2013 at 04:09:35PM +0800, Gao feng wrote:
qemuGetNumadAdvice will be used by LXC driver,rename it to virDomainGetNumaAdvice and move it to domain_conf.c
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/qemu/qemu_process.c | 32 +------------------------------- 4 files changed, 35 insertions(+), 31 deletions(-)
Ewww no. The domain_conf.c file is for XML configuration parsing & formatting. Code dealing with numad has no business being there. We don't currently have any place for general NUMA related helper APIs, so I'd suggest you create a new pair of files for these methods src/util/virnuma.h and src/util/virnuma.c Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Wed, Feb 27, 2013 at 05:11:06PM +0000, Daniel P. Berrange wrote:
On Wed, Feb 27, 2013 at 04:09:35PM +0800, Gao feng wrote:
qemuGetNumadAdvice will be used by LXC driver,rename it to virDomainGetNumaAdvice and move it to domain_conf.c
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/qemu/qemu_process.c | 32 +------------------------------- 4 files changed, 35 insertions(+), 31 deletions(-)
Ewww no. The domain_conf.c file is for XML configuration parsing & formatting. Code dealing with numad has no business being there.
We don't currently have any place for general NUMA related helper APIs, so I'd suggest you create a new pair of files for these methods src/util/virnuma.h and src/util/virnuma.c
NB, you won't be able to pass in a virDomainDef when the code is located here. Instead make the API accept 2 params, the vcpus and memory values. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 2013/02/28 01:16, Daniel P. Berrange wrote:
On Wed, Feb 27, 2013 at 05:11:06PM +0000, Daniel P. Berrange wrote:
On Wed, Feb 27, 2013 at 04:09:35PM +0800, Gao feng wrote:
qemuGetNumadAdvice will be used by LXC driver,rename it to virDomainGetNumaAdvice and move it to domain_conf.c
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/libvirt_private.syms | 1 + src/qemu/qemu_process.c | 32 +------------------------------- 4 files changed, 35 insertions(+), 31 deletions(-)
Ewww no. The domain_conf.c file is for XML configuration parsing & formatting. Code dealing with numad has no business being there.
We don't currently have any place for general NUMA related helper APIs, so I'd suggest you create a new pair of files for these methods src/util/virnuma.h and src/util/virnuma.c
NB, you won't be able to pass in a virDomainDef when the code is located here. Instead make the API accept 2 params, the vcpus and memory values.
Get it :) Thanks for your comments. will fix these problems in next version.

Allow lxc using the advisory nodeset from querying numad. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_controller.c | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 15aa334..c6e7bbf 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -409,7 +409,8 @@ cleanup: } #if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask) { nodemask_t mask; int mode = -1; @@ -418,9 +419,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) int i = 0; int maxnode = 0; bool warned = false; - - if (!ctrl->def->numatune.memory.nodemask) + virDomainNumatuneDef numatune = ctrl->def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { return 0; + } VIR_DEBUG("Setting NUMA memory policy"); @@ -435,7 +449,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) /* Convert nodemask to NUMA bitmask. */ nodemask_zero(&mask); i = -1; - while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) { + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { if (i > NUMA_NUM_NODES) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Host cannot support NUMA node %d"), i); @@ -488,7 +502,8 @@ cleanup: return ret; } #else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (ctrl->def->numatune.memory.nodemask) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -560,13 +575,38 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { - + virBitmapPtr nodemask = NULL; + char *nodeset; if (virLXCControllerSetupCpuAffinity(ctrl) < 0) return -1; - if (virLXCControllerSetupNUMAPolicy(ctrl) < 0) + /* Get the advisory nodeset from numad if 'placement' of + * either <vcpu> or <numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virDomainGetNumadAdvice(ctrl->def); + if (!nodeset) + return -1; + + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + + if (virBitmapParse(nodeset, 0, &nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(nodeset); + return -1; + } + VIR_FREE(nodeset); + } + + if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) { + virBitmapFree(nodemask); return -1; + } + virBitmapFree(nodemask); return virLXCCgroupSetup(ctrl->def); } -- 1.7.11.7

On Wed, Feb 27, 2013 at 04:09:36PM +0800, Gao feng wrote:
Allow lxc using the advisory nodeset from querying numad.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_controller.c | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 7 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 15aa334..c6e7bbf 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -409,7 +409,8 @@ cleanup: }
#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask) { nodemask_t mask; int mode = -1; @@ -418,9 +419,22 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) int i = 0; int maxnode = 0; bool warned = false; - - if (!ctrl->def->numatune.memory.nodemask) + virDomainNumatuneDef numatune = ctrl->def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { return 0; + }
VIR_DEBUG("Setting NUMA memory policy");
@@ -435,7 +449,7 @@ static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) /* Convert nodemask to NUMA bitmask. */ nodemask_zero(&mask); i = -1; - while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) { + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { if (i > NUMA_NUM_NODES) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Host cannot support NUMA node %d"), i); @@ -488,7 +502,8 @@ cleanup: return ret; } #else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl) +static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (ctrl->def->numatune.memory.nodemask) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -560,13 +575,38 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { - + virBitmapPtr nodemask = NULL; + char *nodeset; if (virLXCControllerSetupCpuAffinity(ctrl) < 0) return -1;
- if (virLXCControllerSetupNUMAPolicy(ctrl) < 0) + /* Get the advisory nodeset from numad if 'placement' of + * either <vcpu> or <numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virDomainGetNumadAdvice(ctrl->def);
Currently the 'def->vcpus' data has no effect in the LXC driver, since it has no concept of CPUs. With this change applied, the container is now going to be bind to exactly def->vcpus number of host CPUs. I'm not saying this is bad - it is actually a good thing I believe. You should document this behaviour in the GIT comment message though.
+ if (!nodeset) + return -1; + + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + + if (virBitmapParse(nodeset, 0, &nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(nodeset); + return -1; + } + VIR_FREE(nodeset); + } + + if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) { + virBitmapFree(nodemask); return -1; + }
+ virBitmapFree(nodemask); return virLXCCgroupSetup(ctrl->def); }
This method could do with a 'cleanup:' block so you don't duplicate the VIR_FREE(nodeset) and virBitmapFree(nodemask) calls everywhere. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Intend to reduce the redundant code,use virDomainSetupNumaMemoryPolicy to replace virLXCControllerSetupNUMAPolicy and qemuProcessInitNumaMemoryPolicy. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 113 +++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 + src/libvirt_private.syms | 2 +- src/lxc/lxc_controller.c | 150 ++++++++--------------------------------------- src/qemu/qemu_process.c | 126 ++------------------------------------- 5 files changed, 146 insertions(+), 247 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e4ebdd5..085f6b7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -29,6 +29,11 @@ #include <sys/stat.h> #include <unistd.h> +#if WITH_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "internal.h" #include "virerror.h" #include "datatypes.h" @@ -16373,3 +16378,111 @@ char *virDomainGetNumadAdvice(virDomainDefPtr def ATTRIBUTE_UNUSED) return NULL; } #endif + +#if WITH_NUMACTL +int virDomainSetupNumaMemoryPolicy(virDomainDefPtr def, + virBitmapPtr nodemask) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + virDomainNumatuneDef numatune = def->numatune; + virBitmapPtr tmp_nodemask = NULL; + + if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { + if (!numatune.memory.nodemask) + return 0; + VIR_DEBUG("Set NUMA memory policy with specified nodeset"); + tmp_nodemask = numatune.memory.nodemask; + } else if (numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); + tmp_nodemask = nodemask; + } else { + return 0; + } + + if (numa_available() < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + i = -1; + while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { + if (i > NUMA_NUM_NODES) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + + mode = numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + /* XXX: Shouldn't go here, as we already do checking when + * parsing domain XML. + */ + virReportError(VIR_ERR_XML_ERROR, + "%s", _("Invalid mode for memory NUMA tuning.")); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +int +virDomainSetupNumaMemoryPolicy(virDomainDefPtr def, + virBitmapPtr nodemask ATTRIBUTE_UNUSED) +{ + if (def->numatune.memory.nodemask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libvirt is compiled without NUMA tuning support")); + + return -1; + } + + return 0; +} +#endif diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 59061e4..6fdfd69 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2249,6 +2249,8 @@ typedef const char* (*virEventActionToStringFunc)(int type); typedef int (*virEventActionFromStringFunc)(const char *type); char *virDomainGetNumadAdvice(virDomainDefPtr def); +int virDomainSetupNumaMemoryPolicy(virDomainDefPtr def, + virBitmapPtr nodemask); VIR_ENUM_DECL(virDomainTaint) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 18e6cf7..0c9d1a6 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -342,7 +342,7 @@ virDomainWatchdogActionTypeToString; virDomainWatchdogModelTypeFromString; virDomainWatchdogModelTypeToString; virDomainGetNumadAdvice; - +virDomainSetupNumaMemoryPolicy; # conf/domain_event.h virDomainEventBalloonChangeNewFromDom; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c6e7bbf..c52c947 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -46,11 +46,6 @@ # include <cap-ng.h> #endif -#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include <numa.h> -#endif - #include "virerror.h" #include "virlog.h" #include "virutil.h" @@ -408,112 +403,35 @@ cleanup: return ret; } -#if WITH_NUMACTL -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask) +static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, + virBitmapPtr *mask) { - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = ctrl->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - VIR_DEBUG("Setting NUMA memory policy"); - - if (numa_available() < 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; + virBitmapPtr nodemask = NULL; + char *nodeset; - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { - if (i > NUMA_NUM_NODES) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Host cannot support NUMA node %d"), i); + /* Get the advisory nodeset from numad if 'placement' of + * either <vcpu> or <numatune> is 'auto'. + */ + if ((ctrl->def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || + (ctrl->def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { + nodeset = virDomainGetNumadAdvice(ctrl->def); + if (!nodeset) return -1; - } - if (i > maxnode && !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - mode = ctrl->def->numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i < NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } + VIR_DEBUG("Nodeset returned from numad: %s", nodeset); - if (nnodes != 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; + if (virBitmapParse(nodeset, 0, &nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(nodeset); + return -1; } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unable to set NUMA policy %s"), - virDomainNumatuneMemModeTypeToString(mode)); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (ctrl->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("NUMA policy is not available on this platform")); - return -1; + VIR_FREE(nodeset); } - + *mask = nodemask; return 0; } -#endif /* @@ -576,32 +494,14 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { virBitmapPtr nodemask = NULL; - char *nodeset; - if (virLXCControllerSetupCpuAffinity(ctrl) < 0) - return -1; - - /* Get the advisory nodeset from numad if 'placement' of - * either <vcpu> or <numatune> is 'auto'. - */ - if ((ctrl->def->placement_mode == - VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || - (ctrl->def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) { - nodeset = virDomainGetNumadAdvice(ctrl->def); - if (!nodeset) - return -1; - VIR_DEBUG("Nodeset returned from numad: %s", nodeset); + if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0) + return -1; - if (virBitmapParse(nodeset, 0, &nodemask, - VIR_DOMAIN_CPUMASK_LEN) < 0) { - VIR_FREE(nodeset); - return -1; - } - VIR_FREE(nodeset); - } + if (virLXCControllerSetupCpuAffinity(ctrl) < 0) + return -1; - if (virLXCControllerSetupNUMAPolicy(ctrl, nodemask) < 0) { + if (virDomainSetupNumaMemoryPolicy(ctrl->def, nodemask) < 0) { virBitmapFree(nodemask); return -1; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 6103874..bbe64e4 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -45,11 +45,6 @@ #include "qemu_bridge_filter.h" #include "qemu_migration.h" -#if WITH_NUMACTL -# define NUMA_VERSION1_COMPATIBILITY 1 -# include <numa.h> -#endif - #include "datatypes.h" #include "virlog.h" #include "virerror.h" @@ -1868,120 +1863,6 @@ qemuProcessDetectVcpuPIDs(virQEMUDriverPtr driver, } -/* - * Set NUMA memory policy for qemu process, to be run between - * fork/exec of QEMU only. - */ -#if WITH_NUMACTL -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask) -{ - nodemask_t mask; - int mode = -1; - int node = -1; - int ret = -1; - int i = 0; - int maxnode = 0; - bool warned = false; - virDomainNumatuneDef numatune = vm->def->numatune; - virBitmapPtr tmp_nodemask = NULL; - - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { - return 0; - } - - if (numa_available() < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Host kernel is not aware of NUMA.")); - return -1; - } - - maxnode = numa_max_node() + 1; - /* Convert nodemask to NUMA bitmask. */ - nodemask_zero(&mask); - i = -1; - while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) { - if (i > NUMA_NUM_NODES) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Host cannot support NUMA node %d"), i); - return -1; - } - if (i > maxnode && !warned) { - VIR_WARN("nodeset is out of range, there is only %d NUMA " - "nodes on host", maxnode); - warned = true; - } - nodemask_set(&mask, i); - } - - mode = numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - numa_set_bind_policy(1); - numa_set_membind(&mask); - numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { - int nnodes = 0; - for (i = 0; i < NUMA_NUM_NODES; i++) { - if (nodemask_isset(&mask, i)) { - node = i; - nnodes++; - } - } - - if (nnodes != 1) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("NUMA memory tuning in 'preferred' mode " - "only supports single node")); - goto cleanup; - } - - numa_set_bind_policy(0); - numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - /* XXX: Shouldn't go here, as we already do checking when - * parsing domain XML. - */ - virReportError(VIR_ERR_XML_ERROR, - "%s", _("Invalid mode for memory NUMA tuning.")); - goto cleanup; - } - - ret = 0; - -cleanup: - return ret; -} -#else -static int -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm, - virBitmapPtr nodemask ATTRIBUTE_UNUSED) -{ - if (vm->def->numatune.memory.nodemask) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("libvirt is compiled without NUMA tuning support")); - - return -1; - } - - return 0; -} -#endif - - /* Helper to prepare cpumap for affinity setting, convert * NUMA nodeset into cpuset if @nodemask is not NULL, otherwise * just return a new allocated bitmap. @@ -2730,8 +2611,11 @@ static int qemuProcessHook(void *data) if (!h->vm->def->cputune.emulatorpin && qemuProcessInitCpuAffinity(h->driver, h->vm, h->nodemask) < 0) goto cleanup; - - if (qemuProcessInitNumaMemoryPolicy(h->vm, h->nodemask) < 0) + /* + * Set NUMA memory policy for qemu process, to be run between + * fork/exec of QEMU only. + */ + if (virDomainSetupNumaMemoryPolicy(h->vm->def, h->nodemask) < 0) goto cleanup; ret = 0; -- 1.7.11.7

On Wed, Feb 27, 2013 at 04:09:37PM +0800, Gao feng wrote:
Intend to reduce the redundant code,use virDomainSetupNumaMemoryPolicy to replace virLXCControllerSetupNUMAPolicy and qemuProcessInitNumaMemoryPolicy.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/conf/domain_conf.c | 113 +++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 +
Again, this is not a suitable place for this code. It'll want to go in src/util/virnuma.c too. NB, you'll need to move the virDomainNumatuneDef struct definition out of domain_conf.h and rename it to 'virNumaTuneParams' in the src/util/virnuma.h file Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

this patch adds cpuset support for LXC. also set cpuset cgroup before we set cpu affinity and numa policy. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_controller.c | 25 +++++++++++++-------- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index a075335..f94b914 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -68,6 +68,58 @@ cleanup: } +static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, + virCgroupPtr cgroup, + virBitmapPtr nodemask) +{ + int rc = 0; + char *mask = NULL; + + if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && + def->cpumask) { + mask = virBitmapFormat(def->cpumask); + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpumask")); + return -1; + } + + rc = virCgroupSetCpusetCpus(cgroup, mask); + VIR_FREE(mask); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.cpus")); + } + } + + if ((def->numatune.memory.nodemask || + (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO)) && + def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (def->numatune.memory.placement_mode == + VIR_DOMAIN_NUMATUNE_MEM_PLACEMENT_MODE_AUTO) + mask = virBitmapFormat(nodemask); + else + mask = virBitmapFormat(def->numatune.memory.nodemask); + + if (!mask) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert memory nodemask")); + return -1; + } + + rc = virCgroupSetCpusetMems(cgroup, mask); + VIR_FREE(mask); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Unable to set cpuset.mems")); + } + } + + return rc; +} + + static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def, virCgroupPtr cgroup) { @@ -472,7 +524,7 @@ cleanup: } -int virLXCCgroupSetup(virDomainDefPtr def) +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask) { virCgroupPtr driver = NULL; virCgroupPtr cgroup = NULL; @@ -497,6 +549,9 @@ int virLXCCgroupSetup(virDomainDefPtr def) if (virLXCCgroupSetupCpuTune(def, cgroup) < 0) goto cleanup; + if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0) + goto cleanup; + if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0) goto cleanup; diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index fff554b..29f21d6 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -26,7 +26,7 @@ # include "lxc_fuse.h" # include "virusb.h" -int virLXCCgroupSetup(virDomainDefPtr def); +int virLXCCgroupSetup(virDomainDefPtr def, virBitmapPtr nodemask); int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo); int diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c52c947..bd0678e 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -493,21 +493,28 @@ static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl) */ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) { + int ret; virBitmapPtr nodemask = NULL; - if (virLXCControllerGetNumadAdvice(ctrl, &nodemask) < 0) - return -1; + ret = virLXCControllerGetNumadAdvice(ctrl, &nodemask); + if (ret < 0) + goto cleanup; - if (virLXCControllerSetupCpuAffinity(ctrl) < 0) - return -1; + ret = virLXCCgroupSetup(ctrl->def, nodemask); + if (ret < 0) + goto cleanup; - if (virDomainSetupNumaMemoryPolicy(ctrl->def, nodemask) < 0) { - virBitmapFree(nodemask); - return -1; - } + ret = virLXCControllerSetupCpuAffinity(ctrl); + if (ret < 0) + goto cleanup; + + ret = virDomainSetupNumaMemoryPolicy(ctrl->def, nodemask); + if (ret < 0) + goto cleanup; +cleanup: virBitmapFree(nodemask); - return virLXCCgroupSetup(ctrl->def); + return ret; } -- 1.7.11.7
participants (2)
-
Daniel P. Berrange
-
Gao feng