On Thu, Nov 03, 2011 at 12:13:27PM +0000, Daniel P. Berrange wrote:
On Thu, Nov 03, 2011 at 07:55:19PM +0800, Hu Tao wrote:
> Since we use cpuset to manage numa, we can safely remove dependence
> on libnuma.
> ---
> src/conf/domain_conf.c | 24 +----------
> src/conf/domain_conf.h | 1 -
> src/qemu/qemu_process.c | 111 -----------------------------------------------
> 3 files changed, 1 insertions(+), 135 deletions(-)
>
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 6e2d421..0cf3bb7 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -583,11 +583,6 @@ VIR_ENUM_IMPL(virDomainTimerMode, VIR_DOMAIN_TIMER_MODE_LAST,
> "paravirt",
> "smpsafe");
>
> -VIR_ENUM_IMPL(virDomainNumatuneMemMode, VIR_DOMAIN_NUMATUNE_MEM_LAST,
> - "strict",
> - "preferred",
> - "interleave");
> -
> VIR_ENUM_IMPL(virDomainStartupPolicy, VIR_DOMAIN_STARTUP_POLICY_LAST,
> "default",
> "mandatory",
> @@ -6834,20 +6829,6 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
> "%s", _("nodeset for NUMA memory
tuning must be set"));
> goto error;
> }
> -
> - tmp = virXPathString("string(./numatune/memory/@mode)", ctxt);
> - if (tmp) {
> - if ((def->numatune.memory.mode =
> - virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
> - virDomainReportError(VIR_ERR_INTERNAL_ERROR,
> - _("Unsupported NUMA memory tuning mode
'%s'"),
> - tmp);
> - goto error;
> - }
> - VIR_FREE(tmp);
> - } else {
> - def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
> - }
> }
>
> n = virXPathNodeSet("./features/*", ctxt, &nodes);
> @@ -10882,7 +10863,6 @@ virDomainDefFormatInternal(virDomainDefPtr def,
> virBufferAddLit(buf, " </cputune>\n");
>
> if (def->numatune.memory.nodemask) {
> - const char *mode;
> char *nodemask = NULL;
>
> virBufferAddLit(buf, " <numatune>\n");
> @@ -10894,9 +10874,7 @@ virDomainDefFormatInternal(virDomainDefPtr def,
> goto cleanup;
> }
>
> - mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
> - virBufferAsprintf(buf, " <memory mode='%s'
nodeset='%s'/>\n",
> - mode, nodemask);
> + virBufferAsprintf(buf, " <memory
nodeset='%s'/>\n", nodemask);
> VIR_FREE(nodemask);
> virBufferAddLit(buf, " </numatune>\n");
> }
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index f74f4bb..ca68437 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -1353,7 +1353,6 @@ typedef virDomainNumatuneDef *virDomainNumatuneDefPtr;
> struct _virDomainNumatuneDef {
> struct {
> char *nodemask;
> - int mode;
> } memory;
>
> /* Future NUMA tuning related stuff should go here. */
You can't remove this stuff from the XML - this is part of our public
stability guarantee. The way it is modelled is not specific to libnuma
anyway, so there shouldn't be this tie.
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 47164f7..5969b34 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -39,11 +39,6 @@
> #include "qemu_bridge_filter.h"
> #include "qemu_migration.h"
>
> -#if HAVE_NUMACTL
> -# define NUMA_VERSION1_COMPATIBILITY 1
> -# include <numa.h>
> -#endif
> -
> #include "datatypes.h"
> #include "logging.h"
> #include "virterror_internal.h"
> @@ -1314,109 +1309,6 @@ qemuProcessDetectVcpuPIDs(struct qemud_driver *driver,
> return 0;
> }
>
> -
> -/*
> - * Set NUMA memory policy for qemu process, to be run between
> - * fork/exec of QEMU only.
> - */
> -#if HAVE_NUMACTL
> -static int
> -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm)
> -{
> - nodemask_t mask;
> - int mode = -1;
> - int node = -1;
> - int ret = -1;
> - int i = 0;
> - int maxnode = 0;
> - bool warned = false;
> -
> - if (!vm->def->numatune.memory.nodemask)
> - return 0;
> -
> - VIR_DEBUG("Setting NUMA memory policy");
> -
> - if (numa_available() < 0) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("Host kernel is not aware of
NUMA."));
> - return -1;
> - }
> -
> - maxnode = numa_max_node() + 1;
> -
> - /* Convert nodemask to NUMA bitmask. */
> - nodemask_zero(&mask);
> - for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) {
> - if (vm->def->numatune.memory.nodemask[i]) {
> - if (i > NUMA_NUM_NODES) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR,
> - _("Host cannot support NUMA node %d"),
i);
> - return -1;
> - }
> - if (i > maxnode && !warned) {
> - VIR_WARN("nodeset is out of range, there is only %d NUMA
"
> - "nodes on host", maxnode);
> - warned = true;
> - }
> - nodemask_set(&mask, i);
> - }
> - }
> -
> - mode = vm->def->numatune.memory.mode;
> -
> - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
> - numa_set_bind_policy(1);
> - numa_set_membind(&mask);
> - numa_set_bind_policy(0);
> - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
> - int nnodes = 0;
> - for (i = 0; i < NUMA_NUM_NODES; i++) {
> - if (nodemask_isset(&mask, i)) {
> - node = i;
> - nnodes++;
> - }
> - }
> -
> - if (nnodes != 1) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("NUMA memory tuning in
'preferred' mode "
> - "only supports single node"));
> - goto cleanup;
> - }
> -
> - numa_set_bind_policy(0);
> - numa_set_preferred(node);
> - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
> - numa_set_interleave_mask(&mask);
> - } else {
> - /* XXX: Shouldn't go here, as we already do checking when
> - * parsing domain XML.
> - */
> - qemuReportError(VIR_ERR_XML_ERROR,
> - "%s", _("Invalid mode for memory NUMA
tuning."));
> - goto cleanup;
> - }
> -
> - ret = 0;
> -
> -cleanup:
> - return ret;
> -}
> -#else
> -static int
> -qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm)
> -{
> - if (vm->def->numatune.memory.nodemask) {
> - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("libvirt is compiled without NUMA tuning
support"));
> -
> - return -1;
> - }
> -
> - return 0;
> -}
> -#endif
> -
> /*
> * To be run between fork/exec of QEMU only
> */
> @@ -2179,9 +2071,6 @@ static int qemuProcessHook(void *data)
> if (qemuProcessInitCpuAffinity(h->vm) < 0)
> goto cleanup;
>
> - if (qemuProcessInitNumaMemoryPolicy(h->vm) < 0)
> - return -1;
> -
> VIR_DEBUG("Setting up security labelling");
> if (virSecurityManagerSetProcessLabel(h->driver->securityManager,
h->vm) < 0)
> goto cleanup;
NACK to this removal of libnuma. The libvirt QEMU driver still supports
deployment on RHEL5 vintage hosts where there is no cgroups mechanism
at all.
I'm all for using the cpuset controller *if* it is available. We must
fall back to libnuma if it is not present though, to prevent functional
regressions for existing users of libvirt.
I will add the fallback code in v2. Thanks for the comment.
--
Thanks,
Hu Tao