This element specifies settings similar to those of the memory element,
but memnode can be specified separately for each guest NUMA node.
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
docs/formatdomain.html.in | 15 +++
docs/schemas/domaincommon.rng | 17 ++++
src/conf/domain_conf.c | 220 +++++++++++++++++++++++++++++++++++-------
src/qemu/qemu_domain.c | 23 ++++-
src/qemu/qemu_driver.c | 11 +++
src/util/virnuma.h | 14 ++-
6 files changed, 260 insertions(+), 40 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 041f70d..2d855ea 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -709,6 +709,8 @@
...
<numatune>
<memory mode="strict" nodeset="1-4,^3"/>
+ <memnode cellid="0" mode="strict" nodeset="1"/>
+ <memnode cellid="2" mode="preferred" nodeset="2"/>
</numatune>
...
</domain>
@@ -745,6 +747,19 @@
<span class='since'>Since 0.9.3</span>
</dd>
+ <dt><code>memnode</code></dt>
+ <dd>
+ Optional <code>memnode</code> elements can specify memory allocation
+ policies for each guest NUMA node. For those nodes having no
+ corresponding <code>memnode</code> element, the default from
+ element <code>memory</code> will be used. Attribute <code>cellid</code>
+ addresses the guest NUMA node for which the settings are applied.
+ Attributes <code>mode</code> and <code>nodeset</code> have the same
+ meaning and syntax as in the <code>memory</code> element.
+
+ This setting is not compatible with automatic placement.
+ <span class='since'>QEMU Since 1.2.6</span>
+ </dd>
</dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 0787b5a..a8e3ba0 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -789,6 +789,23 @@
</choice>
</element>
</optional>
+ <zeroOrMore>
+ <element name="memnode">
+ <attribute name="cellid">
+ <ref name="unsignedInt"/>
+ </attribute>
+ <attribute name="mode">
+ <choice>
+ <value>strict</value>
+ <value>preferred</value>
+ <value>interleave</value>
+ </choice>
+ </attribute>
+ <attribute name='nodeset'>
+ <ref name='cpuset'/>
+ </attribute>
+ </element>
+ </zeroOrMore>
</element>
</define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index fe06921..352ba92 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2085,6 +2085,9 @@ void virDomainDefFree(virDomainDefPtr def)
virDomainVcpuPinDefFree(def->cputune.emulatorpin);
virBitmapFree(def->numatune.memory.nodemask);
+ for (i = 0; i < def->numatune.nmem_nodes; i++)
+ virBitmapFree(def->numatune.mem_nodes[i].nodemask);
+ VIR_FREE(def->numatune.mem_nodes);
virSysinfoDefFree(def->sysinfo);
@@ -11232,6 +11235,8 @@ virDomainDefParseXML(xmlDocPtr xml,
bool usb_other = false;
bool usb_master = false;
bool primaryVideo = false;
+ bool mem_nodes = false;
+
if (VIR_ALLOC(def) < 0)
return NULL;
@@ -11666,6 +11671,33 @@ virDomainDefParseXML(xmlDocPtr xml,
}
VIR_FREE(nodes);
+
+ /* analysis of cpu handling */
+ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
+ xmlNodePtr oldnode = ctxt->node;
+ ctxt->node = node;
+ def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
+ ctxt->node = oldnode;
+
+ if (def->cpu == NULL)
+ goto error;
+
+ if (def->cpu->sockets &&
+ def->maxvcpus >
+ def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("Maximum CPUs greater than topology limit"));
+ goto error;
+ }
+
+ if (def->cpu->cells_cpus > def->maxvcpus) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Number of CPUs in <numa> exceeds the"
+ " <vcpu> count"));
+ goto error;
+ }
+ }
+
/* Extract numatune if exists. */
if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -11682,6 +11714,12 @@ virDomainDefParseXML(xmlDocPtr xml,
if (n) {
cur = nodes[0]->children;
+ if (def->cpu) {
+ if (VIR_ALLOC_N(def->numatune.mem_nodes, def->cpu->ncells) < 0)
+ goto error;
+ def->numatune.nmem_nodes = def->cpu->ncells;
+ }
+
while (cur != NULL) {
if (cur->type == XML_ELEMENT_NODE) {
if (xmlStrEqual(cur->name, BAD_CAST "memory")) {
@@ -11764,6 +11802,80 @@ virDomainDefParseXML(xmlDocPtr xml,
def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO;
def->numatune.memory.placement_mode = placement_mode;
+
+ } else if (xmlStrEqual(cur->name, BAD_CAST "memnode")) {
+ unsigned int cellid;
+ struct mem_node *mem_node = NULL;
+
+ if (!def->numatune.nmem_nodes) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Element 'memnode' is invalid
without "
+ "any guest NUMA cells"));
+ goto error;
+ }
+ tmp = virXMLPropString(cur, "cellid");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing required cellid attribute "
+ "in numatune memnode element"));
+ goto error;
+ }
+ if (virStrToLong_ui(tmp, NULL, 10, &cellid) < 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Invalid cellid attribute "
+ "in numatune memnode element"));
+ goto error;
+ }
+ VIR_FREE(tmp);
+
+ if (cellid >= def->numatune.nmem_nodes) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Argument 'cellid' in numatune
"
+ "memnode element must correspond to "
+ "existing guest's NUMA cell"));
+ goto error;
+ }
+
+ mem_node = &def->numatune.mem_nodes[cellid];
+
+ if (mem_node->specified) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Multiple numatune memnode elements "
+ "with duplicate 'cellid'"));
+ goto error;
+ }
+
+ mem_node->specified = true;
+ mem_nodes = true;
+
+ tmp = virXMLPropString(cur, "mode");
+ if (!tmp) {
+ mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
+ } else {
+ if ((mem_node->mode =
+ virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Invalid mode attribute "
+ "in numatune memnode element"));
+ goto error;
+ }
+ VIR_FREE(tmp);
+ }
+
+ tmp = virXMLPropString(cur, "nodeset");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing required nodeset attribute "
+ "in numatune memnode element"));
+ goto error;
+ }
+ if (virBitmapParse(tmp, 0,
+ &mem_node->nodemask,
+ VIR_DOMAIN_CPUMASK_LEN) < 0) {
+ goto error;
+ }
+ VIR_FREE(tmp);
+
} else {
virReportError(VIR_ERR_XML_ERROR,
_("unsupported XML element %s"),
@@ -11784,6 +11896,42 @@ virDomainDefParseXML(xmlDocPtr xml,
}
VIR_FREE(nodes);
+ if (def->numatune.nmem_nodes &&
+ def->numatune.memory.placement_mode ==
+ VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("Per-node binding is not compatible with "
+ "automatic NUMA placement."));
+ goto error;
+ }
+
+ if (!mem_nodes) {
+ /* If there are no <memnode> settings, clear all these data.
+ * If any driver wants to use these in the future, this code
+ * can be cleared. Until then it's easier to keep it this
+ * way. */
+ for (i = 0; i < def->numatune.nmem_nodes; i++)
+ virBitmapFree(def->numatune.mem_nodes[i].nodemask);
+ VIR_FREE(def->numatune.mem_nodes);
+ def->numatune.nmem_nodes = 0;
+ } else {
+ /* Copy numatune/memory information into each node, but leave
+ * specified == false. This eases the process of determination
+ * of each node's nodemask */
+ for (i = 0; i < def->numatune.nmem_nodes; i++) {
+ struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+
+ if (mem_node->specified)
+ continue;
+
+ mem_node->mode = def->numatune.memory.mode;
+ mem_node->nodemask = virBitmapNewCopy(def->numatune.memory.nodemask);
+
+ if (!mem_node->nodemask)
+ goto error;
+ }
+ }
+
if ((n = virXPathNodeSet("./resource", ctxt, &nodes)) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("cannot extract resource nodes"));
@@ -12863,32 +13011,6 @@ virDomainDefParseXML(xmlDocPtr xml,
goto error;
}
- /* analysis of cpu handling */
- if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
- xmlNodePtr oldnode = ctxt->node;
- ctxt->node = node;
- def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
- ctxt->node = oldnode;
-
- if (def->cpu == NULL)
- goto error;
-
- if (def->cpu->sockets &&
- def->maxvcpus >
- def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
- virReportError(VIR_ERR_XML_DETAIL, "%s",
- _("Maximum CPUs greater than topology limit"));
- goto error;
- }
-
- if (def->cpu->cells_cpus > def->maxvcpus) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Number of CPUs in <numa> exceeds the"
- " <vcpu> count"));
- goto error;
- }
- }
-
if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) {
xmlNodePtr oldnode = ctxt->node;
ctxt->node = node;
@@ -17395,31 +17517,57 @@ virDomainDefFormatInternal(virDomainDefPtr def,
virBufferAddLit(buf, "</cputune>\n");
if (def->numatune.memory.nodemask ||
- def->numatune.memory.placement_mode) {
+ def->numatune.memory.placement_mode ||
+ def->numatune.nmem_nodes) {
const char *mode;
char *nodemask = NULL;
const char *placement;
virBufferAddLit(buf, "<numatune>\n");
virBufferAdjustIndent(buf, 2);
- mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
- virBufferAsprintf(buf, "<memory mode='%s' ", mode);
- if (def->numatune.memory.placement_mode ==
- VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
- nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+ if (def->numatune.memory.nodemask ||
+ def->numatune.memory.placement_mode) {
+
+ mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
+ virBufferAsprintf(buf, "<memory mode='%s' ", mode);
+
+ if (def->numatune.memory.placement_mode ==
+ VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
+ nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+ if (nodemask == NULL) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to format nodeset for "
+ "NUMA memory tuning"));
+ goto error;
+ }
+ virBufferAsprintf(buf, "nodeset='%s'/>\n",
nodemask);
+ VIR_FREE(nodemask);
+ } else if (def->numatune.memory.placement_mode) {
+ placement =
virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
+ virBufferAsprintf(buf, "placement='%s'/>\n",
placement);
+ }
+ }
+
+ for (i = 0; i < def->numatune.nmem_nodes; i++) {
+ struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+ if (!mem_node->specified)
+ continue;
+
+ nodemask = virBitmapFormat(mem_node->nodemask);
+ mode = virDomainNumatuneMemModeTypeToString(mem_node->mode);
if (nodemask == NULL) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to format nodeset for "
"NUMA memory tuning"));
goto error;
}
- virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask);
+ virBufferAsprintf(buf,
+ "<memnode cellid='%zu' mode='%s'
nodeset='%s'/>\n",
+ i, mode, nodemask);
VIR_FREE(nodemask);
- } else if (def->numatune.memory.placement_mode) {
- placement =
virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
- virBufferAsprintf(buf, "placement='%s'/>\n",
placement);
}
+
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</numatune>\n");
}
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index bbe32a0..99f9c48 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -712,7 +712,28 @@ qemuDomainDefPostParse(virDomainDefPtr def,
bool addDefaultMemballoon = true;
bool addDefaultUSBKBD = false;
bool addDefaultUSBMouse = false;
-
+ size_t i = 0;
+
+ if (def->numatune.memory.nodemask) {
+ for (i = 0; i < def->numatune.nmem_nodes; i++) {
+ struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+ ssize_t pos = -1;
+ bool bit = false;
+
+ if (!mem_node->specified)
+ continue;
+
+ while ((pos = virBitmapNextSetBit(mem_node->nodemask, pos)) >= 0) {
+ if (virBitmapGetBit(def->numatune.memory.nodemask, pos, &bit) <
0 ||
+ !bit) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("memnode nodeset must be subset of the "
+ "global memory nodeset"));
+ return -1;
+ }
+ }
+ }
+ }
/* check for emulator and create a default one if needed */
if (!def->emulator &&
!(def->emulator = virDomainDefGetDefaultEmulator(def, caps)))
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 3a7622a..545516e 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -8646,6 +8646,7 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
virCgroupPtr cgroup_temp = NULL;
virBitmapPtr temp_nodeset = NULL;
qemuDomainObjPrivatePtr priv = vm->privateData;
+ virDomainDefPtr def = vm->def;
char *nodeset_str = NULL;
size_t i = 0;
int ret = -1;
@@ -8657,6 +8658,16 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
goto cleanup;
}
+ for (i = 0; i < def->numatune.nmem_nodes; i++) {
+ if (def->numatune.mem_nodes[i].specified) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("change of nodeset for running domain "
+ "with per guest NUMA node numatune settings "
+ "is not supported"));
+ goto cleanup;
+ }
+ }
+
/* Get existing nodeset values */
if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
virBitmapParse(nodeset_str, 0, &temp_nodeset,
diff --git a/src/util/virnuma.h b/src/util/virnuma.h
index fe1e966..50fa3f8 100644
--- a/src/util/virnuma.h
+++ b/src/util/virnuma.h
@@ -1,7 +1,7 @@
/*
* virnuma.h: helper APIs for managing numa
*
- * Copyright (C) 2011-2013 Red Hat, Inc.
+ * Copyright (C) 2011-2014 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,17 @@ typedef virNumaTuneDef *virNumaTuneDefPtr;
struct _virNumaTuneDef {
struct {
virBitmapPtr nodemask;
- int mode;
+ int mode; /* enum virDomainNumatuneMemMode */
int placement_mode; /* enum virNumaTuneMemPlacementMode */
- } memory;
+ } memory; /* pinning for all the memory */
+
+ struct mem_node {
+ bool specified;
+ unsigned int nodeid;
+ virBitmapPtr nodemask;
+ int mode;
+ } *mem_nodes; /* pinning per guest's NUMA node */
+ size_t nmem_nodes;
/* Future NUMA tuning related stuff should go here. */
};
--
2.0.0