On 06/12/2017 12:54 PM, Wim Ten Have wrote:
From: Wim ten Have <wim.ten.have(a)oracle.com>
Add libvirtd NUMA cell domain administration functionality to
describe underlying cell id sibling distances in full fashion
when configuring HVM guests.
Thanks for the detailed cover letter :-). Some of that information (e.g. ACPI
spec, SLIT table, qemu support) would be useful in this commit message.
[below is an example of a 4 node setup]
<cpu>
<numa>
<cell id='0' cpus='0' memory='2097152'
unit='KiB'>
<distances>
<sibling id='0' value='10'/>
<sibling id='1' value='21'/>
<sibling id='2' value='31'/>
<sibling id='3' value='41'/>
</distances>
</cell>
Others on the list are better XML designers, but the following seems to achieve
the same and is a bit more compact
<cell id='0' cpus='0' memory='2097152'
unit='KiB'>
<sibling id='0' distance='10'/>
<sibling id='1' distance='21'/>
<sibling id='2' distance='31'/>
<sibling id='3' distance='41'/>
</cell>
CC danpb, who often has good ideas on schema design.
<cell id='1' cpus='1'
memory='2097152' unit='KiB'>
<distances>
<sibling id='0' value='21'/>
<sibling id='1' value='10'/>
<sibling id='2' value='31'/>
<sibling id='3' value='41'/>
</distances>
</cell>
<cell id='2' cpus='2' memory='2097152'
unit='KiB'>
<distances>
<sibling id='0' value='31'/>
<sibling id='1' value='21'/>
<sibling id='2' value='10'/>
<sibling id='3' value='21'/>
</distances>
<cell id='3' cpus='3' memory='2097152'
unit='KiB'>
<distances>
<sibling id='0' value='41'/>
<sibling id='1' value='31'/>
<sibling id='2' value='21'/>
<sibling id='3' value='10'/>
</distances>
</cell>
</numa>
</cpu>
Changes under this commit concern all those that require adding
the valid data structures, virDomainNuma* functional routines and the
XML doc schema enhancements to enforce appropriate administration.
One item you forgot was docs/formatdomain.html.in. Changes in schema should
always be described by accompanying changes in documentation.
Regards,
Jim
These changes alter the docs/schemas/cputypes.rng enforcing
domain administration to follow the syntax below per numa cell id.
These changes also alter docs/schemas/basictypes.rng to add
"numaDistanceValue" which is an "unsignedInt" with a minimum value
of 10 as 0-9 are reserved values and can not be used as System
Locality Distance Information Table data.
Signed-off-by: Wim ten Have <wim.ten.have(a)oracle.com>
---
docs/schemas/basictypes.rng | 8 ++
docs/schemas/cputypes.rng | 18 +++
src/conf/cpu_conf.c | 2 +-
src/conf/numa_conf.c | 260 +++++++++++++++++++++++++++++++++++++++++++-
src/conf/numa_conf.h | 25 ++++-
src/libvirt_private.syms | 6 +
6 files changed, 313 insertions(+), 6 deletions(-)
diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng
index 1ea667c..a335b5d 100644
--- a/docs/schemas/basictypes.rng
+++ b/docs/schemas/basictypes.rng
@@ -77,6 +77,14 @@
</choice>
</define>
+ <define name="numaDistanceValue">
+ <choice>
+ <data type="unsignedInt">
+ <param name="minInclusive">10</param>
+ </data>
+ </choice>
+ </define>
+
<define name="pciaddress">
<optional>
<attribute name="domain">
diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng
index 3eef16a..c45b6df 100644
--- a/docs/schemas/cputypes.rng
+++ b/docs/schemas/cputypes.rng
@@ -129,6 +129,24 @@
</choice>
</attribute>
</optional>
+ <optional>
+ <element name="distances">
+ <oneOrMore>
+ <ref name="numaDistance"/>
+ </oneOrMore>
+ </element>
+ </optional>
+ </element>
+ </define>
+
+ <define name="numaDistance">
+ <element name="sibling">
+ <attribute name="id">
+ <ref name="unsignedInt"/>
+ </attribute>
+ <attribute name="value">
+ <ref name="numaDistanceValue"/>
+ </attribute>
</element>
</define>
diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c
index da40e9b..5d8f7be3 100644
--- a/src/conf/cpu_conf.c
+++ b/src/conf/cpu_conf.c
@@ -643,7 +643,7 @@ virCPUDefFormatBufFull(virBufferPtr buf,
if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0)
goto cleanup;
- if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0)
+ if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0)
goto cleanup;
/* Put it all together */
diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
index bfd3703..1914810 100644
--- a/src/conf/numa_conf.c
+++ b/src/conf/numa_conf.c
@@ -48,6 +48,8 @@ VIR_ENUM_IMPL(virDomainMemoryAccess, VIR_DOMAIN_MEMORY_ACCESS_LAST,
"shared",
"private")
+typedef struct _virDomainNumaDistance virDomainNumaDistance;
+typedef virDomainNumaDistance *virDomainNumaDistancePtr;
typedef struct _virDomainNumaNode virDomainNumaNode;
typedef virDomainNumaNode *virDomainNumaNodePtr;
@@ -66,6 +68,12 @@ struct _virDomainNuma {
virBitmapPtr nodeset; /* host memory nodes where this guest node resides */
virDomainNumatuneMemMode mode; /* memory mode selection */
virDomainMemoryAccess memAccess; /* shared memory access configuration */
+
+ struct _virDomainNumaDistance {
+ unsigned int value; /* locality value for node i*j */
+ unsigned int cellid;
+ } *distances; /* remote node distances */
+ size_t ndistances;
} *mem_nodes; /* guest node configuration */
size_t nmem_nodes;
@@ -686,6 +694,95 @@ virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune,
}
+static int
+virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def,
+ xmlXPathContextPtr ctxt,
+ unsigned int cur_cell)
+{
+ int ret = -1;
+ char *tmp = NULL;
+ size_t i;
+ xmlNodePtr *nodes = NULL;
+ int ndistances;
+ virDomainNumaDistancePtr distances = NULL;
+
+
+ if (!virXPathNode("./distances[1]/sibling", ctxt))
+ return 0;
+
+ if ((ndistances = virXPathNodeSet("./distances[1]/sibling", ctxt,
&nodes)) <= 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("NUMA distances defined without siblings"));
+ goto cleanup;
+ }
+
+ if (ndistances < def->nmem_nodes) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("NUMA distances defined with fewer siblings than nodes for
cell id: '%d'"),
+ cur_cell);
+ goto cleanup;
+ }
+
+ if (VIR_ALLOC_N(distances, ndistances) < 0)
+ goto cleanup;
+
+ for (i = 0; i < ndistances; i++) {
+ unsigned int sibling_id = i, sibling_value;
+
+ /* siblings are in order of parsing or explicitly numbered */
+ if ((tmp = virXMLPropString(nodes[i], "id"))) {
+ if (virStrToLong_uip(tmp, NULL, 10, &sibling_id) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid 'id' attribute in NUMA distances
for sibling: '%s'"),
+ tmp);
+ goto cleanup;
+ }
+
+ if (sibling_id >= ndistances) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Exactly one 'sibling' element per NUMA
distance "
+ "is allowed, non-contiguous ranges or ranges not
"
+ "starting from 0 are not allowed"));
+ goto cleanup;
+ }
+ }
+ VIR_FREE(tmp);
+
+ /* We need a locality value */
+ if (!(tmp = virXMLPropString(nodes[i], "value"))) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Missing 'value' attribute in NUMA distances
for sibling id: '%d'"),
+ sibling_id);
+ goto cleanup;
+ }
+
+ /* It needs to be applicable */
+ if (virStrToLong_uip(tmp, NULL, 10, &sibling_value) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid 'value' attribute in NUMA distances
for sibling id: '%d'"),
+ sibling_id);
+ goto cleanup;
+ }
+ VIR_FREE(tmp);
+
+ distances[sibling_id].cellid = sibling_id;
+ distances[sibling_id].value = sibling_value;
+ }
+
+ def->mem_nodes[cur_cell].distances = distances;
+ def->mem_nodes[cur_cell].ndistances = ndistances;
+
+ ret = 0;
+
+ cleanup:
+ if (ret)
+ VIR_FREE(distances);
+ VIR_FREE(nodes);
+ VIR_FREE(tmp);
+
+ return ret;
+}
+
int
virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
xmlXPathContextPtr ctxt)
@@ -788,6 +885,14 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
def->mem_nodes[cur_cell].memAccess = rc;
VIR_FREE(tmp);
}
+
+ /* Parse NUMA distances info */
+ if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("NUMA cell %d has incorrect 'distances'
configured"),
+ cur_cell);
+ goto cleanup;
+ }
}
ret = 0;
@@ -801,8 +906,8 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def,
int
-virDomainNumaDefCPUFormat(virBufferPtr buf,
- virDomainNumaPtr def)
+virDomainNumaDefCPUFormatXML(virBufferPtr buf,
+ virDomainNumaPtr def)
{
virDomainMemoryAccess memAccess;
char *cpustr;
@@ -815,6 +920,8 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
virBufferAddLit(buf, "<numa>\n");
virBufferAdjustIndent(buf, 2);
for (i = 0; i < ncells; i++) {
+ int ndistances;
+
memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i))))
@@ -829,7 +936,30 @@ virDomainNumaDefCPUFormat(virBufferPtr buf,
if (memAccess)
virBufferAsprintf(buf, " memAccess='%s'",
virDomainMemoryAccessTypeToString(memAccess));
- virBufferAddLit(buf, "/>\n");
+
+ ndistances = def->mem_nodes[i].ndistances;
+ if (!ndistances) {
+ virBufferAddLit(buf, "/>\n");
+ } else {
+ size_t j;
+ virDomainNumaDistancePtr distances = def->mem_nodes[i].distances;
+
+ virBufferAddLit(buf, ">\n");
+ virBufferAdjustIndent(buf, 2);
+ virBufferAddLit(buf, "<distances>\n");
+ virBufferAdjustIndent(buf, 2);
+ for (j = 0; j < ndistances; j++) {
+ virBufferAddLit(buf, "<sibling");
+ virBufferAsprintf(buf, " id='%d'",
distances[j].cellid);
+ virBufferAsprintf(buf, " value='%d'",
distances[j].value);
+ virBufferAddLit(buf, "/>\n");
+ }
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</distances>\n");
+ virBufferAdjustIndent(buf, -2);
+ virBufferAddLit(buf, "</cell>\n");
+ }
+
VIR_FREE(cpustr);
}
virBufferAdjustIndent(buf, -2);
@@ -922,13 +1052,121 @@ virDomainNumaCheckABIStability(virDomainNumaPtr src,
size_t
virDomainNumaGetNodeCount(virDomainNumaPtr numa)
{
- if (!numa)
+ if (!numa || !numa->mem_nodes)
return 0;
return numa->nmem_nodes;
}
+size_t
+virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes)
+{
+ if (!numa || !nmem_nodes)
+ return 0;
+
+ if (numa->mem_nodes) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot alter an existing nmem_nodes set"));
+ return 0;
+ }
+
+ if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0)
+ return 0;
+
+ numa->nmem_nodes = nmem_nodes;
+
+ return numa->nmem_nodes;
+}
+
+
+size_t
+virDomainNumaGetNodeDistance(virDomainNumaPtr numa,
+ size_t node,
+ size_t cellid)
+{
+ virDomainNumaDistancePtr distances;
+
+ if (!numa)
+ return 0;
+
+ distances = numa->mem_nodes[node].distances;
+ if (!numa->mem_nodes[node].ndistances || !distances)
+ return 0;
+
+ return distances[cellid].value;
+}
+
+
+size_t
+virDomainNumaSetNodeDistance(virDomainNumaPtr numa,
+ size_t node,
+ size_t cellid,
+ unsigned int value)
+{
+ virDomainNumaDistancePtr distances;
+
+ /*
+ * Advanced Configuration and Power Interface
+ * Specification version 6.1. Chapter 5.2.17
+ * System Locality Distance Information Table
+ * ... Distance values of 0-9 are reserved.
+ */
+ if (!numa || value < 10)
+ return 0;
+
+ distances = numa->mem_nodes[node].distances;
+
+ if (numa->mem_nodes[node].ndistances > 0 &&
+ distances[cellid].value)
+ return 0;
+
+ distances[cellid].cellid = cellid;
+ distances[cellid].value = value;
+
+ return distances[cellid].value;
+}
+
+
+size_t
+virDomainNumaGetNodeDistanceCount(virDomainNumaPtr numa,
+ size_t node)
+{
+ if (!numa)
+ return 0;
+
+ return numa->mem_nodes[node].ndistances;
+}
+
+
+size_t
+virDomainNumaSetNodeDistanceCount(virDomainNumaPtr numa,
+ size_t node,
+ size_t ndistances)
+{
+ virDomainNumaDistancePtr distances;
+
+ if (!numa || !ndistances)
+ return 0;
+
+ distances = numa->mem_nodes[node].distances;
+ if (distances) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot alter an existing nmem_nodes distances set for
node: %zu"),
+ node);
+ return 0;
+ }
+
+ if (VIR_ALLOC_N(distances, ndistances) < 0)
+ return 0;
+
+ numa->mem_nodes[node].distances = distances;
+ numa->mem_nodes[node].ndistances = ndistances;
+
+ return numa->mem_nodes[node].ndistances;
+}
+
+
virBitmapPtr
virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
size_t node)
@@ -937,6 +1175,20 @@ virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
}
+virBitmapPtr
+virDomainNumaSetNodeCpumask(virDomainNumaPtr numa,
+ size_t node,
+ virBitmapPtr cpumask)
+{
+ if (!numa || !cpumask)
+ return NULL;
+
+ numa->mem_nodes[node].cpumask = cpumask;
+
+ return numa->mem_nodes[node].cpumask;
+}
+
+
virDomainMemoryAccess
virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa,
size_t node)
diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h
index b6a5354..96dda90 100644
--- a/src/conf/numa_conf.h
+++ b/src/conf/numa_conf.h
@@ -87,12 +87,35 @@ int virDomainNumatuneMaybeGetNodeset(virDomainNumaPtr numatune,
size_t virDomainNumaGetNodeCount(virDomainNumaPtr numa);
+size_t virDomainNumaSetNodeCount(virDomainNumaPtr numa,
+ size_t nmem_nodes);
+
+size_t virDomainNumaGetNodeDistance(virDomainNumaPtr numa,
+ size_t node,
+ size_t sibling)
+ ATTRIBUTE_NONNULL(1);
+size_t virDomainNumaSetNodeDistance(virDomainNumaPtr numa,
+ size_t node,
+ size_t sibling,
+ unsigned int value)
+ ATTRIBUTE_NONNULL(1);
+size_t virDomainNumaGetNodeDistanceCount(virDomainNumaPtr numa,
+ size_t node)
+ ATTRIBUTE_NONNULL(1);
+size_t virDomainNumaSetNodeDistanceCount(virDomainNumaPtr numa,
+ size_t node,
+ size_t ndistances)
+ ATTRIBUTE_NONNULL(1);
virBitmapPtr virDomainNumaGetNodeCpumask(virDomainNumaPtr numa,
size_t node)
ATTRIBUTE_NONNULL(1);
virDomainMemoryAccess virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa,
size_t node)
ATTRIBUTE_NONNULL(1);
+virBitmapPtr virDomainNumaSetNodeCpumask(virDomainNumaPtr numa,
+ size_t node,
+ virBitmapPtr cpumask)
+ ATTRIBUTE_NONNULL(1);
unsigned long long virDomainNumaGetNodeMemorySize(virDomainNumaPtr numa,
size_t node)
ATTRIBUTE_NONNULL(1);
@@ -151,7 +174,7 @@ bool virDomainNumatuneNodeSpecified(virDomainNumaPtr numatune,
int cellid);
int virDomainNumaDefCPUParseXML(virDomainNumaPtr def, xmlXPathContextPtr ctxt);
-int virDomainNumaDefCPUFormat(virBufferPtr buf, virDomainNumaPtr def);
+int virDomainNumaDefCPUFormatXML(virBufferPtr buf, virDomainNumaPtr def);
unsigned int virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 044510f..e7fb9c0 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -703,9 +703,15 @@ virDomainNumaGetMaxCPUID;
virDomainNumaGetMemorySize;
virDomainNumaGetNodeCount;
virDomainNumaGetNodeCpumask;
+virDomainNumaGetNodeDistance;
+virDomainNumaGetNodeDistanceCount;
virDomainNumaGetNodeMemoryAccessMode;
virDomainNumaGetNodeMemorySize;
virDomainNumaNew;
+virDomainNumaSetNodeCount;
+virDomainNumaSetNodeCpumask;
+virDomainNumaSetNodeDistance;
+virDomainNumaSetNodeDistanceCount;
virDomainNumaSetNodeMemorySize;
virDomainNumatuneFormatNodeset;
virDomainNumatuneFormatXML;