Links between NUMA nodes can have different latencies and
bandwidths. This info is newly defined in ACPI 6.2 under
Heterogeneous Memory Attribute Table (HMAT) table. Linux kernel
learned how to report these values under sysfs and thus we can
expose them in our capabilities XML. The sysfs interface is
documented in kernel's Documentation/admin-guide/mm/numaperf.rst.
Long story short, two nodes can be in initiator-target
relationship. A node can be initiator if it has a CPU or a device
that's capable of initiating memory transfer. Therefore a node
that has just memory can only be target. An initiator-target link
can then have any combination of {bandwidth, latency} - {access,
read, write} attribute (6 in total). However, the standard says
access is applicable iff read and write values are the same.
Therefore, we really have just four combinations of attributes:
bandwidth-read, bandwidth-write, latency-read, latency-write.
This is the combination that kernel reports anyway.
Then, under /sys/system/devices/node/nodeX/acccess0/initiators we
find values for those 4 attributes and also symlinks named
"nodeN" which then represent initiators to nodeX. For instance:
/sys/system/node/node1/access0/initiators/node0 -> ../../node0
/sys/system/node/node1/access0/initiators/read_bandwidth
/sys/system/node/node1/access0/initiators/read_latency
/sys/system/node/node1/access0/initiators/write_bandwidth
/sys/system/node/node1/access0/initiators/write_latency
This means that node0 is initiator and node1 is target and values
of the interconnect can be read.
In theory, there can be separate links to memory side caches too
(e.g. one link from node X to node Y's main memory, another from
node X to node Y's L1 cache, another one to L2 cache and so on).
But sysfs does not express this relationship just yet.
Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=1786309
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
docs/schemas/capability.rng | 3 +
src/conf/capabilities.c | 181 +++++++++++++++++++++++++++++++++++-
src/conf/capabilities.h | 1 +
3 files changed, 184 insertions(+), 1 deletion(-)
diff --git a/docs/schemas/capability.rng b/docs/schemas/capability.rng
index 5c1fb3607c..66dba829a8 100644
--- a/docs/schemas/capability.rng
+++ b/docs/schemas/capability.rng
@@ -138,6 +138,9 @@
<ref name="cell"/>
</oneOrMore>
</element>
+ <optional>
+ <ref name="numaInterconnects"/>
+ </optional>
</element>
</define>
diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c
index 7b60676070..8f2d7b75d7 100644
--- a/src/conf/capabilities.c
+++ b/src/conf/capabilities.c
@@ -191,7 +191,8 @@ virCapabilitiesHostNUMAUnref(virCapsHostNUMA *caps)
if (g_atomic_int_dec_and_test(&caps->refs)) {
g_ptr_array_unref(caps->cells);
-
+ if (caps->interconnects)
+ g_array_unref(caps->interconnects);
g_free(caps);
}
}
@@ -890,6 +891,13 @@ virCapabilitiesHostNUMAFormat(virBuffer *buf,
}
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</cells>\n");
+
+ if (caps->interconnects) {
+ const virNumaInterconnect *interconnects;
+ interconnects = &g_array_index(caps->interconnects, virNumaInterconnect,
0);
+ virNumaInterconnectFormat(buf, interconnects, caps->interconnects->len);
+ }
+
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</topology>\n");
return 0;
@@ -1735,6 +1743,174 @@ virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps)
}
+static void
+virCapabilitiesHostInsertHMAT(GArray *interconnects,
+ int initiator,
+ int target,
+ unsigned int read_bandwidth,
+ unsigned int write_bandwidth,
+ unsigned int read_latency,
+ unsigned int write_latency)
+{
+ virNumaInterconnect ni;
+
+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
+ initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth};
+ g_array_append_val(interconnects, ni);
+
+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
+ initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth};
+ g_array_append_val(interconnects, ni);
+
+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
+ initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency};
+ g_array_append_val(interconnects, ni);
+
+ ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
+ initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency};
+ g_array_append_val(interconnects, ni);
+}
+
+
+static int
+virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects,
+ int node)
+{
+ g_autofree char *path = NULL;
+ g_autofree char *initPath = NULL;
+ g_autoptr(DIR) dir = NULL;
+ int direrr = 0;
+ struct dirent *entry;
+ unsigned int read_bandwidth;
+ unsigned int write_bandwidth;
+ unsigned int read_latency;
+ unsigned int write_latency;
+
+ path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/access0", node);
+
+ if (!virFileExists(path))
+ return 0;
+
+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
+ "read_bandwidth",
+ &read_bandwidth) < 0)
+ return -1;
+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
+ "write_bandwidth",
+ &write_bandwidth) < 0)
+ return -1;
+
+ /* Bandwidths are read in MiB but stored in KiB */
+ read_bandwidth <<= 10;
+ write_bandwidth <<= 10;
+
+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
+ "read_latency",
+ &read_latency) < 0)
+ return -1;
+ if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
+ "write_latency",
+ &write_latency) < 0)
+ return -1;
+
+ initPath = g_strdup_printf("%s/initiators", path);
+
+ if (virDirOpen(&dir, initPath) < 0)
+ return -1;
+
+ while ((direrr = virDirRead(dir, &entry, path)) > 0) {
+ const char *dname = STRSKIP(entry->d_name, "node");
+ int initNode;
+
+ if (!dname)
+ continue;
+
+ if (virStrToLong_i(dname, NULL, 10, &initNode) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("unable to parse %s"),
+ entry->d_name);
+ return -1;
+ }
+
+ virCapabilitiesHostInsertHMAT(interconnects,
+ initNode, node,
+ read_bandwidth,
+ write_bandwidth,
+ read_latency,
+ write_latency);
+ }
+
+ return 0;
+}
+
+
+static int
+virCapsHostNUMAInterconnectComparator(const void *a,
+ const void *b)
+{
+ const virNumaInterconnect *aa = a;
+ const virNumaInterconnect *bb = b;
+
+ if (aa->type != bb->type)
+ return aa->type - bb->type;
+
+ if (aa->initiator != bb->initiator)
+ return aa->initiator - bb->initiator;
+
+ if (aa->target != bb->target)
+ return aa->target - bb->target;
+
+ if (aa->cache != bb->cache)
+ return aa->cache - bb->cache;
+
+ if (aa->accessType != bb->accessType)
+ return aa->accessType - bb->accessType;
+
+ return aa->value - bb->value;
+}
+
+
+static int
+virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps)
+{
+ g_autoptr(DIR) dir = NULL;
+ int direrr = 0;
+ struct dirent *entry;
+ g_autofree char *path = NULL;
+ g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE,
sizeof(virNumaInterconnect));
+
+ path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/");
+
+ if (virDirOpenIfExists(&dir, path) < 0)
+ return -1;
+
+ while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
+ const char *dname = STRSKIP(entry->d_name, "node");
+ int node;
+
+ if (!dname)
+ continue;
+
+ if (virStrToLong_i(dname, NULL, 10, &node) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("unable to parse %s"),
+ entry->d_name);
+ return -1;
+ }
+
+ if (virCapabilitiesHostNUMAInitInterconnectsNode(interconnects, node) < 0)
+ return -1;
+ }
+
+ if (interconnects->len > 0) {
+ g_array_sort(interconnects, virCapsHostNUMAInterconnectComparator);
+ caps->interconnects = g_steal_pointer(&interconnects);
+ }
+
+ return 0;
+}
+
+
static int
virCapabilitiesHostNUMAInitReal(virCapsHostNUMA *caps)
{
@@ -1795,6 +1971,9 @@ virCapabilitiesHostNUMAInitReal(virCapsHostNUMA *caps)
&caches);
}
+ if (virCapabilitiesHostNUMAInitInterconnects(caps) < 0)
+ goto cleanup;
+
ret = 0;
cleanup:
diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h
index 334b361e7a..1b99202c9b 100644
--- a/src/conf/capabilities.h
+++ b/src/conf/capabilities.h
@@ -114,6 +114,7 @@ struct _virCapsHostNUMACell {
struct _virCapsHostNUMA {
gint refs;
GPtrArray *cells;
+ GArray *interconnects; /* virNumaInterconnect */
};
struct _virCapsHostSecModelLabel {
--
2.31.1