The aim of the thread-context object is to set affinity on threads
that allocate memory for a memory-backend-* object. For instance:
-object
'{"qom-type":"thread-context","id":"tc-ram-node0","node-affinity":[3]}'
\
-object
'{"qom-type":"memory-backend-memfd","id":"ram-node0","hugetlb":true,\
"hugetlbsize":2097152,"share":true,"prealloc":true,"prealloc-threads":8,\
"size":15032385536,"host-nodes":[3],"policy":"preferred",\
"prealloc-context":"tc-ram-node0"}' \
allocates 14GiB worth of memory, backed by 2MiB hugepages from
host NUMA node 3, using 8 threads. If it weren't for the
thread-context object, these threads wouldn't have any affinity and
thus could theoretically be scheduled to run on CPUs of a different
NUMA node (which is what I saw occasionally).
Therefore, whenever we are pinning memory (IOW setting the host-nodes
attribute), we can generate a thread-context object with the same
affinity.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/qemu/qemu_command.c | 48 +++++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_command.h | 5 +++++
2 files changed, 53 insertions(+)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 41b9f7cb52..392b248628 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3603,6 +3603,54 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg,
}
+int
+qemuBuildThreadContextProps(virJSONValue **tcProps,
+                            virJSONValue **memProps,
+                            qemuDomainObjPrivate *priv)
+{
+    g_autoptr(virJSONValue) props = NULL;
+    virJSONValue *nodemask = NULL;
+    g_autoptr(virJSONValue) nodemaskCopy = NULL;
+    g_autofree char *tcAlias = NULL;
+    const char *memalias = NULL;
+    /* Build a thread-context object with the affinity of @memProps; @tcProps may be NULL. */
+    if (tcProps)
+        *tcProps = NULL;
+    /* QEMU without thread-context support: return 0, nothing is generated. */
+    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_THREAD_CONTEXT))
+        return 0;
+    /* Memory not pinned to host nodes (no "host-nodes"): return 0, nothing is generated. */
+    nodemask = virJSONValueObjectGetArray(*memProps, "host-nodes");
+    if (!nodemask)
+        return 0;
+    /* The thread-context id is derived from the id of the memory backend. */
+    memalias = virJSONValueObjectGetString(*memProps, "id");
+    if (!memalias) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("memory device alias is not assigned"));
+        return -1;
+    }
+    /* @nodemask still belongs to *memProps, so the new object gets its own copy. */
+    tcAlias = g_strdup_printf("tc-%s", memalias);
+    nodemaskCopy = virJSONValueCopy(nodemask);
+    /* Use the very same nodes for thread affinity as for the memory itself. */
+    if (virJSONValueObjectAdd(&props,
+                              "s:qom-type", "thread-context",
+                              "s:id", tcAlias,
+                              "a:node-affinity", &nodemaskCopy,
+                              NULL) < 0)
+        return -1;
+    /* Make the memory backend reference the thread-context for preallocation. */
+    if (virJSONValueObjectAdd(memProps,
+                              "s:prealloc-context", tcAlias,
+                              NULL) < 0)
+        return -1;
+    if (tcProps)
+        *tcProps = g_steal_pointer(&props); /* ownership moves to the caller */
+    return 0;
+}
+
+
static char *
qemuBuildLegacyNicStr(virDomainNetDef *net)
{
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 2578367df9..761cc5d865 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -147,6 +147,11 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg,
const virDomainDef *def,
const virDomainMemoryDef *mem);
+int
+qemuBuildThreadContextProps(virJSONValue **tcProps,
+ virJSONValue **memProps,
+ qemuDomainObjPrivate *priv);
+
/* Current, best practice */
virJSONValue *
qemuBuildPCIHostdevDevProps(const virDomainDef *def,
--
2.37.4