<numatune>
<memory mode="strict" nodeset="1-4,^3"
migratable="yes/no" />
<memnode cellid="0" mode="strict" nodeset="1"/>
<memnode cellid="2" mode="preferred"
nodeset="2"/>
</numatune>
Attribute ``migratable`` will be 'no' by default, and 'yes' indicates
that it allows operating system or hypervisor migrating the memory
pages between different memory nodes, that also means we will not
rely on hypervisor to set the memory policy or memory affinity, we only
use cgroups to restrict the memory nodes, so if ``migratable`` is 'yes',
the ``mode`` which indicates memory policy will be ignored.
---
docs/formatdomain.rst | 8 +++-
docs/schemas/domaincommon.rng | 5 +++
src/conf/numa_conf.c | 45 +++++++++++++++++++
src/conf/numa_conf.h | 3 ++
src/libvirt_private.syms | 1 +
src/qemu/qemu_command.c | 6 ++-
src/qemu/qemu_process.c | 21 +++++++++
.../numatune-memory-migratable.args | 34 ++++++++++++++
.../numatune-memory-migratable.err | 1 +
.../numatune-memory-migratable.xml | 33 ++++++++++++++
tests/qemuxml2argvtest.c | 5 +++
11 files changed, 160 insertions(+), 2 deletions(-)
create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.args
create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.err
create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.xml
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index cc4f91d4ea..4e386a0c3d 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -1072,7 +1072,7 @@ NUMA Node Tuning
<domain>
...
<numatune>
- <memory mode="strict" nodeset="1-4,^3"/>
+ <memory mode="strict" nodeset="1-4,^3"
migratable="yes"/>
<memnode cellid="0" mode="strict"
nodeset="1"/>
<memnode cellid="2" mode="preferred"
nodeset="2"/>
</numatune>
@@ -1097,6 +1097,12 @@ NUMA Node Tuning
will be ignored if it's specified. If ``placement`` of ``vcpu`` is
'auto',
and ``numatune`` is not specified, a default ``numatune`` with ``placement``
'auto' and ``mode`` 'strict' will be added implicitly. :since:`Since
0.9.3`
+ Attribute ``migratable`` is 'no' by default, and 'yes' indicates that
it
+ allows operating system or hypervisor migrating the memory pages between
+ different memory nodes, that also means we will not rely on hypervisor to
+ set the memory policy or memory affinity, we only use cgroups to restrict
+ the memory nodes, so if ``migratable`` is 'yes', the ``mode`` which indicates
+ memory policy will be ignored.
``memnode``
Optional ``memnode`` elements can specify memory allocation policies per each
guest NUMA node. For those nodes having no corresponding ``memnode`` element,
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 316d93fb69..a0283e8449 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -1078,6 +1078,11 @@
<interleave>
<optional>
<element name="memory">
+ <optional>
+ <attribute name="migratable">
+ <ref name="virYesNo"/>
+ </attribute>
+ </optional>
<optional>
<attribute name="mode">
<choice>
diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c
index 6653ba05a6..c14ba1295c 100644
--- a/src/conf/numa_conf.c
+++ b/src/conf/numa_conf.c
@@ -99,6 +99,7 @@ struct _virDomainNuma {
virBitmapPtr nodeset;
virDomainNumatuneMemMode mode;
virDomainNumatunePlacement placement;
+ virTristateBool migratable;
} memory; /* pinning for all the memory */
struct _virDomainNumaNode {
@@ -275,6 +276,7 @@ virDomainNumatuneParseXML(virDomainNumaPtr numa,
int ret = -1;
virBitmapPtr nodeset = NULL;
xmlNodePtr node = NULL;
+ int migratable = 0;
if (virXPathInt("count(./numatune)", ctxt, &n) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -292,6 +294,15 @@ virDomainNumatuneParseXML(virDomainNumaPtr numa,
placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
if (node) {
+ if ((tmp = virXMLPropString(node, "migratable")) &&
+ (migratable = virTristateBoolTypeFromString(tmp)) <= 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Invalid 'migratable' attribute value
'%s'"), tmp);
+ goto cleanup;
+ }
+ numa->memory.migratable = migratable;
+ VIR_FREE(tmp);
+
if ((tmp = virXMLPropString(node, "mode")) &&
(mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
@@ -369,6 +380,11 @@ virDomainNumatuneFormatXML(virBufferPtr buf,
tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode);
virBufferAsprintf(buf, "<memory mode='%s' ", tmp);
+ if (numatune->memory.migratable) {
+ tmp = virTristateBoolTypeToString(numatune->memory.migratable);
+ virBufferAsprintf(buf, "migratable='%s' ", tmp);
+ }
+
if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) {
if (!(nodeset = virBitmapFormat(numatune->memory.nodeset)))
return -1;
@@ -427,6 +443,35 @@ virDomainNumaFree(virDomainNumaPtr numa)
VIR_FREE(numa);
}
+/**
+ * virDomainNumatuneGetMigratable:
+ * @numatune: pointer to numatune definition
+ * @migratable: where to store the result
+ *
+ * Get the migratable attribute for domain's memory. It's safe to
+ * pass NULL to @migratable if the return value is the only info needed.
+ *
+ * Returns: migratable value
+ */
+int virDomainNumatuneGetMigratable(virDomainNumaPtr numatune,
+ virTristateBool *migratable)
+{
+ virTristateBool tmp_migratable = 0;
+
+ if (!numatune)
+ return tmp_migratable;
+
+ if (numatune->memory.specified)
+ tmp_migratable = numatune->memory.migratable;
+ else
+ return tmp_migratable;
+
+ if (migratable != NULL)
+ *migratable = tmp_migratable;
+
+ return tmp_migratable;
+}
+
/**
* virDomainNumatuneGetMode:
* @numatune: pointer to numatune definition
diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h
index db5d79e62a..fe3a8a69c1 100644
--- a/src/conf/numa_conf.h
+++ b/src/conf/numa_conf.h
@@ -102,6 +102,9 @@ int virDomainNumatuneFormatXML(virBufferPtr buf, virDomainNumaPtr
numatune)
/*
* Getters
*/
+int virDomainNumatuneGetMigratable(virDomainNumaPtr numatune,
+ virTristateBool *migratable);
+
int virDomainNumatuneGetMode(virDomainNumaPtr numatune,
int cellid,
virDomainNumatuneMemMode *mode);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 7cf8bea962..de9483065f 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -875,6 +875,7 @@ virDomainNumaSetNodeDistanceCount;
virDomainNumaSetNodeMemorySize;
virDomainNumatuneFormatNodeset;
virDomainNumatuneFormatXML;
+virDomainNumatuneGetMigratable;
virDomainNumatuneGetMode;
virDomainNumatuneGetNodeset;
virDomainNumatuneHasPerNodeBinding;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 476cf6972e..882e7e6ba2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3189,7 +3189,11 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
return -1;
}
- if (nodemask) {
+ // If migratable attribute is yes, we should only use cgroups setting
+ // memory affinity, and skip passing the host-nodes and policy parameters
+ // to QEMU command line.
+ if (nodemask &&
+ virDomainNumatuneGetMigratable(def->numa, NULL) != VIR_TRISTATE_BOOL_YES) {
if (!virNumaNodesetIsAvailable(nodemask))
return -1;
if (virJSONValueObjectAdd(props,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 6b5de29fdb..9c1116064b 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2654,6 +2654,7 @@ qemuProcessSetupPid(virDomainObjPtr vm,
g_autoptr(virBitmap) hostcpumap = NULL;
g_autofree char *mem_mask = NULL;
int ret = -1;
+ size_t i;
if ((period || quota) &&
!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
@@ -2694,6 +2695,26 @@ qemuProcessSetupPid(virDomainObjPtr vm,
&mem_mask, -1) < 0)
goto cleanup;
+ // For vCPU threads, mem_mask is different among cells
+ if (nameval == VIR_CGROUP_THREAD_VCPU) {
+ virDomainNumaPtr numatune = vm->def->numa;
+ virBitmapPtr numanode_cpumask = NULL;
+ for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) {
+ numanode_cpumask = virDomainNumaGetNodeCpumask(numatune, i);
+ // 'i' indicates the cell id, if the vCPU id is in this cell,
+ // we need get the corresonding nodeset
+ if (virBitmapIsBitSet(numanode_cpumask, id)) {
+ if (virDomainNumatuneMaybeFormatNodeset(numatune,
+ priv->autoNodeset,
+ &mem_mask, i) < 0) {
+ goto cleanup;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
goto cleanup;
diff --git a/tests/qemuxml2argvdata/numatune-memory-migratable.args
b/tests/qemuxml2argvdata/numatune-memory-migratable.args
new file mode 100644
index 0000000000..62300d72a2
--- /dev/null
+++ b/tests/qemuxml2argvdata/numatune-memory-migratable.args
@@ -0,0 +1,34 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/tmp/lib/domain--1-QEMUGuest \
+USER=test \
+LOGNAME=test \
+XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest/.local/share \
+XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest/.cache \
+XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name QEMUGuest \
+-S \
+-machine pc,accel=tcg,usb=off,dump-guest-core=off \
+-m 24105 \
+-realtime mlock=off \
+-smp 32,sockets=32,cores=1,threads=1 \
+-object memory-backend-ram,id=ram-node0,size=20971520 \
+-numa node,nodeid=0,cpus=0,memdev=ram-node0 \
+-object memory-backend-ram,id=ram-node1,size=676331520 \
+-numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \
+-object memory-backend-ram,id=ram-node2,size=24578621440 \
+-numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \
+-uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-usb \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
diff --git a/tests/qemuxml2argvdata/numatune-memory-migratable.err
b/tests/qemuxml2argvdata/numatune-memory-migratable.err
new file mode 100644
index 0000000000..1d5e57f996
--- /dev/null
+++ b/tests/qemuxml2argvdata/numatune-memory-migratable.err
@@ -0,0 +1 @@
+unsupported configuration: Per-node memory binding is not supported with this QEMU
diff --git a/tests/qemuxml2argvdata/numatune-memory-migratable.xml
b/tests/qemuxml2argvdata/numatune-memory-migratable.xml
new file mode 100644
index 0000000000..5665358e24
--- /dev/null
+++ b/tests/qemuxml2argvdata/numatune-memory-migratable.xml
@@ -0,0 +1,33 @@
+<domain type='qemu'>
+ <name>QEMUGuest</name>
+ <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid>
+ <memory unit='KiB'>24682468</memory>
+ <currentMemory unit='KiB'>24682468</currentMemory>
+ <vcpu placement='static'>32</vcpu>
+ <numatune>
+ <memory mode='strict' nodeset='0-7'
migratable="yes"/>
+ <memnode cellid='0' mode='preferred' nodeset='3'/>
+ <memnode cellid='2' mode='strict'
nodeset='1-2,5-7,^6'/>
+ </numatune>
+ <os>
+ <type arch='x86_64' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0' memory='20002'
unit='KiB'/>
+ <cell id='1' cpus='1-27,29' memory='660066'
unit='KiB'/>
+ <cell id='2' cpus='28,30-31' memory='24002400'
unit='KiB'/>
+ </numa>
+ </cpu>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 49567326d4..e6554518d7 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1950,6 +1950,11 @@ mymain(void)
DO_TEST("numatune-memory", NONE);
DO_TEST_PARSE_ERROR("numatune-memory-invalid-nodeset", NONE);
+ DO_TEST("numatune-memory-migratable",
+ QEMU_CAPS_NUMA,
+ QEMU_CAPS_OBJECT_MEMORY_RAM);
+ DO_TEST_PARSE_ERROR("numatune-memory-migratable", NONE);
+
DO_TEST("numatune-memnode",
QEMU_CAPS_NUMA,
QEMU_CAPS_OBJECT_MEMORY_RAM);
--
2.25.4