[PATCH 0/4] Introduce memory allocation threads

*** BLURB HERE *** Michal Prívozník (4): conf: Introduce memory allocation threads qemu_capabilities: Detect memory-backend-*.prealloc-threads property qemu_validate: Validate prealloc threads against qemuCaps qemu_command: Generate prealloc-threads property docs/formatdomain.rst | 6 ++++-- docs/schemas/domaincommon.rng | 19 +++++++++++++------ src/conf/domain_conf.c | 15 ++++++++++++++- src/conf/domain_conf.h | 1 + src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 5 ++++- src/qemu/qemu_validate.c | 7 +++++++ .../caps_5.0.0.aarch64.xml | 1 + .../qemucapabilitiesdata/caps_5.0.0.ppc64.xml | 1 + .../caps_5.0.0.riscv64.xml | 1 + .../caps_5.0.0.x86_64.xml | 1 + .../qemucapabilitiesdata/caps_5.1.0.sparc.xml | 1 + .../caps_5.1.0.x86_64.xml | 1 + .../caps_5.2.0.aarch64.xml | 1 + .../qemucapabilitiesdata/caps_5.2.0.ppc64.xml | 1 + .../caps_5.2.0.riscv64.xml | 1 + .../qemucapabilitiesdata/caps_5.2.0.s390x.xml | 1 + .../caps_5.2.0.x86_64.xml | 1 + .../caps_6.0.0.aarch64.xml | 1 + .../qemucapabilitiesdata/caps_6.0.0.s390x.xml | 1 + .../caps_6.0.0.x86_64.xml | 1 + .../caps_6.1.0.x86_64.xml | 1 + .../caps_6.2.0.aarch64.xml | 1 + .../qemucapabilitiesdata/caps_6.2.0.ppc64.xml | 1 + .../caps_6.2.0.x86_64.xml | 1 + .../qemucapabilitiesdata/caps_7.0.0.ppc64.xml | 1 + .../caps_7.0.0.x86_64.xml | 1 + .../memfd-memory-numa.x86_64-latest.args | 4 ++-- tests/qemuxml2argvdata/memfd-memory-numa.xml | 2 +- 30 files changed, 69 insertions(+), 13 deletions(-) -- 2.34.1

Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of specifying number of threads used to allocate memory. While it defaults to the number of vCPUs, users might want to use a different value (especially for humongous guests with gigantic pages). In general, on QEMU cmd line level it is possible to use different number of threads per each memory-backend-* object, in practical terms it's not useful. Therefore, use <memoryBacking/> to set guest wide value and let all memory devices 'inherit' it, silently. IOW, don't introduce per device knob because that would only complicate things for a little or no benefit. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++-- docs/schemas/domaincommon.rng | 19 +++++++++++++------ src/conf/domain_conf.c | 15 ++++++++++++++- src/conf/domain_conf.h | 1 + tests/qemuxml2argvdata/memfd-memory-numa.xml | 2 +- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 9b1b69bb4d..8e25474db0 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1004,7 +1004,7 @@ Memory Backing <locked/> <source type="file|anonymous|memfd"/> <access mode="shared|private"/> - <allocation mode="immediate|ondemand"/> + <allocation mode="immediate|ondemand" threads='8'/> <discard/> </memoryBacking> ... @@ -1054,7 +1054,9 @@ influence how virtual memory pages are backed by host pages. "private". This can be overridden per numa node by ``memAccess``. ``allocation`` Using the ``mode`` attribute, specify when to allocate the memory by - supplying either "immediate" or "ondemand". + supplying either "immediate" or "ondemand". :since:`Since 8.2.0` this + attribute is optional among with ``threads`` attribute, that sets the number + of threads that hypervisor uses to allocate memory. ``discard`` When set and supported by hypervisor the memory content is discarded just before guest shuts down (or when DIMM module is unplugged). 
Please note that diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 9c1b64a644..34bccee2f5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -740,12 +740,19 @@ </optional> <optional> <element name="allocation"> - <attribute name="mode"> - <choice> - <value>immediate</value> - <value>ondemand</value> - </choice> - </attribute> + <optional> + <attribute name="mode"> + <choice> + <value>immediate</value> + <value>ondemand</value> + </choice> + </attribute> + </optional> + <optional> + <attribute name="threads"> + <ref name="unsignedInt"/> + </attribute> + </optional> </element> </optional> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e0dfc9e45f..2414a806d0 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -18914,6 +18914,13 @@ virDomainDefParseMemory(virDomainDef *def, VIR_FREE(tmp); } + if (virXPathUInt("string(./memoryBacking/allocation/@threads)", + ctxt, &def->mem.allocation_threads) == -2) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Failed to parse memory allocation threads")); + return -1; + } + if (virXPathNode("./memoryBacking/hugepages", ctxt)) { /* hugepages will be used */ if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) { @@ -27464,6 +27471,7 @@ virDomainMemorybackingFormat(virBuffer *buf, const virDomainMemtune *mem) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) allocAttrBuf = VIR_BUFFER_INITIALIZER; if (mem->nhugepages) virDomainHugepagesFormat(&childBuf, mem->hugepages, mem->nhugepages); @@ -27478,8 +27486,13 @@ virDomainMemorybackingFormat(virBuffer *buf, virBufferAsprintf(&childBuf, "<access mode='%s'/>\n", virDomainMemoryAccessTypeToString(mem->access)); if (mem->allocation) - virBufferAsprintf(&childBuf, "<allocation mode='%s'/>\n", + virBufferAsprintf(&allocAttrBuf, " mode='%s'", virDomainMemoryAllocationTypeToString(mem->allocation)); + if (mem->allocation_threads) + 
virBufferAsprintf(&allocAttrBuf, " threads='%u'", mem->allocation_threads); + + virXMLFormatElement(&childBuf, "allocation", &allocAttrBuf, NULL); + if (mem->discard) virBufferAddLit(&childBuf, "<discard/>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index a4de46773c..48df6cc389 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2701,6 +2701,7 @@ struct _virDomainMemtune { int source; /* enum virDomainMemorySource */ int access; /* enum virDomainMemoryAccess */ int allocation; /* enum virDomainMemoryAllocation */ + unsigned int allocation_threads; virTristateBool discard; }; diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml index 1ebcee8939..1ac87e3aef 100644 --- a/tests/qemuxml2argvdata/memfd-memory-numa.xml +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml @@ -10,7 +10,7 @@ </hugepages> <source type='memfd'/> <access mode='shared'/> - <allocation mode='immediate'/> + <allocation mode='immediate' threads='8'/> </memoryBacking> <vcpu placement='static'>8</vcpu> <numatune> -- 2.34.1

On 3/22/22 16:05, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of specifying number of threads used to allocate memory. While it defaults to the number of vCPUs, users might want to use a different value (especially for humongous guests with gigantic pages).
Err, this statement is not correct. The default value is 1 and not the number of vCPUs. If it was the number of vCPUs then these patches would not be that necessary. I've reworded this paragraph locally to: Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of specifying number of threads used to allocate memory. It defaults to 1, which may be too low for humongous guests with gigantic pages. Thanks to David Hildebrand for pointing this out to me. Michal

On Tue, Mar 22, 2022 at 04:05:14PM +0100, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of
Anything wrong with the old "commit ffac16fab33b" ?
specifying number of threads used to allocate memory. While it defaults to the number of vCPUs, users might want to use a different value (especially for humongous guests with gigantic pages).
In general, on QEMU cmd line level it is possible to use different number of threads per each memory-backend-* object, in practical terms it's not useful. Therefore, use <memoryBacking/> to set guest wide value and let all memory devices 'inherit' it, silently. IOW, don't introduce per device knob because that would only complicate things for a little or no benefit.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++-- docs/schemas/domaincommon.rng | 19 +++++++++++++------ src/conf/domain_conf.c | 15 ++++++++++++++- src/conf/domain_conf.h | 1 + tests/qemuxml2argvdata/memfd-memory-numa.xml | 2 +- 5 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 9b1b69bb4d..8e25474db0 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1004,7 +1004,7 @@ Memory Backing <locked/> <source type="file|anonymous|memfd"/> <access mode="shared|private"/> - <allocation mode="immediate|ondemand"/> + <allocation mode="immediate|ondemand" threads='8'/> <discard/> </memoryBacking> ... @@ -1054,7 +1054,9 @@ influence how virtual memory pages are backed by host pages. "private". This can be overridden per numa node by ``memAccess``. ``allocation`` Using the ``mode`` attribute, specify when to allocate the memory by - supplying either "immediate" or "ondemand". + supplying either "immediate" or "ondemand". :since:`Since 8.2.0` this + attribute is optional among with ``threads`` attribute, that sets the number + of threads that hypervisor uses to allocate memory.
This is weird to read. Just say "Using the optional mode attribute" and refer to threads as optional too. If anyone wants to use just the allocation threads and leave out the mode they have to be on 8.2.0 anyway, so no need to complicate things.
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e0dfc9e45f..2414a806d0 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -18914,6 +18914,13 @@ virDomainDefParseMemory(virDomainDef *def, VIR_FREE(tmp); }
+ if (virXPathUInt("string(./memoryBacking/allocation/@threads)", + ctxt, &def->mem.allocation_threads) == -2) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Failed to parse memory allocation threads")); + return -1; + } + if (virXPathNode("./memoryBacking/hugepages", ctxt)) { /* hugepages will be used */ if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) { @@ -27464,6 +27471,7 @@ virDomainMemorybackingFormat(virBuffer *buf, const virDomainMemtune *mem) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) allocAttrBuf = VIR_BUFFER_INITIALIZER;
if (mem->nhugepages) virDomainHugepagesFormat(&childBuf, mem->hugepages, mem->nhugepages); @@ -27478,8 +27486,13 @@ virDomainMemorybackingFormat(virBuffer *buf, virBufferAsprintf(&childBuf, "<access mode='%s'/>\n", virDomainMemoryAccessTypeToString(mem->access)); if (mem->allocation) - virBufferAsprintf(&childBuf, "<allocation mode='%s'/>\n", + virBufferAsprintf(&allocAttrBuf, " mode='%s'", virDomainMemoryAllocationTypeToString(mem->allocation)); + if (mem->allocation_threads)
Here you check if (mem->allocation_threads), but in 3/4 you check if (allocation_threads > 0), which is a bit inconsistent. I prefer the former, although I know we have had some disputes about this, so pick whichever one you like and make it consistent.

On 3/24/22 10:26, Martin Kletzander wrote:
On Tue, Mar 22, 2022 at 04:05:14PM +0100, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of
Anything wrong with the old "commit ffac16fab33b" ?
Not specifically, no. But I do it this way so that it's visible at first sight which QEMU version this feature was introduced in. And it works with 'git show' too (for those who are interested in the particular commit). IOW: "Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable .." contains more information than: "Since its commit ffac16fab33b QEMU is capable .." But I don't care that much. I agree that in this particular case it's a bit messy, because QEMU doesn't have a linear history. It's way nicer with libvirt commits.
specifying number of threads used to allocate memory. While it defaults to the number of vCPUs, users might want to use a different value (especially for humongous guests with gigantic pages).
In general, on QEMU cmd line level it is possible to use different number of threads per each memory-backend-* object, in practical terms it's not useful. Therefore, use <memoryBacking/> to set guest wide value and let all memory devices 'inherit' it, silently. IOW, don't introduce per device knob because that would only complicate things for a little or no benefit.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++-- docs/schemas/domaincommon.rng | 19 +++++++++++++------ src/conf/domain_conf.c | 15 ++++++++++++++- src/conf/domain_conf.h | 1 + tests/qemuxml2argvdata/memfd-memory-numa.xml | 2 +- 5 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 9b1b69bb4d..8e25474db0 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1004,7 +1004,7 @@ Memory Backing <locked/> <source type="file|anonymous|memfd"/> <access mode="shared|private"/> - <allocation mode="immediate|ondemand"/> + <allocation mode="immediate|ondemand" threads='8'/> <discard/> </memoryBacking> ... @@ -1054,7 +1054,9 @@ influence how virtual memory pages are backed by host pages. "private". This can be overridden per numa node by ``memAccess``. ``allocation`` Using the ``mode`` attribute, specify when to allocate the memory by - supplying either "immediate" or "ondemand". + supplying either "immediate" or "ondemand". :since:`Since 8.2.0` this + attribute is optional among with ``threads`` attribute, that sets the number + of threads that hypervisor uses to allocate memory.
This is weird to read. Just say "Using the optional mode attribute" and refer to threads as optional too. If anyone wants to use just the allocation threads and leave out the mode they have to be on 8.2.0 anyway, so no need to complicate things.
Alright, fair enough.
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e0dfc9e45f..2414a806d0 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -18914,6 +18914,13 @@ virDomainDefParseMemory(virDomainDef *def, VIR_FREE(tmp); }
+ if (virXPathUInt("string(./memoryBacking/allocation/@threads)", + ctxt, &def->mem.allocation_threads) == -2) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Failed to parse memory allocation threads")); + return -1; + } + if (virXPathNode("./memoryBacking/hugepages", ctxt)) { /* hugepages will be used */ if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) { @@ -27464,6 +27471,7 @@ virDomainMemorybackingFormat(virBuffer *buf, const virDomainMemtune *mem) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) allocAttrBuf = VIR_BUFFER_INITIALIZER;
if (mem->nhugepages) virDomainHugepagesFormat(&childBuf, mem->hugepages, mem->nhugepages); @@ -27478,8 +27486,13 @@ virDomainMemorybackingFormat(virBuffer *buf, virBufferAsprintf(&childBuf, "<access mode='%s'/>\n", virDomainMemoryAccessTypeToString(mem->access)); if (mem->allocation) - virBufferAsprintf(&childBuf, "<allocation mode='%s'/>\n", + virBufferAsprintf(&allocAttrBuf, " mode='%s'", virDomainMemoryAllocationTypeToString(mem->allocation)); + if (mem->allocation_threads)
Here you check if (mem->allocation_threads), but in 3/4 you check if (allocation_threads > 0), which is a bit inconsistent. I prefer the former although I know we had some disputes with this, so pick whatever one and make it consistent.
D'oh! I remember I wanted to fix this. > 0 it is. Michal

On Thu, Mar 24, 2022 at 12:15:43PM +0100, Michal Prívozník wrote:
On 3/24/22 10:26, Martin Kletzander wrote:
On Tue, Mar 22, 2022 at 04:05:14PM +0100, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of
Anything wrong with the old "commit ffac16fab33b" ?
Not specifically no. But I do it this ways so that it's visible at the first sight what QEMU version was this feature introduced in. And it works with 'git show' too (for those who are interested in the particular commit). IOW:
"Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable .."
How do you generate such a reference ? It is different from what a simple git describe shows $ git describe --match 'v*' ffac16fab33b v4.2.0-1618-gffac16fab3 and I feel the git describe output is much nicer, as it actually includes the commit short hash still. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Thu, Mar 24, 2022 at 11:22:54AM +0000, Daniel P. Berrangé wrote:
On Thu, Mar 24, 2022 at 12:15:43PM +0100, Michal Prívozník wrote:
On 3/24/22 10:26, Martin Kletzander wrote:
On Tue, Mar 22, 2022 at 04:05:14PM +0100, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of
Anything wrong with the old "commit ffac16fab33b" ?
Not specifically no. But I do it this ways so that it's visible at the first sight what QEMU version was this feature introduced in. And it works with 'git show' too (for those who are interested in the particular commit). IOW:
"Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable .."
How do you generate such a reference ? It is different from what a simple git describe shows
$ git describe --match 'v*' ffac16fab33b v4.2.0-1618-gffac16fab3
and I feel the git describe output is much nicer, as it actually includes the commit short hash still.
I think Michal used --contains, which clearly shows the tag in which the commit is first included; tracing the path down through merge commits is what makes it look like the above.
With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 3/24/22 12:49, Martin Kletzander wrote:
On Thu, Mar 24, 2022 at 11:22:54AM +0000, Daniel P. Berrangé wrote:
On Thu, Mar 24, 2022 at 12:15:43PM +0100, Michal Prívozník wrote:
On 3/24/22 10:26, Martin Kletzander wrote:
On Tue, Mar 22, 2022 at 04:05:14PM +0100, Michal Privoznik wrote:
Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable of
Anything wrong with the old "commit ffac16fab33b" ?
Not specifically no. But I do it this ways so that it's visible at the first sight what QEMU version was this feature introduced in. And it works with 'git show' too (for those who are interested in the particular commit). IOW:
"Since its commit v5.0.0-rc0~75^2~1^2~3 QEMU is capable .."
How do you generate such a reference ? It is different from what a simple git describe shows
$ git describe --match 'v*' ffac16fab33b v4.2.0-1618-gffac16fab3
and I feel the git describe output is much nicer, as it actually includes the commit short hash still.
I think Michal used --contains which clearly shows from which tag is the commit included, but tracking down through merge commits is what makes it look like the above.
Indeed. qemu.git $ git describe --contains ffac16fab33bb42f17e47624985220c1fd864e9d v5.0.0-rc0~75^2~1^2~3 Michal

The prealloc-threads is property of memory-backend class which is parent to the other three classes memory-backend-{ram,file,memfd}. Therefore the property is present for all, or none if QEMU is older than v5.0.0-rc0~75^2~1^2~3 which introduced the property. Anyway, the .reserve property is the same story, and we chose memory-backend-file to detect it, so stick with our earlier decision and use the same backend to detect this new property. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml | 1 + tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml | 1 + tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml | 1 + tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml | 1 + tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml | 1 + tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml | 1 + tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml | 1 + tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml | 1 + tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_7.0.0.ppc64.xml | 1 + tests/qemucapabilitiesdata/caps_7.0.0.x86_64.xml | 1 + 22 files changed, 23 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 32980e7330..6b4ed08499 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -668,6 +668,7 @@ VIR_ENUM_IMPL(virQEMUCaps, /* 425 */ "blockdev.nbd.tls-hostname", /* 
QEMU_CAPS_BLOCKDEV_NBD_TLS_HOSTNAME */ + "memory-backend-file.prealloc-threads", /* QEMU_CAPS_MEMORY_BACKEND_PREALLOC_THREADS */ ); @@ -1766,6 +1767,7 @@ static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsMemoryBackendFile[] = * released qemu versions. */ { "x-use-canonical-path-for-ramblock-id", QEMU_CAPS_X_USE_CANONICAL_PATH_FOR_RAMBLOCK_ID }, { "reserve", QEMU_CAPS_MEMORY_BACKEND_RESERVE }, + { "prealloc-threads", QEMU_CAPS_MEMORY_BACKEND_PREALLOC_THREADS }, }; static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsMemoryBackendMemfd[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 0a215a11d5..948029d60d 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -643,6 +643,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ /* 425 */ QEMU_CAPS_BLOCKDEV_NBD_TLS_HOSTNAME, /* tls hostname can be overriden for NBD clients */ + QEMU_CAPS_MEMORY_BACKEND_PREALLOC_THREADS, /* -object memory-backend-*.prealloc-threads */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml index 4c1a37104c..d188eab3fe 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml @@ -178,6 +178,7 @@ <flag name='input-linux'/> <flag name='query-display-options'/> <flag name='virtio-blk.queue-size'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700241</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml index 070d0f2982..12592f457a 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml @@ -186,6 +186,7 @@ <flag name='input-linux'/> <flag name='query-display-options'/> <flag 
name='virtio-blk.queue-size'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>42900241</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml index 286049a8a6..75c3a9b4fc 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml @@ -170,6 +170,7 @@ <flag name='input-linux'/> <flag name='query-display-options'/> <flag name='virtio-blk.queue-size'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>0</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml index f563d672b4..5927c48ee4 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml @@ -219,6 +219,7 @@ <flag name='input-linux'/> <flag name='query-display-options'/> <flag name='virtio-blk.queue-size'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100241</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml b/tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml index 58af90b29f..7de7c291f5 100644 --- a/tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml +++ b/tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml @@ -90,6 +90,7 @@ <flag name='rotation-rate'/> <flag name='input-linux'/> <flag name='query-display-options'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5001000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>0</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml index 2d0ef18147..e0291268b3 100644 --- a/tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml +++ 
b/tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml @@ -222,6 +222,7 @@ <flag name='query-display-options'/> <flag name='virtio-blk.queue-size'/> <flag name='virtio-mem-pci'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5001000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml index 9f14d59efb..20a1a8154b 100644 --- a/tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml @@ -184,6 +184,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml b/tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml index e050514fc1..add2ded00a 100644 --- a/tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml +++ b/tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml @@ -190,6 +190,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>42900243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml b/tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml index 4b123d4ce9..237d043e5d 100644 --- a/tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml +++ b/tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml @@ -174,6 +174,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>0</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml 
b/tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml index bc3c3c351a..57e40f8f86 100644 --- a/tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml +++ b/tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml @@ -141,6 +141,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>39100243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml index 70ad14f548..7c65648c06 100644 --- a/tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml @@ -226,6 +226,7 @@ <flag name='piix4.acpi-root-pci-hotplug'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>5002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml index 9d501f29af..7557e6ad71 100644 --- a/tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml @@ -192,6 +192,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml b/tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml index 02b24f5d7e..00009be3dc 100644 --- a/tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml +++ b/tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml @@ -149,6 +149,7 @@ <flag name='virtio-blk.queue-size'/> <flag name='query-dirty-rate'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6000000</version> 
<kvmVersion>0</kvmVersion> <microcodeVersion>39100242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml index 8ea688f7a6..61d561dc69 100644 --- a/tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml @@ -235,6 +235,7 @@ <flag name='query-dirty-rate'/> <flag name='sev-inject-launch-secret'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6000000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml index ba1aecc37e..0b58210335 100644 --- a/tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml @@ -239,6 +239,7 @@ <flag name='rbd-encryption'/> <flag name='sev-inject-launch-secret'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6001000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml index 17d563eb5b..d08b2c0213 100644 --- a/tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml @@ -203,6 +203,7 @@ <flag name='query-dirty-rate'/> <flag name='rbd-encryption'/> <flag name='calc-dirty-rate'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6001050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700244</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml b/tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml index 9fe9c27d14..8c52964ec0 100644 --- a/tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml +++ b/tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml @@ -201,6 +201,7 @@ <flag name='sev-guest-kernel-hashes'/> <flag 
name='calc-dirty-rate'/> <flag name='dirtyrate-param.mode'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>42900244</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml index d77907af55..cdf72b9ebf 100644 --- a/tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml @@ -241,6 +241,7 @@ <flag name='sev-inject-launch-secret'/> <flag name='calc-dirty-rate'/> <flag name='dirtyrate-param.mode'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100244</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_7.0.0.ppc64.xml b/tests/qemucapabilitiesdata/caps_7.0.0.ppc64.xml index 5d7f283a73..8aba3329ad 100644 --- a/tests/qemucapabilitiesdata/caps_7.0.0.ppc64.xml +++ b/tests/qemucapabilitiesdata/caps_7.0.0.ppc64.xml @@ -202,6 +202,7 @@ <flag name='device.json+hotplug'/> <flag name='calc-dirty-rate'/> <flag name='dirtyrate-param.mode'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6002050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>42900243</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_7.0.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_7.0.0.x86_64.xml index 0f34a341af..5227e3ee0b 100644 --- a/tests/qemucapabilitiesdata/caps_7.0.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_7.0.0.x86_64.xml @@ -244,6 +244,7 @@ <flag name='calc-dirty-rate'/> <flag name='dirtyrate-param.mode'/> <flag name='blockdev.nbd.tls-hostname'/> + <flag name='memory-backend-file.prealloc-threads'/> <version>6002050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100243</microcodeVersion> -- 2.34.1

On Tue, Mar 22, 2022 at 04:05:15PM +0100, Michal Privoznik wrote:
The prealloc-threads property belongs to the memory-backend class, which is the parent of the other three classes memory-backend-{ram,file,memfd}. Therefore the property is present for all of them, or for none if QEMU is older than v5.0.0-rc0~75^2~1^2~3, which introduced the property.
Anyway, the .reserve property is the same story, and we chose memory-backend-file to detect it, so stick with our earlier decision and use the same backend to detect this new property.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

Only fairly new QEMUs are capable of user provided number of preallocation threads. Validate this assumption. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index f27e480696..e0708b8a76 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -739,6 +739,13 @@ qemuValidateDomainDefMemory(const virDomainDef *def, return -1; } + if (mem->allocation_threads > 0 && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_MEMORY_BACKEND_PREALLOC_THREADS)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("preallocation threads are unsupported with this QEMU")); + return -1; + } + if (mem->source == VIR_DOMAIN_MEMORY_SOURCE_ANONYMOUS) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("hugepages are not allowed with anonymous " -- 2.34.1

On Tue, Mar 22, 2022 at 04:05:16PM +0100, Michal Privoznik wrote:
Only fairly new QEMUs support a user-provided number of preallocation threads. Validate this assumption.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

Let's generate prealloc-threads property onto the cmd line if domain configuration requests so. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 5 ++++- tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index c836799888..8246ab515a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3806,7 +3806,10 @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps, return -1; } else { if (!priv->memPrealloc && - virJSONValueObjectAdd(&props, "B:prealloc", prealloc, NULL) < 0) + virJSONValueObjectAdd(&props, + "B:prealloc", prealloc, + "p:prealloc-threads", def->mem.allocation_threads, + NULL) < 0) return -1; } diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args index 6f5a983d3a..68bbd73551 100644 --- a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args @@ -16,7 +16,7 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-instance-00000092/.config \ -m size=14680064k,slots=16,maxmem=1099511627776k \ -overcommit mem-lock=off \ -smp 8,sockets=1,dies=1,cores=8,threads=1 \ --object '{"qom-type":"memory-backend-memfd","id":"ram-node0","hugetlb":true,"hugetlbsize":2097152,"share":true,"prealloc":true,"size":15032385536,"host-nodes":[3],"policy":"preferred"}' \ +-object '{"qom-type":"memory-backend-memfd","id":"ram-node0","hugetlb":true,"hugetlbsize":2097152,"share":true,"prealloc":true,"prealloc-threads":8,"size":15032385536,"host-nodes":[3],"policy":"preferred"}' \ -numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \ -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ -display none \ @@ -29,7 +29,7 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-instance-00000092/.config \ -no-acpi \ -boot strict=on \ -device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' 
\ --object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":true,"prealloc":true,"size":536870912,"host-nodes":[3],"policy":"preferred"}' \ +-object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":true,"prealloc":true,"prealloc-threads":8,"size":536870912,"host-nodes":[3],"policy":"preferred"}' \ -device '{"driver":"nvdimm","node":0,"memdev":"memnvdimm0","id":"nvdimm0","slot":0}' \ -audiodev '{"id":"audio1","driver":"none"}' \ -device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x3"}' \ -- 2.34.1

On Tue, Mar 22, 2022 at 04:05:17PM +0100, Michal Privoznik wrote:
Let's generate the prealloc-threads property on the command line if the domain configuration requests it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

Since its v5.0.0 release QEMU is capable of specifying number of threads used to allocate memory. It defaults to 1, which may be too low for humongous guests with gigantic pages. In general, on QEMU cmd line level it is possible to use different number of threads per each memory-backend-* object, in practical terms it's not useful. Therefore, use <memoryBacking/> to set guest wide value and let all memory devices 'inherit' it, silently. IOW, don't introduce per device knob because that would only complicate things for a little or no benefit. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 8 +++++--- src/conf/domain_conf.c | 15 ++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 19 +++++++++++++------ tests/qemuxml2argvdata/memfd-memory-numa.xml | 2 +- 5 files changed, 34 insertions(+), 11 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index d188de4858..e492532004 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1004,7 +1004,7 @@ Memory Backing <locked/> <source type="file|anonymous|memfd"/> <access mode="shared|private"/> - <allocation mode="immediate|ondemand"/> + <allocation mode="immediate|ondemand" threads='8'/> <discard/> </memoryBacking> ... @@ -1053,8 +1053,10 @@ influence how virtual memory pages are backed by host pages. Using the ``mode`` attribute, specify if the memory is to be "shared" or "private". This can be overridden per numa node by ``memAccess``. ``allocation`` - Using the ``mode`` attribute, specify when to allocate the memory by - supplying either "immediate" or "ondemand". + Using the optional ``mode`` attribute, specify when to allocate the memory by + supplying either "immediate" or "ondemand". :since:`Since 8.2.0` it is + possible to set the number of threads that hypervisor uses to allocate + memory via ``threads`` attribute. 
``discard`` When set and supported by hypervisor the memory content is discarded just before guest shuts down (or when DIMM module is unplugged). Please note that diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 153954a0b0..731139f80f 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -18915,6 +18915,13 @@ virDomainDefParseMemory(virDomainDef *def, VIR_FREE(tmp); } + if (virXPathUInt("string(./memoryBacking/allocation/@threads)", + ctxt, &def->mem.allocation_threads) == -2) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Failed to parse memory allocation threads")); + return -1; + } + if (virXPathNode("./memoryBacking/hugepages", ctxt)) { /* hugepages will be used */ if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) { @@ -27465,6 +27472,7 @@ virDomainMemorybackingFormat(virBuffer *buf, const virDomainMemtune *mem) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) allocAttrBuf = VIR_BUFFER_INITIALIZER; if (mem->nhugepages) virDomainHugepagesFormat(&childBuf, mem->hugepages, mem->nhugepages); @@ -27479,8 +27487,13 @@ virDomainMemorybackingFormat(virBuffer *buf, virBufferAsprintf(&childBuf, "<access mode='%s'/>\n", virDomainMemoryAccessTypeToString(mem->access)); if (mem->allocation) - virBufferAsprintf(&childBuf, "<allocation mode='%s'/>\n", + virBufferAsprintf(&allocAttrBuf, " mode='%s'", virDomainMemoryAllocationTypeToString(mem->allocation)); + if (mem->allocation_threads > 0) + virBufferAsprintf(&allocAttrBuf, " threads='%u'", mem->allocation_threads); + + virXMLFormatElement(&childBuf, "allocation", &allocAttrBuf, NULL); + if (mem->discard) virBufferAddLit(&childBuf, "<discard/>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index b69abfa270..49c964e6e1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2702,6 +2702,7 @@ struct _virDomainMemtune { int source; /* enum virDomainMemorySource */ int access; /* enum virDomainMemoryAccess 
*/ int allocation; /* enum virDomainMemoryAllocation */ + unsigned int allocation_threads; virTristateBool discard; }; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index 9c1b64a644..34bccee2f5 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -740,12 +740,19 @@ </optional> <optional> <element name="allocation"> - <attribute name="mode"> - <choice> - <value>immediate</value> - <value>ondemand</value> - </choice> - </attribute> + <optional> + <attribute name="mode"> + <choice> + <value>immediate</value> + <value>ondemand</value> + </choice> + </attribute> + </optional> + <optional> + <attribute name="threads"> + <ref name="unsignedInt"/> + </attribute> + </optional> </element> </optional> <optional> diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml index 1ebcee8939..1ac87e3aef 100644 --- a/tests/qemuxml2argvdata/memfd-memory-numa.xml +++ b/tests/qemuxml2argvdata/memfd-memory-numa.xml @@ -10,7 +10,7 @@ </hugepages> <source type='memfd'/> <access mode='shared'/> - <allocation mode='immediate'/> + <allocation mode='immediate' threads='8'/> </memoryBacking> <vcpu placement='static'>8</vcpu> <numatune> -- 2.34.1

On Thu, Mar 24, 2022 at 12:16:01PM +0100, Michal Privoznik wrote:
Since its v5.0.0 release QEMU is capable of specifying number of threads used to allocate memory. It defaults to 1, which may be too low for humongous guests with gigantic pages.
In general, at the QEMU command line level it is possible to use a different number of threads for each memory-backend-* object, but in practical terms it's not useful. Therefore, use <memoryBacking/> to set a guest-wide value and let all memory devices 'inherit' it, silently. IOW, don't introduce a per-device knob because that would only complicate things for little or no benefit.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Martin Kletzander <mkletzan@redhat.com>
participants (4):
- Daniel P. Berrangé
- Martin Kletzander
- Michal Privoznik
- Michal Prívozník