[PATCH v2 0/4] Introduce dynamicMemslots attribute for virtio-mem

v2 of: https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/thread/MWYGW... diff to v1: - Don't set .unplugged-inaccessible because it's not compatible with !x84_64 and even on x86_64 it's likely to get deprecated soon. Thanks David for pointing this out! Michal Prívozník (4): conf: Introduce dynamicMemslots attribute for virtio-mem qemu_capabilities: Add QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS capability qemu_validate: Check capability for virtio-mem dynamicMemslots qemu_command: Generate cmd line for virtio-mem dynamicMemslots docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 3 +++ src/qemu/qemu_validate.c | 7 +++++++ .../qemucapabilitiesdata/caps_8.2.0_x86_64.xml | 1 + ...emory-hotplug-virtio-mem.x86_64-latest.args | 2 +- .../memory-hotplug-virtio-mem.xml | 2 +- 11 files changed, 45 insertions(+), 3 deletions(-) -- 2.41.0

Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation). Therefore, introduce new <target/> attribute which will control that QEMU knob. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured. + For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0` + The following optional elements may be used: ``label`` diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 39e9879a8a..e677023b17 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13499,6 +13499,10 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, &def->target.virtio_mem.requestedsize, false, false) < 0) return -1; + if (virXMLPropTristateBool(node, "dynamicMemslots", VIR_XML_PROP_NONE, + &def->target.virtio_mem.dynamicMemslots) < 0) + return -1; + addrNode = virXPathNode("./address", ctxt); addr = &def->target.virtio_mem.address; break; @@ -21173,6 +21177,12 @@ virDomainMemoryDefCheckABIStability(virDomainMemoryDef *src, src->target.virtio_mem.address); return false; } + + if (src->target.virtio_mem.dynamicMemslots != dst->target.virtio_mem.dynamicMemslots) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Target memory device 'dynamicMemslots' property doesn't match source memory device")); + return false; + } break; case VIR_DOMAIN_MEMORY_MODEL_DIMM: @@ -25374,6 +25384,7 @@ virDomainMemoryTargetDefFormat(virBuffer *buf, unsigned int flags) { g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER; virBufferAsprintf(&childBuf, "<size unit='KiB'>%llu</size>\n", def->size); if (def->targetNode >= 0) @@ -25413,6 +25424,11 @@ virDomainMemoryTargetDefFormat(virBuffer *buf, if (def->target.virtio_mem.address) virBufferAsprintf(&childBuf, "<address base='0x%llx'/>\n", def->target.virtio_mem.address); + + if (def->target.virtio_mem.dynamicMemslots) { + virBufferAsprintf(&attrBuf, " dynamicMemslots='%s'", + virTristateBoolTypeToString(def->target.virtio_mem.dynamicMemslots)); + } break; case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC: @@ -25422,7 +25438,7 @@ virDomainMemoryTargetDefFormat(virBuffer *buf, break; } - virXMLFormatElement(buf, "target", NULL, &childBuf); + virXMLFormatElement(buf, "target", &attrBuf, &childBuf); } static int diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 58b2c92f4c..88ff3e1d84 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2674,6 +2674,7 @@ struct _virDomainMemoryDef { unsigned long long currentsize; /* kibibytes, valid for an active domain only and parsed */ unsigned long long address; /* address where memory is mapped */ + virTristateBool dynamicMemslots; } virtio_mem; struct { } sgx_epc; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index f318c06797..08c408e138 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -7260,6 +7260,11 @@ <define name="memorydev-target"> <element name="target"> + <optional> + <attribute name="dynamicMemslots"> + <ref name="virYesNo"/> + </attribute> + </optional> <interleave> <element name="size"> <ref name="scaledInteger"/> diff --git a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml index c578209d8a..359ece7a77 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml +++ b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml @@ -60,7 +60,7 @@ <nodemask>1-3</nodemask> <pagesize unit='KiB'>2048</pagesize> </source> - <target> + <target dynamicMemslots='yes'> <size unit='KiB'>2097152</size> <node>0</node> <block unit='KiB'>2048</block> -- 2.41.0

On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured.
+ For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0`
Is there any drawback in always setting 'yes'? (modulo cases when it would not work, obviously) Disclaimer, I didn't read the qemu commit, but IMO this is an important information to see how we should approach this feature and libvirt users won't have the pointer to the qemu commit here in the docs.

On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured.
+ For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0`
Is there any drawback in always setting 'yes'? (modulo cases when it would not work, obviously)
In combination with some vhost devices (especially vhost-user), starting QEMU might fail until these devices support more memslots. There is ongoing work to improve these vhost-user devices. To make these setups temporarily work, one would have to manually set "dynamic-memslots=off" here, which is not too bad I guess. Besides that, I don't think there is a real drawback -- there are mostly only benefits of having it :) I'm actually hoping that we can default-enable it, at least in RHEL soon. Maybe we can default-enable it right away and document that vhost-user devices will require it to be manually disabled until they support more memslots? -- Cheers, David / dhildenb

On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured. + For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0`
Is there any drawback in always setting 'yes'? (modulo cases when it would not work, obviously)
In combination with some vhost devices (especially vhost-user), starting QEMU might fail until these devices support more memslots. There is ongoing work to improve these vhost-user devices.
To make these setups temporarily work, one would have to manually set "dynamic-memslots=off" here, which is not too bad I guess.
Besides that, I don't think there is a real drawback -- there are mostly only benefits of having it :)
I'm actually hoping that we can default-enable it, at least in RHEL soon.
Maybe we can default-enable it right away and document that vhost-user devices will require it to be manually disabled until they support more memslots?
This series is very specifically adding it only for virtio-mem, which AFAIK doesn't have any vhost-user variant, thus having this as a configurable knob is almost pointless. The only real reason to have it is to ensure ABI compatibility between configs and when migrating, but ideally should be completely hidden if there's no need for users to disable it. (libvirt can do that but it's a bit less straightforward) Ideally for any kind of options which are ABI incompatible but there's no real reason for users to disable them, qemu should add them as a machine type property (to ensure ABI stability) and just enable them by default. QEMU would need to bind them to a machine type anyways once changing the default.

On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured. + For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0`
Is there any drawback in always setting 'yes'? (modulo cases when it would not work, obviously)
In combination with some vhost devices (especially vhost-user), starting QEMU might fail until these devices support more memslots. There is ongoing work to improve these vhost-user devices.
To make these setups temporarily work, one would have to manually set "dynamic-memslots=off" here, which is not too bad I guess.
Besides that, I don't think there is a real drawback -- there are mostly only benefits of having it :)
I'm actually hoping that we can default-enable it, at least in RHEL soon.
Maybe we can default-enable it right away and document that vhost-user devices will require it to be manually disabled until they support more memslots?
This series is very specifically adding it only for virtio-mem, which AFAIK doesn't have any vhost-user variant, thus having this as a configurable knob is almost pointless.
No, that problem is the combination. Assume you have VM with both virtio-mem to hotplug memory and virtio-user-fs+virtiofsd. The memory that virtio-mem provides has to be usable by virtio-user-fs+virtiofsd. Therefore, we have to map that shared memory (provided by virtio-mem) into the virtiofsd process. However, virtiofsd only supports 32 memslots and virito-mem wants to use up to 256 (like having 256 DIMMs). virtiofsd support is in the works but not upstream yet. So if you start a vm with -device virtio-mem-pci,dynamic-memslots=on,... -device virtio-user-fs-pci, ... QEMU might reject to start if the virtio-user-fs-pci backend has insufficient memory slots. In that case (and only then), we want to disable dynamic memslots. Without the virtio-user complication (that I hate), this wouldn't even be a configurable toggle.
The only real reason to have it is to ensure ABI compatibility between configs and when migrating, but ideally should be completely hidden if there's no need for users to disable it. (libvirt can do that but it's a bit less straightforward)
Ideally for any kind of options which are ABI incompatible but there's no real reason for users to disable them, qemu should add them as a machine type property (to ensure ABI stability) and just enable them by default.
QEMU would need to bind them to a machine type anyways once changing the default.
Yes. -- Cheers, David / dhildenb

On Fri, Jan 05, 2024 at 18:30:56 +0100, David Hildenbrand wrote:
On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
[...]
This series is very specifically adding it only for virtio-mem, which AFAIK doesn't have any vhost-user variant, thus having this as a configurable knob is almost pointless.
No, that problem is the combination. Assume you have VM with both virtio-mem to hotplug memory and virtio-user-fs+virtiofsd.
The memory that virtio-mem provides has to be usable by virtio-user-fs+virtiofsd. Therefore, we have to map that shared memory (provided by virtio-mem) into the virtiofsd process.
However, virtiofsd only supports 32 memslots and virito-mem wants to use up to 256 (like having 256 DIMMs). virtiofsd support is in the works but not upstream yet.
So if you start a vm with
-device virtio-mem-pci,dynamic-memslots=on,... -device virtio-user-fs-pci, ...
QEMU might reject to start if the virtio-user-fs-pci backend has insufficient memory slots.
In that case (and only then), we want to disable dynamic memslots. Without the virtio-user complication (that I hate), this wouldn't even be a configurable toggle.
Ah I see. So we can't simply enable it by default because AND not even automatically: 1) It would break existing configs (but solvable before startup) 2) it would break hotplug into VMs (when hotplugging e.g. virtiofs) This means that if we'd want to implement it now it MUST be a configurable toggle. Note that libvirt will have to carry this code forever even once qemu enables this by default and thus the toggle in libvirt becomes effectively pointless. Thus if this will be a short-term workaround I'd really welcome if we'd re-consider doing this. If the benefits from enabling the feature, and the timeline of auto-enabling it in qemu are such that we want to implement the toggle in libvirt for the time being, this patch needs to document (in formatdomain.rst, not the commit message the following): 1) the fact that it's beneficial to enable it 2) which setup it could block (mentioning also hotplug) 3) that qemu will auto-enable the feature once it's fully available Having this toggle without any of the above will result in basically nobody enabling it.

On 08.01.24 09:38, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 18:30:56 +0100, David Hildenbrand wrote:
On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
[...]
This series is very specifically adding it only for virtio-mem, which AFAIK doesn't have any vhost-user variant, thus having this as a configurable knob is almost pointless.
No, that problem is the combination. Assume you have VM with both virtio-mem to hotplug memory and virtio-user-fs+virtiofsd.
The memory that virtio-mem provides has to be usable by virtio-user-fs+virtiofsd. Therefore, we have to map that shared memory (provided by virtio-mem) into the virtiofsd process.
However, virtiofsd only supports 32 memslots and virito-mem wants to use up to 256 (like having 256 DIMMs). virtiofsd support is in the works but not upstream yet.
So if you start a vm with
-device virtio-mem-pci,dynamic-memslots=on,... -device virtio-user-fs-pci, ...
QEMU might reject to start if the virtio-user-fs-pci backend has insufficient memory slots.
In that case (and only then), we want to disable dynamic memslots. Without the virtio-user complication (that I hate), this wouldn't even be a configurable toggle.
Ah I see. So we can't simply enable it by default because AND not even automatically:
1) It would break existing configs (but solvable before startup)
Right. One could keep the toggle disabled for old QEMU machines, or disable it if any vhost-user device is configured as well. yes, that is more complicated and better avoided.
2) it would break hotplug into VMs (when hotplugging e.g. virtiofs)
Right, although not a common setup, especially not with virtio-mem.
This means that if we'd want to implement it now it MUST be a configurable toggle.
Note that libvirt will have to carry this code forever even once qemu enables this by default and thus the toggle in libvirt becomes effectively pointless. Thus if this will be a short-term workaround I'd really welcome if we'd re-consider doing this.
I assume that in the long-term everything based on the vhost-user rust crate will become compatible. And at this point, we primarily only care about virtiofsd with virito-mem.
If the benefits from enabling the feature, and the timeline of auto-enabling it in qemu are such that we want to implement the toggle in libvirt for the time being, this patch needs to document (in formatdomain.rst, not the commit message the following):
1) the fact that it's beneficial to enable it 2) which setup it could block (mentioning also hotplug) 3) that qemu will auto-enable the feature once it's fully available
Having this toggle without any of the above will result in basically nobody enabling it.
I'm hoping that we can enable it always in RHEL where we can control which version of vortiofsd we ship, and can better control what we actually support. -- Cheers, David / dhildenb

On 1/8/24 10:00, David Hildenbrand wrote:
On 08.01.24 09:38, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 18:30:56 +0100, David Hildenbrand wrote:
On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
[...]
So what's the resolution here? IIUC, QEMU will enable this by default and keep it off for older machine types. If that's the case, we don't need these after all. Or am I misunderstanding something? Michal

On Wed, Jan 10, 2024 at 12:36:58 +0100, Michal Prívozník wrote:
On 1/8/24 10:00, David Hildenbrand wrote:
On 08.01.24 09:38, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 18:30:56 +0100, David Hildenbrand wrote:
On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote:
On 05.01.24 13:40, Peter Krempa wrote: > On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
[...]
So what's the resolution here? IIUC, QEMU will enable this by default and keep it off for older machine types. If that's the case, we don't need these after all. Or am I misunderstanding something?
So, at this point automatically enabling that feature is not possible as it makes the VM incompatible with 'vhost-user' devices, including ones hotplugged in the future. Based on the fact that it's about 'vhost-user' qemu won't be ever 100% sure that the possibly hotplugged device is modern enough to be compatible, as it depends on other software such as virtiofsd. As of such we will most likely need the flag as a workaround for when qemu decides to enable it, in case users want to use older software. Regarding this patchset, the documentation needs to be improved to clearly state the specific details about what users are supposed to, including outlining the workaround it provides: - if users don't want vhost-user devices (now, or hotplug) -> enable it - if users have modern vhost-user devices -> enable it - for now the default value is: disabled, but qemu might want to auto-enable it in the future - if there's a problem in the future once it's auto-enabled, disable it explicitly if you have problems with vhost-user (e.g. virtiofs) devices. With the clear guidance on when to set it the flag is acceptable as it sometimes requires policy decision (such as user wants to hotplug virtiofs, which is not yet fixed) which we can't make automatically

On 10.01.24 12:50, Peter Krempa wrote:
On Wed, Jan 10, 2024 at 12:36:58 +0100, Michal Prívozník wrote:
On 1/8/24 10:00, David Hildenbrand wrote:
On 08.01.24 09:38, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 18:30:56 +0100, David Hildenbrand wrote:
On 05.01.24 14:58, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 14:34:53 +0100, David Hildenbrand wrote: > On 05.01.24 13:40, Peter Krempa wrote: >> On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
[...]
So what's the resolution here? IIUC, QEMU will enable this by default and keep it off for older machine types. If that's the case, we don't need these after all. Or am I misunderstanding something?
So, at this point automatically enabling that feature is not possible as it makes the VM incompatible with 'vhost-user' devices, including ones hotplugged in the future.
Based on the fact that it's about 'vhost-user' qemu won't be ever 100% sure that the possibly hotplugged device is modern enough to be compatible, as it depends on other software such as virtiofsd.
As of such we will most likely need the flag as a workaround for when qemu decides to enable it, in case users want to use older software.
Agreed with all. -- Cheers, David / dhildenb

On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured.
+ For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0` +
Except for the docs here, which are insufficient as outlined in the other subthread, the code looks good, so for the code: Reviewed-by: Peter Krempa <pkrempa@redhat.com>

On 1/10/24 12:59, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured.
+ For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0` +
Except for the docs here, which are insufficient as outlined in the other subthread, the code looks good, so for the code:
How about this: For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified (accepted values "yes"/"no") which allows hypervisor to spread memory into multiple memory slots (allocate them dynamically based on the amount of memory exposed to the guest), resulting in smaller memory footprint. But be aware this may affect vhost-user devices. When enabled, older vhost-user devices may refuse to initialize resulting in failed domain startup or device hotplug (set this to "no" then). On the other hand, modern vhost-user devices, or when no vhost-user devices are present (nor they will be hotplugged), this leads to smaller memory footprint (set this to "yes" then). The current default is hypervisor dependant (for QEMU is "no"). If the default changes and you are having difficulties with vhost-user devices, try toggling this to "no". :since:`Since 10.1.0 and QEMU 8.2.0`
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
Thank you! Michal

On Wed, Jan 10, 2024 at 15:54:18 +0100, Michal Prívozník wrote:
On 1/10/24 12:59, Peter Krempa wrote:
On Fri, Jan 05, 2024 at 13:33:32 +0100, Michal Privoznik wrote:
Introduced in v8.2.0-rc0~74^2~2, QEMU now allows setting .dynamic-memslots attribute for virtio-mem-pci devices. When turned on, it allows memory exposed to guest to be split into multiple memslots and thus smaller memory footprint (see the original commit for detailed explanation).
Therefore, introduce new <target/> attribute which will control that QEMU knob.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 18 +++++++++++++++++- src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 5 +++++ .../memory-hotplug-virtio-mem.xml | 2 +- 5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 96e03a3807..57974de9f4 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -8395,6 +8395,12 @@ Example: usage of the memory devices The ``node`` subelement configures the guest NUMA node to attach the memory to. The element shall be used only if the guest has NUMA nodes configured.
+ For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified + (accepted values "yes"/"no") which allows hypervisor to spread memory into + multiple memory slots (allocate them dynamically based on the amount of + memory exposed to the guest), resulting in smaller memory footprint. + :since:`Since 10.0.0 and QEMU 8.2.0` +
Except for the docs here, which are insufficient as outlined in the other subthread, the code looks good, so for the code:
How about this:
For ``virtio-mem`` optional attribute ``dynamicMemslots`` can be specified (accepted values "yes"/"no") which allows hypervisor to spread memory into multiple memory slots (allocate them dynamically based on the amount of memory exposed to the guest), resulting in smaller memory footprint. But be aware this may affect vhost-user devices. When enabled, older vhost-user
I'd add: 'vhost-user devices (such as virtiofsd)'. Optionally you can add more, but virtiofsd is the one users can configure without being aware of it.a also perhaps 'vhost-user device implementations' for the next sentence.
devices may refuse to initialize resulting in failed domain startup or device hotplug (set this to "no" then). On the other hand, modern vhost-user
IMO the stuff in the parentheses can be dropped.
devices, or when no vhost-user devices are present (nor they will be hotplugged), this leads to smaller memory footprint (set this to "yes"
the memory footprint note is redundant from the first sentence. How about: When only modern vhost-user based devices will be used or when no vhost-user devices are expected to be used it's beneficial to enable this feature.
then). The current default is hypervisor dependant (for QEMU is "no"). If the default changes and you are having difficulties with vhost-user devices, try toggling this to "no". :since:`Since 10.1.0 and QEMU 8.2.0`
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
Thank you!
Michal

Starting from v8.2.0-rc0~74^2~2 QEMU has .dynamic-memslots attribute for virtio-mem-pci device. Introduce a capability which reflects that. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml | 1 + 3 files changed, 4 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index eb6b345906..f44e961e11 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -698,6 +698,7 @@ VIR_ENUM_IMPL(virQEMUCaps, /* 450 */ "run-with.async-teardown", /* QEMU_CAPS_RUN_WITH_ASYNC_TEARDOWN */ "virtio-blk-vhost-vdpa", /* QEMU_CAPS_DEVICE_VIRTIO_BLK_VHOST_VDPA */ + "virtio-mem-pci.dynamic-memslots", /* QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS */ ); @@ -1516,6 +1517,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVhostUserFS[] = static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioMemPCI[] = { { "prealloc", QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_PREALLOC, NULL }, + { "dynamic-memslots", QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS, NULL }, }; static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioIOMMU[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 3c4f7f625b..97022eab91 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -677,6 +677,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ /* 450 */ QEMU_CAPS_RUN_WITH_ASYNC_TEARDOWN, /* asynchronous teardown -run-with async-teardown=on|off */ QEMU_CAPS_DEVICE_VIRTIO_BLK_VHOST_VDPA, /* virtio-blk-vhost-vdpa block driver */ + QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS, /* -device virtio-mem-pci.dynamic-memslots= */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml b/tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml index ee52952702..f976a8d2df 100644 --- a/tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml @@ -199,6 +199,7 @@ <flag name='qcow2-discard-no-unref'/> <flag name='run-with.async-teardown'/> <flag name='virtio-blk-vhost-vdpa'/> + <flag name='virtio-mem-pci.dynamic-memslots'/> <version>8002000</version> <microcodeVersion>43100246</microcodeVersion> <package>v8.2.0</package> -- 2.41.0

On Fri, Jan 05, 2024 at 13:33:33 +0100, Michal Privoznik wrote:
Starting from v8.2.0-rc0~74^2~2 QEMU has .dynamic-memslots attribute for virtio-mem-pci device. Introduce a capability which reflects that.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + tests/qemucapabilitiesdata/caps_8.2.0_x86_64.xml | 1 + 3 files changed, 4 insertions(+)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

The QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS reflects whether QEMU is capable of .dynamic-memslots for virtio-mem. Use it when validating domain configuration. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index 9e50c2f45b..3b0dfec2b4 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -4971,6 +4971,13 @@ qemuValidateDomainDeviceDefMemory(virDomainMemoryDef *mem, _("virtio-mem isn't supported by this QEMU binary")); return -1; } + + if (mem->target.virtio_mem.dynamicMemslots == VIR_TRISTATE_BOOL_YES && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtio-mem does not support dynamicMemslots")); + return -1; + } break; case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC: -- 2.41.0

On Fri, Jan 05, 2024 at 13:33:34 +0100, Michal Privoznik wrote:
The QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI_DYNAMIC_MEMSLOTS reflects whether QEMU is capable of .dynamic-memslots for virtio-mem. Use it when validating domain configuration.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 7 +++++++ 1 file changed, 7 insertions(+)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

This is pretty straightforward. Resolves: https://issues.redhat.com/browse/RHEL-15316 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 +++ .../memory-hotplug-virtio-mem.x86_64-latest.args | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index bc285c0b6f..0ebd668a9c 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3608,6 +3608,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, unsigned long long requestedsize = 0; unsigned long long address = 0; bool prealloc = false; + bool dynamicMemslots = false; if (!mem->info.alias) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3649,6 +3650,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, blocksize = mem->target.virtio_mem.blocksize; requestedsize = mem->target.virtio_mem.requestedsize; address = mem->target.virtio_mem.address; + dynamicMemslots = mem->target.virtio_mem.dynamicMemslots; break; case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC: @@ -3671,6 +3673,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, "s:memdev", memdev, "B:prealloc", prealloc, "P:memaddr", address, + "B:dynamic-memslots", dynamicMemslots, "s:id", mem->info.alias, NULL) < 0) return NULL; diff --git a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args index 607ce9b0e8..489983a2cd 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args @@ -32,7 +32,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/.config \ -object '{"qom-type":"memory-backend-ram","id":"memvirtiomem0","reserve":false,"size":1073741824}' \ -device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":536870912,"memdev":"memvirtiomem0","id":"virtiomem0","bus":"pci.0","addr":"0x2"}' \ -object '{"qom-type":"memory-backend-file","id":"memvirtiomem1","mem-path":"/dev/hugepages2M/libvirt/qemu/-1-QEMUGuest1","reserve":false,"size":2147483648,"host-nodes":[1,2,3],"policy":"bind"}' \ --device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":1073741824,"memdev":"memvirtiomem1","prealloc":true,"memaddr":5637144576,"id":"virtiomem1","bus":"pci.1","addr":"0x1"}' \ +-device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":1073741824,"memdev":"memvirtiomem1","prealloc":true,"memaddr":5637144576,"dynamic-memslots":true,"id":"virtiomem1","bus":"pci.1","addr":"0x1"}' \ -blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}' \ -blockdev '{"node-name":"libvirt-1-format","read-only":false,"driver":"raw","file":"libvirt-1-storage"}' \ -device '{"driver":"ide-hd","bus":"ide.0","unit":0,"drive":"libvirt-1-format","id":"ide0-0-0","bootindex":1}' \ -- 2.41.0

On Fri, Jan 05, 2024 at 13:33:35 +0100, Michal Privoznik wrote:
This is pretty straightforward.
Resolves: https://issues.redhat.com/browse/RHEL-15316 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 +++ .../memory-hotplug-virtio-mem.x86_64-latest.args | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index bc285c0b6f..0ebd668a9c 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3608,6 +3608,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, unsigned long long requestedsize = 0; unsigned long long address = 0; bool prealloc = false; + bool dynamicMemslots = false;
if (!mem->info.alias) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3649,6 +3650,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, blocksize = mem->target.virtio_mem.blocksize; requestedsize = mem->target.virtio_mem.requestedsize; address = mem->target.virtio_mem.address; + dynamicMemslots = mem->target.virtio_mem.dynamicMemslots;
virTristateBool dynamicMemslots; typedef enum { VIR_TRISTATE_BOOL_ABSENT = 0, VIR_TRISTATE_BOOL_YES, VIR_TRISTATE_BOOL_NO, VIR_TRISTATE_BOOL_LAST } virTristateBool;
break;
case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC: @@ -3671,6 +3673,7 @@ qemuBuildMemoryDeviceProps(virQEMUDriverConfig *cfg, "s:memdev", memdev, "B:prealloc", prealloc, "P:memaddr", address, + "B:dynamic-memslots", dynamicMemslots,
Either VIR_TRISTATE_BOOL_YES or VIR_TRISTATE_BOOL_NO are non-zero, so the typecast to bool will make this set dynamic memslots to true in both cases if the parameter is present ... Since the test case doesn't contain an explicit 'no' ...
"s:id", mem->info.alias, NULL) < 0) return NULL; diff --git a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args index 607ce9b0e8..489983a2cd 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-virtio-mem.x86_64-latest.args @@ -32,7 +32,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/.config \ -object '{"qom-type":"memory-backend-ram","id":"memvirtiomem0","reserve":false,"size":1073741824}' \ -device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":536870912,"memdev":"memvirtiomem0","id":"virtiomem0","bus":"pci.0","addr":"0x2"}' \ -object '{"qom-type":"memory-backend-file","id":"memvirtiomem1","mem-path":"/dev/hugepages2M/libvirt/qemu/-1-QEMUGuest1","reserve":false,"size":2147483648,"host-nodes":[1,2,3],"policy":"bind"}' \ --device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":1073741824,"memdev":"memvirtiomem1","prealloc":true,"memaddr":5637144576,"id":"virtiomem1","bus":"pci.1","addr":"0x1"}' \ +-device '{"driver":"virtio-mem-pci","node":0,"block-size":2097152,"requested-size":1073741824,"memdev":"memvirtiomem1","prealloc":true,"memaddr":5637144576,"dynamic-memslots":true,"id":"virtiomem1","bus":"pci.1","addr":"0x1"}' \
... it's not visible here.
-blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}' \ -blockdev '{"node-name":"libvirt-1-format","read-only":false,"driver":"raw","file":"libvirt-1-storage"}' \ -device '{"driver":"ide-hd","bus":"ide.0","unit":0,"drive":"libvirt-1-format","id":"ide0-0-0","bootindex":1}' \
Use the proper tristate variable and "T:asdf" convertor when formatting the json and add also the explicit 'no' to the test case. With the above Reviewed-by: Peter Krempa <pkrempa@redhat.com>
participants (4)
-
David Hildenbrand
-
Michal Privoznik
-
Michal Prívozník
-
Peter Krempa