[PATCH 0/3] Allow reserving more memory for PCI controllers

This is a rebased version I've posted a while ago: https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/thread/DFQXP... Michal Prívozník (3): conf: Introduce @memReserve to <controller/> qemu_validate: Restrict setting @memReserve only to some controllers qemu_command: Generate mem-reserve for controllers docs/formatdomain.rst | 6 +++++ src/conf/domain_conf.c | 9 +++++++ src/conf/domain_conf.h | 3 +++ src/conf/schemas/domaincommon.rng | 5 ++++ src/qemu/qemu_command.c | 3 +++ src/qemu/qemu_validate.c | 25 +++++++++++++++++++ .../q35-usb2.x86_64-latest.args | 2 +- .../q35-usb2.x86_64-latest.xml | 2 +- tests/qemuxmlconfdata/q35-usb2.xml | 2 +- 9 files changed, 54 insertions(+), 3 deletions(-) -- 2.43.2

There are PCI devices with pretty large non-prefetchable memory, for instance: Memory at 9d800000 (64-bit, non-prefetchable) [size=8M] Memory at a6800000 (64-bit, non-prefetchable) [size=16K] For cold plugged devices this is not a problem, because firmware sets PCI controllers in a way that make devices behind them just work. Problem arises if such PCI device is to be hot plugged. Since the PCI device wasn't present at cold boot, firmware could not take it into calculations and the amount of reserved memory is not sufficient. Introduce a know that allows users overriding value computed by FW and thus allow hot plug of such PCI devices. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 9 +++++++++ src/conf/domain_conf.h | 3 +++ src/conf/schemas/domaincommon.rng | 5 +++++ tests/qemuxmlconfdata/q35-usb2.x86_64-latest.xml | 2 +- tests/qemuxmlconfdata/q35-usb2.xml | 2 +- 6 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index e2f66b982c..de7036e49c 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -4153,6 +4153,12 @@ generated by libvirt. :since:`Since 1.2.19 (QEMU only)`. ``index`` pci-root controllers for pSeries guests use this attribute to record the order they will show up in the guest. :since:`Since 3.6.0` +``memReserve`` + Some PCI devices have non-prefetchable memory bar larger than 2MiB. Use this + attribute to override value computed by firmware and thus make controller + reserve more memory (in KiB) so that such PCI device can be hot plugged. + For cold plugged PCI devices firmware recognizes this and computes correct + value. :since:`Since 10.3.0` For machine types which provide an implicit PCI bus, the pci-root controller with index=0 is auto-added and required to use PCI devices. pci-root has no diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 48c5d546da..a0912062ff 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -8528,6 +8528,11 @@ virDomainControllerDefParseXML(virDomainXMLOption *xmlopt, &def->opts.pciopts.targetIndex, def->opts.pciopts.targetIndex) < 0) return NULL; + + if (virXMLPropULongLong(targetNodes[0], "memReserve", 0, + VIR_XML_PROP_NONZERO, + &def->opts.pciopts.memReserve) < 0) + return NULL; } } @@ -23095,6 +23100,10 @@ virDomainControllerDefFormatPCI(virBuffer *buf, virBufferAsprintf(&targetAttrBuf, " hotplug='%s'", virTristateSwitchTypeToString(def->opts.pciopts.hotplug)); } + if (def->opts.pciopts.memReserve) { + virBufferAsprintf(&targetAttrBuf, " memReserve='%llu'", + def->opts.pciopts.memReserve); + } if (def->opts.pciopts.numaNode != -1) virBufferAsprintf(&targetChildBuf, "<node>%d</node>\n", def->opts.pciopts.numaNode); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5925faaf1a..356c25405b 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -738,6 +738,9 @@ struct _virDomainPCIControllerOpts { */ int numaNode; virTristateSwitch hotplug; /* 'off' to prevent hotplug/unplug, default 'on' */ + + unsigned long long memReserve; /* used by pci-bridge and pcie-root-port, + 0 == undef, KiB */ }; struct _virDomainUSBControllerOpts { diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index f386e46fae..d84e030255 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -2897,6 +2897,11 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <attribute name="memReserve"> + <ref name="unsignedLong"/> + </attribute> + </optional> </element> </optional> <!-- *-root controllers have an optional element "pcihole64"--> diff --git a/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.xml b/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.xml index b860ae2dee..2bbbbfe346 100644 --- a/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.xml +++ b/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.xml @@ -30,7 +30,7 @@ </controller> <controller type='pci' index='2' model='pci-bridge'> <model name='pci-bridge'/> - <target chassisNr='56'/> + <target chassisNr='56' memReserve='8196'/> <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> </controller> <controller type='usb' index='0' model='ich9-ehci1'> diff --git a/tests/qemuxmlconfdata/q35-usb2.xml b/tests/qemuxmlconfdata/q35-usb2.xml index 571782a17c..4f5a5580ac 100644 --- a/tests/qemuxmlconfdata/q35-usb2.xml +++ b/tests/qemuxmlconfdata/q35-usb2.xml @@ -25,7 +25,7 @@ </controller> <controller type='pci' index='2' model='pci-bridge'> <model name='pci-bridge'/> - <target chassisNr='56'/> + <target chassisNr='56' memReserve='8196'/> </controller> <controller type='usb' index='0' model='ich9-ehci1'/> <controller type='usb' index='0' model='ich9-uhci1'/> -- 2.43.2

On Mon, Apr 15, 2024 at 10:53:16AM +0200, Michal Privoznik wrote:
+++ b/docs/formatdomain.rst @@ -4153,6 +4153,12 @@ generated by libvirt. :since:`Since 1.2.19 (QEMU only)`. ``index`` pci-root controllers for pSeries guests use this attribute to record the order they will show up in the guest. :since:`Since 3.6.0` +``memReserve`` + Some PCI devices have non-prefetchable memory bar larger than 2MiB. Use this + attribute to override value computed by firmware and thus make controller + reserve more memory (in KiB) so that such PCI device can be hot plugged. + For cold plugged PCI devices firmware recognizes this and computes correct + value. :since:`Since 10.3.0`
Suggestion: "For cold plugged PCI devices, the firmware will automatically reserve the correct amount of memory."
+++ b/src/conf/domain_conf.c @@ -8528,6 +8528,11 @@ virDomainControllerDefParseXML(virDomainXMLOption *xmlopt, &def->opts.pciopts.targetIndex, def->opts.pciopts.targetIndex) < 0) return NULL; + + if (virXMLPropULongLong(targetNodes[0], "memReserve", 0, + VIR_XML_PROP_NONZERO, + &def->opts.pciopts.memReserve) < 0)
When I saw this, I immediately thought of https://bugzilla.redhat.com/show_bug.cgi?id=1408810 and was concerned that whatever you implemented here might rule out potentially implementing that in the future, or having to resort to some hacks. But since the parser will reject memReserve='0', we can still decide to relax things in the future and permit ioReserve='0' without creating nasty inconsistencies. Good. Reviewed-by: Andrea Bolognani <abologna@redhat.com> -- Andrea Bolognani / Red Hat / Virtualization

Only two controller models allow setting mem-reserve: pcie-root-port and pci-bridge. Reflect this fact during validation. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index de0867c4b1..fd7b90e47d 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -4091,6 +4091,31 @@ qemuValidateDomainDeviceDefControllerPCI(const virDomainControllerDef *cont, } } + /* memReserve */ + switch ((virDomainControllerModelPCI) cont->model) { + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_BRIDGE: + break; + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_SWITCH_DOWNSTREAM_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT: + case VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_TO_PCI_BRIDGE: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_SWITCH_UPSTREAM_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS: + if (pciopts->memReserve) { + virReportControllerInvalidOption(cont, model, modelName, "memReserve"); + return -1; + } + break; + + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_DEFAULT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST: + default: + virReportEnumRangeError(virDomainControllerModelPCI, cont->model); + } + /* QEMU device availability */ if (cap < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, -- 2.43.2

On Mon, Apr 15, 2024 at 10:53:17AM +0200, Michal Privoznik wrote:
Only two controller models allow setting mem-reserve: pcie-root-port and pci-bridge. Reflect this fact during validation.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
Reviewed-by: Andrea Bolognani <abologna@redhat.com> -- Andrea Bolognani / Red Hat / Virtualization

Pretty straightforward. Just put mem-reserve attribute whenever it's set. Previous commit ensures it's set only for valid controller models. Resolves: https://issues.redhat.com/browse/RHEL-7461 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 +++ tests/qemuxmlconfdata/q35-usb2.x86_64-latest.args | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9d4563861f..8d4442c360 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2776,6 +2776,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, if (virJSONValueObjectAdd(&props, "s:driver", modelName, "i:chassis_nr", pciopts->chassisNr, + "P:mem-reserve", pciopts->memReserve * 1024, "s:id", def->info.alias, NULL) < 0) return -1; @@ -2816,6 +2817,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, "i:chassis", pciopts->chassis, "s:id", def->info.alias, "T:hotplug", pciopts->hotplug, + "P:mem-reserve", pciopts->memReserve * 1024, NULL) < 0) return -1; @@ -2824,6 +2826,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, if (virJSONValueObjectAdd(&props, "s:driver", modelName, "i:index", pciopts->targetIndex, + "P:mem-reserve", pciopts->memReserve * 1024, "s:id", def->info.alias, NULL) < 0) return -1; diff --git a/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.args b/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.args index 717e97706e..86776e45f5 100644 --- a/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.args +++ b/tests/qemuxmlconfdata/q35-usb2.x86_64-latest.args @@ -27,7 +27,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ -no-shutdown \ -boot strict=on \ -device '{"driver":"i82801b11-bridge","id":"pci.1","bus":"pcie.0","addr":"0x1e"}' \ --device '{"driver":"pci-bridge","chassis_nr":56,"id":"pci.2","bus":"pci.1","addr":"0x0"}' \ +-device '{"driver":"pci-bridge","chassis_nr":56,"mem-reserve":8392704,"id":"pci.2","bus":"pci.1","addr":"0x0"}' \ -device '{"driver":"ich9-usb-ehci1","id":"usb","bus":"pcie.0","addr":"0x1d.0x7"}' \ -device '{"driver":"ich9-usb-uhci1","masterbus":"usb.0","firstport":0,"bus":"pcie.0","multifunction":true,"addr":"0x1d"}' \ -device '{"driver":"ich9-usb-uhci2","masterbus":"usb.0","firstport":2,"bus":"pcie.0","addr":"0x1d.0x1"}' \ -- 2.43.2

On Mon, Apr 15, 2024 at 10:53:18AM +0200, Michal Privoznik wrote:
+++ b/src/qemu/qemu_command.c @@ -2776,6 +2776,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, if (virJSONValueObjectAdd(&props, "s:driver", modelName, "i:chassis_nr", pciopts->chassisNr, + "P:mem-reserve", pciopts->memReserve * 1024,
I thought this couldn't possibly work correctly at first, but then I looked up what "P:" does :) Reviewed-by: Andrea Bolognani <abologna@redhat.com> -- Andrea Bolognani / Red Hat / Virtualization
participants (2)
-
Andrea Bolognani
-
Michal Privoznik