[PATCH 0/3] Allow reserving more memory for PCI controllers

*** BLURB HERE *** Michal Prívozník (3): conf: Introduce @memReserve to <controller/> qemu_validate: Restrict setting @memReserve only to some controllers qemu_command: Generate mem-reserve for controllers docs/formatdomain.rst | 6 +++++ src/conf/domain_conf.c | 9 +++++++ src/conf/domain_conf.h | 3 +++ src/conf/schemas/domaincommon.rng | 5 ++++ src/qemu/qemu_command.c | 3 +++ src/qemu/qemu_validate.c | 25 +++++++++++++++++++ .../q35-usb2.x86_64-latest.args | 2 +- tests/qemuxml2argvdata/q35-usb2.xml | 2 +- .../q35-usb2.x86_64-latest.xml | 2 +- 9 files changed, 54 insertions(+), 3 deletions(-) -- 2.41.0

There are PCI devices with pretty large non-prefetchable memory, for instance: Memory at 9d800000 (64-bit, non-prefetchable) [size=8M] Memory at a6800000 (64-bit, non-prefetchable) [size=16K] For cold plugged devices this is not a problem, because firmware sets PCI controllers in a way that make devices behind them just work. Problem arises if such PCI device is to be hot plugged. Since the PCI device wasn't present at cold boot, firmware could not take it into calculations and the amount of reserved memory is not sufficient. Introduce a know that allows users overriding value computed by FW and thus allow hot plug of such PCI devices. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 9 +++++++++ src/conf/domain_conf.h | 3 +++ src/conf/schemas/domaincommon.rng | 5 +++++ tests/qemuxml2argvdata/q35-usb2.xml | 2 +- tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml | 2 +- 6 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 310d2bc427..0a19f97618 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -4103,6 +4103,12 @@ generated by libvirt. :since:`Since 1.2.19 (QEMU only).` ``index`` pci-root controllers for pSeries guests use this attribute to record the order they will show up in the guest. :since:`Since 3.6.0` +``memReserve`` + Some PCI devices have non-prefetchable memory bar larger than 2MiB. Use this + attribute to override value computed by firmware and thus make controller + reserve more memory (in KiB) so that such PCI device can be hot plugged. + For cold plugged PCI devices firmware recognizes this and computes correct + value. :since:`Since 10.0.0` For machine types which provide an implicit PCI bus, the pci-root controller with index=0 is auto-added and required to use PCI devices. pci-root has no diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 22ad43e1d7..3b0ce6bdc4 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -8456,6 +8456,11 @@ virDomainControllerDefParseXML(virDomainXMLOption *xmlopt, &def->opts.pciopts.targetIndex, def->opts.pciopts.targetIndex) < 0) return NULL; + + if (virXMLPropULongLong(targetNodes[0], "memReserve", 0, + VIR_XML_PROP_NONZERO, + &def->opts.pciopts.memReserve) < 0) + return NULL; } } @@ -23001,6 +23006,10 @@ virDomainControllerDefFormatPCI(virBuffer *buf, virBufferAsprintf(&targetAttrBuf, " hotplug='%s'", virTristateSwitchTypeToString(def->opts.pciopts.hotplug)); } + if (def->opts.pciopts.memReserve) { + virBufferAsprintf(&targetAttrBuf, " memReserve='%llu'", + def->opts.pciopts.memReserve); + } if (def->opts.pciopts.numaNode != -1) virBufferAsprintf(&targetChildBuf, "<node>%d</node>\n", def->opts.pciopts.numaNode); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index ed07859bc5..32d998243f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -734,6 +734,9 @@ struct _virDomainPCIControllerOpts { */ int numaNode; virTristateSwitch hotplug; /* 'off' to prevent hotplug/unplug, default 'on' */ + + unsigned long long memReserve; /* used by pci-bridge and pcie-root-port, + 0 == undef, KiB */ }; struct _virDomainUSBControllerOpts { diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index b98a2ae602..d26e4c41f2 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -2874,6 +2874,11 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <attribute name="memReserve"> + <ref name="unsignedLong"/> + </attribute> + </optional> </element> </optional> <!-- *-root controllers have an optional element "pcihole64"--> diff --git a/tests/qemuxml2argvdata/q35-usb2.xml b/tests/qemuxml2argvdata/q35-usb2.xml index 571782a17c..4f5a5580ac 100644 --- a/tests/qemuxml2argvdata/q35-usb2.xml +++ b/tests/qemuxml2argvdata/q35-usb2.xml @@ -25,7 +25,7 @@ </controller> <controller type='pci' index='2' model='pci-bridge'> <model name='pci-bridge'/> - <target chassisNr='56'/> + <target chassisNr='56' memReserve='8196'/> </controller> <controller type='usb' index='0' model='ich9-ehci1'/> <controller type='usb' index='0' model='ich9-uhci1'/> diff --git a/tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml b/tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml index b860ae2dee..2bbbbfe346 100644 --- a/tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml +++ b/tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml @@ -30,7 +30,7 @@ </controller> <controller type='pci' index='2' model='pci-bridge'> <model name='pci-bridge'/> - <target chassisNr='56'/> + <target chassisNr='56' memReserve='8196'/> <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> </controller> <controller type='usb' index='0' model='ich9-ehci1'> -- 2.41.0

On a Friday in 2023, Michal Privoznik wrote:
There are PCI devices with pretty large non-prefetchable memory, for instance:
Memory at 9d800000 (64-bit, non-prefetchable) [size=8M] Memory at a6800000 (64-bit, non-prefetchable) [size=16K]
For cold plugged devices this is not a problem, because firmware sets PCI controllers in a way that make devices behind them just work. Problem arises if such PCI device is to be hot plugged. Since the PCI device wasn't present at cold boot, firmware could not take it into calculations and the amount of reserved memory is not sufficient.
Introduce a know that allows users overriding value computed by FW and thus allow hot plug of such PCI devices.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.rst | 6 ++++++ src/conf/domain_conf.c | 9 +++++++++ src/conf/domain_conf.h | 3 +++ src/conf/schemas/domaincommon.rng | 5 +++++ tests/qemuxml2argvdata/q35-usb2.xml | 2 +- tests/qemuxml2xmloutdata/q35-usb2.x86_64-latest.xml | 2 +- 6 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 310d2bc427..0a19f97618 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -4103,6 +4103,12 @@ generated by libvirt. :since:`Since 1.2.19 (QEMU only).` ``index`` pci-root controllers for pSeries guests use this attribute to record the order they will show up in the guest. :since:`Since 3.6.0` +``memReserve`` + Some PCI devices have non-prefetchable memory bar larger than 2MiB. Use this
As far as I understand, 2 MiB is the default in some firmwares and configurations, but this should not be universally needed for devices with larger memory requirements while using newer edk2: https://bugzilla.redhat.com/show_bug.cgi?id=2055123#c32
+ attribute to override value computed by firmware and thus make controller + reserve more memory (in KiB) so that such PCI device can be hot plugged. + For cold plugged PCI devices firmware recognizes this and computes correct + value. :since:`Since 10.0.0`
So is the value is for all the devices plugged (hot or cold) into the controller? also, "Since 10.1.0" now.
diff --git a/tests/qemuxml2argvdata/q35-usb2.xml b/tests/qemuxml2argvdata/q35-usb2.xml index 571782a17c..4f5a5580ac 100644 --- a/tests/qemuxml2argvdata/q35-usb2.xml +++ b/tests/qemuxml2argvdata/q35-usb2.xml @@ -25,7 +25,7 @@ </controller> <controller type='pci' index='2' model='pci-bridge'> <model name='pci-bridge'/> - <target chassisNr='56'/> + <target chassisNr='56' memReserve='8196'/> </controller> <controller type='usb' index='0' model='ich9-ehci1'/> <controller type='usb' index='0' model='ich9-uhci1'/>
Please rebase on top of Peter's test changes. Jano

Only two controller models allow setting mem-reserve: pcie-root-port and pci-bridge. Reflect this fact during validation. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_validate.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index e475ad035e..38aaf72422 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -3987,6 +3987,31 @@ qemuValidateDomainDeviceDefControllerPCI(const virDomainControllerDef *cont, } } + /* memReserve */ + switch ((virDomainControllerModelPCI) cont->model) { + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_BRIDGE: + break; + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_SWITCH_DOWNSTREAM_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT: + case VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_TO_PCI_BRIDGE: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_SWITCH_UPSTREAM_PORT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS: + case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS: + if (pciopts->memReserve) { + virReportControllerInvalidOption(cont, model, modelName, "memReserve"); + return -1; + } + break; + + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_DEFAULT: + case VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST: + default: + virReportEnumRangeError(virDomainControllerModelPCI, cont->model); + } + /* QEMU device availability */ if (cap < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, -- 2.41.0

Pretty straightforward. Just put mem-reserve attribute whenever it's set. Previous commit ensures it's set only for valid controller models. Resolves: https://issues.redhat.com/browse/RHEL-7461 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 +++ tests/qemuxml2argvdata/q35-usb2.x86_64-latest.args | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 23909dbbab..7d611cfa87 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2679,6 +2679,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, if (virJSONValueObjectAdd(&props, "s:driver", modelName, "i:chassis_nr", pciopts->chassisNr, + "P:mem-reserve", pciopts->memReserve * 1024, "s:id", def->info.alias, NULL) < 0) return -1; @@ -2719,6 +2720,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, "i:chassis", pciopts->chassis, "s:id", def->info.alias, "T:hotplug", pciopts->hotplug, + "P:mem-reserve", pciopts->memReserve * 1024, NULL) < 0) return -1; @@ -2727,6 +2729,7 @@ qemuBuildControllerPCIDevProps(virDomainControllerDef *def, if (virJSONValueObjectAdd(&props, "s:driver", modelName, "i:index", pciopts->targetIndex, + "P:mem-reserve", pciopts->memReserve * 1024, "s:id", def->info.alias, NULL) < 0) return -1; diff --git a/tests/qemuxml2argvdata/q35-usb2.x86_64-latest.args b/tests/qemuxml2argvdata/q35-usb2.x86_64-latest.args index 83eb7d7b4e..debf77b456 100644 --- a/tests/qemuxml2argvdata/q35-usb2.x86_64-latest.args +++ b/tests/qemuxml2argvdata/q35-usb2.x86_64-latest.args @@ -27,7 +27,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ -no-shutdown \ -boot strict=on \ -device '{"driver":"i82801b11-bridge","id":"pci.1","bus":"pcie.0","addr":"0x1e"}' \ --device '{"driver":"pci-bridge","chassis_nr":56,"id":"pci.2","bus":"pci.1","addr":"0x0"}' \ +-device '{"driver":"pci-bridge","chassis_nr":56,"mem-reserve":8392704,"id":"pci.2","bus":"pci.1","addr":"0x0"}' \ -device '{"driver":"ich9-usb-ehci1","id":"usb","bus":"pcie.0","addr":"0x1d.0x7"}' \ -device '{"driver":"ich9-usb-uhci1","masterbus":"usb.0","firstport":0,"bus":"pcie.0","multifunction":true,"addr":"0x1d"}' \ -device '{"driver":"ich9-usb-uhci2","masterbus":"usb.0","firstport":2,"bus":"pcie.0","addr":"0x1d.0x1"}' \ -- 2.41.0
participants (3)
-
Ján Tomko
-
Michal Privoznik
-
Michal Prívozník