The recently added 'unassigned' address type for PCI hostdevs revealed
an uncommon behavior in a Power 8 host with a Broadcom BCM5719 PCIe
Multifunction card. With this setup, it is possible to passthrough the
BCM5719 functions to the domain, but unassigning the function zero. This
means that the guest is able to use non-zero functions of the device
without having access to the function zero. This is the output of
'lspci' in this scenario where only function 2 is being assigned:
$ lspci
0000:00:01.0 Unclassified device [00ff]: Red Hat, Inc Virtio memory balloon
0000:00:03.0 USB controller: Red Hat, Inc. QEMU XHCI Host Controller (rev 01)
0000:00:04.0 SCSI storage controller: Red Hat, Inc Virtio block device
0001:00:01.2 Ethernet controller: Broadcom Limited NetXtreme BCM5719 Gigabit Ethernet PCIe
(rev 01)
$
More details can be read at [1]. In short, no other architecture is allowing
this kind of behavior as PowerPC is displaying here. For x86, an extra
'pci=pcie_scan_all' kernel option would be needed for the guest to find this
device, while in PowerPC this wasn't needed. There's the question of
compliance with the PCI 3.0 spec that Alex Williamson brought up in [2] that
would be hurt if we allow this type of use. Allowing the zero function to
be unassigned will also conflict with the logic use for hotplug/unplug of
multifunction devices, which is a work in progress in Libvirt, since in both
hotplug and unplug we rely on function zero to trigger the operation in QEMU.
With all these downsides, summed up with the fact that the only case we have
proof where this might be a thing is a Power 8 host comboed with the BCM5719
card, let's forbid this kind of use in Libvirt altogether.
[1]
https://www.redhat.com/archives/libvir-list/2019-November/msg01099.html
[2]
https://www.redhat.com/archives/libvir-list/2019-November/msg01104.html
Signed-off-by: Daniel Henrique Barboza <danielhb413(a)gmail.com>
---
src/conf/domain_conf.c | 13 ++++++
...pci-multifunction-zero-unassigned-fail.xml | 42 +++++++++++++++++++
tests/qemuxml2argvtest.c | 4 ++
tests/virpcimock.c | 9 ++--
4 files changed, 65 insertions(+), 3 deletions(-)
create mode 100644
tests/qemuxml2argvdata/hostdev-pci-multifunction-zero-unassigned-fail.xml
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 52b6c40c44..270c70eafb 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -15644,7 +15644,20 @@ virDomainHostdevDefParseXML(virDomainXMLOptionPtr xmlopt,
| VIR_DOMAIN_DEF_PARSE_ALLOW_ROM) < 0)
goto error;
}
+
if (def->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+ /* Do not allow function 0 of a PCI multifunction device to
+ * be unassigned */
+ if (virHostdevIsPCIMultifunctionDevice(def)) {
+ if (def->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED
&&
+ def->source.subsys.u.pci.addr.function == 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Function zero of PCI Multifunction device "
+ "must always be assigned"));
+ goto error;
+ }
+ }
+
switch ((virDomainHostdevSubsysType) def->source.subsys.type) {
case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI:
if (virXPathBoolean("boolean(./readonly)", ctxt))
diff --git a/tests/qemuxml2argvdata/hostdev-pci-multifunction-zero-unassigned-fail.xml
b/tests/qemuxml2argvdata/hostdev-pci-multifunction-zero-unassigned-fail.xml
new file mode 100644
index 0000000000..1abce3b2e7
--- /dev/null
+++ b/tests/qemuxml2argvdata/hostdev-pci-multifunction-zero-unassigned-fail.xml
@@ -0,0 +1,42 @@
+<domain type='kvm'>
+ <name>delete</name>
+ <uuid>583a8e8e-f0ce-4f53-89ab-092862148b25</uuid>
+ <memory unit='KiB'>262144</memory>
+ <vcpu placement='static'>4</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc'>hvm</type>
+ </os>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x0'/>
+ </source>
+ <address type='unassigned'/>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x1'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x2'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x3'/>
+ </source>
+ </hostdev>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 5ee9c9b2dc..163664d913 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1330,6 +1330,10 @@ mymain(void)
QEMU_CAPS_KVM,
QEMU_CAPS_DEVICE_VFIO_PCI);
+ DO_TEST_PARSE_ERROR("hostdev-pci-multifunction-zero-unassigned-fail",
+ QEMU_CAPS_KVM,
+ QEMU_CAPS_DEVICE_VFIO_PCI);
+
DO_TEST("serial-file-log",
QEMU_CAPS_CHARDEV_FILE_APPEND,
QEMU_CAPS_DEVICE_ISA_SERIAL,
diff --git a/tests/virpcimock.c b/tests/virpcimock.c
index cd6ae1cff6..7be133d7a7 100644
--- a/tests/virpcimock.c
+++ b/tests/virpcimock.c
@@ -1000,9 +1000,12 @@ init_env(void)
MAKE_PCI_DEVICE("0001:01:00.1", 0x8086, 0x105e, 5);
MAKE_PCI_DEVICE("0005:80:00.0", 0x10b5, 0x8112, 6, .klass = 0x060400);
MAKE_PCI_DEVICE("0005:90:01.0", 0x1033, 0x0035, 7);
- MAKE_PCI_DEVICE("0005:90:01.1", 0x1033, 0x0035, 7);
- MAKE_PCI_DEVICE("0005:90:01.2", 0x1033, 0x00e0, 7);
- MAKE_PCI_DEVICE("0005:90:01.3", 0x1033, 0x00e0, 7);
+ MAKE_PCI_DEVICE("0005:90:01.1", 0x1033, 0x0035, 7,
+ .physfn = "0005:90:01.0"); /* Virtual Function */
+ MAKE_PCI_DEVICE("0005:90:01.2", 0x1033, 0x00e0, 7,
+ .physfn = "0005:90:01.0"); /* Virtual Function */
+ MAKE_PCI_DEVICE("0005:90:01.3", 0x1033, 0x00e0, 7,
+ .physfn = "0005:90:01.0"); /* Virtual Function */
MAKE_PCI_DEVICE("0000:0a:01.0", 0x8086, 0x0047, 8);
MAKE_PCI_DEVICE("0000:0a:02.0", 0x8286, 0x0048, 8);
MAKE_PCI_DEVICE("0000:0a:03.0", 0x8386, 0x0048, 8);
--
2.23.0