virHostdevPreparePCIDevices verifies if a PCI hostdev "A" is
in use by another domain by checking the 'activePCIHostdevs'
list. It also verifies if other domains are using any other
hostdev that belongs to the same IOMMU of "A".
This is not enough to cover all the cases. Suppose a PCI hostdev
"B" that shares the IOMMU with "A". We are currently able to abort
guest launch if "B" is being used by another domain, but if "B" is
not being used by any domain, we'll proceed guest execution. What
happens then is: if "A" is being used in managed mode, the guest
will fail to launch because Libvirt didn't detach "B" from the
host. If "A" is being used in un-managed mode, the guest might fail
or succeed to launch depending on whether both "A" and "B" were
detached manually prior to guest start.
Libvirt is now able to deal with these scenarios in a homogeneous
fashion, providing the same behavior for both managed and
un-managed mode while allowing parcial assignment for both cases as
well. This patch changes virHostdevPreparePCIDevices to check all
the PCI hostdevs of the domain against all the PCI hostdevs of the
IOMMU, failing to launch if the domain does not contain all the
PCI hostdevs declared, in both managed and un-managed cases.
After this patch, any existing domains that were using PCI hostdevs
with un-managed mode, and were getting away with partial assignment
without declaring all the IOMMU PCI hostdevs in the domain XML, will
be forced to do so. The missing PCI hostdevs can be declared with
"<address type='none'/>" to keep them invisible to the guest,
making
the guest oblivious of this change. This is an annoyance for such
domains, but the payoff is more consistency of behavior between
managed and un-managed mode and more clarity on the domain XML,
forcing the admin to declare all PCI hostdevs of the IOMMU and
knowing exactly what will be detached from the host.
Signed-off-by: Daniel Henrique Barboza <danielhb413(a)gmail.com>
---
src/util/virhostdev.c | 64 +++++++++++++++++--
...ostdev-pci-multifunction-partial-fail.args | 31 +++++++++
...hostdev-pci-multifunction-partial-fail.xml | 35 ++++++++++
tests/qemuxml2argvtest.c | 4 ++
4 files changed, 127 insertions(+), 7 deletions(-)
create mode 100644 tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.args
create mode 100644 tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.xml
diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c
index 17eb290825..eff3ecb839 100644
--- a/src/util/virhostdev.c
+++ b/src/util/virhostdev.c
@@ -56,6 +56,13 @@ struct virHostdevIsPCINodeDeviceUsedData {
bool usesVFIO;
};
+struct virHostdevIsAllIOMMUGroupUsedData {
+ virPCIDeviceListPtr pcidevs;
+ const char *domainName;
+ const char *deviceName;
+ int iommuGroup;
+};
+
/* This module makes heavy use of bookkeeping lists contained inside a
* virHostdevManager instance to keep track of the devices' status. To make
* it easy to spot potential ownership errors when moving devices from one
@@ -111,6 +118,27 @@ static int virHostdevIsPCINodeDeviceUsed(virPCIDeviceAddressPtr
devAddr, void *o
return 0;
}
+static int virHostdevIsAllIOMMUGroupUsed(virPCIDeviceAddressPtr devAddr, void *opaque)
+{
+ struct virHostdevIsAllIOMMUGroupUsedData *helperData = opaque;
+ virPCIDevicePtr actual;
+
+ actual = virPCIDeviceListFindByIDs(helperData->pcidevs,
+ devAddr->domain, devAddr->bus,
+ devAddr->slot, devAddr->function);
+ if (actual) {
+ return 0;
+ } else {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("All devices of the same IOMMU group %d of "
+ "the PCI device %s must belong to domain %s"),
+ helperData->iommuGroup,
+ helperData->deviceName,
+ helperData->domainName);
+ return -1;
+ }
+}
+
static int virHostdevManagerOnceInit(void)
{
if (!VIR_CLASS_NEW(virHostdevManager, virClassForObject()))
@@ -724,9 +752,10 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr,
unsigned int flags)
{
g_autoptr(virPCIDeviceList) pcidevs = NULL;
+ g_autofree unsigned int *searchedIOMMUs = NULL;
int last_processed_hostdev_vf = -1;
- size_t i;
- int ret = -1;
+ size_t i, j;
+ int ret = -1, nSearchedIOMMUs = 0;
virPCIDeviceAddressPtr devAddr = NULL;
if (!nhostdevs)
@@ -735,6 +764,8 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr,
if (!(pcidevs = virHostdevGetPCIHostDeviceList(hostdevs, nhostdevs)))
return -1;
+ searchedIOMMUs = g_new0(unsigned int, virPCIDeviceListCount(pcidevs));
+
virObjectLock(mgr->activePCIHostdevs);
virObjectLock(mgr->inactivePCIHostdevs);
@@ -778,13 +809,32 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr,
goto cleanup;
/* VFIO devices belonging to same IOMMU group can't be
- * shared across guests. Check if that's the case. */
+ * shared across guests, and all of the must belong to the
+ * same domain. If a device isn't going to be assigned
+ * (flag unassigned is true) the guest will not use it, but
+ * the actual device must be detached together from the host
+ * anyway. */
if (usesVFIO) {
- data.usesVFIO = true;
- if (virPCIDeviceAddressIOMMUGroupIterate(devAddr,
- virHostdevIsPCINodeDeviceUsed,
- &data) < 0)
+ int devIOMMUGroup = virPCIDeviceAddressGetIOMMUGroupNum(devAddr);
+ struct virHostdevIsAllIOMMUGroupUsedData helper = {
+ pcidevs, dom_name, virPCIDeviceGetName(pci), devIOMMUGroup};
+ bool alreadySearched = false;
+
+ for (j = 0; j < nSearchedIOMMUs; j++) {
+ if (devIOMMUGroup == searchedIOMMUs[j]) {
+ alreadySearched = true;
+ break;
+ }
+ }
+
+ if (alreadySearched)
+ continue;
+
+ if (virPCIDeviceAddressIOMMUGroupIterate(
+ devAddr, virHostdevIsAllIOMMUGroupUsed, &helper) < 0)
goto cleanup;
+
+ searchedIOMMUs[nSearchedIOMMUs++] = devIOMMUGroup;
}
}
diff --git a/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.args
b/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.args
new file mode 100644
index 0000000000..4074968c84
--- /dev/null
+++ b/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.args
@@ -0,0 +1,31 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/tmp/lib/domain--1-delete \
+USER=test \
+LOGNAME=test \
+XDG_DATA_HOME=/tmp/lib/domain--1-delete/.local/share \
+XDG_CACHE_HOME=/tmp/lib/domain--1-delete/.cache \
+XDG_CONFIG_HOME=/tmp/lib/domain--1-delete/.config \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name delete \
+-S \
+-machine pc,accel=kvm,usb=off,dump-guest-core=off \
+-m 256 \
+-realtime mlock=off \
+-smp 4,sockets=4,cores=1,threads=1 \
+-uuid 583a8e8e-f0ce-4f53-89ab-092862148b25 \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-delete/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-usb \
+-device vfio-pci,host=0005:90:01.0,id=hostdev0,bus=pci.0,addr=0x3 \
+-device vfio-pci,host=0005:90:01.1,id=hostdev1,bus=pci.0,addr=0x4 \
+-device vfio-pci,host=0005:90:01.3,id=hostdev2,bus=pci.0,addr=0x5 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6
diff --git a/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.xml
b/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.xml
new file mode 100644
index 0000000000..fc3e3bb520
--- /dev/null
+++ b/tests/qemuxml2argvdata/hostdev-pci-multifunction-partial-fail.xml
@@ -0,0 +1,35 @@
+<domain type='kvm'>
+ <name>delete</name>
+ <uuid>583a8e8e-f0ce-4f53-89ab-092862148b25</uuid>
+ <memory unit='KiB'>262144</memory>
+ <vcpu placement='static'>4</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc'>hvm</type>
+ </os>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x0'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x1'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x3'/>
+ </source>
+ </hostdev>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index ea9c440f5f..46a0ac15f0 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1330,6 +1330,10 @@ mymain(void)
QEMU_CAPS_KVM,
QEMU_CAPS_DEVICE_VFIO_PCI);
+ DO_TEST("hostdev-pci-multifunction-partial-fail",
+ QEMU_CAPS_KVM,
+ QEMU_CAPS_DEVICE_VFIO_PCI);
+
DO_TEST("serial-file-log",
QEMU_CAPS_CHARDEV_FILE_APPEND,
QEMU_CAPS_DEVICE_ISA_SERIAL,
--
2.21.0