Today, to use a PCI hostdev "A" in a domain, all PCI devices
that belongs to the same IOMMU group must also be declared in
the domain XML, meaning that all IOMMU devices are detached
from the host and all of them are visible to the guest.
The result is that the guest will have access to all devices,
but this is not necessarily what the administrator wanted. If only
the hostdev "A" was intended for guest usage, but hostdevs "B" and
"C" happened to be in the same IOMMU group of "A", the guest will
gain access to all 3 devices. This makes the administrator rely
on alternative solutions, such as use all hostdevs with un-managed
mode and detached all the IOMMU before the guest starts. If
use un-managed mode is not an option, the only alternative left is
an ACS patch to deny guest access to "B" and "C".
Discussions made in [1] and [2] led to the approach implemented
here. This patch builds upon the already existing
VIR_DOMAIN_DEF_FORMAT_ADDRESS_NONE format to use it as a indication
of whether a hostdev will be owned by a domain, but not visible
to the guest OS. This allows the administrator to declare all the
IOMMU while also choosing which hostdevs will be usable by the
guest. This new mechanic applies to all PCI hostdevs, regardless
of whether they are a PCI multifunction hostdev or not. Using
<address type='none'/> in any case other than a PCI hostdev
will result in error.
The use of a new 'unassigned' flag in virDomainHostdevDef makes
it easier to distinguish between the case we're handling here
versus the case in which a PCI hostdev has no <address> element.
Both are interpreted as VIR_DOMAIN_DEF_FORMAT_ADDRESS_NONE in
the existing code, thus reducing the logic to a flag makes
it easier to handle this new use case. In the next patch we'll
use the 'unassigned' flag to filter the hostdevs being
initialized in the QEMU command line.
[1]
https://www.redhat.com/archives/libvir-list/2019-July/msg01175.html
[2]
https://www.redhat.com/archives/libvir-list/2019-October/msg00298.html
Signed-off-by: Daniel Henrique Barboza <danielhb413(a)gmail.com>
---
docs/schemas/domaincommon.rng | 5 ++
src/conf/domain_conf.c | 56 ++++++++++++++++--
src/conf/domain_conf.h | 3 +
src/qemu/qemu_domain_address.c | 6 ++
.../hostdev-pci-address-none.xml | 42 ++++++++++++++
.../hostdev-pci-address-none.xml | 58 +++++++++++++++++++
tests/qemuxml2xmltest.c | 1 +
7 files changed, 165 insertions(+), 6 deletions(-)
create mode 100644 tests/qemuxml2argvdata/hostdev-pci-address-none.xml
create mode 100644 tests/qemuxml2xmloutdata/hostdev-pci-address-none.xml
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index a83c9ae7a5..a9a3f2e2d1 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -5463,6 +5463,11 @@
</attribute>
<ref name="dimmaddress"/>
</group>
+ <group>
+ <attribute name="type">
+ <value>none</value>
+ </attribute>
+ </group>
</choice>
</element>
</define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 54d6ae297e..1bb24d2632 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7159,6 +7159,15 @@ virDomainDeviceInfoFormat(virBufferPtr buf,
virBufferAddLit(buf, "/>\n");
}
+ /* Format <address type='none'/> for un-assigned hostdevs */
+ if (flags & VIR_DOMAIN_DEF_FORMAT_ADDRESS_NONE) {
+ virBufferAsprintf(&attrBuf, " type='%s'",
+ virDomainDeviceAddressTypeToString(info->type));
+ virXMLFormatElement(buf, "address", &attrBuf, &childBuf);
+
+ return 0;
+ }
+
if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE ||
info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_S390)
/* We're done here */
@@ -8129,7 +8138,7 @@ virDomainHostdevDefParseXMLSubsys(xmlNodePtr node,
virDomainHostdevDefPtr def,
unsigned int flags)
{
- xmlNodePtr sourcenode;
+ xmlNodePtr sourcenode, addressnode;
int backend;
virDomainHostdevSubsysPCIPtr pcisrc = &def->source.subsys.u.pci;
virDomainHostdevSubsysSCSIPtr scsisrc = &def->source.subsys.u.scsi;
@@ -8280,6 +8289,28 @@ virDomainHostdevDefParseXMLSubsys(xmlNodePtr node,
if (virDomainHostdevSubsysPCIDefParseXML(sourcenode, def, flags) < 0)
return -1;
+ /* @unassigned has meaning only for hostdev PCI devices.
+ * <address type='none/> has a special meaning in this case,
+ * telling the guest that this hostdev shouldn't be assigned
+ * by it.
+ *
+ * Note that both <address type='none'/> and no <address>
element
+ * declared implies in VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE
+ * through the code. This is why we can't simply check for
+ * VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE to set the @unassigned
+ * flag.
+ */
+ def->unassigned = false;
+
+ if ((addressnode = virXPathNode("./address", ctxt))) {
+ g_autofree char *address_type = virXMLPropString(addressnode,
+ "type");
+ int typeFromString = virDomainDeviceAddressTypeFromString(address_type);
+
+ if (typeFromString == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
+ def->unassigned = true;
+ }
+
backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT;
if ((backendStr = virXPathString("string(./driver/@name)", ctxt))
&&
(((backend = virDomainHostdevSubsysPCIBackendTypeFromString(backendStr)) <
0) ||
@@ -15579,7 +15610,17 @@ virDomainHostdevDefParseXML(virDomainXMLOptionPtr xmlopt,
}
if (def->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) {
- if (virDomainDeviceInfoParseXML(xmlopt, node, def->info,
+ /* skip address parsing if we already know it is an un-assigned
+ * pci hostdev */
+ bool skipParse = false;
+
+ if (def->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+ def->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
+ def->unassigned)
+ skipParse = true;
+
+ if (!skipParse &&
+ virDomainDeviceInfoParseXML(xmlopt, node, def->info,
flags | VIR_DOMAIN_DEF_PARSE_ALLOW_BOOT
| VIR_DOMAIN_DEF_PARSE_ALLOW_ROM) < 0)
goto error;
@@ -27082,6 +27123,7 @@ virDomainHostdevDefFormat(virBufferPtr buf,
virDomainHostdevSubsysMediatedDevPtr mdevsrc = &def->source.subsys.u.mdev;
virDomainHostdevSubsysSCSIVHostPtr scsihostsrc =
&def->source.subsys.u.scsi_host;
const char *type;
+ unsigned int formatFlags;
if (!mode) {
virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -27165,11 +27207,13 @@ virDomainHostdevDefFormat(virBufferPtr buf,
if (def->shareable)
virBufferAddLit(buf, "<shareable/>\n");
- if (virDomainDeviceInfoFormat(buf, def->info,
- flags | VIR_DOMAIN_DEF_FORMAT_ALLOW_BOOT
- | VIR_DOMAIN_DEF_FORMAT_ALLOW_ROM) < 0) {
+ formatFlags = flags | VIR_DOMAIN_DEF_FORMAT_ALLOW_BOOT
+ | VIR_DOMAIN_DEF_FORMAT_ALLOW_ROM;
+ if (def->unassigned)
+ formatFlags |= VIR_DOMAIN_DEF_FORMAT_ADDRESS_NONE;
+
+ if (virDomainDeviceInfoFormat(buf, def->info, formatFlags) < 0)
return -1;
- }
virBufferAdjustIndent(buf, -2);
virBufferAddLit(buf, "</hostdev>\n");
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 39fb42e29d..51851e3638 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -344,6 +344,7 @@ struct _virDomainHostdevDef {
bool missing;
bool readonly;
bool shareable;
+ bool unassigned;
union {
virDomainHostdevSubsys subsys;
virDomainHostdevCaps caps;
@@ -3021,6 +3022,8 @@ typedef enum {
VIR_DOMAIN_DEF_FORMAT_ALLOW_ROM = 1 << 6,
VIR_DOMAIN_DEF_FORMAT_ALLOW_BOOT = 1 << 7,
VIR_DOMAIN_DEF_FORMAT_CLOCK_ADJUST = 1 << 8,
+ /* format address type='none' for un-assigned PCI hostdevs */
+ VIR_DOMAIN_DEF_FORMAT_ADDRESS_NONE = 1 << 9,
} virDomainDefFormatFlags;
/* Use these flags to skip specific domain ABI consistency checks done
diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c
index 605984f80f..301a08197b 100644
--- a/src/qemu/qemu_domain_address.c
+++ b/src/qemu/qemu_domain_address.c
@@ -2313,6 +2313,12 @@ qemuDomainAssignDevicePCISlots(virDomainDefPtr def,
continue;
}
+ /* Do not assign an address to a hostdev that will not
+ * be assigned to the guest.
+ */
+ if (def->hostdevs[i]->unassigned)
+ continue;
+
if (qemuDomainPCIAddressReserveNextAddr(addrs,
def->hostdevs[i]->info) < 0)
goto error;
diff --git a/tests/qemuxml2argvdata/hostdev-pci-address-none.xml
b/tests/qemuxml2argvdata/hostdev-pci-address-none.xml
new file mode 100644
index 0000000000..63c9d4eef6
--- /dev/null
+++ b/tests/qemuxml2argvdata/hostdev-pci-address-none.xml
@@ -0,0 +1,42 @@
+<domain type='kvm'>
+ <name>delete</name>
+ <uuid>583a8e8e-f0ce-4f53-89ab-092862148b25</uuid>
+ <memory unit='KiB'>262144</memory>
+ <vcpu placement='static'>4</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc'>hvm</type>
+ </os>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x0'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x1'/>
+ </source>
+ <address type='none'/>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x2'/>
+ </source>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x3'/>
+ </source>
+ </hostdev>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2xmloutdata/hostdev-pci-address-none.xml
b/tests/qemuxml2xmloutdata/hostdev-pci-address-none.xml
new file mode 100644
index 0000000000..08aa5323e4
--- /dev/null
+++ b/tests/qemuxml2xmloutdata/hostdev-pci-address-none.xml
@@ -0,0 +1,58 @@
+<domain type='kvm'>
+ <name>delete</name>
+ <uuid>583a8e8e-f0ce-4f53-89ab-092862148b25</uuid>
+ <memory unit='KiB'>262144</memory>
+ <currentMemory unit='KiB'>262144</currentMemory>
+ <vcpu placement='static'>4</vcpu>
+ <os>
+ <type arch='x86_64' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <controller type='usb' index='0'>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x01' function='0x2'/>
+ </controller>
+ <controller type='ide' index='0'>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x01' function='0x1'/>
+ </controller>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x0'/>
+ </source>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x03' function='0x0'/>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x1'/>
+ </source>
+ <address type='none'/>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x2'/>
+ </source>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x04' function='0x0'/>
+ </hostdev>
+ <hostdev mode='subsystem' type='pci' managed='yes'>
+ <driver name='vfio'/>
+ <source>
+ <address domain='0x0005' bus='0x90' slot='0x01'
function='0x3'/>
+ </source>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x05' function='0x0'/>
+ </hostdev>
+ <memballoon model='virtio'>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x06' function='0x0'/>
+ </memballoon>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index ed53ddc8c6..039d5ac1ad 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -423,6 +423,7 @@ mymain(void)
DO_TEST("hostdev-usb-address", NONE);
DO_TEST("hostdev-pci-address", NONE);
+ DO_TEST("hostdev-pci-address-none", NONE);
DO_TEST("hostdev-pci-multifunction", NONE);
DO_TEST("hostdev-vfio", NONE);
DO_TEST("hostdev-vfio-zpci",
--
2.21.0