[PATCH v4 0/7] qemu: Implement support for iommufd
Hi, This is a follow up to the third patch series [0] for using iommufd to propagate DMA mappings to the kernel for VM-assigned host devices in a qemu VM. We add a new 'iommufd' attribute for hostdev devices to be associated with the iommufd object. For instance, specifying the iommufd object and associated hostdev in a VM definition: <devices> ... <hostdev mode='subsystem' type='pci' managed='no'> <driver iommufd='yes'/> <source> <address domain='0x0009' bus='0x01' slot='0x00' function='0x0'/> </source> <address type='pci' domain='0x0000' bus='0x15' slot='0x00' function='0x0'/> </hostdev> <hostdev mode='subsystem' type='pci' managed='no'> <driver iommufd='yes'/> <source> <address domain='0x0019' bus='0x01' slot='0x00' function='0x0'/> </source> <address type='pci' domain='0x0000' bus='0x16' slot='0x00' function='0x0'/> </hostdev> ... </devices> This would get translated to a qemu command line with the arguments below. Note that libvirt will open the /dev/iommu and VFIO cdev, passing the associated fd number to qemu: -object '{"qom-type":"iommufd","id":"iommufd0","fd":"24"}' \ -device '{"driver":"vfio-pci","host":"0009:01:00.0","id":"hostdev0","iommufd":"iommufd0","fd":"22","bus":"pci.21","addr":"0x0"}' \ -device '{"driver":"vfio-pci","host":"0019:01:00.0","id":"hostdev1","iommufd":"iommufd0","fd":"25","bus":"pci.22","addr":"0x0"}' \ Changes from v3: - Resolved issue from v2 where stale FD from previous VM boot was in use - Remove second approach for retrieving VFIO device path in virPCIDeviceGetVfioPath() - Resolve broken build of libvirt on non-Linux platforms - Conditionally define iommufd headers and use system headers where possible - Add non-fatal handling + warning print for EPERM for the IOMMU_OPTION_RLIMIT_MODE ioctl - Replace references to /dev/iommu with VIR_IOMMU_DEV_PATH - Implement virIOMMUFDSupported(void) to check for existence of /dev/iommu on host - Include tests for multiple hostdevs Changes from v2: - Set per-process memory accounting mode for 
iommufd - Separated out formatting of iommufd object from qemuBuildHostdevCommandLine - Placed hostdev private data implementation in a separate commit - Allocate hostdev private data unconditionally - Compare FDs against -1 - Integrated callback function in virQEMUDriverPrivateDataCallbacks for qemuDomainHostdevPrivateNew - Dropped qemuProcessCloseVfioFds - Addressed other feedback from v2 (formatting, includes, etc.) - Revised seclabel logic to be device-specific for AppArmor and to allow paths for SELinux/DAC This series is on Github: https://github.com/NathanChenNVIDIA/libvirt/tree/iommufd-01-26 Thanks, Nathan [0] https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/thread/WIBZ6... Signed-off-by: Nathan Chen <nathanc@nvidia.com> Nathan Chen (7): qemu: Implement support for associating iommufd to hostdev qemu: Introduce privateData for hostdevs qemu: Set per-process memory accounting for iommufd qemu: open VFIO FDs from libvirt backend qemu: open iommufd FD from libvirt backend qemu: Update Cgroup, namespace, and seclabel for iommufd tests: qemuxmlconfdata: provide iommufd sample XML and CLI args docs/formatdomain.rst | 7 + po/POTFILES | 1 + src/bhyve/bhyve_parse_command.c | 2 +- src/conf/device_conf.c | 11 ++ src/conf/device_conf.h | 1 + src/conf/domain_conf.c | 13 +- src/conf/domain_conf.h | 5 +- src/conf/schemas/basictypes.rng | 5 + src/libvirt_private.syms | 5 + src/libxl/xen_common.c | 2 +- src/libxl/xen_xl.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_cgroup.c | 26 ++-- src/qemu/qemu_command.c | 76 +++++++++++ src/qemu/qemu_domain.c | 41 ++++++ src/qemu/qemu_domain.h | 20 +++ src/qemu/qemu_namespace.c | 16 ++- src/qemu/qemu_process.c | 118 ++++++++++++++++ src/security/security_apparmor.c | 32 ++++- src/security/security_dac.c | 59 ++++++-- src/security/security_selinux.c | 57 ++++++-- src/security/virt-aa-helper.c | 33 ++++- src/util/meson.build | 1 + src/util/viriommufd.c | 127 ++++++++++++++++++ src/util/viriommufd.h | 27 ++++ 
src/util/virpci.c | 42 ++++++ src/util/virpci.h | 2 + src/vbox/vbox_common.c | 2 +- .../iommufd-q35.x86_64-latest.args | 41 ++++++ .../iommufd-q35.x86_64-latest.xml | 60 +++++++++ tests/qemuxmlconfdata/iommufd-q35.xml | 38 ++++++ ...fd-virt-pci-bus-single.aarch64-latest.args | 33 +++++ ...ufd-virt-pci-bus-single.aarch64-latest.xml | 34 +++++ .../iommufd-virt-pci-bus-single.xml | 22 +++ .../iommufd-virt.aarch64-latest.args | 37 +++++ .../iommufd-virt.aarch64-latest.xml | 56 ++++++++ tests/qemuxmlconfdata/iommufd-virt.xml | 29 ++++ .../iommufd.x86_64-latest.args | 35 +++++ .../qemuxmlconfdata/iommufd.x86_64-latest.xml | 38 ++++++ tests/qemuxmlconfdata/iommufd.xml | 30 +++++ tests/qemuxmlconftest.c | 34 +++++ tests/virhostdevtest.c | 2 +- 42 files changed, 1162 insertions(+), 62 deletions(-) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.xml create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd.xml -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Implement a new iommufd attribute under hostdevs' PCI subsystem driver that can be used to specify an associated iommufd object when launching a qemu VM. Signed-off-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- docs/formatdomain.rst | 7 +++++ src/conf/device_conf.c | 11 ++++++++ src/conf/device_conf.h | 1 + src/conf/schemas/basictypes.rng | 5 ++++ src/qemu/qemu_command.c | 46 +++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 1467fc7e10..c8f827d460 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -4907,6 +4907,13 @@ or: found is "problematic" in some way, the generic vfio-pci driver similarly be forced. + The ``<driver>`` element's ``iommufd`` attribute selects the iommufd + interface for propagating DMA mappings to the kernel, instead of VFIO + alone. When the attribute is set to ``yes``, an iommufd object is + added to the resulting qemu command line. Libvirt opens /dev/iommu + and the VFIO device cdev itself and passes the associated file + descriptor numbers to qemu.
+ (Note: :since:`Since 1.0.5`, the ``name`` attribute has been described to be used to select the type of PCI device assignment ("vfio", "kvm", or "xen"), but those values have been mostly diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c index c278b81652..d68232a4f4 100644 --- a/src/conf/device_conf.c +++ b/src/conf/device_conf.c @@ -67,6 +67,11 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node, return -1; } + if (virXMLPropTristateBool(node, "iommufd", + VIR_XML_PROP_NONE, + &driver->iommufd) < 0) + return -1; + driver->model = virXMLPropString(node, "model"); return 0; } @@ -93,6 +98,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf, virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model); + if (driver->iommufd == VIR_TRISTATE_BOOL_YES) { + virBufferAddLit(&driverAttrBuf, " iommufd='yes'"); + } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) { + virBufferAddLit(&driverAttrBuf, " iommufd='no'"); + } + virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); return 0; } diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h index e570f51824..116b959143 100644 --- a/src/conf/device_conf.h +++ b/src/conf/device_conf.h @@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName); struct _virDeviceHostdevPCIDriverInfo { virDeviceHostdevPCIDriverName name; char *model; + virTristateBool iommufd; }; typedef enum { diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng index 5689170fad..381e0ac24f 100644 --- a/src/conf/schemas/basictypes.rng +++ b/src/conf/schemas/basictypes.rng @@ -673,6 +673,11 @@ <ref name="genericName"/> </attribute> </optional> + <optional> + <attribute name="iommufd"> + <ref name="virYesNo"/> + </attribute> + </optional> <empty/> </element> </define> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 98229d7cf9..98e4469c25 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4760,6 +4760,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef 
*def, g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr); const char *failover_pair_id = NULL; const char *driver = NULL; + const char *iommufdId = NULL; /* 'ramfb' property must be omitted unless it's to be enabled */ bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON; @@ -4793,6 +4794,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, teaming->persistent) failover_pair_id = teaming->persistent; + if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) + iommufdId = "iommufd0"; + if (virJSONValueObjectAdd(&props, "s:driver", driver, "s:host", host, @@ -4801,6 +4805,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, "S:failover_pair_id", failover_pair_id, "S:display", qemuOnOffAuto(pcisrc->display), "B:ramfb", ramfb, + "S:iommufd", iommufdId, NULL) < 0) return NULL; @@ -5320,6 +5325,44 @@ qemuBuildHostdevCommandLine(virCommand *cmd, } +static int +qemuBuildIOMMUFDCommandLine(virCommand *cmd, + const virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = def->hostdevs[i]; + virDomainHostdevSubsys *subsys = &hostdev->source.subsys; + g_autoptr(virJSONValue) props = NULL; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + continue; + + if (subsys->type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + continue; + + if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED) + continue; + + if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) + continue; + + if (qemuMonitorCreateObjectProps(&props, "iommufd", + "iommufd0", + NULL) < 0) + return -1; + + if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0) + return -1; + + break; + } + + return 0; +} + + static int qemuBuildMonitorCommandLine(virCommand *cmd, qemuDomainObjPrivate *priv) @@ -10932,6 +10975,9 @@ qemuBuildCommandLine(virDomainObj *vm, if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) return NULL; + if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) + return NULL; + if (qemuBuildHostdevCommandLine(cmd, 
def, qemuCaps) < 0) return NULL; -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Introduce private data for hostdevs and allocate hostdev private data by default. Signed-off-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/bhyve/bhyve_parse_command.c | 2 +- src/conf/domain_conf.c | 13 +++++++++-- src/conf/domain_conf.h | 5 ++++- src/libxl/xen_common.c | 2 +- src/libxl/xen_xl.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_domain.c | 40 +++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 18 +++++++++++++++ src/vbox/vbox_common.c | 2 +- tests/virhostdevtest.c | 2 +- 10 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/bhyve/bhyve_parse_command.c b/src/bhyve/bhyve_parse_command.c index d62ea64beb..8b405206bd 100644 --- a/src/bhyve/bhyve_parse_command.c +++ b/src/bhyve/bhyve_parse_command.c @@ -687,7 +687,7 @@ bhyveParsePassthru(virDomainDef *def G_GNUC_UNUSED, return -1; } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 541dad5bdc..f950f7c75d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2733,6 +2733,8 @@ virDomainHostdevDefClear(virDomainHostdevDef *def) case VIR_DOMAIN_HOSTDEV_MODE_LAST: break; } + + g_clear_pointer(&def->privateData, virObjectUnref); } @@ -3483,7 +3485,7 @@ void virDomainVideoDefFree(virDomainVideoDef *def) virDomainHostdevDef * -virDomainHostdevDefNew(void) +virDomainHostdevDefNew(virDomainXMLOption *xmlopt) { virDomainHostdevDef *def; @@ -3491,6 +3493,13 @@ virDomainHostdevDefNew(void) def->info = g_new0(virDomainDeviceInfo, 1); + if (xmlopt && xmlopt->privateData.hostdevNew && + !(def->privateData = xmlopt->privateData.hostdevNew())) { + VIR_FREE(def->info); + VIR_FREE(def); + return NULL; + } + return def; } @@ -13678,7 +13687,7 @@ virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt, 
ctxt->node = node; - def = virDomainHostdevDefNew(); + def = virDomainHostdevDefNew(xmlopt); if (virXMLPropEnumDefault(node, "mode", virDomainHostdevModeTypeFromString, VIR_XML_PROP_NONE, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index cb35ff06bd..8f53ed96c0 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -364,6 +364,8 @@ struct _virDomainHostdevDef { */ virDomainNetDef *parentnet; + virObject *privateData; + virDomainHostdevMode mode; virDomainStartupPolicy startupPolicy; bool managed; @@ -3588,6 +3590,7 @@ struct _virDomainXMLPrivateDataCallbacks { virDomainXMLPrivateDataNewFunc vsockNew; virDomainXMLPrivateDataNewFunc cryptoNew; virDomainXMLPrivateDataNewFunc graphicsNew; + virDomainXMLPrivateDataNewFunc hostdevNew; virDomainXMLPrivateDataNewFunc networkNew; virDomainXMLPrivateDataNetParseFunc networkParse; virDomainXMLPrivateDataNetFormatFunc networkFormat; @@ -3797,7 +3800,7 @@ virDomainVideoDef *virDomainVideoDefNew(virDomainXMLOption *xmlopt); void virDomainVideoDefFree(virDomainVideoDef *def); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virDomainVideoDef, virDomainVideoDefFree); void virDomainVideoDefClear(virDomainVideoDef *def); -virDomainHostdevDef *virDomainHostdevDefNew(void); +virDomainHostdevDef *virDomainHostdevDefNew(virDomainXMLOption *xmlopt); void virDomainHostdevDefFree(virDomainHostdevDef *def); void virDomainHubDefFree(virDomainHubDef *def); void virDomainRedirdevDefFree(virDomainRedirdevDef *def); diff --git a/src/libxl/xen_common.c b/src/libxl/xen_common.c index 890ef11723..e6a372e078 100644 --- a/src/libxl/xen_common.c +++ b/src/libxl/xen_common.c @@ -445,7 +445,7 @@ xenParsePCI(char *entry) } } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->managed = false; hostdev->writeFiltering = filtered; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c index b2ff0edcf2..e62302736b 100644 --- a/src/libxl/xen_xl.c +++ 
b/src/libxl/xen_xl.c @@ -930,7 +930,7 @@ xenParseXLUSB(virConf *conf, virDomainDef *def) key = nextkey; } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->managed = false; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB; hostdev->source.subsys.u.usb.bus = busNum; diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index 7700804429..a94427b027 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -376,7 +376,7 @@ lxcCreateNetDef(const char *type, static virDomainHostdevDef * lxcCreateHostdevDef(const char *data) { - virDomainHostdevDef *hostdev = virDomainHostdevDefNew(); + virDomainHostdevDef *hostdev = virDomainHostdevDefNew(NULL); hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES; hostdev->source.caps.type = VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET; hostdev->source.caps.u.net.ifname = g_strdup(data); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index ac56fc7cb4..85eea1801f 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1238,6 +1238,45 @@ qemuDomainNetworkPrivateFormat(const virDomainNetDef *net, } +static virClass *qemuDomainHostdevPrivateClass; + +static void +qemuDomainHostdevPrivateDispose(void *obj) +{ + qemuDomainHostdevPrivate *priv = obj; + + VIR_FORCE_CLOSE(priv->vfioDeviceFd); +} + + +static int +qemuDomainHostdevPrivateOnceInit(void) +{ + if (!VIR_CLASS_NEW(qemuDomainHostdevPrivate, virClassForObject())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(qemuDomainHostdevPrivate); + +virObject * +qemuDomainHostdevPrivateNew(void) +{ + qemuDomainHostdevPrivate *priv; + + if (qemuDomainHostdevPrivateInitialize() < 0) + return NULL; + + if (!(priv = virObjectNew(qemuDomainHostdevPrivateClass))) + return NULL; + + priv->vfioDeviceFd = -1; + + return (virObject *) priv; +} + + /* qemuDomainSecretInfoSetup: * @priv: pointer to domain private object * @alias: alias of the secret @@ -3563,6 +3602,7 @@ virDomainXMLPrivateDataCallbacks 
virQEMUDriverPrivateDataCallbacks = { .chrSourceNew = qemuDomainChrSourcePrivateNew, .vsockNew = qemuDomainVsockPrivateNew, .graphicsNew = qemuDomainGraphicsPrivateNew, + .hostdevNew = qemuDomainHostdevPrivateNew, .networkNew = qemuDomainNetworkPrivateNew, .networkParse = qemuDomainNetworkPrivateParse, .networkFormat = qemuDomainNetworkPrivateFormat, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 3396f929fd..e91435c062 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -461,6 +461,18 @@ struct _qemuDomainTPMPrivate { }; +#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \ + ((qemuDomainHostdevPrivate *) (hostdev)->privateData) + +typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate; +struct _qemuDomainHostdevPrivate { + virObject parent; + + /* VFIO device file descriptor for iommufd passthrough */ + int vfioDeviceFd; +}; + + void qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv); @@ -1174,3 +1186,9 @@ qemuDomainCheckCPU(virArch arch, bool qemuDomainMachineSupportsFloppy(const char *machine, virQEMUCaps *qemuCaps); + +virObject * +qemuDomainHostdevPrivateNew(void); + +int +qemuProcessOpenVfioFds(virDomainObj *vm); diff --git a/src/vbox/vbox_common.c b/src/vbox/vbox_common.c index 26c5fdfef6..d2a8cf8da4 100644 --- a/src/vbox/vbox_common.c +++ b/src/vbox/vbox_common.c @@ -3090,7 +3090,7 @@ vboxHostDeviceGetXMLDesc(struct _vboxDriver *data, virDomainDef *def, IMachine * def->hostdevs = g_new0(virDomainHostdevDef *, def->nhostdevs); for (i = 0; i < def->nhostdevs; i++) - def->hostdevs[i] = virDomainHostdevDefNew(); + def->hostdevs[i] = virDomainHostdevDefNew(NULL); for (i = 0; i < deviceFilters.count; i++) { PRBool active = PR_FALSE; diff --git a/tests/virhostdevtest.c b/tests/virhostdevtest.c index aec474a148..a35c1d9402 100644 --- a/tests/virhostdevtest.c +++ b/tests/virhostdevtest.c @@ -124,7 +124,7 @@ myInit(void) for (i = 0; i < nhostdevs; i++) { virDomainHostdevSubsys *subsys; - hostdevs[i] = 
virDomainHostdevDefNew(); + hostdevs[i] = virDomainHostdevDefNew(NULL); if (!hostdevs[i]) goto cleanup; hostdevs[i]->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Integrate and use the IOMMU_OPTION_RLIMIT_MODE ioctl to set per-process memory accounting for iommufd. This prevents ENOMEM errors from the default per-user memory accounting when multiple VMs under the libvirt-qemu user have their pinned memory summed and checked against a per-process RLIMIT_MEMLOCK limit. Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 4 ++ src/util/meson.build | 1 + src/util/viriommufd.c | 127 +++++++++++++++++++++++++++++++++++++++ src/util/viriommufd.h | 27 +++++++++ 5 files changed, 160 insertions(+) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h diff --git a/po/POTFILES b/po/POTFILES index f0aad35c8c..c78d2b8000 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -303,6 +303,7 @@ src/util/virhostuptime.c src/util/viridentity.c src/util/virinhibitor.c src/util/virinitctl.c +src/util/viriommufd.c src/util/viriscsi.c src/util/virjson.c src/util/virlease.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4e57e4a8f6..a8eadbfb8a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2652,6 +2652,10 @@ virInhibitorRelease; virInitctlFifos; virInitctlSetRunLevel; +# util/viriommufd.h +virIOMMUFDSetRLimitMode; +virIOMMUFDSupported; + # util/viriscsi.h virISCSIConnectionLogin; virISCSIConnectionLogout; diff --git a/src/util/meson.build b/src/util/meson.build index 4950a795cc..9fb0aa0fe7 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -46,6 +46,7 @@ util_sources = [ 'viridentity.c', 'virinhibitor.c', 'virinitctl.c', + 'viriommufd.c', 'viriscsi.c', 'virjson.c', 'virkeycode.c', diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c new file mode 100644 index 0000000000..0f87f95330 --- /dev/null +++ b/src/util/viriommufd.c @@ -0,0 +1,127 @@ +#include <config.h> + +#include "viriommufd.h" +#include "virlog.h" +#include "virerror.h" +#include "virfile.h" + +#ifdef __linux__ + +# 
include <sys/ioctl.h> +# include <linux/types.h> + +# ifdef HAVE_LINUX_IOMMUFD_H +# include <linux/iommufd.h> +# endif + +#define VIR_FROM_THIS VIR_FROM_NONE + +VIR_LOG_INIT("util.iommufd"); + +#ifndef IOMMU_OPTION + +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; + +# define IOMMUFD_TYPE (';') +# define IOMMUFD_CMD_OPTION 0x87 +# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +#endif + +/** + * virIOMMUFDSetRLimitMode: + * @fd: iommufd file descriptor + * @processAccounting: true for per-process, false for per-user + * + * Set RLIMIT_MEMLOCK accounting mode for the iommufd. + * + * Returns: 0 on success, -1 on error + */ +int +virIOMMUFDSetRLimitMode(int fd, bool processAccounting) +{ + struct iommu_option option = { + .size = sizeof(struct iommu_option), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .__reserved = 0, + .object_id = 0, + .val64 = processAccounting ? 1 : 0, + }; + + if (ioctl(fd, IOMMU_OPTION, &option) < 0) { + switch (errno) { + case ENOTTY: + VIR_WARN("IOMMU_OPTION ioctl not supported"); + return 0; + + case EOPNOTSUPP: + VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel"); + return 0; + + case EINVAL: + virReportSystemError(errno, "%s", + _("invalid iommufd option parameters")); + return -1; + + case EPERM: + VIR_WARN("Permission denied for IOMMU_OPTION ioctl. " + "Per-user-based memory accounting to be used by default."); + return 0; + + default: + virReportSystemError(errno, "%s", + _("failed to set iommufd option")); + return -1; + } + } + + VIR_DEBUG("Set iommufd rlimit mode to %s-based accounting", + processAccounting ? "process" : "user"); + return 0; +} + +/** + * virIOMMUFDSupported: Check for presence of /dev/iommu on host. 
+ * + * Returns true if the file exists and false if it does not. + */ +bool +virIOMMUFDSupported(void) +{ + return virFileExists(VIR_IOMMU_DEV_PATH); +} + +#else + +int virIOMMUFDSetRLimitMode(int fd G_GNUC_UNUSED, + bool processAccounting G_GNUC_UNUSED) +{ + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("IOMMUFD is not supported on this platform")); + return -1; +} + +bool virIOMMUFDSupported(void) +{ + return false; +} + +#endif diff --git a/src/util/viriommufd.h b/src/util/viriommufd.h new file mode 100644 index 0000000000..ec6be9fa66 --- /dev/null +++ b/src/util/viriommufd.h @@ -0,0 +1,27 @@ +/* + * viriommufd.h: iommufd helpers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "internal.h" + +#define VIR_IOMMU_DEV_PATH "/dev/iommu" + +int virIOMMUFDSetRLimitMode(int fd, bool processAccounting); + +bool virIOMMUFDSupported(void); -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Open VFIO FDs from libvirt backend without exposing these FDs to XML users, i.e. one per iommufd hostdev for /dev/vfio/devices/vfioX, and pass the FD to qemu command line. Suggested-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 21 +++++++++++ src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++ src/util/virpci.c | 42 +++++++++++++++++++++ src/util/virpci.h | 2 + 5 files changed, 145 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a8eadbfb8a..0904265459 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3163,6 +3163,7 @@ virPCIDeviceGetStubDriverName; virPCIDeviceGetStubDriverType; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; +virPCIDeviceGetVfioPath; virPCIDeviceGetVPD; virPCIDeviceHasPCIExpressLink; virPCIDeviceIsAssignable; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 98e4469c25..2a16f9df63 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4809,6 +4809,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, NULL) < 0) return NULL; + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev); + + if (hostdevPriv->vfioDeviceFd != -1) { + g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd); + if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0) + return NULL; + hostdevPriv->vfioDeviceFd = -1; + } + } + if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0) return NULL; @@ -5253,6 +5265,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd, if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) return -1; + if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = 
QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdevPriv->vfioDeviceFd != -1) { + virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } + } + if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev))) return -1; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0e50cd1ccc..ab88a6bf62 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -103,6 +103,7 @@ #include "storage_source.h" #include "backup_conf.h" #include "storage_file_probe.h" +#include "virpci.h" #include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0) goto cleanup; + if (qemuProcessOpenVfioFds(vm) < 0) + goto cleanup; + if (!(cmd = qemuBuildCommandLine(vm, incoming ? "defer" : NULL, vmop, @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); virObjectUnlock(vm); } + +/** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition; only PCI subsystem + * hostdevs are accepted + * + * Opens the VFIO device file descriptor for a hostdev.
+ * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) +{ + g_autofree char *vfioPath = NULL; + int fd = -1; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("VFIO FD only supported for PCI hostdevs")); + return -1; + } + + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) + return -1; + + VIR_DEBUG("Opening VFIO device %s", vfioPath); + + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), + vfioPath); + } else { + virReportSystemError(errno, + _("cannot open VFIO device %1$s"), vfioPath); + } + return -1; + } + + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); + return fd; +} + +/** + * qemuProcessOpenVfioFds: + * @vm: domain object + * + * Opens all necessary VFIO file descriptors for the domain. 
+ * + * Returns: 0 on success, -1 on failure + */ +int +qemuProcessOpenVfioFds(virDomainObj *vm) +{ + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; + } + } + + return 0; +} diff --git a/src/util/virpci.c b/src/util/virpci.c index 90617e69c6..886e2c55e6 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -3320,3 +3320,45 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) { g_free(address); } + +/** + * virPCIDeviceGetVfioPath: + * @addr: host device PCI address + * @vfioPath: returned VFIO device path + * + * Constructs the VFIO device path for a PCI hostdev. 
+ * + * Returns: 0 on success, -1 on failure + */ +int +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, + char **vfioPath) +{ + g_autofree char *addrStr = NULL; + + *vfioPath = NULL; + addrStr = virPCIDeviceAddressAsString(addr); + + /* Look for a vfioN entry in the device's sysfs vfio-dev directory */ + { + g_autofree char *sysfsPath = NULL; + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + if (STRPREFIX(entry->d_name, "vfio")) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find VFIO device for PCI device %1$s"), + addrStr); + return -1; +} diff --git a/src/util/virpci.h b/src/util/virpci.h index fc538566e1..24ede10755 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev); void virPCIDeviceAddressFree(virPCIDeviceAddress *address); +int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree); -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Open iommufd FD from libvirt backend without exposing these FDs to XML users, i.e. one per domain for /dev/iommu, and pass the FD to qemu command line. Suggested-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_command.c | 13 +++++++++++-- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_process.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 2a16f9df63..5916283651 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5348,9 +5348,13 @@ qemuBuildHostdevCommandLine(virCommand *cmd, static int qemuBuildIOMMUFDCommandLine(virCommand *cmd, - const virDomainDef *def) + const virDomainDef *def, + virDomainObj *vm) { size_t i; + qemuDomainObjPrivate *priv = vm->privateData; + g_autofree char *fdstr = g_strdup_printf("%d", priv->iommufd); + for (i = 0; i < def->nhostdevs; i++) { virDomainHostdevDef *hostdev = def->hostdevs[i]; @@ -5369,8 +5373,13 @@ qemuBuildIOMMUFDCommandLine(virCommand *cmd, if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) continue; + virCommandPassFD(cmd, priv->iommufd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + + priv->iommufd = -1; + if (qemuMonitorCreateObjectProps(&props, "iommufd", "iommufd0", + "S:fd", fdstr, NULL) < 0) return -1; @@ -10996,7 +11005,7 @@ qemuBuildCommandLine(virDomainObj *vm, if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) return NULL; - if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) + if (qemuBuildIOMMUFDCommandLine(cmd, def, vm) < 0) return NULL; if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 85eea1801f..c5e1cd5279 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2042,6 +2042,7 @@ qemuDomainObjPrivateAlloc(void *opaque) priv->blockjobs = virHashNew(virObjectUnref); priv->fds 
= virHashNew(g_object_unref); + priv->iommufd = -1; priv->pidMonitored = -1; /* agent commands block by default, user can choose different behavior */ diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index e91435c062..18ced7ebba 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -264,6 +264,8 @@ struct _qemuDomainObjPrivate { /* named file descriptor groups associated with the VM */ GHashTable *fds; + int iommufd; + char *memoryBackingDir; }; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index ab88a6bf62..0ef46e4880 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -104,6 +104,7 @@ #include "backup_conf.h" #include "storage_file_probe.h" #include "virpci.h" +#include "viriommufd.h" #include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -10365,6 +10366,37 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, virObjectUnlock(vm); } +/** + * qemuProcessOpenIommuFd: + * @vm: domain object + * @iommuFd: returned file descriptor + * + * Opens /dev/iommu file descriptor for the VM. 
+ * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenIommuFd(virDomainObj *vm) +{ + int fd = -1; + + VIR_DEBUG("Opening IOMMU FD for domain %s", vm->def->name); + + if ((fd = open(VIR_IOMMU_DEV_PATH, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("IOMMU FD support requires /dev/iommu device")); + } else { + virReportSystemError(errno, "%s", + _("cannot open /dev/iommu")); + } + return -1; + } + + VIR_DEBUG("Opened IOMMU FD %d for domain %s", fd, vm->def->name); + return fd; +} + /** * qemuProcessOpenVfioDeviceFd: * @hostdev: host device definition @@ -10419,6 +10451,7 @@ qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) int qemuProcessOpenVfioFds(virDomainObj *vm) { + qemuDomainObjPrivate *priv = vm->privateData; size_t i; /* Check if we have any hostdevs that need VFIO FDs */ @@ -10430,10 +10463,16 @@ qemuProcessOpenVfioFds(virDomainObj *vm) hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); if (hostdevPriv->vfioDeviceFd == -1) return -1; + + /* Open IOMMU FD */ + priv->iommufd = qemuProcessOpenIommuFd(vm); + if (priv->iommufd == -1) + return -1; } } -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7dadef0739..7190a4f80f 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,21 +479,23 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms; - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) - return 0; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; - if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) - return -1; + if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) + return -1; - if (path && - qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { - return -1; - } + if (path && + qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { + return -1; + } - if (virHostdevNeedsVFIO(dev) && - qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RW, false) < 0) { - return -1; + if (virHostdevNeedsVFIO(dev) && + qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; + } } return 0; diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index c689cc3e40..907b2773cf 100644 --- 
a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,15 +345,17 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL; - if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) - return -1; + if (hostdev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) + return -1; - if (path) - *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); + if (path) + *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); - if (virHostdevNeedsVFIO(hostdev) && - (!hotplug || !qemuDomainNeedsVFIO(vm->def))) - *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + if (virHostdevNeedsVFIO(hostdev) && + (!hotplug || !qemuDomainNeedsVFIO(vm->def))) + *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + } return 0; } diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1; + + ret = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr); + if (ret) + return ret; + + ret = AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, ptr); + if (ret) + return ret; } - ret = AppArmorSetSecurityPCILabel(pci, 
vfioGroupDev, ptr); - VIR_FREE(vfioGroupDev); } else { ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr); } diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 2f788b872a..fbe216637f 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -41,6 +41,7 @@ #include "virscsivhost.h" #include "virstring.h" #include "virutil.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -1282,14 +1283,32 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, + false, + &cbdata); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, - false, - &cbdata); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfiofdDev, false, &cbdata); + if (ret) + return ret; + + ret = virSecurityDACSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &cbdata); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACSetPCILabel, @@ -1443,13 +1462,33 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; - ret = 
virSecurityDACRestoreFileLabelInternal(mgr, NULL, + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, vfioGroupDev, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + vfiofdDev, false); + if (ret) + return ret; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + VIR_IOMMU_DEV_PATH, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr); } diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 2f3cc274a5..05086ad9e1 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -41,6 +41,7 @@ #include "virconf.h" #include "virtpm.h" #include "virstring.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -2256,14 +2257,32 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, + false, + &data); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, - false, - &data); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev, false, &data); + if (ret) + return ret; + + ret = virSecuritySELinuxSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &data); + if (ret) + return ret; + } } else { ret = 
virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data); } @@ -2491,12 +2510,30 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false, false); + if (ret) + return ret; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, VIR_IOMMU_DEV_PATH, false, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr); } diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c index de0a826063..43046ab831 100644 --- a/src/security/virt-aa-helper.c +++ b/src/security/virt-aa-helper.c @@ -50,6 +50,7 @@ #include "virstring.h" #include "virgettext.h" #include "virhostdev.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -1114,8 +1115,9 @@ get_files(vahControl * ctl) virDeviceHostdevPCIDriverName driverName = dev->source.subsys.u.pci.driver.name; - if (driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || - driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) { + if ((driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || + driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) && + dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { needsVfio = true; } @@ -1348,6 +1350,7 @@ 
get_files(vahControl * ctl) virBufferAddLit(&buf, " \"/dev/vfio/vfio\" rw,\n"); virBufferAddLit(&buf, " \"/dev/vfio/[0-9]*\" rw,\n"); } + if (needsgl) { /* if using gl all sorts of further dri related paths will be needed */ virBufferAddLit(&buf, " # DRI/Mesa/(e)GL config and driver paths\n"); @@ -1385,9 +1388,18 @@ get_files(vahControl * ctl) } } - if (ctl->newfile && - vah_add_file(&buf, ctl->newfile, "rwk") != 0) { - return -1; + if (ctl->newfile) { + const char *perms = "rwk"; + + /* VFIO and iommufd devices need mmap permission */ + if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || + STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { + perms = "rwm"; + } + + if (vah_add_file(&buf, ctl->newfile, perms) != 0) { + return -1; + } } ctl->files = virBufferContentAndReset(&buf); @@ -1561,8 +1573,15 @@ main(int argc, char **argv) } } if (ctl->append && ctl->newfile) { - if (vah_add_file(&buf, ctl->newfile, "rwk") != 0) - goto cleanup; + const char *perms = "rwk"; + + if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || + STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { + perms = "rwm"; + } + + if (vah_add_file(&buf, ctl->newfile, perms) != 0) + return -1; } else { if (ctl->def->virtType == VIR_DOMAIN_VIRT_QEMU || ctl->def->virtType == VIR_DOMAIN_VIRT_KQEMU || -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Provide sample XML and CLI args for the iommufd XML schema for pc, q35, and virt machine types. Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- .../iommufd-q35.x86_64-latest.args | 41 +++++++++++++ .../iommufd-q35.x86_64-latest.xml | 60 +++++++++++++++++++ tests/qemuxmlconfdata/iommufd-q35.xml | 38 ++++++++++++ ...fd-virt-pci-bus-single.aarch64-latest.args | 33 ++++++++++ ...ufd-virt-pci-bus-single.aarch64-latest.xml | 34 +++++++++++ .../iommufd-virt-pci-bus-single.xml | 22 +++++++ .../iommufd-virt.aarch64-latest.args | 37 ++++++++++++ .../iommufd-virt.aarch64-latest.xml | 56 +++++++++++++++++ tests/qemuxmlconfdata/iommufd-virt.xml | 29 +++++++++ .../iommufd.x86_64-latest.args | 35 +++++++++++ .../qemuxmlconfdata/iommufd.x86_64-latest.xml | 38 ++++++++++++ tests/qemuxmlconfdata/iommufd.xml | 30 ++++++++++ tests/qemuxmlconftest.c | 34 +++++++++++ 13 files changed, 487 insertions(+) create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.xml create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd.xml diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args new file mode 100644 index 0000000000..7d819e141b --- /dev/null +++ 
b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args @@ -0,0 +1,41 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-q35-test \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=q35-test,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-q35-test/master-key.aes"}' \ +-machine q35,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ +-accel tcg \ +-cpu qemu64 \ +-m size=2097152k \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-uuid 11dbdcdd-4c3b-482b-8903-9bdb8c0a2774 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"}' \ +-device '{"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"}' \ +-device '{"driver":"qemu-xhci","id":"usb","bus":"pci.1","addr":"0x0"}' \ +-blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","read-only":false}' \ +-device '{"driver":"ide-hd","bus":"ide.0","drive":"libvirt-1-storage","id":"sata0-0-0","bootindex":1}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-device '{"driver":"qxl-vga","id":"video0","max_outputs":1,"ram_size":67108864,"vram_size":33554432,"vram64_size_mb":0,"vgamem_mb":8,"bus":"pcie.0","addr":"0x1"}' \ +-global ICH9-LPC.noreboot=off \ +-watchdog-action reset \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ 
+-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x3"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml new file mode 100644 index 0000000000..bb76252b61 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml @@ -0,0 +1,60 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <driver name='qemu' type='raw'/> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x10'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x11'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/> + </controller> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <controller type='usb' 
index='0' model='qemu-xhci'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <audio id='1' type='none'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1' primary='yes'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </video> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <watchdog model='itco' action='reset'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-q35.xml b/tests/qemuxmlconfdata/iommufd-q35.xml new file mode 100644 index 0000000000..f3c2269fb1 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + 
<controller type='sata' index='0'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1'/> + </video> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args new file mode 100644 index 0000000000..dbfd395168 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args @@ -0,0 +1,33 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-aarch64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine virt,usb=off,gic-version=2,dump-guest-core=off,memory-backend=mach-virt.ram,acpi=off \ +-accel tcg \ +-cpu cortex-a15 \ +-m size=1048576k \ +-object '{"qom-type":"memory-backend-ram","id":"mach-virt.ram","size":1073741824}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid 6ba7b810-9dad-11d1-80b4-00c04fd430c8 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x1"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git 
a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml new file mode 100644 index 0000000000..97b6e1e1c7 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml @@ -0,0 +1,34 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <gic version='2'/> + </features> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>cortex-a15</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml new file mode 100644 index 0000000000..c0b9d643b4 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml @@ -0,0 +1,22 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + </os> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' 
model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args new file mode 100644 index 0000000000..d5713ff826 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args @@ -0,0 +1,37 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-aarch64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine virt,usb=off,gic-version=2,dump-guest-core=off,memory-backend=mach-virt.ram,acpi=off \ +-accel tcg \ +-cpu cortex-a15 \ +-m size=1048576k \ +-object '{"qom-type":"memory-backend-ram","id":"mach-virt.ram","size":1073741824}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid 6ba7b810-9dad-11d1-80b4-00c04fd430c8 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"pcie-root-port","port":8,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x1"}' \ +-device '{"driver":"pcie-root-port","port":9,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x1.0x1"}' \ +-device '{"driver":"pcie-root-port","port":10,"chassis":3,"id":"pci.3","bus":"pcie.0","addr":"0x1.0x2"}' \ 
+-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pci.1","addr":"0x0"}' \ +-device '{"driver":"vfio-pci","host":"0000:07:12.5","id":"hostdev1","iommufd":"iommufd0","fd":"0","bus":"pci.2","addr":"0x0"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml new file mode 100644 index 0000000000..6b9fe543b3 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <gic version='2'/> + </features> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>cortex-a15</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x8'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x9'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='3' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target 
chassis='3' port='0xa'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x07' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt.xml b/tests/qemuxmlconfdata/iommufd-virt.xml new file mode 100644 index 0000000000..a75c1cc13b --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.xml @@ -0,0 +1,29 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + </os> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x07' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd.x86_64-latest.args 
b/tests/qemuxmlconfdata/iommufd.x86_64-latest.args new file mode 100644 index 0000000000..3130ba2e3a --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.x86_64-latest.args @@ -0,0 +1,35 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine pc,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ +-accel tcg \ +-cpu qemu64 \ +-m size=2097152k \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-uuid 3c7c30b5-7866-4b05-8a29-efebccba52a0 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pci.0","addr":"0x3"}' \ +-device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x2"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml new file mode 100644 index 0000000000..2e8951aaf6 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + 
<name>foo</name> + <uuid>3c7c30b5-7866-4b05-8a29-efebccba52a0</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='pci' index='0' model='pci-root'/> + <controller type='usb' index='0' model='piix3-uhci'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd.xml b/tests/qemuxmlconfdata/iommufd.xml new file mode 100644 index 0000000000..eb278414d2 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.xml @@ -0,0 +1,30 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>3c7c30b5-7866-4b05-8a29-efebccba52a0</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + 
<controller type='pci' index='0' model='pci-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <controller type='usb' index='0'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxmlconftest.c b/tests/qemuxmlconftest.c index 5fd538d26a..6ac2cda5c3 100644 --- a/tests/qemuxmlconftest.c +++ b/tests/qemuxmlconftest.c @@ -351,6 +351,33 @@ fakeNetworkPortGetXMLDesc(virNetworkPortPtr port, } +static void +testSetupHostdevPrivateData(virDomainDef *def) +{ /* Test-only setup: walk def->hostdevs and, for every subsystem-mode PCI hostdev that uses the VFIO driver with iommufd set to VIR_TRISTATE_BOOL_YES, make sure private data exists and store a placeholder VFIO device FD in it. NOTE(review): the return value of qemuDomainHostdevPrivateNew() is used without a NULL check. */ + size_t i; + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + + qemuDomainHostdevPrivate *priv; + + if (!hostdev->privateData) { + hostdev->privateData = qemuDomainHostdevPrivateNew(); + } + + priv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + /* No device is opened here; a fixed placeholder FD is stored for tests. NOTE(review): presumably this is the value that shows up as "fd":"0" on the vfio-pci device in the expected .args files - confirm. */ + priv->vfioDeviceFd = 0; + } + } +} + + static virNetworkDriver fakeNetworkDriver = { .networkLookupByName = fakeNetworkLookupByName, .networkGetXMLDesc = fakeNetworkGetXMLDesc, @@ -404,6 +431,8 @@ testCompareXMLToArgvCreateArgs(virQEMUDriver *drv, if (testQemuPrepareHostBackendChardevOne(NULL, priv->monConfig, vm) < 0) return NULL; + testSetupHostdevPrivateData(vm->def); + for (i = 0; i < vm->def->ndisks; i++) { virDomainDiskDef *disk = vm->def->disks[i]; virStorageSource *src; @@ -3050,6 +3079,11 @@ mymain(void) DO_TEST_CAPS_LATEST_PARSE_ERROR("virtio-iommu-dma-translation"); 
DO_TEST_CAPS_LATEST("acpi-generic-initiator"); + DO_TEST_CAPS_LATEST("iommufd"); + DO_TEST_CAPS_LATEST("iommufd-q35"); + DO_TEST_CAPS_ARCH_LATEST("iommufd-virt", "aarch64"); + DO_TEST_CAPS_ARCH_LATEST("iommufd-virt-pci-bus-single", "aarch64"); + DO_TEST_CAPS_LATEST("cpu-hotplug-startup"); DO_TEST_CAPS_ARCH_LATEST_PARSE_ERROR("cpu-hotplug-granularity", "ppc64"); -- 2.43.0
participants (1)
-
Nathan Chen