[PATCH v4 0/7] qemu: Implement support for iommufd
Hi, This is a follow up to the third patch series [0] for using iommufd to propagate DMA mappings to the kernel for VM-assigned host devices in a qemu VM. We add a new 'iommufd' attribute for hostdev devices to be associated with the iommufd object. For instance, specifying the iommufd object and associated hostdev in a VM definition: <devices> ... <hostdev mode='subsystem' type='pci' managed='no'> <driver iommufd='yes'/> <source> <address domain='0x0009' bus='0x01' slot='0x00' function='0x0'/> </source> <address type='pci' domain='0x0000' bus='0x15' slot='0x00' function='0x0'/> </hostdev> <hostdev mode='subsystem' type='pci' managed='no'> <driver iommufd='yes'/> <source> <address domain='0x0019' bus='0x01' slot='0x00' function='0x0'/> </source> <address type='pci' domain='0x0000' bus='0x16' slot='0x00' function='0x0'/> </hostdev> ... </devices> This would get translated to a qemu command line with the arguments below. Note that libvirt will open the /dev/iommu and VFIO cdev, passing the associated fd number to qemu: -object '{"qom-type":"iommufd","id":"iommufd0","fd":"24"}' \ -device '{"driver":"vfio-pci","host":"0009:01:00.0","id":"hostdev0","iommufd":"iommufd0","fd":"22","bus":"pci.21","addr":"0x0"}' \ -device '{"driver":"vfio-pci","host":"0019:01:00.0","id":"hostdev1","iommufd":"iommufd0","fd":"25","bus":"pci.22","addr":"0x0"}' \ Changes from v3: - Resolved issue from v2 where stale FD from previous VM boot was in use - Remove second approach for retrieving VFIO device path in virPCIDeviceGetVfioPath() - Resolve broken build of libvirt on non-Linux platforms - Conditionally define iommufd headers and use system headers where possible - Add non-fatal handling + warning print for EPERM for the IOMMU_OPTION_RLIMIT_MODE ioctl - Replace references to /dev/iommu with VIR_IOMMU_DEV_PATH - Implement virIOMMUFDSupported(void) to check for existence of /dev/iommu on host - Include tests for multiple hostdevs Changes from v2: - Set per-process memory accounting mode for iommufd - Separated out formatting of iommufd object from qemuBuildHostdevCommandLine - Placed hostdev private data implementation in a separate commit - Allocate hostdev private data unconditionally - Compare FDs against -1 - Integrated callback function in virQEMUDriverPrivateDataCallbacks for qemuDomainHostdevPrivateNew - Dropped qemuProcessCloseVfioFds - Addressed other feedback from v2 (formatting, includes, etc.) - Revised seclabel logic to be device-specific for AppArmor and to allow paths for SELinux/DAC This series is on Github: https://github.com/NathanChenNVIDIA/libvirt/tree/iommufd-01-26 Thanks, Nathan [0] https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/thread/WIBZ6... Signed-off-by: Nathan Chen <nathanc@nvidia.com> Nathan Chen (7): qemu: Implement support for associating iommufd to hostdev qemu: Introduce privateData for hostdevs qemu: Set per-process memory accounting for iommufd qemu: open VFIO FDs from libvirt backend qemu: open iommufd FD from libvirt backend qemu: Update Cgroup, namespace, and seclabel for iommufd tests: qemuxmlconfdata: provide iommufd sample XML and CLI args docs/formatdomain.rst | 7 + po/POTFILES | 1 + src/bhyve/bhyve_parse_command.c | 2 +- src/conf/device_conf.c | 11 ++ src/conf/device_conf.h | 1 + src/conf/domain_conf.c | 13 +- src/conf/domain_conf.h | 5 +- src/conf/schemas/basictypes.rng | 5 + src/libvirt_private.syms | 5 + src/libxl/xen_common.c | 2 +- src/libxl/xen_xl.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_cgroup.c | 26 ++-- src/qemu/qemu_command.c | 76 +++++++++++ src/qemu/qemu_domain.c | 41 ++++++ src/qemu/qemu_domain.h | 20 +++ src/qemu/qemu_namespace.c | 16 ++- src/qemu/qemu_process.c | 118 ++++++++++++++++ src/security/security_apparmor.c | 32 ++++- src/security/security_dac.c | 59 ++++++-- src/security/security_selinux.c | 57 ++++++-- src/security/virt-aa-helper.c | 33 ++++- src/util/meson.build | 1 + src/util/viriommufd.c | 127 ++++++++++++++++++ src/util/viriommufd.h | 27 ++++ src/util/virpci.c | 42 ++++++ src/util/virpci.h | 2 + src/vbox/vbox_common.c | 2 +- .../iommufd-q35.x86_64-latest.args | 41 ++++++ .../iommufd-q35.x86_64-latest.xml | 60 +++++++++ tests/qemuxmlconfdata/iommufd-q35.xml | 38 ++++++ ...fd-virt-pci-bus-single.aarch64-latest.args | 33 +++++ ...ufd-virt-pci-bus-single.aarch64-latest.xml | 34 +++++ .../iommufd-virt-pci-bus-single.xml | 22 +++ .../iommufd-virt.aarch64-latest.args | 37 +++++ .../iommufd-virt.aarch64-latest.xml | 56 ++++++++ tests/qemuxmlconfdata/iommufd-virt.xml | 29 ++++ .../iommufd.x86_64-latest.args | 35 +++++ .../qemuxmlconfdata/iommufd.x86_64-latest.xml | 38 ++++++ tests/qemuxmlconfdata/iommufd.xml | 30 +++++ tests/qemuxmlconftest.c | 34 +++++ tests/virhostdevtest.c | 2 +- 42 files changed, 1162 insertions(+), 62 deletions(-) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.xml create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd.xml -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Implement a new iommufd attribute under hostdevs' PCI subsystem driver that can be used to specify associated iommufd object when launching a qemu VM. Signed-off-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- docs/formatdomain.rst | 7 +++++ src/conf/device_conf.c | 11 ++++++++ src/conf/device_conf.h | 1 + src/conf/schemas/basictypes.rng | 5 ++++ src/qemu/qemu_command.c | 46 +++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 1467fc7e10..c8f827d460 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -4907,6 +4907,13 @@ or: found is "problematic" in some way, the generic vfio-pci driver similarly be forced. + The ``<driver>`` element's ``iommufd`` attribute is used to specify + using the iommufd interface to propagate DMA mappings to the kernel, + instead of VFIO alone. When the attribute is present, an iommufd + object will be created by the resulting qemu command. Libvirt will + open the /dev/iommu and VFIO device cdev, passing the associated + file descriptor numbers to the qemu command. + (Note: :since:`Since 1.0.5`, the ``name`` attribute has been described to be used to select the type of PCI device assignment ("vfio", "kvm", or "xen"), but those values have been mostly diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c index c278b81652..d68232a4f4 100644 --- a/src/conf/device_conf.c +++ b/src/conf/device_conf.c @@ -67,6 +67,11 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node, return -1; } + if (virXMLPropTristateBool(node, "iommufd", + VIR_XML_PROP_NONE, + &driver->iommufd) < 0) + return -1; + driver->model = virXMLPropString(node, "model"); return 0; } @@ -93,6 +98,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf, virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model); + if (driver->iommufd == VIR_TRISTATE_BOOL_YES) { + virBufferAddLit(&driverAttrBuf, " iommufd='yes'"); + } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) { + virBufferAddLit(&driverAttrBuf, " iommufd='no'"); + } + virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); return 0; } diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h index e570f51824..116b959143 100644 --- a/src/conf/device_conf.h +++ b/src/conf/device_conf.h @@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName); struct _virDeviceHostdevPCIDriverInfo { virDeviceHostdevPCIDriverName name; char *model; + virTristateBool iommufd; }; typedef enum { diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng index 5689170fad..381e0ac24f 100644 --- a/src/conf/schemas/basictypes.rng +++ b/src/conf/schemas/basictypes.rng @@ -673,6 +673,11 @@ <ref name="genericName"/> </attribute> </optional> + <optional> + <attribute name="iommufd"> + <ref name="virYesNo"/> + </attribute> + </optional> <empty/> </element> </define> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 98229d7cf9..98e4469c25 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4760,6 +4760,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr); const char *failover_pair_id = NULL; const char *driver = NULL; + const char *iommufdId = NULL; /* 'ramfb' property must be omitted unless it's to be enabled */ bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON; @@ -4793,6 +4794,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, teaming->persistent) failover_pair_id = teaming->persistent; + if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) + iommufdId = "iommufd0"; + if (virJSONValueObjectAdd(&props, "s:driver", driver, "s:host", host, @@ -4801,6 +4805,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, "S:failover_pair_id", failover_pair_id, "S:display", qemuOnOffAuto(pcisrc->display), "B:ramfb", ramfb, + "S:iommufd", iommufdId, NULL) < 0) return NULL; @@ -5320,6 +5325,44 @@ qemuBuildHostdevCommandLine(virCommand *cmd, } +static int +qemuBuildIOMMUFDCommandLine(virCommand *cmd, + const virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = def->hostdevs[i]; + virDomainHostdevSubsys *subsys = &hostdev->source.subsys; + g_autoptr(virJSONValue) props = NULL; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + continue; + + if (subsys->type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + continue; + + if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED) + continue; + + if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) + continue; + + if (qemuMonitorCreateObjectProps(&props, "iommufd", + "iommufd0", + NULL) < 0) + return -1; + + if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0) + return -1; + + break; + } + + return 0; +} + + static int qemuBuildMonitorCommandLine(virCommand *cmd, qemuDomainObjPrivate *priv) @@ -10932,6 +10975,9 @@ qemuBuildCommandLine(virDomainObj *vm, if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) return NULL; + if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) + return NULL; + if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0) return NULL; -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> Introduce private data for hostdevs and allocate hostdev private data by default. Signed-off-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/bhyve/bhyve_parse_command.c | 2 +- src/conf/domain_conf.c | 13 +++++++++-- src/conf/domain_conf.h | 5 ++++- src/libxl/xen_common.c | 2 +- src/libxl/xen_xl.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_domain.c | 40 +++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 18 +++++++++++++++ src/vbox/vbox_common.c | 2 +- tests/virhostdevtest.c | 2 +- 10 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/bhyve/bhyve_parse_command.c b/src/bhyve/bhyve_parse_command.c index d62ea64beb..8b405206bd 100644 --- a/src/bhyve/bhyve_parse_command.c +++ b/src/bhyve/bhyve_parse_command.c @@ -687,7 +687,7 @@ bhyveParsePassthru(virDomainDef *def G_GNUC_UNUSED, return -1; } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 541dad5bdc..f950f7c75d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2733,6 +2733,8 @@ virDomainHostdevDefClear(virDomainHostdevDef *def) case VIR_DOMAIN_HOSTDEV_MODE_LAST: break; } + + g_clear_pointer(&def->privateData, virObjectUnref); } @@ -3483,7 +3485,7 @@ void virDomainVideoDefFree(virDomainVideoDef *def) virDomainHostdevDef * -virDomainHostdevDefNew(void) +virDomainHostdevDefNew(virDomainXMLOption *xmlopt) { virDomainHostdevDef *def; @@ -3491,6 +3493,13 @@ virDomainHostdevDefNew(void) def->info = g_new0(virDomainDeviceInfo, 1); + if (xmlopt && xmlopt->privateData.hostdevNew && + !(def->privateData = xmlopt->privateData.hostdevNew())) { + VIR_FREE(def->info); + VIR_FREE(def); + return NULL; + } + return def; } @@ -13678,7 +13687,7 @@ virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt, ctxt->node = node; - def = virDomainHostdevDefNew(); + def = virDomainHostdevDefNew(xmlopt); if (virXMLPropEnumDefault(node, "mode", virDomainHostdevModeTypeFromString, VIR_XML_PROP_NONE, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index cb35ff06bd..8f53ed96c0 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -364,6 +364,8 @@ struct _virDomainHostdevDef { */ virDomainNetDef *parentnet; + virObject *privateData; + virDomainHostdevMode mode; virDomainStartupPolicy startupPolicy; bool managed; @@ -3588,6 +3590,7 @@ struct _virDomainXMLPrivateDataCallbacks { virDomainXMLPrivateDataNewFunc vsockNew; virDomainXMLPrivateDataNewFunc cryptoNew; virDomainXMLPrivateDataNewFunc graphicsNew; + virDomainXMLPrivateDataNewFunc hostdevNew; virDomainXMLPrivateDataNewFunc networkNew; virDomainXMLPrivateDataNetParseFunc networkParse; virDomainXMLPrivateDataNetFormatFunc networkFormat; @@ -3797,7 +3800,7 @@ virDomainVideoDef *virDomainVideoDefNew(virDomainXMLOption *xmlopt); void virDomainVideoDefFree(virDomainVideoDef *def); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virDomainVideoDef, virDomainVideoDefFree); void virDomainVideoDefClear(virDomainVideoDef *def); -virDomainHostdevDef *virDomainHostdevDefNew(void); +virDomainHostdevDef *virDomainHostdevDefNew(virDomainXMLOption *xmlopt); void virDomainHostdevDefFree(virDomainHostdevDef *def); void virDomainHubDefFree(virDomainHubDef *def); void virDomainRedirdevDefFree(virDomainRedirdevDef *def); diff --git a/src/libxl/xen_common.c b/src/libxl/xen_common.c index 890ef11723..e6a372e078 100644 --- a/src/libxl/xen_common.c +++ b/src/libxl/xen_common.c @@ -445,7 +445,7 @@ xenParsePCI(char *entry) } } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->managed = false; hostdev->writeFiltering = filtered; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c index b2ff0edcf2..e62302736b 100644 --- a/src/libxl/xen_xl.c +++ b/src/libxl/xen_xl.c @@ -930,7 +930,7 @@ xenParseXLUSB(virConf *conf, virDomainDef *def) key = nextkey; } - hostdev = virDomainHostdevDefNew(); + hostdev = virDomainHostdevDefNew(NULL); hostdev->managed = false; hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB; hostdev->source.subsys.u.usb.bus = busNum; diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index 7700804429..a94427b027 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -376,7 +376,7 @@ lxcCreateNetDef(const char *type, static virDomainHostdevDef * lxcCreateHostdevDef(const char *data) { - virDomainHostdevDef *hostdev = virDomainHostdevDefNew(); + virDomainHostdevDef *hostdev = virDomainHostdevDefNew(NULL); hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES; hostdev->source.caps.type = VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET; hostdev->source.caps.u.net.ifname = g_strdup(data); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index ac56fc7cb4..85eea1801f 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1238,6 +1238,45 @@ qemuDomainNetworkPrivateFormat(const virDomainNetDef *net, } +static virClass *qemuDomainHostdevPrivateClass; + +static void +qemuDomainHostdevPrivateDispose(void *obj) +{ + qemuDomainHostdevPrivate *priv = obj; + + VIR_FORCE_CLOSE(priv->vfioDeviceFd); +} + + +static int +qemuDomainHostdevPrivateOnceInit(void) +{ + if (!VIR_CLASS_NEW(qemuDomainHostdevPrivate, virClassForObject())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(qemuDomainHostdevPrivate); + +virObject * +qemuDomainHostdevPrivateNew(void) +{ + qemuDomainHostdevPrivate *priv; + + if (qemuDomainHostdevPrivateInitialize() < 0) + return NULL; + + if (!(priv = virObjectNew(qemuDomainHostdevPrivateClass))) + return NULL; + + priv->vfioDeviceFd = -1; + + return (virObject *) priv; +} + + /* qemuDomainSecretInfoSetup: * @priv: pointer to domain private object * @alias: alias of the secret @@ -3563,6 +3602,7 @@ virDomainXMLPrivateDataCallbacks virQEMUDriverPrivateDataCallbacks = { .chrSourceNew = qemuDomainChrSourcePrivateNew, .vsockNew = qemuDomainVsockPrivateNew, .graphicsNew = qemuDomainGraphicsPrivateNew, + .hostdevNew = qemuDomainHostdevPrivateNew, .networkNew = qemuDomainNetworkPrivateNew, .networkParse = qemuDomainNetworkPrivateParse, .networkFormat = qemuDomainNetworkPrivateFormat, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 3396f929fd..e91435c062 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -461,6 +461,18 @@ struct _qemuDomainTPMPrivate { }; +#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \ + ((qemuDomainHostdevPrivate *) (hostdev)->privateData) + +typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate; +struct _qemuDomainHostdevPrivate { + virObject parent; + + /* VFIO device file descriptor for iommufd passthrough */ + int vfioDeviceFd; +}; + + void qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv); @@ -1174,3 +1186,9 @@ qemuDomainCheckCPU(virArch arch, bool qemuDomainMachineSupportsFloppy(const char *machine, virQEMUCaps *qemuCaps); + +virObject * +qemuDomainHostdevPrivateNew(void); + +int +qemuProcessOpenVfioFds(virDomainObj *vm); diff --git a/src/vbox/vbox_common.c b/src/vbox/vbox_common.c index 26c5fdfef6..d2a8cf8da4 100644 --- a/src/vbox/vbox_common.c +++ b/src/vbox/vbox_common.c @@ -3090,7 +3090,7 @@ vboxHostDeviceGetXMLDesc(struct _vboxDriver *data, virDomainDef *def, IMachine * def->hostdevs = g_new0(virDomainHostdevDef *, def->nhostdevs); for (i = 0; i < def->nhostdevs; i++) - def->hostdevs[i] = virDomainHostdevDefNew(); + def->hostdevs[i] = virDomainHostdevDefNew(NULL); for (i = 0; i < deviceFilters.count; i++) { PRBool active = PR_FALSE; diff --git a/tests/virhostdevtest.c b/tests/virhostdevtest.c index aec474a148..a35c1d9402 100644 --- a/tests/virhostdevtest.c +++ b/tests/virhostdevtest.c @@ -124,7 +124,7 @@ myInit(void) for (i = 0; i < nhostdevs; i++) { virDomainHostdevSubsys *subsys; - hostdevs[i] = virDomainHostdevDefNew(); + hostdevs[i] = virDomainHostdevDefNew(NULL); if (!hostdevs[i]) goto cleanup; hostdevs[i]->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; -- 2.43.0
On Tue, Jan 06, 2026 at 06:49:33PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
Introduce private data for hostdevs and allocate hostdev private data by default.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/bhyve/bhyve_parse_command.c | 2 +- src/conf/domain_conf.c | 13 +++++++++-- src/conf/domain_conf.h | 5 ++++- src/libxl/xen_common.c | 2 +- src/libxl/xen_xl.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_domain.c | 40 +++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 18 +++++++++++++++ src/vbox/vbox_common.c | 2 +- tests/virhostdevtest.c | 2 +- 10 files changed, 79 insertions(+), 9 deletions(-)
[...]
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 3396f929fd..e91435c062 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -461,6 +461,18 @@ struct _qemuDomainTPMPrivate { };
+#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \ + ((qemuDomainHostdevPrivate *) (hostdev)->privateData) + +typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate; +struct _qemuDomainHostdevPrivate { + virObject parent; + + /* VFIO device file descriptor for iommufd passthrough */ + int vfioDeviceFd; +}; + + void qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv);
@@ -1174,3 +1186,9 @@ qemuDomainCheckCPU(virArch arch, bool qemuDomainMachineSupportsFloppy(const char *machine, virQEMUCaps *qemuCaps); + +virObject * +qemuDomainHostdevPrivateNew(void); + +int +qemuProcessOpenVfioFds(virDomainObj *vm);
This function doesn't belong here. It is not introduced by this patch and it is used only within qemu_process.c.
On 1/15/2026 7:32 AM, Pavel Hrdina wrote:
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 3396f929fd..e91435c062 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -461,6 +461,18 @@ struct _qemuDomainTPMPrivate { };
+#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \ + ((qemuDomainHostdevPrivate *) (hostdev)->privateData) + +typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate; +struct _qemuDomainHostdevPrivate { + virObject parent; + + /* VFIO device file descriptor for iommufd passthrough */ + int vfioDeviceFd; +}; + + void qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv);
@@ -1174,3 +1186,9 @@ qemuDomainCheckCPU(virArch arch, bool qemuDomainMachineSupportsFloppy(const char *machine, virQEMUCaps *qemuCaps); + +virObject * +qemuDomainHostdevPrivateNew(void); + +int +qemuProcessOpenVfioFds(virDomainObj *vm); This function doesn't belong here. It is not introduced by this patch and it is used only within qemu_process.c.
Yes, I will move this to [PATCH 4/7] qemu: open VFIO FDs from libvirt backend and place it under qemu_process.h. It must have been misplaced in the wrong patch during rebasing - thanks for catching this. Nathan
From: Nathan Chen <nathanc@nvidia.com> Integrate and use the IOMMU_OPTION_RLIMIT_MODE ioctl to set per-process memory accounting for iommufd. This prevents ENOMEM errors from the default per-user memory accounting when multiple VMs under the libvirt-qemu user have their pinned memory summed and checked against a per-process RLIMIT_MEMLOCK limit. Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 4 ++ src/util/meson.build | 1 + src/util/viriommufd.c | 127 +++++++++++++++++++++++++++++++++++++++ src/util/viriommufd.h | 27 +++++++++ 5 files changed, 160 insertions(+) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h diff --git a/po/POTFILES b/po/POTFILES index f0aad35c8c..c78d2b8000 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -303,6 +303,7 @@ src/util/virhostuptime.c src/util/viridentity.c src/util/virinhibitor.c src/util/virinitctl.c +src/util/viriommufd.c src/util/viriscsi.c src/util/virjson.c src/util/virlease.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4e57e4a8f6..a8eadbfb8a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2652,6 +2652,10 @@ virInhibitorRelease; virInitctlFifos; virInitctlSetRunLevel; +# util/viriommufd.h +virIOMMUFDSetRLimitMode; +virIOMMUFDSupported; + # util/viriscsi.h virISCSIConnectionLogin; virISCSIConnectionLogout; diff --git a/src/util/meson.build b/src/util/meson.build index 4950a795cc..9fb0aa0fe7 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -46,6 +46,7 @@ util_sources = [ 'viridentity.c', 'virinhibitor.c', 'virinitctl.c', + 'viriommufd.c', 'viriscsi.c', 'virjson.c', 'virkeycode.c', diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c new file mode 100644 index 0000000000..0f87f95330 --- /dev/null +++ b/src/util/viriommufd.c @@ -0,0 +1,127 @@ +#include <config.h> + +#include "viriommufd.h" +#include "virlog.h" +#include "virerror.h" +#include "virfile.h" + +#ifdef __linux__ + +# include <sys/ioctl.h> +# include <linux/types.h> + +# ifdef HAVE_LINUX_IOMMUFD_H +# include <linux/iommufd.h> +# endif + +#define VIR_FROM_THIS VIR_FROM_NONE + +VIR_LOG_INIT("util.iommufd"); + +#ifndef IOMMU_OPTION + +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; + +# define IOMMUFD_TYPE (';') +# define IOMMUFD_CMD_OPTION 0x87 +# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +#endif + +/** + * virIOMMUFDSetRLimitMode: + * @fd: iommufd file descriptor + * @processAccounting: true for per-process, false for per-user + * + * Set RLIMIT_MEMLOCK accounting mode for the iommufd. + * + * Returns: 0 on success, -1 on error + */ +int +virIOMMUFDSetRLimitMode(int fd, bool processAccounting) +{ + struct iommu_option option = { + .size = sizeof(struct iommu_option), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .__reserved = 0, + .object_id = 0, + .val64 = processAccounting ? 1 : 0, + }; + + if (ioctl(fd, IOMMU_OPTION, &option) < 0) { + switch (errno) { + case ENOTTY: + VIR_WARN("IOMMU_OPTION ioctl not supported"); + return 0; + + case EOPNOTSUPP: + VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel"); + return 0; + + case EINVAL: + virReportSystemError(errno, "%s", + _("invalid iommufd option parameters")); + return -1; + + case EPERM: + VIR_WARN("Permission denied for IOMMU_OPTION ioctl. " + "Per-user-based memory accounting to be used by default."); + return 0; + + default: + virReportSystemError(errno, "%s", + _("failed to set iommufd option")); + return -1; + } + } + + VIR_DEBUG("Set iommufd rlimit mode to %s-based accounting", + processAccounting ? "process" : "user"); + return 0; +} + +/** + * virIOMMUFDSupported: Check for presence of /dev/iommu on host. + * + * Returns true if the file exists and false if it does not. + */ +bool +virIOMMUFDSupported(void) +{ + return virFileExists(VIR_IOMMU_DEV_PATH); +} + +#else + +int virIOMMUFDSetRLimitMode(int fd G_GNUC_UNUSED, + bool processAccounting G_GNUC_UNUSED) +{ + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("IOMMUFD is not supported on this platform")); + return -1; +} + +bool virIOMMUFDSupported(void) +{ + return false; +} + +#endif diff --git a/src/util/viriommufd.h b/src/util/viriommufd.h new file mode 100644 index 0000000000..ec6be9fa66 --- /dev/null +++ b/src/util/viriommufd.h @@ -0,0 +1,27 @@ +/* + * viriommufd.h: iommufd helpers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "internal.h" + +#define VIR_IOMMU_DEV_PATH "/dev/iommu" + +int virIOMMUFDSetRLimitMode(int fd, bool processAccounting); + +bool virIOMMUFDSupported(void); -- 2.43.0
On Tue, Jan 06, 2026 at 06:49:34PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
Integrate and use the IOMMU_OPTION_RLIMIT_MODE ioctl to set per-process memory accounting for iommufd. This prevents ENOMEM errors from the default per-user memory accounting when multiple VMs under the libvirt-qemu user have their pinned memory summed and checked against a per-process RLIMIT_MEMLOCK limit.
Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 4 ++ src/util/meson.build | 1 + src/util/viriommufd.c | 127 +++++++++++++++++++++++++++++++++++++++ src/util/viriommufd.h | 27 +++++++++ 5 files changed, 160 insertions(+) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h
diff --git a/po/POTFILES b/po/POTFILES index f0aad35c8c..c78d2b8000 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -303,6 +303,7 @@ src/util/virhostuptime.c src/util/viridentity.c src/util/virinhibitor.c src/util/virinitctl.c +src/util/viriommufd.c src/util/viriscsi.c src/util/virjson.c src/util/virlease.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4e57e4a8f6..a8eadbfb8a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2652,6 +2652,10 @@ virInhibitorRelease; virInitctlFifos; virInitctlSetRunLevel;
+# util/viriommufd.h +virIOMMUFDSetRLimitMode; +virIOMMUFDSupported; + # util/viriscsi.h virISCSIConnectionLogin; virISCSIConnectionLogout; diff --git a/src/util/meson.build b/src/util/meson.build index 4950a795cc..9fb0aa0fe7 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -46,6 +46,7 @@ util_sources = [ 'viridentity.c', 'virinhibitor.c', 'virinitctl.c', + 'viriommufd.c', 'viriscsi.c', 'virjson.c', 'virkeycode.c', diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c new file mode 100644 index 0000000000..0f87f95330 --- /dev/null +++ b/src/util/viriommufd.c @@ -0,0 +1,127 @@ +#include <config.h> + +#include "viriommufd.h" +#include "virlog.h" +#include "virerror.h" +#include "virfile.h" + +#ifdef __linux__ + +# include <sys/ioctl.h> +# include <linux/types.h> + +# ifdef HAVE_LINUX_IOMMUFD_H
Currently this will always be false, you need to add `linux/iommufd.h` into headers list in the meson.build file.
+# include <linux/iommufd.h> +# endif + +#define VIR_FROM_THIS VIR_FROM_NONE
Incorrect indentation. You probably don't have cppi installed, otherwise running our tests would complain about it.
+ +VIR_LOG_INIT("util.iommufd"); + +#ifndef IOMMU_OPTION
Same here, and the whole #ifndef block including #endif.
+ +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; + +# define IOMMUFD_TYPE (';') +# define IOMMUFD_CMD_OPTION 0x87 +# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +#endif + +/** + * virIOMMUFDSetRLimitMode: + * @fd: iommufd file descriptor + * @processAccounting: true for per-process, false for per-user + * + * Set RLIMIT_MEMLOCK accounting mode for the iommufd. + * + * Returns: 0 on success, -1 on error + */
I don't see this function used anywhere in this patch series. Later you implement support to open /dev/iommu, should it be called there?
+int +virIOMMUFDSetRLimitMode(int fd, bool processAccounting) +{ + struct iommu_option option = { + .size = sizeof(struct iommu_option), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .__reserved = 0, + .object_id = 0, + .val64 = processAccounting ? 1 : 0, + }; + + if (ioctl(fd, IOMMU_OPTION, &option) < 0) { + switch (errno) { + case ENOTTY: + VIR_WARN("IOMMU_OPTION ioctl not supported"); + return 0; + + case EOPNOTSUPP: + VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel"); + return 0; + + case EINVAL: + virReportSystemError(errno, "%s", + _("invalid iommufd option parameters"));
Wrong indentation.
+ return -1; + + case EPERM: + VIR_WARN("Permission denied for IOMMU_OPTION ioctl. " + "Per-user-based memory accounting to be used by default."); + return 0; + + default: + virReportSystemError(errno, "%s", + _("failed to set iommufd option"));
Wrong indentation. Pavel
On 1/15/2026 6:54 AM, Pavel Hrdina wrote:
On Tue, Jan 06, 2026 at 06:49:34PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen<nathanc@nvidia.com>
Integrate and use the IOMMU_OPTION_RLIMIT_MODE ioctl to set per-process memory accounting for iommufd. This prevents ENOMEM errors from the default per-user memory accounting when multiple VMs under the libvirt-qemu user have their pinned memory summed and checked against a per-process RLIMIT_MEMLOCK limit.
Signed-off-by: Nathan Chen<nathanc@nvidia.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 4 ++ src/util/meson.build | 1 + src/util/viriommufd.c | 127 +++++++++++++++++++++++++++++++++++++++ src/util/viriommufd.h | 27 +++++++++ 5 files changed, 160 insertions(+) create mode 100644 src/util/viriommufd.c create mode 100644 src/util/viriommufd.h
diff --git a/po/POTFILES b/po/POTFILES index f0aad35c8c..c78d2b8000 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -303,6 +303,7 @@ src/util/virhostuptime.c src/util/viridentity.c src/util/virinhibitor.c src/util/virinitctl.c +src/util/viriommufd.c src/util/viriscsi.c src/util/virjson.c src/util/virlease.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4e57e4a8f6..a8eadbfb8a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2652,6 +2652,10 @@ virInhibitorRelease; virInitctlFifos; virInitctlSetRunLevel;
+# util/viriommufd.h +virIOMMUFDSetRLimitMode; +virIOMMUFDSupported; + # util/viriscsi.h virISCSIConnectionLogin; virISCSIConnectionLogout; diff --git a/src/util/meson.build b/src/util/meson.build index 4950a795cc..9fb0aa0fe7 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -46,6 +46,7 @@ util_sources = [ 'viridentity.c', 'virinhibitor.c', 'virinitctl.c', + 'viriommufd.c', 'viriscsi.c', 'virjson.c', 'virkeycode.c', diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c new file mode 100644 index 0000000000..0f87f95330 --- /dev/null +++ b/src/util/viriommufd.c @@ -0,0 +1,127 @@ +#include <config.h> + +#include "viriommufd.h" +#include "virlog.h" +#include "virerror.h" +#include "virfile.h" + +#ifdef __linux__ + +# include <sys/ioctl.h> +# include <linux/types.h> + +# ifdef HAVE_LINUX_IOMMUFD_H Currently this will always be false, you need to add `linux/iommufd.h` into headers list in the meson.build file.
Ok, I will include this in meson.build.
+# include <linux/iommufd.h> +# endif + +#define VIR_FROM_THIS VIR_FROM_NONE Incorrect indentation. You probably don't have cppi installed, otherwise running our tests would complain about it.
+ +VIR_LOG_INIT("util.iommufd"); + +#ifndef IOMMU_OPTION Same here, and the whole #ifndef block including #endif.
I'll install cppi and correct this by adding spaces to be nested inside #ifdef __linux__, thank you for the pointer.
+ +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; + +# define IOMMUFD_TYPE (';') +# define IOMMUFD_CMD_OPTION 0x87 +# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +#endif + +/** + * virIOMMUFDSetRLimitMode: + * @fd: iommufd file descriptor + * @processAccounting: true for per-process, false for per-user + * + * Set RLIMIT_MEMLOCK accounting mode for the iommufd. + * + * Returns: 0 on success, -1 on error + */ I don't see this function used anywhere in this patch series. Later you implement support to open /dev/iommu, should it be called there?
Yes, the call to this function was removed during rebasing. I will add it back under [PATCH 5/7] qemu: open iommufd FD from libvirt backend.
+int +virIOMMUFDSetRLimitMode(int fd, bool processAccounting) +{ + struct iommu_option option = { + .size = sizeof(struct iommu_option), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .__reserved = 0, + .object_id = 0, + .val64 = processAccounting ? 1 : 0, + }; + + if (ioctl(fd, IOMMU_OPTION, &option) < 0) { + switch (errno) { + case ENOTTY: + VIR_WARN("IOMMU_OPTION ioctl not supported"); + return 0; + + case EOPNOTSUPP: + VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel"); + return 0; + + case EINVAL: + virReportSystemError(errno, "%s", + _("invalid iommufd option parameters")); Wrong indentation.
+ return -1; + + case EPERM: + VIR_WARN("Permission denied for IOMMU_OPTION ioctl. " + "Per-user-based memory accounting to be used by default."); + return 0; + + default: + virReportSystemError(errno, "%s", + _("failed to set iommufd option")); Wrong indentation.
I will indent it one more space so it starts after the opening parenthesis. Nathan
From: Nathan Chen <nathanc@nvidia.com> Open VFIO FDs from libvirt backend without exposing these FDs to XML users, i.e. one per iommufd hostdev for /dev/vfio/devices/vfioX, and pass the FD to qemu command line. Suggested-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 21 +++++++++++ src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++ src/util/virpci.c | 42 +++++++++++++++++++++ src/util/virpci.h | 2 + 5 files changed, 145 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a8eadbfb8a..0904265459 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3163,6 +3163,7 @@ virPCIDeviceGetStubDriverName; virPCIDeviceGetStubDriverType; virPCIDeviceGetUnbindFromStub; virPCIDeviceGetUsedBy; +virPCIDeviceGetVfioPath; virPCIDeviceGetVPD; virPCIDeviceHasPCIExpressLink; virPCIDeviceIsAssignable; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 98e4469c25..2a16f9df63 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4809,6 +4809,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, NULL) < 0) return NULL; + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev); + + if (hostdevPriv->vfioDeviceFd != -1) { + g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd); + if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0) + return NULL; + hostdevPriv->vfioDeviceFd = -1; + } + } + if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0) return NULL; @@ -5253,6 +5265,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd, if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) return -1; + if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdevPriv->vfioDeviceFd != -1) { + virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } + } + if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev))) return -1; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0e50cd1ccc..ab88a6bf62 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -103,6 +103,7 @@ #include "storage_source.h" #include "backup_conf.h" #include "storage_file_probe.h" +#include "virpci.h" #include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0) goto cleanup; + if (qemuProcessOpenVfioFds(vm) < 0) + goto cleanup; + if (!(cmd = qemuBuildCommandLine(vm, incoming ? "defer" : NULL, vmop, @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); virObjectUnlock(vm); } + +/** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition + * @vfioFd: returned file descriptor + * + * Opens the VFIO device file descriptor for a hostdev. + * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) +{ + g_autofree char *vfioPath = NULL; + int fd = -1; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("VFIO FD only supported for PCI hostdevs")); + return -1; + } + + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) + return -1; + + VIR_DEBUG("Opening VFIO device %s", vfioPath); + + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), + vfioPath); + } else { + virReportSystemError(errno, + _("cannot open VFIO device %1$s"), vfioPath); + } + return -1; + } + + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); + return fd; +} + +/** + * qemuProcessOpenVfioFds: + * @vm: domain object + * + * Opens all necessary VFIO file descriptors for the domain. + * + * Returns: 0 on success, -1 on failure + */ +int +qemuProcessOpenVfioFds(virDomainObj *vm) +{ + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; + } + } + + return 0; +} diff --git a/src/util/virpci.c b/src/util/virpci.c index 90617e69c6..886e2c55e6 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -3320,3 +3320,45 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) { g_free(address); } + +/** + * virPCIDeviceGetVfioPath: + * @addr: host device PCI address + * @vfioPath: returned VFIO device path + * + * Constructs the VFIO device path for a PCI hostdev. + * + * Returns: 0 on success, -1 on failure + */ +int +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, + char **vfioPath) +{ + g_autofree char *addrStr = NULL; + + *vfioPath = NULL; + addrStr = virPCIDeviceAddressAsString(addr); + + /* First try: Direct lookup in device's vfio-dev subdirectory */ + { + g_autofree char *sysfsPath = NULL; + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + if (STRPREFIX(entry->d_name, "vfio")) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find VFIO device for PCI device %1$s"), + addrStr); + return -1; +} diff --git a/src/util/virpci.h b/src/util/virpci.h index fc538566e1..24ede10755 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev); void virPCIDeviceAddressFree(virPCIDeviceAddress *address); +int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree); -- 2.43.0
On Tue, Jan 06, 2026 at 06:49:35PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
Open VFIO FDs from libvirt backend without exposing these FDs to XML users, i.e. one per iommufd hostdev for /dev/vfio/devices/vfioX, and pass the FD to qemu command line.
Suggested-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 21 +++++++++++ src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++ src/util/virpci.c | 42 +++++++++++++++++++++ src/util/virpci.h | 2 + 5 files changed, 145 insertions(+)
[...]
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0e50cd1ccc..ab88a6bf62 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -103,6 +103,7 @@ #include "storage_source.h" #include "backup_conf.h" #include "storage_file_probe.h" +#include "virpci.h"
#include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0) goto cleanup;
+ if (qemuProcessOpenVfioFds(vm) < 0) + goto cleanup; +
This call should be probably moved to qemuProcessPrepareHost().
if (!(cmd = qemuBuildCommandLine(vm, incoming ? "defer" : NULL, vmop, @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); virObjectUnlock(vm); } + +/** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition + * @vfioFd: returned file descriptor + * + * Opens the VFIO device file descriptor for a hostdev. + * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) +{ + g_autofree char *vfioPath = NULL; + int fd = -1; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("VFIO FD only supported for PCI hostdevs")); + return -1; + } + + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) + return -1; + + VIR_DEBUG("Opening VFIO device %s", vfioPath); + + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), + vfioPath); + } else { + virReportSystemError(errno, + _("cannot open VFIO device %1$s"), vfioPath); + } + return -1; + } + + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); + return fd; +} + +/** + * qemuProcessOpenVfioFds: + * @vm: domain object + * + * Opens all necessary VFIO file descriptors for the domain. + * + * Returns: 0 on success, -1 on failure + */ +int +qemuProcessOpenVfioFds(virDomainObj *vm) +{ + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; + } + } + + return 0; +}
These two functions should be moved above qemuProcessPrepareHost(), qemuProcessOpenVfioFds() can be changed to static as it is used only in this file.
diff --git a/src/util/virpci.c b/src/util/virpci.c index 90617e69c6..886e2c55e6 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -3320,3 +3320,45 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) { g_free(address); } + +/** + * virPCIDeviceGetVfioPath: + * @addr: host device PCI address + * @vfioPath: returned VFIO device path + * + * Constructs the VFIO device path for a PCI hostdev. + * + * Returns: 0 on success, -1 on failure + */ +int +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, + char **vfioPath) +{ + g_autofree char *addrStr = NULL; + + *vfioPath = NULL; + addrStr = virPCIDeviceAddressAsString(addr); + + /* First try: Direct lookup in device's vfio-dev subdirectory */ + {
There is no need for the extra {} block and also you can remove "First try:" from the comment as there is no other try.
+ g_autofree char *sysfsPath = NULL; + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + if (STRPREFIX(entry->d_name, "vfio")) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find VFIO device for PCI device %1$s"), + addrStr);
Wrong indentation. Pavel
+ return -1; +} diff --git a/src/util/virpci.h b/src/util/virpci.h index fc538566e1..24ede10755 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev);
void virPCIDeviceAddressFree(virPCIDeviceAddress *address);
+int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree); -- 2.43.0
On 1/15/2026 9:04 AM, Pavel Hrdina wrote:
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0e50cd1ccc..ab88a6bf62 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -103,6 +103,7 @@ #include "storage_source.h" #include "backup_conf.h" #include "storage_file_probe.h" +#include "virpci.h"
#include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0) goto cleanup;
+ if (qemuProcessOpenVfioFds(vm) < 0) + goto cleanup; + This call should be probably moved to qemuProcessPrepareHost().
Ok, I will move it there in the next revision.
if (!(cmd = qemuBuildCommandLine(vm, incoming ? "defer" : NULL, vmop, @@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); virObjectUnlock(vm); } + +/** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition + * @vfioFd: returned file descriptor + * + * Opens the VFIO device file descriptor for a hostdev. + * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) +{ + g_autofree char *vfioPath = NULL; + int fd = -1; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("VFIO FD only supported for PCI hostdevs")); + return -1; + } + + if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) + return -1; + + VIR_DEBUG("Opening VFIO device %s", vfioPath); + + if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), + vfioPath); + } else { + virReportSystemError(errno, + _("cannot open VFIO device %1$s"), vfioPath); + } + return -1; + } + + VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); + return fd; +} + +/** + * qemuProcessOpenVfioFds: + * @vm: domain object + * + * Opens all necessary VFIO file descriptors for the domain. + * + * Returns: 0 on success, -1 on failure + */ +int +qemuProcessOpenVfioFds(virDomainObj *vm) +{ + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; + qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; + } + } + + return 0; +}
These two functions should be moved above qemuProcessPrepareHost(), qemuProcessOpenVfioFds() can be changed to static as it is used only in this file.
Ok, I will move qemuProcessOpenVfioFds() to qemuProcessPrepareHost() and change it to static.
diff --git a/src/util/virpci.c b/src/util/virpci.c index 90617e69c6..886e2c55e6 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -3320,3 +3320,45 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) { g_free(address); } + +/** + * virPCIDeviceGetVfioPath: + * @addr: host device PCI address + * @vfioPath: returned VFIO device path + * + * Constructs the VFIO device path for a PCI hostdev. + * + * Returns: 0 on success, -1 on failure + */ +int +virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, + char **vfioPath) +{ + g_autofree char *addrStr = NULL; + + *vfioPath = NULL; + addrStr = virPCIDeviceAddressAsString(addr); + + /* First try: Direct lookup in device's vfio-dev subdirectory */ + { There is no need for the extra {} block and also you can remove "First try:" from the comment as there is no other try.
Ok, I will remove these.
+ g_autofree char *sysfsPath = NULL; + g_autoptr(DIR) dir = NULL; + struct dirent *entry = NULL; + + sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); + + if (virDirOpen(&dir, sysfsPath) == 1) { + while (virDirRead(dir, &entry, sysfsPath) > 0) { + if (STRPREFIX(entry->d_name, "vfio")) { + *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); + return 0; + } + } + } + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find VFIO device for PCI device %1$s"), + addrStr); Wrong indentation. I will indent it by one space in the next revision, thanks.
Nathan
From: Nathan Chen <nathanc@nvidia.com> Open iommufd FD from libvirt backend without exposing these FDs to XML users, i.e. one per domain for /dev/iommu, and pass the FD to qemu command line. Suggested-by: Ján Tomko <jtomko@redhat.com> Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_command.c | 13 +++++++++++-- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_process.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 2a16f9df63..5916283651 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5348,9 +5348,13 @@ qemuBuildHostdevCommandLine(virCommand *cmd, static int qemuBuildIOMMUFDCommandLine(virCommand *cmd, - const virDomainDef *def) + const virDomainDef *def, + virDomainObj *vm) { size_t i; + qemuDomainObjPrivate *priv = vm->privateData; + g_autofree char *fdstr = g_strdup_printf("%d", priv->iommufd); + for (i = 0; i < def->nhostdevs; i++) { virDomainHostdevDef *hostdev = def->hostdevs[i]; @@ -5369,8 +5373,13 @@ qemuBuildIOMMUFDCommandLine(virCommand *cmd, if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) continue; + virCommandPassFD(cmd, priv->iommufd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + + priv->iommufd = -1; + if (qemuMonitorCreateObjectProps(&props, "iommufd", "iommufd0", + "S:fd", fdstr, NULL) < 0) return -1; @@ -10996,7 +11005,7 @@ qemuBuildCommandLine(virDomainObj *vm, if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) return NULL; - if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) + if (qemuBuildIOMMUFDCommandLine(cmd, def, vm) < 0) return NULL; if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 85eea1801f..c5e1cd5279 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2042,6 +2042,7 @@ qemuDomainObjPrivateAlloc(void *opaque) priv->blockjobs = virHashNew(virObjectUnref); priv->fds = virHashNew(g_object_unref); + priv->iommufd = -1; priv->pidMonitored = -1; /* agent commands block by default, user can choose different behavior */ diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index e91435c062..18ced7ebba 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -264,6 +264,8 @@ struct _qemuDomainObjPrivate { /* named file descriptor groups associated with the VM */ GHashTable *fds; + int iommufd; + char *memoryBackingDir; }; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index ab88a6bf62..0ef46e4880 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -104,6 +104,7 @@ #include "backup_conf.h" #include "storage_file_probe.h" #include "virpci.h" +#include "viriommufd.h" #include "logging/log_manager.h" #include "logging/log_protocol.h" @@ -10365,6 +10366,37 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, virObjectUnlock(vm); } +/** + * qemuProcessOpenIommuFd: + * @vm: domain object + * @iommuFd: returned file descriptor + * + * Opens /dev/iommu file descriptor for the VM. + * + * Returns: FD on success, -1 on failure + */ +static int +qemuProcessOpenIommuFd(virDomainObj *vm) +{ + int fd = -1; + + VIR_DEBUG("Opening IOMMU FD for domain %s", vm->def->name); + + if ((fd = open(VIR_IOMMU_DEV_PATH, O_RDWR | O_CLOEXEC)) < 0) { + if (errno == ENOENT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("IOMMU FD support requires /dev/iommu device")); + } else { + virReportSystemError(errno, "%s", + _("cannot open /dev/iommu")); + } + return -1; + } + + VIR_DEBUG("Opened IOMMU FD %d for domain %s", fd, vm->def->name); + return fd; +} + /** * qemuProcessOpenVfioDeviceFd: * @hostdev: host device definition @@ -10419,6 +10451,7 @@ qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) int qemuProcessOpenVfioFds(virDomainObj *vm) { + qemuDomainObjPrivate *priv = vm->privateData; size_t i; /* Check if we have any hostdevs that need VFIO FDs */ @@ -10430,10 +10463,16 @@ qemuProcessOpenVfioFds(virDomainObj *vm) hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + /* Open VFIO device FD */ hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); if (hostdevPriv->vfioDeviceFd == -1) return -1; + + /* Open IOMMU FD */ + priv->iommufd = qemuProcessOpenIommuFd(vm); + if (priv->iommufd == -1) + return -1; } } -- 2.43.0
From: Nathan Chen <nathanc@nvidia.com> When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7dadef0739..7190a4f80f 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,21 +479,23 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms; - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) - return 0; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; - if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) - return -1; + if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) + return -1; - if (path && - qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { - return -1; - } + if (path && + qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { + return -1; + } - if (virHostdevNeedsVFIO(dev) && - qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RW, false) < 0) { - return -1; + if (virHostdevNeedsVFIO(dev) && + qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; + } } return 0; diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index c689cc3e40..907b2773cf 100644 --- a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,15 +345,17 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL; - if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) - return -1; + if (hostdev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) + return -1; - if (path) - *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); + if (path) + *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); - if (virHostdevNeedsVFIO(hostdev) && - (!hotplug || !qemuDomainNeedsVFIO(vm->def))) - *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + if (virHostdevNeedsVFIO(hostdev) && + (!hotplug || !qemuDomainNeedsVFIO(vm->def))) + *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + } return 0; } diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1; + + ret = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr); + if (ret) + return ret; + + ret = AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, ptr); + if (ret) + return ret; } - ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); - VIR_FREE(vfioGroupDev); } else { ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr); } diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 2f788b872a..fbe216637f 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -41,6 +41,7 @@ #include "virscsivhost.h" #include "virstring.h" #include "virutil.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -1282,14 +1283,32 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, + false, + &cbdata); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, - false, - &cbdata); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfiofdDev, false, &cbdata); + if (ret) + return ret; + + ret = virSecurityDACSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &cbdata); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACSetPCILabel, @@ -1443,13 +1462,33 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; - ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, vfioGroupDev, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + vfiofdDev, false); + if (ret) + return ret; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + VIR_IOMMU_DEV_PATH, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr); } diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 2f3cc274a5..05086ad9e1 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -41,6 +41,7 @@ #include "virconf.h" #include "virtpm.h" #include "virstring.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -2256,14 +2257,32 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, + false, + &data); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, - false, - &data); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev, false, &data); + if (ret) + return ret; + + ret = virSecuritySELinuxSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &data); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data); } @@ -2491,12 +2510,30 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; - ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + if (!virIOMMUFDSupported()) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false, false); + if (ret) + return ret; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, VIR_IOMMU_DEV_PATH, false, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr); } diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c index de0a826063..43046ab831 100644 --- a/src/security/virt-aa-helper.c +++ b/src/security/virt-aa-helper.c @@ -50,6 +50,7 @@ #include "virstring.h" #include "virgettext.h" #include "virhostdev.h" +#include "viriommufd.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -1114,8 +1115,9 @@ get_files(vahControl * ctl) virDeviceHostdevPCIDriverName driverName = dev->source.subsys.u.pci.driver.name; - if (driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || - driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) { + if ((driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || + driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) && + dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { needsVfio = true; } @@ -1348,6 +1350,7 @@ get_files(vahControl * ctl) virBufferAddLit(&buf, " \"/dev/vfio/vfio\" rw,\n"); virBufferAddLit(&buf, " \"/dev/vfio/[0-9]*\" rw,\n"); } + if (needsgl) { /* if using gl all sorts of further dri related paths will be needed */ virBufferAddLit(&buf, " # DRI/Mesa/(e)GL config and driver paths\n"); @@ -1385,9 +1388,18 @@ get_files(vahControl * ctl) } } - if (ctl->newfile && - vah_add_file(&buf, ctl->newfile, "rwk") != 0) { - return -1; + if (ctl->newfile) { + const char *perms = "rwk"; + + /* VFIO and iommufd devices need mmap permission */ + if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || + STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { + perms = "rwm"; + } + + if (vah_add_file(&buf, ctl->newfile, perms) != 0) { + return -1; + } } ctl->files = virBufferContentAndReset(&buf); @@ -1561,8 +1573,15 @@ main(int argc, char **argv) } } if (ctl->append && ctl->newfile) { - if (vah_add_file(&buf, ctl->newfile, "rwk") != 0) - goto cleanup; + const char *perms = "rwk"; + + if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || + STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { + perms = "rwm"; + } + + if (vah_add_file(&buf, ctl->newfile, perms) != 0) + return -1; } else { if (ctl->def->virtType == VIR_DOMAIN_VIRT_QEMU || ctl->def->virtType == VIR_DOMAIN_VIRT_KQEMU || -- 2.43.0
On Tue, Jan 06, 2026 at 06:49:37PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC
Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7dadef0739..7190a4f80f 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,21 +479,23 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms;
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) - return 0; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0;
I would change the code like this: if (dev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) return 0; ... That way you don't have to move the whole code into the if condition. This applies to changes in qemu_namespace.c as well.
- if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) - return -1; + if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) + return -1;
- if (path && - qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { - return -1; - } + if (path && + qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { + return -1; + }
- if (virHostdevNeedsVFIO(dev) && - qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RW, false) < 0) { - return -1; + if (virHostdevNeedsVFIO(dev) && + qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; + } }
return 0; diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index c689cc3e40..907b2773cf 100644 --- a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,15 +345,17 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL;
- if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) - return -1; + if (hostdev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) + return -1;
- if (path) - *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); + if (path) + *paths = g_slist_prepend(*paths, g_steal_pointer(&path));
- if (virHostdevNeedsVFIO(hostdev) && - (!hotplug || !qemuDomainNeedsVFIO(vm->def))) - *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + if (virHostdevNeedsVFIO(hostdev) && + (!hotplug || !qemuDomainNeedsVFIO(vm->def))) + *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + }
return 0; } diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1;
Move this check before we try to get vfio path as there is no need to construct the path if iommufd is not supported. We should also report error here, if virIOMMUFDSupported() fails it only sets errno. User goto done; instead of return -1; other we are going to leak ptr and pci.
+ + ret = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr); + if (ret) + return ret;
if (ret < 0) goto end;
+ + ret = AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, ptr); + if (ret) + return ret;
Don't call return or goto end; as it is not required here and we would also leak pci. I would suggest to change: virPCIDevice *pci = virPCIDeviceNew(&pcisrc->addr); into g_autoptr(virPCIDevice) pci = virPCIDeviceNew(&pcisrc->addr); remove the virPCIDeviceFree(pci); within this switch case and there is no need to care about freeing pci.
} - ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); - VIR_FREE(vfioGroupDev); } else { ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr); } diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 2f788b872a..fbe216637f 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -41,6 +41,7 @@ #include "virscsivhost.h" #include "virstring.h" #include "virutil.h" +#include "viriommufd.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
@@ -1282,14 +1283,32 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, + false, + &cbdata); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, - false, - &cbdata); + if (!virIOMMUFDSupported()) + return -1;
Same as in apparmor, move this check before virPCIDeviceGetVfioPath() and report error.
+ + ret = virSecurityDACSetHostdevLabelHelper(vfiofdDev, false, &cbdata); + if (ret) + return ret; + + ret = virSecurityDACSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &cbdata); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACSetPCILabel, @@ -1443,13 +1462,33 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1;
- ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, vfioGroupDev, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1;
Same here.
+ + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + vfiofdDev, false); + if (ret) + return ret; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + VIR_IOMMU_DEV_PATH, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr); } diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 2f3cc274a5..05086ad9e1 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -41,6 +41,7 @@ #include "virconf.h" #include "virtpm.h" #include "virstring.h" +#include "viriommufd.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
@@ -2256,14 +2257,32 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, + false, + &data); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, - false, - &data); + if (!virIOMMUFDSupported()) + return -1;
Same here.
+ + ret = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev, false, &data); + if (ret) + return ret; + + ret = virSecuritySELinuxSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &data); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data); } @@ -2491,12 +2510,30 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + if (!virIOMMUFDSupported()) + return -1;
Same here.
+ + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false, false); + if (ret) + return ret; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, VIR_IOMMU_DEV_PATH, false, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr); }
Pavel
On a Thursday in 2026, Pavel Hrdina via Devel wrote:
On Tue, Jan 06, 2026 at 06:49:37PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC
Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-)
diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1;
Move this check before we try to get vfio path as there is no need to construct the path if iommufd is not supported. We should also report error here, if virIOMMUFDSupported() fails it only sets errno.
I don't think we should get here at all without IOMMUFD being supported. Per Pavel's suggestion to patch 4/7, moving the qemuProcessOpenVfioFds call to qemuProcessPrepareHost would mean we hit that function before attempting to set security labels. Jano
User goto done; instead of return -1; other we are going to leak ptr and pci.
On 1/16/2026 8:32 AM, Ján Tomko wrote:
On a Thursday in 2026, Pavel Hrdina via Devel wrote:
On Tue, Jan 06, 2026 at 06:49:37PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen <nathanc@nvidia.com>
When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC
Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-)
diff --git a/src/security/security_apparmor.c b/src/security/ security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done;
source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev- + + if (!virIOMMUFDSupported()) + return -1;
Move this check before we try to get vfio path as there is no need to construct the path if iommufd is not supported. We should also report error here, if virIOMMUFDSupported() fails it only sets errno.
I don't think we should get here at all without IOMMUFD being supported.
Per Pavel's suggestion to patch 4/7, moving the qemuProcessOpenVfioFds call to qemuProcessPrepareHost would mean we hit that function before attempting to set security labels.
I think we can remove virIOMMUFDSupported() altogether as it is only used in the security labeling changes. As you mention, if qemuProcessOpenVfioFds() gets called in qemuProcessPrepareHost(), it will call qemuProcessOpenIommuFd() which returns -1 if the open() call to /dev/iommu fails. Then we wouldn't reach the seclabel lines here. Nathan
On 1/15/2026 10:09 AM, Pavel Hrdina wrote:
On Tue, Jan 06, 2026 at 06:49:37PM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen<nathanc@nvidia.com>
When launching a qemu VM with the iommufd feature enabled for VFIO hostdevs: - Do not allow cgroup, namespace, and seclabel access to VFIO paths (/dev/vfio/vfio and /dev/vfio/<iommugroup>) - Allow access to iommufd paths (/dev/iommu and /dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC
Signed-off-by: Nathan Chen<nathanc@nvidia.com> --- src/qemu/qemu_cgroup.c | 26 +++++++------- src/qemu/qemu_namespace.c | 16 +++++---- src/security/security_apparmor.c | 32 +++++++++++++---- src/security/security_dac.c | 59 ++++++++++++++++++++++++++------ src/security/security_selinux.c | 57 ++++++++++++++++++++++++------ src/security/virt-aa-helper.c | 33 ++++++++++++++---- 6 files changed, 170 insertions(+), 53 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7dadef0739..7190a4f80f 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,21 +479,23 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms;
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) - return 0; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; I would change the code like this:
if (dev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) return 0;
...
That way you don't have to move the whole code into the if condition.
This applies to changes in qemu_namespace.c as well.
Got it, I will do this in the next revision.
- if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) - return -1; + if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) + return -1;
- if (path && - qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { - return -1; - } + if (path && + qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { + return -1; + }
- if (virHostdevNeedsVFIO(dev) && - qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RW, false) < 0) { - return -1; + if (virHostdevNeedsVFIO(dev) && + qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; + } }
return 0; diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index c689cc3e40..907b2773cf 100644 --- a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,15 +345,17 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL;
- if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) - return -1; + if (hostdev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) + return -1;
- if (path) - *paths = g_slist_prepend(*paths, g_steal_pointer(&path)); + if (path) + *paths = g_slist_prepend(*paths, g_steal_pointer(&path));
- if (virHostdevNeedsVFIO(hostdev) && - (!hotplug || !qemuDomainNeedsVFIO(vm->def))) - *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + if (virHostdevNeedsVFIO(hostdev) && + (!hotplug || !qemuDomainNeedsVFIO(vm->def))) + *paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_VFIO)); + }
return 0; } diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 68ac39611f..362ca09562 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -848,14 +848,32 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, goto done;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); - - if (!vfioGroupDev) { - virPCIDeviceFree(pci); - goto done; + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + + if (!vfioGroupDev) { + virPCIDeviceFree(pci); + goto done; + } + ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); + VIR_FREE(vfioGroupDev); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1;
Move this check before we try to get vfio path as there is no need to construct the path if iommufd is not supported. We should also report error here, if virIOMMUFDSupported() fails it only sets errno.
User goto done; instead of return -1; other we are going to leak ptr and pci.
I plan to remove virIOMMUFDSupported() entirely as discussed in our thread with Jano. For the conditional with virPCIDeviceGetVfioPath(), I will change it to goto done; instead of return -1;
+ + ret = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr); + if (ret) + return ret; if (ret < 0) goto end;
I will change this to goto done;
+ + ret = AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, ptr); + if (ret) + return ret; Don't call return or goto end; as it is not required here and we would also leak pci.
Ok, I will omit this check.
I would suggest to change:
virPCIDevice *pci = virPCIDeviceNew(&pcisrc->addr);
into
g_autoptr(virPCIDevice) pci = virPCIDeviceNew(&pcisrc->addr);
remove the virPCIDeviceFree(pci); within this switch case and there is no need to care about freeing pci.
Ok, I will implement this to match security_dac.c and security_selinux.c.
} - ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); - VIR_FREE(vfioGroupDev); } else { ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr); } diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 2f788b872a..fbe216637f 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -41,6 +41,7 @@ #include "virscsivhost.h" #include "virstring.h" #include "virutil.h" +#include "viriommufd.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
@@ -1282,14 +1283,32 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, + false, + &cbdata); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, - false, - &cbdata); + if (!virIOMMUFDSupported()) + return -1;
Same as in apparmor, move this check before virPCIDeviceGetVfioPath() and report error.
+ + ret = virSecurityDACSetHostdevLabelHelper(vfiofdDev, false, &cbdata); + if (ret) + return ret; + + ret = virSecurityDACSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &cbdata); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACSetPCILabel, @@ -1443,13 +1462,33 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1;
- ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, vfioGroupDev, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1; + + if (!virIOMMUFDSupported()) + return -1; Same here.
+ + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + vfiofdDev, false); + if (ret) + return ret; + + ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + VIR_IOMMU_DEV_PATH, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr); } diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 2f3cc274a5..05086ad9e1 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -41,6 +41,7 @@ #include "virconf.h" #include "virtpm.h" #include "virstring.h" +#include "viriommufd.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
@@ -2256,14 +2257,32 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, + false, + &data); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, - false, - &data); + if (!virIOMMUFDSupported()) + return -1; Same here.
+ + ret = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev, false, &data); + if (ret) + return ret; + + ret = virSecuritySELinuxSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &data); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data); } @@ -2491,12 +2510,30 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
- if (!vfioGroupDev) - return -1; + if (!vfioGroupDev) + return -1; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + } else { + g_autofree char *vfiofdDev = NULL; + + if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) + return -1;
- ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); + if (!virIOMMUFDSupported()) + return -1; Same here.
+ + ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false, false); + if (ret) + return ret; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, VIR_IOMMU_DEV_PATH, false, false); + if (ret) + return ret; + } } else { ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr); } Pavel
Thanks, Nathan
From: Nathan Chen <nathanc@nvidia.com> Provide sample XML and CLI args for the iommufd XML schema for pc, q35, and virt machine types. Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- .../iommufd-q35.x86_64-latest.args | 41 +++++++++++++ .../iommufd-q35.x86_64-latest.xml | 60 +++++++++++++++++++ tests/qemuxmlconfdata/iommufd-q35.xml | 38 ++++++++++++ ...fd-virt-pci-bus-single.aarch64-latest.args | 33 ++++++++++ ...ufd-virt-pci-bus-single.aarch64-latest.xml | 34 +++++++++++ .../iommufd-virt-pci-bus-single.xml | 22 +++++++ .../iommufd-virt.aarch64-latest.args | 37 ++++++++++++ .../iommufd-virt.aarch64-latest.xml | 56 +++++++++++++++++ tests/qemuxmlconfdata/iommufd-virt.xml | 29 +++++++++ .../iommufd.x86_64-latest.args | 35 +++++++++++ .../qemuxmlconfdata/iommufd.x86_64-latest.xml | 38 ++++++++++++ tests/qemuxmlconfdata/iommufd.xml | 30 ++++++++++ tests/qemuxmlconftest.c | 34 +++++++++++ 13 files changed, 487 insertions(+) create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-virt.xml create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd.xml diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args new file mode 100644 index 0000000000..7d819e141b --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args @@ -0,0 +1,41 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-q35-test \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=q35-test,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-q35-test/master-key.aes"}' \ +-machine q35,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ +-accel tcg \ +-cpu qemu64 \ +-m size=2097152k \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-uuid 11dbdcdd-4c3b-482b-8903-9bdb8c0a2774 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"}' \ +-device '{"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"}' \ +-device '{"driver":"qemu-xhci","id":"usb","bus":"pci.1","addr":"0x0"}' \ +-blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","read-only":false}' \ +-device '{"driver":"ide-hd","bus":"ide.0","drive":"libvirt-1-storage","id":"sata0-0-0","bootindex":1}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-device '{"driver":"qxl-vga","id":"video0","max_outputs":1,"ram_size":67108864,"vram_size":33554432,"vram64_size_mb":0,"vgamem_mb":8,"bus":"pcie.0","addr":"0x1"}' \ +-global ICH9-LPC.noreboot=off \ +-watchdog-action reset \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x3"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml new file mode 100644 index 0000000000..bb76252b61 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml @@ -0,0 +1,60 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <driver name='qemu' type='raw'/> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x10'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x11'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/> + </controller> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <controller type='usb' index='0' model='qemu-xhci'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <audio id='1' type='none'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1' primary='yes'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </video> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <watchdog model='itco' action='reset'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-q35.xml b/tests/qemuxmlconfdata/iommufd-q35.xml new file mode 100644 index 0000000000..f3c2269fb1 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <controller type='sata' index='0'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1'/> + </video> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args new file mode 100644 index 0000000000..dbfd395168 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args @@ -0,0 +1,33 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-aarch64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine virt,usb=off,gic-version=2,dump-guest-core=off,memory-backend=mach-virt.ram,acpi=off \ +-accel tcg \ +-cpu cortex-a15 \ +-m size=1048576k \ +-object '{"qom-type":"memory-backend-ram","id":"mach-virt.ram","size":1073741824}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid 6ba7b810-9dad-11d1-80b4-00c04fd430c8 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x1"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml new file mode 100644 index 0000000000..97b6e1e1c7 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml @@ -0,0 +1,34 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <gic version='2'/> + </features> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>cortex-a15</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml new file mode 100644 index 0000000000..c0b9d643b4 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml @@ -0,0 +1,22 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + </os> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args new file mode 100644 index 0000000000..d5713ff826 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args @@ -0,0 +1,37 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-aarch64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine virt,usb=off,gic-version=2,dump-guest-core=off,memory-backend=mach-virt.ram,acpi=off \ +-accel tcg \ +-cpu cortex-a15 \ +-m size=1048576k \ +-object '{"qom-type":"memory-backend-ram","id":"mach-virt.ram","size":1073741824}' \ +-overcommit mem-lock=off \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid 6ba7b810-9dad-11d1-80b4-00c04fd430c8 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"pcie-root-port","port":8,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x1"}' \ +-device '{"driver":"pcie-root-port","port":9,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x1.0x1"}' \ +-device '{"driver":"pcie-root-port","port":10,"chassis":3,"id":"pci.3","bus":"pcie.0","addr":"0x1.0x2"}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pci.1","addr":"0x0"}' \ +-device '{"driver":"vfio-pci","host":"0000:07:12.5","id":"hostdev1","iommufd":"iommufd0","fd":"0","bus":"pci.2","addr":"0x0"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml new file mode 100644 index 0000000000..6b9fe543b3 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <gic version='2'/> + </features> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>cortex-a15</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x8'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x9'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='3' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='3' port='0xa'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x07' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-virt.xml b/tests/qemuxmlconfdata/iommufd-virt.xml new file mode 100644 index 0000000000..a75c1cc13b --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-virt.xml @@ -0,0 +1,29 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>6ba7b810-9dad-11d1-80b4-00c04fd430c8</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='aarch64' machine='virt'>hvm</type> + </os> + <devices> + <emulator>/usr/bin/qemu-system-aarch64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x07' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> + </hostdev> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd.x86_64-latest.args b/tests/qemuxmlconfdata/iommufd.x86_64-latest.args new file mode 100644 index 0000000000..3130ba2e3a --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.x86_64-latest.args @@ -0,0 +1,35 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-foo \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-foo/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-foo/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-foo/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=foo,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-foo/master-key.aes"}' \ +-machine pc,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ +-accel tcg \ +-cpu qemu64 \ +-m size=2097152k \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-uuid 3c7c30b5-7866-4b05-8a29-efebccba52a0 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ +-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pci.0","addr":"0x3"}' \ +-device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x2"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml new file mode 100644 index 0000000000..2e8951aaf6 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.x86_64-latest.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>3c7c30b5-7866-4b05-8a29-efebccba52a0</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='pci' index='0' model='pci-root'/> + <controller type='usb' index='0' model='piix3-uhci'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <audio id='1' type='none'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd.xml b/tests/qemuxmlconfdata/iommufd.xml new file mode 100644 index 0000000000..eb278414d2 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd.xml @@ -0,0 +1,30 @@ +<domain type='qemu'> + <name>foo</name> + <uuid>3c7c30b5-7866-4b05-8a29-efebccba52a0</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='pci' index='0' model='pci-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <driver iommufd='yes'/> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <controller type='usb' index='0'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxmlconftest.c b/tests/qemuxmlconftest.c index 5fd538d26a..6ac2cda5c3 100644 --- a/tests/qemuxmlconftest.c +++ b/tests/qemuxmlconftest.c @@ -351,6 +351,33 @@ fakeNetworkPortGetXMLDesc(virNetworkPortPtr port, } +static void +testSetupHostdevPrivateData(virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + + qemuDomainHostdevPrivate *priv; + + if (!hostdev->privateData) { + hostdev->privateData = qemuDomainHostdevPrivateNew(); + } + + priv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); + /* Use a placeholder FD value for tests */ + priv->vfioDeviceFd = 0; + } + } +} + + static virNetworkDriver fakeNetworkDriver = { .networkLookupByName = fakeNetworkLookupByName, .networkGetXMLDesc = fakeNetworkGetXMLDesc, @@ -404,6 +431,8 @@ testCompareXMLToArgvCreateArgs(virQEMUDriver *drv, if (testQemuPrepareHostBackendChardevOne(NULL, priv->monConfig, vm) < 0) return NULL; + testSetupHostdevPrivateData(vm->def); + for (i = 0; i < vm->def->ndisks; i++) { virDomainDiskDef *disk = vm->def->disks[i]; virStorageSource *src; @@ -3050,6 +3079,11 @@ mymain(void) DO_TEST_CAPS_LATEST_PARSE_ERROR("virtio-iommu-dma-translation"); DO_TEST_CAPS_LATEST("acpi-generic-initiator"); + DO_TEST_CAPS_LATEST("iommufd"); + DO_TEST_CAPS_LATEST("iommufd-q35"); + DO_TEST_CAPS_ARCH_LATEST("iommufd-virt", "aarch64"); + DO_TEST_CAPS_ARCH_LATEST("iommufd-virt-pci-bus-single", "aarch64"); + DO_TEST_CAPS_LATEST("cpu-hotplug-startup"); DO_TEST_CAPS_ARCH_LATEST_PARSE_ERROR("cpu-hotplug-granularity", "ppc64"); -- 2.43.0
participants (3)
-
Ján Tomko -
Nathan Chen -
Pavel Hrdina