[libvirt PATCH 00/11] Introduce support to pass iommu FD to libvirt
Pavel Hrdina (11): conf: Move and rename virStorageSourceFDTuple object conf: Refactor virHostdevIsPCIDevice hypervisor: Fix virHostdevNeedsVFIO detection qemu: Expand call to qemuDomainNeedsVFIO qemu: Update qemuDomainNeedsVFIO to ignore PCI hostdev with IOMMUFD src: Use virHostdevIsPCIDeviceWith* to check for IOMMUFD conf: Introduce domain iommufd element qemu: Implement iommufd conf: Add iommufd fdgroup support qemu: Implement iommufd fdgroup tests: Add iommufd fdgroup test docs/formatdomain.rst | 27 +++++ src/conf/domain_conf.c | 114 +++++++++++++++++- src/conf/domain_conf.h | 9 ++ src/conf/domain_validate.c | 16 +++ src/conf/meson.build | 1 + src/conf/schemas/domaincommon.rng | 15 +++ src/conf/storage_source_conf.c | 42 ------- src/conf/storage_source_conf.h | 24 +--- src/conf/virdomainfd.c | 52 ++++++++ src/conf/virdomainfd.h | 27 +++++ src/hypervisor/virhostdev.c | 2 +- src/libvirt_private.syms | 7 +- src/qemu/qemu_backup.c | 2 +- src/qemu/qemu_cgroup.c | 2 +- src/qemu/qemu_command.c | 6 +- src/qemu/qemu_domain.c | 21 +++- src/qemu/qemu_domain.h | 3 +- src/qemu/qemu_driver.c | 6 +- src/qemu/qemu_hotplug.c | 6 +- src/qemu/qemu_namespace.c | 2 +- src/qemu/qemu_process.c | 55 ++++++++- src/qemu/qemu_processpriv.h | 2 + src/qemu/qemu_validate.c | 2 +- src/security/security_apparmor.c | 2 +- src/security/security_dac.c | 4 +- src/security/security_selinux.c | 4 +- src/security/virt-aa-helper.c | 2 +- tests/genericxml2xmlindata/iommufd.xml | 18 +++ tests/genericxml2xmltest.c | 2 + .../iommufd-q35-fd.x86_64-latest.args | 41 +++++++ .../iommufd-q35-fd.x86_64-latest.xml | 60 +++++++++ tests/qemuxmlconfdata/iommufd-q35-fd.xml | 38 ++++++ tests/qemuxmlconftest.c | 9 +- tests/testutilsqemu.c | 2 +- 34 files changed, 522 insertions(+), 103 deletions(-) create mode 100644 src/conf/virdomainfd.c create mode 100644 src/conf/virdomainfd.h create mode 100644 tests/genericxml2xmlindata/iommufd.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args 
create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.xml -- 2.53.0
From: Pavel Hrdina <phrdina@redhat.com> Associating FD can be used by other parts of VM so rename it to generic virDomainFDTuple. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/meson.build | 1 + src/conf/storage_source_conf.c | 42 --------------------------- src/conf/storage_source_conf.h | 24 ++-------------- src/conf/virdomainfd.c | 52 ++++++++++++++++++++++++++++++++++ src/conf/virdomainfd.h | 27 ++++++++++++++++++ src/libvirt_private.syms | 5 +++- src/qemu/qemu_backup.c | 2 +- src/qemu/qemu_domain.c | 2 +- src/qemu/qemu_driver.c | 6 ++-- tests/testutilsqemu.c | 2 +- 10 files changed, 92 insertions(+), 71 deletions(-) create mode 100644 src/conf/virdomainfd.c create mode 100644 src/conf/virdomainfd.h diff --git a/src/conf/meson.build b/src/conf/meson.build index 5116c23fe3..6f95b23cce 100644 --- a/src/conf/meson.build +++ b/src/conf/meson.build @@ -20,6 +20,7 @@ domain_conf_sources = [ 'numa_conf.c', 'snapshot_conf.c', 'virdomaincheckpointobjlist.c', + 'virdomainfd.c', 'virdomainjob.c', 'virdomainmomentobjlist.c', 'virdomainobjlist.c', diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c index e5f20fba80..010b44ccb0 100644 --- a/src/conf/storage_source_conf.c +++ b/src/conf/storage_source_conf.c @@ -1417,48 +1417,6 @@ virStorageSourceInitiatorClear(virStorageSourceInitiatorDef *initiator) VIR_FREE(initiator->iqn); } -G_DEFINE_TYPE(virStorageSourceFDTuple, vir_storage_source_fd_tuple, G_TYPE_OBJECT); - -static void -vir_storage_source_fd_tuple_init(virStorageSourceFDTuple *fdt G_GNUC_UNUSED) -{ -} - - -static void -virStorageSourceFDTupleFinalize(GObject *object) -{ - virStorageSourceFDTuple *fdt = VIR_STORAGE_SOURCE_FD_TUPLE(object); - size_t i; - - if (!fdt) - return; - - for (i = 0; i < fdt->nfds; i++) - VIR_FORCE_CLOSE(fdt->fds[i]); - - g_free(fdt->fds); - g_free(fdt->testfds); - g_free(fdt->selinuxLabel); - G_OBJECT_CLASS(vir_storage_source_fd_tuple_parent_class)->finalize(object); -} - - -static void 
-vir_storage_source_fd_tuple_class_init(virStorageSourceFDTupleClass *klass) -{ - GObjectClass *obj = G_OBJECT_CLASS(klass); - - obj->finalize = virStorageSourceFDTupleFinalize; -} - - -virStorageSourceFDTuple * -virStorageSourceFDTupleNew(void) -{ - return g_object_new(vir_storage_source_fd_tuple_get_type(), NULL); -} - /** * virStorageSourceNetworkProtocolPathSplit: diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h index d3a9b0e7a2..5ddcebb282 100644 --- a/src/conf/storage_source_conf.h +++ b/src/conf/storage_source_conf.h @@ -24,6 +24,7 @@ #include "storage_encryption_conf.h" #include "virbitmap.h" #include "virconftypes.h" +#include "virdomainfd.h" #include "virenum.h" #include "virobject.h" #include "virpci.h" @@ -269,27 +270,6 @@ struct _virStorageSourceSlice { void virStorageSourceSliceFree(virStorageSourceSlice *slice); -struct _virStorageSourceFDTuple { - GObject parent; - int *fds; - size_t nfds; - int *testfds; /* populated by tests to ensure stable FDs */ - - bool writable; - bool tryRestoreLabel; - - /* connection this FD tuple is associated with for auto-closing */ - virConnect *conn; - - /* original selinux label when we relabel the image */ - char *selinuxLabel; -}; -G_DECLARE_FINAL_TYPE(virStorageSourceFDTuple, vir_storage_source_fd_tuple, VIR, STORAGE_SOURCE_FD_TUPLE, GObject); - -virStorageSourceFDTuple * -virStorageSourceFDTupleNew(void); - - typedef struct _virStorageSource virStorageSource; /* Stores information related to a host resource. 
In the case of backing @@ -442,7 +422,7 @@ struct _virStorageSource { * one event for it */ bool thresholdEventWithIndex; - virStorageSourceFDTuple *fdtuple; + virDomainFDTuple *fdtuple; /* Setting 'seclabelSkipRemember' to true will cause the security driver to * not remember the security label even if it otherwise were to be diff --git a/src/conf/virdomainfd.c b/src/conf/virdomainfd.c new file mode 100644 index 0000000000..13c3161e6a --- /dev/null +++ b/src/conf/virdomainfd.c @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#include <config.h> + +#include "virdomainfd.h" + +#include "virfile.h" + +G_DEFINE_TYPE(virDomainFDTuple, vir_domain_fd_tuple, G_TYPE_OBJECT); + + +static void +vir_domain_fd_tuple_init(virDomainFDTuple *fdt G_GNUC_UNUSED) +{ +} + + +static void +virDomainFDTupleFinalize(GObject *object) +{ + virDomainFDTuple *fdt = VIR_DOMAIN_FD_TUPLE(object); + size_t i; + + if (!fdt) + return; + + for (i = 0; i < fdt->nfds; i++) + VIR_FORCE_CLOSE(fdt->fds[i]); + + g_free(fdt->fds); + g_free(fdt->testfds); + g_free(fdt->selinuxLabel); + G_OBJECT_CLASS(vir_domain_fd_tuple_parent_class)->finalize(object); +} + + +static void +vir_domain_fd_tuple_class_init(virDomainFDTupleClass *klass) +{ + GObjectClass *obj = G_OBJECT_CLASS(klass); + + obj->finalize = virDomainFDTupleFinalize; +} + + +virDomainFDTuple * +virDomainFDTupleNew(void) +{ + return g_object_new(vir_domain_fd_tuple_get_type(), NULL); +} diff --git a/src/conf/virdomainfd.h b/src/conf/virdomainfd.h new file mode 100644 index 0000000000..0c0d475ed6 --- /dev/null +++ b/src/conf/virdomainfd.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#pragma once + +#include "internal.h" + +struct _virDomainFDTuple { + GObject parent; + int *fds; + size_t nfds; + int *testfds; /* populated by tests to ensure stable FDs */ + + bool writable; + bool tryRestoreLabel; + + /* connection this FD tuple is associated with for auto-closing */ + virConnect *conn; + + /* 
original selinux label when we relabel the image */ + char *selinuxLabel; +}; +G_DECLARE_FINAL_TYPE(virDomainFDTuple, vir_domain_fd_tuple, VIR, DOMAIN_FD_TUPLE, GObject); + +virDomainFDTuple * +virDomainFDTupleNew(void); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f5acf46bc0..cd028c488a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1180,7 +1180,6 @@ virStorageSourceChainHasManagedPR; virStorageSourceChainHasNVMe; virStorageSourceClear; virStorageSourceCopy; -virStorageSourceFDTupleNew; virStorageSourceGetActualType; virStorageSourceGetSecurityLabelDef; virStorageSourceHasBacking; @@ -1233,6 +1232,10 @@ virDomainCheckpointUpdateRelations; virDomainListCheckpoints; +# conf/virdomainfd.h +virDomainFDTupleNew; + + #conf/virdomainjob.h virDomainAgentJobTypeToString; virDomainAsyncJobTypeFromString; diff --git a/src/qemu/qemu_backup.c b/src/qemu/qemu_backup.c index 44514d08fc..d380dd3a63 100644 --- a/src/qemu/qemu_backup.c +++ b/src/qemu/qemu_backup.c @@ -876,7 +876,7 @@ qemuBackupBegin(virDomainObj *vm, priv->backup = g_steal_pointer(&def); if (pull && priv->backup->server->fdgroup) { - virStorageSourceFDTuple *fdt = NULL; + virDomainFDTuple *fdt = NULL; VIR_AUTOCLOSE fdcopy = -1; if (!(fdt = virHashLookup(priv->fds, priv->backup->server->fdgroup))) { diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 6fdca4be09..90d0f02612 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -9835,7 +9835,7 @@ qemuDomainPrepareStorageSourceFDs(virStorageSource *src, { qemuDomainStorageSourcePrivate *srcpriv = NULL; virStorageType actualType = virStorageSourceGetActualType(src); - virStorageSourceFDTuple *fdt = NULL; + virDomainFDTuple *fdt = NULL; size_t i; if (actualType != VIR_STORAGE_TYPE_FILE && diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 8b148b33b4..5ef3ec649f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -20269,7 +20269,7 @@ 
qemuDomainFDHashCloseConnect(virDomainObj *vm, virConnectPtr conn) { qemuDomainObjPrivate *priv = QEMU_DOMAIN_PRIVATE(vm); - virStorageSourceFDTuple *data; + virDomainFDTuple *data; GHashTableIter htitr; if (!priv->fds) @@ -20293,7 +20293,7 @@ qemuDomainFDAssociate(virDomainPtr domain, { virDomainObj *vm = NULL; qemuDomainObjPrivate *priv; - g_autoptr(virStorageSourceFDTuple) new = NULL; + g_autoptr(virDomainFDTuple) new = NULL; size_t i; int ret = -1; @@ -20311,7 +20311,7 @@ qemuDomainFDAssociate(virDomainPtr domain, priv = vm->privateData; - new = virStorageSourceFDTupleNew(); + new = virDomainFDTupleNew(); new->nfds = nfds; new->fds = g_new0(int, new->nfds); for (i = 0; i < new->nfds; i++) { diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index e00e52d2a8..e9bdbdbbe7 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -712,7 +712,7 @@ testQemuInfoSetArgs(testQemuInfo *info, break; case ARG_FD_GROUP: { - virStorageSourceFDTuple *new = virStorageSourceFDTupleNew(); + virDomainFDTuple *new = virDomainFDTupleNew(); const char *fdname = va_arg(argptr, char *); VIR_AUTOCLOSE fakefd = open("/dev/zero", O_RDWR); bool writable = va_arg(argptr, int); -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:47 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Associating FD can be used by other parts of VM so rename it to generic virDomainFDTuple.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/meson.build | 1 + src/conf/storage_source_conf.c | 42 --------------------------- src/conf/storage_source_conf.h | 24 ++-------------- src/conf/virdomainfd.c | 52 ++++++++++++++++++++++++++++++++++ src/conf/virdomainfd.h | 27 ++++++++++++++++++ src/libvirt_private.syms | 5 +++- src/qemu/qemu_backup.c | 2 +- src/qemu/qemu_domain.c | 2 +- src/qemu/qemu_driver.c | 6 ++-- tests/testutilsqemu.c | 2 +- 10 files changed, 92 insertions(+), 71 deletions(-) create mode 100644 src/conf/virdomainfd.c create mode 100644 src/conf/virdomainfd.h
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> Future patches will need to check if the host device uses IOMMUFD or not but we also need to keep a function that will check only if it is PCI device. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index b1a73afef2..01e2bd631a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -33026,6 +33026,33 @@ virHostdevIsMdevDevice(const virDomainHostdevDef *hostdev) } +static bool +virHostdevPCIDevHasIOMMUFD(const virDomainHostdevDef *hostdev) +{ + return hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES; +} + + +static bool +virHostdevIsPCIDeviceImpl(const virDomainHostdevDef *hostdev, + virTristateBool iommufd) +{ + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + return false; + + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + return false; + + if (iommufd != VIR_TRISTATE_BOOL_ABSENT) { + bool hasIOMMUFD = iommufd == VIR_TRISTATE_BOOL_YES; + return hasIOMMUFD == virHostdevPCIDevHasIOMMUFD(hostdev); + } + + return true; +} + + /** * virHostdevIsPCIDevice: * @hostdev: host device to check @@ -33035,8 +33062,7 @@ virHostdevIsMdevDevice(const virDomainHostdevDef *hostdev) bool virHostdevIsPCIDevice(const virDomainHostdevDef *hostdev) { - return hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && - hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; + return virHostdevIsPCIDeviceImpl(hostdev, VIR_TRISTATE_BOOL_ABSENT); } @@ -33049,9 +33075,7 @@ virHostdevIsPCIDevice(const virDomainHostdevDef *hostdev) bool virHostdevIsPCIDeviceWithIOMMUFD(const virDomainHostdevDef *hostdev) { - return virHostdevIsPCIDevice(hostdev) && - hostdev->source.subsys.u.pci.driver.name == 
VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && - hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES; + return virHostdevIsPCIDeviceImpl(hostdev, VIR_TRISTATE_BOOL_YES); } -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:48 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Future patches will need to check if the host device uses IOMMUFD or not but we also need to keep a function that will check only if it is PCI device.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index b1a73afef2..01e2bd631a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -33026,6 +33026,33 @@ virHostdevIsMdevDevice(const virDomainHostdevDef *hostdev) }
+static bool +virHostdevPCIDevHasIOMMUFD(const virDomainHostdevDef *hostdev) +{ + return hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && + hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES; +} + + +static bool +virHostdevIsPCIDeviceImpl(const virDomainHostdevDef *hostdev, + virTristateBool iommufd) +{ + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + return false; + + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + return false; + + if (iommufd != VIR_TRISTATE_BOOL_ABSENT) { + bool hasIOMMUFD = iommufd == VIR_TRISTATE_BOOL_YES; + return hasIOMMUFD == virHostdevPCIDevHasIOMMUFD(hostdev);
I guess using 'virTristateBoolToBool' wouldn't make this any better.
+ } + + return true; +} + + /** * virHostdevIsPCIDevice: * @hostdev: host device to check
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> Function virHostdevNeedsVFIO is used only in QEMU to figure out if the host device needs access to /dev/vfio/vfio, for PCI host devices that is true only if libvirt is not using IOMMUFD. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 13 +++++++++++++ src/conf/domain_conf.h | 3 +++ src/hypervisor/virhostdev.c | 2 +- src/libvirt_private.syms | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 01e2bd631a..2d6ae64210 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -33079,6 +33079,19 @@ virHostdevIsPCIDeviceWithIOMMUFD(const virDomainHostdevDef *hostdev) } +/** + * virHostdevIsPCIDeviceWithoutIOMMUFD: + * @hostdev: host device to check + * + * Returns true if @hostdev is a PCI device with IOMMUFD disabled, false otherwise. + */ +bool +virHostdevIsPCIDeviceWithoutIOMMUFD(const virDomainHostdevDef *hostdev) +{ + return virHostdevIsPCIDeviceImpl(hostdev, VIR_TRISTATE_BOOL_NO); +} + + static void virDomainObjGetMessagesIOErrorsSrc(virStorageSource *src, const char *diskdst, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e63230beec..018daef4c7 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -4735,6 +4735,9 @@ virHostdevIsPCIDevice(const virDomainHostdevDef *hostdev) bool virHostdevIsPCIDeviceWithIOMMUFD(const virDomainHostdevDef *hostdev) ATTRIBUTE_NONNULL(1); +bool +virHostdevIsPCIDeviceWithoutIOMMUFD(const virDomainHostdevDef *hostdev) + ATTRIBUTE_NONNULL(1); void virDomainObjGetMessagesIOErrorsChain(virStorageSource *src, diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c index 19907c76ba..0d5e582c08 100644 --- a/src/hypervisor/virhostdev.c +++ b/src/hypervisor/virhostdev.c @@ -2536,7 +2536,7 @@ virHostdevUpdateActiveNVMeDevices(virHostdevManager *hostdev_mgr, bool virHostdevNeedsVFIO(const virDomainHostdevDef *hostdev) { - return 
virHostdevIsPCIDevice(hostdev) || + return virHostdevIsPCIDeviceWithoutIOMMUFD(hostdev) || virHostdevIsMdevDevice(hostdev); } diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index cd028c488a..8dbbd7f64d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -817,6 +817,7 @@ virDomainQemuMonitorEventStateRegisterID; virHostdevIsMdevDevice; virHostdevIsPCIDevice; virHostdevIsPCIDeviceWithIOMMUFD; +virHostdevIsPCIDeviceWithoutIOMMUFD; virHostdevIsSCSIDevice; -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:49 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Function virHostdevNeedsVFIO is used only in QEMU to figure out if the host device needs access to /dev/vfio/vfio, for PCI host devices that is true only if libvirt is not using IOMMUFD.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 13 +++++++++++++ src/conf/domain_conf.h | 3 +++ src/hypervisor/virhostdev.c | 2 +- src/libvirt_private.syms | 1 + 4 files changed, 18 insertions(+), 1 deletion(-)
[...]
diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c index 19907c76ba..0d5e582c08 100644 --- a/src/hypervisor/virhostdev.c +++ b/src/hypervisor/virhostdev.c @@ -2536,7 +2536,7 @@ virHostdevUpdateActiveNVMeDevices(virHostdevManager *hostdev_mgr,
While it seems for now obvious what's happening in the function I'd suggest adding a comment about why the function, if the device uses iommufd, returns false here.
bool virHostdevNeedsVFIO(const virDomainHostdevDef *hostdev) { - return virHostdevIsPCIDevice(hostdev) || + return virHostdevIsPCIDeviceWithoutIOMMUFD(hostdev) || virHostdevIsMdevDevice(hostdev); }
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> The function qemuDomainNeedsVFIO() was originally used by other parts of qemu code to figure out if the VM needs /dev/vfio/vfio. Later it was also used by code calculating locked memory limit for all architectures, and after that change again and used only for PPC64. Now it needs to be changed again due to IOMMUFD support, the /dev/vfio/vfio device is used by QEMU only if IOMMUFD is not used but for accounting we should most likely still consider any PCI host device. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 90d0f02612..4520c3c28d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8259,7 +8259,10 @@ getPPC64MemLockLimitBytes(virDomainDef *def) passthroughLimit = maxMemory + 128 * (1ULL<<30) / 512 * nPCIHostBridges + 8192; - } else if (qemuDomainNeedsVFIO(def) || virDomainDefHasVDPANet(def)) { + } else if (virDomainDefHasPCIHostdev(def) || + virDomainDefHasMdevHostdev(def) || + virDomainDefHasNVMeDisk(def) || + virDomainDefHasVDPANet(def)) { /* For regular (non-NVLink2 present) VFIO passthrough, the value * of passthroughLimit is: * -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:50 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
The function qemuDomainNeedsVFIO() was originally used by other parts of qemu code to figure out if the VM needs /dev/vfio/vfio.
Later it was also used by the code calculating the locked memory limit for all architectures, and after that it was changed again to be used only for PPC64.
Now it needs to be changed again due to IOMMUFD support, the /dev/vfio/vfio device is used by QEMU only if IOMMUFD is not used but for accounting we should most likely still consider any PCI host
I guess the reason for uncertainty is the absence of hardware, right?
device.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 90d0f02612..4520c3c28d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8259,7 +8259,10 @@ getPPC64MemLockLimitBytes(virDomainDef *def) passthroughLimit = maxMemory + 128 * (1ULL<<30) / 512 * nPCIHostBridges + 8192; - } else if (qemuDomainNeedsVFIO(def) || virDomainDefHasVDPANet(def)) { + } else if (virDomainDefHasPCIHostdev(def) ||
I'd suggest adding a comment stating that qemuDomainNeedsVFIO is not used here to preserve old limits in cases when iommufd may be used. Or something stating why this was done in this place.
+ virDomainDefHasMdevHostdev(def) || + virDomainDefHasNVMeDisk(def) || + virDomainDefHasVDPANet(def)) { /* For regular (non-NVLink2 present) VFIO passthrough, the value * of passthroughLimit is: * -- 2.53.0
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
On Fri, Mar 20, 2026 at 09:19:43AM +0100, Peter Krempa wrote:
On Thu, Mar 19, 2026 at 17:36:50 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
The function qemuDomainNeedsVFIO() was originally used by other parts of qemu code to figure out if the VM needs /dev/vfio/vfio.
Later it was also used by the code calculating the locked memory limit for all architectures, and after that it was changed again to be used only for PPC64.
Now it needs to be changed again due to IOMMUFD support, the /dev/vfio/vfio device is used by QEMU only if IOMMUFD is not used but for accounting we should most likely still consider any PCI host
I guess the reason for uncertainty is the absence of hardware, right?
IOMMUFD can be used with existing network devices as well; it's a replacement for the old VFIO container. It still requires increasing the locked memory limit; the question is whether the limit should be the same as with the VFIO container. So for now we can use the same logic for both. Pavel
From: Pavel Hrdina <phrdina@redhat.com> This function is used to figure out if VM needs access to /dev/vfio/vfio. In case of PCI host devices that is true only if IOMMUFD is not enabled. This fixes error when hotplugging PCI host device with IOMMUFD disabled to a VM that already has PCI host device with IOMMIFD enabled: Could not open '/dev/vfio/vfio': No such file or directory The function is used in this case to check if /dev/vfio/vfio was already made available to QEMU or not. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 14 ++++++++++++++ src/conf/domain_conf.h | 3 +++ src/libvirt_private.syms | 1 + src/qemu/qemu_domain.c | 2 +- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 2d6ae64210..562803ea87 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -32764,6 +32764,20 @@ virDomainDefHasPCIHostdevWithIOMMUFD(const virDomainDef *def) } +bool +virDomainDefHasPCIHostdevWithoutIOMMUFD(const virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nhostdevs; i++) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(def->hostdevs[i])) + return true; + } + + return false; +} + + bool virDomainDefHasMdevHostdev(const virDomainDef *def) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 018daef4c7..3b4980394e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -4674,6 +4674,9 @@ virDomainDefHasPCIHostdev(const virDomainDef *def); bool virDomainDefHasPCIHostdevWithIOMMUFD(const virDomainDef *def); +bool +virDomainDefHasPCIHostdevWithoutIOMMUFD(const virDomainDef *def); + bool virDomainDefHasMdevHostdev(const virDomainDef *def); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 8dbbd7f64d..cf0e71cc6a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -349,6 +349,7 @@ virDomainDefHasOldStyleROUEFI; virDomainDefHasOldStyleUEFI; virDomainDefHasPCIHostdev; virDomainDefHasPCIHostdevWithIOMMUFD; 
+virDomainDefHasPCIHostdevWithoutIOMMUFD; virDomainDefHasTimer; virDomainDefHasUSB; virDomainDefHasVcpusOffline; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 4520c3c28d..1d00ff2845 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -9284,7 +9284,7 @@ qemuDomainSupportsVideoVga(const virDomainVideoDef *video, bool qemuDomainNeedsVFIO(const virDomainDef *def) { - return virDomainDefHasPCIHostdev(def) || + return virDomainDefHasPCIHostdevWithoutIOMMUFD(def) || virDomainDefHasMdevHostdev(def) || virDomainDefHasNVMeDisk(def); } -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:51 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
This function is used to figure out if VM needs access to /dev/vfio/vfio. In case of PCI host devices that is true only if IOMMUFD is not enabled.
This fixes an error when hotplugging a PCI host device with IOMMUFD disabled to a VM that already has a PCI host device with IOMMUFD enabled:
Could not open '/dev/vfio/vfio': No such file or directory
The function is used in this case to check if /dev/vfio/vfio was already made available to QEMU or not.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/conf/domain_conf.c | 14 ++++++++++++++ src/conf/domain_conf.h | 3 +++ src/libvirt_private.syms | 1 + src/qemu/qemu_domain.c | 2 +- 4 files changed, 19 insertions(+), 1 deletion(-)
[...]
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 4520c3c28d..1d00ff2845 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -9284,7 +9284,7 @@ qemuDomainSupportsVideoVga(const virDomainVideoDef *video,
This function really now needs a comment stating why/what and the quirks.
bool qemuDomainNeedsVFIO(const virDomainDef *def) { - return virDomainDefHasPCIHostdev(def) || + return virDomainDefHasPCIHostdevWithoutIOMMUFD(def) || virDomainDefHasMdevHostdev(def) || virDomainDefHasNVMeDisk(def); }
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_cgroup.c | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_namespace.c | 2 +- src/qemu/qemu_validate.c | 2 +- src/security/security_apparmor.c | 2 +- src/security/security_dac.c | 4 ++-- src/security/security_selinux.c | 4 ++-- src/security/virt-aa-helper.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 6148990f19..0e1815f571 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,7 +479,7 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms; - if (dev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) + if (virHostdevIsPCIDeviceWithIOMMUFD(dev)) return 0; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index d0df7b7826..7286fd8b83 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5266,7 +5266,7 @@ qemuBuildHostdevCommandLine(virCommand *cmd, if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) return -1; - if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) { qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); qemuFDPassDirectTransferCommand(hostdevPriv->vfioDeviceFd, cmd); diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index fb0734193d..4a063064f1 100644 --- a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,7 +345,7 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL; - if (hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) + if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) return 0; if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index f8a15374c9..b3db2c71d8 100644 
--- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -2791,7 +2791,7 @@ qemuValidateDomainDeviceDefHostdev(const virDomainHostdevDef *hostdev, return -1; } - if (hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) { if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_IOMMUFD)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("IOMMUFD is not supported by this version of qemu")); diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 40f13ec1a5..e53486ee0c 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -847,7 +847,7 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); if (!vfioGroupDev) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 5aa13741e6..05ab7ec2f9 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1283,7 +1283,7 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); if (!vfioGroupDev) @@ -1454,7 +1454,7 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); if (!vfioGroupDev) diff --git a/src/security/security_selinux.c 
b/src/security/security_selinux.c index 89546e3316..0824217f24 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -2255,7 +2255,7 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); if (!vfioGroupDev) @@ -2499,7 +2499,7 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, return -1; if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); if (!vfioGroupDev) diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c index e932e79dab..14b202bf7b 100644 --- a/src/security/virt-aa-helper.c +++ b/src/security/virt-aa-helper.c @@ -1133,7 +1133,7 @@ get_files(vahControl * ctl) if ((driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) && - dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) { needsVfio = true; } -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:52 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_cgroup.c | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_namespace.c | 2 +- src/qemu/qemu_validate.c | 2 +- src/security/security_apparmor.c | 2 +- src/security/security_dac.c | 4 ++-- src/security/security_selinux.c | 4 ++-- src/security/virt-aa-helper.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 6148990f19..0e1815f571 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -479,7 +479,7 @@ qemuSetupHostdevCgroup(virDomainObj *vm, g_autofree char *path = NULL; int perms;
- if (dev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) + if (virHostdevIsPCIDeviceWithIOMMUFD(dev)) return 0;
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index d0df7b7826..7286fd8b83 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5266,7 +5266,7 @@ qemuBuildHostdevCommandLine(virCommand *cmd, if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) return -1;
- if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) { qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
qemuFDPassDirectTransferCommand(hostdevPriv->vfioDeviceFd, cmd); diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c index fb0734193d..4a063064f1 100644 --- a/src/qemu/qemu_namespace.c +++ b/src/qemu/qemu_namespace.c @@ -345,7 +345,7 @@ qemuDomainSetupHostdev(virDomainObj *vm, { g_autofree char *path = NULL;
- if (hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES)
At least this function gets called on any 'hostdev' not just PCI, so this patch fixes the invalid access to the union here, because 'pci' variant might not have been filled. That likely happens in the two cases above too but I didn't check closely. That should be mentioned in the commit message, as well as adding a 'Fixes: 7d2f91f9cb572ab95d0916bdd1a46dd198874529' tag (unless there are more commits which added such invalid access)
+ if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) return 0;
if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index f8a15374c9..b3db2c71d8 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -2791,7 +2791,7 @@ qemuValidateDomainDeviceDefHostdev(const virDomainHostdevDef *hostdev, return -1; }
- if (hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) {
^^^^
if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_IOMMUFD)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("IOMMUFD is not supported by this version of qemu")); diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index 40f13ec1a5..e53486ee0c 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -847,7 +847,7 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, return -1;
if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { - if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + if (virHostdevIsPCIDeviceWithoutIOMMUFD(dev)) {
^^^^^^^ Having virHostdevIsPCIDeviceWithIOMMUFD and virHostdevIsPCIDeviceWithoutIOMMUFD is really confusing BTW ... especially when mixed in one commit.
g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci);
if (!vfioGroupDev)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> In addition to configuring IOMMUFD for each host device add configuration for the whole VM. This will be extended to add support for passing FD to libvirt from management applications. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- docs/formatdomain.rst | 21 ++++++++++++ src/conf/domain_conf.c | 46 ++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/conf/schemas/domaincommon.rng | 12 +++++++ tests/genericxml2xmlindata/iommufd.xml | 18 ++++++++++ tests/genericxml2xmltest.c | 2 ++ 6 files changed, 101 insertions(+) create mode 100644 tests/genericxml2xmlindata/iommufd.xml diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 9f245293e6..f6096b2b9b 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1382,6 +1382,27 @@ Block I/O Tuning ``write_iops_sec`` Write I/O operations per second limit. :since:`Since 1.2.2` +Host Device IOMMUFD +------------------- + +:: + + <domain> + ... + <iommufd enabled='yes'/> + ... + </domain> + +``iommufd`` + :since:`Since 12.2.0 (QEMU/KVM only)` The optional ``iommufd`` element with + mandatory ``enabled`` attribute can be used to enable IOMMUFD backned for + VFIO host devices. This provides an interface to propagate DMA mappings to + kernel for assigned devices. Libvirt will open the /dev/iommu and VFIO device + cdev and pass associated file descriptors to QEMU. + + This controls IOMMUFD usage for all host devices, each device can change this + global default by setting ``iommufd`` attribute for ``driver`` element. 
+ Resource partitioning --------------------- diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 562803ea87..950c755ad9 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -19883,6 +19883,31 @@ virDomainDefControllersParse(virDomainDef *def, return 0; } +static int +virDomainDefIommufdParse(virDomainDef *def, + xmlXPathContextPtr ctxt) +{ + int n; + g_autofree xmlNodePtr *nodes = NULL; + + if ((n = virXPathNodeSet("./iommufd", ctxt, &nodes)) < 0) + return -1; + + if (n > 1) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("only one 'iommufd' element is supported")); + return -1; + } + + if (n == 0) + return 0; + + if (virXMLPropTristateBool(nodes[0], "enabled", VIR_XML_PROP_REQUIRED, &def->iommufd) < 0) + return -1; + + return 0; +} + static virDomainDef * virDomainDefParseXML(xmlXPathContextPtr ctxt, virDomainXMLOption *xmlopt, @@ -19961,6 +19986,9 @@ virDomainDefParseXML(xmlXPathContextPtr ctxt, !virDomainIOThreadIDArrayHasPin(def)) def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; + if (virDomainDefIommufdParse(def, ctxt) < 0) + return NULL; + if ((n = virXPathNodeSet("./resource", ctxt, &nodes)) < 0) return NULL; @@ -28172,6 +28200,22 @@ virDomainHubDefFormat(virBuffer *buf, } +static void +virDomainDefIommufdFormat(virBuffer *buf, + virDomainDef *def) +{ + g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER; + + if (def->iommufd == VIR_TRISTATE_BOOL_ABSENT) + return; + + virBufferAsprintf(&attrBuf, " enabled='%s'", + virTristateBoolTypeToString(def->iommufd)); + + virXMLFormatElement(buf, "iommufd", &attrBuf, NULL); +} + + static void virDomainResourceDefFormat(virBuffer *buf, virDomainResourceDef *def) @@ -29721,6 +29765,8 @@ virDomainDefFormatInternalSetRootName(virDomainDef *def, if (virDomainNumatuneFormatXML(buf, def->numa) < 0) return -1; + virDomainDefIommufdFormat(buf, def); + virDomainResourceDefFormat(buf, def->resource); for (i = 0; i < def->nsysinfo; i++) { diff --git a/src/conf/domain_conf.h 
b/src/conf/domain_conf.h index 3b4980394e..f7e2eb6f5e 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -3244,6 +3244,8 @@ struct _virDomainDef { virTristateSwitch apic_eoi; virDomainFeatureTCG *tcg_features; + virTristateBool iommufd; + bool tseg_specified; unsigned long long tseg_size; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index 376218118d..0436ec8edc 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -1025,6 +1025,10 @@ <ref name="numatune"/> </optional> + <optional> + <ref name="iommufd"/> + </optional> + <optional> <ref name="respartition"/> </optional> @@ -1368,6 +1372,14 @@ </element> </define> + <define name="iommufd"> + <element name="iommufd"> + <attribute name="enabled"> + <ref name="virYesNo"/> + </attribute> + </element> + </define> + <define name="respartition"> <element name="resource"> <optional> diff --git a/tests/genericxml2xmlindata/iommufd.xml b/tests/genericxml2xmlindata/iommufd.xml new file mode 100644 index 0000000000..63ea839383 --- /dev/null +++ b/tests/genericxml2xmlindata/iommufd.xml @@ -0,0 +1,18 @@ +<domain type='kvm'> + <name>foo</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>1</vcpu> + <iommufd enabled='yes'/> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + </devices> +</domain> diff --git a/tests/genericxml2xmltest.c b/tests/genericxml2xmltest.c index 6757fc44de..6be694cac5 100644 --- a/tests/genericxml2xmltest.c +++ b/tests/genericxml2xmltest.c @@ -263,6 +263,8 @@ mymain(void) DO_TEST("iothreadids"); + DO_TEST("iommufd"); + virObjectUnref(caps); virObjectUnref(xmlopt); -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:53 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
In addition to configuring IOMMUFD for each host device add configuration for the whole VM. This will be extended to add support for passing FD to libvirt from management applications.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- docs/formatdomain.rst | 21 ++++++++++++ src/conf/domain_conf.c | 46 ++++++++++++++++++++++++++ src/conf/domain_conf.h | 2 ++ src/conf/schemas/domaincommon.rng | 12 +++++++ tests/genericxml2xmlindata/iommufd.xml | 18 ++++++++++ tests/genericxml2xmltest.c | 2 ++ 6 files changed, 101 insertions(+) create mode 100644 tests/genericxml2xmlindata/iommufd.xml
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 9f245293e6..f6096b2b9b 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1382,6 +1382,27 @@ Block I/O Tuning ``write_iops_sec`` Write I/O operations per second limit. :since:`Since 1.2.2`
+Host Device IOMMUFD +------------------- + +:: + + <domain> + ... + <iommufd enabled='yes'/> + ... + </domain> + +``iommufd`` + :since:`Since 12.2.0 (QEMU/KVM only)` The optional ``iommufd`` element with + mandatory ``enabled`` attribute can be used to enable IOMMUFD backned for
backend
+ VFIO host devices. This provides an interface to propagate DMA mappings to + kernel for assigned devices. Libvirt will open the /dev/iommu and VFIO device + cdev and pass associated file descriptors to QEMU. + + This controls IOMMUFD usage for all host devices, each device can change this + global default by setting ``iommufd`` attribute for ``driver`` element. + Resource partitioning ---------------------
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> Ideally this should be done in qemuDomainHostdevDefPostParse but that would require a lot of refactoring mainly due to how interface backed by hostdev works. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_domain.c | 12 +++++++++--- src/qemu/qemu_domain.h | 3 ++- src/qemu/qemu_hotplug.c | 2 +- src/qemu/qemu_process.c | 4 ++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 1d00ff2845..e667d74f1a 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -10110,10 +10110,12 @@ qemuDomainPrepareHostdevSCSI(virDomainHostdevDef *hostdev, static int -qemuDomainPrepareHostdevPCI(virDomainHostdevDef *hostdev, +qemuDomainPrepareHostdevPCI(const virDomainDef *def, + virDomainHostdevDef *hostdev, virQEMUCaps *qemuCaps) { virDeviceHostdevPCIDriverName *driverName = &hostdev->source.subsys.u.pci.driver.name; + virDomainHostdevSubsysPCI *pcisrc = &hostdev->source.subsys.u.pci; /* assign defaults for hostdev passthrough */ switch (*driverName) { @@ -10150,12 +10152,16 @@ qemuDomainPrepareHostdevPCI(virDomainHostdevDef *hostdev, return -1; } + if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_ABSENT) + pcisrc->driver.iommufd = def->iommufd; + return 0; } int -qemuDomainPrepareHostdev(virDomainHostdevDef *hostdev, +qemuDomainPrepareHostdev(const virDomainDef *def, + virDomainHostdevDef *hostdev, qemuDomainObjPrivate *priv) { if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) @@ -10165,7 +10171,7 @@ qemuDomainPrepareHostdev(virDomainHostdevDef *hostdev, case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI: return qemuDomainPrepareHostdevSCSI(hostdev, priv); case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI: - return qemuDomainPrepareHostdevPCI(hostdev, priv->qemuCaps); + return qemuDomainPrepareHostdevPCI(def, hostdev, priv->qemuCaps); case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB: case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI_HOST: case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: diff 
--git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 5755d2adb0..f797542a87 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -1040,7 +1040,8 @@ qemuDomainDiskCachemodeFlags(virDomainDiskCache cachemode, bool *noflush); int -qemuDomainPrepareHostdev(virDomainHostdevDef *hostdev, +qemuDomainPrepareHostdev(const virDomainDef *def, + virDomainHostdevDef *hostdev, qemuDomainObjPrivate *priv); char * qemuDomainGetManagedPRSocketPath(qemuDomainObjPrivate *priv); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index d6be851e61..adae94f0a2 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2891,7 +2891,7 @@ qemuDomainAttachHostDevice(virQEMUDriver *driver, return -1; } - if (qemuDomainPrepareHostdev(hostdev, vm->privateData) < 0) + if (qemuDomainPrepareHostdev(vm->def, hostdev, vm->privateData) < 0) return -1; switch (hostdev->source.subsys.type) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index ecd05b4bf6..fed6079ad2 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -6087,7 +6087,7 @@ qemuProcessPrepareDomainNetwork(virDomainObj *vm) /* For hostdev present in qemuProcessPrepareDomain() phase this was * done already, but this code runs after that, so we have to call * it ourselves. */ - if (qemuDomainPrepareHostdev(hostdev, priv) < 0) + if (qemuDomainPrepareHostdev(def, hostdev, priv) < 0) return -1; virDomainHostdevInsert(def, hostdev); @@ -6875,7 +6875,7 @@ qemuProcessPrepareDomainHostdevs(virDomainObj *vm, for (i = 0; i < vm->def->nhostdevs; i++) { virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; - if (qemuDomainPrepareHostdev(hostdev, priv) < 0) + if (qemuDomainPrepareHostdev(vm->def, hostdev, priv) < 0) return -1; } -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:54 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Ideally this should be done in qemuDomainHostdevDefPostParse but that would require a lot of refactoring mainly due to how interface backed by hostdev works.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_domain.c | 12 +++++++++--- src/qemu/qemu_domain.h | 3 ++- src/qemu/qemu_hotplug.c | 2 +- src/qemu/qemu_process.c | 4 ++-- 4 files changed, 14 insertions(+), 7 deletions(-)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> This will allow management applications running libvirt without necessary permissions to pass FD for /dev/iommu with per-process locked memory accounting enabled. Kernel uses per-user locked memory accounting by default which may cause error while starting multiple VMs with host devices using IOMMUFD. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- docs/formatdomain.rst | 8 +++++++- src/conf/domain_conf.c | 7 +++++++ src/conf/domain_conf.h | 1 + src/conf/domain_validate.c | 16 ++++++++++++++++ src/conf/schemas/domaincommon.rng | 3 +++ tests/genericxml2xmlindata/iommufd.xml | 2 +- 6 files changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index f6096b2b9b..412b2b4ddd 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1389,7 +1389,7 @@ Host Device IOMMUFD <domain> ... - <iommufd enabled='yes'/> + <iommufd enabled='yes' fdgroup='iommu'/> ... </domain> @@ -1403,6 +1403,12 @@ Host Device IOMMUFD This controls IOMMUFD usage for all host devices, each device can change this global default by setting ``iommufd`` attribute for ``driver`` element. + Optional ``fdgroup`` attribute can be used together with + ``virDomainFDAssociate()`` to pass /dev/iommu FD instead of letting + libvirt to open it. Caller is responsible for setting per-process locked + memory accounting otherwise starting multiple VMs with host devices using + IOMMUFD may fail. 
+ Resource partitioning --------------------- diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 950c755ad9..71488a0840 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4242,6 +4242,8 @@ void virDomainDefFree(virDomainDef *def) g_free(def->kvm_features); g_free(def->tcg_features); + g_free(def->iommufd_fdgroup); + virBlkioDeviceArrayClear(def->blkio.devices, def->blkio.ndevices); g_free(def->blkio.devices); @@ -19905,6 +19907,8 @@ virDomainDefIommufdParse(virDomainDef *def, if (virXMLPropTristateBool(nodes[0], "enabled", VIR_XML_PROP_REQUIRED, &def->iommufd) < 0) return -1; + def->iommufd_fdgroup = virXMLPropString(nodes[0], "fdgroup"); + return 0; } @@ -28212,6 +28216,9 @@ virDomainDefIommufdFormat(virBuffer *buf, virBufferAsprintf(&attrBuf, " enabled='%s'", virTristateBoolTypeToString(def->iommufd)); + if (def->iommufd_fdgroup) + virBufferAsprintf(&attrBuf, " fdgroup='%s'", def->iommufd_fdgroup); + virXMLFormatElement(buf, "iommufd", &attrBuf, NULL); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index f7e2eb6f5e..75acfc46bf 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -3245,6 +3245,7 @@ struct _virDomainDef { virDomainFeatureTCG *tcg_features; virTristateBool iommufd; + char *iommufd_fdgroup; bool tseg_specified; unsigned long long tseg_size; diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c index 7e3da84767..a49156061f 100644 --- a/src/conf/domain_validate.c +++ b/src/conf/domain_validate.c @@ -2003,6 +2003,19 @@ virDomainDefValidateThrottleGroups(const virDomainDef *def) } +static int +virDomainDefValidateIommufd(const virDomainDef *def) +{ + if (def->iommufd == VIR_TRISTATE_BOOL_NO && def->iommufd_fdgroup) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Setting 'fdgroup' when 'iommufd' is disabled is not supported.")); + return -1; + } + + return 0; +} + + static int virDomainDefValidateInternal(const virDomainDef *def, virDomainXMLOption *xmlopt) @@ 
-2064,6 +2077,9 @@ virDomainDefValidateInternal(const virDomainDef *def, if (virDomainDefValidateThrottleGroups(def) < 0) return -1; + if (virDomainDefValidateIommufd(def) < 0) + return -1; + return 0; } diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index 0436ec8edc..db1dcd3bb7 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -1377,6 +1377,9 @@ <attribute name="enabled"> <ref name="virYesNo"/> </attribute> + <optional> + <attribute name="fdgroup"/> + </optional> </element> </define> diff --git a/tests/genericxml2xmlindata/iommufd.xml b/tests/genericxml2xmlindata/iommufd.xml index 63ea839383..10d59ca548 100644 --- a/tests/genericxml2xmlindata/iommufd.xml +++ b/tests/genericxml2xmlindata/iommufd.xml @@ -4,7 +4,7 @@ <memory unit='KiB'>219136</memory> <currentMemory unit='KiB'>219136</currentMemory> <vcpu placement='static'>1</vcpu> - <iommufd enabled='yes'/> + <iommufd enabled='yes' fdgroup='iommu'/> <os> <type arch='i686' machine='pc'>hvm</type> <boot dev='hd'/> -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:55 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
This will allow management applications running libvirt without necessary permissions to pass FD for /dev/iommu with per-process locked memory accounting enabled.
Kernel uses per-user locked memory accounting by default which may cause error while starting multiple VMs with host devices using IOMMUFD.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- docs/formatdomain.rst | 8 +++++++- src/conf/domain_conf.c | 7 +++++++ src/conf/domain_conf.h | 1 + src/conf/domain_validate.c | 16 ++++++++++++++++ src/conf/schemas/domaincommon.rng | 3 +++ tests/genericxml2xmlindata/iommufd.xml | 2 +- 6 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index f6096b2b9b..412b2b4ddd 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1389,7 +1389,7 @@ Host Device IOMMUFD
<domain> ... - <iommufd enabled='yes'/> + <iommufd enabled='yes' fdgroup='iommu'/> ... </domain>
@@ -1403,6 +1403,12 @@ Host Device IOMMUFD This controls IOMMUFD usage for all host devices, each device can change this global default by setting ``iommufd`` attribute for ``driver`` element.
+ Optional ``fdgroup`` attribute can be used together with + ``virDomainFDAssociate()`` to pass /dev/iommu FD instead of letting
Consider linking the API docs: `virDomainFDAssociate() <html/libvirt-libvirt-domain.html#virDomainFDAssociate>`__
+ libvirt to open it. Caller is responsible for setting per-process locked + memory accounting otherwise starting multiple VMs with host devices using + IOMMUFD may fail. + Resource partitioning ---------------------
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 950c755ad9..71488a0840 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4242,6 +4242,8 @@ void virDomainDefFree(virDomainDef *def) g_free(def->kvm_features); g_free(def->tcg_features);
+ g_free(def->iommufd_fdgroup); + virBlkioDeviceArrayClear(def->blkio.devices, def->blkio.ndevices); g_free(def->blkio.devices); @@ -19905,6 +19907,8 @@ virDomainDefIommufdParse(virDomainDef *def, if (virXMLPropTristateBool(nodes[0], "enabled", VIR_XML_PROP_REQUIRED, &def->iommufd) < 0) return -1;
+ def->iommufd_fdgroup = virXMLPropString(nodes[0], "fdgroup"); + return 0; }
@@ -28212,6 +28216,9 @@ virDomainDefIommufdFormat(virBuffer *buf, virBufferAsprintf(&attrBuf, " enabled='%s'", virTristateBoolTypeToString(def->iommufd));
+ if (def->iommufd_fdgroup) + virBufferAsprintf(&attrBuf, " fdgroup='%s'", def->iommufd_fdgroup);
User-originated XML values *must* be formatted using virBufferEscapeString (which doesn't require the NULL check).
+ virXMLFormatElement(buf, "iommufd", &attrBuf, NULL);
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> When fdgroup is used for iommufd we will start QEMU with -object iommufd even if the VM has no host device. When virDomainFDAssociate() is used the FD libvirt is holding is closed with connection. Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_command.c | 4 +++- src/qemu/qemu_hotplug.c | 4 ++-- src/qemu/qemu_process.c | 47 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7286fd8b83..7801d99738 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5367,8 +5367,10 @@ qemuBuildIOMMUFDCommandLine(virCommand *cmd, qemuDomainObjPrivate *priv = vm->privateData; g_autoptr(virJSONValue) props = NULL; - if (!virDomainDefHasPCIHostdevWithIOMMUFD(def)) + if (!virDomainDefHasPCIHostdevWithIOMMUFD(def) && + !def->iommufd_fdgroup) { return 0; + } qemuFDPassDirectTransferCommand(priv->iommufd, cmd); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index adae94f0a2..c86ebc59d0 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1615,7 +1615,7 @@ qemuDomainAttachHostPCIDevice(virQEMUDriver *driver, if (qemuProcessOpenVfioDeviceFd(vm, hostdev) < 0) goto error; - if (!priv->iommufdState) { + if (!priv->iommufdState && !vm->def->iommufd_fdgroup) { if (qemuProcessOpenIommuFd(vm) < 0) goto error; @@ -5041,7 +5041,7 @@ qemuDomainRemoveHostDevice(virQEMUDriver *driver, } } - if (priv->iommufdState && + if (priv->iommufdState && !vm->def->iommufd_fdgroup && !virDomainDefHasPCIHostdevWithIOMMUFD(vm->def)) { qemuDomainObjEnterMonitor(vm); ignore_value(qemuMonitorDelObject(priv->mon, "iommufd0", false)); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index fed6079ad2..c78fb4273c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -7743,6 +7743,44 @@ qemuProcessOpenIommuFd(virDomainObj *vm) return 0; } +/** + * qemuProcessPrepareIommuFd: + * 
@vm: domain object + * + * Find passed FD via virDomainFDAssociate() API for the VM. + * + * Returns: 0 on success, -1 on failure + */ +static int +qemuProcessPrepareIommuFd(virDomainObj *vm) +{ + qemuDomainObjPrivate *priv = vm->privateData; + virDomainFDTuple *fdt = virHashLookup(priv->fds, vm->def->iommufd_fdgroup); + VIR_AUTOCLOSE iommufd = -1; + + if (!fdt) { + virReportError(VIR_ERR_INVALID_ARG, + _("file descriptor group '%1$s' was not associated with the domain"), + vm->def->iommufd_fdgroup); + return -1; + } + + if (fdt->nfds != 1) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("Only one file descriptor needs to be associated with iommufd")); + return -1; + } + + iommufd = dup(fdt->fds[0]); + + if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, iommufd) < 0) + return -1; + + priv->iommufd = qemuFDPassDirectNew("iommufd", &iommufd); + + return 0; +} + /** * qemuProcessOpenVfioDeviceFd: * @hostdev: host device definition @@ -7798,9 +7836,12 @@ qemuProcessPrepareHostHostdev(virDomainObj *vm) } /* Open IOMMU FD */ - if (virDomainDefHasPCIHostdevWithIOMMUFD(vm->def) && - qemuProcessOpenIommuFd(vm) < 0) { - return -1; + if (vm->def->iommufd_fdgroup) { + if (qemuProcessPrepareIommuFd(vm) < 0) + return -1; + } else if (virDomainDefHasPCIHostdevWithIOMMUFD(vm->def)) { + if (qemuProcessOpenIommuFd(vm) < 0) + return -1; } return 0; -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:56 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
When fdgroup is used for iommufd, we will start QEMU with -object iommufd even if the VM has no host device. When virDomainFDAssociate() is used, the FD that libvirt is holding is closed together with the connection.
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_command.c | 4 +++- src/qemu/qemu_hotplug.c | 4 ++-- src/qemu/qemu_process.c | 47 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7286fd8b83..7801d99738 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5367,8 +5367,10 @@ qemuBuildIOMMUFDCommandLine(virCommand *cmd, qemuDomainObjPrivate *priv = vm->privateData; g_autoptr(virJSONValue) props = NULL;
- if (!virDomainDefHasPCIHostdevWithIOMMUFD(def)) + if (!virDomainDefHasPCIHostdevWithIOMMUFD(def) && + !def->iommufd_fdgroup) { return 0; + }
qemuFDPassDirectTransferCommand(priv->iommufd, cmd);
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index adae94f0a2..c86ebc59d0 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1615,7 +1615,7 @@ qemuDomainAttachHostPCIDevice(virQEMUDriver *driver, if (qemuProcessOpenVfioDeviceFd(vm, hostdev) < 0) goto error;
- if (!priv->iommufdState) { + if (!priv->iommufdState && !vm->def->iommufd_fdgroup) { if (qemuProcessOpenIommuFd(vm) < 0) goto error;
@@ -5041,7 +5041,7 @@ qemuDomainRemoveHostDevice(virQEMUDriver *driver, } }
- if (priv->iommufdState && + if (priv->iommufdState && !vm->def->iommufd_fdgroup && !virDomainDefHasPCIHostdevWithIOMMUFD(vm->def)) { qemuDomainObjEnterMonitor(vm); ignore_value(qemuMonitorDelObject(priv->mon, "iommufd0", false)); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index fed6079ad2..c78fb4273c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -7743,6 +7743,44 @@ qemuProcessOpenIommuFd(virDomainObj *vm) return 0; }
+/** + * qemuProcessPrepareIommuFd: + * @vm: domain object + * + * Find passed FD via virDomainFDAssociate() API for the VM. + * + * Returns: 0 on success, -1 on failure + */ +static int +qemuProcessPrepareIommuFd(virDomainObj *vm)
Since this function prepares the iommufd only when the FD was passed in via FD passing, the function name really should reflect that.
+{ + qemuDomainObjPrivate *priv = vm->privateData; + virDomainFDTuple *fdt = virHashLookup(priv->fds, vm->def->iommufd_fdgroup); + VIR_AUTOCLOSE iommufd = -1; + + if (!fdt) { + virReportError(VIR_ERR_INVALID_ARG, + _("file descriptor group '%1$s' was not associated with the domain"), + vm->def->iommufd_fdgroup); + return -1; + } + + if (fdt->nfds != 1) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("Only one file descriptor needs to be associated with iommufd")); + return -1; + } + + iommufd = dup(fdt->fds[0]); + + if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, iommufd) < 0) + return -1;
I wanted to complain that this doesn't look right (setting the 'image' label on the FD), but noticed that the other branch, for when qemu opens it, does the same, and additionally the selinux driver also internally sets the image label.
+ + priv->iommufd = qemuFDPassDirectNew("iommufd", &iommufd); + + return 0;
With the function renamed: Reviewed-by: Peter Krempa <pkrempa@redhat.com>
From: Pavel Hrdina <phrdina@redhat.com> Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_process.c | 12 ++-- src/qemu/qemu_processpriv.h | 2 + .../iommufd-q35-fd.x86_64-latest.args | 41 +++++++++++++ .../iommufd-q35-fd.x86_64-latest.xml | 60 +++++++++++++++++++ tests/qemuxmlconfdata/iommufd-q35-fd.xml | 38 ++++++++++++ tests/qemuxmlconftest.c | 9 ++- 6 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.xml diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c78fb4273c..14bc88b5cc 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -7751,7 +7751,7 @@ qemuProcessOpenIommuFd(virDomainObj *vm) * * Returns: 0 on success, -1 on failure */ -static int +int qemuProcessPrepareIommuFd(virDomainObj *vm) { qemuDomainObjPrivate *priv = vm->privateData; @@ -7771,10 +7771,14 @@ qemuProcessPrepareIommuFd(virDomainObj *vm) return -1; } - iommufd = dup(fdt->fds[0]); + if (fdt->testfds) { + iommufd = dup2(fdt->fds[0], fdt->testfds[0]); + } else { + iommufd = dup(fdt->fds[0]); - if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, iommufd) < 0) - return -1; + if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, iommufd) < 0) + return -1; + } priv->iommufd = qemuFDPassDirectNew("iommufd", &iommufd); diff --git a/src/qemu/qemu_processpriv.h b/src/qemu/qemu_processpriv.h index 0ba5897f40..39cb7dd0dc 100644 --- a/src/qemu/qemu_processpriv.h +++ b/src/qemu/qemu_processpriv.h @@ -37,3 +37,5 @@ void qemuProcessHandleDeviceDeleted(qemuMonitor *mon, const char *devAlias); int qemuProcessQMPInitMonitor(qemuMonitor *mon); + +int qemuProcessPrepareIommuFd(virDomainObj *vm); diff --git a/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args b/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args new file 
mode 100644 index 0000000000..7df3d173f3 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args @@ -0,0 +1,41 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/var/lib/libvirt/qemu/domain--1-q35-test \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.local/share \ +XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.cache \ +XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ +/usr/bin/qemu-system-x86_64 \ +-name guest=q35-test,debug-threads=on \ +-S \ +-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-q35-test/master-key.aes"}' \ +-machine q35,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ +-accel tcg \ +-cpu qemu64 \ +-m size=2097152k \ +-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-uuid 11dbdcdd-4c3b-482b-8903-9bdb8c0a2774 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device '{"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"}' \ +-device '{"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"}' \ +-device '{"driver":"qemu-xhci","id":"usb","bus":"pci.1","addr":"0x0"}' \ +-blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","read-only":false}' \ +-device '{"driver":"ide-hd","bus":"ide.0","drive":"libvirt-1-storage","id":"sata0-0-0","bootindex":1}' \ +-audiodev '{"id":"audio1","driver":"none"}' \ +-device '{"driver":"qxl-vga","id":"video0","max_outputs":1,"ram_size":67108864,"vram_size":33554432,"vram64_size_mb":0,"vgamem_mb":8,"bus":"pcie.0","addr":"0x1"}' \ +-global ICH9-LPC.noreboot=off \ +-watchdog-action reset \ 
+-object '{"qom-type":"iommufd","id":"iommufd0","fd":"20"}' \ +-device '{"driver":"vfio-pci","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x3"}' \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml new file mode 100644 index 0000000000..a6be49cbb3 --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml @@ -0,0 +1,60 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <iommufd enabled='yes' fdgroup='iommu'/> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <driver name='qemu' type='raw'/> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x10'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x11'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/> + </controller> + <controller type='sata' index='0'> + <address type='pci' 
domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <controller type='usb' index='0' model='qemu-xhci'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <audio id='1' type='none'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1' primary='yes'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </video> + <hostdev mode='subsystem' type='pci' managed='yes'> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <watchdog model='itco' action='reset'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconfdata/iommufd-q35-fd.xml b/tests/qemuxmlconfdata/iommufd-q35-fd.xml new file mode 100644 index 0000000000..1cef31fffa --- /dev/null +++ b/tests/qemuxmlconfdata/iommufd-q35-fd.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>q35-test</name> + <uuid>11dbdcdd-4c3b-482b-8903-9bdb8c0a2774</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='KiB'>2097152</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <iommufd enabled='yes' fdgroup='iommu'/> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='sda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='pci' index='0' model='pcie-root'/> + <hostdev mode='subsystem' type='pci' managed='yes'> + <source> + <address domain='0x0000' bus='0x06' slot='0x12' function='0x5'/> + </source> + 
<address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </hostdev> + <controller type='sata' index='0'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <video> + <model type='qxl' ram='65536' vram='32768' vgamem='8192' heads='1'/> + </video> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxmlconftest.c b/tests/qemuxmlconftest.c index 32eb42dd19..3b0c212577 100644 --- a/tests/qemuxmlconftest.c +++ b/tests/qemuxmlconftest.c @@ -28,6 +28,9 @@ #define LIBVIRT_QEMU_CAPSPRIV_H_ALLOW #include "qemu/qemu_capspriv.h" +#define LIBVIRT_QEMU_PROCESSPRIV_H_ALLOW +#include "qemu/qemu_processpriv.h" + #include "testutilsqemu.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -98,7 +101,9 @@ testQemuPrepareHostdev(virDomainObj *vm) } } - if (virDomainDefHasPCIHostdevWithIOMMUFD(vm->def)) { + if (vm->def->iommufd_fdgroup) { + ignore_value(qemuProcessPrepareIommuFd(vm)); + } else if (virDomainDefHasPCIHostdevWithIOMMUFD(vm->def)) { int iommufd = 0; priv->iommufd = qemuFDPassDirectNew("iommufd", &iommufd); } @@ -2815,6 +2820,8 @@ mymain(void) DO_TEST_CAPS_LATEST("iommufd"); DO_TEST_CAPS_LATEST("iommufd-q35"); + DO_TEST_CAPS_ARCH_LATEST_FULL("iommufd-q35-fd", "x86_64", + ARG_FD_GROUP, "iommu", false, 1, 20); DO_TEST_CAPS_ARCH_LATEST("iommufd-virt", "aarch64"); DO_TEST_CAPS_ARCH_LATEST("iommufd-virt-pci-bus-single", "aarch64"); -- 2.53.0
On Thu, Mar 19, 2026 at 17:36:57 +0100, Pavel Hrdina via Devel wrote:
From: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com> --- src/qemu/qemu_process.c | 12 ++-- src/qemu/qemu_processpriv.h | 2 + .../iommufd-q35-fd.x86_64-latest.args | 41 +++++++++++++ .../iommufd-q35-fd.x86_64-latest.xml | 60 +++++++++++++++++++ tests/qemuxmlconfdata/iommufd-q35-fd.xml | 38 ++++++++++++ tests/qemuxmlconftest.c | 9 ++- 6 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.args create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.x86_64-latest.xml create mode 100644 tests/qemuxmlconfdata/iommufd-q35-fd.xml
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c78fb4273c..14bc88b5cc 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -7751,7 +7751,7 @@ qemuProcessOpenIommuFd(virDomainObj *vm) * * Returns: 0 on success, -1 on failure */ -static int +int
Add a note that it's exported only for testing. Reviewed-by: Peter Krempa <pkrempa@redhat.com>
participants (2)
-
Pavel Hrdina -
Peter Krempa