On a Wednesday in 2025, Nathan Chen via Devel wrote:
Add support for parsing multiple IOMMU devices from the VM definition when "smmuv3Dev" is the IOMMU model.
Signed-off-by: Nathan Chen <nathanc@nvidia.com> --- src/conf/domain_conf.c | 84 +++++++++++++---- src/conf/domain_conf.h | 9 +- src/conf/domain_validate.c | 32 ++++--- src/conf/schemas/domaincommon.rng | 4 +- src/libvirt_private.syms | 2 + src/qemu/qemu_alias.c | 15 ++- src/qemu/qemu_command.c | 146 ++++++++++++++++-------------- src/qemu/qemu_domain_address.c | 35 +++---- src/qemu/qemu_driver.c | 8 +- src/qemu/qemu_postparse.c | 11 ++- src/qemu/qemu_validate.c | 2 +- 11 files changed, 215 insertions(+), 133 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 6d1adb831d..1c2cf9a2d9 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4134,7 +4134,8 @@ void virDomainDefFree(virDomainDef *def) virDomainCryptoDefFree(def->cryptos[i]); g_free(def->cryptos);
- virDomainIOMMUDefFree(def->iommu); + for (i = 0; i < def->niommus; i++) + virDomainIOMMUDefFree(def->iommu[i]);
virDomainPstoreDefFree(def->pstore);
@@ -5006,9 +5007,9 @@ virDomainDeviceInfoIterateFlags(virDomainDef *def, }
device.type = VIR_DOMAIN_DEVICE_IOMMU; - if (def->iommu) { - device.data.iommu = def->iommu; - if ((rc = cb(def, &device, &def->iommu->info, opaque)) != 0) + for (i = 0; i < def->niommus; i++) { + device.data.iommu = def->iommu[i]; + if ((rc = cb(def, &device, &def->iommu[i]->info, opaque)) != 0) return rc; }
@@ -16487,6 +16488,43 @@ virDomainInputDefFind(const virDomainDef *def, }
+bool +virDomainIOMMUDefEquals(const virDomainIOMMUDef *a, + const virDomainIOMMUDef *b) +{ + if (a->model != b->model || + a->intremap != b->intremap || + a->caching_mode != b->caching_mode || + a->eim != b->eim || + a->iotlb != b->iotlb || + a->aw_bits != b->aw_bits || + a->parent_idx != b->parent_idx || + a->dma_translation != b->dma_translation) + return false; + + if (a->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE && + !virDomainDeviceInfoAddressIsEqual(&a->info, &b->info)) + return false; + + return true; +} + + +ssize_t +virDomainIOMMUDefFind(const virDomainDef *def, + const virDomainIOMMUDef *iommu) +{ + size_t i; + + for (i = 0; i < def->niommus; i++) { + if (virDomainIOMMUDefEquals(iommu, def->iommu[i])) + return i; + } + + return -1; +} + + bool virDomainVsockDefEquals(const virDomainVsockDef *a, const virDomainVsockDef *b) @@ -20154,19 +20192,28 @@ virDomainDefParseXML(xmlXPathContextPtr ctxt, } VIR_FREE(nodes);
+ /* analysis of iommu devices */
Not sure if 'analysis' is the best word to use here. Or why it's already present in so many other cases.
if ((n = virXPathNodeSet("./devices/iommu", ctxt, &nodes)) < 0) return NULL;
- if (n > 1) { + if (n > 1 && !virXPathBoolean("./devices/iommu/@model = 'smmuv3Dev'", ctxt)) { virReportError(VIR_ERR_XML_ERROR, "%s", - _("only a single IOMMU device is supported")); + _("multiple IOMMU devices are only supported with model smmuv3Dev")); return NULL;
Before, having a single iommu device was a limitation of the parser because it was not stored in array. After the switch, the parser should just parse what it's given and the validator would reject incorrect combinations. Also, it would be nicer if the switch to an array was in a separate, preceding patch, that does not change the behavior at all.
}
- if (n > 0) { - if (!(def->iommu = virDomainIOMMUDefParseXML(xmlopt, nodes[0], - ctxt, flags))) + if (n > 0) + def->iommu = g_new0(virDomainIOMMUDef *, n); + + for (i = 0; i < n; i++) { + virDomainIOMMUDef *iommu; + + iommu = virDomainIOMMUDefParseXML(xmlopt, nodes[i], ctxt, flags); + + if (!iommu) return NULL; + + def->iommu[def->niommus++] = iommu; } VIR_FREE(nodes);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1d0c94a00a..f830fe5226 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -3297,6 +3297,9 @@ struct _virDomainDef { size_t nwatchdogs; virDomainWatchdogDef **watchdogs;
+ size_t niommus; + virDomainIOMMUDef **iommu;
'iommus', to make it obvious that there are multiple. Also to let compiler throw an error or any places where the conversion omitted something.
+ /* At maximum 2 TPMs on the domain if a TPM Proxy is present. */ size_t ntpms; virDomainTPMDef **tpms; diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c index 8a599fdbc6..588ecdaa20 100644 --- a/src/conf/domain_validate.c +++ b/src/conf/domain_validate.c @@ -1851,21 +1851,31 @@ virDomainDefCputuneValidate(const virDomainDef *def) static int virDomainDefIOMMUValidate(const virDomainDef *def) { + size_t i; + if (!def->iommu) return 0;
This is no longer needed - if there were no iommus parsed, the loop below will be skipped.
- if (def->iommu->intremap == VIR_TRISTATE_SWITCH_ON && - def->features[VIR_DOMAIN_FEATURE_IOAPIC] != VIR_DOMAIN_IOAPIC_QEMU) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("IOMMU interrupt remapping requires split I/O APIC (ioapic driver='qemu')")); - return -1; - } + for (i = 0; i < def->niommus; i++) { + virDomainIOMMUDef *iommu = def->iommu[i]; + if (def->niommus > 1 && iommu->model != VIR_DOMAIN_IOMMU_MODEL_SMMUV3_DEV) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("IOMMU model smmuv3Dev must be specified for multiple IOMMU definitions")); + }
- if (def->iommu->eim == VIR_TRISTATE_SWITCH_ON && - def->iommu->intremap != VIR_TRISTATE_SWITCH_ON) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("IOMMU eim requires interrupt remapping to be enabled")); - return -1; + if (iommu->intremap == VIR_TRISTATE_SWITCH_ON && + def->features[VIR_DOMAIN_FEATURE_IOAPIC] != VIR_DOMAIN_IOAPIC_QEMU) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("IOMMU interrupt remapping requires split I/O APIC (ioapic driver='qemu')")); + return -1; + } + + if (iommu->eim == VIR_TRISTATE_SWITCH_ON && + iommu->intremap != VIR_TRISTATE_SWITCH_ON) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("IOMMU eim requires interrupt remapping to be enabled")); + return -1; + } }
return 0; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index 26880b6db2..b9c00e98a5 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -6959,9 +6959,9 @@ <zeroOrMore> <ref name="panic"/> </zeroOrMore> - <optional> + <zeroOrMore> <ref name="iommu"/> - </optional> + </zeroOrMore> <optional> <ref name="vsock"/> </optional> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fe72402527..34d9c263d4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -491,6 +491,8 @@ virDomainInputSourceGrabToggleTypeToString; virDomainInputSourceGrabTypeFromString; virDomainInputSourceGrabTypeToString; virDomainInputTypeToString; +virDomainIOMMUDefEquals; +virDomainIOMMUDefFind; virDomainIOMMUDefFree; virDomainIOMMUDefNew; virDomainIOMMUModelTypeFromString; diff --git a/src/qemu/qemu_alias.c b/src/qemu/qemu_alias.c index a27c688d79..5f2b11b9a6 100644 --- a/src/qemu/qemu_alias.c +++ b/src/qemu/qemu_alias.c @@ -647,10 +647,14 @@ qemuAssignDeviceVsockAlias(virDomainVsockDef *vsock)
static void -qemuAssignDeviceIOMMUAlias(virDomainIOMMUDef *iommu) +qemuAssignDeviceIOMMUAlias(virDomainDef *def, + virDomainIOMMUDef **iommu) { - if (!iommu->info.alias) - iommu->info.alias = g_strdup("iommu0"); + size_t i; + for (i = 0; i < def->niommus; i++) { + if (!iommu[i]->info.alias) + iommu[i]->info.alias = g_strdup_printf("iommu%zu", i); + } }
@@ -766,8 +770,9 @@ qemuAssignDeviceAliases(virDomainDef *def) if (def->vsock) { qemuAssignDeviceVsockAlias(def->vsock); } - if (def->iommu) - qemuAssignDeviceIOMMUAlias(def->iommu); + if (def->iommu && def->niommus > 0) {
def->niommus > 0 is a sufficient condition.
+ qemuAssignDeviceIOMMUAlias(def, def->iommu); + } for (i = 0; i < def->ncryptos; i++) { qemuAssignDeviceCryptoAlias(def, def->cryptos[i]); } diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index e789e8cf2c..15ae919047 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6296,10 +6296,12 @@ qemuBuildBootCommandLine(virCommand *cmd,
static virJSONValue * qemuBuildPCISmmuv3DevDevProps(const virDomainDef *def, - const virDomainIOMMUDef *iommu) + const virDomainIOMMUDef *iommu, + size_t id) { g_autoptr(virJSONValue) props = NULL; g_autofree char *bus = NULL; + g_autofree char *smmuv3_id = NULL; size_t i; bool contIsPHB = false;
@@ -6340,9 +6342,12 @@ qemuBuildPCISmmuv3DevDevProps(const virDomainDef *def, bus = temp_bus; }
+ smmuv3_id = g_strdup_printf("smmuv3.%zu", id); + if (virJSONValueObjectAdd(&props, "s:driver", "arm-smmuv3", "s:primary-bus", bus, + "s:id", smmuv3_id,
This change belongs to the commit that added this function.
NULL) < 0) return NULL;
@@ -6355,91 +6360,92 @@ qemuBuildIOMMUCommandLine(virCommand *cmd, const virDomainDef *def, virQEMUCaps *qemuCaps) { + size_t i; g_autoptr(virJSONValue) props = NULL; g_autoptr(virJSONValue) wrapperProps = NULL; - const virDomainIOMMUDef *iommu = def->iommu; - - if (!iommu) + if (!def->iommu || def->niommus <= 0) return 0;
if (def->niommus == 0) return 0; niommus is size_t so it cannot be negative and we don't care whether def->iommu(s) is allocated or not if it does not contain any devices.
- switch (iommu->model) { - case VIR_DOMAIN_IOMMU_MODEL_INTEL: - if (virJSONValueObjectAdd(&props, - "s:driver", "intel-iommu", - "s:id", iommu->info.alias, - "S:intremap", qemuOnOffAuto(iommu->intremap), - "T:caching-mode", iommu->caching_mode, - "S:eim", qemuOnOffAuto(iommu->eim), - "T:device-iotlb", iommu->iotlb, - "z:aw-bits", iommu->aw_bits, - "T:dma-translation", iommu->dma_translation, - NULL) < 0) - return -1;
@@ -7260,8 +7266,8 @@ qemuBuildMachineCommandLine(virCommand *cmd, if (qemuAppendDomainFeaturesMachineParam(&buf, def, qemuCaps) < 0) return -1;
- if (def->iommu) { - switch (def->iommu->model) { + if (def->iommu && def->niommus == 1) {
Only 'niommus' is relevant here.
+ switch (def->iommu[0]->model) { case VIR_DOMAIN_IOMMU_MODEL_SMMUV3: virBufferAddLit(&buf, ",iommu=smmuv3"); break; @@ -7275,7 +7281,7 @@ qemuBuildMachineCommandLine(virCommand *cmd,
case VIR_DOMAIN_IOMMU_MODEL_LAST: default: - virReportEnumRangeError(virDomainIOMMUModel, def->iommu->model); + virReportEnumRangeError(virDomainIOMMUModel, def->iommu[0]->model); return -1; } } diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index 06bf4fab32..2ddc629304 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -2365,24 +2365,25 @@ qemuDomainAssignDevicePCISlots(virDomainDef *def, /* Nada - none are PCI based (yet) */ }
- if (def->iommu) { - virDomainIOMMUDef *iommu = def->iommu; - - switch (iommu->model) { - case VIR_DOMAIN_IOMMU_MODEL_VIRTIO: - case VIR_DOMAIN_IOMMU_MODEL_AMD: - if (virDeviceInfoPCIAddressIsWanted(&iommu->info) && - qemuDomainPCIAddressReserveNextAddr(addrs, &iommu->info) < 0) { - return -1; - } - break; + if (def->iommu && def->niommus > 0) {
The if is not necessary - if niommus is non-zero, def->iommu(s) will be allocated. And if it's zero the for loop is a no-op.
+ for (i = 0; i < def->niommus; i++) { + virDomainIOMMUDef *iommu = def->iommu[i]; + switch (iommu->model) { + case VIR_DOMAIN_IOMMU_MODEL_VIRTIO: + case VIR_DOMAIN_IOMMU_MODEL_AMD: + if (virDeviceInfoPCIAddressIsWanted(&iommu->info) && + qemuDomainPCIAddressReserveNextAddr(addrs, &iommu->info) < 0) { + return -1; + } + break;
- case VIR_DOMAIN_IOMMU_MODEL_INTEL: - case VIR_DOMAIN_IOMMU_MODEL_SMMUV3: - case VIR_DOMAIN_IOMMU_MODEL_SMMUV3_DEV: - case VIR_DOMAIN_IOMMU_MODEL_LAST: - /* These are not PCI devices */ - break; + case VIR_DOMAIN_IOMMU_MODEL_INTEL: + case VIR_DOMAIN_IOMMU_MODEL_SMMUV3: + case VIR_DOMAIN_IOMMU_MODEL_SMMUV3_DEV: + case VIR_DOMAIN_IOMMU_MODEL_LAST: + /* These are not PCI devices */ + break; + } } }
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ac72ea5cb0..3d65f78c9e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6894,12 +6894,12 @@ qemuDomainAttachDeviceConfig(virDomainDef *vmdef, break;
case VIR_DOMAIN_DEVICE_IOMMU: - if (vmdef->iommu) { + if (vmdef->iommu && vmdef->niommus > 0) {
Same comment about checking niommus. Jano
virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain already has an iommu device")); return -1; } - vmdef->iommu = g_steal_pointer(&dev->data.iommu); + VIR_APPEND_ELEMENT(vmdef->iommu, vmdef->niommus, dev->data.iommu); break;
case VIR_DOMAIN_DEVICE_VIDEO: