[libvirt] [PATCH 0/2] Add eim attribute to iommu device

Extended interrupt mode allows >255 vCPUs with q35-based machine types. https://bugzilla.redhat.com/show_bug.cgi?id=1451282 Ján Tomko (2): conf: add eim attribute to <iommu><driver> qemu: format eim on intel-iommu command line docs/formatdomain.html.in | 10 +++++++ docs/schemas/domaincommon.rng | 5 ++++ src/conf/domain_conf.c | 20 ++++++++++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 11 ++++++++ src/qemu/qemu_domain.c | 20 ++++++++++++++ tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml | 1 + .../qemuxml2argv-intel-iommu-eim.args | 19 +++++++++++++ .../qemuxml2argv-intel-iommu-eim.xml | 31 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 7 +++++ .../qemuxml2xmlout-intel-iommu-eim.xml | 1 + tests/qemuxml2xmltest.c | 1 + 15 files changed, 131 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-intel-iommu-eim.xml -- 2.10.2

Add an attribute to control extended interrupt mode. https://bugzilla.redhat.com/show_bug.cgi?id=1451282 --- docs/formatdomain.html.in | 10 +++++++ docs/schemas/domaincommon.rng | 5 ++++ src/conf/domain_conf.c | 20 ++++++++++++++ src/conf/domain_conf.h | 1 + .../qemuxml2argv-intel-iommu-eim.xml | 31 ++++++++++++++++++++++ .../qemuxml2xmlout-intel-iommu-eim.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 69 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-intel-iommu-eim.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3135db4..b5026b1 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7436,6 +7436,16 @@ qemu-kvm -net nic,model=? /dev/null <span class="since">Since 3.4.0</span> (QEMU/KVM only) </p> </dd> + <dt><code>eim</code></dt> + <dd> + <p> + The <code>eim</code> attribute with possible values + <code>on</code> and <code>off</code> can be used to + turn on extended interrupt mode. In combination with intremap + and split I/O APIC, this allows for more vCPUs to be used. 
+ <span class="since">Since 3.4.0</span> (QEMU/KVM only) + </p> + </dd> </dl> </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index f88e84a..144c281 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -3959,6 +3959,11 @@ <ref name="virOnOff"/> </attribute> </optional> + <optional> + <attribute name="eim"> + <ref name="virOnOff"/> + </attribute> + </optional> </element> </optional> </element> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9eba70a..2264d96 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -14184,6 +14184,14 @@ virDomainIOMMUDefParseXML(xmlNodePtr node, } iommu->caching_mode = val; } + VIR_FREE(tmp); + if ((tmp = virXPathString("string(./driver/@eim)", ctxt))) { + if ((val = virTristateSwitchTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_XML_ERROR, _("unknown eim value: %s"), tmp); + goto cleanup; + } + iommu->eim = val; + } ret = iommu; iommu = NULL; @@ -19856,6 +19864,14 @@ virDomainIOMMUDefCheckABIStability(virDomainIOMMUDefPtr src, virTristateSwitchTypeToString(src->caching_mode)); return false; } + if (src->eim != dst->eim) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Target domain IOMMU device eim value '%s' " + "does not match source '%s'"), + virTristateSwitchTypeToString(dst->eim), + virTristateSwitchTypeToString(src->eim)); + return false; + } return true; } @@ -24199,6 +24215,10 @@ virDomainIOMMUDefFormat(virBufferPtr buf, virBufferAsprintf(&childBuf, " caching_mode='%s'", virTristateSwitchTypeToString(iommu->caching_mode)); } + if (iommu->eim != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&childBuf, " eim='%s'", + virTristateSwitchTypeToString(iommu->eim)); + } virBufferAddLit(&childBuf, "/>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 34a3596..83e0672 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2211,6 +2211,7 @@ struct _virDomainIOMMUDef { virDomainIOMMUModel 
model; virTristateSwitch intremap; virTristateSwitch caching_mode; + virTristateSwitch eim; }; /* * Guest VM main configuration diff --git a/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml b/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml new file mode 100644 index 0000000..8642ed3 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml @@ -0,0 +1,31 @@ +<domain type='kvm'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219100</memory> + <currentMemory unit='KiB'>219100</currentMemory> + <vcpu placement='static'>288</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <ioapic driver='qemu'/> + </features> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='none'/> + <iommu model='intel'> + <driver intremap='on' eim='on'/> + </iommu> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-intel-iommu-eim.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-intel-iommu-eim.xml new file mode 120000 index 0000000..9fbec36 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-intel-iommu-eim.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index fed74d0..fff13e2 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1127,6 +1127,7 @@ mymain(void) QEMU_CAPS_MACHINE_IOMMU); DO_TEST("intel-iommu-ioapic", NONE); DO_TEST("intel-iommu-caching-mode", NONE); + DO_TEST("intel-iommu-eim", 
NONE); DO_TEST("cpu-check-none", NONE); DO_TEST("cpu-check-partial", NONE); -- 2.10.2

On Thu, 2017-05-18 at 11:09 +0200, Ján Tomko wrote:
Add an attribute to control extended interrupt mode. https://bugzilla.redhat.com/show_bug.cgi?id=1451282
[...]
@@ -14184,6 +14184,14 @@ virDomainIOMMUDefParseXML(xmlNodePtr node, } iommu->caching_mode = val; }
Add an empty line here, please.
+ VIR_FREE(tmp); + if ((tmp = virXPathString("string(./driver/@eim)", ctxt))) { + if ((val = virTristateSwitchTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_XML_ERROR, _("unknown eim value: %s"), tmp); + goto cleanup; + } + iommu->eim = val; + }
Reviewed-by: Andrea Bolognani <abologna@redhat.com> -- Andrea Bolognani / Red Hat / Virtualization

On Thu, 2017-05-18 at 11:09 +0200, Ján Tomko wrote:
Add an attribute to control extended interrupt mode. https://bugzilla.redhat.com/show_bug.cgi?id=1451282
[...]
@@ -7436,6 +7436,16 @@ qemu-kvm -net nic,model=? /dev/null <span class="since">Since 3.4.0</span> (QEMU/KVM only) </p> </dd> + <dt><code>eim</code></dt> + <dd> + <p> + The <code>eim</code> attribute with possible values + <code>on</code> and <code>off</code> can be used to + turn on extended interrupt mode. In combination with intremap + and split I/O APIC, this allows for more vCPUs to be used. + <span class="since">Since 3.4.0</span> (QEMU/KVM only) + </p> + </dd> </dl> </dd> </dl>
Actually, the documentation could use some work too. How about: The <code>eim</code> attribute (with possible values <code>on</code> and <code>off</code>) can be used to configure Extended Interrupt Mode. A q35 domain with split I/O APIC (as described in <a href="#elementsFeatures">hypervisor features</a>), and both interrupt remapping and EIM turned on for the IOMMU, will be able to use more than 255 vCPUs. <span class="since">Since 3.4.0</span> (QEMU/KVM only) -- Andrea Bolognani / Red Hat / Virtualization

This option turns on extended interrupt mode, which allows more than 255 vCPUs. https://bugzilla.redhat.com/show_bug.cgi?id=1451282 --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 11 +++++++++++ src/qemu/qemu_domain.c | 20 ++++++++++++++++++++ tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml | 1 + .../qemuxml2argv-intel-iommu-eim.args | 19 +++++++++++++++++++ tests/qemuxml2argvtest.c | 7 +++++++ 8 files changed, 62 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 546dfd7..7ea8505 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -371,6 +371,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "kernel-irqchip.split", "intel-iommu.intremap", "intel-iommu.caching-mode", + "intel-iommu.eim", ); @@ -1728,6 +1729,7 @@ static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsUSBNECXHCI[] = { static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsIntelIOMMU[] = { { "intremap", QEMU_CAPS_INTEL_IOMMU_INTREMAP }, { "caching-mode", QEMU_CAPS_INTEL_IOMMU_CACHING_MODE }, + { "eim", QEMU_CAPS_INTEL_IOMMU_EIM }, }; /* see documentation for virQEMUCapsQMPSchemaGetByPath for the query format */ diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index aa99fda..eba9814 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -409,6 +409,7 @@ typedef enum { QEMU_CAPS_MACHINE_KERNEL_IRQCHIP_SPLIT, /* -machine kernel_irqchip=split */ QEMU_CAPS_INTEL_IOMMU_INTREMAP, /* intel-iommu.intremap */ QEMU_CAPS_INTEL_IOMMU_CACHING_MODE, /* intel-iommu.caching-mode */ + QEMU_CAPS_INTEL_IOMMU_EIM, /* intel-iommu.eim */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 4c1a266..f190d51 100644 --- 
a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6701,6 +6701,13 @@ qemuBuildIOMMUCommandLine(virCommandPtr cmd, "with this QEMU binary")); return -1; } + if (iommu->eim != VIR_TRISTATE_SWITCH_ABSENT && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_INTEL_IOMMU_EIM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("iommu: eim is not supported " + "with this QEMU binary")); + return -1; + } break; case VIR_DOMAIN_IOMMU_MODEL_LAST: break; @@ -6734,6 +6741,10 @@ qemuBuildIOMMUCommandLine(virCommandPtr cmd, virBufferAsprintf(&opts, ",caching-mode=%s", virTristateSwitchTypeToString(iommu->caching_mode)); } + if (iommu->eim != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&opts, ",eim=%s", + virTristateSwitchTypeToString(iommu->eim)); + } case VIR_DOMAIN_IOMMU_MODEL_LAST: break; } diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 7c54f69..cd1825e 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2663,6 +2663,9 @@ qemuDomainRecheckInternalPaths(virDomainDefPtr def, } +#define QEMU_MAX_VCPUS_WITHOUT_EIM 255 + + static int qemuDomainDefVcpusPostParse(virDomainDefPtr def) { @@ -3071,6 +3074,23 @@ qemuDomainDefValidate(const virDomainDef *def, } } + if (ARCH_IS_X86(def->os.arch) && + virDomainDefGetVcpusMax(def) > QEMU_MAX_VCPUS_WITHOUT_EIM) { + if (!qemuDomainIsQ35(def)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("more than %d vCPUs are only supported on " + "q35-based machine types"), + QEMU_MAX_VCPUS_WITHOUT_EIM); + goto cleanup; + } + if (!def->iommu || def->iommu->eim != VIR_TRISTATE_SWITCH_ON) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("more than %d vCPUs require extended interrupt " + "mode enabled on the iommu device"), + QEMU_MAX_VCPUS_WITHOUT_EIM); + } + } + if (qemuDomainDefValidateVideo(def) < 0) goto cleanup; diff --git a/tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml index e515678..01edbc8 100644 --- 
a/tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml @@ -206,6 +206,7 @@ <flag name='kernel-irqchip'/> <flag name='kernel-irqchip.split'/> <flag name='intel-iommu.intremap'/> + <flag name='intel-iommu.eim'/> <version>2008000</version> <kvmVersion>0</kvmVersion> <package> (v2.8.0)</package> diff --git a/tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml index 19fe4b7..95b04dd 100644 --- a/tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml @@ -214,6 +214,7 @@ <flag name='kernel-irqchip.split'/> <flag name='intel-iommu.intremap'/> <flag name='intel-iommu.caching-mode'/> + <flag name='intel-iommu.eim'/> <version>2009000</version> <kvmVersion>0</kvmVersion> <package> (v2.9.0)</package> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args b/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args new file mode 100644 index 0000000..ebf7c49 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args @@ -0,0 +1,19 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name QEMUGuest1 \ +-S \ +-machine q35,accel=kvm,kernel_irqchip=split \ +-m 214 \ +-smp 288,sockets=288,cores=1,threads=1 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-device intel-iommu,intremap=on,eim=on diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 4269598..b360185 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2529,6 +2529,13 @@ mymain(void) QEMU_CAPS_DEVICE_INTEL_IOMMU, QEMU_CAPS_INTEL_IOMMU_INTREMAP, QEMU_CAPS_INTEL_IOMMU_CACHING_MODE); + DO_TEST("intel-iommu-eim", + QEMU_CAPS_MACHINE_OPT, + QEMU_CAPS_MACHINE_KERNEL_IRQCHIP, + QEMU_CAPS_MACHINE_KERNEL_IRQCHIP_SPLIT, 
+ QEMU_CAPS_INTEL_IOMMU_INTREMAP, + QEMU_CAPS_INTEL_IOMMU_EIM, + QEMU_CAPS_DEVICE_INTEL_IOMMU); DO_TEST("cpu-hotplug-startup", QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS); -- 2.10.2

On Thu, 2017-05-18 at 11:09 +0200, Ján Tomko wrote:
This option turns on extended interrupt mode, which allows more than 255 vCPUs. https://bugzilla.redhat.com/show_bug.cgi?id=1451282 --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 11 +++++++++++ src/qemu/qemu_domain.c | 20 ++++++++++++++++++++ tests/qemucapabilitiesdata/caps_2.8.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_2.9.0.x86_64.xml | 1 + .../qemuxml2argv-intel-iommu-eim.args | 19 +++++++++++++++++++ tests/qemuxml2argvtest.c | 7 +++++++ 8 files changed, 62 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-intel-iommu-eim.args
[...]
@@ -2663,6 +2663,9 @@ qemuDomainRecheckInternalPaths(virDomainDefPtr def, } +#define QEMU_MAX_VCPUS_WITHOUT_EIM 255
This define is very far from the code that actually uses it; can you please move it right before qemuDomainDefValidate()? You could even...
+ + static int qemuDomainDefVcpusPostParse(virDomainDefPtr def) { @@ -3071,6 +3074,23 @@ qemuDomainDefValidate(const virDomainDef *def, } }
... define it here, and...
+ if (ARCH_IS_X86(def->os.arch) && + virDomainDefGetVcpusMax(def) > QEMU_MAX_VCPUS_WITHOUT_EIM) { + if (!qemuDomainIsQ35(def)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("more than %d vCPUs are only supported on " + "q35-based machine types"), + QEMU_MAX_VCPUS_WITHOUT_EIM); + goto cleanup; + } + if (!def->iommu || def->iommu->eim != VIR_TRISTATE_SWITCH_ON) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("more than %d vCPUs require extended interrupt " + "mode enabled on the iommu device"), + QEMU_MAX_VCPUS_WITHOUT_EIM); + } + }
... undefine it here. Either solution is fine with me. Reviewed-by: Andrea Bolognani <abologna@redhat.com> -- Andrea Bolognani / Red Hat / Virtualization
participants (2)
-
Andrea Bolognani
-
Ján Tomko