[libvirt] [PATCH] qemu: Permit PCI-free aarch64 mach-virt guests

There has been some progress lately in enabling virtio-pci on aarch64 guests; however, guest OS support is still spotty at best, so most guests are going to be using virtio-mmio instead. Currently, mach-virt guests are closely modeled after q35 guests, and that includes always adding a dmi-to-pci-bridge that's just impossible to get rid of. While that's acceptable (if suboptimal) for q35, where you will always need some kind of PCI device anyway, mach-virt guests should be allowed to avoid it. --- src/qemu/qemu_domain.c | 8 ++++++-- src/qemu/qemu_domain_address.c | 8 +++++++- .../qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args | 1 - .../qemuxml2argv-aarch64-virtio-pci-default.args | 1 - .../qemuxml2argv-aarch64-virtio-pci-manual-addresses.args | 5 +++-- .../qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml | 14 ++++++++++++-- .../qemuxml2xmlout-aarch64-virtio-pci-default.xml | 4 ---- .../qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml | 13 +++++++++---- 8 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 1eb5644..595ad64 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1951,11 +1951,15 @@ qemuDomainDefAddDefaultDevices(virDomainDefPtr def, VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) { goto cleanup; } - /* add a dmi-to-pci-bridge and a pci-bridge if there are no pci controllers + /* Add a dmi-to-pci-bridge bridge if there are no PCI controllers * other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) && !virDomainDefAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1, VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE)) goto cleanup; diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index ca3adc0..883264a 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -1483,9 +1483,15 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def, } /* Reserve 1 extra slot for a (potential) bridge only if buses - * are not fully reserved yet + * are not fully reserved yet. + * + * We don't reserve the extra slot for aarch64 mach-virt guests + * either because we want to be able to have pure virtio-mmio + * guests, and reserving this slot would force us to add at least + * a dmi-to-pci-bridge to an otherwise PCI-free topology */ if (!buses_reserved && + !qemuDomainMachineIsVirt(def) && virDomainPCIAddressReserveNextSlot(addrs, &info, flags) < 0) goto cleanup; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args index 2a5702f..3e6bee9 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args @@ -21,7 +21,6 @@ QEMU_AUDIO_DRV=none \ -initrd /aarch64.initrd \ -append 'earlyprintk console=ttyAMA0,115200n8 rw root=/dev/vda rootwait' \ -dtb /aarch64.dtb \ --device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device virtio-serial-device,id=virtio-serial0 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-virtio-disk0 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args index a2df858..566bee2 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args @@ -21,7 +21,6 @@ QEMU_AUDIO_DRV=none \ -initrd /aarch64.initrd \ -append 'earlyprintk console=ttyAMA0,115200n8 rw root=/dev/vda rootwait' \ -dtb /aarch64.dtb \ --device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device virtio-serial-device,id=virtio-serial0 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-virtio-disk0 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args index 0234404..4e5dbdb 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args @@ -23,12 +23,13 @@ QEMU_AUDIO_DRV=none \ -dtb /aarch64.dtb \ -device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device pci-bridge,chassis_nr=2,id=pci.2,bus=pci.1,addr=0x0 \ --device virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3 \ +-device pci-bridge,chassis_nr=3,id=pci.3,bus=pci.1,addr=0x1 \ +-device virtio-scsi-pci,id=scsi0,bus=pci.3,addr=0x1 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-scsi0-0-0-0 \ -device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,\ id=scsi0-0-0-0 \ --device virtio-net-pci,vlan=0,id=net0,mac=52:54:00:09:a4:37,bus=pcie.0,addr=0x2 \ +-device virtio-net-pci,vlan=0,id=net0,mac=52:54:00:09:a4:37,bus=pci.3,addr=0x2 \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:09:a4:38,bus=pci.2,addr=0x1 \ -net user,vlan=1,name=hostnet1 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml index bf0f249..5e1b494 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml @@ -31,13 +31,23 @@ <target dev='sda' bus='scsi'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='dmi-to-pci-bridge'> + <model name='i82801b11-bridge'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </controller> <controller type='scsi' index='0' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x01' function='0x0'/> </controller> <interface type='user'> <mac address='52:54:00:09:a4:37'/> <model type='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x02' function='0x0'/> </interface> <interface type='user'> <mac address='52:54:00:09:a4:38'/> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml index a212601..7c3fc19 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml @@ -33,10 +33,6 @@ <address type='virtio-mmio'/> </disk> <controller type='pci' index='0' model='pcie-root'/> - <controller type='pci' index='1' model='dmi-to-pci-bridge'> - <model name='i82801b11-bridge'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> - </controller> <controller type='virtio-serial' index='0'> <address type='virtio-mmio'/> </controller> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml index 4fdedac..1b50f75 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml @@ -32,9 +32,6 @@ <target dev='sda' bus='scsi'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> - <controller type='scsi' index='0' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> - </controller> <controller type='pci' index='0' model='pcie-root'/> <controller type='pci' index='1' model='dmi-to-pci-bridge'> <model name='i82801b11-bridge'/> @@ -45,10 +42,18 @@ <target chassisNr='2'/> <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> </controller> + <controller type='scsi' index='0' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x03' slot='0x01' function='0x0'/> + </controller> + <controller type='pci' index='3' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='3'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' function='0x0'/> + </controller> <interface type='user'> <mac address='52:54:00:09:a4:37'/> <model type='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x02' function='0x0'/> </interface> <interface type='user'> <mac address='52:54:00:09:a4:38'/> -- 2.5.5

On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
There has been some progress lately in enabling virtio-pci on aarch64 guests; however, guest OS support is still spotty at best, so most guests are going to be using virtio-mmio instead.
Currently, mach-virt guests are closely modeled after q35 guests, and that includes always adding a dmi-to-pci-bridge that's just impossible to get rid of.
Yeah. Sorry my simple patches from yesterday didn't get you as far as you needed to go.
While that's acceptable (if suboptimal) for q35, where you will always need some kind of PCI device anyway, mach-virt guests should be allowed to avoid it. --- src/qemu/qemu_domain.c | 8 ++++++-- src/qemu/qemu_domain_address.c | 8 +++++++- .../qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args | 1 - .../qemuxml2argv-aarch64-virtio-pci-default.args | 1 - .../qemuxml2argv-aarch64-virtio-pci-manual-addresses.args | 5 +++-- .../qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml | 14 ++++++++++++-- .../qemuxml2xmlout-aarch64-virtio-pci-default.xml | 4 ---- .../qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml | 13 +++++++++---- 8 files changed, 37 insertions(+), 17 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 1eb5644..595ad64 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1951,11 +1951,15 @@ qemuDomainDefAddDefaultDevices(virDomainDefPtr def, VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) { goto cleanup; } - /* add a dmi-to-pci-bridge and a pci-bridge if there are no pci controllers + /* Add a dmi-to-pci-bridge bridge if there are no PCI controllers
That should have been in my patch yesterday, but too late for that now! Thanks for fixing it.
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) &&
You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)?
!virDomainDefAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1, VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE)) goto cleanup;
Okay, so pcie-root is still always "there", which isn't a problem since the presence of pcie-root in the config doesn't actually change anything in the qemu commandline or resulting virtual machine (it's a default device in qemu that can't be removed).
diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index ca3adc0..883264a 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -1483,9 +1483,15 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def, }
/* Reserve 1 extra slot for a (potential) bridge only if buses - * are not fully reserved yet + * are not fully reserved yet. + * + * We don't reserve the extra slot for aarch64 mach-virt guests + * either because we want to be able to have pure virtio-mmio + * guests, and reserving this slot would force us to add at least + * a dmi-to-pci-bridge to an otherwise PCI-free topology */ if (!buses_reserved && + !qemuDomainMachineIsVirt(def) && virDomainPCIAddressReserveNextSlot(addrs, &info, flags) < 0) goto cleanup;
You could save some time by just putting the whole thing from "for (i = 0; i < addrs->nbuses; i++)" down to that "goto cleanup" inside an "if (!qemuDomainMachineIsVirt(def)) { ... }". (maybe replacing qemuDomainMachineIsVirt() with something more specific, as noted above). As we've discussed in IRC, eventually there should be a way to disable this for *every* platform, not just aarch64/virt. Possibly an "openPCISlots" attribute somewhere in the domain that tells how many slots should be left open for hotplug (with default being 1 for x86/440fx, and up for discussion on other platforms).
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args index 2a5702f..3e6bee9 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virt-2.6-virtio-pci-default.args @@ -21,7 +21,6 @@ QEMU_AUDIO_DRV=none \ -initrd /aarch64.initrd \ -append 'earlyprintk console=ttyAMA0,115200n8 rw root=/dev/vda rootwait' \ -dtb /aarch64.dtb \ --device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device virtio-serial-device,id=virtio-serial0 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-virtio-disk0 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args index a2df858..566bee2 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-default.args @@ -21,7 +21,6 @@ QEMU_AUDIO_DRV=none \ -initrd /aarch64.initrd \ -append 'earlyprintk console=ttyAMA0,115200n8 rw root=/dev/vda rootwait' \ -dtb /aarch64.dtb \ --device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device virtio-serial-device,id=virtio-serial0 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-virtio-disk0 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args index 0234404..4e5dbdb 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.args @@ -23,12 +23,13 @@ QEMU_AUDIO_DRV=none \ -dtb /aarch64.dtb \ -device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1 \ -device pci-bridge,chassis_nr=2,id=pci.2,bus=pci.1,addr=0x0 \ --device virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3 \ +-device pci-bridge,chassis_nr=3,id=pci.3,bus=pci.1,addr=0x1 \ +-device virtio-scsi-pci,id=scsi0,bus=pci.3,addr=0x1 \ -usb \ -drive file=/aarch64.raw,format=raw,if=none,id=drive-scsi0-0-0-0 \ -device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,\ id=scsi0-0-0-0 \ --device virtio-net-pci,vlan=0,id=net0,mac=52:54:00:09:a4:37,bus=pcie.0,addr=0x2 \ +-device virtio-net-pci,vlan=0,id=net0,mac=52:54:00:09:a4:37,bus=pci.3,addr=0x2 \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:09:a4:38,bus=pci.2,addr=0x1 \ -net user,vlan=1,name=hostnet1 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml index bf0f249..5e1b494 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-aarch64-virtio-pci-manual-addresses.xml @@ -31,13 +31,23 @@ <target dev='sda' bus='scsi'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> + <controller type='pci' index='0' model='pcie-root'/>
This surprised me for a minute, since you're now explicitly adding pcie-root even though it's still implicitly added for you (and, for example, qemuxml2argvtest completes successfully without it). But I guess it doesn't hurt anything to have it in the file, as it makes it obvious that the dmi-to-pci-bridge isn't just being added on top of nothing. (Hopefully all of this will soon be a thing of the past - for *all* arches/machinetypes if you put in a device with <address type='pci'/> (or a device that can only be connected via PCI), then all the necessary pci controllers should just magically appear, otherwise not). ACK.
+ <controller type='pci' index='1' model='dmi-to-pci-bridge'> + <model name='i82801b11-bridge'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </controller> <controller type='scsi' index='0' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x01' function='0x0'/> </controller> <interface type='user'> <mac address='52:54:00:09:a4:37'/> <model type='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x02' function='0x0'/> </interface> <interface type='user'> <mac address='52:54:00:09:a4:38'/> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml index a212601..7c3fc19 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-default.xml @@ -33,10 +33,6 @@ <address type='virtio-mmio'/> </disk> <controller type='pci' index='0' model='pcie-root'/> - <controller type='pci' index='1' model='dmi-to-pci-bridge'> - <model name='i82801b11-bridge'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> - </controller> <controller type='virtio-serial' index='0'> <address type='virtio-mmio'/> </controller> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml index 4fdedac..1b50f75 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-aarch64-virtio-pci-manual-addresses.xml @@ -32,9 +32,6 @@ <target dev='sda' bus='scsi'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> - <controller type='scsi' index='0' model='virtio-scsi'> - <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> - </controller> <controller type='pci' index='0' model='pcie-root'/> <controller type='pci' index='1' model='dmi-to-pci-bridge'> <model name='i82801b11-bridge'/> @@ -45,10 +42,18 @@ <target chassisNr='2'/> <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> </controller> + <controller type='scsi' index='0' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x03' slot='0x01' function='0x0'/> + </controller> + <controller type='pci' index='3' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='3'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' function='0x0'/> + </controller> <interface type='user'> <mac address='52:54:00:09:a4:37'/> <model type='virtio'/> - <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + <address type='pci' domain='0x0000' bus='0x03' slot='0x02' function='0x0'/> </interface> <interface type='user'> <mac address='52:54:00:09:a4:38'/>

On Fri, Jun 17, 2016 at 11:36:05AM -0400, Laine Stump wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) &&
You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)?
Just my $.02 here, but since our qemuDomainMachineIsVirt() is made specifically for aarch64 arches, I think the right thing to do would be just add architecture check into that function as using it throughout the codebase ought to actually be what all the callers want.

On 06/17/2016 11:46 AM, Martin Kletzander wrote:
On Fri, Jun 17, 2016 at 11:36:05AM -0400, Laine Stump wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) &&
You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)?
Just my $.02 here, but since our qemuDomainMachineIsVirt() is made specifically for aarch64 arches, I think the right thing to do would be just add architecture check into that function as using it throughout the codebase ought to actually be what all the callers want.
Sure, a single function would be great. Its name should reflect that it is for *aarch64* virt machines though (in anticipation of other arches getting a virt machinetype).

On Fri, Jun 17, 2016 at 12:01:58PM -0400, Laine Stump wrote:
On 06/17/2016 11:46 AM, Martin Kletzander wrote:
On Fri, Jun 17, 2016 at 11:36:05AM -0400, Laine Stump wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) &&
You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)?
Just my $.02 here, but since our qemuDomainMachineIsVirt() is made specifically for aarch64 arches, I think the right thing to do would be just add architecture check into that function as using it throughout the codebase ought to actually be what all the callers want.
Sure, a single function would be great. Its name should reflect that it is for *aarch64* virt machines though (in anticipation of other arches getting a virt machinetype).
I thought that was obvious but re-reading what I wrote it isn't. So to be sure, I'll express my feelings with code, so it's more exact =) Something along the lines of: diff --git i/src/qemu/qemu_domain.c w/src/qemu/qemu_domain.c index 2fe59d78fd2f..cc6b7ae10a08 100644 --- i/src/qemu/qemu_domain.c +++ w/src/qemu/qemu_domain.c @@ -4906,10 +4906,11 @@ qemuDomainMachineIsS390CCW(const virDomainDef *def) bool -qemuDomainMachineIsVirt(const virDomainDef *def) +qemuDomainIsAArch64Virt(const virDomainDef *def) { - return STREQ(def->os.machine, "virt") || - STRPREFIX(def->os.machine, "virt-"); + if (def->os.arch != VIR_ARCH_AARCH64) && + STREQ(def->os.machine, "virt") || + STRPREFIX(def->os.machine, "virt-"); } -- Martin

On Fri, 2016-06-17 at 12:01 -0400, Laine Stump wrote:
On 06/17/2016 11:46 AM, Martin Kletzander wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) && You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)? Just my $.02 here, but since our qemuDomainMachineIsVirt() is made specifically for aarch64 arches, I think the right thing to do would be just add architecture check into that function as using it throughout
On Fri, Jun 17, 2016 at 11:36:05AM -0400, Laine Stump wrote: the codebase ought to actually be what all the callers want. Sure, a single function would be great. Its name should reflect that it is for *aarch64* virt machines though (in anticipation of other arches getting a virt machinetype).
I think we can stick with qemuDomainMachineIsVirt() for now, and create per-arch variants if and when another architecture grows its own virt machine type. But I'm totally for moving the architecture checks inside the function; in fact, I'd like to do the same for pSeries guests. I'll work on it next week. -- Andrea Bolognani Software Engineer - Virtualization Team

On Fri, 2016-06-17 at 18:38 +0200, Andrea Bolognani wrote:
On Fri, 2016-06-17 at 12:01 -0400, Laine Stump wrote:
On 06/17/2016 11:46 AM, Martin Kletzander wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) && You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)? Just my $.02 here, but since our qemuDomainMachineIsVirt() is made specifically for aarch64 arches, I think the right thing to do would be just add architecture check into that function as using it throughout
On Fri, Jun 17, 2016 at 11:36:05AM -0400, Laine Stump wrote: the codebase ought to actually be what all the callers want. Sure, a single function would be great. Its name should reflect that it is for *aarch64* virt machines though (in anticipation of other arches getting a virt machinetype).
I think we can stick with qemuDomainMachineIsVirt() for now, and create per-arch variants if and when another architecture grows its own virt machine type.
But I'm totally for moving the architecture checks inside the function; in fact, I'd like to do the same for pSeries guests. I'll work on it next week.
I've now implemented the suggestion. https://www.redhat.com/archives/libvir-list/2016-June/msg01635.html -- Andrea Bolognani / Red Hat / Virtualization

On Fri, 2016-06-17 at 11:36 -0400, Laine Stump wrote:
On 06/17/2016 08:43 AM, Andrea Bolognani wrote:
There has been some progress lately in enabling virtio-pci on aarch64 guests; however, guest OS support is still spotty at best, so most guests are going to be using virtio-mmio instead. Currently, mach-virt guests are closely modeled after q35 guests, and that includes always adding a dmi-to-pci-bridge that's just impossible to get rid of. Yeah. Sorry my simple patches from yesterday didn't get you as far as you needed to go.
That's quite all right, you did a bunch of work and I merely walked the last mile :)
* other than the pcie-root. This is so that there will be hot-pluggable - * PCI slots available + * PCI slots available. + * + * We skip this step for aarch64 mach-virt guests, where we want to + * be able to have a pure virtio-mmio topology */ if (virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1) < 0 && + !qemuDomainMachineIsVirt(def) && You're assuming that the only virt* machinetypes will be aarch64, which may be reasonable now, but not in the future (periodically someone from qemu will mention the idea of a "virt" machinetype for x86, which is legacy-free and accepts only virtio devices). Wouldn't a more specific comparison be better here (and in the other places in this patch)?
I'll reply to this one in the sub-thread Martin started.
!virDomainDefAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1, VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE)) goto cleanup; Okay, so pcie-root is still always "there", which isn't a problem since the presence of pcie-root in the config doesn't actually change anything in the qemu commandline or resulting virtual machine (it's a default device in qemu that can't be removed).
Exactly, that's why it *has* to be in the domain XML. Since it can't be removed or disabled, it should always be displayed to the user.
diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index ca3adc0..883264a 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -1483,9 +1483,15 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def, } /* Reserve 1 extra slot for a (potential) bridge only if buses - * are not fully reserved yet + * are not fully reserved yet. + * + * We don't reserve the extra slot for aarch64 mach-virt guests + * either because we want to be able to have pure virtio-mmio + * guests, and reserving this slot would force us to add at least + * a dmi-to-pci-bridge to an otherwise PCI-free topology */ if (!buses_reserved && + !qemuDomainMachineIsVirt(def) && virDomainPCIAddressReserveNextSlot(addrs, &info, flags) < 0) goto cleanup; You could save some time by just putting the whole thing from "for (i = 0; i < addrs->nbuses; i++)" down to that "goto cleanup" inside an "if (!qemuDomainMachineIsVirt(def)) { ... }". (maybe replacing qemuDomainMachineIsVirt() with something more specific, as noted above).
I didn't change that because it would have added one level of nesting to the code, and qemuDomainPCIBusFullyReserved() should be fairly cheap anyway. We can always revisit it later.
As we've discussed in IRC, eventually there should be a way to disable this for *every* platform, not just aarch64/virt. Possibly an "openPCISlots" attribute somewhere in the domain that tells how many slots should be left open for hotplug (with default being 1 for x86/440fx, and up for discussion on other platforms).
Agreed. We're not quite there yet, unfortunately, so for now this mach-virt specific fix will have to do :)
<target dev='sda' bus='scsi'/> <address type='drive' controller='0' bus='0' target='0' unit='0'/> </disk> + <controller type='pci' index='0' model='pcie-root'/> This surprised me for a minute, since you're now explicitly adding pcie-root even though it's still implicitly added for you (and, for example, qemuxml2argvtest completes successfully without it). But I guess it doesn't hurt anything to have it in the file, as it makes it obvious that the dmi-to-pci-bridge isn't just being added on top of nothing.
Yeah, that was the pretty much my reasoning :)
(Hopefully all of this will soon be a thing of the past - for *all* arches/machinetypes if you put in a device with <address type='pci'/> (or a device that can only be connected via PCI), then all the necessary pci controllers should just magically appear, otherwise not).
I think we're all looking forward to that brighter day! :)
ACK.
Thanks, I've pushed it. -- Andrea Bolognani Software Engineer - Virtualization Team
participants (3)
-
Andrea Bolognani
-
Laine Stump
-
Martin Kletzander