[libvirt] [PATCH] qemu: qxl devices don't support multifunction yet

This is a workaround for QXL bug RHBZ#728174. --- src/qemu/qemu_command.c | 33 +++++++++++--------- .../qemuxml2argv-graphics-spice-qxl-vga.args | 2 +- tests/qemuxml2argvtest.c | 3 +- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index c7c183a..1bb136d 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -1350,7 +1350,8 @@ qemuUsbId(virBufferPtr buf, int idx) static int qemuBuildDeviceAddressStr(virBufferPtr buf, virDomainDeviceInfoPtr info, - virBitmapPtr qemuCaps) + virBitmapPtr qemuCaps, + bool multiFunc) { if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) { if (info->addr.pci.domain != 0) { @@ -1389,7 +1390,7 @@ qemuBuildDeviceAddressStr(virBufferPtr buf, virBufferAsprintf(buf, ",bus=pci.0"); else virBufferAsprintf(buf, ",bus=pci"); - if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) + if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION) && multiFunc) virBufferAsprintf(buf, ",multifunction=on,addr=0x%x.0x%x", info->addr.pci.slot, info->addr.pci.function); else @@ -1697,7 +1698,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk, virBufferAsprintf(&opt, ",event_idx=%s", virDomainVirtioEventIdxTypeToString(disk->event_idx)); } - qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps); + qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps, true); break; case VIR_DOMAIN_DISK_BUS_USB: virBufferAddLit(&opt, "usb-storage"); @@ -1776,7 +1777,7 @@ qemuBuildFSDevStr(virDomainFSDefPtr fs, virBufferAsprintf(&opt, ",id=%s", fs->info.alias); virBufferAsprintf(&opt, ",fsdev=%s%s", QEMU_FSDEV_HOST_PREFIX, fs->info.alias); virBufferAsprintf(&opt, ",mount_tag=%s", fs->dst); - qemuBuildDeviceAddressStr(&opt, &fs->info, qemuCaps); + qemuBuildDeviceAddressStr(&opt, &fs->info, qemuCaps, true); if (virBufferError(&opt)) { virReportOOMError(); @@ -1904,7 +1905,7 @@ qemuBuildControllerDevStr(virDomainControllerDefPtr def, goto error; } - if (qemuBuildDeviceAddressStr(&buf, &def->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &def->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2007,7 +2008,7 @@ qemuBuildNicDevStr(virDomainNetDefPtr net, net->mac[0], net->mac[1], net->mac[2], net->mac[3], net->mac[4], net->mac[5]); - if (qemuBuildDeviceAddressStr(&buf, &net->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &net->info, qemuCaps, true) < 0) goto error; if (bootindex && qemuCapsGet(qemuCaps, QEMU_CAPS_BOOTINDEX)) virBufferAsprintf(&buf, ",bootindex=%d", bootindex); @@ -2141,7 +2142,7 @@ qemuBuildWatchdogDevStr(virDomainWatchdogDefPtr dev, } virBufferAsprintf(&buf, "%s,id=%s", model, dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2165,7 +2166,7 @@ qemuBuildMemballoonDevStr(virDomainMemballoonDefPtr dev, virBufferAddLit(&buf, "virtio-balloon-pci"); virBufferAsprintf(&buf, ",id=%s", dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2191,7 +2192,7 @@ qemuBuildUSBInputDevStr(virDomainInputDefPtr dev, dev->type == VIR_DOMAIN_INPUT_TYPE_MOUSE ? "usb-mouse" : "usb-tablet", dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2229,7 +2230,7 @@ qemuBuildSoundDevStr(virDomainSoundDefPtr sound, model = "intel-hda"; virBufferAsprintf(&buf, "%s,id=%s", model, sound->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &sound->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &sound->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2272,6 +2273,7 @@ qemuBuildVideoDevStr(virDomainVideoDefPtr video, { virBuffer buf = VIR_BUFFER_INITIALIZER; const char *model = qemuVideoTypeToString(video->type); + bool multiFunc = true; if (!model) { qemuReportError(VIR_ERR_INTERNAL_ERROR, @@ -2291,9 +2293,10 @@ qemuBuildVideoDevStr(virDomainVideoDefPtr video, /* QEMU accepts bytes for vram_size. */ virBufferAsprintf(&buf, ",vram_size=%u", video->vram * 1024); + multiFunc = false; } - if (qemuBuildDeviceAddressStr(&buf, &video->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &video->info, qemuCaps, multiFunc) < 0) goto error; if (virBufferError(&buf)) { @@ -2350,7 +2353,7 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, const char *configfd, virBufferAsprintf(&buf, ",configfd=%s", configfd); if (dev->bootIndex) virBufferAsprintf(&buf, ",bootindex=%d", dev->bootIndex); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2405,7 +2408,7 @@ qemuBuildRedirdevDevStr(virDomainRedirdevDefPtr dev, dev->info.alias, dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2438,7 +2441,7 @@ qemuBuildUSBHostdevDevStr(virDomainHostdevDefPtr dev, dev->source.subsys.u.usb.device, dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { @@ -2475,7 +2478,7 @@ qemuBuildHubDevStr(virDomainHubDefPtr dev, virBufferAddLit(&buf, "usb-hub"); virBufferAsprintf(&buf, ",id=%s", dev->info.alias); - if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps) < 0) + if (qemuBuildDeviceAddressStr(&buf, &dev->info, qemuCaps, true) < 0) goto error; if (virBufferError(&buf)) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-graphics-spice-qxl-vga.args b/tests/qemuxml2argvdata/qemuxml2argv-graphics-spice-qxl-vga.args index 18013a5..5cd36f8 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-graphics-spice-qxl-vga.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-graphics-spice-qxl-vga.args @@ -4,4 +4,4 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda \ /dev/HostVG/QEMUGuest1 -usb -spice port=5903,tls-port=5904,addr=127.0.0.1,\ x509-dir=/etc/pki/libvirt-spice,tls-channel=main,plaintext-channel=inputs -vga \ qxl -global qxl-vga.vram_size=33554432 -device qxl,id=video1,vram_size=67108864,bus=pci.0,addr=0x4 \ --device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 +-device virtio-balloon-pci,id=balloon0,bus=pci.0,multifunction=on,addr=0x3.0x0 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 335af4a..a453511 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -402,7 +402,8 @@ mymain(void) DO_TEST("graphics-spice-qxl-vga", false, QEMU_CAPS_VGA, QEMU_CAPS_VGA_QXL, QEMU_CAPS_DEVICE, QEMU_CAPS_SPICE, - QEMU_CAPS_DEVICE_QXL_VGA); + QEMU_CAPS_DEVICE_QXL_VGA, + QEMU_CAPS_PCI_MULTIFUNCTION); DO_TEST("input-usbmouse", false, NONE); DO_TEST("input-usbtablet", false, NONE); -- 1.7.6

On Wed, Sep 14, 2011 at 06:27:54PM +0200, Marc-André Lureau wrote:
This is a workaround for QXL bug RHBZ#728174. --- src/qemu/qemu_command.c | 33 +++++++++++--------- .../qemuxml2argv-graphics-spice-qxl-vga.args | 2 +- tests/qemuxml2argvtest.c | 3 +- 3 files changed, 21 insertions(+), 17 deletions(-)
This isn't really desirable, because our PCI device address tracking code is still going to allow other devices to share the slot with QXL, and not know that the ARGV has been hacked to disable multifunction. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Hi Daniel On Thu, Sep 15, 2011 at 10:18 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
This isn't really desirable, because our PCI device address tracking code is still going to allow other devices to share the slot with QXL, and not know that the ARGV has been hacked to disable multifunction.
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot. -- Marc-André Lureau

At 09/15/2011 08:23 PM, Marc-André Lureau Write:
Hi Daniel
On Thu, Sep 15, 2011 at 10:18 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
This isn't really desirable, because our PCI device address tracking code is still going to allow other devices to share the slot with QXL, and not know that the ARGV has been hacked to disable multifunction.
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot.
If the user specifies pci address, we use the pci address. If the user does not specify pci address, we will allocate whole slot for the device. If the pci device is hot plugged, it should be single function(hotplugging multi function pci device is not supported now) So, if the user wants to use multi function pci device, he should specify the pci address. Thanks Wen Congyang

Hi ----- Original Message -----
At 09/15/2011 08:23 PM, Marc-André Lureau Write:
Hi Daniel
On Thu, Sep 15, 2011 at 10:18 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
This isn't really desirable, because our PCI device address tracking code is still going to allow other devices to share the slot with QXL, and not know that the ARGV has been hacked to disable multifunction.
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot.
So, if the user wants to use multi function pci device, he should specify the pci address.
So adding a check such as: if (!multiFunc && info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address") Should be enough? regards -- Marc-André Lureau

Hi hi On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau <mlureau@redhat.com> wrote:
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot.
So, if the user wants to use multi function pci device, he should specify the pci address.
So adding a check such as:
if (!multiFunc && info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you? Daniel, did you had time to verify that PCI allocation is per-slot? (It would be nice to get this "workaround" for the next release) thanks! -- Marc-André Lureau

On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau <mlureau@redhat.com> wrote:
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot.
So, if the user wants to use multi function pci device, he should specify the pci address.
So adding a check such as:
if (!multiFunc && info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release)
IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the problem appears to lie. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/19/2011 01:32 PM, Daniel P. Berrange wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau<mlureau@redhat.com> wrote:
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot. So, if the user wants to use multi function pci device, he should specify the pci address. So adding a check such as:
if (!multiFunc&& info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release) IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the
On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote: problem appears to lie.
As it stands, Fedora 16 (currently using unpatched libvirt-0.9.6) will be going into beta with QXL video broken for Windows guests, so we need some kind of Fedora-only patch very soon (see the schedule here: https://fedoraproject.org/wiki/Releases/16/Schedule - fortunately just delayed another week) The original patch in this thread: https://www.redhat.com/archives/libvir-list/2011-September/msg00534.html of course doesn't include the above mentioned additional code, and there isn't a followup patch. It would be very good to push a patch to the F16 git for this so it would hopefully get into the beta, but want to make sure what I push is the "right" thing, so a "final" patch (and some testing by people with F16 hosts) would be very helpful!

On Tue, Sep 27, 2011 at 01:19:20AM -0400, Laine Stump wrote:
On 09/19/2011 01:32 PM, Daniel P. Berrange wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau<mlureau@redhat.com> wrote:
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot. So, if the user wants to use multi function pci device, he should specify the pci address. So adding a check such as:
if (!multiFunc&& info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release) IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the
On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote: problem appears to lie.
As it stands, Fedora 16 (currently using unpatched libvirt-0.9.6) will be going into beta with QXL video broken for Windows guests, so we need some kind of Fedora-only patch very soon (see the schedule here: https://fedoraproject.org/wiki/Releases/16/Schedule - fortunately just delayed another week)
The original patch in this thread:
https://www.redhat.com/archives/libvir-list/2011-September/msg00534.html
of course doesn't include the above mentioned additional code, and there isn't a followup patch. It would be very good to push a patch to the F16 git for this so it would hopefully get into the beta, but want to make sure what I push is the "right" thing, so a "final" patch (and some testing by people with F16 hosts) would be very helpful!
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot. Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt & QEMU releases. Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility. Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
On Tue, Sep 27, 2011 at 01:19:20AM -0400, Laine Stump wrote:
On 09/19/2011 01:32 PM, Daniel P. Berrange wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau<mlureau@redhat.com> wrote:
> How do we allow other devices to share the slot? It seems to me that > qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while > making sure there is no conflicts on the same slot. So, if the user wants to use multi function pci device, he should specify the pci address. So adding a check such as:
if (!multiFunc&& info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release) IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the
On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote: problem appears to lie. As it stands, Fedora 16 (currently using unpatched libvirt-0.9.6) will be going into beta with QXL video broken for Windows guests, so we need some kind of Fedora-only patch very soon (see the schedule here: https://fedoraproject.org/wiki/Releases/16/Schedule - fortunately just delayed another week)
The original patch in this thread:
https://www.redhat.com/archives/libvir-list/2011-September/msg00534.html
of course doesn't include the above mentioned additional code, and there isn't a followup patch. It would be very good to push a patch to the F16 git for this so it would hopefully get into the beta, but want to make sure what I push is the "right" thing, so a "final" patch (and some testing by people with F16 hosts) would be very helpful! When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Of course this has been in since 0.9.3, meaning there are official releases with broken ABI compatibility :-(. Still, the sooner it's fixed the better.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
Will it be acceptable to put that in a patch on top of 0.9.6 for Fedora 16? (I think the answer is "yes", since it's only an XML change, but just want to make sure).

On Wed, Sep 28, 2011 at 03:02:59AM -0400, Laine Stump wrote:
On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
On Tue, Sep 27, 2011 at 01:19:20AM -0400, Laine Stump wrote:
On 09/19/2011 01:32 PM, Daniel P. Berrange wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau<mlureau@redhat.com> wrote:
>>How do we allow other devices to share the slot? It seems to me that >>qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while >>making sure there is no conflicts on the same slot. >So, if the user wants to use multi function pci device, he should >specify the >pci address. So adding a check such as:
if (!multiFunc&& info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release) IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the
On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote: problem appears to lie. As it stands, Fedora 16 (currently using unpatched libvirt-0.9.6) will be going into beta with QXL video broken for Windows guests, so we need some kind of Fedora-only patch very soon (see the schedule here: https://fedoraproject.org/wiki/Releases/16/Schedule - fortunately just delayed another week)
The original patch in this thread:
https://www.redhat.com/archives/libvir-list/2011-September/msg00534.html
of course doesn't include the above mentioned additional code, and there isn't a followup patch. It would be very good to push a patch to the F16 git for this so it would hopefully get into the beta, but want to make sure what I push is the "right" thing, so a "final" patch (and some testing by people with F16 hosts) would be very helpful! When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Of course this has been in since 0.9.3, meaning there are official releases with broken ABI compatibility :-(. Still, the sooner it's fixed the better.
Fortunately it has only affected guests with a QXL device, to the best of our knowledge.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
Will it be acceptable to put that in a patch on top of 0.9.6 for Fedora 16? (I think the answer is "yes", since it's only an XML change, but just want to make sure).
Yep. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
A couple of questions: 1) Is it possible/useful to have function > 0 without multifunction=on in the qemu commandline? 2) If the answer to (1) is "No", if the XML has function > 0 but no multifunction=on, should we implicitly add multifunction=on, or should we log an error?

On 09/29/2011 10:30 AM, Laine Stump wrote:
On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
A couple of questions:
1) Is it possible/useful to have function > 0 without multifunction=on in the qemu commandline?
2) If the answer to (1) is "No", if the XML has function > 0 but no multifunction=on, should we implicitly add multifunction=on, or should we log an error?
Also: 3) if you explicitly/(implicitly?) set multifunction on for one function of a slot, does qemu automatically do so for other functions on the slot (ie, if you have a device that uses slot 1 function 1 (and turns on multifunction), then will slot 1 function 0 automatically have multifunction turned on too?)

* Laine Stump (laine@laine.org) wrote:
On 09/29/2011 10:30 AM, Laine Stump wrote:
On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Yes, I think that's correct. It should only be on for multifunction devices.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
A couple of questions:
1) Is it possible/useful to have function > 0 without multifunction=on in the qemu commandline?
qemu should blcok function > 0 when function 0 does not have multifunction=on.
2) If the answer to (1) is "No", if the XML has function > 0 but no multifunction=on, should we implicitly add multifunction=on, or should we log an error?
You have to have a function 0 _and_ and function > 0 to be multifunction. YOu have to specify multifucntion=on in function 0 (at a minimum, spec is unclear whether it's required or implied for > 0).
Also:
3) if you explicitly/(implicitly?) set multifunction on for one function of a slot, does qemu automatically do so for other functions on the slot (ie, if you have a device that uses slot 1 function 1 (and turns on multifunction), then will slot 1 function 0 automatically have multifunction turned on too?)
It will not automagically set multifunction=on on func 0. It will generate an error. thanks, -chris

At 09/29/2011 10:56 PM, Laine Stump Write:
On 09/29/2011 10:30 AM, Laine Stump wrote:
On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
A couple of questions:
1) Is it possible/useful to have function > 0 without multifunction=on in the qemu commandline?
2) If the answer to (1) is "No", if the XML has function > 0 but no multifunction=on, should we implicitly add multifunction=on, or should we log an error?
Also:
3) if you explicitly/(implicitly?) set multifunction on for one function of a slot, does qemu automatically do so for other functions on the slot (ie, if you have a device that uses slot 1 function 1 (and turns on multifunction), then will slot 1 function 0 automatically have multifunction turned on too?)
IIRC, qemu does not do it. If bit 7 in the Header Type register of function 1 is 1, and bit 7 in the Header Type register of function 0 is 0, it's a single device, and you will not see the other function on the guest. Thanks Wen Congyang
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

At 09/29/2011 10:30 PM, Laine Stump Write:
On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
A couple of questions:
1) Is it possible/useful to have function > 0 without multifunction=on in the qemu commandline?
The answer is Yes. If a single function device is detected (i.e., bit 7 in the Header Type register of function 0 is 0), no more functions for that Device Number will be checked. If a multi-function device is detected (i.e., bit 7 in the Header Type register of function 0 is 1), then all remaining Function Numbers will be checked. Thanks Wen Congyang
2) If the answer to (1) is "No", if the XML has function > 0 but no multifunction=on, should we implicitly add multifunction=on, or should we log an error?
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 09/27/2011 05:13 AM, Daniel P. Berrange wrote:
On Tue, Sep 27, 2011 at 01:19:20AM -0400, Laine Stump wrote:
On 09/19/2011 01:32 PM, Daniel P. Berrange wrote:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau<mlureau@redhat.com> wrote:
> How do we allow other devices to share the slot? It seems to me that > qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while > making sure there is no conflicts on the same slot. So, if the user wants to use multi function pci device, he should specify the pci address. So adding a check such as:
if (!multiFunc&& info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release) IMHO this kind of hack doesn't belong in libvirt. It is fine for distro vendors to consider as a one off quick-hack for their packages of libvirt, if they don't have time to fix the real QXL bug, but not for libvirt upstream releases. QXL/QEMU should really be fixed since that's where the
On Mon, Sep 19, 2011 at 07:16:22PM +0200, Marc-André Lureau wrote: problem appears to lie. As it stands, Fedora 16 (currently using unpatched libvirt-0.9.6) will be going into beta with QXL video broken for Windows guests, so we need some kind of Fedora-only patch very soon (see the schedule here: https://fedoraproject.org/wiki/Releases/16/Schedule - fortunately just delayed another week)
The original patch in this thread:
https://www.redhat.com/archives/libvir-list/2011-September/msg00534.html
of course doesn't include the above mentioned additional code, and there isn't a followup patch. It would be very good to push a patch to the F16 git for this so it would hopefully get into the beta, but want to make sure what I push is the "right" thing, so a "final" patch (and some testing by people with F16 hosts) would be very helpful! When we originally enabled multifunction for all PCI devices, we did so in the belief that this was effectively a no-op for guest OSes. ie it should not have changed guest OS behaviour at all, unless multiple devices were actually inserted in the slot.
Evidentally this turns out to be incorrect. In other words we introduced a guest ABI change. This is very bad, because our stated goal is to have a stable guest ABI across libvirt& QEMU releases.
Thus IMHO we need to disable *all* setting of the 'multifunction=on' parameter for all guests by default, to unbreak the ABI compatibility.
Then we should introduce a new parameter 'multifunction='on' in the <address type=pci> element to allow it to be optionally enabled per device.
I just sent a patch to do this, in a separate thread so it doesn't get buried: https://www.redhat.com/archives/libvir-list/2011-September/msg01291.html

At 09/20/2011 01:16 AM, Marc-André Lureau Write:
Hi hi
On Fri, Sep 16, 2011 at 1:38 PM, Marc-André Lureau <mlureau@redhat.com> wrote:
How do we allow other devices to share the slot? It seems to me that qemuDomainPCIAddressSetNextAddr() only allocate whole slot, while making sure there is no conflicts on the same slot.
So, if the user wants to use multi function pci device, he should specify the pci address.
So adding a check such as:
if (!multiFunc && info->addr.pci.function != 0) return error("The %s device doesn't support multifunction address")
Wen, does that sound reasonable to you?
Here is part of my xml config file: <video> <model type='qxl' vram='65536' heads='1'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> </video> <video> <model type='qxl' vram='32768' heads='1'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x1'/> </video> <memballoon model='virtio'> <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> </memballoon> I can start the domain, and I run lspci on guest: [root@RHEL6RCi386 ~]# lspci 00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02) 00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II] 00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II] 00:01.2 USB Controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] (rev 01) 00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03) 00:02.0 VGA compatible controller: Red Hat, Inc. Device 0100 (rev 02) 00:03.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8139/8139C/8139C+ (rev 20) 00:04.0 RAM memory: Red Hat, Inc Virtio memory balloon 00:04.1 Display controller: Red Hat, Inc. Device 0100 (rev 02) 00:06.0 Ethernet controller: Red Hat, Inc Virtio network device 00:07.0 SCSI storage controller: Red Hat, Inc Virtio block device 00:08.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a So I think qxl device supports multifunction. Thanks Wen Congyang
Daniel, did you had time to verify that PCI allocation is per-slot?
(It would be nice to get this "workaround" for the next release)
thanks!

Hi On Tue, Sep 20, 2011 at 3:11 AM, Wen Congyang <wency@cn.fujitsu.com> wrote:
So I think qxl device supports multifunction.
It works on Linux, but not on Windows. It is most likely a driver bug, but nobody so far found a solution. Using windows kernel debugger, the only "interesting" message I could find is: "VIDEOPRT: Multifunction board not allowed to start" (I compared windows kernel log of various pnp/pci/video mask with and without multifunction qxl, it didn't help me to find why windows refuse to start it) regards -- Marc-André Lureau

On Tue, Sep 20, 2011 at 02:41:44PM +0200, Marc-André Lureau wrote:
Hi
On Tue, Sep 20, 2011 at 3:11 AM, Wen Congyang <wency@cn.fujitsu.com> wrote:
So I think qxl device supports multifunction.
It works on Linux, but not on Windows. It is most likely a driver bug, but nobody so far found a solution.
Using windows kernel debugger, the only "interesting" message I could find is:
"VIDEOPRT: Multifunction board not allowed to start"
(I compared windows kernel log of various pnp/pci/video mask with and without multifunction qxl, it didn't help me to find why windows refuse to start it)
Google brought me to these pages http://msdn.microsoft.com/en-us/library/ff542778%28v=VS.85%29.aspx http://msdn.microsoft.com/en-us/library/windows/hardware/ff542756%28v=vs.85%... Which suggest we need a different .inf file to refer to something called 'mf.sys'. I've no idea whether this is relevant or not, but perhaps one of the windows QXL devs can take a look at it. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Hi On Tue, Sep 20, 2011 at 2:46 PM, Daniel P. Berrange <berrange@redhat.com> wrote:
On Tue, Sep 20, 2011 at 02:41:44PM +0200, Marc-André Lureau wrote:
Hi
On Tue, Sep 20, 2011 at 3:11 AM, Wen Congyang <wency@cn.fujitsu.com> wrote:
So I think qxl device supports multifunction.
It works on Linux, but not on Windows. It is most likely a driver bug, but nobody so far found a solution.
Using windows kernel debugger, the only "interesting" message I could find is:
"VIDEOPRT: Multifunction board not allowed to start"
(I compared windows kernel log of various pnp/pci/video mask with and without multifunction qxl, it didn't help me to find why windows refuse to start it)
Google brought me to these pages
http://msdn.microsoft.com/en-us/library/ff542778%28v=VS.85%29.aspx http://msdn.microsoft.com/en-us/library/windows/hardware/ff542756%28v=vs.85%...
Which suggest we need a different .inf file to refer to something called 'mf.sys'. I've no idea whether this is relevant or not, but perhaps one of the windows QXL devs can take a look at it.
I read it differently, to me PCI multifunction devices are handled by the system mf.sys. (the PCI multifunction page doesn't say anything about mf.sys) "The system supplied multifunction driver (mf.sys) can handle the bus-level enumeration and resource allocation requirements for the device, and the system-supplied INF (mf.sys) can install the multifunction device. You need to supply only a function driver and INF file for each of the individual device functions. If the device does not comply with the standard for its bus, you might need to supply a driver equivalent to mf.sys in functionality, in addition to function drivers and INF files for the device functions." Also, virtio devices seem to work fine with multifunction without special mf.sys case, from what I can see. regards -- Marc-André Lureau

On Tue, Sep 20, 2011 at 02:41:44PM +0200, Marc-André Lureau wrote:
Hi
On Tue, Sep 20, 2011 at 3:11 AM, Wen Congyang <wency@cn.fujitsu.com> wrote:
So I think qxl device supports multifunction.
It works on Linux, but not on Windows. It is most likely a driver bug, but nobody so far found a solution.
Using windows kernel debugger, the only "interesting" message I could find is:
"VIDEOPRT: Multifunction board not allowed to start"
Now that I see it a second time, this looks obviously like a problem with the built in videoprt.sys, so nothing we can fix without rewriting our driver to use the newer WDDM framework, and that would not work for XP anyway. I would look for documentation on videoprt.sys limitations.
(I compared windows kernel log of various pnp/pci/video mask with and without multifunction qxl, it didn't help me to find why windows refuse to start it)
regards
-- Marc-André Lureau
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
participants (7)
-
Alon Levy
-
Chris Wright
-
Daniel P. Berrange
-
Laine Stump
-
Marc-André Lureau
-
Marc-André Lureau
-
Wen Congyang