[libvirt] [PATCH v4 0/4] add pci-bridge support

Currently it is impossible to arrange devices across multiple PCI buses. For example:

  <sound model='ac97'>
    <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
  </sound>
  <video>
    <model type='cirrus' vram='9216' heads='1'/>
    <address type='pci' domain='0x0000' bus='0x1' slot='0x02' function='0x0'/>
  </video>

libvirt will complain about "bus != 0". Fortunately, qemu supports PCI-to-PCI bridges. If we want multiple PCI buses, we can define 2 pci-bridge controllers and attach one to the other as a subordinate PCI bus, so 2 PCI buses appear. For example:

  <controller type='pci-bridge' index='0'/>
  <controller type='pci-bridge' index='1'>
    <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
  </controller>
  <sound model='ac97'>
    <address type='pci' domain='0x0000' bus='0x01' slot='0x02' function='0x0'/>
  </sound>
  <video>
    <model type='cirrus' vram='9216' heads='1'/>
    <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
  </video>

 src/conf/domain_conf.c        |   98 ++++-
 src/conf/domain_conf.h        |    1 +
 docs/schemas/domaincommon.rng |    1 +
 src/qemu/qemu_capabilities.c  |    4 +
 src/qemu/qemu_capabilities.h  |    1 +
 src/qemu/qemu_command.c       |   43 ++++++++++++++++++++-----
 6 files changed, 126 insertions(+), 22 deletions(-)

Add a new controller type, so that one can define a pci-bridge controller like this:

  <controller type='pci-bridge' index='0'/>
  <controller type='pci-bridge' index='1'>
    <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
  </controller>

It acts as a PCI bus, so multiple PCI buses become available via PCI-to-PCI bridges.

Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 src/conf/domain_conf.c |    3 ++-
 src/conf/domain_conf.h |    1 +
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 8dbfb96..b64e5b5 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -269,7 +269,8 @@ VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST,
               "sata",
               "virtio-serial",
               "ccid",
-              "usb")
+              "usb",
+              "pci-bridge")
 
 VIR_ENUM_IMPL(virDomainControllerModelSCSI, VIR_DOMAIN_CONTROLLER_MODEL_SCSI_LAST,
               "auto",
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 9a9e0b1..e68892c 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -661,6 +661,7 @@ enum virDomainControllerType {
     VIR_DOMAIN_CONTROLLER_TYPE_VIRTIO_SERIAL,
     VIR_DOMAIN_CONTROLLER_TYPE_CCID,
     VIR_DOMAIN_CONTROLLER_TYPE_USB,
+    VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
 
     VIR_DOMAIN_CONTROLLER_TYPE_LAST
 };
-- 
1.7.2.5

Format the qemu command line to add a pci-bridge like this: "-device pci-bridge". Also add a qemu capability flag to check whether the qemu binary supports the pci-bridge device.

Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 src/qemu/qemu_capabilities.c |    4 ++-
 src/qemu/qemu_capabilities.h |    1 +
 src/qemu/qemu_command.c      |   43 ++++++++++++++++++++------------------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index 29693c3..d2e379b 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -202,7 +202,8 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST,
               "s390-sclp",
 
               "usb-serial", /* 125 */
-              "usb-net",
+              "usb-net",
+              "pci-bridge"
     );
 
@@ -1351,6 +1352,7 @@ struct qemuCapsStringFlags qemuCapsObjectTypes[] = {
     { "cirrus-vga", QEMU_CAPS_DEVICE_CIRRUS_VGA },
     { "vmware-svga", QEMU_CAPS_DEVICE_VMWARE_SVGA },
     { "usb-serial", QEMU_CAPS_DEVICE_USB_SERIAL},
+    { "pci-bridge", QEMU_CAPS_DEVICE_PCI_BRIDGE },
     { "usb-net", QEMU_CAPS_DEVICE_USB_NET},
 };
 
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index 5279d07..c50bba8 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -165,6 +165,7 @@ enum qemuCapsFlags {
     QEMU_CAPS_SCLP_S390          = 124, /* -device sclp* */
     QEMU_CAPS_DEVICE_USB_SERIAL  = 125, /* -device usb-serial */
     QEMU_CAPS_DEVICE_USB_NET     = 126, /* -device usb-net */
+    QEMU_CAPS_DEVICE_PCI_BRIDGE  = 127, /* -device pci-bridge */
 
     QEMU_CAPS_LAST,                   /* this must always be the last item */
 };
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index f6273c1..9ad3c77 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -956,13 +956,6 @@ static char *qemuPCIAddressAsString(virDomainDeviceInfoPtr dev)
 {
     char *addr;
 
-    if (dev->addr.pci.domain != 0 ||
-        dev->addr.pci.bus != 0) {
-        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                       _("Only PCI domain 0 and bus 0 are available"));
-        return NULL;
-    }
-
     if (virAsprintf(&addr, "%d:%d:%d.%d",
                     dev->addr.pci.domain,
                     dev->addr.pci.bus,
@@ -974,7 +967,6 @@ static char *qemuPCIAddressAsString(virDomainDeviceInfoPtr dev)
     return addr;
 }
 
-
 static int qemuCollectPCIAddress(virDomainDefPtr def ATTRIBUTE_UNUSED,
                                  virDomainDeviceDefPtr device,
                                  virDomainDeviceInfoPtr info,
@@ -994,6 +986,13 @@ static int qemuCollectPCIAddress(virDomainDefPtr def ATTRIBUTE_UNUSED,
         return 0;
     }
 
+    if (info->addr.pci.domain != 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Only PCI device addresses with "
+                         "domain=0 are supported"));
+        return -1;
+    }
+
     addr = qemuPCIAddressAsString(info);
     if (!addr)
         goto cleanup;
@@ -1753,16 +1752,6 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
                           qemuCapsPtr caps)
 {
     if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) {
-        if (info->addr.pci.domain != 0) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Only PCI device addresses with domain=0 are supported"));
-            return -1;
-        }
-        if (info->addr.pci.bus != 0) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Only PCI device addresses with bus=0 are supported"));
-            return -1;
-        }
         if (qemuCapsGet(caps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
             if (info->addr.pci.function > 7) {
                 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -1777,6 +1766,7 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
                                  "are supported with this QEMU binary"));
                 return -1;
             }
+
             if (info->addr.pci.multi == VIR_DEVICE_ADDRESS_PCI_MULTI_ON) {
                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                                _("'multifunction=on' is not supported with "
@@ -1787,11 +1777,13 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
         /* XXX
          * When QEMU grows support for > 1 PCI bus, then pci.0 changes
-         * to pci.1, pci.2, etc
+         * to pci.1, pci.2, etc. (e.g. when pci-to-pci bridges are supported)
          * When QEMU grows support for > 1 PCI domain, then pci.0 change
          * to pciNN.0  where NN is the domain number
          */
-        if (qemuCapsGet(caps, QEMU_CAPS_PCI_MULTIBUS))
+        if (qemuCapsGet(caps, QEMU_CAPS_DEVICE_PCI_BRIDGE))
+            virBufferAsprintf(buf, ",bus=pci.%d", info->addr.pci.bus);
+        else if (qemuCapsGet(caps, QEMU_CAPS_PCI_MULTIBUS))
             virBufferAsprintf(buf, ",bus=pci.0");
         else
             virBufferAsprintf(buf, ",bus=pci");
@@ -3072,6 +3064,15 @@ qemuBuildControllerDevStr(virDomainDefPtr domainDef,
     int model;
 
     switch (def->type) {
+    case VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE:
+        if (def->idx == 0) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("pci-bridge index should not be 0 here"));
+            goto out;
+        }
+        virBufferAsprintf(&buf, "pci-bridge,chassis_nr=%d", def->idx + 1);
+        virBufferAsprintf(&buf, ",id=pci.%d", def->idx);
+        break;
     case VIR_DOMAIN_CONTROLLER_TYPE_SCSI:
         model = def->model;
         if ((qemuSetScsiControllerModel(domainDef, caps, &model)) < 0)
@@ -3145,6 +3146,7 @@ qemuBuildControllerDevStr(virDomainDefPtr domainDef,
     if (qemuBuildDeviceAddressStr(&buf, &def->info, caps) < 0)
         goto error;
 
+out:
     if (virBufferError(&buf)) {
         virReportOOMError();
         goto error;
@@ -5080,6 +5082,7 @@ qemuBuildCommandLine(virConnectPtr conn,
         /* We don't add an explicit IDE or FD controller because the
          * provided PIIX4 device already includes one. It isn't possible to
          * remove the PIIX4. */
+        VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
        VIR_DOMAIN_CONTROLLER_TYPE_USB,
        VIR_DOMAIN_CONTROLLER_TYPE_SCSI,
        VIR_DOMAIN_CONTROLLER_TYPE_SATA,
-- 
1.7.2.5
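For the two-controller example from the cover letter, the generated command line would look roughly like this (a sketch based on the format strings in the patch above; device IDs and exact option ordering depend on the rest of the domain definition and the qemu version):

  -device pci-bridge,chassis_nr=2,id=pci.1,bus=pci.0,addr=0x5 \
  -device AC97,id=sound0,bus=pci.1,addr=0x2 \
  -device cirrus-vga,id=video0,bus=pci.0,addr=0x2

The bridge with index 1 becomes device id "pci.1", so any device whose XML address says bus='0x01' is placed with ",bus=pci.1".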

Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 docs/schemas/domaincommon.rng |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 049f232..205e93a 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -1345,6 +1345,7 @@
             <value>sata</value>
             <value>ccid</value>
             <value>usb</value>
+            <value>pci-bridge</value>
           </choice>
         </attribute>
       </optional>
-- 
1.7.2.5

If a device specifies a PCI bus number for which no pci-bridge controller has been defined, silently fill in the required controller.

Acked-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 src/conf/domain_conf.c |   95 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 95 insertions(+), 0 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index b64e5b5..5cf8e04 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -11813,6 +11813,98 @@ virDomainDefMaybeAddSmartcardController(virDomainDefPtr def)
 }
 
+static int
+virDomainDefMaybeAddPcibridgeController(virDomainDefPtr def)
+{
+    int i, idx = 0;
+
+    for (i = 0; i < def->nnets; i++) {
+        if (def->nets[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->nets[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->nsounds; i++) {
+        if (def->sounds[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->sounds[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->nvideos; i++) {
+        if (def->videos[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->videos[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->ncontrollers; i++) {
+        if (def->controllers[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->controllers[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->nfss; i++) {
+        if (def->fss[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->fss[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->nhostdevs; i++) {
+        if (def->hostdevs[i]->info->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->hostdevs[i]->info->addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < def->ndisks; i++) {
+        if (def->disks[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+            continue;
+        idx = def->disks[i]->info.addr.pci.bus;
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                           idx) < 0)
+            return -1;
+    }
+
+    if (def->watchdog->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+        idx = def->watchdog->info.addr.pci.bus;
+    if (virDomainDefMaybeAddController(def,
+                                       VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                       idx) < 0)
+        return -1;
+
+    if (def->memballoon->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+        idx = def->memballoon->info.addr.pci.bus;
+    if (virDomainDefMaybeAddController(def,
+                                       VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
+                                       idx) < 0)
+        return -1;
+
+    return 0;
+}
+
 /*
  * Based on the declared <address/> info for any devices,
  * add necessary drive controllers which are not already present
@@ -11847,6 +11939,9 @@ int virDomainDefAddImplicitControllers(virDomainDefPtr def)
     if (virDomainDefMaybeAddSmartcardController(def) < 0)
         return -1;
 
+    if (virDomainDefMaybeAddPcibridgeController(def) < 0)
+        return -1;
+
     return 0;
 }
-- 
1.7.2.5
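For comparison, the repeated per-array loops could in principle collapse into one pass over all device infos. A minimal sketch, assuming the virDomainDeviceInfoIterate() helper with the same callback signature as qemuCollectPCIAddress() in patch 2/4; this is an illustration, not part of the posted series:

  /* Hypothetical alternative: ensure a pci-bridge controller exists for
   * the bus number referenced by every PCI-addressed device, using the
   * generic device-info iterator instead of one loop per device array.
   * Note: a real implementation would need care, since it appends
   * controllers while the iterator may still be walking them. */
  static int
  virDomainDefAddPciBridgeCb(virDomainDefPtr def,
                             virDomainDeviceDefPtr dev ATTRIBUTE_UNUSED,
                             virDomainDeviceInfoPtr info,
                             void *opaque ATTRIBUTE_UNUSED)
  {
      if (info->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
          return 0;
      return virDomainDefMaybeAddController(def,
                                            VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
                                            info->addr.pci.bus);
  }

  static int
  virDomainDefMaybeAddPcibridgeController(virDomainDefPtr def)
  {
      return virDomainDeviceInfoIterate(def, virDomainDefAddPciBridgeCb, NULL);
  }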

On 02/19/13 03:25, liguang wrote:
If a device specifies a PCI bus number for which no pci-bridge controller has been defined, silently fill in the required controller.
Acked-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
This will only add bridges for the explicitly mentioned buses, which means we could have buses 0 and 6 with no buses between them. Maybe we should add them the way we add disk controllers: find the highest index and add all bridges with indexes from 1 to max (see the sketch below).

It would be nice if we could add PCI bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that. Maybe Peter's patches for config parser defaults will be helpful:
https://www.redhat.com/archives/libvir-list/2013-March/msg00042.html

Jan
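A minimal sketch of that disk-controller-style filling (hypothetical: virDomainDefFindHighestPCIBus() stands in for a scan over all PCI device addresses; the other names come from this patch series):

  static int
  virDomainDefFillPciBridges(virDomainDefPtr def)
  {
      int i;
      /* hypothetical helper: highest bus number used by any PCI address */
      int maxbus = virDomainDefFindHighestPCIBus(def);

      /* add every bridge from 1 up to maxbus so there are no holes */
      for (i = 1; i <= maxbus; i++) {
          if (virDomainDefMaybeAddController(def,
                                             VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
                                             i) < 0)
              return -1;
      }
      return 0;
  }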

On 03/04/2013 11:51 AM, Ján Tomko wrote:
On 02/19/13 03:25, liguang wrote:
If a device specifies a PCI bus number for which no pci-bridge controller has been defined, silently fill in the required controller.
Acked-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---

This will only add bridges for the explicitly mentioned buses, which means we could have buses 0 and 6 with no buses between them. Maybe we should add them the way we add disk controllers: find the highest index and add all bridges with indexes from 1 to max.
But each pci bridge device takes up another slot, which we may not want to give up. (Although arguably, once we have pci-bridge devices, we can create as many slots as we like :-) Is there some advantage to having all the buses filled in (other than similarity to what's done for other types of controllers)?
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.
If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.

Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way, no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0.)

This also brings up the fact that we need to deal with the possibility of all buses being full during hotplug - when that happens I suppose we will also want to auto-add a pci bridge.
Maybe Peter's patches for config parser defaults will be helpful: https://www.redhat.com/archives/libvir-list/2013-March/msg00042.html
A circular reference of data definitions led to danpb saying that this is probably the wrong way to do it, so it looks like it will require some rework, but definitely whatever shape that takes, this code will want to fit into that framework.

On 03/04/13 20:39, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
This will only add bridges for the explicitly mentioned buses, which means we could have buses 0 and 6 with no buses between them. Maybe we should add them the way we add disk controllers: find the highest index and add all bridges with indexes from 1 to max.
But each pci bridge device takes up another slot, which we may not want to give up. (Although arguably, once we have pci-bridge devices, we can create as many slots as we like :-)
Is there some advantage to having all the buses filled in (other than similarity to what's done for other types of controllers)?
The only thing I can think of is that the PCI addresses in the XML config will match those that can be seen in the guest.
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.
If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.
My first (1/2) patch (pushed already) changed the function parameters to use the PCI address struct instead of just slot and function and it did not change any functionality. I still need to post another version of 2/2, which should make the allocation of addresses with bus > 0 possible.
Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0)
So when searching for a new slot (when multiple buses are enabled), we would, for example, stop at slot 29 and only consider slots 30 and 31 for bridges? The only other way I can think of is to assign the addresses twice, the first time only to see how many bridges we need. But this won't work for hotplug.
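A rough sketch of that reservation rule, assuming a standard 32-slot (0-31) PCI bus; the constants and the in-use bitmap are illustrative, not libvirt API:

  enum {
      PCI_LAST_SLOT = 31,
      PCI_BRIDGE_RESERVED_SLOTS = 2,  /* slots 30 and 31 kept for bridges */
  };

  /* return the first free slot for a device of the given class,
   * or -1 if this bus is exhausted for that class */
  static int
  pciFindFreeSlot(const bool slot_in_use[PCI_LAST_SLOT + 1], bool for_bridge)
  {
      int first = for_bridge ? PCI_LAST_SLOT - PCI_BRIDGE_RESERVED_SLOTS + 1 : 0;
      int last = for_bridge ? PCI_LAST_SLOT
                            : PCI_LAST_SLOT - PCI_BRIDGE_RESERVED_SLOTS;
      int slot;

      for (slot = first; slot <= last; slot++) {
          if (!slot_in_use[slot])
              return slot;
      }
      return -1;
  }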
This also brings up the fact that we need to deal with the possibility of all buses being full during hotplug - when that happens I suppose we will also want to auto-add a pci bridge.
Hmm. This would require the address allocation function to know how many buses we have and go through all of them. It seems we wouldn't need to do that for the initial allocation - when we get to the end of the bus, we know it's full.
Maybe Peter's patches for config parser defaults will be helpful: https://www.redhat.com/archives/libvir-list/2013-March/msg00042.html
A circular reference of data definitions led to danpb saying that this is probably the wrong way to do it, so it looks like it will require some rework, but definitely whatever shape that takes, this code will want to fit into that framework.

On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
On 02/19/13 03:25, liguang wrote:
If a device specifies a PCI bus number for which no pci-bridge controller has been defined, silently fill in the required controller.
Acked-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---

This will only add bridges for the explicitly mentioned buses, which means we could have buses 0 and 6 with no buses between them. Maybe we should add them the way we add disk controllers: find the highest index and add all bridges with indexes from 1 to max.
But each pci bridge device takes up another slot, which we may not want to give up. (Although arguably, once we have pci-bridge devices, we can create as many slots as we like :-)
Is there some advantage to having all the buses filled in (other than similarity to what's done for other types of controllers)?
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.
If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.
Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0)
You'd have to do a 2-pass assignment: first count the number of devices that need to have PCI addresses assigned, then create the required number of bridges, then assign the addresses.

All this time, though, bear in mind that auto-address assignment is just a convenience. At some point we are well within our rights to stop and say: this is as good as it's going to get; you must do address assignment in the app if you want something more clever.
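A sketch of that two-pass scheme; every helper here (count_unaddressed_pci_devices, count_free_pci_slots, next_bridge_index, assign_pci_addresses) is a hypothetical stand-in, not existing libvirt API:

  /* assumed usable slots per bus; the real number depends on the
   * machine type and on slots already taken by built-in devices */
  #define SLOTS_PER_BUS 30

  static int
  assignPCIAddressesTwoPass(virDomainDefPtr def)
  {
      /* pass 1: count devices that still need a PCI address */
      int need = count_unaddressed_pci_devices(def);
      int avail = count_free_pci_slots(def);
      int extra = 0;

      /* each extra bridge costs one slot but provides a whole new bus */
      while (avail < need) {
          extra++;
          avail += SLOTS_PER_BUS - 1;
      }

      while (extra-- > 0) {
          if (virDomainDefMaybeAddController(def,
                                             VIR_DOMAIN_CONTROLLER_TYPE_PCI_BRIDGE,
                                             next_bridge_index(def)) < 0)
              return -1;
      }

      /* pass 2: ordinary slot assignment, now with enough buses */
      return assign_pci_addresses(def);
  }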
This also brings up the fact that we need to deal with the possibility of all buses being full during hotplug - when that happens I suppose we will also want to auto-add a pci bridge.
Well, if the buses are all full, there's nowhere to hook in the bridge :-) So you need to do that before filling up, or just say this is out of scope and the app must explicitly plug in a bridge if they want more space.

Daniel

On Tue, 2013-03-05 at 10:36 +0000, Daniel P. Berrange wrote:
You'd have to do a 2-pass assignment: first count the number of devices that need to have PCI addresses assigned, then create the required number of bridges, then assign the addresses.

All this time, though, bear in mind that auto-address assignment is just a convenience. At some point we are well within our rights to stop and say: this is as good as it's going to get; you must do address assignment in the app if you want something more clever.
This also brings up the fact that we need to deal with the possibility of all buses being full during hotplug - when that happens I suppose we will also want to auto-add a pci bridge.
Well, if the buses are all full, there's nowhere to hook in the bridge :-) So you need to do that before filling up, or just say this is out of scope and the app must explicitly plug in a bridge if they want more space.
Hmm, that's a good point I hadn't considered. So do I have to hook into device hotplug to know when the bus slots are almost exhausted, and decide whether to add a pci-bridge or not? Or just let the user bear that in mind?

On Wed, 2013-03-06 at 13:13 +0800, li guang wrote:
So... what should I do next? And what about the other 3 patches?

On 03/05/13 11:36, Daniel P. Berrange wrote:
On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.
If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.
Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0)
Maybe we could reserve the first bus as well?
You'd have to do a 2-pass assignment. First count the number of devices that need to have PCI addresses assigned. Then create the required number of bridges, then assign the addresses.
Which one is better? The reserved slots would make auto-adding bridges on hotplug possible, but I fear the code would be too ugly. On the other hand, 2-pass assignment sounds really easy to do :)

Jan

On Wed, Mar 06, 2013 at 04:15:28PM +0100, Ján Tomko wrote:
On 03/05/13 11:36, Daniel P. Berrange wrote:
On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.
If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.
Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0)
Maybe we could reserve the first bus as well?
I don't think we should artificially reserve anything that isn't absolutely required by the underlying machine type.
You'd have to do a 2-pass assignment. First count the number of devices that need to have PCI addresses assigned. Then create the required number of bridges, then assign the addresses.
Which one is better? The reserved slots would make auto-adding bridges on hotplug possible, but I fear the code would be too ugly. On the other hand, 2-pass assignment sounds really easy to do :)
I think we just do the 2-pass auto-assignment.

Daniel

On 03/06/2013 10:23 AM, Daniel P. Berrange wrote:
On Wed, Mar 06, 2013 at 04:15:28PM +0100, Ján Tomko wrote:
On 03/05/13 11:36, Daniel P. Berrange wrote:
On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.

If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.

Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0.)

Maybe we could reserve the first bus as well?
I'm guessing that putting things behind a bridge might reduce performance somewhat? (Or maybe it's a completely artificial concept within qemu that only exists for the sake of making things look right to the guest - I just don't know.)
I don't think we should artificially reserve anything that isn't absolutely required by the underlying machine type.
Yeah, I don't think we should reserve an entire bus. However, when auto-assigning slots, I think we should make a "best effort" to always leave at least one slot open at the end. This will ensure that anyone hotplugging bunches of devices with no explicit pci address will succeed (although their final topology may be suboptimal).
You'd have to do a 2-pass assignment. First count the number of devices that need to have PCI addresses assigned. Then create the required number of bridges, then assign the addresses.
Which one is better? The reserved slots would make auto-adding bridges on hotplug possible, but I fear the code would be too ugly. On the other hand, 2-pass assignment sounds really easy to do :)

I think we just do the 2-pass auto-assignment.
As long as the auto-assignment always finishes with at least one slot open, and we also do the same 2-pass scan when hotplugging a single device, then I think everybody is satisfied. (Basically, it will scan and find that one device (the new one) doesn't have an address assigned; when it counts, it will discover that assigning that address on an existing bus would exhaust all slots, so it will add a new bus (which will be placed into the single remaining slot), then do the 2nd pass, which will place the new device on the new bus.)

If someone explicitly assigns device addresses such that all available slots are taken up (leaving no room for future expansion), then they've dug their own grave (or know what they're doing) and there's nothing we can do about it.

On Thu, Mar 07, 2013 at 01:34:16PM -0500, Laine Stump wrote:
On 03/06/2013 10:23 AM, Daniel P. Berrange wrote:
On Wed, Mar 06, 2013 at 04:15:28PM +0100, Ján Tomko wrote:
On 03/05/13 11:36, Daniel P. Berrange wrote:
On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.

If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.

Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0.)

Maybe we could reserve the first bus as well?

I'm guessing that putting things behind a bridge might reduce performance somewhat? (Or maybe it's a completely artificial concept within qemu that only exists for the sake of making things look right to the guest - I just don't know.)

I don't think we should artificially reserve anything that isn't absolutely required by the underlying machine type.

Yeah, I don't think we should reserve an entire bus. However, when auto-assigning slots, I think we should make a "best effort" to always leave at least one slot open at the end. This will ensure that anyone hotplugging bunches of devices with no explicit pci address will succeed (although their final topology may be suboptimal).
I worry that trying to keep one slot open is going to break existing users who may be filling up their slots 100% but not have a new enough QEMU for bridge support. We need to be very careful not to regress for anyone when changing this address assignment.

Daniel

On 03/07/2013 01:58 PM, Daniel P. Berrange wrote:
On Thu, Mar 07, 2013 at 01:34:16PM -0500, Laine Stump wrote:
On Wed, Mar 06, 2013 at 04:15:28PM +0100, Ján Tomko wrote:
On 03/05/13 11:36, Daniel P. Berrange wrote:
On Mon, Mar 04, 2013 at 02:39:40PM -0500, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
It would be nice if we could add pci bridges even when there weren't any specified in the XML, but there are too many PCI devices. I don't know what would be the nicest way to do that.

If we auto-assign addresses for un-addressed devices first (your recent patches did that, right? I forget the status of those), *then* auto-create the required bridges (which themselves will need an auto-assigned address), that should just happen.

Of course, if we do that, then we won't have reserved any slots on bus 0 for even a single pci-bridge device, so we'll fail. Is the proper way out of this to always reserve one (or maybe two, for good measure) slots on any given bus to be used only for bridges? That way no matter how out of hand things get, we'll always have a place we can add another bus. (In the case of *lots* of devices, I assume that a device on a nested PCI bridge would be less efficient, and may have some other limitations, but it would be better than nothing. And if the admin was really concerned about that, they could modify their config to explicitly place several bridges directly on bus 0.)

Maybe we could reserve the first bus as well?

I'm guessing that putting things behind a bridge might reduce performance somewhat? (Or maybe it's a completely artificial concept within qemu that only exists for the sake of making things look right to the guest - I just don't know.)

On 03/06/2013 10:23 AM, Daniel P. Berrange wrote:
I don't think we should artificially reserve anything that isn't absolutely required by the underlying machine type.

Yeah, I don't think we should reserve an entire bus. However, when auto-assigning slots, I think we should make a "best effort" to always leave at least one slot open at the end. This will ensure that anyone hotplugging bunches of devices with no explicit pci address will succeed (although their final topology may be suboptimal).

I worry that trying to keep one slot open is going to break existing users who may be filling up their slots 100% but not have a new enough QEMU for bridge support. We need to be very careful not to regress for anyone when changing this address assignment.
Sure. This should be one of those "post-parse postprocessing" things that is aware of qemu's capabilities. If there's no support for the pci-bridge device, it won't even do the 2 passes, and won't have any reason to try to keep a slot open (except maybe in anticipation of a future where the host's qemu *does* support pci-bridge, but I think that's beyond the scope of "best effort" :-)

And of course any existing domain will already have had addresses assigned to all devices, so we wouldn't be able to break them (although I guess that means nothing for transient domains).

On 03/04/2013 08:39 PM, Laine Stump wrote:
On 03/04/2013 11:51 AM, Ján Tomko wrote:
On 02/19/13 03:25, liguang wrote:
If a device specifies a PCI bus number for which no pci-bridge controller has been defined, silently fill in the required controller.
Acked-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---

This will only add bridges for the explicitly mentioned buses, which means we could have buses 0 and 6 with no buses between them. Maybe we should add them the way we add disk controllers: find the highest index and add all bridges with indexes from 1 to max.
But each pci bridge device takes up another slot, which we may not want to give up. (Although arguably, once we have pci-bridge devices, we can create as many slots as we like :-)
Is there some advantage to having all the buses filled in (other than similarity to what's done for other types of controllers)?
Okay, I naively thought that the bus numbers visible in the guest are assigned sequentially, but it's a bit more complicated than that:
http://www.tldp.org/LDP/tlk/dd/pci.html#pci-pci-bus-numbering

If we don't need the 'buses' (which would be just bridge indexes in this case) to match the buses visible in the guest, then having holes in them is fine. If we do, things get more complicated, since we should:

a) arrange the bridges in a way that preserves the index<->bus mapping (the easiest way is a cascade of bridges, which might look ugly and have performance issues, but it might be necessary when hotplugging)

b) check whether the user-specified bridges with addresses fit within this arrangement

Is this a requirement, or just a convenience that's not worth the trouble?

Jan
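For illustration, a cascade along the lines of (a) could look like this in domain XML (a hypothetical sketch using the syntax from this series; the slot choices are arbitrary). Since guest-visible bus numbers are assigned depth-first per the document linked above, chaining each bridge off the bus provided by the previous one keeps controller index and guest bus number in step:

  <controller type='pci-bridge' index='1'>
    <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
  </controller>
  <controller type='pci-bridge' index='2'>
    <!-- plugs into the bus provided by bridge 1 -->
    <address type='pci' domain='0x0000' bus='0x01' slot='0x01' function='0x0'/>
  </controller>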

ping ...

Hi, any other comments?

On Mon, 2013-02-25 at 09:38 +0800, li guang wrote:
ping ...

ping...

2013/3/1 li guang <lig.fnst@cn.fujitsu.com>:
Hi,
any other comments?
participants (6):
- Daniel P. Berrange
- Forwarding
- Ján Tomko
- Laine Stump
- li guang
- liguang