[libvirt] [RFC PATCH] support multifunction PCI device

We want to use more than 200 devices. Libvirt uses neither multifunction PCI devices nor PCI-to-PCI bridges, so we cannot use more than 200 or so devices when each device, or its controller, is a PCI device. This patch adds support for multifunction PCI devices. It does not support hot plugging/unplugging multifunction PCI devices.

TODO:
1. Support hot plugging a multifunction PCI device. We can only hot plug one device at a time. Should we introduce another command, 'virsh attach-devices XXX', to support hot plugging more than one device at a time?
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
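For reference, the slot/function arithmetic the patch builds on: a PCI bus has 32 slots with 8 functions each, and QEMU packs both into a single 8-bit devfn. A minimal standalone C sketch of that packing (mirroring the QEMU_PCI_ADDRESS_* macros added below; the helper names here are illustrative only):

#include <assert.h>
#include <stdio.h>

/* devfn packs the slot into bits 3-7 and the function into bits 0-2,
 * so one PCI bus offers 32 slots * 8 functions = 256 addresses. */
static int pci_devfn(int slot, int function) { return (slot << 3) + function; }
static int pci_slot(int devfn) { return (devfn >> 3) & 0x1f; }
static int pci_function(int devfn) { return devfn & 0x7; }

int main(void)
{
    int devfn = pci_devfn(0x06, 5); /* address 00:06.5, as in the lspci output below */
    assert(pci_slot(devfn) == 0x06 && pci_function(devfn) == 5);
    printf("devfn=0x%02x -> %02x.%x\n", devfn, pci_slot(devfn), pci_function(devfn));
    return 0;
}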
From 85a14928f2d445012f293638b44dd476a15aac3c Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Mon, 9 May 2011 14:59:16 +0800
Subject: [PATCH] multifunction PCI device

---
 src/conf/domain_conf.c       |    3 +
 src/qemu/qemu_capabilities.c |    5 +
 src/qemu/qemu_capabilities.h |    1 +
 src/qemu/qemu_command.c      |  339 ++++++++++++++++++++++++++++++++++++------
 src/qemu/qemu_command.h      |   13 ++-
 src/qemu/qemu_hotplug.c      |   76 +++++++++-
 src/qemu/qemu_process.c      |    6 +-
 tests/qemuhelptest.c         |    3 +-
 tests/qemuxml2argvtest.c     |    2 +-
 9 files changed, 390 insertions(+), 58 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index d3efec6..83fdfe8 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -1237,6 +1237,9 @@ int virDomainDeviceAddressIsValid(virDomainDeviceInfoPtr info,
 int virDomainDevicePCIAddressIsValid(virDomainDevicePCIAddressPtr addr)
 {
+    /* PCI bus has 32 slots and 8 functions per slot */
+    if (addr->slot >= 32 || addr->function >= 8)
+        return 0;
     return addr->domain || addr->bus || addr->slot;
 }

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index 620143e..5d0145d 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -119,6 +119,8 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST,
               "device-spicevmc",
               "virtio-tx-alg",
               "device-qxl-vga",
+
+              "pci-multifunction", /* 60 */
     );

 struct qemu_feature_flags {
@@ -1024,6 +1026,9 @@ qemuCapsComputeCmdFlags(const char *help,
      */
     if (version >= 13000)
         qemuCapsSet(flags, QEMU_CAPS_MONITOR_JSON);
+
+    if (version >= 13000)
+        qemuCapsSet(flags, QEMU_CAPS_PCI_MULTIFUNCTION);
 }

 /* We parse the output of 'qemu -help' to get the QEMU
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index ab47f22..4691e90 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -95,6 +95,7 @@ enum qemuCapsFlags {
     QEMU_CAPS_DEVICE_SPICEVMC    = 57, /* older -device spicevmc */
     QEMU_CAPS_VIRTIO_TX_ALG      = 58, /* -device virtio-net-pci,tx=string */
     QEMU_CAPS_DEVICE_QXL_VGA     = 59, /* Is the primary and vga compatible qxl device named qxl-vga? */
+    QEMU_CAPS_PCI_MULTIFUNCTION  = 60, /* -device multifunction=on|off */

     QEMU_CAPS_LAST,                   /* this must always be the last item */
 };
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 22b2634..9ac438c 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -667,9 +667,15 @@ qemuAssignDeviceAliases(virDomainDefPtr def, virBitmapPtr qemuCaps)

 #define QEMU_PCI_ADDRESS_LAST_SLOT 31
+#define QEMU_PCI_ADDRESS_LAST_FUNCTION 7
+#define QEMU_PCI_ADDRESS_LAST_DEVFN 0xFF
+#define QEMU_PCI_ADDRESS_DEVFN(slot, function) (((slot) << 3) + (function))
+#define QEMU_PCI_ADDRESS_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define QEMU_PCI_ADDRESS_FUNCTION(devfn) ((devfn) & 7)
 struct _qemuDomainPCIAddressSet {
     virHashTablePtr used;
     int nextslot;
+    int nextfunction;
 };

@@ -684,10 +690,11 @@ static char *qemuPCIAddressAsString(virDomainDeviceInfoPtr dev)
         return NULL;
     }

-    if (virAsprintf(&addr, "%d:%d:%d",
+    if (virAsprintf(&addr, "%d:%d:%d.%d",
                     dev->addr.pci.domain,
                     dev->addr.pci.bus,
-                    dev->addr.pci.slot) < 0) {
+                    dev->addr.pci.slot,
+                    dev->addr.pci.function) < 0) {
         virReportOOMError();
         return NULL;
     }
@@ -717,6 +724,29 @@ static int qemuCollectPCIAddress(virDomainDefPtr def ATTRIBUTE_UNUSED,
     return 0;
 }

+static void qemuGotoNextFunction(qemuDomainPCIAddressSetPtr addrs,
+                                 int current_slot, int current_function)
+{
+    int current_devfn, next_devfn;
+
+    current_devfn = QEMU_PCI_ADDRESS_DEVFN(current_slot, current_function);
+    next_devfn = QEMU_PCI_ADDRESS_DEVFN(addrs->nextslot, addrs->nextfunction);
+
+    if (current_devfn >= next_devfn) {
+        next_devfn = (current_devfn + 1) % (QEMU_PCI_ADDRESS_LAST_DEVFN + 1);
+        addrs->nextslot = QEMU_PCI_ADDRESS_SLOT(next_devfn);
+        addrs->nextfunction = QEMU_PCI_ADDRESS_FUNCTION(next_devfn);
+    }
+}
+
+static void qemuGotoNextSlot(qemuDomainPCIAddressSetPtr addrs,
+                             int current_slot)
+{
+    if (current_slot >= addrs->nextslot) {
+        addrs->nextslot = (current_slot + 1) % (QEMU_PCI_ADDRESS_LAST_SLOT + 1);
+        addrs->nextfunction = 0;
+    }
+}

 int
 qemuDomainAssignPCIAddresses(virDomainDefPtr def)
@@ -734,7 +764,7 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def)
         if (!(addrs = qemuDomainPCIAddressSetCreate(def)))
             goto cleanup;

-        if (qemuAssignDevicePCISlots(def, addrs) < 0)
+        if (qemuAssignDevicePCISlots(def, addrs, qemuCaps) < 0)
             goto cleanup;
     }
@@ -777,6 +807,35 @@ error:
     return NULL;
 }

+/* Check whether the slot is used by another device.
+ * Return 0 if the slot is not used by another device, or -1 if it is.
+ */
+static int qemuDomainPCIAddressCheckSlot(qemuDomainPCIAddressSetPtr addrs,
+                                         virDomainDeviceInfoPtr dev)
+{
+    char *addr;
+    virDomainDeviceInfo temp_dev;
+    int function;
+
+    temp_dev = *dev;
+    for (function = 0; function <= QEMU_PCI_ADDRESS_LAST_FUNCTION; function++) {
+        temp_dev.addr.pci.function = function;
+        addr = qemuPCIAddressAsString(&temp_dev);
+        if (!addr)
+            return -1;
+
+        if (virHashLookup(addrs->used, addr)) {
+            VIR_FREE(addr);
+            return -1;
+        }
+
+        VIR_FREE(addr);
+    }
+
+    return 0;
+}
+
 int qemuDomainPCIAddressReserveAddr(qemuDomainPCIAddressSetPtr addrs,
                                     virDomainDeviceInfoPtr dev)
 {
@@ -800,36 +859,61 @@ int qemuDomainPCIAddressReserveAddr(qemuDomainPCIAddressSetPtr addrs,
         return -1;
     }

-    if (dev->addr.pci.slot > addrs->nextslot) {
-        addrs->nextslot = dev->addr.pci.slot + 1;
-        if (QEMU_PCI_ADDRESS_LAST_SLOT < addrs->nextslot)
-            addrs->nextslot = 0;
-    }
+    qemuGotoNextFunction(addrs, dev->addr.pci.slot, dev->addr.pci.function);

     return 0;
 }

-int qemuDomainPCIAddressReserveSlot(qemuDomainPCIAddressSetPtr addrs,
-                                    int slot)
+int qemuDomainPCIAddressReserveFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        int slot, int function)
 {
     virDomainDeviceInfo dev;

     dev.addr.pci.domain = 0;
     dev.addr.pci.bus = 0;
     dev.addr.pci.slot = slot;
+    dev.addr.pci.function = function;

     return qemuDomainPCIAddressReserveAddr(addrs, &dev);
 }

+int qemuDomainPCIAddressReserveSlot(qemuDomainPCIAddressSetPtr addrs,
+                                    int slot)
+{
+    int function;
+
+    for (function = 0; function <= QEMU_PCI_ADDRESS_LAST_FUNCTION; function++) {
+        if (qemuDomainPCIAddressReserveFunction(addrs, slot, function) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+cleanup:
+    for (function--; function >= 0; function--) {
+        qemuDomainPCIAddressReleaseFunction(addrs, slot, function);
+    }
+    return -1;
+}

 int qemuDomainPCIAddressEnsureAddr(qemuDomainPCIAddressSetPtr addrs,
                                    virDomainDeviceInfoPtr dev)
 {
     int ret = 0;

-    if (dev->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
-        ret = qemuDomainPCIAddressReserveAddr(addrs, dev);
-    else
-        ret = qemuDomainPCIAddressSetNextAddr(addrs, dev);
+    if (dev->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) {
+        /* We do not support hot plugging multifunction PCI devices yet,
+         * so we should reserve the whole slot. The function of the PCI
+         * device must be 0.
+         */
+        if (dev->addr.pci.function != 0) {
+            qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                            _("Only PCI device addresses with function=0"
+                              " are supported"));
+            return -1;
+        }
+
+        ret = qemuDomainPCIAddressReserveSlot(addrs, dev->addr.pci.slot);
+    } else
+        ret = qemuDomainPCIAddressSetNextSlot(addrs, dev);
+
     return ret;
 }
@@ -851,6 +935,48 @@ int qemuDomainPCIAddressReleaseAddr(qemuDomainPCIAddressSetPtr addrs,
     return ret;
 }

+int qemuDomainPCIAddressReleaseFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        int slot, int function)
+{
+    virDomainDeviceInfo dev;
+
+    dev.addr.pci.domain = 0;
+    dev.addr.pci.bus = 0;
+    dev.addr.pci.slot = slot;
+    dev.addr.pci.function = function;
+
+    return qemuDomainPCIAddressReleaseAddr(addrs, &dev);
+}
+
+int qemuDomainPCIAddressReleaseSlot(qemuDomainPCIAddressSetPtr addrs, int slot)
+{
+    virDomainDeviceInfo dev;
+    char *addr;
+    int function;
+    int ret = 0;
+
+    dev.addr.pci.domain = 0;
+    dev.addr.pci.bus = 0;
+    dev.addr.pci.slot = slot;
+
+    for (function = 0; function <= QEMU_PCI_ADDRESS_LAST_FUNCTION; function++) {
+        dev.addr.pci.function = function;
+        addr = qemuPCIAddressAsString(&dev);
+        if (!addr)
+            return -1;
+
+        if (!virHashLookup(addrs->used, addr)) {
+            VIR_FREE(addr);
+            continue;
+        }
+
+        VIR_FREE(addr);
+
+        if (qemuDomainPCIAddressReleaseFunction(addrs, slot, function) < 0)
+            ret = -1;
+    }
+
+    return ret;
+}

 void qemuDomainPCIAddressSetFree(qemuDomainPCIAddressSetPtr addrs)
 {
@@ -861,8 +987,7 @@ void qemuDomainPCIAddressSetFree(qemuDomainPCIAddressSetPtr addrs)
     VIR_FREE(addrs);
 }

-
-int qemuDomainPCIAddressSetNextAddr(qemuDomainPCIAddressSetPtr addrs,
+int qemuDomainPCIAddressSetNextSlot(qemuDomainPCIAddressSetPtr addrs,
                                     virDomainDeviceInfoPtr dev)
 {
     int i;
@@ -879,6 +1004,56 @@ int qemuDomainPCIAddressSetNextAddr(qemuDomainPCIAddressSetPtr addrs,
         maybe.addr.pci.domain = 0;
         maybe.addr.pci.bus = 0;
         maybe.addr.pci.slot = i;
+        maybe.addr.pci.function = 0;
+
+        if (!(addr = qemuPCIAddressAsString(&maybe)))
+            return -1;
+
+        if (qemuDomainPCIAddressCheckSlot(addrs, &maybe) < 0) {
+            VIR_DEBUG("PCI addr %s already in use", addr);
+            VIR_FREE(addr);
+            continue;
+        }
+
+        VIR_DEBUG("Allocating PCI addr %s", addr);
+        VIR_FREE(addr);
+
+        if (qemuDomainPCIAddressReserveSlot(addrs, i) < 0)
+            return -1;
+
+        dev->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI;
+        dev->addr.pci = maybe.addr.pci;
+
+        qemuGotoNextSlot(addrs, maybe.addr.pci.slot);
+
+        return 0;
+    }
+
+    qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                    "%s", _("No more available PCI addresses"));
+    return -1;
+}
+
+int qemuDomainPCIAddressSetNextFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        virDomainDeviceInfoPtr dev)
+{
+    int i;
+    int iteration;
+    int next_devfn;
+
+    next_devfn = QEMU_PCI_ADDRESS_DEVFN(addrs->nextslot, addrs->nextfunction);
+    for (i = next_devfn, iteration = 0;
+         iteration <= QEMU_PCI_ADDRESS_LAST_DEVFN; i++, iteration++) {
+        virDomainDeviceInfo maybe;
+        char *addr;
+
+        if (QEMU_PCI_ADDRESS_LAST_DEVFN < i)
+            i = 0;
+        memset(&maybe, 0, sizeof(maybe));
+        maybe.addr.pci.domain = 0;
+        maybe.addr.pci.bus = 0;
+        maybe.addr.pci.slot = QEMU_PCI_ADDRESS_SLOT(i);
+        maybe.addr.pci.function = QEMU_PCI_ADDRESS_FUNCTION(i);

         if (!(addr = qemuPCIAddressAsString(&maybe)))
             return -1;
@@ -899,9 +1074,8 @@ int qemuDomainPCIAddressSetNextAddr(qemuDomainPCIAddressSetPtr addrs,
         dev->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI;
         dev->addr.pci = maybe.addr.pci;

-        addrs->nextslot = i + 1;
-        if (QEMU_PCI_ADDRESS_LAST_SLOT < addrs->nextslot)
-            addrs->nextslot = 0;
+        qemuGotoNextFunction(addrs, maybe.addr.pci.slot,
+                             maybe.addr.pci.function);

         return 0;
     }
@@ -944,7 +1118,8 @@ int qemuDomainPCIAddressSetNextAddr(qemuDomainPCIAddressSetPtr addrs,
  * skip over info.type == PCI */
 int
-qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
+qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs,
+                         virBitmapPtr qemuCaps)
 {
     int i;
     bool reservedIDE = false;
@@ -1021,16 +1196,28 @@ qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
         /* Only support VirtIO-9p-pci so far. If that changes,
          * we might need to skip devices here */
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->fss[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->fss[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs, &def->fss[i]->info) < 0)
+                goto error;
+        }
     }

     /* Network interfaces */
     for (i = 0; i < def->nnets ; i++) {
         if (def->nets[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
             continue;
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->nets[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->nets[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs, &def->nets[i]->info) < 0)
+                goto error;
+        }
     }

     /* Sound cards */
@@ -1042,8 +1229,15 @@ qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
             def->sounds[i]->model == VIR_DOMAIN_SOUND_MODEL_PCSPK)
             continue;

-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->sounds[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->sounds[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->sounds[i]->info) < 0)
+                goto error;
+        }
     }

     /* Disk controllers (SCSI only for now) */
@@ -1061,8 +1255,15 @@ qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
         if (def->controllers[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
             continue;
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->controllers[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->controllers[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->controllers[i]->info) < 0)
+                goto error;
+        }
     }

     /* Disks (VirtIO only for now) */
@@ -1074,8 +1275,15 @@ qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
         if (def->disks[i]->bus != VIR_DOMAIN_DISK_BUS_VIRTIO)
             continue;
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->disks[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->disks[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->disks[i]->info) < 0)
+                goto error;
+        }
     }

     /* Host PCI devices */
@@ -1086,32 +1294,60 @@ qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs)
             def->hostdevs[i]->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI)
             continue;

-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->hostdevs[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->hostdevs[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->hostdevs[i]->info) < 0)
+                goto error;
+        }
     }

     /* VirtIO balloon */
     if (def->memballoon &&
         def->memballoon->model == VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO &&
         def->memballoon->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) {
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->memballoon->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->memballoon->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->memballoon->info) < 0)
+                goto error;
+        }
     }

     /* A watchdog - skip IB700, it is not a PCI device */
     if (def->watchdog &&
         def->watchdog->model != VIR_DOMAIN_WATCHDOG_MODEL_IB700 &&
         def->watchdog->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) {
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->watchdog->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->watchdog->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->watchdog->info) < 0)
+                goto error;
+        }
     }

     /* Further non-primary video cards */
     for (i = 1; i < def->nvideos ; i++) {
         if (def->videos[i]->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
             continue;
-        if (qemuDomainPCIAddressSetNextAddr(addrs, &def->videos[i]->info) < 0)
-            goto error;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (qemuDomainPCIAddressSetNextFunction(addrs,
+                                                    &def->videos[i]->info) < 0)
+                goto error;
+        } else {
+            if (qemuDomainPCIAddressSetNextSlot(addrs,
+                                                &def->videos[i]->info) < 0)
+                goto error;
+        }
     }
     for (i = 0; i < def->ninputs ; i++) {
         /* Nada - none are PCI based (yet) */
@@ -1149,10 +1385,20 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
                             _("Only PCI device addresses with bus=0 are supported"));
             return -1;
         }
-        if (info->addr.pci.function != 0) {
-            qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                            _("Only PCI device addresses with function=0 are supported"));
-            return -1;
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION)) {
+            if (info->addr.pci.function > 7) {
+                qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                _("The function of a PCI device address must "
+                                  "be less than 8"));
+                return -1;
+            }
+        } else {
+            if (info->addr.pci.function != 0) {
+                qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                _("Only PCI device addresses with function=0 "
+                                  "are supported"));
+                return -1;
+            }
         }

         /* XXX
@@ -1162,9 +1408,14 @@ qemuBuildDeviceAddressStr(virBufferPtr buf,
          * to pciNN.0 where NN is the domain number
          */
         if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIBUS))
-            virBufferAsprintf(buf, ",bus=pci.0,addr=0x%x", info->addr.pci.slot);
+            virBufferAsprintf(buf, ",bus=pci.0");
+        else
+            virBufferAsprintf(buf, ",bus=pci");
+        if (qemuCapsGet(qemuCaps, QEMU_CAPS_PCI_MULTIFUNCTION))
+            virBufferAsprintf(buf, ",multifunction=on,addr=0x%x.0x%x",
+                              info->addr.pci.slot, info->addr.pci.function);
         else
-            virBufferAsprintf(buf, ",bus=pci,addr=0x%x", info->addr.pci.slot);
+            virBufferAsprintf(buf, ",addr=0x%x", info->addr.pci.slot);
     }
     return 0;
 }
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 528031d..1a4f471 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -146,19 +146,28 @@ virDomainDefPtr qemuParseCommandLineString(virCapsPtr caps,

 int qemuDomainAssignPCIAddresses(virDomainDefPtr def);
 qemuDomainPCIAddressSetPtr qemuDomainPCIAddressSetCreate(virDomainDefPtr def);
+int qemuDomainPCIAddressReserveFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        int slot, int function);
 int qemuDomainPCIAddressReserveSlot(qemuDomainPCIAddressSetPtr addrs,
                                     int slot);
 int qemuDomainPCIAddressReserveAddr(qemuDomainPCIAddressSetPtr addrs,
                                     virDomainDeviceInfoPtr dev);
-int qemuDomainPCIAddressSetNextAddr(qemuDomainPCIAddressSetPtr addrs,
+int qemuDomainPCIAddressSetNextSlot(qemuDomainPCIAddressSetPtr addrs,
                                     virDomainDeviceInfoPtr dev);
+int qemuDomainPCIAddressSetNextFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        virDomainDeviceInfoPtr dev);
 int qemuDomainPCIAddressEnsureAddr(qemuDomainPCIAddressSetPtr addrs,
                                    virDomainDeviceInfoPtr dev);
 int qemuDomainPCIAddressReleaseAddr(qemuDomainPCIAddressSetPtr addrs,
                                     virDomainDeviceInfoPtr dev);
+int qemuDomainPCIAddressReleaseFunction(qemuDomainPCIAddressSetPtr addrs,
+                                        int slot, int function);
+int qemuDomainPCIAddressReleaseSlot(qemuDomainPCIAddressSetPtr addrs, int slot);
 void qemuDomainPCIAddressSetFree(qemuDomainPCIAddressSetPtr addrs);
-int qemuAssignDevicePCISlots(virDomainDefPtr def, qemuDomainPCIAddressSetPtr addrs);
+int qemuAssignDevicePCISlots(virDomainDefPtr def,
+                             qemuDomainPCIAddressSetPtr addrs,
+                             virBitmapPtr qemuCaps);

 int qemuDomainNetVLAN(virDomainNetDefPtr def);
 int qemuAssignDeviceNetAlias(virDomainDefPtr def, virDomainNetDefPtr net, int idx);
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index dae2269..4731959 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -221,7 +221,8 @@ error:
     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
         (disk->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) &&
         releaseaddr &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &disk->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        disk->info.addr.pci.slot) < 0)
         VIR_WARN("Unable to release PCI address on %s", disk->src);

     if (virSecurityManagerRestoreImageLabel(driver->securityManager,
@@ -290,7 +291,8 @@ cleanup:
         qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
         (controller->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) &&
         releaseaddr &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &controller->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        controller->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on controller");

     VIR_FREE(devstr);
@@ -697,7 +699,8 @@ cleanup:
         qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
         (net->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) &&
         releaseaddr &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &net->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        net->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on NIC");

     if (ret != 0)
@@ -828,7 +831,8 @@ error:
     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
         (hostdev->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) &&
         releaseaddr &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &hostdev->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        hostdev->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on host device");

     qemuDomainReAttachHostdevDevices(driver, &hostdev, 1);
@@ -1100,6 +1104,30 @@ static inline int qemuFindDisk(virDomainDefPtr def, const char *dst)
     return -1;
 }

+static int qemuComparePCIDevice(virDomainDefPtr def ATTRIBUTE_UNUSED,
+                                virDomainDeviceInfoPtr dev1,
+                                void *opaque)
+{
+    virDomainDeviceInfoPtr dev2 = opaque;
+
+    if (dev1->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI ||
+        dev2->type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)
+        return 0;
+
+    if (dev1->addr.pci.slot == dev2->addr.pci.slot &&
+        dev1->addr.pci.function != dev2->addr.pci.function)
+        return -1;
+
+    return 0;
+}
+
+static bool qemuIsMultiFunctionDevice(virDomainDefPtr def,
+                                      virDomainDeviceInfoPtr dev)
+{
+    if (virDomainDeviceInfoIterate(def, qemuComparePCIDevice, dev) < 0)
+        return true;
+    return false;
+}
+
 int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver,
                                   virDomainObjPtr vm,
@@ -1121,6 +1149,13 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver,

     detach = vm->def->disks[i];

+    if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) {
+        qemuReportError(VIR_ERR_OPERATION_FAILED,
+                        _("cannot hot unplug multifunction PCI device: %s"),
+                        dev->data.disk->dst);
+        goto cleanup;
+    }
+
     if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_DEVICES)) {
         if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) {
             qemuReportError(VIR_ERR_INTERNAL_ERROR,
@@ -1167,7 +1202,8 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver,
     qemuAuditDisk(vm, detach, NULL, "detach", ret >= 0);

     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &detach->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        detach->info.addr.pci.slot) < 0)
         VIR_WARN("Unable to release PCI address on %s", dev->data.disk->src);

     virDomainDiskRemove(vm->def, i);
@@ -1351,6 +1387,13 @@ int qemuDomainDetachPciControllerDevice(struct qemud_driver *driver,
         goto cleanup;
     }

+    if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) {
+        qemuReportError(VIR_ERR_OPERATION_FAILED,
+                        _("cannot hot unplug multifunction PCI device: %s"),
+                        dev->data.disk->dst);
+        goto cleanup;
+    }
+
     if (qemuDomainControllerIsBusy(vm, detach)) {
         qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                         _("device cannot be detached: device is busy"));
@@ -1392,7 +1435,8 @@ int qemuDomainDetachPciControllerDevice(struct qemud_driver *driver,
     }

     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &detach->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        detach->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on controller");

     virDomainControllerDefFree(detach);
@@ -1438,6 +1482,13 @@ int qemuDomainDetachNetDevice(struct qemud_driver *driver,
         goto cleanup;
     }

+    if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) {
+        qemuReportError(VIR_ERR_OPERATION_FAILED,
+                        _("cannot hot unplug multifunction PCI device: %s"),
+                        dev->data.disk->dst);
+        goto cleanup;
+    }
+
     if ((vlan = qemuDomainNetVLAN(detach)) < 0) {
         qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                         _("unable to determine original VLAN"));
@@ -1484,7 +1535,8 @@ int qemuDomainDetachNetDevice(struct qemud_driver *driver,
     qemuAuditNet(vm, detach, NULL, "detach", true);

     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &detach->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        detach->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on NIC");

     virDomainConfNWFilterTeardown(detach);
@@ -1567,6 +1619,13 @@ int qemuDomainDetachHostPciDevice(struct qemud_driver *driver,
         return -1;
     }

+    if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) {
+        qemuReportError(VIR_ERR_OPERATION_FAILED,
+                        _("cannot hot unplug multifunction PCI device: %s"),
+                        dev->data.disk->dst);
+        return -1;
+    }
+
     if (!virDomainDeviceAddressIsValid(&detach->info,
                                        VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI)) {
         qemuReportError(VIR_ERR_OPERATION_FAILED,
@@ -1601,7 +1660,8 @@ int qemuDomainDetachHostPciDevice(struct qemud_driver *driver,
     }

     if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE) &&
-        qemuDomainPCIAddressReleaseAddr(priv->pciaddrs, &detach->info) < 0)
+        qemuDomainPCIAddressReleaseSlot(priv->pciaddrs,
+                                        detach->info.addr.pci.slot) < 0)
         VIR_WARN0("Unable to release PCI address on host device");

     if (vm->def->nhostdevs > 1) {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index bd7c932..f3fe78c 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1921,7 +1921,8 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
         priv->persistentAddrs = 1;

         if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(obj->def)) ||
-            qemuAssignDevicePCISlots(obj->def, priv->pciaddrs) < 0)
+            qemuAssignDevicePCISlots(obj->def, priv->pciaddrs,
+                                     priv->qemuCaps) < 0)
             goto error;
     }
@@ -2187,7 +2188,8 @@ int qemuProcessStart(virConnectPtr conn,

     /* Assign any remaining addresses */
-    if (qemuAssignDevicePCISlots(vm->def, priv->pciaddrs) < 0)
+    if (qemuAssignDevicePCISlots(vm->def, priv->pciaddrs,
+                                 priv->qemuCaps) < 0)
         goto cleanup;

     priv->persistentAddrs = 1;
diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c
index 2522396..2a0a923 100644
--- a/tests/qemuhelptest.c
+++ b/tests/qemuhelptest.c
@@ -430,7 +430,8 @@ mymain(void)
             QEMU_CAPS_VGA_NONE,
             QEMU_CAPS_MIGRATE_QEMU_FD,
             QEMU_CAPS_DRIVE_AIO,
-            QEMU_CAPS_DEVICE_SPICEVMC);
+            QEMU_CAPS_DEVICE_SPICEVMC,
+            QEMU_CAPS_PCI_MULTIFUNCTION);
     DO_TEST("qemu-kvm-0.12.1.2-rhel61", 12001, 1, 0,
             QEMU_CAPS_VNC_COLON,
             QEMU_CAPS_NO_REBOOT,
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index a7e4cc0..04b8326 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -96,7 +96,7 @@ static int testCompareXMLToArgvFiles(const char *xml,
     if (!(pciaddrs = qemuDomainPCIAddressSetCreate(vmdef)))
         goto fail;

-    if (qemuAssignDevicePCISlots(vmdef, pciaddrs) < 0)
+    if (qemuAssignDevicePCISlots(vmdef, pciaddrs, extraFlags) < 0)
        goto fail;

     qemuDomainPCIAddressSetFree(pciaddrs);
-- 
1.7.1

On Tue, May 10, 2011 at 02:00:17PM +0800, Wen Congyang wrote:
We want to use more than 200 devices. Libvirt uses neither multifunction PCI devices nor PCI-to-PCI bridges, so we cannot use more than 200 or so devices when each device, or its controller, is a PCI device.
IMHO using multifunction support is a dead end, because it makes hotplug completely unusable and is not even possible for most PCI devices. The ways we want to raise the device limit are:

a. Supporting multiple PCI domains (multiple PCI root complexes)
b. Adding PCI bridges
c. A virtio-scsi controller to allow > 1 virtio disk per PCI device

Any of those will dramatically increase the number of devices we can use, without the horrible hotplug problems that multifunction introduces.
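For scale, some rough arithmetic under the patch's own numbers (32 slots per bus, 8 functions per slot): with one device per slot, a single bus tops out at roughly 29 usable devices once the host bridge, the PIIX functions and VGA are subtracted; multifunction raises that to about 8 x 29 = 232 addresses on that one bus, while each extra PCI domain or bridge adds a whole new bus worth of slots, which is why options (a) and (b) scale much further.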
This patch adds support for multifunction PCI devices. It does not support hot plugging/unplugging multifunction PCI devices.
TODO: 1. Support hot plugging a multifunction PCI device. We can only hot plug one device at a time. Should we introduce another command, 'virsh attach-devices XXX', to support hot plugging more than one device at a time?
Since you can't do practical hotplug of multifunction devices at the QEMU layer at all, there's nothing useful we can do at libvirt either.
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

At 05/10/2011 06:00 PM, Daniel P. Berrange wrote:
On Tue, May 10, 2011 at 02:00:17PM +0800, Wen Congyang wrote:
We want to use more than 200 devices. Libvirt uses neither multifunction PCI devices nor PCI-to-PCI bridges, so we cannot use more than 200 or so devices when each device, or its controller, is a PCI device.
IMHO using multifunction support is a dead end, because it makes hotplug completely unusable and is not even possible for most PCI devices. The ways we want to raise the device limit are:
a. Supporting multiple PCI domains (multiple PCI root complexes)
IIUC, qemu does not support multiple PCI domains now.
b. Adding PCI bridges
I do not know whether qemu supports it. I found hw/pci_bridge.c in qemu's source, so qemu may support it. I will confirm it.
c. A virtio-scsi controller to allow > 1 virtio disk per PCI device
Hmm, do you mean this: -device virtio-blk-pci,ports=4,drive0=hda,drive1=hdb,drive2=hdc,...
Any of those will dramatically increase the number of devices we can use, without the horrible hotplug problems that multifunction introduces.
This patch adds support for multifunction PCI devices. It does not support hot plugging/unplugging multifunction PCI devices.
TODO: 1. Support hot plugging a multifunction PCI device. We can only hot plug one device at a time. Should we introduce another command, 'virsh attach-devices XXX', to support hot plugging more than one device at a time?
Since you can't do practical hotplug of multifunction devices at the QEMU layer at all, there's nothing useful we can do at libvirt either.
We can hotplug multifunction PCI devices like this:

1. Before hot plugging:

# lspci
00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)
00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
00:01.2 USB Controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] (rev 01)
00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
00:02.0 VGA compatible controller: Cirrus Logic GD 5446
00:03.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8139/8139C/8139C+ (rev 20)
00:04.0 RAM memory: Red Hat, Inc Virtio memory balloon
00:05.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a

2. Hot plug the multifunction PCI devices:

# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi2,bus=pci.0,addr=0x06.0x07'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi3,bus=pci.0,addr=0x06.0x06'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi4,bus=pci.0,addr=0x06.0x05'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi5,bus=pci.0,addr=0x06.0x04'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi6,bus=pci.0,addr=0x06.0x03'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi7,bus=pci.0,addr=0x06.0x02'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi8,bus=pci.0,addr=0x06.0x01'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi9,multifunction=on,bus=pci.0,addr=0x06.0x00'

3. After hot plugging the multifunction PCI devices:

# lspci
00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)
00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
00:01.2 USB Controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] (rev 01)
00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
00:02.0 VGA compatible controller: Cirrus Logic GD 5446
00:03.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8139/8139C/8139C+ (rev 20)
00:04.0 RAM memory: Red Hat, Inc Virtio memory balloon
00:05.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.1 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.2 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.3 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.4 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.5 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.6 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.7 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
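Two details in the sequence above are worth spelling out. The functions are added from 7 down to 0, with function 0 carrying multifunction=on and coming last: as I understand QEMU's hot-add semantics, the guest is only notified of the new slot once function 0 appears, so the other functions must already be populated by then. The reverse direction goes through the same monitor, e.g. (reusing the ids from the commands above):

# virsh qemu-monitor-command vm1 --hmp 'device_del scsi9'

and, as the TODO notes, ejecting the slot takes all of its functions with it, which is exactly the cleanup problem discussed below.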
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Yes, there are too many restrictions on hot plugging/unplugging multifunction PCI devices.
Daniel

On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
At 05/10/2011 06:00 PM, Daniel P. Berrange wrote:
On Tue, May 10, 2011 at 02:00:17PM +0800, Wen Congyang wrote:
We want to use more than 200 devices. Libvirt uses neither multifunction PCI devices nor PCI-to-PCI bridges, so we cannot use more than 200 or so devices when each device, or its controller, is a PCI device.
IMHO using multifunction support is a dead end, because it makes hotplug completely unusable and is not even possible for most PCI devices. The ways we want to raise the device limit are:
a. Supporting multiple PCI domains (multiple PCI root complexes)
IIUC, qemu does not support multiple PCI domains now.
There are people who are working on making that supported in KVM real soon.
b. Adding PCI bridges
I do not know whether qemu supports it. I found hw/pci_bridge.c in qemu's source, so qemu may support it. I will confirm it.
I'm told this is already supported, but I've not tested it myself.
c. A virtio-scsi controller to allow > 1 virtio disk per PCI device
Hmm, do you mean this: -device virtio-blk-pci,ports=4,drive0=hda,drive1=hdb,drive2=hdc,...
No, it is actually going to be a new device type, and I'm expecting it will work very much like the LSI SCSI does in terms of device setup, e.g. create a controller device, and then create devices for each drive:

-device virtio-scsi-pci,id=vscsi1
-drive file=/some/disk,id=vscsi1.1
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.1
-drive file=/some/disk,id=vscsi1.2
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.2
-drive file=/some/disk,id=vscsi1.3
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.3
Any of those will dramatically increase the number of devices we can use, without the horrible hotplug problems that multifunction introduces.
This patch adds support for multifunction PCI devices. It does not support hot plugging/unplugging multifunction PCI devices.
TODO: 1. Support hot plugging a multifunction PCI device. We can only hot plug one device at a time. Should we introduce another command, 'virsh attach-devices XXX', to support hot plugging more than one device at a time?
Since you can't do practical hotplug of multifunction devices at the QEMU layer at all, there's nothing useful we can do at libvirt either.
We can hotplug multifunction PCI devices like this:
1. Before hot plugging:
# lspci
[snip: lspci output as shown above]
2. Hot plug the multifunction PCI devices:
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi2,bus=pci.0,addr=0x06.0x07'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi3,bus=pci.0,addr=0x06.0x06'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi4,bus=pci.0,addr=0x06.0x05'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi5,bus=pci.0,addr=0x06.0x04'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi6,bus=pci.0,addr=0x06.0x03'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi7,bus=pci.0,addr=0x06.0x02'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi8,bus=pci.0,addr=0x06.0x01'
# virsh qemu-monitor-command vm1 --hmp 'device_add lsi,id=scsi9,multifunction=on,bus=pci.0,addr=0x06.0x00'
Hmm, that's kinda weird & I'm surprised it works, particularly for LSI, since I thought guest drivers would need support for multifunction too.
3. After hot plugging the multifunction PCI devices:
# lspci
[snip: lspci output as shown above, now with 00:06.0-00:06.7]
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Yes, there are too many restrictions on hot plugging/unplugging multifunction PCI devices.
Regards,
Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

At 05/11/2011 03:55 PM, Daniel P. Berrange wrote:
On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
At 05/10/2011 06:00 PM, Daniel P. Berrange wrote:
On Tue, May 10, 2011 at 02:00:17PM +0800, Wen Congyang wrote:
We want to use more than 200 devices. Libvirt uses neither multifunction PCI devices nor PCI-to-PCI bridges, so we cannot use more than 200 or so devices when each device, or its controller, is a PCI device.
IMHO using multifunction support is a dead end, because it makes hotplug completely unusable and is not even possible for most PCI devices. The ways we want to raise the device limit are:
a. Supporting multiple PCI domains (multiple PCI root complexes)
IIUC, qemu does not support multiple PCI domains now.
There are people who are working on making that supported in KVM real soon.
Good news.
b. Adding PCI bridges
I do not know whether qemu supports it. I found hw/pci_bridge.c in qemu's source, so qemu may support it. I will confirm it.
I'm told this is already supported, but I've not tested it myself.
I'm still reading qemu's code. Do you know the driver's name? (For example, the driver for the SCSI controller in qemu is lsi.)
c. A virtio-scsi controller to allow > 1 virtio disk per PCI device
Hmm, do you mean this: -device virtio-blk-pci,ports=4,drive0=hda,drive1=hdb,drive2=hdc,...
No, it is actually going to be a new device type, and I'm expecting it will work very much like the LSI SCSI does in terms of device setup, e.g. create a controller device, and then create devices for each drive:
-device virtio-scsi-pci,id=vscsi1
-drive file=/some/disk,id=vscsi1.1
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.1
-drive file=/some/disk,id=vscsi1.2
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.2
-drive file=/some/disk,id=vscsi1.3
-device virtio-blk-pci,bus=vscsi1,drive=vscsi1.3
Any of those will dramatically increase the number of devices we can use, without the horrible hotplug problems that multifunction introduces.
This patch adds support for multifunction PCI devices. It does not support hot plugging/unplugging multifunction PCI devices.
TODO: 1. Support hot plugging a multifunction PCI device. We can only hot plug one device at a time. Should we introduce another command, 'virsh attach-devices XXX', to support hot plugging more than one device at a time?
Since you can't do practical hotplug of multifunction devices at the QEMU layer at all, there's nothing useful we can do at libvirt either.
We can hotplug multifunction PCI devices like this:
1. Before hot plugging:
# lspci
[snip: lspci output as shown above]
2. Hot plug the multifunction PCI devices:
[snip: the eight device_add commands as shown above]
Hmm, that's kinda weird & I'm surprised it works, particularly for LSI, since I thought guest drivers would need support for multifunction too.
I only tested LSI. If we can hotplug a device as a single PCI device, I think it can work fine as a multifunction device.
3. After hot plugging the multifunction PCI devices:
# lspci
[snip: lspci output as shown above, now with 00:06.0-00:06.7]
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Yes, there are too many restrictions on hot plugging/unplugging multifunction PCI devices.
Regards, Daniel

On Wed, 11 May 2011 08:55:56 +0100 "Daniel P. Berrange" <berrange@redhat.com> wrote:
On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
3. After hot plugging the multifunction PCI devices:
# lspci
[snip: lspci output as shown above, now with 00:06.0-00:06.7]
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Hmm, how about adding an <unpluggable> attribute to <device> definitions? IIUC:

1. There are some unpluggable default devices.
2. There are devices which should never be removed by mistake, such as the rootfs.

When I googled "how to handle 100+ nics with KVM", a qemu community guy answered "please use multifunction devices", but I was disappointed to learn that libvirt doesn't support it now.

And, IIUC, if multifunction devices are supported by libvirt, they can tie the default 'unpluggable' devices (such as serial, IDE, etc.) up into one slot and we'll have 3? more empty slots at boot.

Thanks,
-Kame

At 05/13/2011 10:32 AM, KAMEZAWA Hiroyuki wrote:
On Wed, 11 May 2011 08:55:56 +0100 "Daniel P. Berrange" <berrange@redhat.com> wrote:
On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
3. After hot plugging the multifunction PCI devices:
# lspci
[snip: lspci output as shown above, now with 00:06.0-00:06.7]
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Hmm, how about adding an <unpluggable> attribute to <device> definitions? IIUC: 1. There are some unpluggable default devices. 2. There are devices which should never be removed by mistake, such as the rootfs.
When I googled "how to handle 100+ nics with KVM", a qemu community guy answered "please use multifunction devices", but I was disappointed to learn that libvirt doesn't support it now.
And, IIUC, if multifunction devices are supported by libvirt, they can tie the default 'unpluggable' devices (such as serial, IDE, etc.) up into one slot and we'll have 3? more empty slots at boot.
Hi, Daniel P. Berrange: what's your opinion of Kamezawa's suggestion?
Thanks, -Kame

On Fri, May 13, 2011 at 11:32:43AM +0900, KAMEZAWA Hiroyuki wrote:
On Wed, 11 May 2011 08:55:56 +0100 "Daniel P. Berrange" <berrange@redhat.com> wrote:
On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
3. After hot plugging the multifunction PCI devices:
# lspci
[snip: lspci output as shown above, now with 00:06.0-00:06.7]
2. Support hot unplugging a multifunction PCI device. Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug the multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications, and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Hmm, how about adding an <unpluggable> attribute to <device> definitions? IIUC: 1. There are some unpluggable default devices.
This is a little complex. A PCI card can have a boolean unpluggable attribute. A libvirt device can have a tri-state though: unpluggable, not unpluggable, or unpluggable only if all the other functions are unplugged at the same time. Maybe the third case is actually just the same as the first case here and we should ignore it, and just have an <unpluggable> for the PCI device as a whole.
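A minimal sketch of how such a tri-state could be spelled in libvirt's enum style (purely hypothetical names; nothing like this exists in the tree):

/* Hypothetical tri-state for device unpluggability. */
typedef enum {
    VIR_DOMAIN_DEVICE_UNPLUGGABLE_NO = 0,    /* can never be unplugged */
    VIR_DOMAIN_DEVICE_UNPLUGGABLE_YES,       /* can be unplugged on its own */
    VIR_DOMAIN_DEVICE_UNPLUGGABLE_WITH_SLOT, /* only together with all other
                                              * functions on the same slot */
} virDomainDeviceUnpluggable;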
2. There are devices which should never be removed by mistake, such as the rootfs.
Which devices are considered to be in list 2 can only be determined by the guest OS, so I don't think we can represent that in the libvirt XML. It is also a little bit fuzzier than a boolean unpluggable: e.g. from the guest OS POV, no device is unpluggable while it is in use. A block device becomes unpluggable once the guest filesystem is unmounted. A network card becomes unpluggable once the network interface is taken offline. It could be desirable to expose this information to management apps, but this won't be possible until QEMU gets some kind of guest agent that can report this info from the guest OS. I think it needs to be reported separately from whether the physical PCI card is unpluggable or not.
When I googled "how to handle 100+ nics with KVM", a qemu community guy answered "please use multifunction devices", but I was disappointed to learn that libvirt doesn't support it now.
OK, I think it is reasonable to support multifunction devices in libvirt, but only if the mgmt app explicitly configures them via the XML. I.e. the default behaviour, where libvirt auto-assigns PCI addresses, should always be to assign slots. And when we get support for many PCI domains or bridges, we'll assign slots from the extra domains/bridges too by default. The mgmt app could explicitly override this and configure multifunction by providing an <address> element for devices in the XML, with the function number set to non-zero.
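For illustration, pinning a device to a non-zero function would use the existing <address> element, something like this (a sketch; the slot/function values match the lspci listings above):

<controller type='scsi'>
  <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x1'/>
</controller>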
And, IIUC, if multifunction devices are supported by libvirt, they can tie the default 'unpluggable' devices (such as serial, IDE, etc.) up into one slot and we'll have 3? more empty slots at boot.
The IDE controller is a function on the PIIX device, as is the default USB controller. The ISA bridge, behind which the serial/parallel ports live, is also a function on the PIIX device. So we are in fact already using a multifunction device in this case, and there are no slots we can free up for the serial/IDE devices.

Regards,
Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Fri, 20 May 2011 10:22:31 +0100 "Daniel P. Berrange" <berrange@redhat.com> wrote:
On Fri, May 13, 2011 at 11:32:43AM +0900, KAMEZAWA Hiroyuki wrote:
On Wed, 11 May 2011 08:55:56 +0100 "Daniel P. Berrange" <berrange@redhat.com> wrote:
On Wed, May 11, 2011 at 10:30:43AM +0800, Wen Congyang wrote:
3. After hot plugging multifunction PCI devices:

# lspci
00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)
00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
00:01.2 USB Controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] (rev 01)
00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
00:02.0 VGA compatible controller: Cirrus Logic GD 5446
00:03.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8139/8139C/8139C+ (rev 20)
00:04.0 RAM memory: Red Hat, Inc Virtio memory balloon
00:05.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.0 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.1 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.2 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.3 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.4 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.5 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.6 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
00:06.7 SCSI storage controller: LSI Logic / Symbios Logic 53c895a
2. support to hot unplug multifunction PCI devices
Hot unplugging a multifunction PCI device means that all the other devices on the same slot will be hot unplugged as well, so we should do some cleanup and remove the other devices too. If another device on the same slot does not support hot unplug, or it is a controller that some other devices still use, I think we should refuse to hot unplug this multifunction PCI device.
IMHO these kinds of restrictions will make life really unpleasant for applications and are a reason we should *not* support the multifunction code. Instead we should focus on one of the other 3 options I mention above.
Hmm, how about adding an <unpluggable> attribute to <device> definitions? IIUC:
1. There are some unpluggable default devices.
This is a little complex. A PCI card can have a boolean unpluggable attribute, but a libvirt device can have a tri-state: unpluggable, not unpluggable, or unpluggable only if all the other functions on the slot are unplugged at the same time. Maybe the third case is actually just the same as the first case here and we should ignore it, and just have an <unpluggable> with respect to the PCI device as a whole.
2. There are devices which should never be removed by mistake, such as the rootfs.
Which devices are considered to be in list 2 can only be determined by the guest OS, so I don't think we can represent that in the libvirt XML. It is also a little fuzzier than a boolean unpluggable flag: e.g. from the guest OS POV, no device is unpluggable while it is in use. A block device becomes unpluggable once the guest filesystem is unmounted; a network card becomes unpluggable once the network interface is taken offline.
It could be desirable to expose this information to management apps, but this won't be possible until QEMU gets some kind of guest agent that can report this info from the guest OS. I think it needs to be reported separately from whether the physical PCI card is unpluggable or not.
Sure.
When I googled "how to handle 100+ nics with KVM", a QEMU community member answered "please use multifunction devices", but I was disappointed to find that libvirt doesn't support it yet.
Ok, I think it is reasonable to support multifunction devices in libvirt, but only if the mgmt app explicitly configures them via the XML.
I.e., the default behaviour where libvirt auto-assigns PCI addresses should always be to assign whole slots. And when we get support for many PCI domains or bridges, we'll assign slots from extra domains/bridges too by default. The mgmt app could explicitly override this and configure multifunction by providing an <address> element for devices in the XML, with the function number set to non-zero.
Ok, thank you. Our team will discuss and implement it, then try again.

Thanks,
-Kame

On 05/11/2011 09:55 AM, Daniel P. Berrange wrote:
Hmm, that's kinda weird & I'm surprised it works, particularly for LSI, since I thought guest drivers would need support for multifunction too.
For well-behaved {kernel,device,driver}s multifunction should be totally transparent.

Example from http://msdn.microsoft.com/en-us/library/ff542756%28v=vs.85%29.aspx

"Since the system-supplied bus driver handles the multifunction semantics, the function drivers can be the same drivers that would be used if the functions were packaged as individual devices. Rather than enumerating one multifunction device, the PCI driver enumerates two child devices. The PnP manager treats each child device like a typical device. [...] The PCI driver arbitrates the resources for the child devices and manages any other multifunction aspects of the device."

And an old whitepaper also from MS at http://msdn.microsoft.com/en-us/windows/hardware/gg463194

"Each functional unit must be able to operate as a separate device, even if it happens to be serviced by an instance of the same driver(s) as another functional unit on the device. The operating system must be able to separately access each logical device that is individually enumerated, configure the device resources independently, and disable individual devices. [...]

Each separate functional unit on a multifunction device must not share addresses or registers with other functional units [...]

No start-order dependencies. The operating system must be able to configure and manage functions in any order. Therefore, no function on a multifunction device can depend on another device (that is, another function) to be started before the function can be started by the operating system. [...]

No hidden dependencies. Separate functional units must be able to operate concurrently, without interfering with each other or with other devices on the system. [...]

Vendors of multifunction devices should implement a separate configuration space with a unique device ID and independent resources for each function. [....]"

Paolo

On Fri, May 13, 2011 at 10:14:20AM +0200, Paolo Bonzini wrote:
On 05/11/2011 09:55 AM, Daniel P. Berrange wrote:
Hmm, that's kinda weird & I'm surprised it works, particularly for LSI, since I thought guest drivers would need support for multifunction too.
For well-behaved {kernel,device,driver}s multifunction should be totally transparent.
Example from http://msdn.microsoft.com/en-us/library/ff542756%28v=vs.85%29.aspx
"Since the system-supplied bus driver handles the multifunction semantics, the function drivers can be the same drivers that would be used if the functions were packaged as individual devices. Rather than enumerating one multifunction device, the PCI driver enumerates two child devices. The PnP manager treats each child device like a typical device. [...] The PCI driver arbitrates the resources for the child devices and manages any other multifunction aspects of the device.
And an old whitepaper also from MS at http://msdn.microsoft.com/en-us/windows/hardware/gg463194
"Each functional unit must be able to operate as a separate device, even if it happens to be serviced by an instance of the same driver(s) as another functional unit on the device. The operating system must be able to separately access each logical device that is individually enumerated, configure the device resources independently, and disable individual devices. [...]
Each separate functional unit on a multifunction device must not share addresses or registers with other functional units [...]
No start-order dependencies. The operating system must be able to configure and manage functions in any order. Therefore, no function on a multifunction device can depend on another device (that is, another function) to be started before the function can be started by the operating system. [...]
No hidden dependencies. Separate functional units must be able to operate concurrently, without interfering with each other or with other devices on the system. [...]
Vendors of multifunction devices should implement a separate configuration space with a unique device ID and independent resources for each function. [....]"
So if I'm understanding correctly, the only difference we'll see with assigning devices to functions, instead of slots, is that hotplug of individual devices is not possible. From a guest (driver) POV everything else is 100% functionally unchanged.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 05/20/2011 11:06 AM, Daniel P. Berrange wrote:
So if I'm understanding correctly, the only difference we'll see with assigning devices to functions, instead of slots, is that hotplug of individual devices is not possible. From a guest (driver) POV everything else is 100% functionally unchanged.
That's correct. Paolo
participants (4):
- Daniel P. Berrange
- KAMEZAWA Hiroyuki
- Paolo Bonzini
- Wen Congyang