[libvirt] PATCH: Xen PCI device passthrough

This patch implements PCI device passthrough for Xen. In pci.c there is a fair bit of refactoring to better support both pci-stub.ko and pciback.ko. The latter has two extra sysfs files, new_slot and remove_slot, for associating the driver with a specific PCI device domain:bus:slot:func address, whereas pci-stub can only associate based on vendor:product ID. The Xen driver gets an implementation of the dettach/reattach/reset APIs for node devices. The XML <-> SEXPR conversion gets adapted to cope with PCI devices. The XM <-> XML conversion gets adapted to load/save PCI device info in /etc/xen config files. Daniel Index: src/pci.c =================================================================== RCS file: /data/cvs/libvirt/src/pci.c,v retrieving revision 1.4 diff -u -p -r1.4 pci.c --- src/pci.c 3 Mar 2009 17:00:18 -0000 1.4 +++ src/pci.c 30 Mar 2009 13:09:44 -0000 @@ -614,46 +614,80 @@ pciResetDevice(virConnectPtr conn, pciDe return ret; } -static int -pciBindDeviceToStub(virConnectPtr conn, pciDevice *dev, const char *stub_module) + +static void +pciDriverDir(char *buf, size_t buflen, const char *driver) { - char stub_dir[PATH_MAX]; - char path[PATH_MAX]; + snprintf(buf, buflen, PCI_SYSFS "drivers/%s", driver); +} - snprintf(stub_dir, sizeof(stub_dir), PCI_SYSFS "drivers/%s", stub_module); +static void +pciDriverFile(char *buf, size_t buflen, const char *driver, const char *file) +{ + snprintf(buf, buflen, PCI_SYSFS "drivers/%s/%s", driver, file); +} + +static void +pciDeviceFile(char *buf, size_t buflen, const char *device, const char *file) +{ + snprintf(buf, buflen, PCI_SYSFS "devices/%s/%s", device, file); +} - /* Try loading the stub module if it isn't already loaded; - * Do not return an error if the stub module is not available. 
- */ - if (!virFileExists(stub_dir)) { - const char *const modprobeargv[] = { MODPROBE, stub_module, NULL }; - if (virRun(conn, modprobeargv, NULL) < 0) { +static const char * +pciFindStubDriver(virConnectPtr conn) +{ + char drvpath[PATH_MAX]; + int probed = 0; + +recheck: + pciDriverDir(drvpath, sizeof(drvpath), "pci-stub"); + if (virFileExists(drvpath)) + return "pci-stub"; + pciDriverDir(drvpath, sizeof(drvpath), "pciback"); + if (virFileExists(drvpath)) + return "pciback"; + + if (!probed) { + const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL }; + const char *const backprobe[] = { MODPROBE, "pciback", NULL }; + + probed = 1; + if (virRun(conn, stubprobe, NULL) < 0 && + virRun(conn, backprobe, NULL) < 0) { char ebuf[1024]; - VIR_WARN(_("modprobe %s failed: %s"), stub_module, + VIR_WARN(_("failed to load pci-stub or pciback drivers: %s"), virStrerror(errno, ebuf, sizeof ebuf)); + return 0; } + + goto recheck; } - if (!virFileExists(stub_dir)) { - VIR_WARN(_("%s module not available, cannot bind device %s to it"), - stub_module, dev->name); - } else { - /* Add the PCI device ID to the stub's dynamic ID table; - * this is needed to allow us to bind the device to the stub. - * Note: if the device is not currently bound to any driver, - * stub will immediately be bound to the device. Also, note - * that if a new device with this ID is hotplugged, or if a probe - * is triggered for such a device, it will also be immediately - * bound by the stub. - */ - snprintf(path, sizeof(path), "%s/new_id", stub_dir); - if (virFileWriteStr(path, dev->id) < 0) { - virReportSystemError(conn, errno, - _("Failed to add PCI device ID '%s' to %s"), - dev->id, stub_module); - return -1; - } + return NULL; +} + + +static int +pciBindDeviceToStub(virConnectPtr conn, pciDevice *dev, const char *driver) +{ + char drvdir[PATH_MAX]; + char path[PATH_MAX]; + + /* Add the PCI device ID to the stub's dynamic ID table; + * this is needed to allow us to bind the device to the stub. 
+ * Note: if the device is not currently bound to any driver, + * stub will immediately be bound to the device. Also, note + * that if a new device with this ID is hotplugged, or if a probe + * is triggered for such a device, it will also be immediately + * bound by the stub. + */ + pciDriverFile(path, sizeof(path), driver, "new_id"); + if (virFileWriteStr(path, dev->id) < 0) { + virReportSystemError(conn, errno, + _("Failed to add PCI device ID '%s' to %s"), + dev->id, driver); + return -1; } /* If the device is already bound to a driver, unbind it. @@ -661,77 +695,100 @@ pciBindDeviceToStub(virConnectPtr conn, * PCI device happens to be IDE controller for the disk hosting * your root filesystem. */ - snprintf(path, sizeof(path), - PCI_SYSFS "devices/%s/driver/unbind", dev->name); + pciDeviceFile(path, sizeof(path), dev->name, "driver/unbind"); if (virFileExists(path) && virFileWriteStr(path, dev->name) < 0) { virReportSystemError(conn, errno, _("Failed to unbind PCI device '%s'"), dev->name); return -1; } - if (virFileExists(stub_dir)) { - /* If the device isn't already bound to pci-stub, try binding it now. - */ - snprintf(path, sizeof(path), PCI_SYSFS "devices/%s/driver", dev->name); - if (!virFileLinkPointsTo(path, stub_dir)) { - snprintf(path, sizeof(path), "%s/bind", stub_dir); - if (virFileWriteStr(path, dev->name) < 0) { - virReportSystemError(conn, errno, - _("Failed to bind PCI device '%s' to %s"), - dev->name, stub_module); - return -1; - } + /* If the device isn't already bound to pci-stub, try binding it now. 
+ */ + pciDriverDir(drvdir, sizeof(drvdir), driver); + pciDeviceFile(path, sizeof(path), dev->name, "driver"); + if (!virFileLinkPointsTo(path, drvdir)) { + /* Xen's pciback.ko wants you to use new_slot first */ + pciDriverFile(path, sizeof(path), driver, "new_slot"); + if (virFileExists(path) && virFileWriteStr(path, dev->name) < 0) { + virReportSystemError(conn, errno, + _("Failed to add slot for PCI device '%s' to %s"), + dev->name, driver); + return -1; } - /* If 'remove_id' exists, remove the device id from pci-stub's dynamic - * ID table so that 'drivers_probe' works below. - */ - snprintf(path, sizeof(path), "%s/remove_id", stub_dir); - if (virFileExists(path) && virFileWriteStr(path, dev->id) < 0) { + pciDriverFile(path, sizeof(path), driver, "bind"); + if (virFileWriteStr(path, dev->name) < 0) { virReportSystemError(conn, errno, - _("Failed to remove PCI ID '%s' from %s"), - dev->id, stub_module); + _("Failed to bind PCI device '%s' to %s"), + dev->name, driver); return -1; } } + /* If 'remove_id' exists, remove the device id from pci-stub's dynamic + * ID table so that 'drivers_probe' works below. 
+ */ + pciDriverFile(path, sizeof(path), driver, "remove_id"); + if (virFileExists(path) && virFileWriteStr(path, dev->id) < 0) { + virReportSystemError(conn, errno, + _("Failed to remove PCI ID '%s' from %s"), + dev->id, driver); + return -1; + } + return 0; } int pciDettachDevice(virConnectPtr conn, pciDevice *dev) { - return pciBindDeviceToStub(conn, dev, "pci-stub"); + const char *driver = pciFindStubDriver(conn); + if (!driver) { + pciReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot find any PCI stub module")); + return -1; + } + + return pciBindDeviceToStub(conn, dev, driver); } static int -pciUnBindDeviceFromStub(virConnectPtr conn, pciDevice *dev, const char *stub_module) +pciUnBindDeviceFromStub(virConnectPtr conn, pciDevice *dev, const char *driver) { - char stub_dir[PATH_MAX]; + char drvdir[PATH_MAX]; char path[PATH_MAX]; - snprintf(stub_dir, sizeof(stub_dir), PCI_SYSFS "drivers/%s", stub_module); - /* If the device is bound to stub, unbind it. */ - snprintf(path, sizeof(path), PCI_SYSFS "devices/%s/driver", dev->name); - if (virFileExists(stub_dir) && virFileLinkPointsTo(path, stub_dir)) { - snprintf(path, sizeof(path), "%s/unbind", stub_dir); + pciDriverDir(drvdir, sizeof(drvdir), driver); + pciDeviceFile(path, sizeof(path), dev->name, "driver"); + if (virFileExists(drvdir) && virFileLinkPointsTo(path, drvdir)) { + pciDriverFile(path, sizeof(path), driver, "unbind"); if (virFileWriteStr(path, dev->name) < 0) { virReportSystemError(conn, errno, _("Failed to bind PCI device '%s' to %s"), - dev->name, stub_module); + dev->name, driver); return -1; } } + /* Xen's pciback.ko wants you to use remove_slot on the specific device */ + pciDriverFile(path, sizeof(path), driver, "remove_slot"); + if (virFileExists(path) && virFileWriteStr(path, dev->name) < 0) { + virReportSystemError(conn, errno, + _("Failed to remove slot for PCI device '%s' to %s"), + dev->name, driver); + return -1; + } + + /* Trigger a re-probe of the device is not in the stub's 
dynamic * ID table. If the stub is available, but 'remove_id' isn't * available, then re-probing would just cause the device to be * re-bound to the stub. */ - snprintf(path, sizeof(path), "%s/remove_id", stub_dir); - if (!virFileExists(stub_dir) || virFileExists(path)) { + pciDriverFile(path, sizeof(path), driver, "remove_id"); + if (!virFileExists(drvdir) || virFileExists(path)) { if (virFileWriteStr(PCI_SYSFS "drivers_probe", dev->name) < 0) { virReportSystemError(conn, errno, _("Failed to trigger a re-probe for PCI device '%s'"), @@ -746,7 +803,14 @@ pciUnBindDeviceFromStub(virConnectPtr co int pciReAttachDevice(virConnectPtr conn, pciDevice *dev) { - return pciUnBindDeviceFromStub(conn, dev, "pci-stub"); + const char *driver = pciFindStubDriver(conn); + if (!driver) { + pciReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot find any PCI stub module")); + return -1; + } + + return pciUnBindDeviceFromStub(conn, dev, driver); } static char * Index: src/xen_unified.c =================================================================== RCS file: /data/cvs/libvirt/src/xen_unified.c,v retrieving revision 1.83 diff -u -p -r1.83 xen_unified.c --- src/xen_unified.c 5 Feb 2009 16:03:11 -0000 1.83 +++ src/xen_unified.c 30 Mar 2009 13:09:44 -0000 @@ -43,6 +43,8 @@ #include "xml.h" #include "util.h" #include "memory.h" +#include "node_device_conf.h" +#include "pci.h" #define VIR_FROM_THIS VIR_FROM_XEN @@ -1420,6 +1422,123 @@ xenUnifiedDomainEventDeregister (virConn return ret; } + +static int +xenUnifiedNodeDeviceGetPciInfo (virNodeDevicePtr dev, + unsigned *domain, + unsigned *bus, + unsigned *slot, + unsigned *function) +{ + virNodeDeviceDefPtr def = NULL; + virNodeDevCapsDefPtr cap; + char *xml = NULL; + int ret = -1; + + xml = virNodeDeviceGetXMLDesc(dev, 0); + if (!xml) + goto out; + + def = virNodeDeviceDefParseString(dev->conn, xml); + if (!def) + goto out; + + cap = def->caps; + while (cap) { + if (cap->type == VIR_NODE_DEV_CAP_PCI_DEV) { + *domain = 
cap->data.pci_dev.domain; + *bus = cap->data.pci_dev.bus; + *slot = cap->data.pci_dev.slot; + *function = cap->data.pci_dev.function; + break; + } + + cap = cap->next; + } + + if (!cap) { + xenUnifiedError(dev->conn, VIR_ERR_INVALID_ARG, + _("device %s is not a PCI device"), dev->name); + goto out; + } + + ret = 0; +out: + virNodeDeviceDefFree(def); + VIR_FREE(xml); + return ret; +} + +static int +xenUnifiedNodeDeviceDettach (virNodeDevicePtr dev) +{ + pciDevice *pci; + unsigned domain, bus, slot, function; + int ret = -1; + + if (xenUnifiedNodeDeviceGetPciInfo(dev, &domain, &bus, &slot, &function) < 0) + return -1; + + pci = pciGetDevice(dev->conn, domain, bus, slot, function); + if (!pci) + return -1; + + if (pciDettachDevice(dev->conn, pci) < 0) + goto out; + + ret = 0; +out: + pciFreeDevice(dev->conn, pci); + return ret; +} + +static int +xenUnifiedNodeDeviceReAttach (virNodeDevicePtr dev) +{ + pciDevice *pci; + unsigned domain, bus, slot, function; + int ret = -1; + + if (xenUnifiedNodeDeviceGetPciInfo(dev, &domain, &bus, &slot, &function) < 0) + return -1; + + pci = pciGetDevice(dev->conn, domain, bus, slot, function); + if (!pci) + return -1; + + if (pciReAttachDevice(dev->conn, pci) < 0) + goto out; + + ret = 0; +out: + pciFreeDevice(dev->conn, pci); + return ret; +} + +static int +xenUnifiedNodeDeviceReset (virNodeDevicePtr dev) +{ + pciDevice *pci; + unsigned domain, bus, slot, function; + int ret = -1; + + if (xenUnifiedNodeDeviceGetPciInfo(dev, &domain, &bus, &slot, &function) < 0) + return -1; + + pci = pciGetDevice(dev->conn, domain, bus, slot, function); + if (!pci) + return -1; + + if (pciResetDevice(dev->conn, pci) < 0) + goto out; + + ret = 0; +out: + pciFreeDevice(dev->conn, pci); + return ret; +} + + /*----- Register with libvirt.c, and initialise Xen drivers. -----*/ /* The interface which we export upwards to libvirt.c. 
*/ @@ -1481,6 +1600,9 @@ static virDriver xenUnifiedDriver = { .getFreeMemory = xenUnifiedNodeGetFreeMemory, .domainEventRegister = xenUnifiedDomainEventRegister, .domainEventDeregister = xenUnifiedDomainEventDeregister, + .nodeDeviceDettach = xenUnifiedNodeDeviceDettach, + .nodeDeviceReAttach = xenUnifiedNodeDeviceReAttach, + .nodeDeviceReset = xenUnifiedNodeDeviceReset, }; /** Index: src/xend_internal.c =================================================================== RCS file: /data/cvs/libvirt/src/xend_internal.c,v retrieving revision 1.253 diff -u -p -r1.253 xend_internal.c --- src/xend_internal.c 10 Mar 2009 11:13:32 -0000 1.253 +++ src/xend_internal.c 30 Mar 2009 13:09:45 -0000 @@ -92,6 +92,11 @@ xenDaemonFormatSxprNet(virConnectPtr con int xendConfigVersion, int isAttach); static int +xenDaemonFormatSxprOnePCI(virConnectPtr conn, + virDomainHostdevDefPtr def, + virBufferPtr buf); + +static int virDomainXMLDevID(virDomainPtr domain, virDomainDeviceDefPtr dev, char *class, @@ -2145,6 +2150,131 @@ error: return -1; } +/** + * xenDaemonParseSxprPCI + * @conn: connection + * @root: root sexpr + * + * This parses out PCI devices from the domain sexpr + * + * Returns 0 if successful or -1 if failed. + */ +static int +xenDaemonParseSxprPCI(virConnectPtr conn, + virDomainDefPtr def, + const struct sexpr *root) +{ + const struct sexpr *cur, *tmp = NULL, *node; + virDomainHostdevDefPtr dev = NULL; + + /* + * With the (domain ...) block we have the following odd setup + * + * (device + * (pci + * (dev (domain 0x0000) (bus 0x00) (slot 0x1b) (func 0x0)) + * (dev (domain 0x0000) (bus 0x00) (slot 0x13) (func 0x0)) + * ) + * ) + * + * Normally there is one (device ...) block per device, but in + * the weird world of Xen PCI, one (device ...) block covers multiple + * devices. 
+ */ + + for (cur = root; cur->kind == SEXPR_CONS; cur = cur->u.s.cdr) { + node = cur->u.s.car; + if ((tmp = sexpr_lookup(node, "device/pci")) != NULL) + break; + } + + if (!tmp) + return 0; + + for (cur = tmp; cur->kind == SEXPR_CONS; cur = cur->u.s.cdr) { + const char *domain = NULL; + const char *bus = NULL; + const char *slot = NULL; + const char *func = NULL; + int domainID; + int busID; + int slotID; + int funcID; + + node = cur->u.s.car; + if (!sexpr_lookup(node, "dev")) + continue; + + if (!(domain = sexpr_node(node, "dev/domain"))) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + "%s", _("missing PCI domain")); + goto error; + } + if (!(bus = sexpr_node(node, "dev/bus"))) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + "%s", _("missing PCI bus")); + goto error; + } + if (!(slot = sexpr_node(node, "dev/slot"))) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + "%s", _("missing PCI slot")); + goto error; + } + if (!(func = sexpr_node(node, "dev/func"))) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + "%s", _("missing PCI func")); + goto error; + } + + if (virStrToLong_i(domain, NULL, 0, &domainID) < 0) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + _("cannot parse PCI domain '%s'"), domain); + goto error; + } + if (virStrToLong_i(bus, NULL, 0, &busID) < 0) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + _("cannot parse PCI bus '%s'"), bus); + goto error; + } + if (virStrToLong_i(slot, NULL, 0, &slotID) < 0) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + _("cannot parse PCI slot '%s'"), slot); + goto error; + } + if (virStrToLong_i(func, NULL, 0, &funcID) < 0) { + virXendError(conn, VIR_ERR_INTERNAL_ERROR, + _("cannot parse PCI func '%s'"), func); + goto error; + } + + if (VIR_ALLOC(dev) < 0) + goto no_memory; + + dev->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; + dev->managed = 0; + dev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; + dev->source.subsys.u.pci.domain = domainID; + dev->source.subsys.u.pci.bus = busID; + 
dev->source.subsys.u.pci.slot = slotID; + dev->source.subsys.u.pci.function = funcID; + + if (VIR_REALLOC_N(def->hostdevs, def->nhostdevs+1) < 0) { + goto no_memory; + } + + def->hostdevs[def->nhostdevs++] = dev; + } + + return 0; + +no_memory: + virReportOOMError(conn); + +error: + virDomainHostdevDefFree(dev); + return -1; +} + /** * xenDaemonParseSxpr: @@ -2310,6 +2440,9 @@ xenDaemonParseSxpr(virConnectPtr conn, if (xenDaemonParseSxprNets(conn, def, root) < 0) goto error; + if (xenDaemonParseSxprPCI(conn, def, root) < 0) + goto error; + /* New style graphics device config */ if (xenDaemonParseSxprGraphicsNew(conn, def, root) < 0) goto error; @@ -3953,6 +4086,20 @@ xenDaemonAttachDevice(virDomainPtr domai goto cleanup; break; + case VIR_DOMAIN_DEVICE_HOSTDEV: + if (dev->data.hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + dev->data.hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + if (xenDaemonFormatSxprOnePCI(domain->conn, + dev->data.hostdev, + &buf) < 0) + goto cleanup; + } else { + virXendError(domain->conn, VIR_ERR_NO_SUPPORT, "%s", + _("unsupported device type")); + goto cleanup; + } + break; + default: virXendError(domain->conn, VIR_ERR_NO_SUPPORT, "%s", _("unsupported device type")); @@ -5263,6 +5410,85 @@ xenDaemonFormatSxprNet(virConnectPtr con return 0; } + +static void +xenDaemonFormatSxprPCI(virDomainHostdevDefPtr def, + virBufferPtr buf) +{ + virBufferVSprintf(buf, "(dev (domain 0x%04x)(bus 0x%02x)(slot 0x%02x)(func 0x%x))", + def->source.subsys.u.pci.domain, + def->source.subsys.u.pci.bus, + def->source.subsys.u.pci.slot, + def->source.subsys.u.pci.function); +} + +static int +xenDaemonFormatSxprOnePCI(virConnectPtr conn, + virDomainHostdevDefPtr def, + virBufferPtr buf) +{ + if (def->managed) { + virXendError(conn, VIR_ERR_NO_SUPPORT, "%s", + _("managed PCI devices not supported with XenD")); + return -1; + } + + virBufferAddLit(buf, "(pci "); + xenDaemonFormatSxprPCI(def, buf); + virBufferAddLit(buf, ")"); + + return 0; 
+} + +static int +xenDaemonFormatSxprAllPCI(virConnectPtr conn, + virDomainDefPtr def, + virBufferPtr buf) +{ + int hasPCI = 0; + int i; + + for (i = 0 ; i < def->nhostdevs ; i++) + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + def->hostdevs[i]->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + hasPCI = 1; + + if (!hasPCI) + return 0; + + /* + * With the (domain ...) block we have the following odd setup + * + * (device + * (pci + * (dev (domain 0x0000) (bus 0x00) (slot 0x1b) (func 0x0)) + * (dev (domain 0x0000) (bus 0x00) (slot 0x13) (func 0x0)) + * ) + * ) + * + * Normally there is one (device ...) block per device, but in + * the weird world of Xen PCI, one (device ...) block covers multiple + * devices. + */ + + virBufferAddLit(buf, "(device (pci "); + for (i = 0 ; i < def->nhostdevs ; i++) { + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + def->hostdevs[i]->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + if (def->hostdevs[i]->managed) { + virXendError(conn, VIR_ERR_NO_SUPPORT, "%s", + _("managed PCI devices not supported with XenD")); + return -1; + } + + xenDaemonFormatSxprPCI(def->hostdevs[i], buf); + } + } + virBufferAddLit(buf, "))"); + + return 0; +} + int xenDaemonFormatSxprSound(virConnectPtr conn, virDomainDefPtr def, @@ -5529,6 +5755,9 @@ xenDaemonFormatSxpr(virConnectPtr conn, &buf, hvm, xendConfigVersion, 0) < 0) goto error; + if (xenDaemonFormatSxprAllPCI(conn, def, &buf) < 0) + goto error; + /* New style PV graphics config xen >= 3.0.4, * or HVM graphics config xen >= 3.0.5 */ if ((xendConfigVersion >= XEND_CONFIG_MIN_VERS_PVFB_NEWCONF && !hvm) || @@ -5611,6 +5840,9 @@ virDomainXMLDevID(virDomainPtr domain, strncpy(ref, xref, ref_len); free(xref); ref[ref_len - 1] = '\0'; + } else if (dev->type == VIR_DOMAIN_DEVICE_HOSTDEV && + dev->data.hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + dev->data.hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { } else { virXendError(NULL, 
VIR_ERR_NO_SUPPORT, "%s", _("hotplug of device type not supported")); Index: src/xm_internal.c =================================================================== RCS file: /data/cvs/libvirt/src/xm_internal.c,v retrieving revision 1.118 diff -u -p -r1.118 xm_internal.c --- src/xm_internal.c 24 Mar 2009 11:16:29 -0000 1.118 +++ src/xm_internal.c 30 Mar 2009 13:09:45 -0000 @@ -673,6 +673,7 @@ xenXMDomainConfigParse(virConnectPtr con virDomainDiskDefPtr disk = NULL; virDomainNetDefPtr net = NULL; virDomainGraphicsDefPtr graphics = NULL; + virDomainHostdevDefPtr hostdev = NULL; int i; const char *defaultArch, *defaultMachine; @@ -1115,6 +1116,88 @@ xenXMDomainConfigParse(virConnectPtr con } } + list = virConfGetValue(conf, "pci"); + if (list && list->type == VIR_CONF_LIST) { + list = list->list; + while (list) { + char domain[5]; + char bus[3]; + char slot[3]; + char func[2]; + char *key, *nextkey; + int domainID; + int busID; + int slotID; + int funcID; + + domain[0] = bus[0] = slot[0] = func[0] = '\0'; + + if ((list->type != VIR_CONF_STRING) || (list->str == NULL)) + goto skippci; + + /* pci=['0000:00:1b.0','0000:00:13.0'] */ + key = list->str; + if (!(key = list->str)) + goto skippci; + if (!(nextkey = strchr(key, ':'))) + goto skippci; + + if ((nextkey - key) > (sizeof(domain)-1)) + goto skippci; + + strncpy(domain, key, sizeof(domain)); + domain[sizeof(domain)-1] = '\0'; + + key = nextkey + 1; + if (!(nextkey = strchr(key, ':'))) + goto skippci; + + strncpy(bus, key, sizeof(bus)); + bus[sizeof(bus)-1] = '\0'; + + key = nextkey + 1; + if (!(nextkey = strchr(key, '.'))) + goto skippci; + + strncpy(slot, key, sizeof(slot)); + slot[sizeof(slot)-1] = '\0'; + + key = nextkey + 1; + if (strlen(key) != 1) + goto skippci; + + strncpy(func, key, sizeof(func)); + func[sizeof(func)-1] = '\0'; + + if (virStrToLong_i(domain, NULL, 16, &domainID) < 0) + goto skippci; + if (virStrToLong_i(bus, NULL, 16, &busID) < 0) + goto skippci; + if (virStrToLong_i(slot, NULL, 16, &slotID) < 
0) + goto skippci; + if (virStrToLong_i(func, NULL, 16, &funcID) < 0) + goto skippci; + + if (VIR_ALLOC(hostdev) < 0) + goto cleanup; + + hostdev->managed = 0; + hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; + hostdev->source.subsys.u.pci.domain = domainID; + hostdev->source.subsys.u.pci.bus = busID; + hostdev->source.subsys.u.pci.slot = slotID; + hostdev->source.subsys.u.pci.function = funcID; + + if (VIR_REALLOC_N(def->hostdevs, def->nhostdevs+1) < 0) + goto no_memory; + def->hostdevs[def->nhostdevs++] = hostdev; + hostdev = NULL; + + skippci: + list = list->next; + } + } + if (hvm) { if (xenXMConfigGetString(conn, conf, "usbdevice", &str, NULL) < 0) goto cleanup; @@ -1939,6 +2022,76 @@ cleanup: +static int +xenXMDomainConfigFormatPCI(virConnectPtr conn, + virConfPtr conf, + virDomainDefPtr def) +{ + + virConfValuePtr pciVal = NULL; + int hasPCI = 0; + int i; + + for (i = 0 ; i < def->nhostdevs ; i++) + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + def->hostdevs[i]->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + hasPCI = 1; + + if (!hasPCI) + return 0; + + if (VIR_ALLOC(pciVal) < 0) + return -1; + + pciVal->type = VIR_CONF_LIST; + pciVal->list = NULL; + + for (i = 0 ; i < def->nhostdevs ; i++) { + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + def->hostdevs[i]->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + virConfValuePtr val, tmp; + char *buf; + + if (virAsprintf(&buf, "%04x:%02x:%02x.%x", + def->hostdevs[i]->source.subsys.u.pci.domain, + def->hostdevs[i]->source.subsys.u.pci.bus, + def->hostdevs[i]->source.subsys.u.pci.slot, + def->hostdevs[i]->source.subsys.u.pci.function) < 0) + goto error; + + if (VIR_ALLOC(val) < 0) { + VIR_FREE(buf); + virReportOOMError(conn); + goto error; + } + val->type = VIR_CONF_STRING; + val->str = buf; + tmp = pciVal->list; + while (tmp && tmp->next) + tmp = tmp->next; + if (tmp) + tmp->next = val; + else + pciVal->list = val; + } + } + + if 
(pciVal->list != NULL) { + int ret = virConfSetValue(conf, "pci", pciVal); + pciVal = NULL; + if (ret < 0) + return -1; + } + VIR_FREE(pciVal); + + return 0; + +error: + virConfFreeValue(pciVal); + return -1; +} + + virConfPtr xenXMDomainConfigFormat(virConnectPtr conn, virDomainDefPtr def) { virConfPtr conf = NULL; @@ -2260,6 +2413,9 @@ virConfPtr xenXMDomainConfigFormat(virCo } VIR_FREE(netVal); + if (xenXMDomainConfigFormatPCI(conn, conf, def) < 0) + goto cleanup; + if (hvm) { if (def->nparallels) { virBuffer buf = VIR_BUFFER_INITIALIZER; Index: tests/sexpr2xmltest.c =================================================================== RCS file: /data/cvs/libvirt/tests/sexpr2xmltest.c,v retrieving revision 1.36 diff -u -p -r1.36 sexpr2xmltest.c --- tests/sexpr2xmltest.c 29 Jan 2009 17:02:00 -0000 1.36 +++ tests/sexpr2xmltest.c 30 Mar 2009 13:09:45 -0000 @@ -142,6 +142,7 @@ mymain(int argc, char **argv) DO_TEST("net-e1000", "net-e1000", 2); DO_TEST("bridge-ipaddr", "bridge-ipaddr", 3); DO_TEST("no-source-cdrom", "no-source-cdrom", 2); + DO_TEST("pci-devs", "pci-devs", 2); DO_TEST("fv-utc", "fv-utc", 1); DO_TEST("fv-localtime", "fv-localtime", 1); Index: tests/xmconfigtest.c =================================================================== RCS file: /data/cvs/libvirt/tests/xmconfigtest.c,v retrieving revision 1.26 diff -u -p -r1.26 xmconfigtest.c --- tests/xmconfigtest.c 8 Jan 2009 19:52:15 -0000 1.26 +++ tests/xmconfigtest.c 30 Mar 2009 13:09:45 -0000 @@ -231,6 +231,7 @@ mymain(int argc, char **argv) DO_TEST("escape-paths", 2); DO_TEST("no-source-cdrom", 2); + DO_TEST("pci-devs", 2); virCapabilitiesFree(caps); Index: tests/xml2sexprtest.c =================================================================== RCS file: /data/cvs/libvirt/tests/xml2sexprtest.c,v retrieving revision 1.34 diff -u -p -r1.34 xml2sexprtest.c --- tests/xml2sexprtest.c 23 Jan 2009 01:48:47 -0000 1.34 +++ tests/xml2sexprtest.c 30 Mar 2009 13:09:45 -0000 @@ -128,6 +128,7 @@ mymain(int argc, 
char **argv) DO_TEST("net-e1000", "net-e1000", "pvtest", 2); DO_TEST("bridge-ipaddr", "bridge-ipaddr", "pvtest", 2); DO_TEST("no-source-cdrom", "no-source-cdrom", "test", 2); + DO_TEST("pci-devs", "pci-devs", "pvtest", 2); DO_TEST("fv-utc", "fv-utc", "fvtest", 1); DO_TEST("fv-localtime", "fv-localtime", "fvtest", 1); Index: tests/sexpr2xmldata/sexpr2xml-pci-devs.sexpr =================================================================== RCS file: tests/sexpr2xmldata/sexpr2xml-pci-devs.sexpr diff -N tests/sexpr2xmldata/sexpr2xml-pci-devs.sexpr --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/sexpr2xmldata/sexpr2xml-pci-devs.sexpr 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1,2 @@ +(domain (domid 6)(name 'pvtest')(memory 420)(maxmem 420)(vcpus 2)(uuid '596a5d2171f48fb2e068e2386a5c413e')(on_poweroff 'destroy')(on_reboot 'destroy')(on_crash 'destroy')(image (linux (kernel '/var/lib/xen/vmlinuz.2Dn2YT')(ramdisk '/var/lib/xen/initrd.img.0u-Vhq')(args ' method=http://download.fedora.devel.redhat.com/pub/fedora/linux/core/test/5.91/x86_... 
')))(device (pci (backend 0)(dev (domain 0x0001) (bus 0x0c) (slot 0x1b) (func 0x2))(dev (domain 0x0000) (bus 0x01) (slot 0x13) (func 0x0))))(device (vbd (dev 'xvda')(uname 'phy:/dev/MainVG/GuestVG')(mode 'w')))) + Index: tests/sexpr2xmldata/sexpr2xml-pci-devs.xml =================================================================== RCS file: tests/sexpr2xmldata/sexpr2xml-pci-devs.xml diff -N tests/sexpr2xmldata/sexpr2xml-pci-devs.xml --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/sexpr2xmldata/sexpr2xml-pci-devs.xml 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1,37 @@ +<domain type='xen' id='6'> + <name>pvtest</name> + <uuid>596a5d21-71f4-8fb2-e068-e2386a5c413e</uuid> + <memory>430080</memory> + <currentMemory>430080</currentMemory> + <vcpu>2</vcpu> + <os> + <type>linux</type> + <kernel>/var/lib/xen/vmlinuz.2Dn2YT</kernel> + <initrd>/var/lib/xen/initrd.img.0u-Vhq</initrd> + <cmdline> method=http://download.fedora.devel.redhat.com/pub/fedora/linux/core/test/5.91/x86_... </cmdline> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>destroy</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <disk type='block' device='disk'> + <driver name='phy'/> + <source dev='/dev/MainVG/GuestVG'/> + <target dev='xvda' bus='xen'/> + </disk> + <console type='pty'> + <target port='0'/> + </console> + <hostdev mode='subsystem' type='pci' managed='no'> + <source> + <address domain='0x0001' bus='0x0c' slot='0x1b' function='0x2'/> + </source> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='no'> + <source> + <address domain='0x0000' bus='0x01' slot='0x13' function='0x0'/> + </source> + </hostdev> + </devices> +</domain> Index: tests/xmconfigdata/test-pci-devs.cfg =================================================================== RCS file: tests/xmconfigdata/test-pci-devs.cfg diff -N tests/xmconfigdata/test-pci-devs.cfg --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/xmconfigdata/test-pci-devs.cfg 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1,24 @@ +name 
= "test" +uuid = "cc2315e7-d26a-307a-438c-6d188ec4c09c" +maxmem = 382 +memory = 350 +vcpus = 1 +builder = "hvm" +kernel = "/usr/lib/xen/boot/hvmloader" +boot = "c" +pae = 1 +acpi = 1 +apic = 1 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "destroy" +on_crash = "destroy" +device_model = "/usr/lib/xen/bin/qemu-dm" +sdl = 0 +vnc = 1 +vncunused = 1 +disk = [ "phy:/dev/sda8,hda,w", ",hdc:cdrom,r" ] +vif = [ "mac=00:16:3e:0a:7b:39,bridge=xenbr0,type=ioemu" ] +pci = [ "0001:0c:1b.2", "0000:01:13.0" ] +parallel = "none" +serial = "pty" Index: tests/xmconfigdata/test-pci-devs.xml =================================================================== RCS file: tests/xmconfigdata/test-pci-devs.xml diff -N tests/xmconfigdata/test-pci-devs.xml --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/xmconfigdata/test-pci-devs.xml 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1,56 @@ +<domain type='xen'> + <name>test</name> + <uuid>cc2315e7-d26a-307a-438c-6d188ec4c09c</uuid> + <memory>391168</memory> + <currentMemory>358400</currentMemory> + <vcpu>1</vcpu> + <os> + <type arch='i686' machine='xenfv'>hvm</type> + <loader>/usr/lib/xen/boot/hvmloader</loader> + <boot dev='hd'/> + </os> + <features> + <acpi/> + <apic/> + <pae/> + </features> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>destroy</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/lib/xen/bin/qemu-dm</emulator> + <disk type='block' device='disk'> + <driver name='phy'/> + <source dev='/dev/sda8'/> + <target dev='hda' bus='ide'/> + </disk> + <disk type='block' device='cdrom'> + <driver name='phy'/> + <target dev='hdc' bus='ide'/> + <readonly/> + </disk> + <interface type='bridge'> + <mac address='00:16:3e:0a:7b:39'/> + <source bridge='xenbr0'/> + </interface> + <serial type='pty'> + <target port='0'/> + </serial> + <console type='pty'> + <target port='0'/> + </console> + <input type='mouse' bus='ps2'/> + <graphics type='vnc' port='-1' autoport='yes'/> + <hostdev mode='subsystem' 
type='pci' managed='no'> + <source> + <address domain='0x0001' bus='0x0c' slot='0x1b' function='0x2'/> + </source> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='no'> + <source> + <address domain='0x0000' bus='0x01' slot='0x13' function='0x0'/> + </source> + </hostdev> + </devices> +</domain> Index: tests/xml2sexprdata/xml2sexpr-pci-devs.sexpr =================================================================== RCS file: tests/xml2sexprdata/xml2sexpr-pci-devs.sexpr diff -N tests/xml2sexprdata/xml2sexpr-pci-devs.sexpr --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/xml2sexprdata/xml2sexpr-pci-devs.sexpr 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1 @@ +(vm (name 'pvtest')(memory 420)(maxmem 420)(vcpus 2)(uuid '596a5d21-71f4-8fb2-e068-e2386a5c413e')(on_poweroff 'destroy')(on_reboot 'destroy')(on_crash 'destroy')(image (linux (kernel '/var/lib/xen/vmlinuz.2Dn2YT')(ramdisk '/var/lib/xen/initrd.img.0u-Vhq')(args ' method=http://download.fedora.devel.redhat.com/pub/fedora/linux/core/test/5.91/x86_... ')))(device (vbd (dev 'xvda')(uname 'phy:/dev/MainVG/GuestLV')(mode 'w')))(device (pci (dev (domain 0x0001)(bus 0x0c)(slot 0x1b)(func 0x2))(dev (domain 0x0000)(bus 0x01)(slot 0x13)(func 0x0))))) \ No newline at end of file Index: tests/xml2sexprdata/xml2sexpr-pci-devs.xml =================================================================== RCS file: tests/xml2sexprdata/xml2sexpr-pci-devs.xml diff -N tests/xml2sexprdata/xml2sexpr-pci-devs.xml --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tests/xml2sexprdata/xml2sexpr-pci-devs.xml 30 Mar 2009 13:09:45 -0000 @@ -0,0 +1,33 @@ +<domain type='xen' id='15'> + <name>pvtest</name> + <uuid>596a5d2171f48fb2e068e2386a5c413e</uuid> + <os> + <type>linux</type> + <kernel>/var/lib/xen/vmlinuz.2Dn2YT</kernel> + <initrd>/var/lib/xen/initrd.img.0u-Vhq</initrd> + <cmdline> method=http://download.fedora.devel.redhat.com/pub/fedora/linux/core/test/5.91/x86_... 
</cmdline> + </os> + <memory>430080</memory> + <vcpu>2</vcpu> + <on_poweroff>destroy</on_poweroff> + <on_reboot>destroy</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <disk type='block' device='disk'> + <source dev='/dev/MainVG/GuestLV'/> + <target dev='xvda'/> + </disk> + <console tty='/dev/pts/4'/> + <hostdev mode='subsystem' type='pci' managed='no'> + <source> + <address domain='0x0001' bus='0x0c' slot='0x1b' function='0x2'/> + </source> + </hostdev> + <hostdev mode='subsystem' type='pci' managed='no'> + <source> + <address domain='0x0000' bus='0x01' slot='0x13' function='0x0'/> + </source> + </hostdev> + </devices> +</domain> + -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Hi,
+static const char * +pciFindStubDriver(virConnectPtr conn) +{ + char drvpath[PATH_MAX]; + int probed = 0; + +recheck: + pciDriverDir(drvpath, sizeof(drvpath), "pci-stub"); + if (virFileExists(drvpath)) + return "pci-stub"; + pciDriverDir(drvpath, sizeof(drvpath), "pciback"); + if (virFileExists(drvpath)) + return "pciback"; + + if (!probed) { + const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL }; + const char *const backprobe[] = { MODPROBE, "pciback", NULL };
+ + probed = 1; + if (virRun(conn, stubprobe, NULL) < 0 && + virRun(conn, backprobe, NULL) < 0) { char ebuf[1024]; - VIR_WARN(_("modprobe %s failed: %s"), stub_module, + VIR_WARN(_("failed to load pci-stub or pciback drivers: %s"), virStrerror(errno, ebuf, sizeof ebuf)); + return 0; } + + goto recheck; }
I'd tend to reverse that ordering. It isn't an issue today, but once pv_ops/dom0 gets pciback support you might actually have both modules present for a given kernel. pci-stub doesn't do anything special and will load just fine in any environment. pciback is xen-specific (especially the part which handles pci passthrough to pv domains) and should not load when not running on xen. cheers, Gerd

On Tue, Mar 31, 2009 at 10:10:14AM +0200, Gerd Hoffmann wrote:
Hi,
+static const char * +pciFindStubDriver(virConnectPtr conn) +{ + char drvpath[PATH_MAX]; + int probed = 0; + +recheck: + pciDriverDir(drvpath, sizeof(drvpath), "pci-stub"); + if (virFileExists(drvpath)) + return "pci-stub"; + pciDriverDir(drvpath, sizeof(drvpath), "pciback"); + if (virFileExists(drvpath)) + return "pciback"; + + if (!probed) { + const char *const stubprobe[] = { MODPROBE, "pci-stub", NULL }; + const char *const backprobe[] = { MODPROBE, "pciback", NULL };
+ + probed = 1; + if (virRun(conn, stubprobe, NULL) < 0 && + virRun(conn, backprobe, NULL) < 0) { char ebuf[1024]; - VIR_WARN(_("modprobe %s failed: %s"), stub_module, + VIR_WARN(_("failed to load pci-stub or pciback drivers: %s"), virStrerror(errno, ebuf, sizeof ebuf)); + return 0; } + + goto recheck; }
I'd tend to reverse that ordering. It isn't an issue today, but once pv_ops/dom0 gets pciback support you might actually have both modules present for a given kernel.
pci-stub doesn't do anything special and will load just fine in any environment. pciback is xen-specific (especially the part which handles pci passthrough to pv domains) and should not load when not running on xen.
Ahh, I was assuming that pciback would just go the way of the dodo when dom0 pvops merged. I had forgotten it would still be needed for pure PV pci passthrough without VT-d. So reversing it seems reasonable. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On Mon, Mar 30, 2009 at 03:00:55PM +0100, Daniel P. Berrange wrote:
This patch implements PCI device passthrough for Xen.
In pci.c there is a fair bit of refactoring to better support both pcistub.ko and pciback.ko. The latter has two extra sysfs files, add_slot and remove_slot, for associating the driver with a specific PCI device domain:bus:slot:func address, whereas pcistub can only associate based on vendor:product ID.
The Xen driver gets an implementation of the detach/reattach APIs for node devices.
The XML <-> SEXPR conversion gets adapted to cope with PCI devices
The XM <-> XML conversion gets adapted to load/save PCI device info in /etc/xen config files
Okay, I also wondered about the test of the 2 modprobe commands, their order, and what the Run() command return value < 0 really meant (man modprobe doesn't document any return value, at least on Fedora 9!). So I'm a bit worried about the portability of that specific test; maybe something more appropriate than just the return value should be used to detect failure. Still, this can't generate a regression since it's a new feature, so fine by me, ACK, but I'm still wondering a bit :-) Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/
participants (3)
-
Daniel P. Berrange
-
Daniel Veillard
-
Gerd Hoffmann