[libvirt] [PATCHv2 0/3] Change preference of default PCI passthrough type to VFIO

Peter Krempa (3): qemu: hostdev: Refactor PCI passthrough handling qemu: hostdev: Add checks if PCI passthrough is available in the host qemu: Prefer VFIO for PCI device passthrough docs/formatdomain.html.in | 9 ++- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 28 ++++++--- src/qemu/qemu_hostdev.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_hostdev.h | 5 ++ src/qemu/qemu_hotplug.c | 29 ++++++---- src/qemu/qemu_process.c | 6 ++ tests/qemuxml2argvtest.c | 11 ++++ 8 files changed, 208 insertions(+), 25 deletions(-) -- 1.8.3.2

To simplify future patches dealing with this code, simplify and refactor some conditions to switch statements. --- Notes: Version 2: - No change, wasn't reviewed earlier src/qemu/qemu_command.c | 27 ++++++++++++++++++--------- src/qemu/qemu_hotplug.c | 27 ++++++++++++++++----------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index e6239c9..06c0fcb 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5390,14 +5390,25 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, { virBuffer buf = VIR_BUFFER_INITIALIZER; - if (dev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { - virBufferAddLit(&buf, "vfio-pci"); - } else { + switch ((virDomainHostdevSubsysPciBackendType) + dev->source.subsys.u.pci.backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + virBufferAddLit(&buf, "vfio-pci"); + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("unknown PCI passhthrough type")); + break; } + virBufferAsprintf(&buf, ",host=%.2x:%.2x.%.1x", dev->source.subsys.u.pci.addr.bus, dev->source.subsys.u.pci.addr.slot, @@ -9132,7 +9143,6 @@ qemuBuildCommandLine(virConnectPtr conn, VIR_FREE(devstr); } - /* Add host passthrough hardware */ for (i = 0; i < def->nhostdevs; i++) { virDomainHostdevDefPtr hostdev = def->hostdevs[i]; @@ -9205,9 +9215,9 @@ qemuBuildCommandLine(virConnectPtr conn, /* PCI */ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int backend = hostdev->source.subsys.u.pci.backend; - if (hostdev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { + if (backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { 
if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("VFIO PCI device assignment is not " @@ -9221,8 +9231,7 @@ qemuBuildCommandLine(virConnectPtr conn, if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE)) { char *configfd_name = NULL; - if ((hostdev->source.subsys.u.pci.backend - != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && + if ((backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && virQEMUCapsGet(qemuCaps, QEMU_CAPS_PCI_CONFIGFD)) { int configfd = qemuOpenPCIConfig(hostdev); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 6cdee44..728c734 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1134,6 +1134,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, int configfd = -1; char *configfd_name = NULL; bool releaseaddr = false; + int *backend = &hostdev->source.subsys.u.pci.backend; if (VIR_REALLOC_N(vm->def->hostdevs, vm->def->nhostdevs + 1) < 0) return -1; @@ -1142,10 +1143,8 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, &hostdev, 1) < 0) return -1; - if (hostdev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { - unsigned long long memKB; - + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("VFIO PCI device assignment is not " @@ -1157,11 +1156,18 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, * In this case, the guest's memory may already be locked, but it * doesn't hurt to "change" the limit to the same value. */ - vm->def->hostdevs[vm->def->nhostdevs++] = hostdev; - memKB = vm->def->mem.hard_limit ? 
- vm->def->mem.hard_limit : vm->def->mem.max_balloon + 1024 * 1024; - virProcessSetMaxMemLock(vm->pid, memKB); - vm->def->hostdevs[vm->def->nhostdevs--] = NULL; + if (vm->def->mem.hard_limit) + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); + else + virProcessSetMaxMemLock(vm->pid, + vm->def->mem.max_balloon + (1024 * 1024)); + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; } if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { @@ -1170,8 +1176,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, hostdev->info) < 0) goto error; releaseaddr = true; - if ((hostdev->source.subsys.u.pci.backend - != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && + if ((*backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCI_CONFIGFD)) { configfd = qemuOpenPCIConfig(hostdev); if (configfd >= 0) { -- 1.8.3.2

Add code to check availability of PCI passthrough using VFIO and the legacy KVM passthrough and use it when starting VMs and hotplugging devices to a live machine. --- Notes: Version 2: - Merge of multiple patches from v1 posting - changed qemuHostdevHostSupportsPassthrough*() to static - added function splitting out common parts of the code between machine start and hotplug src/qemu/qemu_hostdev.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_hostdev.h | 4 ++ src/qemu/qemu_hotplug.c | 4 ++ src/qemu/qemu_process.c | 4 ++ 4 files changed, 138 insertions(+) diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c index 21fe47f..dbbc2b4 100644 --- a/src/qemu/qemu_hostdev.c +++ b/src/qemu/qemu_hostdev.c @@ -23,6 +23,11 @@ #include <config.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> + #include "qemu_hostdev.h" #include "virlog.h" #include "virerror.h" @@ -31,6 +36,7 @@ #include "virusb.h" #include "virscsi.h" #include "virnetdev.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -1287,3 +1293,123 @@ void qemuDomainReAttachHostDevices(virQEMUDriverPtr driver, qemuDomainReAttachHostScsiDevices(driver, def->name, def->hostdevs, def->nhostdevs); } + + +static bool +qemuHostdevHostSupportsPassthroughVFIO(void) +{ + DIR *iommuDir = NULL; + struct dirent *iommuGroup = NULL; + bool ret = false; + + /* condition 1 - /sys/kernel/iommu_groups/ contains entries */ + if (!(iommuDir = opendir("/sys/kernel/iommu_groups/"))) + goto cleanup; + + while ((iommuGroup = readdir(iommuDir))) { + /* skip ./ ../ */ + if (STRPREFIX(iommuGroup->d_name, ".")) + continue; + + /* assume we found a group */ + break; + } + + if (!iommuGroup) + goto cleanup; + /* okay, iommu is on and recognizes groups */ + + /* condition 2 - /dev/vfio/vfio exists */ + if (!virFileExists("/dev/vfio/vfio")) + goto cleanup; + + ret = true; + +cleanup: + if (iommuDir) + closedir(iommuDir); + + return ret; +} + + +#if HAVE_LINUX_KVM_H +# 
include <linux/kvm.h> +static bool +qemuHostdevHostSupportsPassthroughLegacy(void) +{ + int kvmfd = -1; + bool ret = false; + + if ((kvmfd = open("/dev/kvm", O_RDONLY)) < 0) + goto cleanup; + +# ifdef KVM_CAP_IOMMU + if ((ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_IOMMU)) <= 0) + goto cleanup; + + ret = true; +# endif + +cleanup: + VIR_FORCE_CLOSE(kvmfd); + + return ret; +} +#else +static bool +qemuHostdevHostSupportsPassthroughLegacy(void) +{ + return false; +} +#endif + +bool +qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, + size_t nhostdevs) +{ + int supportsPassthroughKVM = -1; + int supportsPassthroughVFIO = -1; + size_t i; + + /* assign defaults for hostdev passthrough */ + for (i = 0; i < nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int *backend = &hostdev->source.subsys.u.pci.backend; + + /* cache host state of passthrough support */ + if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) { + supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy(); + supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO(); + } + + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + if (!supportsPassthroughVFIO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + return false; + } + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!supportsPassthroughKVM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + return false; + } + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; + } + } + } + + return true; +} diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h index 327d4d5..6d88830 100644 --- a/src/qemu/qemu_hostdev.h +++ 
b/src/qemu/qemu_hostdev.h @@ -69,4 +69,8 @@ int qemuDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev, int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev, char *stateDir); +bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, + size_t nhostdevs); + + #endif /* __QEMU_HOSTDEV_H__ */ diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 728c734..604a0ed 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1143,6 +1143,10 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, &hostdev, 1) < 0) return -1; + /* verify the availability of passthrough support */ + if (!qemuHostdevHostVerifySupport(&hostdev, 1)) + goto error; + switch ((virDomainHostdevSubsysPciBackendType) *backend) { case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index dd16f6c..baf021f 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3713,6 +3713,10 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; } + /* check and assign device assignment settings */ + if (!qemuHostdevHostVerifySupport(vm->def->hostdevs, vm->def->nhostdevs)) + goto cleanup; + /* * Normally PCI addresses are assigned in the virDomainCreate * or virDomainDefine methods. We might still need to assign -- 1.8.3.2

Prefer using VFIO (if available) to the legacy KVM device passthrough. With this patch, a PCI passthrough device without an explicitly configured driver will be started with VFIO if it is available on the host. If not, legacy KVM passthrough is checked, and an error is reported if that is not available either. --- Notes: Version 2: - adapted to changes from the previous patches - error out if VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT reaches CMD generator and adapt tests to avoid it - change the code to use the shared function docs/formatdomain.html.in | 9 ++++----- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 5 +++-- src/qemu/qemu_hostdev.c | 21 +++++++++++++++++++-- src/qemu/qemu_hostdev.h | 3 ++- src/qemu/qemu_hotplug.c | 2 +- src/qemu/qemu_process.c | 4 +++- tests/qemuxml2argvtest.c | 11 +++++++++++ 8 files changed, 44 insertions(+), 13 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3689399..6f3f7cf 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2755,11 +2755,10 @@ backend, which is compatible with UEFI SecureBoot) or "kvm" (for the legacy device assignment handled directly by the KVM kernel module)<span class="since">Since 1.0.5 (QEMU and KVM - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" - is the default used by libvirt when not explicitly provided, - but since the two are functionally equivalent, this default - could be changed in the future with no impact to domains that - don't specify anything. + only, requires kernel 3.6 or newer)</span>. The default, when + the driver name is not explicitly specified, is to check whether + VFIO is available and use it if it's the case. If VFIO is not + available, the legacy "kvm" assignment is attempted. 
</dd> <dt><code>readonly</code></dt> <dd>Indicates that the device is readonly, only supported by SCSI host diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9414ebf..43d8746 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType { /* the backend driver used for PCI hostdev devices */ typedef enum { - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 06c0fcb..c346816 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5392,7 +5392,6 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, switch ((virDomainHostdevSubsysPciBackendType) dev->source.subsys.u.pci.backend) { - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) @@ -5403,9 +5402,11 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, virBufferAddLit(&buf, "vfio-pci"); break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("unknown PCI passhthrough type")); + _("PCI passhthrough type needs to be specified")); break; } diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c index dbbc2b4..ad408d8 100644 --- a/src/qemu/qemu_hostdev.c +++ b/src/qemu/qemu_hostdev.c @@ -1366,7 +1366,8 @@ qemuHostdevHostSupportsPassthroughLegacy(void) bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, - size_t nhostdevs) + size_t nhostdevs, + virQEMUCapsPtr qemuCaps) { int supportsPassthroughKVM = -1; int supportsPassthroughVFIO = -1; @@ -1387,6 +1388,23 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, } switch 
((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (supportsPassthroughVFIO && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + } else if (supportsPassthroughKVM && + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + return false; + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!supportsPassthroughVFIO) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -1395,7 +1413,6 @@ qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, } break; - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!supportsPassthroughKVM) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h index 6d88830..afe67a5 100644 --- a/src/qemu/qemu_hostdev.h +++ b/src/qemu/qemu_hostdev.h @@ -70,7 +70,8 @@ int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev, char *stateDir); bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs, - size_t nhostdevs); + size_t nhostdevs, + virQEMUCapsPtr qemuCaps); #endif /* __QEMU_HOSTDEV_H__ */ diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 604a0ed..a6afebe 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1144,7 +1144,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, return -1; /* verify the availability of passthrough support */ - if (!qemuHostdevHostVerifySupport(&hostdev, 1)) + if (!qemuHostdevHostVerifySupport(&hostdev, 1, priv->qemuCaps)) goto error; switch ((virDomainHostdevSubsysPciBackendType) *backend) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index baf021f..6af4223 100644 --- 
a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3714,7 +3714,9 @@ int qemuProcessStart(virConnectPtr conn, } /* check and assign device assignment settings */ - if (!qemuHostdevHostVerifySupport(vm->def->hostdevs, vm->def->nhostdevs)) + if (!qemuHostdevHostVerifySupport(vm->def->hostdevs, + vm->def->nhostdevs, + priv->qemuCaps)) goto cleanup; /* diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index ec4a6e5..690b705 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -98,6 +98,7 @@ static int testCompareXMLToArgvFiles(const char *xml, virConnectPtr conn; char *log = NULL; virCommandPtr cmd = NULL; + size_t i; if (!(conn = virGetConnect())) goto out; @@ -154,6 +155,16 @@ static int testCompareXMLToArgvFiles(const char *xml, if (qemuAssignDeviceAliases(vmdef, extraFlags) < 0) goto out; + for (i = 0; i < vmdef->nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = vmdef->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && + hostdev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT) { + hostdev->source.subsys.u.pci.backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } + } + if (!(cmd = qemuBuildCommandLine(conn, &driver, vmdef, &monitor_chr, (flags & FLAG_JSON), extraFlags, migrateFrom, migrateFd, NULL, -- 1.8.3.2
participants (1)
-
Peter Krempa