[libvirt] [PATCH 0/5] Change preference of default PCI passthrough type

Change the default type to VFIO if available. Please note that the majority of the code here is only compile tested as I don't have appropriate hardware to test it. Peter Krempa (5): qemu: hostdev: Refactor PCI passhrough handling qemu: hostdev: Add checks if PCI passthrough is availabe in the host qemu: hotplug: Verify that the selected pci passhtrough option is present qemu: Check if PCI passhtrough is available in the host when starting qemu: Prefer VFIO for PCI device passthrough docs/formatdomain.html.in | 9 ++- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 27 +++++--- src/qemu/qemu_hostdev.c | 76 ++++++++++++++++++++++ src/qemu/qemu_hostdev.h | 3 + src/qemu/qemu_hotplug.c | 63 +++++++++++++++--- src/qemu/qemu_process.c | 57 ++++++++++++++++ .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 +- 10 files changed, 216 insertions(+), 29 deletions(-) -- 1.8.3.2

To simplify future patches dealing with this code, simplify and refactor some conditions to switch statements. --- src/qemu/qemu_command.c | 27 ++++++++++++++++++--------- src/qemu/qemu_hotplug.c | 27 ++++++++++++++++----------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 4628dac..1411533 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5393,14 +5393,25 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, { virBuffer buf = VIR_BUFFER_INITIALIZER; - if (dev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { - virBufferAddLit(&buf, "vfio-pci"); - } else { + switch ((virDomainHostdevSubsysPciBackendType) + dev->source.subsys.u.pci.backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + virBufferAddLit(&buf, "vfio-pci"); + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("unknown PCI passhthrough type")); + break; } + virBufferAsprintf(&buf, ",host=%.2x:%.2x.%.1x", dev->source.subsys.u.pci.addr.bus, dev->source.subsys.u.pci.addr.slot, @@ -9135,7 +9146,6 @@ qemuBuildCommandLine(virConnectPtr conn, VIR_FREE(devstr); } - /* Add host passthrough hardware */ for (i = 0; i < def->nhostdevs; i++) { virDomainHostdevDefPtr hostdev = def->hostdevs[i]; @@ -9208,9 +9218,9 @@ qemuBuildCommandLine(virConnectPtr conn, /* PCI */ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int backend = hostdev->source.subsys.u.pci.backend; - if (hostdev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { + if (backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("VFIO PCI device assignment is not " @@ -9224,8 +9234,7 @@ qemuBuildCommandLine(virConnectPtr conn, if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE)) { char *configfd_name = NULL; - if ((hostdev->source.subsys.u.pci.backend - != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && + if ((backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && virQEMUCapsGet(qemuCaps, QEMU_CAPS_PCI_CONFIGFD)) { int configfd = qemuOpenPCIConfig(hostdev); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 6cdee44..728c734 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1134,6 +1134,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, int configfd = -1; char *configfd_name = NULL; bool releaseaddr = false; + int *backend = &hostdev->source.subsys.u.pci.backend; if (VIR_REALLOC_N(vm->def->hostdevs, vm->def->nhostdevs + 1) < 0) return -1; @@ -1142,10 +1143,8 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, &hostdev, 1) < 0) return -1; - if (hostdev->source.subsys.u.pci.backend - == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { - unsigned long long memKB; - + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("VFIO PCI device assignment is not " @@ -1157,11 +1156,18 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, * In this case, the guest's memory may already be locked, but it * doesn't hurt to "change" the limit to the same value. */ - vm->def->hostdevs[vm->def->nhostdevs++] = hostdev; - memKB = vm->def->mem.hard_limit ? - vm->def->mem.hard_limit : vm->def->mem.max_balloon + 1024 * 1024; - virProcessSetMaxMemLock(vm->pid, memKB); - vm->def->hostdevs[vm->def->nhostdevs--] = NULL; + if (vm->def->mem.hard_limit) + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); + else + virProcessSetMaxMemLock(vm->pid, + vm->def->mem.max_balloon + (1024 * 1024)); + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; } if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { @@ -1170,8 +1176,7 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, hostdev->info) < 0) goto error; releaseaddr = true; - if ((hostdev->source.subsys.u.pci.backend - != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && + if ((*backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) && virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCI_CONFIGFD)) { configfd = qemuOpenPCIConfig(hostdev); if (configfd >= 0) { -- 1.8.3.2

Add code that will be used to check availability of PCI passhthrough using VFIO and the legacy KVM passthrough. These will be later used to determine the preferred passthrough option. --- src/qemu/qemu_hostdev.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_hostdev.h | 3 ++ 2 files changed, 79 insertions(+) diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c index 21fe47f..1967a47 100644 --- a/src/qemu/qemu_hostdev.c +++ b/src/qemu/qemu_hostdev.c @@ -23,6 +23,11 @@ #include <config.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> + #include "qemu_hostdev.h" #include "virlog.h" #include "virerror.h" @@ -31,6 +36,7 @@ #include "virusb.h" #include "virscsi.h" #include "virnetdev.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -1287,3 +1293,73 @@ void qemuDomainReAttachHostDevices(virQEMUDriverPtr driver, qemuDomainReAttachHostScsiDevices(driver, def->name, def->hostdevs, def->nhostdevs); } + + +bool +qemuHostdevHostSupportsPassthroughVFIO(void) +{ + DIR *iommuDir = NULL; + struct dirent *iommuGroup = NULL; + bool ret = false; + + /* condition 1 - /sys/kernel/iommu_groups/ contains entries */ + if (!(iommuDir = opendir("/sys/kernel/iommu_groups/"))) + goto cleanup; + + while ((iommuGroup = readdir(iommuDir))) { + /* skip ./ ../ */ + if (STRPREFIX(iommuGroup->d_name, ".")) + continue; + + /* assume we found a group */ + break; + } + + if (!iommuGroup) + goto cleanup; + /* okay, iommu is on and recognizes groups */ + + /* condition 2 - /dev/vfio/vfio exists */ + if (!virFileExists("/dev/vfio/vfio")) + goto cleanup; + + ret = true; + +cleanup: + if (iommuDir) + closedir(iommuDir); + + return ret; +} + + +#if HAVE_LINUX_KVM_H +# include <linux/kvm.h> +bool +qemuHostdevHostSupportsPassthroughLegacy(void) +{ + int kvmfd = -1; + bool ret = false; + + if ((kvmfd = open("/dev/kvm", O_RDONLY)) < 0) + goto cleanup; + +# ifdef KVM_CAP_IOMMU + if ((ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_IOMMU)) <= 0) + goto cleanup; + + ret = true; +# endif + +cleanup: + VIR_FORCE_CLOSE(kvmfd); + + return ret; +} +#else +bool +qemuHostdevHostSupportsPassthroughLegacy(void) +{ + return false; +} +#endif diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h index 327d4d5..7f33486 100644 --- a/src/qemu/qemu_hostdev.h +++ b/src/qemu/qemu_hostdev.h @@ -69,4 +69,7 @@ int qemuDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev, int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev, char *stateDir); +bool qemuHostdevHostSupportsPassthroughVFIO(void); +bool qemuHostdevHostSupportsPassthroughLegacy(void); + #endif /* __QEMU_HOSTDEV_H__ */ -- 1.8.3.2

Use the checking functions to verify that the host supports PCI passthrough before attempting it. --- src/qemu/qemu_hotplug.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 728c734..9320ab9 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1152,6 +1152,12 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, goto error; } + if (!qemuHostdevHostSupportsPassthroughVFIO()) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + goto error; + } + /* VFIO requires all of the guest's memory to be locked resident. * In this case, the guest's memory may already be locked, but it * doesn't hurt to "change" the limit to the same value. @@ -1166,6 +1172,14 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!qemuHostdevHostSupportsPassthroughLegacy()) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + goto error; + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: break; } -- 1.8.3.2

Check the presence of the selected PCI passthrough option when starting a VM. --- src/qemu/qemu_process.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index dd16f6c..e36ab99 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3481,6 +3481,8 @@ int qemuProcessStart(virConnectPtr conn, unsigned int stop_flags; virQEMUDriverConfigPtr cfg; virCapsPtr caps = NULL; + int supportsPassthroughKVM = -1; + int supportsPassthroughVFIO = -1; /* Okay, these are just internal flags, * but doesn't hurt to check */ @@ -3713,6 +3715,45 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; } + /* assign defaults for hostdev passthrough */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int *backend = &hostdev->source.subsys.u.pci.backend; + + /* cache host state of passthrough support */ + if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) { + supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy(); + supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO(); + } + + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + if (!supportsPassthroughVFIO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + goto cleanup; + } + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!supportsPassthroughKVM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + goto cleanup; + } + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; + } + } + } + /* * Normally PCI addresses are assigned in the virDomainCreate * or virDomainDefine methods. We might still need to assign -- 1.8.3.2

On Fri, Sep 20, 2013 at 11:06:59AM +0200, Peter Krempa wrote:
Check the presence of the selected PCI passthrough option when starting a VM. --- src/qemu/qemu_process.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index dd16f6c..e36ab99 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3481,6 +3481,8 @@ int qemuProcessStart(virConnectPtr conn, unsigned int stop_flags; virQEMUDriverConfigPtr cfg; virCapsPtr caps = NULL; + int supportsPassthroughKVM = -1; + int supportsPassthroughVFIO = -1;
/* Okay, these are just internal flags, * but doesn't hurt to check */ @@ -3713,6 +3715,45 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; }
+ /* assign defaults for hostdev passthrough */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int *backend = &hostdev->source.subsys.u.pci.backend; + + /* cache host state of passthrough support */ + if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) { + supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy(); + supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO(); + } + + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + if (!supportsPassthroughVFIO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + goto cleanup; + } + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!supportsPassthroughKVM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + goto cleanup; + } + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; + } + } + }
It seems like the body of this loop ought to be shared with the hotplug code, in a helper function, so we don't have so much extra code in this start method Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/20/13 13:57, Daniel P. Berrange wrote:
On Fri, Sep 20, 2013 at 11:06:59AM +0200, Peter Krempa wrote:
Check the presence of the selected PCI passthrough option when starting a VM. --- src/qemu/qemu_process.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index dd16f6c..e36ab99 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3481,6 +3481,8 @@ int qemuProcessStart(virConnectPtr conn, unsigned int stop_flags; virQEMUDriverConfigPtr cfg; virCapsPtr caps = NULL; + int supportsPassthroughKVM = -1; + int supportsPassthroughVFIO = -1;
/* Okay, these are just internal flags, * but doesn't hurt to check */ @@ -3713,6 +3715,45 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; }
+ /* assign defaults for hostdev passthrough */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int *backend = &hostdev->source.subsys.u.pci.backend; + + /* cache host state of passthrough support */ + if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) { + supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy(); + supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO(); + } + + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + if (!supportsPassthroughVFIO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + goto cleanup; + } + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!supportsPassthroughKVM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + goto cleanup; + } + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; + } + } + }
It seems like the body of this loop ought to be shared with the hotplug code, in a helper function, so we don't have so much extra code in this start method
Yeah, the basic idea of the code is the same between those two places. The issue with the hotplug code path is that it's using only one device, whereas here we iterate through a list. I will change this to a common func where the hotplug code will pass a fake array of just the single device. Peter
Daniel

On 09/23/2013 02:39 AM, Peter Krempa wrote:
On 09/20/13 13:57, Daniel P. Berrange wrote:
On Fri, Sep 20, 2013 at 11:06:59AM +0200, Peter Krempa wrote:
Check the presence of the selected PCI passthrough option when starting a VM. --- src/qemu/qemu_process.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index dd16f6c..e36ab99 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3481,6 +3481,8 @@ int qemuProcessStart(virConnectPtr conn, unsigned int stop_flags; virQEMUDriverConfigPtr cfg; virCapsPtr caps = NULL; + int supportsPassthroughKVM = -1; + int supportsPassthroughVFIO = -1;
/* Okay, these are just internal flags, * but doesn't hurt to check */ @@ -3713,6 +3715,45 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; }
+ /* assign defaults for hostdev passthrough */ + for (i = 0; i < vm->def->nhostdevs; i++) { + virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i]; + + if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + int *backend = &hostdev->source.subsys.u.pci.backend; + + /* cache host state of passthrough support */ + if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) { + supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy(); + supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO(); + } + + switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: + if (!supportsPassthroughVFIO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support VFIO PCI passthrough")); + goto cleanup; + } + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: + if (!supportsPassthroughKVM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support legacy PCI passthrough")); + goto cleanup; + } + + break; + + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST: + break; + } + } + } It seems like the body of this loop ought to be shared with the hotplug code, in a helper function, so we don't have so much extra code in this start method Yeah, the basic idea of the code is the same between those two places. The issue with the hotplug code path is that it's using only one device, whereas here we iterate through a list. I will change this to a common func where the hotplug code will pass a fake array of just the single device.
That's exactly how qemuPrepareHostdevPCIDevices() is used. Maybe this should just be another part of that function?

Prefer using VFIO (if available) to the legacy KVM device passthrough. With this patch a PCI passthrough device without the driver configured will be started with VFIO if it's available on the host. If not legacy KVM passthrough is checked and error is reported if it's not available. --- docs/formatdomain.html.in | 9 ++++---- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++- src/qemu/qemu_process.c | 18 ++++++++++++++- .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++-- 8 files changed, 52 insertions(+), 13 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3689399..6f3f7cf 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2755,11 +2755,10 @@ backend, which is compatible with UEFI SecureBoot) or "kvm" (for the legacy device assignment handled directly by the KVM kernel module)<span class="since">Since 1.0.5 (QEMU and KVM - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" - is the default used by libvirt when not explicitly provided, - but since the two are functionally equivalent, this default - could be changed in the future with no impact to domains that - don't specify anything. + only, requires kernel 3.6 or newer)</span>. The default, when + the driver name is not explicitly specified, is to check wether + VFIO is available and use it if it's the case. If VFIO is not + available, the legacy "kvm" assignment is attempted. </dd> <dt><code>readonly</code></dt> <dd>Indicates that the device is readonly, only supported by SCSI host diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9414ebf..43d8746 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType { /* the backend driver used for PCI hostdev devices */ typedef enum { - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1411533..0d7c02e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, switch ((virDomainHostdevSubsysPciBackendType) dev->source.subsys.u.pci.backend) { - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: virBufferAddLit(&buf, "vfio-pci"); break; diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 9320ab9..5a8fa44 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, return -1; switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (qemuHostdevHostSupportsPassthroughVFIO() && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + /* VFIO requires all of the guest's memory to be locked resident. + * In this case, the guest's memory may already be locked, but it + * doesn't hurt to "change" the limit to the same value. + */ + if (vm->def->mem.hard_limit) + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); + else + virProcessSetMaxMemLock(vm->pid, + vm->def->mem.max_balloon + (1024 * 1024)); + } else if (qemuHostdevHostSupportsPassthroughLegacy() && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, break; - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!qemuHostdevHostSupportsPassthroughLegacy()) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index e36ab99..9648aa0 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn, } switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (supportsPassthroughVFIO && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + } else if (supportsPassthroughKVM && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + goto cleanup; + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!supportsPassthroughVFIO) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn, } break; - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!supportsPassthroughKVM) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args index 214b246..557b733 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/qemu -S -M \ pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\ +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\ bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args index 184811b..3a3963c 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \ -usb -hda /dev/HostVG/QEMUGuest1 \ --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args index c850613..615047a 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\ romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \ --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ romfile=/etc/fake/bootrom.bin \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 -- 1.8.3.2

On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
Prefer using VFIO (if available) to the legacy KVM device passthrough.
With this patch a PCI passthrough device without the driver configured will be started with VFIO if it's available on the host. If not legacy KVM passthrough is checked and error is reported if it's not available. --- docs/formatdomain.html.in | 9 ++++---- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++- src/qemu/qemu_process.c | 18 ++++++++++++++- .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++-- 8 files changed, 52 insertions(+), 13 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3689399..6f3f7cf 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2755,11 +2755,10 @@ backend, which is compatible with UEFI SecureBoot) or "kvm" (for the legacy device assignment handled directly by the KVM kernel module)<span class="since">Since 1.0.5 (QEMU and KVM - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" - is the default used by libvirt when not explicitly provided, - but since the two are functionally equivalent, this default - could be changed in the future with no impact to domains that - don't specify anything. + only, requires kernel 3.6 or newer)</span>. The default, when + the driver name is not explicitly specified, is to check wether + VFIO is available and use it if it's the case. If VFIO is not + available, the legacy "kvm" assignment is attempted. </dd> <dt><code>readonly</code></dt> <dd>Indicates that the device is readonly, only supported by SCSI host diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9414ebf..43d8746 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
/* the backend driver used for PCI hostdev devices */ typedef enum { - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1411533..0d7c02e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
switch ((virDomainHostdevSubsysPciBackendType) dev->source.subsys.u.pci.backend) { - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); break;
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: virBufferAddLit(&buf, "vfio-pci"); break;
The code calling this function should have translated 'DEFAULT' into either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we raise an error.
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 9320ab9..5a8fa44 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, return -1;
switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (qemuHostdevHostSupportsPassthroughVFIO() && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + /* VFIO requires all of the guest's memory to be locked resident. + * In this case, the guest's memory may already be locked, but it + * doesn't hurt to "change" the limit to the same value. + */ + if (vm->def->mem.hard_limit) + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); + else + virProcessSetMaxMemLock(vm->pid, + vm->def->mem.max_balloon + (1024 * 1024)); + } else if (qemuHostdevHostSupportsPassthroughLegacy() && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
break;
- case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!qemuHostdevHostSupportsPassthroughLegacy()) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index e36ab99..9648aa0 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn, }
switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (supportsPassthroughVFIO && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + } else if (supportsPassthroughKVM && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + goto cleanup; + } + + break;
There's alot of duplication between here & the hotplug code - can we get this logic shared in a helper function ?
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!supportsPassthroughVFIO) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn, } break;
- case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!supportsPassthroughKVM) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args index 214b246..557b733 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/qemu -S -M \ pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\ +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\ bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args index 184811b..3a3963c 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \ -usb -hda /dev/HostVG/QEMUGuest1 \ --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args index c850613..615047a 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\ romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \ --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ romfile=/etc/fake/bootrom.bin \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
Why is the XML here changing ? Shouldn't the test suite be stable in what it generates regardless of what the host happens to support ? Danielx -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/20/13 13:56, Daniel P. Berrange wrote:
On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
Prefer using VFIO (if available) to the legacy KVM device passthrough.
With this patch a PCI passthrough device without the driver configured will be started with VFIO if it's available on the host. If not legacy KVM passthrough is checked and error is reported if it's not available. --- docs/formatdomain.html.in | 9 ++++---- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++- src/qemu/qemu_process.c | 18 ++++++++++++++- .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++-- 8 files changed, 52 insertions(+), 13 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3689399..6f3f7cf 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2755,11 +2755,10 @@ backend, which is compatible with UEFI SecureBoot) or "kvm" (for the legacy device assignment handled directly by the KVM kernel module)<span class="since">Since 1.0.5 (QEMU and KVM - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" - is the default used by libvirt when not explicitly provided, - but since the two are functionally equivalent, this default - could be changed in the future with no impact to domains that - don't specify anything. + only, requires kernel 3.6 or newer)</span>. The default, when + the driver name is not explicitly specified, is to check wether + VFIO is available and use it if it's the case. If VFIO is not + available, the legacy "kvm" assignment is attempted. </dd> <dt><code>readonly</code></dt> <dd>Indicates that the device is readonly, only supported by SCSI host diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9414ebf..43d8746 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
/* the backend driver used for PCI hostdev devices */ typedef enum { - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1411533..0d7c02e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
switch ((virDomainHostdevSubsysPciBackendType) dev->source.subsys.u.pci.backend) { - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); break;
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: virBufferAddLit(&buf, "vfio-pci"); break;
The code calling this function should have translated 'DEFAULT' into either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we raise an error.
If we change this to produce error in case DEFAULT is passed, then all the tests that are checking correctness of the generated commandline with passthrough devices will fail or need to be hardcoded with driver names as the default setting function can't be called in the testsuite. Do you think it's better to change the tests so that "DEFAULT" is avoided and error out if default is passed to the command line generator?
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 9320ab9..5a8fa44 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, return -1;
switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (qemuHostdevHostSupportsPassthroughVFIO() && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + /* VFIO requires all of the guest's memory to be locked resident. + * In this case, the guest's memory may already be locked, but it + * doesn't hurt to "change" the limit to the same value. + */ + if (vm->def->mem.hard_limit) + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); + else + virProcessSetMaxMemLock(vm->pid, + vm->def->mem.max_balloon + (1024 * 1024)); + } else if (qemuHostdevHostSupportsPassthroughLegacy() && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + } + + break; + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
break;
- case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!qemuHostdevHostSupportsPassthroughLegacy()) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index e36ab99..9648aa0 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn, }
switch ((virDomainHostdevSubsysPciBackendType) *backend) { + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: + if (supportsPassthroughVFIO && + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; + } else if (supportsPassthroughKVM && + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("host doesn't support passthrough of " + "host PCI devices")); + goto cleanup; + } + + break;
There's alot of duplication between here & the hotplug code - can we get this logic shared in a helper function ?
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: if (!supportsPassthroughVFIO) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn, } break;
- case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: if (!supportsPassthroughKVM) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args index 214b246..557b733 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/qemu -S -M \ pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\ +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\ bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args index 184811b..3a3963c 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \ -usb -hda /dev/HostVG/QEMUGuest1 \ --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args index c850613..615047a 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\ romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \ --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ romfile=/etc/fake/bootrom.bin \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
Why is the XML here changing ? Shouldn't the test suite be stable in what it generates regardless of what the host happens to support ?
The difference comes from the change of the default to VFIO in the commandline generator function.
Danielx

On 09/23/2013 02:42 AM, Peter Krempa wrote:
On 09/20/13 13:56, Daniel P. Berrange wrote:
On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
Prefer using VFIO (if available) to the legacy KVM device passthrough.
With this patch a PCI passthrough device without the driver configured will be started with VFIO if it's available on the host. If not legacy KVM passthrough is checked and error is reported if it's not available. --- docs/formatdomain.html.in | 9 ++++---- src/conf/domain_conf.h | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++- src/qemu/qemu_process.c | 18 ++++++++++++++- .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++-- 8 files changed, 52 insertions(+), 13 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3689399..6f3f7cf 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2755,11 +2755,10 @@ backend, which is compatible with UEFI SecureBoot) or "kvm" (for the legacy device assignment handled directly by the KVM kernel module)<span class="since">Since 1.0.5 (QEMU and KVM - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" - is the default used by libvirt when not explicitly provided, - but since the two are functionally equivalent, this default - could be changed in the future with no impact to domains that - don't specify anything. + only, requires kernel 3.6 or newer)</span>. The default, when + the driver name is not explicitly specified, is to check wether + VFIO is available and use it if it's the case. If VFIO is not + available, the legacy "kvm" assignment is attempted. </dd> <dt><code>readonly</code></dt> <dd>Indicates that the device is readonly, only supported by SCSI host diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9414ebf..43d8746 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
/* the backend driver used for PCI hostdev devices */ typedef enum { - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 1411533..0d7c02e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
switch ((virDomainHostdevSubsysPciBackendType) dev->source.subsys.u.pci.backend) { - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: virBufferAddLit(&buf, "pci-assign"); if (configfd && *configfd) virBufferAsprintf(&buf, ",configfd=%s", configfd); break;
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: virBufferAddLit(&buf, "vfio-pci"); break; The code calling this function should have translated 'DEFAULT' into either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we raise an error.
I thought what had been discussed in IRC was to modify the qemu capabilities in the caller according to stricter checks for properly operating vfio/legacy passthrough. Then at a lower level, we would still always prefer vfio when available, but that behavior could be changed by the higher level function (e.g. qemuProcessStart()) modifying the qemu capabilities to clear the vfio capability bit. So the value in the config object wouldn't be changed, just the flags in the capabilities. But I guess I must have misunderstood because, thinking about it, the capabilities object should be immutable (right?). So were you suggesting that we change the config value instead? But normally we don't want to change the config object either though, since that affects what gets saved when we save or migrate a domain. (Of course, you can't save or migrate a domain with a passed-through device anyway, so maybe that's not a problem :-) Am I just pointlessly arguing with myself here? At any rate, I don't see either of those things happening in this patch series - as far as I can see, the result of the series is that if someone specifies no driver in the config and vfio isn't available, then an error will be thrown even if legacy kvm device assignment *is* available. Instead, we should be instructing the commandline generator to use legacy assigment "pci-assign" instead of vfio in this case.
If we change this to produce error in case DEFAULT is passed, then all the tests that are checking correctness of the generated commandline with passthrough devices will fail or need to be hardcoded with driver names as the default setting function can't be called in the testsuite.
Do you think it's better to change the tests so that "DEFAULT" is avoided and error out if default is passed to the command line generator?
In the long run, the user shouldn't care at all which driver is used for passthrough, as they both produce the same results. "default" (i.e. complete absence of <driver name='xx'/>) should remain a valid option in the config, and that when that is the case it should mean "prefer vfio if available, fallback to legacy if necessary, or fail if neither is available.
--- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \ -usb -hda /dev/HostVG/QEMUGuest1 \ --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args index c850613..615047a 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ -net user,vlan=0,name=hostnet0 \ -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\ romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \ --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ romfile=/etc/fake/bootrom.bin \ -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 Why is the XML here changing ? Shouldn't the test suite be stable in what it generates regardless of what the host happens to support ? The difference comes from the change of the default to VFIO in the commandline generator function.
Right. Although this does indicate a change in the commandline that is generated for identical XML, that change is okay; it just means that for the case where both vfio and legacy assignment are available (in the cases of tests, purely based on the capability bits, which are provided as an arg to the toplevel test macro), we've decided to now prefer vfio over legacy.
participants (3)
-
Daniel P. Berrange
-
Laine Stump
-
Peter Krempa