On 10/04/2013 08:55 AM, Peter Krempa wrote:
Add code to check availability of PCI passhthrough using VFIO and
the
legacy KVM passthrough and use it when starting VMs and hotplugging
devices to live machine.
---
src/qemu/qemu_hostdev.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++
src/qemu/qemu_hostdev.h | 4 ++
src/qemu/qemu_hotplug.c | 4 ++
src/qemu/qemu_process.c | 5 ++
4 files changed, 139 insertions(+)
diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c
index 21fe47f..dbbc2b4 100644
--- a/src/qemu/qemu_hostdev.c
+++ b/src/qemu/qemu_hostdev.c
@@ -23,6 +23,11 @@
#include <config.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
#include "qemu_hostdev.h"
#include "virlog.h"
#include "virerror.h"
@@ -31,6 +36,7 @@
#include "virusb.h"
#include "virscsi.h"
#include "virnetdev.h"
+#include "virfile.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -1287,3 +1293,123 @@ void qemuDomainReAttachHostDevices(virQEMUDriverPtr driver,
qemuDomainReAttachHostScsiDevices(driver, def->name, def->hostdevs,
def->nhostdevs);
}
+
+
+static bool
+qemuHostdevHostSupportsPassthroughVFIO(void)
If you're concerned about name length, you could probably remove the
"Passthrough" and it would still make sense...
+{
+ DIR *iommuDir = NULL;
+ struct dirent *iommuGroup = NULL;
+ bool ret = false;
+
+ /* condition 1 - /sys/kernel/iommu_groups/ contains entries */
+ if (!(iommuDir = opendir("/sys/kernel/iommu_groups/")))
+ goto cleanup;
+
+ while ((iommuGroup = readdir(iommuDir))) {
+ /* skip ./ ../ */
+ if (STRPREFIX(iommuGroup->d_name, "."))
+ continue;
+
+ /* assume we found a group */
+ break;
+ }
+
+ if (!iommuGroup)
+ goto cleanup;
+ /* okay, iommu is on and recognizes groups */
+
+ /* condition 2 - /dev/vfio/vfio exists */
+ if (!virFileExists("/dev/vfio/vfio"))
+ goto cleanup;
Was this all that was suggested? At any rate it's a good start even if
there are other checks that can be added later.
+
+ ret = true;
+
+cleanup:
+ if (iommuDir)
+ closedir(iommuDir);
+
+ return ret;
+}
+
+
+#if HAVE_LINUX_KVM_H
+# include <linux/kvm.h>
+static bool
+qemuHostdevHostSupportsPassthroughLegacy(void)
+{
+ int kvmfd = -1;
+ bool ret = false;
+
+ if ((kvmfd = open("/dev/kvm", O_RDONLY)) < 0)
+ goto cleanup;
+
+# ifdef KVM_CAP_IOMMU
+ if ((ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_IOMMU)) <= 0)
+ goto cleanup;
+
+ ret = true;
+# endif
+
+cleanup:
+ VIR_FORCE_CLOSE(kvmfd);
+
+ return ret;
+}
+#else
+static bool
+qemuHostdevHostSupportsPassthroughLegacy(void)
+{
+ return false;
+}
+#endif
+
+bool
+qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
+ size_t nhostdevs)
+{
+ int supportsPassthroughKVM = -1;
+ int supportsPassthroughVFIO = -1;
+ size_t i;
+
+ /* assign defaults for hostdev passthrough */
+ for (i = 0; i < nhostdevs; i++) {
+ virDomainHostdevDefPtr hostdev = hostdevs[i];
+
+ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+ hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
+ int *backend = &hostdev->source.subsys.u.pci.backend;
+
+ /* cache host state of passthrough support */
+ if (supportsPassthroughKVM == -1 || supportsPassthroughVFIO == -1) {
+ supportsPassthroughKVM = qemuHostdevHostSupportsPassthroughLegacy();
+ supportsPassthroughVFIO = qemuHostdevHostSupportsPassthroughVFIO();
These should be checked once at the top of the function and their return
values cached.
+ }
+
+ switch ((virDomainHostdevSubsysPciBackendType) *backend) {
You know my opinion on these typecasts :-) (but don't let that stop you).
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
+ if (!supportsPassthroughVFIO) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("host doesn't support VFIO PCI
passthrough"));
+ return false;
+ }
+ break;
+
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
+ if (!supportsPassthroughKVM) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("host doesn't support legacy PCI
passthrough"));
+ return false;
+ }
+
+ break;
In the case of DEFAULT, we don't want to require support for legacy PCI
passthrough, we instead want to make sure that one or the other of the
methods is supported on the current host. So DEFAULT should have a
separate case that says "if (!(supportsPassthroughKVM ||
supportsPassthroughVFIO) { error }"
+
+ case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_TYPE_LAST:
+ break;
+ }
+ }
+ }
+
+ return true;
+}
diff --git a/src/qemu/qemu_hostdev.h b/src/qemu/qemu_hostdev.h
index 327d4d5..6d88830 100644
--- a/src/qemu/qemu_hostdev.h
+++ b/src/qemu/qemu_hostdev.h
@@ -69,4 +69,8 @@ int qemuDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev,
int qemuDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev,
char *stateDir);
+bool qemuHostdevHostVerifySupport(virDomainHostdevDefPtr *hostdevs,
+ size_t nhostdevs);
+
+
#endif /* __QEMU_HOSTDEV_H__ */
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index ae2cbc0..c72fdc3 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -1143,6 +1143,10 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
&hostdev, 1) < 0)
return -1;
+ /* verify the availability of passthrough support */
+ if (!qemuHostdevHostVerifySupport(&hostdev, 1))
+ goto error;
+
switch ((virDomainHostdevSubsysPciBackendType) *backend) {
case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 7a30a5e..4aa9582 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3567,6 +3567,11 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
}
+ /* check and assign device assignment settings */
+ if (!qemuHostdevHostVerifySupport(vm->def->hostdevs,
+ vm->def->nhostdevs))
+ goto cleanup;
+
In one case you're calling this very near to a call to
qemuPrepareHostdevPCIDevices(), in the other very near to a call to
qemuPrepareHostDevices, which itself calls
qemuPrepareHostdevPCIDevices(); I think this check should be made inside
qemuPrepareHostdevPCIDevices() so that the higher level code isn't
polluted with the detail.
/* network devices must be "prepared" before hostdevs,
because
* setting up a network device might create a new hostdev that
* will need to be setup.