Allow access to /dev/iommu and /dev/vfio/devices/vfio* when launching a QEMU
VM with the iommufd feature enabled.
Signed-off-by: Nathan Chen <nathanc(a)nvidia.com>
---
src/qemu/qemu_cgroup.c | 61 ++++++++++++++++++++++++++++
src/qemu/qemu_cgroup.h | 1 +
src/qemu/qemu_namespace.c | 44 +++++++++++++++++++++
src/security/security_apparmor.c | 11 ++++++
src/security/security_dac.c | 23 +++++++++++
src/security/security_selinux.c | 24 +++++++++++
src/util/virpci.c | 68 ++++++++++++++++++++++++++++++++
src/util/virpci.h | 1 +
8 files changed, 233 insertions(+)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f10976c2b0..73d0cb3a7a 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -462,6 +462,54 @@ qemuTeardownInputCgroup(virDomainObj *vm,
}
+/**
+ * qemuSetupIommufdCgroup:
+ * @vm: domain object
+ *
+ * If at least one hostdev of @vm has an iommufdId set, allow read/write
+ * access in the devices cgroup to every /dev/vfio/devices/vfio* node and,
+ * if it exists, to /dev/iommu.
+ *
+ * Nothing is done (and 0 is returned) when no hostdev uses iommufd, when
+ * the devices cgroup controller is not available, or when
+ * /dev/vfio/devices does not exist.
+ *
+ * Returns: 0 on success, -1 on failure.
+ */
+int
+qemuSetupIommufdCgroup(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    const char *vfioDevDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    /* A missing directory is not an error; use the IfExists variant so
+     * that no error is logged for a condition we deliberately ignore. */
+    if ((rc = virDirOpenIfExists(&dir, vfioDevDir)) < 0)
+        return -1;
+    if (rc == 0)
+        return 0;
+
+    while ((rc = virDirRead(dir, &dent, vfioDevDir)) > 0) {
+        /* Scoped to the loop body so each path is freed every iteration. */
+        g_autofree char *path = NULL;
+
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        path = g_strdup_printf("%s/%s", vfioDevDir, dent->d_name);
+
+        if (qemuCgroupAllowDevicePath(vm, path,
+                                      VIR_CGROUP_DEVICE_RW, false) < 0)
+            return -1;
+    }
+    /* Distinguish a read failure from the end of the directory. */
+    if (rc < 0)
+        return -1;
+
+    if (virFileExists("/dev/iommu") &&
+        qemuCgroupAllowDevicePath(vm, "/dev/iommu",
+                                  VIR_CGROUP_DEVICE_RW, false) < 0)
+        return -1;
+
+    return 0;
+}
+
+
/**
* qemuSetupHostdevCgroup:
* vm: domain object
@@ -760,6 +808,7 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
const char *const *deviceACL = (const char *const *) cfg->cgroupDeviceACL;
int rv = -1;
+ int iommufd = 0;
size_t i;
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
@@ -830,6 +879,18 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
return -1;
}
+ for (i = 0; i < vm->def->nhostdevs; i++) {
+ if (vm->def->hostdevs[i]->iommufdId) {
+ iommufd = 1;
+ break;
+ }
+ }
+
+ if (iommufd == 1) {
+ if (qemuSetupIommufdCgroup(vm) < 0)
+ return -1;
+ }
+
for (i = 0; i < vm->def->nmems; i++) {
if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0)
return -1;
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3668034cde..bea677ba3c 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -42,6 +42,7 @@ int qemuSetupHostdevCgroup(virDomainObj *vm,
int qemuTeardownHostdevCgroup(virDomainObj *vm,
virDomainHostdevDef *dev)
G_GNUC_WARN_UNUSED_RESULT;
+int qemuSetupIommufdCgroup(virDomainObj *vm);
int qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
virDomainMemoryDef *mem);
int qemuTeardownMemoryDevicesCgroup(virDomainObj *vm,
diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index f72da83929..965a304f7f 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -677,6 +677,47 @@ qemuDomainSetupLaunchSecurity(virDomainObj *vm,
}
+/**
+ * qemuDomainSetupIommufd:
+ * @vm: domain object
+ * @paths: list to which the collected device paths are prepended; the
+ *         strings are newly allocated and owned by the list
+ *
+ * If at least one hostdev of @vm has an iommufdId set, add every
+ * /dev/vfio/devices/vfio* node and, if it exists, /dev/iommu to @paths.
+ *
+ * Nothing is added (and 0 is returned) when no hostdev uses iommufd or
+ * when /dev/vfio/devices does not exist.
+ *
+ * Returns: 0 on success, -1 on failure.
+ */
+static int
+qemuDomainSetupIommufd(virDomainObj *vm,
+                       GSList **paths)
+{
+    const char *vfioDevDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    /* A missing directory is not an error; use the IfExists variant so
+     * that no error is logged for a condition we deliberately ignore. */
+    if ((rc = virDirOpenIfExists(&dir, vfioDevDir)) < 0)
+        return -1;
+    if (rc == 0)
+        return 0;
+
+    while ((rc = virDirRead(dir, &dent, vfioDevDir)) > 0) {
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        *paths = g_slist_prepend(*paths,
+                                 g_strdup_printf("%s/%s", vfioDevDir,
+                                                 dent->d_name));
+    }
+    /* Distinguish a read failure from the end of the directory. */
+    if (rc < 0)
+        return -1;
+
+    if (virFileExists("/dev/iommu"))
+        *paths = g_slist_prepend(*paths, g_strdup("/dev/iommu"));
+
+    return 0;
+}
+
+
static int
qemuNamespaceMknodPaths(virDomainObj *vm,
GSList *paths,
@@ -700,6 +741,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfig *cfg,
if (qemuDomainSetupAllDisks(vm, &paths) < 0)
return -1;
+ if (qemuDomainSetupIommufd(vm, &paths) < 0)
+ return -1;
+
if (qemuDomainSetupAllHostdevs(vm, &paths) < 0)
return -1;
diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c
index 68ac39611f..73dc750c94 100644
--- a/src/security/security_apparmor.c
+++ b/src/security/security_apparmor.c
@@ -856,6 +856,17 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr,
}
ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr);
VIR_FREE(vfioGroupDev);
+
+ if (dev->iommufdId) {
+ g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+ if (vfiofdDev) {
+ int ret2 = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr);
+ if (ret2 < 0)
+ ret = ret2;
+ } else {
+ return -1;
+ }
+ }
} else {
ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr);
}
diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index 2f788b872a..327e36466d 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -1290,6 +1290,18 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr,
ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev,
false,
&cbdata);
+ if (dev->iommufdId) {
+ g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+ if (vfiofdDev) {
+ int ret2 = virSecurityDACSetHostdevLabelHelper(vfiofdDev,
+ false,
+ &cbdata);
+ if (ret2 < 0)
+ ret = ret2;
+ } else {
+ return -1;
+ }
+ }
} else {
ret = virPCIDeviceFileIterate(pci,
virSecurityDACSetPCILabel,
@@ -1450,6 +1462,17 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr,
ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL,
vfioGroupDev, false);
+ if (dev->iommufdId) {
+ g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+ if (vfiofdDev) {
+ int ret2 = virSecurityDACRestoreFileLabelInternal(mgr, NULL,
+ vfiofdDev, false);
+ if (ret2 < 0)
+ ret = ret2;
+ } else {
+ return -1;
+ }
+ }
} else {
ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr);
}
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c
index fa5d1568eb..60dcadd839 100644
--- a/src/security/security_selinux.c
+++ b/src/security/security_selinux.c
@@ -2248,6 +2248,19 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr,
ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev,
false,
&data);
+ if (dev->iommufdId) {
+ g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+ if (vfiofdDev) {
+ int ret2 = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev,
+ false,
+ &data);
+ if (ret2 < 0)
+ ret = ret2;
+ } else {
+ return -1;
+ }
+ }
+
} else {
ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel,
&data);
}
@@ -2481,6 +2494,17 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager
*mgr,
return -1;
ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false);
+
+ if (dev->iommufdId) {
+ g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+ if (vfiofdDev) {
+ int ret2 = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev,
false);
+ if (ret2 < 0)
+ ret = ret2;
+ } else {
+ return -1;
+ }
+ }
} else {
ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr);
}
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 90617e69c6..6e6e5e47c0 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2478,6 +2478,74 @@ virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev)
return g_strdup_printf("/dev/vfio/%s", groupFile);
}
+/**
+ * virPCIDeviceGetIOMMUFDDev:
+ * @dev: PCI device
+ *
+ * Find the per-device VFIO character device node of @dev
+ * (e.g. "/dev/vfio/devices/vfio15").
+ *
+ * The node name is looked up first in the device's own sysfs "vfio-dev"
+ * directory and, failing that, by scanning /sys/class/vfio-dev for an
+ * entry whose "device" link resolves to @dev.
+ *
+ * Returns: the path of the device node (caller frees), or NULL with an
+ * error reported if it cannot be determined or does not exist.
+ */
+char *
+virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev)
+{
+    const char *classPath = "/sys/class/vfio-dev";
+    const char *pci_addr = virPCIDeviceGetName(dev);
+    g_autofree char *devPath = NULL;
+    g_autofree char *vfiodev = NULL;
+    /* One handle per directory so both are closed automatically on every
+     * return path; never reset them to NULL by hand. */
+    g_autoptr(DIR) devDir = NULL;
+    g_autoptr(DIR) classDir = NULL;
+    struct dirent *entry;
+    int rc;
+
+    if (!pci_addr) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("PCI device has no name"));
+        return NULL;
+    }
+
+    /* First try: look in the PCI device's vfio-dev subdirectory.
+     * A missing directory just means we fall back to the class scan. */
+    devPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev", pci_addr);
+
+    if ((rc = virDirOpenIfExists(&devDir, devPath)) < 0)
+        return NULL;
+
+    if (rc > 0) {
+        while ((rc = virDirRead(devDir, &entry, devPath)) > 0) {
+            if (!g_str_has_prefix(entry->d_name, "vfio"))
+                continue;
+
+            vfiodev = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
+            break;
+        }
+        if (rc < 0)
+            return NULL;
+    }
+
+    /* Second try: scan /sys/class/vfio-dev for the matching device. */
+    if (!vfiodev) {
+        if ((rc = virDirOpenIfExists(&classDir, classPath)) < 0)
+            return NULL;
+
+        if (rc > 0) {
+            while ((rc = virDirRead(classDir, &entry, classPath)) > 0) {
+                g_autofree char *dev_link = NULL;
+                g_autofree char *target = NULL;
+                const char *base;
+
+                if (!g_str_has_prefix(entry->d_name, "vfio"))
+                    continue;
+
+                dev_link = g_strdup_printf("%s/%s/device",
+                                           classPath, entry->d_name);
+
+                if (virFileResolveLink(dev_link, &target) < 0)
+                    continue;
+
+                /* Compare only the last path component: a substring
+                 * match would also hit devices that merely sit behind a
+                 * bridge with this address. */
+                base = strrchr(target, '/');
+                base = base ? base + 1 : target;
+
+                if (STREQ(base, pci_addr)) {
+                    vfiodev = g_strdup_printf("/dev/vfio/devices/%s",
+                                              entry->d_name);
+                    break;
+                }
+            }
+            if (rc < 0)
+                return NULL;
+        }
+    }
+
+    /* Callers treat NULL as a hard failure, so make sure an error is set. */
+    if (!vfiodev || !virFileExists(vfiodev)) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("unable to find VFIO character device for PCI device %1$s"),
+                       pci_addr);
+        return NULL;
+    }
+
+    return g_steal_pointer(&vfiodev);
+}
+
static int
virPCIDeviceDownstreamLacksACS(virPCIDevice *dev)
{
diff --git a/src/util/virpci.h b/src/util/virpci.h
index fc538566e1..996ffab2f9 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -203,6 +203,7 @@ int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress *addr);
char *virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr);
bool virPCIDeviceExists(const virPCIDeviceAddress *addr);
char *virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev);
+char *virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev);
int virPCIDeviceIsAssignable(virPCIDevice *dev,
int strict_acs_check);
--
2.43.0