PCI device assignment using VFIO requires read/write access by the
qemu process to /dev/vfio/vfio, and /dev/vfio/nn, where "nn" is the
VFIO group number that the assigned device belongs to (and can be
found with the function virPCIDeviceGetVFIOGroupDev()).
/dev/vfio/vfio can be accessible to any guest without danger
(according to vfio developers), so it is added to the static ACL.
The group device must be dynamically added to the cgroup ACL for each
vfio hostdev in two places:
1) for any devices in the persistent config when the domain is started
(done during qemuSetupCgroup())
2) at device attach time for any hotplug devices (done in
qemuDomainAttachHostDevice)
The group device must be removed from the ACL when a device is
"hot-unplugged" (in qemuDomainDetachHostDevice()).
Note that USB devices are already doing their own cgroup setup and
teardown in the hostdev-usb specific function. I chose to make the new
functions generic and call them in a common location though. We can
then move the USB-specific code (which is duplicated in two locations)
to this single location. I'll be posting a followup patch to do that.
---
src/qemu/qemu.conf | 2 +-
src/qemu/qemu_cgroup.c | 133 ++++++++++++++++++++++++++++++++++++-
src/qemu/qemu_cgroup.h | 6 +-
src/qemu/qemu_hotplug.c | 10 ++-
src/qemu/test_libvirtd_qemu.aug.in | 1 +
5 files changed, 148 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index 87bdf70..0f0a24c 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -241,7 +241,7 @@
# "/dev/null", "/dev/full", "/dev/zero",
# "/dev/random", "/dev/urandom",
# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-# "/dev/rtc","/dev/hpet"
+# "/dev/rtc","/dev/hpet", "/dev/vfio/vfio"
#]
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 891984a..92c53d9 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -39,7 +39,7 @@ static const char *const defaultDeviceACL[] = {
"/dev/null", "/dev/full", "/dev/zero",
"/dev/random", "/dev/urandom",
"/dev/ptmx", "/dev/kvm", "/dev/kqemu",
- "/dev/rtc", "/dev/hpet",
+ "/dev/rtc", "/dev/hpet", "/dev/vfio/vfio",
NULL,
};
#define DEVICE_PTY_MAJOR 136
@@ -214,6 +214,131 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev
ATTRIBUTE_UNUSED,
}
+int
+qemuSetupHostdevCGroup(virDomainObjPtr vm,
+                       virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    /* Only a PCI hostdev assigned through the VFIO backend needs its
+     * VFIO group device allowed (rw) in the devices cgroup; this is
+     * called for *all* hostdevs. Returns 0 on success/no-op, -1 on error.
+     */
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup allow %s for PCI device assignment", path);
+                rc = virCgroupAllowDevicePath(priv->cgroup, path,
+                                              VIR_CGROUP_DEVICE_RW);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "allow", path, "rw", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to allow access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
+
+int
+qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+                          virDomainHostdevDefPtr dev)
+{
+    int ret = -1;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    virPCIDevicePtr pci = NULL;
+    char *path = NULL;
+
+    /* Only a PCI hostdev assigned through the VFIO backend has a VFIO
+     * group device to deny in the devices cgroup; this is called for
+     * *all* hostdevs. Returns 0 on success/no-op, -1 on error.
+     */
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {
+
+        switch (dev->source.subsys.type) {
+        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
+            if (dev->source.subsys.u.pci.backend
+                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+                int rc;
+
+                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
+                                      dev->source.subsys.u.pci.addr.bus,
+                                      dev->source.subsys.u.pci.addr.slot,
+                                      dev->source.subsys.u.pci.addr.function);
+                if (!pci)
+                    goto cleanup;
+
+                if (!(path = virPCIDeviceGetVFIOGroupDev(pci)))
+                    goto cleanup;
+
+                VIR_DEBUG("Cgroup deny %s for PCI device assignment", path);
+                rc = virCgroupDenyDevicePath(priv->cgroup, path,
+                                             VIR_CGROUP_DEVICE_RWM);
+                virDomainAuditCgroupPath(vm, priv->cgroup,
+                                         "deny", path, "rwm", rc);
+                if (rc < 0) {
+                    virReportSystemError(-rc,
+                                         _("Unable to deny access "
+                                           "for device path %s"),
+                                         path);
+                    goto cleanup;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    ret = 0;
+cleanup:
+    virPCIDeviceFree(pci);
+    VIR_FREE(path);
+    return ret;
+}
+
+
int qemuInitCgroup(virQEMUDriverPtr driver,
virDomainObjPtr vm,
bool startup)
@@ -423,6 +548,12 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i];
virUSBDevicePtr usb;
+ if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+ goto cleanup;
+
+ /* NB: the code below here should be moved into
+ * qemuSetupHostdevCGroup()
+ */
if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
continue;
if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index e63f443..64d71a5 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -1,7 +1,7 @@
/*
* qemu_cgroup.h: QEMU cgroup management
*
- * Copyright (C) 2006-2007, 2009-2012 Red Hat, Inc.
+ * Copyright (C) 2006-2007, 2009-2013 Red Hat, Inc.
* Copyright (C) 2006 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
@@ -36,6 +36,10 @@ int qemuTeardownDiskCgroup(virDomainObjPtr vm,
int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev,
const char *path,
void *opaque);
+int qemuSetupHostdevCGroup(virDomainObjPtr vm,
+ virDomainHostdevDefPtr dev) ATTRIBUTE_RETURN_CHECK;
+int qemuTeardownHostdevCgroup(virDomainObjPtr vm,
+ virDomainHostdevDefPtr dev);
int qemuInitCgroup(virQEMUDriverPtr driver,
virDomainObjPtr vm,
bool startup);
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index f5fa1c4..eeee507 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -1225,9 +1225,12 @@ int qemuDomainAttachHostDevice(virQEMUDriverPtr driver,
virUSBDeviceListSteal(list, usb);
}
+ if (qemuSetupHostdevCGroup(vm, hostdev) < 0)
+ goto cleanup;
+
if (virSecurityManagerSetHostdevLabel(driver->securityManager,
vm->def, hostdev, NULL) < 0)
- goto cleanup;
+ goto teardown_cgroup;
switch (hostdev->source.subsys.type) {
case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
@@ -1257,6 +1260,9 @@ error:
vm->def, hostdev, NULL) < 0)
VIR_WARN("Unable to restore host device labelling on hotplug fail");
+teardown_cgroup:
+ qemuTeardownHostdevCgroup(vm, hostdev);
+
cleanup:
virObjectUnref(list);
if (usb)
@@ -2499,6 +2505,8 @@ int qemuDomainDetachThisHostDevice(virQEMUDriverPtr driver,
}
if (!ret) {
+ qemuTeardownHostdevCgroup(vm, detach);
+
if (virSecurityManagerRestoreHostdevLabel(driver->securityManager,
vm->def, detach, NULL) < 0) {
VIR_WARN("Failed to restore host device labelling");
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index 0aec997..26ca068 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -42,6 +42,7 @@ module Test_libvirtd_qemu =
{ "8" = "/dev/kqemu" }
{ "9" = "/dev/rtc" }
{ "10" = "/dev/hpet" }
+ { "11" = "/dev/vfio/vfio" }
}
{ "save_image_format" = "raw" }
{ "dump_image_format" = "raw" }
--
1.7.11.7