
On Thu, Jul 11, 2019 at 17:54:12 +0200, Michal Privoznik wrote:
If a domain has an NVMe disk configured, then we need to allow it on devices CGroup so that qemu can access it. There is one caveat though - if an NVMe disk is read only we need CGroup to allow write too. This is because when opening the device, qemu does couple of ioctl()-s which are considered as write.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 59 +++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 12 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 19ca60905a..2a7fc07ac7 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -118,10 +118,29 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm, virStorageSourcePtr src, bool forceReadonly) { - if (!src->path || !virStorageSourceIsLocalStorage(src)) { - VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", - NULLSTR(src->path), virStorageTypeToString(src->type)); - return 0; + VIR_AUTOFREE(char *) path = NULL; + bool readonly = src->readonly || forceReadonly; + + if (src->type == VIR_STORAGE_TYPE_NVME) { + /* Even though disk is R/O we can't make it so in + * CGroups. QEMU will try to do some ioctl()-s over the + * device and such operations are R/W. */ + readonly = false;
Yeah, that should be fine. We tell qemu to open it R/O afterwards and we can't do better here.
+ + if (!(path = qemuDomainGetNVMeDiskPath(src->nvme))) + return -1; + + if (qemuSetupImagePathCgroup(vm, QEMU_DEV_VFIO, false) < 0) + return -1; + } else { + if (!src->path || !virStorageSourceIsLocalStorage(src)) { + VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", + NULLSTR(src->path), virStorageTypeToString(src->type)); + return 0; + } + + if (VIR_STRDUP(path, src->path) < 0) + return -1; }
if (virStoragePRDefIsManaged(src->pr) && @@ -129,7 +148,7 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm, qemuSetupImagePathCgroup(vm, QEMU_DEVICE_MAPPER_CONTROL_PATH, false) < 0) return -1;
- return qemuSetupImagePathCgroup(vm, src->path, src->readonly || forceReadonly); + return qemuSetupImagePathCgroup(vm, path, readonly); }
[...]
@@ -154,10 +174,25 @@ qemuTeardownImageCgroup(virDomainObjPtr vm, VIR_CGROUP_CONTROLLER_DEVICES)) return 0;
- if (!src->path || !virStorageSourceIsLocalStorage(src)) { - VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", - NULLSTR(src->path), virStorageTypeToString(src->type)); - return 0; + if (src->type == VIR_STORAGE_TYPE_NVME) { + if (!(path = qemuDomainGetNVMeDiskPath(src->nvme))) + return -1; + + ret = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, perms, true); + virDomainAuditCgroupPath(vm, priv->cgroup, "deny", + QEMU_DEV_VFIO, + virCgroupGetDevicePermsString(perms), ret);
What if the IOMMU group is shared by another disk? or perhaps even an hostdev?
+ if (ret < 0) + return -1; + } else { + if (!src->path || !virStorageSourceIsLocalStorage(src)) { + VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", + NULLSTR(src->path), virStorageTypeToString(src->type)); + return 0; + } + + if (VIR_STRDUP(path, src->path) < 0) + return -1; }
if (virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) {