On Thu, Jul 11, 2019 at 17:54:12 +0200, Michal Privoznik wrote:
If a domain has an NVMe disk configured, then we need to allow it
on devices CGroup so that qemu can access it. There is one caveat
though - if an NVMe disk is read only we need CGroup to allow
write too. This is because when opening the device, qemu does
couple of ioctl()-s which are considered as write.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/qemu/qemu_cgroup.c | 59 +++++++++++++++++++++++++++++++++---------
1 file changed, 47 insertions(+), 12 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 19ca60905a..2a7fc07ac7 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -118,10 +118,29 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm,
virStorageSourcePtr src,
bool forceReadonly)
{
- if (!src->path || !virStorageSourceIsLocalStorage(src)) {
- VIR_DEBUG("Not updating cgroups for disk path '%s', type:
%s",
- NULLSTR(src->path), virStorageTypeToString(src->type));
- return 0;
+ VIR_AUTOFREE(char *) path = NULL;
+ bool readonly = src->readonly || forceReadonly;
+
+ if (src->type == VIR_STORAGE_TYPE_NVME) {
+ /* Even though disk is R/O we can't make it so in
+ * CGroups. QEMU will try to do some ioctl()-s over the
+ * device and such operations are R/W. */
+ readonly = false;
Yeah, that should be fine. We tell qemu to open it R/O afterwards and we
can't do better here.
+
+ if (!(path = qemuDomainGetNVMeDiskPath(src->nvme)))
+ return -1;
+
+ if (qemuSetupImagePathCgroup(vm, QEMU_DEV_VFIO, false) < 0)
+ return -1;
+ } else {
+ if (!src->path || !virStorageSourceIsLocalStorage(src)) {
+ VIR_DEBUG("Not updating cgroups for disk path '%s', type:
%s",
+ NULLSTR(src->path), virStorageTypeToString(src->type));
+ return 0;
+ }
+
+ if (VIR_STRDUP(path, src->path) < 0)
+ return -1;
}
if (virStoragePRDefIsManaged(src->pr) &&
@@ -129,7 +148,7 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm,
qemuSetupImagePathCgroup(vm, QEMU_DEVICE_MAPPER_CONTROL_PATH, false) < 0)
return -1;
- return qemuSetupImagePathCgroup(vm, src->path, src->readonly ||
forceReadonly);
+ return qemuSetupImagePathCgroup(vm, path, readonly);
}
[...]
@@ -154,10 +174,25 @@ qemuTeardownImageCgroup(virDomainObjPtr vm,
VIR_CGROUP_CONTROLLER_DEVICES))
return 0;
- if (!src->path || !virStorageSourceIsLocalStorage(src)) {
- VIR_DEBUG("Not updating cgroups for disk path '%s', type:
%s",
- NULLSTR(src->path), virStorageTypeToString(src->type));
- return 0;
+ if (src->type == VIR_STORAGE_TYPE_NVME) {
+ if (!(path = qemuDomainGetNVMeDiskPath(src->nvme)))
+ return -1;
+
+ ret = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, perms, true);
+ virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
+ QEMU_DEV_VFIO,
+ virCgroupGetDevicePermsString(perms), ret);
What if the IOMMU group is shared by another disk? or perhaps even an
hostdev?
+ if (ret < 0)
+ return -1;
+ } else {
+ if (!src->path || !virStorageSourceIsLocalStorage(src)) {
+ VIR_DEBUG("Not updating cgroups for disk path '%s', type:
%s",
+ NULLSTR(src->path), virStorageTypeToString(src->type));
+ return 0;
+ }
+
+ if (VIR_STRDUP(path, src->path) < 0)
+ return -1;
}
if (virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) {