[PATCH 0/4] qemu_cgroup: Slightly rework

These are inspired by my earlier patches that solved the same issue for namespaces: https://listman.redhat.com/archives/libvir-list/2022-March/229266.html Michal Prívozník (4): qemu_cgroup: Drop ENOENT special case for RNG devices qemu_cgroup: Introduce and use qemuCgroupAllowDevicePath() qemu_cgroup: Introduce and use qemuCgroupDenyDevicePath() qemu_cgroup: Don't deny devices from cgroupDeviceACL src/qemu/qemu_cgroup.c | 246 +++++++++++++++++------------------------ 1 file changed, 100 insertions(+), 146 deletions(-) -- 2.34.1

When allowing or denying RNG device in CGroups there's a special check if the backend device exists (errno == ENOENT) in which case success is returned to caller. This is in contrast with the rest of the functions and in fact wrong too - if the backend device doesn't exist then QEMU will fail opening it. Might as well signal error here. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 34b50ddd1d..9d47803fce 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -630,8 +630,7 @@ qemuSetupRNGCgroup(virDomainObj *vm, virDomainAuditCgroupPath(vm, priv->cgroup, "allow", rng->source.file, "rw", rv); - if (rv < 0 && - !virLastErrorIsSystemErrno(ENOENT)) + if (rv < 0) return -1; } @@ -657,8 +656,7 @@ qemuTeardownRNGCgroup(virDomainObj *vm, virDomainAuditCgroupPath(vm, priv->cgroup, "deny", rng->source.file, "rw", rv); - if (rv < 0 && - !virLastErrorIsSystemErrno(ENOENT)) + if (rv < 0) return -1; } -- 2.34.1

In all cases virCgroupAllowDevicePath() is followed by virDomainAuditCgroupPath(). Might as well pack that into one function and call it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 127 +++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 82 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 9d47803fce..258172c5a5 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -54,6 +54,26 @@ const char *const defaultDeviceACL[] = { #define DEVICE_SND_MAJOR 116 +static int +qemuCgroupAllowDevicePath(virDomainObj *vm, + const char *path, + int perms, + bool ignoreEacces) +{ + qemuDomainObjPrivate *priv = vm->privateData; + int ret; + + VIR_DEBUG("Allow path %s, perms: %s", + path, virCgroupGetDevicePermsString(perms)); + + ret = virCgroupAllowDevicePath(priv->cgroup, path, perms, ignoreEacces); + + virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path, + virCgroupGetDevicePermsString(perms), ret); + return ret; +} + + static int qemuSetupImagePathCgroup(virDomainObj *vm, const char *path, @@ -71,14 +91,7 @@ qemuSetupImagePathCgroup(virDomainObj *vm, if (!readonly) perms |= VIR_CGROUP_DEVICE_WRITE; - VIR_DEBUG("Allow path %s, perms: %s", - path, virCgroupGetDevicePermsString(perms)); - - rv = virCgroupAllowDevicePath(priv->cgroup, path, perms, true); - - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path, - virCgroupGetDevicePermsString(perms), - rv); + rv = qemuCgroupAllowDevicePath(vm, path, perms, true); if (rv < 0) return -1; @@ -96,12 +109,7 @@ qemuSetupImagePathCgroup(virDomainObj *vm, } for (n = targetPaths; n; n = n->next) { - rv = virCgroupAllowDevicePath(priv->cgroup, n->data, perms, false); - - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", n->data, - virCgroupGetDevicePermsString(perms), - rv); - if (rv < 0) + if (qemuCgroupAllowDevicePath(vm, n->data, perms, false) < 0) return -1; } @@ -278,7 +286,6 @@ qemuSetupChrSourceCgroup(virDomainObj *vm, virDomainChrSourceDef *source) { qemuDomainObjPrivate *priv = vm->privateData; - int ret; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; @@ -288,12 +295,8 @@ qemuSetupChrSourceCgroup(virDomainObj *vm, VIR_DEBUG("Process path '%s' for device", source->data.file.path); - ret = virCgroupAllowDevicePath(priv->cgroup, source->data.file.path, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", - source->data.file.path, "rw", ret); - - return ret; + return qemuCgroupAllowDevicePath(vm, source->data.file.path, + VIR_CGROUP_DEVICE_RW, false); } @@ -361,10 +364,8 @@ qemuSetupInputCgroup(virDomainObj *vm, switch (dev->type) { case VIR_DOMAIN_INPUT_TYPE_PASSTHROUGH: case VIR_DOMAIN_INPUT_TYPE_EVDEV: - VIR_DEBUG("Process path '%s' for input device", dev->source.evdev); - ret = virCgroupAllowDevicePath(priv->cgroup, dev->source.evdev, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", dev->source.evdev, "rw", ret); + return qemuCgroupAllowDevicePath(vm, dev->source.evdev, + VIR_CGROUP_DEVICE_RW, false); break; } @@ -413,7 +414,6 @@ qemuSetupHostdevCgroup(virDomainObj *vm, qemuDomainObjPrivate *priv = vm->privateData; g_autofree char *path = NULL; int perms; - int rv; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; @@ -421,24 +421,15 @@ qemuSetupHostdevCgroup(virDomainObj *vm, if (qemuDomainGetHostdevPath(dev, &path, &perms) < 0) return -1; - if (path) { - VIR_DEBUG("Cgroup allow %s perms=%d", path, perms); - rv = virCgroupAllowDevicePath(priv->cgroup, path, perms, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path, - virCgroupGetDevicePermsString(perms), - rv); - if (rv < 0) - return -1; + if (path && + qemuCgroupAllowDevicePath(vm, path, perms, false) < 0) { + return -1; } - if (qemuHostdevNeedsVFIO(dev)) { - VIR_DEBUG("Cgroup allow %s perms=%d", QEMU_DEV_VFIO, VIR_CGROUP_DEVICE_RW); - rv = virCgroupAllowDevicePath(priv->cgroup, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", - QEMU_DEV_VFIO, "rw", rv); - if (rv < 0) - return -1; + if (qemuHostdevNeedsVFIO(dev) && + qemuCgroupAllowDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; } return 0; @@ -510,7 +501,6 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm, virDomainMemoryDef *mem) { qemuDomainObjPrivate *priv = vm->privateData; - int rv; if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM && mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM) @@ -519,13 +509,8 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->nvdimmPath); - rv = virCgroupAllowDevicePath(priv->cgroup, mem->nvdimmPath, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", - mem->nvdimmPath, "rw", rv); - - return rv; + return qemuCgroupAllowDevicePath(vm, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RW, false); } @@ -557,17 +542,12 @@ qemuSetupGraphicsCgroup(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; const char *rendernode = virDomainGraphicsGetRenderNode(gfx); - int ret; if (!rendernode || !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - ret = virCgroupAllowDevicePath(priv->cgroup, rendernode, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", rendernode, - "rw", ret); - return ret; + return qemuCgroupAllowDevicePath(vm, rendernode, VIR_CGROUP_DEVICE_RW, false); } @@ -577,7 +557,6 @@ qemuSetupVideoCgroup(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; virDomainVideoAccelDef *accel = def->accel; - int ret; if (!accel) return 0; @@ -586,11 +565,8 @@ qemuSetupVideoCgroup(virDomainObj *vm, !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - ret = virCgroupAllowDevicePath(priv->cgroup, accel->rendernode, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", accel->rendernode, - "rw", ret); - return ret; + return qemuCgroupAllowDevicePath(vm, accel->rendernode, + VIR_CGROUP_DEVICE_RW, false); } static int @@ -617,21 +593,14 @@ qemuSetupRNGCgroup(virDomainObj *vm, virDomainRNGDef *rng) { qemuDomainObjPrivate *priv = vm->privateData; - int rv; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - if (rng->backend == VIR_DOMAIN_RNG_BACKEND_RANDOM) { - VIR_DEBUG("Setting Cgroup ACL for RNG device"); - rv = virCgroupAllowDevicePath(priv->cgroup, - rng->source.file, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", - rng->source.file, - "rw", rv); - if (rv < 0) - return -1; + if (rng->backend == VIR_DOMAIN_RNG_BACKEND_RANDOM && + qemuCgroupAllowDevicePath(vm, rng->source.file, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; } return 0; @@ -684,16 +653,12 @@ static int qemuSetupSEVCgroup(virDomainObj *vm) { qemuDomainObjPrivate *priv = vm->privateData; - int ret; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - ret = virCgroupAllowDevicePath(priv->cgroup, "/dev/sev", - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", "/dev/sev", - "rw", ret); - return ret; + return qemuCgroupAllowDevicePath(vm, "/dev/sev", + VIR_CGROUP_DEVICE_RW, false); } static int @@ -759,9 +724,7 @@ qemuSetupDevicesCgroup(virDomainObj *vm) continue; } - rv = virCgroupAllowDevicePath(priv->cgroup, deviceACL[i], - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "allow", deviceACL[i], "rw", rv); + rv = qemuCgroupAllowDevicePath(vm, deviceACL[i], VIR_CGROUP_DEVICE_RW, false); if (rv < 0 && !virLastErrorIsSystemErrno(ENOENT)) return -1; -- 2.34.1

In all cases virCgroupDenyDevicePath() is followed by virDomainAuditCgroupPath(). Might as well pack that into one function and call it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 106 +++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 62 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 258172c5a5..c46e7878bc 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -74,6 +74,26 @@ qemuCgroupAllowDevicePath(virDomainObj *vm, } +static int +qemuCgroupDenyDevicePath(virDomainObj *vm, + const char *path, + int perms, + bool ignoreEacces) +{ + qemuDomainObjPrivate *priv = vm->privateData; + int ret; + + VIR_DEBUG("Deny path %s, perms: %s", + path, virCgroupGetDevicePermsString(perms)); + + ret = virCgroupDenyDevicePath(priv->cgroup, path, perms, ignoreEacces); + + virDomainAuditCgroupPath(vm, priv->cgroup, "deny", path, + virCgroupGetDevicePermsString(perms), ret); + return ret; +} + + static int qemuSetupImagePathCgroup(virDomainObj *vm, const char *path, @@ -199,10 +219,8 @@ qemuTeardownImageCgroup(virDomainObj *vm, if (!hasNVMe && !qemuDomainNeedsVFIO(vm->def)) { - ret = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, perms, true); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - QEMU_DEV_VFIO, - virCgroupGetDevicePermsString(perms), ret); + ret = qemuCgroupDenyDevicePath(vm, QEMU_DEV_VFIO, perms, true); + if (ret < 0) return -1; } @@ -218,23 +236,16 @@ qemuTeardownImageCgroup(virDomainObj *vm, if (!hasPR && virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) { - VIR_DEBUG("Disabling device mapper control"); - ret = virCgroupDenyDevicePath(priv->cgroup, - QEMU_DEVICE_MAPPER_CONTROL_PATH, - perms, true); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - QEMU_DEVICE_MAPPER_CONTROL_PATH, - virCgroupGetDevicePermsString(perms), ret); + ret = qemuCgroupDenyDevicePath(vm, QEMU_DEVICE_MAPPER_CONTROL_PATH, + perms, true); + if (ret < 0) return ret; } VIR_DEBUG("Deny path %s", path); - ret = virCgroupDenyDevicePath(priv->cgroup, path, perms, true); - - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", path, - virCgroupGetDevicePermsString(perms), ret); + ret = qemuCgroupDenyDevicePath(vm, path, perms, true); /* If you're looking for a counter part to * qemuSetupImagePathCgroup you're at the right place. @@ -305,7 +316,6 @@ qemuTeardownChrSourceCgroup(virDomainObj *vm, virDomainChrSourceDef *source) { qemuDomainObjPrivate *priv = vm->privateData; - int ret; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; @@ -315,12 +325,8 @@ qemuTeardownChrSourceCgroup(virDomainObj *vm, VIR_DEBUG("Process path '%s' for device", source->data.file.path); - ret = virCgroupDenyDevicePath(priv->cgroup, source->data.file.path, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - source->data.file.path, "rw", ret); - - return ret; + return qemuCgroupDenyDevicePath(vm, source->data.file.path, + VIR_CGROUP_DEVICE_RW, false); } @@ -378,7 +384,6 @@ qemuTeardownInputCgroup(virDomainObj *vm, virDomainInputDef *dev) { qemuDomainObjPrivate *priv = vm->privateData; - int ret = 0; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; @@ -386,14 +391,12 @@ qemuTeardownInputCgroup(virDomainObj *vm, switch (dev->type) { case VIR_DOMAIN_INPUT_TYPE_PASSTHROUGH: case VIR_DOMAIN_INPUT_TYPE_EVDEV: - VIR_DEBUG("Process path '%s' for input device", dev->source.evdev); - ret = virCgroupDenyDevicePath(priv->cgroup, dev->source.evdev, - VIR_CGROUP_DEVICE_RWM, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", dev->source.evdev, "rwm", ret); + return qemuCgroupDenyDevicePath(vm, dev->source.evdev, + VIR_CGROUP_DEVICE_RWM, false); break; } - return ret; + return 0; } @@ -453,7 +456,6 @@ qemuTeardownHostdevCgroup(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; g_autofree char *path = NULL; - int rv; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; @@ -471,25 +473,16 @@ qemuTeardownHostdevCgroup(virDomainObj *vm, if (qemuDomainGetHostdevPath(dev, &path, NULL) < 0) return -1; - if (path) { - VIR_DEBUG("Cgroup deny %s", path); - rv = virCgroupDenyDevicePath(priv->cgroup, path, - VIR_CGROUP_DEVICE_RWM, false); - virDomainAuditCgroupPath(vm, priv->cgroup, - "deny", path, "rwm", rv); - if (rv < 0) - return -1; + if (path && + qemuCgroupDenyDevicePath(vm, path, VIR_CGROUP_DEVICE_RWM, false) < 0) { + return -1; } if (qemuHostdevNeedsVFIO(dev) && - !qemuDomainNeedsVFIO(vm->def)) { - VIR_DEBUG("Cgroup deny " QEMU_DEV_VFIO); - rv = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, - VIR_CGROUP_DEVICE_RWM, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - QEMU_DEV_VFIO, "rwm", rv); - if (rv < 0) - return -1; + !qemuDomainNeedsVFIO(vm->def) && + qemuCgroupDenyDevicePath(vm, QEMU_DEV_VFIO, + VIR_CGROUP_DEVICE_RWM, false) < 0) { + return -1; } return 0; @@ -519,7 +512,6 @@ qemuTeardownMemoryDevicesCgroup(virDomainObj *vm, virDomainMemoryDef *mem) { qemuDomainObjPrivate *priv = vm->privateData; - int rv; if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM && mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM) @@ -528,11 +520,8 @@ qemuTeardownMemoryDevicesCgroup(virDomainObj *vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - rv = virCgroupDenyDevicePath(priv->cgroup, mem->nvdimmPath, - VIR_CGROUP_DEVICE_RWM, false); - virDomainAuditCgroupPath(vm, priv->cgroup, - "deny", mem->nvdimmPath, "rwm", rv); - return rv; + return qemuCgroupDenyDevicePath(vm, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RWM, false); } @@ -612,21 +601,14 @@ qemuTeardownRNGCgroup(virDomainObj *vm, virDomainRNGDef *rng) { qemuDomainObjPrivate *priv = vm->privateData; - int rv; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; - if (rng->backend == VIR_DOMAIN_RNG_BACKEND_RANDOM) { - VIR_DEBUG("Tearing down Cgroup ACL for RNG device"); - rv = virCgroupDenyDevicePath(priv->cgroup, - rng->source.file, - VIR_CGROUP_DEVICE_RW, false); - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - rng->source.file, - "rw", rv); - if (rv < 0) - return -1; + if (rng->backend == VIR_DOMAIN_RNG_BACKEND_RANDOM && + qemuCgroupDenyDevicePath(vm, rng->source.file, + VIR_CGROUP_DEVICE_RW, false) < 0) { + return -1; } return 0; -- 2.34.1

On domain startup a couple of devices are allowed in the devices controller no matter the domain configuration. The aim is to allow devices crucial for QEMU or one of its libraries, or user is passing through a device (e.g. through additional cmd line arguments) and wants QEMU to access it. However, during unplug it may happen that a device is configured to use one of such devices and since we deny /dev nodes on hotplug we would deny such device too. For example, /dev/urandom belongs onto the list of implicit devices and users can hotplug and hotunplug an RNG device with /dev/urandom as backend. The fix is fortunately simple - just consult the list of implicit devices before removing the device from the namespace. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index c46e7878bc..aa0c927578 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -81,8 +81,19 @@ qemuCgroupDenyDevicePath(virDomainObj *vm, bool ignoreEacces) { qemuDomainObjPrivate *priv = vm->privateData; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); + const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL; int ret; + if (!deviceACL) + deviceACL = defaultDeviceACL; + + if (g_strv_contains(deviceACL, path)) { + VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL", + path); + return 0; + } + VIR_DEBUG("Deny path %s, perms: %s", path, virCgroupGetDevicePermsString(perms)); -- 2.34.1

On Tue, Mar 15, 2022 at 05:07:15PM +0100, Michal Privoznik wrote:
These are inspired by my earlier patches that solved the same issue for namespaces:
https://listman.redhat.com/archives/libvir-list/2022-March/229266.html
Michal Prívozník (4): qemu_cgroup: Drop ENOENT special case for RNG devices qemu_cgroup: Introduce and use qemuCgroupAllowDevicePath() qemu_cgroup: Introduce and use qemuCgroupDenyDevicePath() qemu_cgroup: Don't deny devices from cgroupDeviceACL
Reviewed-by: Pavel Hrdina <phrdina@redhat.com>
participants (2)
-
Michal Privoznik
-
Pavel Hrdina