On 10/20/2025 8:07 AM, Michal Prívozník wrote:
On 10/17/25 18:54, Praveen K Paladugu wrote:
A domain that runs with TCG emulation does not need kvm device, so drop it from default device ACL.
To dynamically add devices to defaultDeviceACL, make it a GSList. This variable will be initialized when qemu driver is initalized.
Lastly, dynamically append /dev/kvm to default ACL only if the domain is of type VIR_DOMAIN_VIRT_KVM.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/qemu/qemu.conf.in | 3 +- src/qemu/qemu_cgroup.c | 52 ++++++++++++++++++++++-------- src/qemu/qemu_cgroup.h | 5 ++- src/qemu/qemu_conf.c | 14 ++++++-- src/qemu/qemu_conf.h | 2 +- src/qemu/qemu_driver.c | 4 +++ src/qemu/qemu_namespace.c | 12 +++---- src/qemu/qemu_process.c | 6 ++-- src/qemu/test_libvirtd_qemu.aug.in | 3 +- 9 files changed, 71 insertions(+), 30 deletions(-)
diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in index fc91ba8f08..0a8abd9544 100644 --- a/src/qemu/qemu.conf.in +++ b/src/qemu/qemu.conf.in @@ -618,8 +618,7 @@ #cgroup_device_acl = [ # "/dev/null", "/dev/full", "/dev/zero", # "/dev/random", "/dev/urandom", -# "/dev/ptmx", "/dev/kvm", -# "/dev/userfaultfd" +# "/dev/ptmx", "/dev/userfaultfd" #] # # RDMA migration requires the following extra files to be added to the list: diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index f10976c2b0..b2dcefd81e 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -38,17 +38,38 @@
VIR_LOG_INIT("qemu.qemu_cgroup");
-const char *const defaultDeviceACL[] = { +GSList *defaultDeviceACL; + +const char *const _defaultDeviceACL[] = { "/dev/null", "/dev/full", "/dev/zero", "/dev/random", "/dev/urandom", - "/dev/ptmx", "/dev/kvm", - "/dev/userfaultfd", + "/dev/ptmx", "/dev/userfaultfd", NULL, }; #define DEVICE_PTY_MAJOR 136 #define DEVICE_SND_MAJOR 116
+void +initDefaultDeviceACL(void) +{ + size_t i; + + for (i = 0; _defaultDeviceACL[i] != NULL; i++) { + defaultDeviceACL = g_slist_append(defaultDeviceACL, + g_strdup(_defaultDeviceACL[i])); + } +} + +void +updateDefaultDeviceACL(virDomainObj *vm) +{ + if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) { + defaultDeviceACL = g_slist_append(defaultDeviceACL, + g_strdup("/dev/kvm")); + } +}
So if this function is called multiple times then "/dev/kvm" will appear multiple times on the list. Worse, if the first VM is of KVM type, then /dev/kvm is added onto the list and the accelerator of the second one is irrelevant as /dev/kvm will be allowed in its devices CGroup controller.
What you want to do is:
1) remove "/dev/kvm" from the defaultDeviceACL array, 2) audit and fix all users of defaultDeviceACL and let them act accordingly if domain is of KVM type. For instance, qemuSetupDevicesCgroup() will need to have something like:
qemuSetupDevicesCgroup(vm) { ... qemuCgroupAllowDevicesPaths(vm, deviceACL, VIR_CGROUP_DEVICE_RW, false);
if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) { qemuCgroupAllowDevicePath(vm, "/dev/kvm", VIR_CGROUP_DEVICE_RW, false); } ... }
/* Modulo error checking, code fromatting, etc. */
+ static int qemuCgroupAllowDevicePath(virDomainObj *vm, const char *path, @@ -71,19 +92,19 @@ qemuCgroupAllowDevicePath(virDomainObj *vm,
static int qemuCgroupAllowDevicesPaths(virDomainObj *vm, - const char *const *deviceACL, + GSList *deviceACL, int perms, bool ignoreEacces) { - size_t i; + GSList *cur = NULL;
- for (i = 0; deviceACL[i] != NULL; i++) { - if (!virFileExists(deviceACL[i])) { - VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]); + for (cur = deviceACL; cur; cur = g_slist_next(cur)) { + if (!virFileExists(cur->data)) { + VIR_DEBUG("Ignoring non-existent device %s", (char *)cur->data); continue; }
- if (qemuCgroupAllowDevicePath(vm, deviceACL[i], perms, ignoreEacces) < 0) + if (qemuCgroupAllowDevicePath(vm, cur->data, perms, ignoreEacces) < 0) return -1; }
@@ -99,13 +120,13 @@ qemuCgroupDenyDevicePath(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - const char *const *deviceACL = (const char *const *)cfg->cgroupDeviceACL; int ret; + GSList *deviceACL = cfg->cgroupDeviceACL;
if (!deviceACL) deviceACL = defaultDeviceACL;
- if (g_strv_contains(deviceACL, path)) { + if (g_slist_find(deviceACL, path)) { VIR_DEBUG("Skipping deny of path %s in CGroups because it's in cgroupDeviceACL", path); return 0; @@ -556,8 +577,11 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm, virDomainMemoryDef *mem) { qemuDomainObjPrivate *priv = vm->privateData; - const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC, - QEMU_DEV_SGX_PROVISION, NULL }; + g_autoptr(virGSListString) sgxPaths = NULL; + + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_VEPVC)); + sgxPaths = g_slist_append(sgxPaths, g_strdup(QEMU_DEV_SGX_PROVISION)); + sgxPaths = g_slist_append(sgxPaths, NULL);
if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) return 0;
This change is unrelated (and needless anyway). With current code, the function requires 3 pointers on stack, with your change, 1 pointer is required on stack and 6 pointers on the heap and two strdup()-s. IOW at least two times more space.
This change was made to adopt the new definition of qemuCgroupAllowDevicesPaths which takes GSList* as an argument in place of "char *const *deviceACL". I agree with your feedback above that using a GSList for defaultDeviceACL and appending to the list will have undesired effects. I will refactor this patch in the next revision.
Michal
-- Regards, Praveen K Paladugu