[libvirt] [PATCH 0/2] Fix case with non-root domain and hugepages

Yet another bug found due to my work on containerizing qemu. Michal Privoznik (2): qemu: Create hugepage path on per domain basis security: Implement virSecurityManagerSetHugepages src/qemu/qemu_command.c | 4 +- src/qemu/qemu_conf.c | 45 ++++++++++++++++------ src/qemu/qemu_conf.h | 16 +++++--- src/qemu/qemu_driver.c | 19 +++------ src/qemu/qemu_process.c | 25 +++++++++++- src/security/security_dac.c | 11 ++++++ src/security/security_selinux.c | 10 +++++ .../qemuxml2argv-hugepages-numa.args | 4 +- .../qemuxml2argv-hugepages-pages.args | 14 +++---- .../qemuxml2argv-hugepages-pages2.args | 2 +- .../qemuxml2argv-hugepages-pages3.args | 2 +- .../qemuxml2argv-hugepages-pages5.args | 2 +- .../qemuxml2argv-hugepages-shared.args | 12 +++--- tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- .../qemuxml2argv-memory-hotplug-dimm-addr.args | 4 +- .../qemuxml2argv-memory-hotplug-dimm.args | 4 +- 16 files changed, 118 insertions(+), 58 deletions(-) -- 2.8.4

If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path: /hugepageMount/libvirt/qemu Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.: /hugepageMount/libvirt/qemu/domainName and chown domainName dir to the user that domain is configured to run under. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 +- src/qemu/qemu_conf.c | 45 ++++++++++++++++------ src/qemu/qemu_conf.h | 16 +++++--- src/qemu/qemu_driver.c | 19 +++------ src/qemu/qemu_process.c | 25 +++++++++++- .../qemuxml2argv-hugepages-numa.args | 4 +- .../qemuxml2argv-hugepages-pages.args | 14 +++---- .../qemuxml2argv-hugepages-pages2.args | 2 +- .../qemuxml2argv-hugepages-pages3.args | 2 +- .../qemuxml2argv-hugepages-pages5.args | 2 +- .../qemuxml2argv-hugepages-shared.args | 12 +++--- tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- .../qemuxml2argv-memory-hotplug-dimm-addr.args | 4 +- .../qemuxml2argv-memory-hotplug-dimm.args | 4 +- 14 files changed, 97 insertions(+), 58 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 4a5fce3..e59aef0 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3303,7 +3303,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, return -1; if (pagesize) { - if (qemuGetHupageMemPath(cfg, pagesize, &mem_path) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) goto cleanup; *backendType = "memory-backend-file"; @@ -7178,7 +7178,7 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, return -1; } - if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1; virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 0ed88f5..942ad86 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1457,7 +1457,7 @@ qemuTranslateSnapshotDiskSourcePool(virConnectPtr conn ATTRIBUTE_UNUSED, } char * -qemuGetHugepagePath(virHugeTLBFSPtr hugepage) +qemuGetBaseHugepagePath(virHugeTLBFSPtr hugepage) { char *ret; @@ -1468,8 +1468,26 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage) } +char * +qemuGetDomainHugepagePath(const virDomainDef *def, + virHugeTLBFSPtr hugepage) +{ + char *base = qemuGetBaseHugepagePath(hugepage); + char *ret; + + if (!base || + virAsprintf(&ret, "%s/%s", base, def->name) < 0) { + VIR_FREE(base); + return NULL; + } + + return ret; +} + + /** - * qemuGetDefaultHugepath: + * qemuGetDomainDefaultHugepath: + * @def: domain definition * @hugetlbfs: array of configured hugepages * @nhugetlbfs: number of item in the array * @@ -1478,8 +1496,9 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage) * Returns 0 on success, -1 otherwise. * */ char * -qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs, - size_t nhugetlbfs) +qemuGetDomainDefaultHugepath(const virDomainDef *def, + virHugeTLBFSPtr hugetlbfs, + size_t nhugetlbfs) { size_t i; @@ -1490,12 +1509,12 @@ qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs, if (i == nhugetlbfs) i = 0; - return qemuGetHugepagePath(&hugetlbfs[i]); + return qemuGetDomainHugepagePath(def, &hugetlbfs[i]); } /** - * qemuGetHupageMemPath: Construct HP enabled memory backend path + * qemuGetDomainHupageMemPath: Construct HP enabled memory backend path * * If no specific hugepage size is requested (@pagesize is zero) * the default hugepage size is used). @@ -1505,9 +1524,10 @@ qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs, * -1 otherwise. */ int -qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg, - unsigned long long pagesize, - char **memPath) +qemuGetDomainHupageMemPath(const virDomainDef *def, + virQEMUDriverConfigPtr cfg, + unsigned long long pagesize, + char **memPath) { size_t i = 0; @@ -1519,8 +1539,9 @@ qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg, } if (!pagesize) { - if (!(*memPath = qemuGetDefaultHugepath(cfg->hugetlbfs, - cfg->nhugetlbfs))) + if (!(*memPath = qemuGetDomainDefaultHugepath(def, + cfg->hugetlbfs, + cfg->nhugetlbfs))) return -1; } else { for (i = 0; i < cfg->nhugetlbfs; i++) { @@ -1536,7 +1557,7 @@ qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg, return -1; } - if (!(*memPath = qemuGetHugepagePath(&cfg->hugetlbfs[i]))) + if (!(*memPath = qemuGetDomainHugepagePath(def, &cfg->hugetlbfs[i]))) return -1; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index f6e3257..d191e10 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -335,11 +335,15 @@ virDomainXMLOptionPtr virQEMUDriverCreateXMLConf(virQEMUDriverPtr driver); int qemuTranslateSnapshotDiskSourcePool(virConnectPtr conn, virDomainSnapshotDiskDefPtr def); -char * qemuGetHugepagePath(virHugeTLBFSPtr hugepage); -char * qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs, - size_t nhugetlbfs); +char * qemuGetBaseHugepagePath(virHugeTLBFSPtr hugepage); +char * qemuGetDomainHugepagePath(const virDomainDef *def, + virHugeTLBFSPtr hugepage); +char * qemuGetDomainDefaultHugepath(const virDomainDef *def, + virHugeTLBFSPtr hugetlbfs, + size_t nhugetlbfs); -int qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg, - unsigned long long pagesize, - char **memPath); +int qemuGetDomainHupageMemPath(const virDomainDef *def, + virQEMUDriverConfigPtr cfg, + unsigned long long pagesize, + char **memPath); #endif /* __QEMUD_CONF_H */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d039255..5d9ab2b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -852,7 +852,7 @@ qemuStateInitialize(bool privileged, * it, since we can't assume the root mount point has permissions that * will let our spawned QEMU instances use it. */ for (i = 0; i < cfg->nhugetlbfs; i++) { - hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]); + hugepagePath = qemuGetBaseHugepagePath(&cfg->hugetlbfs[i]); if (!hugepagePath) goto error; @@ -863,19 +863,10 @@ qemuStateInitialize(bool privileged, hugepagePath); goto error; } - if (privileged) { - if (virFileUpdatePerm(cfg->hugetlbfs[i].mnt_dir, - 0, S_IXGRP | S_IXOTH) < 0) - goto error; - if (chown(hugepagePath, cfg->user, cfg->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership on %s to %d:%d"), - hugepagePath, - (int) cfg->user, - (int) cfg->group); - goto error; - } - } + if (privileged && + virFileUpdatePerm(cfg->hugetlbfs[i].mnt_dir, + 0, S_IXGRP | S_IXOTH) < 0) + goto error; VIR_FREE(hugepagePath); } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 3552a31..3bd3368 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -5273,11 +5273,19 @@ qemuProcessPrepareHost(virQEMUDriverPtr driver, if (vm->def->mem.nhugepages) { for (i = 0; i < cfg->nhugetlbfs; i++) { - char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]); + char *hugepagePath = qemuGetDomainHugepagePath(vm->def, &cfg->hugetlbfs[i]); if (!hugepagePath) goto cleanup; + if (virFileMakePathWithMode(hugepagePath, 0700) < 0) { + virReportSystemError(errno, + _("Unable to create %s"), + hugepagePath); + VIR_FREE(hugepagePath); + goto cleanup; + } + if (virSecurityManagerSetHugepages(driver->securityManager, vm->def, hugepagePath) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -6146,6 +6154,21 @@ void qemuProcessStop(virQEMUDriverPtr driver, networkReleaseActualDevice(vm->def, net); } + if (vm->def->mem.nhugepages) { + for (i = 0; i < cfg->nhugetlbfs; i++) { + char *hugepagePath = qemuGetDomainHugepagePath(vm->def, &cfg->hugetlbfs[i]); + + if (!hugepagePath) + continue; + + if (rmdir(hugepagePath) < 0) + VIR_WARN("Unable to remove hugepage path: %s (errno=%d)", + hugepagePath, errno); + + VIR_FREE(hugepagePath); + } + } + retry: if ((ret = qemuRemoveCgroup(vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-numa.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-numa.args index 7b90784..bd562a8 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-numa.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-numa.args @@ -11,10 +11,10 @@ QEMU_AUDIO_DRV=spice \ -m size=1048576k,slots=16,maxmem=1099511627776k \ -smp 2,sockets=2,cores=1,threads=1 \ -mem-prealloc \ --mem-path /dev/hugepages2M/libvirt/qemu \ +-mem-path /dev/hugepages2M/libvirt/qemu/fedora \ -numa node,nodeid=0,cpus=0-1,mem=1024 \ -object memory-backend-file,id=memdimm0,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=1-3,\ +mem-path=/dev/hugepages1G/libvirt/qemu/fedora,size=1073741824,host-nodes=1-3,\ policy=bind \ -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -uuid 63840878-0deb-4095-97e6-fc444d9bc9fa \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.args index 9f0e696..e0aa151 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.args @@ -11,19 +11,19 @@ QEMU_AUDIO_DRV=none \ -m 4096 \ -smp 4,sockets=4,cores=1,threads=1 \ -object memory-backend-file,id=ram-node0,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=0-3,\ -policy=bind \ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,size=1073741824,\ +host-nodes=0-3,policy=bind \ -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ -object memory-backend-file,id=ram-node1,prealloc=yes,\ -mem-path=/dev/hugepages2M/libvirt/qemu,size=1073741824,host-nodes=0-3,\ -policy=bind \ +mem-path=/dev/hugepages2M/libvirt/qemu/QEMUGuest1,size=1073741824,\ +host-nodes=0-3,policy=bind \ -numa node,nodeid=1,cpus=1,memdev=ram-node1 \ -object memory-backend-file,id=ram-node2,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=0-3,\ -policy=bind \ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,size=1073741824,\ +host-nodes=0-3,policy=bind \ -numa node,nodeid=2,cpus=2,memdev=ram-node2 \ -object memory-backend-file,id=ram-node3,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=3,\ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,size=1073741824,host-nodes=3,\ policy=bind \ -numa node,nodeid=3,cpus=3,memdev=ram-node3 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages2.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages2.args index a37f03d..b4cd088 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages2.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages2.args @@ -11,7 +11,7 @@ QEMU_AUDIO_DRV=none \ -m 1024 \ -smp 2,sockets=2,cores=1,threads=1 \ -mem-prealloc \ --mem-path /dev/hugepages2M/libvirt/qemu \ +-mem-path /dev/hugepages2M/libvirt/qemu/SomeDummyHugepagesGuest \ -numa node,nodeid=0,cpus=0,mem=256 \ -numa node,nodeid=1,cpus=1,mem=768 \ -uuid ef1bdff4-27f3-4e85-a807-5fb4d58463cc \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages3.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages3.args index 57dd3fa..bf28d74 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages3.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages3.args @@ -13,7 +13,7 @@ QEMU_AUDIO_DRV=none \ -object memory-backend-ram,id=ram-node0,size=268435456 \ -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ -object memory-backend-file,id=ram-node1,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=805306368 \ +mem-path=/dev/hugepages1G/libvirt/qemu/SomeDummyHugepagesGuest,size=805306368 \ -numa node,nodeid=1,cpus=1,memdev=ram-node1 \ -uuid ef1bdff4-27f3-4e85-a807-5fb4d58463cc \ -nographic \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args index a826149..a0a6506 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args @@ -10,7 +10,7 @@ QEMU_AUDIO_DRV=none \ -M pc \ -m 1024 \ -mem-prealloc \ --mem-path /dev/hugepages2M/libvirt/qemu \ +-mem-path /dev/hugepages2M/libvirt/qemu/SomeDummyHugepagesGuest \ -smp 2,sockets=2,cores=1,threads=1 \ -uuid ef1bdff4-27f3-4e85-a807-5fb4d58463cc \ -nographic \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-shared.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-shared.args index f9fc218..03d736a 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-shared.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-shared.args @@ -11,19 +11,19 @@ QEMU_AUDIO_DRV=none \ -m 4096 \ -smp 4,sockets=4,cores=1,threads=1 \ -object memory-backend-file,id=ram-node0,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=0-3,\ -policy=bind \ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,size=1073741824,\ +host-nodes=0-3,policy=bind \ -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ -object memory-backend-file,id=ram-node1,prealloc=yes,\ -mem-path=/dev/hugepages2M/libvirt/qemu,share=yes,size=1073741824,\ +mem-path=/dev/hugepages2M/libvirt/qemu/QEMUGuest1,share=yes,size=1073741824,\ host-nodes=0-3,policy=bind \ -numa node,nodeid=1,cpus=1,memdev=ram-node1 \ -object memory-backend-file,id=ram-node2,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,share=no,size=1073741824,host-nodes=0-3,\ -policy=bind \ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,share=no,size=1073741824,\ +host-nodes=0-3,policy=bind \ -numa node,nodeid=2,cpus=2,memdev=ram-node2 \ -object memory-backend-file,id=ram-node3,prealloc=yes,\ -mem-path=/dev/hugepages1G/libvirt/qemu,size=1073741824,host-nodes=3,\ +mem-path=/dev/hugepages1G/libvirt/qemu/QEMUGuest1,size=1073741824,host-nodes=3,\ policy=bind \ -numa node,nodeid=3,cpus=3,memdev=ram-node3 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args index 361c8e5..2d437da 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args @@ -10,7 +10,7 @@ QEMU_AUDIO_DRV=none \ -M pc \ -m 214 \ -mem-prealloc \ --mem-path /dev/hugepages2M/libvirt/qemu \ +-mem-path /dev/hugepages2M/libvirt/qemu/QEMUGuest1 \ -smp 1,sockets=1,cores=1,threads=1 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ -nographic \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm-addr.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm-addr.args index fdbb4c3..b635327 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm-addr.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm-addr.args @@ -12,8 +12,8 @@ QEMU_AUDIO_DRV=none \ -smp 2,sockets=2,cores=1,threads=1 \ -numa node,nodeid=0,cpus=0-1,mem=214 \ -object memory-backend-file,id=memdimm0,prealloc=yes,\ -mem-path=/dev/hugepages2M/libvirt/qemu,size=536870912,host-nodes=1-3,\ -policy=bind \ +mem-path=/dev/hugepages2M/libvirt/qemu/QEMUGuest1,size=536870912,\ +host-nodes=1-3,policy=bind \ -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0,addr=4294967296 \ -object memory-backend-ram,id=memdimm2,size=536870912 \ -device pc-dimm,node=0,memdev=memdimm2,id=dimm2,slot=2 \ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm.args index 1587aba..10d8593 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-dimm.args @@ -14,8 +14,8 @@ QEMU_AUDIO_DRV=none \ -object memory-backend-ram,id=memdimm0,size=536870912 \ -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -object memory-backend-file,id=memdimm1,prealloc=yes,\ -mem-path=/dev/hugepages2M/libvirt/qemu,size=536870912,host-nodes=1-3,\ -policy=bind \ +mem-path=/dev/hugepages2M/libvirt/qemu/QEMUGuest1,size=536870912,\ +host-nodes=1-3,policy=bind \ -device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ -nographic \ -- 2.8.4

On Tue, Nov 22, 2016 at 01:45:42PM +0100, Michal Privoznik wrote:
If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even
It works fine - this functionality has existed for years and apps like OpenStack use it and certainly never run QEMU as root. In qemuStateInitialize we create $MOUNT/libvirt/qemu and chown it to the qemu:qemu user/group pair. That all said....
though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path:
/hugepageMount/libvirt/qemu
Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.:
/hugepageMount/libvirt/qemu/domainName
and chown domainName dir to the user that domain is configured to run under.
...I agree it is better to create a dir per QEMU, since that lets us run each QEMU as a distinct user or group ID. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

On 22.11.2016 13:53, Daniel P. Berrange wrote:
On Tue, Nov 22, 2016 at 01:45:42PM +0100, Michal Privoznik wrote:
If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even
It works fine - this functionality has existed for years and apps like OpenStack use it and certainly never run QEMU as root.
In qemuStateInitialize we create $MOUNT/libvirt/qemu and chown it to the qemu:qemu user/group pair.
Well, this works as long as all the huge page enabled guests are run under the the same user. For instance, if your user/group from qemu.conf is root:root and you have one domain with qemu:qemu (configured via domain XML) it won't start.
That all said....
though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path:
/hugepageMount/libvirt/qemu
Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.:
/hugepageMount/libvirt/qemu/domainName
and chown domainName dir to the user that domain is configured to run under.
...I agree it is better to create a dir per QEMU, since that lets us run each QEMU as a distinct user or group ID.
Exactly. Michal

On Tue, Nov 22, 2016 at 01:45:42PM +0100, Michal Privoznik wrote:
If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even
Surely you mean different than the default user from qemu.conf.
though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path:
/hugepageMount/libvirt/qemu
Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.:
/hugepageMount/libvirt/qemu/domainName
and chown domainName dir to the user that domain is configured to run under.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 +- src/qemu/qemu_conf.c | 45 ++++++++++++++++------ src/qemu/qemu_conf.h | 16 +++++--- src/qemu/qemu_driver.c | 19 +++------ src/qemu/qemu_process.c | 25 +++++++++++- .../qemuxml2argv-hugepages-numa.args | 4 +- .../qemuxml2argv-hugepages-pages.args | 14 +++---- .../qemuxml2argv-hugepages-pages2.args | 2 +- .../qemuxml2argv-hugepages-pages3.args | 2 +- .../qemuxml2argv-hugepages-pages5.args | 2 +- .../qemuxml2argv-hugepages-shared.args | 12 +++--- tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- .../qemuxml2argv-memory-hotplug-dimm-addr.args | 4 +- .../qemuxml2argv-memory-hotplug-dimm.args | 4 +- 14 files changed, 97 insertions(+), 58 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 0ed88f5..942ad86 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1468,8 +1468,26 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage) }
+char * +qemuGetDomainHugepagePath(const virDomainDef *def, + virHugeTLBFSPtr hugepage) +{ + char *base = qemuGetBaseHugepagePath(hugepage); + char *ret; + + if (!base || + virAsprintf(&ret, "%s/%s", base, def->name) < 0) { + VIR_FREE(base); + return NULL; + } + + return ret; +} +
You can't simply user the name because our restrictions for the name are too lax. You should get unique directory name usable for this using virDomainObjGetShortName() to make sure the creation doesn't fail. However, that reminds me that you might need to deal with similar thing I had to deal with when adding per-domain subdirectories for private domain paths. You should save the path (or at least the information that the newer path is used) in the domain object and save/restore it in/from the state XML. The way it's implemented now will break for example hotplug of hugepage-backed memory after libvirt upgrade. I know this (and others) are mostly corner-cases, this won't have that many problems as the per-domain path had, but still, not fixing it right away will just increase the complexity of the fix. Martin

On 25.11.2016 09:35, Martin Kletzander wrote:
On Tue, Nov 22, 2016 at 01:45:42PM +0100, Michal Privoznik wrote:
If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even
Surely you mean different than the default user from qemu.conf.
though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path:
/hugepageMount/libvirt/qemu
Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.:
/hugepageMount/libvirt/qemu/domainName
and chown domainName dir to the user that domain is configured to run under.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 +- src/qemu/qemu_conf.c | 45 ++++++++++++++++------ src/qemu/qemu_conf.h | 16 +++++--- src/qemu/qemu_driver.c | 19 +++------ src/qemu/qemu_process.c | 25 +++++++++++- .../qemuxml2argv-hugepages-numa.args | 4 +- .../qemuxml2argv-hugepages-pages.args | 14 +++---- .../qemuxml2argv-hugepages-pages2.args | 2 +- .../qemuxml2argv-hugepages-pages3.args | 2 +- .../qemuxml2argv-hugepages-pages5.args | 2 +- .../qemuxml2argv-hugepages-shared.args | 12 +++--- tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- .../qemuxml2argv-memory-hotplug-dimm-addr.args | 4 +- .../qemuxml2argv-memory-hotplug-dimm.args | 4 +- 14 files changed, 97 insertions(+), 58 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 0ed88f5..942ad86 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1468,8 +1468,26 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage) }
+char * +qemuGetDomainHugepagePath(const virDomainDef *def, + virHugeTLBFSPtr hugepage) +{ + char *base = qemuGetBaseHugepagePath(hugepage); + char *ret; + + if (!base || + virAsprintf(&ret, "%s/%s", base, def->name) < 0) { + VIR_FREE(base); + return NULL; + } + + return ret; +} +
You can't simply user the name because our restrictions for the name are too lax. You should get unique directory name usable for this using virDomainObjGetShortName() to make sure the creation doesn't fail.
I thought that when we are using plain domain name for storing domain status XML or pid file that I'm safe here too. But okay, I can change it. I jut hope that by the time the command line is built domain already has id allocated.
However, that reminds me that you might need to deal with similar thing I had to deal with when adding per-domain subdirectories for private domain paths. You should save the path (or at least the information that the newer path is used) in the domain object and save/restore it in/from the state XML. The way it's implemented now will break for example hotplug of hugepage-backed memory after libvirt upgrade.
Not really. We don't expose the path anywhere, and whenever it is needed we construct it. I've tested this and basically the only problem I ran into was that we don't build the path on domain hotplug (rather than on domain startup), but it is trivial to fix. Michal

On Tue, Nov 29, 2016 at 09:28:29AM +0100, Michal Privoznik wrote:
On 25.11.2016 09:35, Martin Kletzander wrote:
On Tue, Nov 22, 2016 at 01:45:42PM +0100, Michal Privoznik wrote:
If you've ever tried running a huge page backed guest under different user than root, you probably failed. Problem is even
Surely you mean different than the default user from qemu.conf.
though we have corresponding APIs in the security drivers, there's no implementation and thus we don't relabel the huge page path. But even if we did, so far all of the domains share the same path:
/hugepageMount/libvirt/qemu
Our only option there would be to set 0777 mode on the qemu dir which is totally unsafe. Therefore, we can create dir on per-domain basis, i.e.:
/hugepageMount/libvirt/qemu/domainName
and chown domainName dir to the user that domain is configured to run under.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 +- src/qemu/qemu_conf.c | 45 ++++++++++++++++------ src/qemu/qemu_conf.h | 16 +++++--- src/qemu/qemu_driver.c | 19 +++------ src/qemu/qemu_process.c | 25 +++++++++++- .../qemuxml2argv-hugepages-numa.args | 4 +- .../qemuxml2argv-hugepages-pages.args | 14 +++---- .../qemuxml2argv-hugepages-pages2.args | 2 +- .../qemuxml2argv-hugepages-pages3.args | 2 +- .../qemuxml2argv-hugepages-pages5.args | 2 +- .../qemuxml2argv-hugepages-shared.args | 12 +++--- tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 2 +- .../qemuxml2argv-memory-hotplug-dimm-addr.args | 4 +- .../qemuxml2argv-memory-hotplug-dimm.args | 4 +- 14 files changed, 97 insertions(+), 58 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 0ed88f5..942ad86 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1468,8 +1468,26 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage) }
+char * +qemuGetDomainHugepagePath(const virDomainDef *def, + virHugeTLBFSPtr hugepage) +{ + char *base = qemuGetBaseHugepagePath(hugepage); + char *ret; + + if (!base || + virAsprintf(&ret, "%s/%s", base, def->name) < 0) { + VIR_FREE(base); + return NULL; + } + + return ret; +} +
You can't simply user the name because our restrictions for the name are too lax. You should get unique directory name usable for this using virDomainObjGetShortName() to make sure the creation doesn't fail.
I thought that when we are using plain domain name for storing domain status XML or pid file that I'm safe here too. But okay, I can change it. I jut hope that by the time the command line is built domain already has id allocated.
Oh yeah, I used that because we used a prefix for the name and since you are not doing that here, there's no need for this function to be used. Sorry for the confusion.
However, that reminds me that you might need to deal with similar thing I had to deal with when adding per-domain subdirectories for private domain paths. You should save the path (or at least the information that the newer path is used) in the domain object and save/restore it in/from the state XML. The way it's implemented now will break for example hotplug of hugepage-backed memory after libvirt upgrade.
Not really. We don't expose the path anywhere, and whenever it is needed we construct it. I've tested this and basically the only problem I ran into was that we don't build the path on domain hotplug (rather than on domain startup), but it is trivial to fix.
Yep, since we don't need that, we just need to make sure the path exists (with proper permissions) on hotplug, not only when domain is started. Exactly as you said ;)
Michal

Since its introduction in 2012 this internal API did nothing. Now it's finally the right time to put it into a good use. It's implementation is fairly simple and exactly the same as virSecurityDomainSetPathLabel. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 11 +++++++++++ src/security/security_selinux.c | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index fd74e8b..3fa9df9 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1564,6 +1564,15 @@ virSecurityDACDomainSetPathLabel(virSecurityManagerPtr mgr, return virSecurityDACSetOwnership(priv, NULL, path, user, group); } +static int +virSecurityDACDomainSetHugepages(virSecurityManagerPtr mgr, + virDomainDefPtr def, + const char *path) +{ + return virSecurityDACDomainSetPathLabel(mgr, def, path); +} + + virSecurityDriver virSecurityDriverDAC = { .privateDataLen = sizeof(virSecurityDACData), .name = SECURITY_DAC_NAME, @@ -1613,4 +1622,6 @@ virSecurityDriver virSecurityDriverDAC = { .getBaseLabel = virSecurityDACGetBaseLabel, .domainSetPathLabel = virSecurityDACDomainSetPathLabel, + + .domainSetSecurityHugepages = virSecurityDACDomainSetHugepages, }; diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 89c93dc..0497acc 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -2610,6 +2610,14 @@ virSecuritySELinuxDomainSetPathLabel(virSecurityManagerPtr mgr, return virSecuritySELinuxSetFilecon(mgr, path, seclabel->imagelabel); } +static int +virSecuritySELinuxDomainSetHugepages(virSecurityManagerPtr mgr, + virDomainDefPtr def, + const char *path) +{ + return virSecuritySELinuxDomainSetPathLabel(mgr, def, path); +} + virSecurityDriver virSecurityDriverSELinux = { .privateDataLen = sizeof(virSecuritySELinuxData), .name = SECURITY_SELINUX_NAME, @@ -2656,4 +2664,6 @@ virSecurityDriver virSecurityDriverSELinux = { .getBaseLabel = virSecuritySELinuxGetBaseLabel, .domainSetPathLabel = virSecuritySELinuxDomainSetPathLabel, + + .domainSetSecurityHugepages = virSecuritySELinuxDomainSetHugepages, }; -- 2.8.4

On Tue, Nov 22, 2016 at 01:45:43PM +0100, Michal Privoznik wrote:
Since its introduction in 2012 this internal API did nothing. Now it's finally the right time to put it into a good use. It's implementation is fairly simple and exactly the same as virSecurityDomainSetPathLabel.
Then why do you redefine the function? Why not just say .domainSetSecurityHugepages = …DomainSetPathLabel, or simply: s/virSecurityManagerDomainSetHugepages/virSecurityManagerDomainSetPathLabel/g and zap the former function, qemuProcessPrepareHost() is the only caller anyway. ACK to any of those two (I prefer the latter).
participants (4)
-
Daniel P. Berrange
-
Martin Kletzander
-
Michal Privoznik
-
nert@redhat.com