[libvirt PATCHv2 0/9] support running virtiofsd in session mode

Now with automatic id mapping selection. Ján Tomko (9): conf: move idmap definition earlier conf: move idmap parsing earlier conf: add idmap element to filesystem qemu: format uid/gid map for virtiofs qemu: virtiofs: do not force UID 0 qemu: allow running virtiofsd in session mode util: add virGetSubUIDs qemu: virtiofs: auto-fill idmap for unprivileged use docs: virtiofs: add section about ID remapping docs/formatdomain.rst | 8 + docs/kbase/virtiofs.rst | 19 +++ src/conf/domain_conf.c | 152 ++++++++++++------ src/conf/domain_conf.h | 29 ++-- src/conf/schemas/domaincommon.rng | 3 + src/libvirt_private.syms | 2 + src/qemu/qemu_validate.c | 7 +- src/qemu/qemu_virtiofs.c | 95 ++++++++++- src/util/virutil.c | 70 ++++++++ src/util/virutil.h | 12 ++ .../vhost-user-fs-fd-memory.xml | 4 + tests/utiltest.c | 33 ++++ tests/virutiltestdata/subuid | 4 + 13 files changed, 362 insertions(+), 76 deletions(-) create mode 100644 tests/virutiltestdata/subuid -- 2.42.0

It will be reused for <filesystem> as well. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/conf/domain_conf.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index ed07859bc5..5a93ee0aee 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -765,6 +765,20 @@ struct _virDomainControllerDef { virDomainVirtioOptions *virtio; }; +struct _virDomainIdMapEntry { + unsigned int start; + unsigned int target; + unsigned int count; +}; + +struct _virDomainIdMapDef { + size_t nuidmap; + virDomainIdMapEntry *uidmap; + + size_t ngidmap; + virDomainIdMapEntry *gidmap; +}; + /* Types of disk backends */ typedef enum { @@ -2671,20 +2685,6 @@ virDomainMemoryDef *virDomainMemoryDefNew(virDomainMemoryModel model); void virDomainMemoryDefFree(virDomainMemoryDef *def); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virDomainMemoryDef, virDomainMemoryDefFree); -struct _virDomainIdMapEntry { - unsigned int start; - unsigned int target; - unsigned int count; -}; - -struct _virDomainIdMapDef { - size_t nuidmap; - virDomainIdMapEntry *uidmap; - - size_t ngidmap; - virDomainIdMapEntry *gidmap; -}; - typedef enum { VIR_DOMAIN_PANIC_MODEL_DEFAULT, -- 2.42.0

It will be reused for <filesystem> as well. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/conf/domain_conf.c | 102 +++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 22ad43e1d7..a70a1f29f2 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -8582,6 +8582,58 @@ virDomainNetGenerateMAC(virDomainXMLOption *xmlopt, } +static int virDomainIdMapEntrySort(const void *a, + const void *b, + void *opaque G_GNUC_UNUSED) +{ + const virDomainIdMapEntry *entrya = a; + const virDomainIdMapEntry *entryb = b; + + if (entrya->start > entryb->start) + return 1; + else if (entrya->start < entryb->start) + return -1; + else + return 0; +} + + +/* Parse the XML definition for user namespace id map. + * + * idmap has the form of + * + * <uid start='0' target='1000' count='10'/> + * <gid start='0' target='1000' count='10'/> + */ +static virDomainIdMapEntry * +virDomainIdmapDefParseXML(xmlXPathContextPtr ctxt, + xmlNodePtr *node, + size_t num) +{ + size_t i; + virDomainIdMapEntry *idmap = NULL; + VIR_XPATH_NODE_AUTORESTORE(ctxt) + + idmap = g_new0(virDomainIdMapEntry, num); + + for (i = 0; i < num; i++) { + ctxt->node = node[i]; + if (virXPathUInt("string(./@start)", ctxt, &idmap[i].start) < 0 || + virXPathUInt("string(./@target)", ctxt, &idmap[i].target) < 0 || + virXPathUInt("string(./@count)", ctxt, &idmap[i].count) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("invalid idmap start/target/count settings")); + VIR_FREE(idmap); + return NULL; + } + } + + g_qsort_with_data(idmap, num, sizeof(idmap[0]), virDomainIdMapEntrySort, NULL); + + return idmap; +} + + static virDomainFSDef * virDomainFSDefParseXML(virDomainXMLOption *xmlopt, xmlNodePtr node, @@ -15742,56 +15794,6 @@ virDomainDefParseBootXML(xmlXPathContextPtr ctxt, } -static int virDomainIdMapEntrySort(const void *a, - const void *b, - void *opaque G_GNUC_UNUSED) -{ - const virDomainIdMapEntry *entrya = a; - const virDomainIdMapEntry *entryb = b; - - if (entrya->start > entryb->start) - return 1; - else if (entrya->start < entryb->start) - return -1; - else - return 0; -} - -/* Parse the XML definition for user namespace id map. - * - * idmap has the form of - * - * <uid start='0' target='1000' count='10'/> - * <gid start='0' target='1000' count='10'/> - */ -static virDomainIdMapEntry * -virDomainIdmapDefParseXML(xmlXPathContextPtr ctxt, - xmlNodePtr *node, - size_t num) -{ - size_t i; - virDomainIdMapEntry *idmap = NULL; - VIR_XPATH_NODE_AUTORESTORE(ctxt) - - idmap = g_new0(virDomainIdMapEntry, num); - - for (i = 0; i < num; i++) { - ctxt->node = node[i]; - if (virXPathUInt("string(./@start)", ctxt, &idmap[i].start) < 0 || - virXPathUInt("string(./@target)", ctxt, &idmap[i].target) < 0 || - virXPathUInt("string(./@count)", ctxt, &idmap[i].count) < 0) { - virReportError(VIR_ERR_XML_ERROR, "%s", - _("invalid idmap start/target/count settings")); - VIR_FREE(idmap); - return NULL; - } - } - - g_qsort_with_data(idmap, num, sizeof(idmap[0]), virDomainIdMapEntrySort, NULL); - - return idmap; -} - /* Parse the XML definition for an IOThread ID * * Format is : -- 2.42.0

Allow the user to manually tweak the ID mapping that will allow virtiofsd to run unprivileged. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- docs/formatdomain.rst | 8 +++ src/conf/domain_conf.c | 50 +++++++++++++++++++ src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 3 ++ .../vhost-user-fs-fd-memory.xml | 4 ++ 5 files changed, 66 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 310d2bc427..96e03a3807 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -3548,6 +3548,10 @@ A directory on the host that can be accessed directly from the guest. </binary> <source dir='/path'/> <target dir='mount_tag'/> + <idmap> + <uid start='0' target='100000' count='65535'/> + <gid start='0' target='100000' count='65535'/> + </idmap> </filesystem> <filesystem type='mount'> <driver type='virtiofs' queue='1024'/> @@ -3697,6 +3701,10 @@ A directory on the host that can be accessed directly from the guest. Where the ``source`` can be accessed in the guest. For most drivers this is an automatic mount point, but for QEMU/KVM this is merely an arbitrary string tag that is exported to the guest as a hint for where to mount. +``idmap`` + For ``virtiofs``, an ``idmap`` element can be specified to map IDs in the user + namespace. See the `Container boot`_ section for the syntax of the element. + :since:`Since 10.0.0` ``readonly`` Enables exporting filesystem as a readonly mount for guest, by default read-write access is given (currently only works for QEMU/KVM driver; not diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a70a1f29f2..58a985fc5d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2588,6 +2588,8 @@ void virDomainFSDefFree(virDomainFSDef *def) virObjectUnref(def->privateData); g_free(def->binary); g_free(def->sock); + g_free(def->idmap.uidmap); + g_free(def->idmap.gidmap); g_free(def); } @@ -8771,6 +8773,9 @@ virDomainFSDefParseXML(virDomainXMLOption *xmlopt, xmlNodePtr binary_lock_node = virXPathNode("./binary/lock", ctxt); xmlNodePtr binary_cache_node = virXPathNode("./binary/cache", ctxt); xmlNodePtr binary_sandbox_node = virXPathNode("./binary/sandbox", ctxt); + ssize_t n; + g_autofree xmlNodePtr *uid_nodes = NULL; + g_autofree xmlNodePtr *gid_nodes = NULL; if (queue_size && virStrToLong_ull(queue_size, NULL, 10, &def->queue_size) < 0) { virReportError(VIR_ERR_XML_ERROR, @@ -8816,6 +8821,28 @@ virDomainFSDefParseXML(virDomainXMLOption *xmlopt, VIR_XML_PROP_NONZERO, &def->sandbox) < 0) goto error; + + if ((n = virXPathNodeSet("./idmap/uid", ctxt, &uid_nodes)) < 0) + return NULL; + + if (n) { + def->idmap.uidmap = virDomainIdmapDefParseXML(ctxt, uid_nodes, n); + if (!def->idmap.uidmap) + return NULL; + + def->idmap.nuidmap = n; + } + + if ((n = virXPathNodeSet("./idmap/gid", ctxt, &gid_nodes)) < 0) + return NULL; + + if (n) { + def->idmap.gidmap = virDomainIdmapDefParseXML(ctxt, gid_nodes, n); + if (!def->idmap.gidmap) + return NULL; + + def->idmap.ngidmap = n; + } } if (source == NULL && def->type != VIR_DOMAIN_FS_TYPE_RAM @@ -23233,6 +23260,29 @@ virDomainFSDefFormat(virBuffer *buf, virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf); virXMLFormatElement(buf, "binary", &binaryAttrBuf, &binaryBuf); + if (def->idmap.uidmap) { + size_t i; + + virBufferAddLit(buf, "<idmap>\n"); + virBufferAdjustIndent(buf, 2); + for (i = 0; i < def->idmap.nuidmap; i++) { + virBufferAsprintf(buf, + "<uid start='%u' target='%u' count='%u'/>\n", + def->idmap.uidmap[i].start, + def->idmap.uidmap[i].target, + def->idmap.uidmap[i].count); + } + for (i = 0; i < def->idmap.ngidmap; i++) { + virBufferAsprintf(buf, + "<gid start='%u' target='%u' count='%u'/>\n", + def->idmap.gidmap[i].start, + def->idmap.gidmap[i].target, + def->idmap.gidmap[i].count); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</idmap>\n"); + } + switch (def->type) { case VIR_DOMAIN_FS_TYPE_MOUNT: case VIR_DOMAIN_FS_TYPE_BIND: diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5a93ee0aee..0c5e2636e1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -888,6 +888,7 @@ struct _virDomainFSDef { virTristateSwitch flock; virDomainFSSandboxMode sandbox; int thread_pool_size; + virDomainIdMapDef idmap; virDomainVirtioOptions *virtio; virObject *privateData; }; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index b98a2ae602..f318c06797 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -3120,6 +3120,9 @@ </choice> <empty/> </element> + <optional> + <ref name="idmap"/> + </optional> <ref name="filesystemCommon"/> </interleave> </group> diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml index 81de8c0dd7..1d0bc26c46 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -34,6 +34,10 @@ <lock posix='off' flock='off'/> <thread_pool size='16'/> </binary> + <idmap> + <uid start='0' target='100000' count='65535'/> + <gid start='0' target='100000' count='65535'/> + </idmap> <source dir='/path'/> <target dir='mount_tag'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> -- 2.42.0

On 12/13/23 15:47, Ján Tomko wrote:
Allow the user to manually tweak the ID mapping that will allow virtiofsd to run unprivileged.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- docs/formatdomain.rst | 8 +++ src/conf/domain_conf.c | 50 +++++++++++++++++++ src/conf/domain_conf.h | 1 + src/conf/schemas/domaincommon.rng | 3 ++ .../vhost-user-fs-fd-memory.xml | 4 ++ 5 files changed, 66 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 310d2bc427..96e03a3807 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -3548,6 +3548,10 @@ A directory on the host that can be accessed directly from the guest. </binary> <source dir='/path'/> <target dir='mount_tag'/> + <idmap> + <uid start='0' target='100000' count='65535'/> + <gid start='0' target='100000' count='65535'/> + </idmap> </filesystem> <filesystem type='mount'> <driver type='virtiofs' queue='1024'/> @@ -3697,6 +3701,10 @@ A directory on the host that can be accessed directly from the guest. Where the ``source`` can be accessed in the guest. For most drivers this is an automatic mount point, but for QEMU/KVM this is merely an arbitrary string tag that is exported to the guest as a hint for where to mount. +``idmap`` + For ``virtiofs``, an ``idmap`` element can be specified to map IDs in the user + namespace. See the `Container boot`_ section for the syntax of the element. + :since:`Since 10.0.0`
Not a show stopper, but that section does not mention the uid/gid elements can be repeated multiple times. Might be worth of a follow up patch.
``readonly`` Enables exporting filesystem as a readonly mount for guest, by default read-write access is given (currently only works for QEMU/KVM driver; not diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a70a1f29f2..58a985fc5d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2588,6 +2588,8 @@ void virDomainFSDefFree(virDomainFSDef *def) virObjectUnref(def->privateData); g_free(def->binary); g_free(def->sock); + g_free(def->idmap.uidmap); + g_free(def->idmap.gidmap);
g_free(def); } @@ -8771,6 +8773,9 @@ virDomainFSDefParseXML(virDomainXMLOption *xmlopt, xmlNodePtr binary_lock_node = virXPathNode("./binary/lock", ctxt); xmlNodePtr binary_cache_node = virXPathNode("./binary/cache", ctxt); xmlNodePtr binary_sandbox_node = virXPathNode("./binary/sandbox", ctxt); + ssize_t n; + g_autofree xmlNodePtr *uid_nodes = NULL; + g_autofree xmlNodePtr *gid_nodes = NULL;
if (queue_size && virStrToLong_ull(queue_size, NULL, 10, &def->queue_size) < 0) { virReportError(VIR_ERR_XML_ERROR, @@ -8816,6 +8821,28 @@ virDomainFSDefParseXML(virDomainXMLOption *xmlopt, VIR_XML_PROP_NONZERO, &def->sandbox) < 0) goto error; + + if ((n = virXPathNodeSet("./idmap/uid", ctxt, &uid_nodes)) < 0) + return NULL; + + if (n) { + def->idmap.uidmap = virDomainIdmapDefParseXML(ctxt, uid_nodes, n); + if (!def->idmap.uidmap) + return NULL; + + def->idmap.nuidmap = n; + } + + if ((n = virXPathNodeSet("./idmap/gid", ctxt, &gid_nodes)) < 0) + return NULL; + + if (n) { + def->idmap.gidmap = virDomainIdmapDefParseXML(ctxt, gid_nodes, n); + if (!def->idmap.gidmap) + return NULL; + + def->idmap.ngidmap = n; + }
Another area for improvement: this pattern is repeated now 4 times. virXPathNodeset() could be moved to virDomainIdmapDefParseXML. But that's something for future.
}
if (source == NULL && def->type != VIR_DOMAIN_FS_TYPE_RAM
Michal

Pass the ID map to virtiofsd, which will run the suid `newuidmap` binary for us. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 230f85c291..af51d58673 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -131,6 +131,7 @@ qemuVirtioFSBuildCommandLine(virQEMUDriverConfig *cfg, { g_autoptr(virCommand) cmd = NULL; g_auto(virBuffer) opts = VIR_BUFFER_INITIALIZER; + size_t i = 4; cmd = virCommandNew(fs->binary); @@ -169,6 +170,20 @@ qemuVirtioFSBuildCommandLine(virQEMUDriverConfig *cfg, if (cfg->virtiofsdDebug) virCommandAddArg(cmd, "-d"); + for (i = 0; i < fs->idmap.nuidmap; i++) { + virCommandAddArgFormat(cmd, "--uid-map=:%u:%u:%u:", + fs->idmap.uidmap[i].start, + fs->idmap.uidmap[i].target, + fs->idmap.uidmap[i].count); + } + + for (i = 0; i < fs->idmap.ngidmap; i++) { + virCommandAddArgFormat(cmd, "--gid-map=:%u:%u:%u:", + fs->idmap.gidmap[i].start, + fs->idmap.gidmap[i].target, + fs->idmap.gidmap[i].count); + } + return g_steal_pointer(&cmd); } -- 2.42.0

When this check was introduced, virtiofsd required root privileges. This has changed since then - now it does not need to set up all the sandboxing when running as non-root. It even gained support for id mapping, which makes running unprivileged even more useful. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index af51d58673..4dacd37a1c 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -257,10 +257,6 @@ qemuVirtioFSStart(virQEMUDriver *driver, if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) goto error; - /* so far only running as root is supported */ - virCommandSetUID(cmd, 0); - virCommandSetGID(cmd, 0); - virCommandSetPidFile(cmd, pidfile); virCommandSetOutputFD(cmd, &logfd); virCommandSetErrorFD(cmd, &logfd); -- 2.42.0

On 12/13/23 15:47, Ján Tomko wrote:
When this check was introduced, virtiofsd required root privileges.
This has changed since then - now it does not need to set up all the sandboxing when running as non-root. It even gained support for id mapping, which makes running unprivileged even more useful.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index af51d58673..4dacd37a1c 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -257,10 +257,6 @@ qemuVirtioFSStart(virQEMUDriver *driver, if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) goto error;
- /* so far only running as root is supported */ - virCommandSetUID(cmd, 0); - virCommandSetGID(cmd, 0);
This makes us unable to run C version of virtiofsd, becuase that one does privileged syscalls from the very start. I mean, you can't even run `virtiofsd --help` as a non-root. Personally, I'm not against this. But I was told we can't do that, sorry: https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/message/U7FC... Michal

On a Thursday in 2023, Michal Prívozník wrote:
On 12/13/23 15:47, Ján Tomko wrote:
When this check was introduced, virtiofsd required root privileges.
This has changed since then - now it does not need to set up all the sandboxing when running as non-root. It even gained support for id mapping, which makes running unprivileged even more useful.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index af51d58673..4dacd37a1c 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -257,10 +257,6 @@ qemuVirtioFSStart(virQEMUDriver *driver, if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) goto error;
- /* so far only running as root is supported */ - virCommandSetUID(cmd, 0); - virCommandSetGID(cmd, 0);
This makes us unable to run C version of virtiofsd, becuase that one does privileged syscalls from the very start. I mean, you can't even run `virtiofsd --help` as a non-root.
The users of the C version (and all the Rust versions that make privileged calls) will still be able to run virtiofsd and it will still be run as root for privileged libvirtd. The downside is a worse error message. If I remember correctly, the only reason I added these lines was a quick way to give unprivileged users an early error without having to write an error message. The above lines were added by: commit f0f986efa8a8e352fbdce7079ec440a4f3c8f522 qemu: add code for handling virtiofsd The proper validation error (which I also propose to remove earlier in this series) was added by: commit efaf46811c909ee5333360fba1d75ae82352964a qemu: validate virtiofs filesystems Both commits were committed upstream at the same time, so I'm not sure why I kept it there. Jano
Personally, I'm not against this. But I was told we can't do that, sorry:
https://lists.libvirt.org/archives/list/devel@lists.libvirt.org/message/U7FC...
Michal

On 12/14/23 12:07, Ján Tomko wrote:
On a Thursday in 2023, Michal Prívozník wrote:
On 12/13/23 15:47, Ján Tomko wrote:
When this check was introduced, virtiofsd required root privileges.
This has changed since then - now it does not need to set up all the sandboxing when running as non-root. It even gained support for id mapping, which makes running unprivileged even more useful.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index af51d58673..4dacd37a1c 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -257,10 +257,6 @@ qemuVirtioFSStart(virQEMUDriver *driver, if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) goto error;
- /* so far only running as root is supported */ - virCommandSetUID(cmd, 0); - virCommandSetGID(cmd, 0);
This makes us unable to run C version of virtiofsd, becuase that one does privileged syscalls from the very start. I mean, you can't even run `virtiofsd --help` as a non-root.
The users of the C version (and all the Rust versions that make privileged calls) will still be able to run virtiofsd and it will still be run as root for privileged libvirtd.
The downside is a worse error message.
If I remember correctly, the only reason I added these lines was a quick way to give unprivileged users an early error without having to write an error message.
The above lines were added by: commit f0f986efa8a8e352fbdce7079ec440a4f3c8f522 qemu: add code for handling virtiofsd The proper validation error (which I also propose to remove earlier in this series) was added by: commit efaf46811c909ee5333360fba1d75ae82352964a qemu: validate virtiofs filesystems
Both commits were committed upstream at the same time, so I'm not sure why I kept it there.
This makes even less sense than I anticipated. In 6/9 you remove the check which guards running virtiofsd in session mode (which in case of qemu driver means uid === 0). Why we have so many checks is beyond me. Anyway, for some weird reason, after these lines are removed I can still run the C implementation (which is possibly a sign of a deeper problem anyways). So, if you fix the commit message so that it describes what is really happening (libvirtd runs as root and thus virtiofsd is ran as root too), you have my: Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

https://gitlab.com/libvirt/libvirt/-/issues/535 Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_validate.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index e475ad035e..9e50c2f45b 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -4256,7 +4256,7 @@ qemuValidateDomainDeviceDefGraphics(const virDomainGraphicsDef *graphics, static int qemuValidateDomainDeviceDefFS(virDomainFSDef *fs, const virDomainDef *def, - virQEMUDriver *driver, + virQEMUDriver *driver G_GNUC_UNUSED, virQEMUCaps *qemuCaps) { if (fs->type != VIR_DOMAIN_FS_TYPE_MOUNT) { @@ -4320,11 +4320,6 @@ qemuValidateDomainDeviceDefFS(virDomainFSDef *fs, _("virtiofs does not yet support read-only mode")); return -1; } - if (!driver->privileged) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("virtiofs is not yet supported in session mode")); - return -1; - } if (fs->accessmode != VIR_DOMAIN_FS_ACCESSMODE_PASSTHROUGH) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("virtiofs only supports passthrough accessmode")); -- 2.42.0

A function for parsing /etc/sub[ug]id Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/libvirt_private.syms | 2 ++ src/util/virutil.c | 70 ++++++++++++++++++++++++++++++++++++ src/util/virutil.h | 12 +++++++ tests/utiltest.c | 33 +++++++++++++++++ tests/virutiltestdata/subuid | 4 +++ 5 files changed, 121 insertions(+) create mode 100644 tests/virutiltestdata/subuid diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 553b01b8c0..20f34d72b5 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3640,6 +3640,7 @@ virGetHostname; virGetHostnameQuiet; virGetPassword; virGetSelfLastChanged; +virGetSubIDs; virGetSystemPageSize; virGetSystemPageSizeKB; virGetUserCacheDirectory; @@ -3670,6 +3671,7 @@ virSetNonBlock; virSetSockReuseAddr; virSetUIDGID; virSetUIDGIDWithCaps; +virSubIDsFree; virUpdateSelfLastChanged; virValidateWWN; virWaitForDevices; diff --git a/src/util/virutil.c b/src/util/virutil.c index 17d65ad834..369e0bc4dc 100644 --- a/src/util/virutil.c +++ b/src/util/virutil.c @@ -738,6 +738,76 @@ virGetUserIDByName(const char *name, uid_t *uid, bool missing_ok) return ret; } +void +virSubIDsFree(virSubID **uids, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if ((*uids)[i].idstr) + g_free((*uids)[i].idstr); + } + g_clear_pointer(uids, g_free); +} + +int +virGetSubIDs(virSubID **retval, const char *file) +{ + g_autofree char *buf = NULL; + g_auto(GStrv) lines = NULL; + virSubID *entries = NULL; + size_t i = 0; + size_t len; + int ret = -1; + + *retval = NULL; + + if (virFileReadAll(file, BUFSIZ, &buf) < 0) + return -1; + + lines = g_strsplit(buf, "\n", 0); + if (!lines) + return -1; + + len = g_strv_length(lines); + entries = g_new0(virSubID, len); + + for (i = 0; i < len; i++) { + g_auto(GStrv) fields = NULL; + unsigned long ulong_id; + + fields = g_strsplit(lines[i], ":", 0); + if (!fields) + goto cleanup; + + if (g_strv_length(fields) != 3) + break; + + if (g_ascii_isdigit(fields[0][0])) { + if (virStrToLong_ul(fields[0], NULL, 10, &ulong_id) < 0) + goto cleanup; + entries[i].id = ulong_id; + } else { + entries[i].idstr = g_strdup(fields[0]); + } + + if (virStrToLong_ul(fields[1], NULL, 10, &ulong_id) < 0) + goto cleanup; + entries[i].start = ulong_id; + + if (virStrToLong_ul(fields[2], NULL, 10, &ulong_id) < 0) + goto cleanup; + entries[i].range = ulong_id; + } + + *retval = g_steal_pointer(&entries); + ret = i; + cleanup: + if (entries) + virSubIDsFree(&entries, len); + return ret; +} + /* Try to match a user id based on `user`. The default behavior is to parse * `user` first as a user name and then as a user id. However if `user` * contains a leading '+', the rest of the string is always parsed as a uid. diff --git a/src/util/virutil.h b/src/util/virutil.h index ab8511bf8d..3bac15d02b 100644 --- a/src/util/virutil.h +++ b/src/util/virutil.h @@ -102,6 +102,18 @@ char *virGetUserName(uid_t uid) G_NO_INLINE; char *virGetGroupName(gid_t gid) G_NO_INLINE; int virGetGroupList(uid_t uid, gid_t group, gid_t **groups) ATTRIBUTE_NONNULL(3); + +typedef struct _virSubID { + uid_t id; + char *idstr; + uid_t start; + uid_t range; +} virSubID; + +int virGetSubIDs(virSubID **ret, const char *file); +void virSubIDsFree(virSubID **uids, size_t n); + + int virGetUserID(const char *name, uid_t *uid) G_GNUC_WARN_UNUSED_RESULT; int virGetGroupID(const char *name, diff --git a/tests/utiltest.c b/tests/utiltest.c index 5930557c08..b30bfbed70 100644 --- a/tests/utiltest.c +++ b/tests/utiltest.c @@ -377,6 +377,38 @@ testKernelCmdlineMatchParam(const void *data G_GNUC_UNUSED) } +static int +testGetSubIDs(const void *data G_GNUC_UNUSED) +{ + g_autofree char *subuid_file = g_strdup_printf("%s/virutiltestdata/subuid", abs_srcdir); + virSubID *subids = NULL; + int len = 0; + int ret = -1; + + if ((len = virGetSubIDs(&subids, subuid_file)) < 0) { + VIR_TEST_DEBUG("virGetSubIDs failed"); + goto cleanup; + } + + if (len != 4) { + VIR_TEST_DEBUG("virGetSubIDs returned %d (expected 4)", len); + goto cleanup; + } + + if (STRNEQ(subids[0].idstr, "joe")) { + VIR_TEST_DEBUG("virGetSubIDs returned wrong name for entry 0: '%s'", NULLSTR(subids[0].idstr)); + goto cleanup; + } + + ret = 0; + + cleanup: + if (len >= 0) + virSubIDsFree(&subids, len); + return ret; +} + + static int mymain(void) { @@ -400,6 +432,7 @@ mymain(void) DO_TEST(OverflowCheckMacro); DO_TEST(KernelCmdlineNextParam); DO_TEST(KernelCmdlineMatchParam); + DO_TEST(GetSubIDs); return result == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/tests/virutiltestdata/subuid b/tests/virutiltestdata/subuid new file mode 100644 index 0000000000..c873c0e3bf --- /dev/null +++ b/tests/virutiltestdata/subuid @@ -0,0 +1,4 @@ +joe:100000:65535 +bob:300000:65535 +1024:400000:65535 +alice:200000:65535 -- 2.42.0

If the user did not specify any uid mapping, map its own user ID to ID 0 inside the container and the rest of the IDs to the first found user's authorized range in /etc/sub[ug]id https://issues.redhat.com/browse/RHEL-7386 https://gitlab.com/libvirt/libvirt/-/issues/535 Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_virtiofs.c | 76 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 4dacd37a1c..d539d0a192 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -369,12 +369,84 @@ qemuVirtioFSSetupCgroup(virDomainObj *vm, return 0; } +static int +qemuVirtioFSPrepareIdMap(virDomainFSDef *fs) +{ + g_autofree char *username = NULL; + g_autofree char *groupname = NULL; + virSubID *subuids = NULL; + virSubID *subgids = NULL; + uid_t euid = geteuid(); + uid_t egid = getegid(); + int subuidlen = 0; + int subgidlen = 0; + size_t i; + + username = virGetUserName(euid); + groupname = virGetGroupName(egid); + + fs->idmap.uidmap = g_new0(virDomainIdMapEntry, 2); + fs->idmap.gidmap = g_new0(virDomainIdMapEntry, 2); + + if ((subuidlen = virGetSubIDs(&subuids, "/etc/subuid")) < 0) + return -1; + + fs->idmap.uidmap[0].start = 0; + fs->idmap.uidmap[0].target = euid; + fs->idmap.uidmap[0].count = 1; + fs->idmap.nuidmap = 1; + + for (i = 0; i < subuidlen; i++) { + if ((subuids[i].idstr && STREQ(subuids[i].idstr, username)) || + subuids[i].id == euid) { + fs->idmap.uidmap[1].start = 1; + fs->idmap.uidmap[1].target = subuids[i].start; + fs->idmap.uidmap[1].count = subuids[i].range; + fs->idmap.nuidmap++; + break; + } + } + + virSubIDsFree(&subuids, subuidlen); + + if ((subgidlen = virGetSubIDs(&subgids, "/etc/subgid")) < 0) + return -1; + + fs->idmap.gidmap[0].start = 0; + fs->idmap.gidmap[0].target = getegid(); + fs->idmap.gidmap[0].count = 1; + fs->idmap.ngidmap = 1; + + for (i = 0; i < subgidlen; i++) { + if ((subgids[i].idstr && STREQ(subgids[i].idstr, groupname)) || + subgids[i].id == egid) { + fs->idmap.gidmap[1].start = 1; + fs->idmap.gidmap[1].target = subgids[i].start; + fs->idmap.gidmap[1].count = subgids[i].range; + fs->idmap.ngidmap++; + break; + } + } + + virSubIDsFree(&subgids, subgidlen); + + return 0; +} + int qemuVirtioFSPrepareDomain(virQEMUDriver *driver, virDomainFSDef *fs) { - if (fs->binary || fs->sock) + if (fs->sock) return 0; - return qemuVhostUserFillDomainFS(driver, fs); + if (!fs->binary && qemuVhostUserFillDomainFS(driver, fs) < 0) + return -1; + + if (!driver->privileged && !fs->idmap.uidmap) { + if (qemuVirtioFSPrepareIdMap(fs) < 0) + return -1; + } + + return 0; } -- 2.42.0

Signed-off-by: Ján Tomko <jtomko@redhat.com> --- docs/kbase/virtiofs.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/kbase/virtiofs.rst b/docs/kbase/virtiofs.rst index 5940092db5..457c15da7f 100644 --- a/docs/kbase/virtiofs.rst +++ b/docs/kbase/virtiofs.rst @@ -59,6 +59,25 @@ Sharing a host directory with a guest Note: this requires virtiofs support in the guest kernel (Linux v5.4 or later) +Running unprivileged +==================== + +In unprivileged mode (``qemu:///session``), mapping user/group IDs is available +(since libvirt version 10.0.0). The root user (ID 0) in the guest will be mapped +to the current user on the host. + +The rest of the IDs will be mapped to the subordinate user IDs specified +in `/etc/subuid`: + +:: + + $ cat /etc/subuid + jtomko:100000:65536 + $ cat /etc/subgid + jtomko:100000:65536 + +To manually tweak the user ID mapping, the `idmap` element can be used. + Optional parameters =================== -- 2.42.0

On 12/13/23 15:47, Ján Tomko wrote:
Now with automatic id mapping selection.
Ján Tomko (9): conf: move idmap definition earlier conf: move idmap parsing earlier conf: add idmap element to filesystem qemu: format uid/gid map for virtiofs qemu: virtiofs: do not force UID 0 qemu: allow running virtiofsd in session mode util: add virGetSubUIDs qemu: virtiofs: auto-fill idmap for unprivileged use docs: virtiofs: add section about ID remapping
docs/formatdomain.rst | 8 + docs/kbase/virtiofs.rst | 19 +++ src/conf/domain_conf.c | 152 ++++++++++++------ src/conf/domain_conf.h | 29 ++-- src/conf/schemas/domaincommon.rng | 3 + src/libvirt_private.syms | 2 + src/qemu/qemu_validate.c | 7 +- src/qemu/qemu_virtiofs.c | 95 ++++++++++- src/util/virutil.c | 70 ++++++++ src/util/virutil.h | 12 ++ .../vhost-user-fs-fd-memory.xml | 4 + tests/utiltest.c | 33 ++++ tests/virutiltestdata/subuid | 4 + 13 files changed, 362 insertions(+), 76 deletions(-) create mode 100644 tests/virutiltestdata/subuid
To patches 1-4,6-9: Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Patch 5/9 breaks our support of virtiofsd written in C and as such can't be merged (see more details in my reply to it). Michal
participants (2)
-
Ján Tomko
-
Michal Prívozník