[libvirt PATCHv4 00/15] add virtiofs support (virtio-fs epopee)

For: https://bugzilla.redhat.com/show_bug.cgi?id=1694166 v3: https://www.redhat.com/archives/libvir-list/2020-January/msg01401.html v4: * place virtiofsd into the emulator cgroup * do not leak the log file descriptor * better validation of the path existence and shared memory support * run as root:root explicitly * correctly use listification in RST document bonus: 15/15 wip for SELinux integration Avaliable on my repo (except for the bonus round) git fetch https://repo.or.cz/libvirt/jtomko.git virtiofs-v4 TODO: * a bug against selinux-policy * address the inconsistency of some downstreams wrt placing the json files into /usr/share/qemu vs. /usr/share/qemu-kvm: https://bugzilla.redhat.com/show_bug.cgi?id=1804196 Daniel P. Berrangé (1): docs: reduce excessive spacing in ToC for RST files Ján Tomko (14): schema: wrap fsDriver in a choice group qemuExtDevicesStart: pass logManager qemu: add QEMU_CAPS_VHOST_USER_FS docs: add virtiofs kbase conf: qemu: add virtiofs fsdriver type conf: add virtiofs-related elements and attributes qemu: add virtiofsd_debug to qemu.conf qemu: validate virtiofs filesystems qemu: forbid migration with vhost-user-fs device qemu: add code for handling virtiofsd qemu: put virtiofsd in the emulator cgroup qemu: use the vhost-user schemas to find binary qemu: build vhost-user-fs device command line wip: SELinux integration for virtiofsd docs/formatdomain.html.in | 35 +- docs/kbase.html.in | 3 + docs/kbase/virtiofs.rst | 152 ++++++++ docs/libvirt.css | 4 + docs/schemas/domaincommon.rng | 88 ++++- po/POTFILES.in | 1 + src/conf/domain_conf.c | 108 +++++- src/conf/domain_conf.h | 16 + src/libvirt_private.syms | 2 + src/qemu/Makefile.inc.am | 2 + src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 + src/qemu/qemu_capabilities.c | 4 + src/qemu/qemu_capabilities.h | 3 + src/qemu/qemu_command.c | 47 ++- src/qemu/qemu_conf.c | 2 + src/qemu/qemu_conf.h | 1 + src/qemu/qemu_domain.c | 66 +++- src/qemu/qemu_domain.h | 2 +- src/qemu/qemu_domain_address.c | 4 + src/qemu/qemu_extdevice.c | 43 +++ src/qemu/qemu_extdevice.h | 1 + src/qemu/qemu_migration.c | 10 + src/qemu/qemu_process.c | 4 +- src/qemu/qemu_security.c | 40 ++ src/qemu/qemu_security.h | 7 + src/qemu/qemu_vhost_user.c | 40 ++ src/qemu/qemu_vhost_user.h | 4 + src/qemu/qemu_virtiofs.c | 341 ++++++++++++++++++ src/qemu/qemu_virtiofs.h | 48 +++ src/qemu/test_libvirtd_qemu.aug.in | 1 + src/security/security_dac.c | 20 + src/security/security_driver.h | 2 + src/security/security_manager.c | 12 + src/security/security_manager.h | 4 + src/security/security_nop.c | 1 + src/security/security_selinux.c | 69 ++++ src/security/security_stack.c | 19 + .../caps_4.2.0.aarch64.xml | 1 + .../qemucapabilitiesdata/caps_4.2.0.s390x.xml | 1 + .../caps_4.2.0.x86_64.xml | 1 + .../caps_5.0.0.aarch64.xml | 1 + .../caps_5.0.0.x86_64.xml | 1 + ...vhost-user-fs-fd-memory.x86_64-latest.args | 39 ++ .../vhost-user-fs-fd-memory.xml | 43 +++ ...vhost-user-fs-hugepages.x86_64-latest.args | 47 +++ .../vhost-user-fs-hugepages.xml | 75 ++++ tests/qemuxml2argvtest.c | 14 + .../vhost-user-fs-fd-memory.x86_64-latest.xml | 1 + .../vhost-user-fs-hugepages.x86_64-latest.xml | 1 + tests/qemuxml2xmltest.c | 3 + 51 files changed, 1420 insertions(+), 22 deletions(-) create mode 100644 docs/kbase/virtiofs.rst create mode 100644 src/qemu/qemu_virtiofs.c create mode 100644 src/qemu/qemu_virtiofs.h create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml -- 2.24.1

From: Daniel P. Berrangé <berrange@redhat.com> The table of contents in the RST based files uses <p> tags inside the <li>, which results in 1em's worth of spacing above & below each entry. This results in way too much whitespace in the ToC. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> --- docs/libvirt.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/libvirt.css b/docs/libvirt.css index 2fe123395c..18e55dac59 100644 --- a/docs/libvirt.css +++ b/docs/libvirt.css @@ -579,3 +579,7 @@ ul.news-section-content li dl dd { font-family: monospace; background: #eeeeee; } + +.contents li p { + margin: 2px; +} -- 2.24.1

Allow adding new groups without changing indentation. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Peter Krempa <pkrempa@redhat.com> Acked-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- docs/schemas/domaincommon.rng | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 4be751461d..5b609a4756 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2620,29 +2620,33 @@ for this kind of info, and 'type' for the storage format. We need the latter too, so had to invent a new attribute name --> - <optional> - <attribute name="type"> - <choice> - <value>path</value> - <value>handle</value> - <value>loop</value> - <value>nbd</value> - <value>ploop</value> - </choice> - </attribute> - </optional> - <optional> - <attribute name="format"> - <ref name="storageFormat"/> - </attribute> - </optional> - <optional> - <attribute name="wrpolicy"> - <value>immediate</value> - </attribute> - </optional> - <ref name='virtioOptions'/> - <empty/> + <choice> + <group> + <optional> + <attribute name="type"> + <choice> + <value>path</value> + <value>handle</value> + <value>loop</value> + <value>nbd</value> + <value>ploop</value> + </choice> + </attribute> + </optional> + <optional> + <attribute name="format"> + <ref name="storageFormat"/> + </attribute> + </optional> + <optional> + <attribute name="wrpolicy"> + <value>immediate</value> + </attribute> + </optional> + <ref name='virtioOptions'/> + </group> + <empty/> + </choice> </element> </define> -- 2.24.1

Pass logManager to qemuExtDevicesStart for future usage. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_extdevice.c | 1 + src/qemu/qemu_extdevice.h | 1 + src/qemu/qemu_process.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 9c0c0fd573..7f3bb104d9 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -153,6 +153,7 @@ qemuExtDevicesCleanupHost(virQEMUDriverPtr driver, int qemuExtDevicesStart(virQEMUDriverPtr driver, virDomainObjPtr vm, + virLogManagerPtr logManager G_GNUC_UNUSED, bool incomingMigration) { virDomainDefPtr def = vm->def; diff --git a/src/qemu/qemu_extdevice.h b/src/qemu/qemu_extdevice.h index 5cf777ab4b..df29968e16 100644 --- a/src/qemu/qemu_extdevice.h +++ b/src/qemu/qemu_extdevice.h @@ -46,6 +46,7 @@ void qemuExtDevicesCleanupHost(virQEMUDriverPtr driver, int qemuExtDevicesStart(virQEMUDriverPtr driver, virDomainObjPtr vm, + virLogManagerPtr logManager, bool incomingMigration) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) G_GNUC_WARN_UNUSED_RESULT; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 8c1ed76677..5d24dc1645 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -6734,7 +6734,9 @@ qemuProcessLaunch(virConnectPtr conn, if (qemuProcessGenID(vm, flags) < 0) goto cleanup; - if (qemuExtDevicesStart(driver, vm, incoming != NULL) < 0) + if (qemuExtDevicesStart(driver, vm, + qemuDomainLogContextGetManager(logCtxt), + incoming != NULL) < 0) goto cleanup; VIR_DEBUG("Building emulator command line"); -- 2.24.1

Introduced by QEMU commit 98fc1ada4cf70af0f1df1a2d7183cf786fc7da05 virtio: add vhost-user-fs base device Released in QEMU v4.2.0. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Peter Krempa <pkrempa@redhat.com> Acked-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_capabilities.c | 4 ++++ src/qemu/qemu_capabilities.h | 3 +++ tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml | 1 + tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml | 1 + tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml | 1 + tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml | 1 + 7 files changed, 12 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 0e727093bc..13f9a08eb2 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -560,6 +560,9 @@ VIR_ENUM_IMPL(virQEMUCaps, "virtio-net.failover", "tpm-spapr", "cpu.kvm-no-adjvtime", + + /* 355 */ + "vhost-user-fs", ); @@ -1277,6 +1280,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = { { "i8042", QEMU_CAPS_DEVICE_I8042 }, { "rng-builtin", QEMU_CAPS_OBJECT_RNG_BUILTIN }, { "tpm-spapr", QEMU_CAPS_DEVICE_TPM_SPAPR }, + { "vhost-user-fs-device", QEMU_CAPS_DEVICE_VHOST_USER_FS }, }; static struct virQEMUCapsStringFlags virQEMUCapsDevicePropsVirtioBalloon[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 7b6ed53863..5b483a2419 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -543,6 +543,9 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_DEVICE_TPM_SPAPR, /* -device tpm-spapr */ QEMU_CAPS_CPU_KVM_NO_ADJVTIME, /* cpu.kvm-no-adjvtime */ + /* 355 */ + QEMU_CAPS_DEVICE_VHOST_USER_FS, /* -device vhost-user-fs */ + QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml index d0faa4c471..640ce29c8c 100644 --- a/tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml @@ -178,6 +178,7 @@ <flag name='smp-dies'/> <flag name='rng-builtin'/> <flag name='virtio-net.failover'/> + <flag name='vhost-user-fs'/> <version>4001050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml b/tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml index 9275bf36fa..37776e1bbe 100644 --- a/tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml +++ b/tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml @@ -137,6 +137,7 @@ <flag name='drive-nvme'/> <flag name='smp-dies'/> <flag name='rng-builtin'/> + <flag name='vhost-user-fs'/> <version>4001050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>39100242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml index faed23f5c1..83e804ea36 100644 --- a/tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml @@ -222,6 +222,7 @@ <flag name='i8042'/> <flag name='rng-builtin'/> <flag name='virtio-net.failover'/> + <flag name='vhost-user-fs'/> <version>4002000</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100242</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml index c05cea2eb7..e52c60607d 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml @@ -180,6 +180,7 @@ <flag name='rng-builtin'/> <flag name='virtio-net.failover'/> <flag name='cpu.kvm-no-adjvtime'/> + <flag name='vhost-user-fs'/> <version>4002050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>61700241</microcodeVersion> diff --git a/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml b/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml index 1eef7197ef..6902cffd17 100644 --- a/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml +++ b/tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml @@ -223,6 +223,7 @@ <flag name='i8042'/> <flag name='rng-builtin'/> <flag name='virtio-net.failover'/> + <flag name='vhost-user-fs'/> <version>4002050</version> <kvmVersion>0</kvmVersion> <microcodeVersion>43100241</microcodeVersion> -- 2.24.1

In subject: You named the capability QEMU_CAPS_DEVICE_VHOST_USER_FS On Thu, Feb 20, 2020 at 15:32:41 +0100, Ján Tomko wrote:
Introduced by QEMU commit 98fc1ada4cf70af0f1df1a2d7183cf786fc7da05 virtio: add vhost-user-fs base device
Released in QEMU v4.2.0.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Add a document describing the usage of virtiofs. --- docs/kbase.html.in | 3 + docs/kbase/virtiofs.rst | 152 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 docs/kbase/virtiofs.rst diff --git a/docs/kbase.html.in b/docs/kbase.html.in index c156414c41..7d6caf3cb1 100644 --- a/docs/kbase.html.in +++ b/docs/kbase.html.in @@ -29,6 +29,9 @@ <dt><a href="kbase/backing_chains.html">Backing chain management</a></dt> <dd>Explanation of how disk backing chain specification impacts libvirt's behaviour and basic troubleshooting steps of disk problems.</dd> + + <dt><a href="kbase/virtiofs.html">Virtio-FS</a></dt> + <dd>Share a filesystem between the guest and the host</dd> </dl> </div> diff --git a/docs/kbase/virtiofs.rst b/docs/kbase/virtiofs.rst new file mode 100644 index 0000000000..fe6885d139 --- /dev/null +++ b/docs/kbase/virtiofs.rst @@ -0,0 +1,152 @@ +============================ +Sharing files with Virtio-FS +============================ + +=== 8< delete before merging 8< === +NOTE: if you're looking at this note, this is just a proposal. +See the up-to-date version on: https://libvirt.org/kbase/virtiofs.html +=== 8< --------------------- 8< === + +.. contents:: + +========= +Virtio-FS +========= + +Virtio-FS is a shared file system that lets virtual machines access +a directory tree on the host. Unlike existing approaches, it +is designed to offer local file system semantics and performance. + +See https://virtio-fs.gitlab.io/ + +========== +Host setup +========== + +The host-side virtiofsd daemon, like other vhost-user backed devices, +requires shared memory between the host and the guest. As of QEMU 4.2, this +requires specifying a NUMA topology for the guest and explicitly specifying +a memory backend. Multiple options are available: + +Either of the following: + +* Use file-backed memory + + Configure the directory where the files backing the memory will be stored + with the ``memory_backing_dir`` option in ``/etc/libvirt/qemu.conf`` + + :: + + # This directory is used for memoryBacking source if configured as file. + # NOTE: big files will be stored here + memory_backing_dir = "/dev/shm/" + +* Use hugepage-backed memory + + Make sure there are enough huge pages allocated for the requested guest memory. + For example, for one guest with 2 GiB of RAM backed by 2 MiB hugepages: + + :: + + # virsh allocpages 2M 1024 + +=========== +Guest setup +=========== + +#. Specify the NUMA topology + + in the domain XML of the guest. + For the simplest one-node topology for a guest with 2GiB of RAM and 8 vCPUs: + + :: + + <domain> + ... + <cpu ...> + <numa> + <cell id='0' cpus='0-7' memory='2' unit='GiB' memAccess='shared'/> + </numa> + </cpu> + ... + </domain> + + Note that the CPU element might already be specified and only one is allowed. + +#. Specify the memory backend + + Either of the following: + + * File-backed memory + + :: + + <domain> + ... + <memoryBacking> + <access mode='shared'/> + </memoryBacking> + ... + </domain> + + This will create a file in the directory specified in ``qemu.conf`` + + * Hugepage-backed memory + + :: + + <domain> + ... + <memoryBacking> + <hugepages> + <page size='2' unit='M'/> + </hugepages> + <access mode='shared'/> + </memoryBacking> + ... + </domain> + +#. Add the ``vhost-user-fs`` QEMU device via the ``filesystem`` element + + :: + + <domain> + ... + <devices> + ... + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtiofs'/> + <source dir='/path'/> + <target dir='mount_tag'/> + </filesystem> + ... + </devices> + </domain> + + Note that despite its name, the ``target dir`` is actually a mount tag and does + not have to correspond to the desired mount point in the guest. + + So far, ``passthrough`` is the only supported access mode and it requires + running the ``virtiofsd`` daemon as root. + +#. Boot the guest and mount the filesystem + + :: + + guest# mount -t virtiofs mount_tag /mnt/mount/path + + Note: this requires virtiofs support in the guest kernel (Linux v5.4 or later) + +=================== +Optional parameters +=================== + +More optional elements can be specified + +:: + + <driver type='virtiofs' queue='1024'/> + <binary path='/usr/libexec/virtiofsd' xattr='on'> + <cache mode='always'/> + <lock posix_lock='on' flock='on'/> + </binary> -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:42 +0100, Ján Tomko wrote:
Add a document describing the usage of virtiofs. --- docs/kbase.html.in | 3 + docs/kbase/virtiofs.rst | 152 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 docs/kbase/virtiofs.rst
diff --git a/docs/kbase.html.in b/docs/kbase.html.in index c156414c41..7d6caf3cb1 100644 --- a/docs/kbase.html.in +++ b/docs/kbase.html.in @@ -29,6 +29,9 @@ <dt><a href="kbase/backing_chains.html">Backing chain management</a></dt> <dd>Explanation of how disk backing chain specification impacts libvirt's behaviour and basic troubleshooting steps of disk problems.</dd> + + <dt><a href="kbase/virtiofs.html">Virtio-FS</a></dt> + <dd>Share a filesystem between the guest and the host</dd> </dl> </div>
diff --git a/docs/kbase/virtiofs.rst b/docs/kbase/virtiofs.rst new file mode 100644 index 0000000000..fe6885d139 --- /dev/null +++ b/docs/kbase/virtiofs.rst @@ -0,0 +1,152 @@ +============================ +Sharing files with Virtio-FS +============================ + +=== 8< delete before merging 8< === +NOTE: if you're looking at this note, this is just a proposal. +See the up-to-date version on: https://libvirt.org/kbase/virtiofs.html +=== 8< --------------------- 8< === + +.. contents:: + +========= +Virtio-FS +========= + +Virtio-FS is a shared file system that lets virtual machines access +a directory tree on the host. Unlike existing approaches, it +is designed to offer local file system semantics and performance. + +See https://virtio-fs.gitlab.io/ +
I'm lacking description of security implications. The above link doesn't do a good job to pointing to the relevant info. I've seen this document: https://gitlab.com/virtio-fs/qemu/-/commit/718c71fa44f6b92ac27558c903d279352... which describes it nicely so we should link to the formatted version of it probably. Other than that: Reviewed-by: Peter Krempa <pkrempa@redhat.com>

[adding dgilbert] On Wed, Feb 26, 2020 at 10:28:18AM +0100, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:42 +0100, Ján Tomko wrote:
Add a document describing the usage of virtiofs. --- docs/kbase.html.in | 3 + docs/kbase/virtiofs.rst | 152 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 docs/kbase/virtiofs.rst
diff --git a/docs/kbase.html.in b/docs/kbase.html.in index c156414c41..7d6caf3cb1 100644 --- a/docs/kbase.html.in +++ b/docs/kbase.html.in @@ -29,6 +29,9 @@ <dt><a href="kbase/backing_chains.html">Backing chain management</a></dt> <dd>Explanation of how disk backing chain specification impacts libvirt's behaviour and basic troubleshooting steps of disk problems.</dd> + + <dt><a href="kbase/virtiofs.html">Virtio-FS</a></dt> + <dd>Share a filesystem between the guest and the host</dd> </dl> </div>
diff --git a/docs/kbase/virtiofs.rst b/docs/kbase/virtiofs.rst new file mode 100644 index 0000000000..fe6885d139 --- /dev/null +++ b/docs/kbase/virtiofs.rst @@ -0,0 +1,152 @@ +============================ +Sharing files with Virtio-FS +============================ + +=== 8< delete before merging 8< === +NOTE: if you're looking at this note, this is just a proposal. +See the up-to-date version on: https://libvirt.org/kbase/virtiofs.html +=== 8< --------------------- 8< === + +.. contents:: + +========= +Virtio-FS +========= + +Virtio-FS is a shared file system that lets virtual machines access +a directory tree on the host. Unlike existing approaches, it +is designed to offer local file system semantics and performance. + +See https://virtio-fs.gitlab.io/ +
I'm lacking description of security implications. The above link doesn't do a good job to pointing to the relevant info. I've seen this document:
https://gitlab.com/virtio-fs/qemu/-/commit/718c71fa44f6b92ac27558c903d279352...
which describes it nicely so we should link to the formatted version of it probably.
That document was part of the v1 PULL request of virtiofsd to QEMU: https://lists.nongnu.org/archive/html/qemu-devel/2020-01/msg05389.html but due to a discussion about the file's location https://lists.nongnu.org/archive/html/qemu-devel/2020-01/msg05733.html it was dropped from v2: https://lists.nongnu.org/archive/html/qemu-devel/2020-01/msg05780.html Would you like me to include the link from the merge request instead? Jano
Other than that:
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Introduce a new 'virtiofs' driver type for filesystem. <filesystem type='mount' accessmode='passthrough'> <driver type='virtiofs'/> <source dir='/path'/> <target dir='mount_tag'> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> </filesystem> Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- docs/formatdomain.html.in | 12 ++- docs/schemas/domaincommon.rng | 6 ++ src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 4 + src/qemu/qemu_domain.c | 4 + src/qemu/qemu_domain_address.c | 4 + .../vhost-user-fs-fd-memory.xml | 39 ++++++++++ .../vhost-user-fs-hugepages.xml | 74 +++++++++++++++++++ .../vhost-user-fs-fd-memory.x86_64-latest.xml | 1 + .../vhost-user-fs-hugepages.x86_64-latest.xml | 1 + tests/qemuxml2xmltest.c | 3 + 12 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f4af65f13f..dab8fb8f6b 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3935,6 +3935,11 @@ <target dir='/import/from/host'/> <readonly/> </filesystem> + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtiofs'/> + <source dir='/path'/> + <target dir='mount_tag'/> + </filesystem> ... </devices> ...</pre> @@ -3963,6 +3968,9 @@ while the value <code>immediate</code> means that a host writeback is immediately triggered for all pages touched during a guest file write operation <span class="since">(since 0.9.10)</span>. + <span class="since">Since 6.1.0</span>, <code>type='virtiofs'</code> + is also supported. Using virtiofs requires setting up shared memory, + see the guide: <a href="kbase/virtiofs.html">Virtio-FS</a> </dd> <dt><code>template</code></dt> <dd> @@ -3998,7 +4006,9 @@ The filesystem element has an optional attribute <code>accessmode</code> which specifies the security mode for accessing the source <span class="since">(since 0.8.5)</span>. Currently this only works - with <code>type='mount'</code> for the QEMU/KVM driver. The possible + with <code>type='mount'</code> for the QEMU/KVM driver. + For driver type <code>virtiofs</code>, only <code>passthrough</code> is + supported. For other driver types, the possible values are: <dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 5b609a4756..f7125e401a 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2645,6 +2645,12 @@ </optional> <ref name='virtioOptions'/> </group> + <group> + <attribute name="type"> + <value>virtiofs</value> + </attribute> + <ref name='virtioOptions'/> + </group> <empty/> </choice> </element> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index dcd070d2ad..d78ea92ead 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -477,6 +477,7 @@ VIR_ENUM_IMPL(virDomainFSDriver, "loop", "nbd", "ploop", + "virtiofs", ); VIR_ENUM_IMPL(virDomainFSAccessMode, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 867a9c7661..0ef1746e2a 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -773,6 +773,7 @@ typedef enum { VIR_DOMAIN_FS_DRIVER_TYPE_LOOP, VIR_DOMAIN_FS_DRIVER_TYPE_NBD, VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP, + VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS, VIR_DOMAIN_FS_DRIVER_TYPE_LAST } virDomainFSDriverType; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f69a9e651c..9fcd06f8c3 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2680,6 +2680,10 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, return -1; break; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS: + /* TODO: vhost-user-fs-pci */ + break; + case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index af6817cc05..c3fc3fed1c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8298,6 +8298,10 @@ qemuDomainDeviceDefValidateFS(virDomainFSDefPtr fs, _("Filesystem driver type not supported")); return -1; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS: + /* TODO: vhost-user-fs-pci */ + return 0; + case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: default: virReportEnumRangeError(virDomainFSDriverType, fs->fsdriver); diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index c7d8a3ac3b..ab6bce19f4 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -699,6 +699,10 @@ qemuDomainDeviceCalculatePCIConnectFlags(virDomainDeviceDefPtr dev, } break; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS: + /* vhost-user-fs-pci */ + return virtioFlags; + case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml new file mode 100644 index 0000000000..a6b6279fb8 --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -0,0 +1,39 @@ +<domain type='kvm'> + <name>guest</name> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid> + <memory unit='KiB'>14680064</memory> + <currentMemory unit='KiB'>14680064</currentMemory> + <memoryBacking> + <source type='file'/> + <access mode='shared'/> + </memoryBacking> + <vcpu placement='static'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + <numa> + <cell id='0' cpus='0-1' memory='14680064' unit='KiB' memAccess='shared'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='usb' index='0' model='none'/> + <controller type='pci' index='0' model='pci-root'/> + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtiofs'/> + <source dir='/path'/> + <target dir='mount_tag'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </filesystem> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml new file mode 100644 index 0000000000..70df7b890d --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml @@ -0,0 +1,74 @@ +<domain type='qemu'> + <name>guest</name> + <uuid>1ccfd97d-5eb4-478a-bbe6-88d254c16db7</uuid> + <memory unit='KiB'>4194304</memory> + <currentMemory unit='KiB'>4194304</currentMemory> + <memoryBacking> + <hugepages> + <page size='2048' unit='KiB'/> + </hugepages> + <access mode='shared'/> + </memoryBacking> + <vcpu placement='static'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <acpi/> + <apic/> + </features> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + <numa> + <cell id='0' cpus='0-1' memory='2097152' unit='KiB' memAccess='shared'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2'/> + <source file='/var/lib/libvirt/images/guest.qcow2'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/> + </disk> + <controller type='usb' index='0' model='none'/> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x8'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x9'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='3' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='3' port='0xa'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='4' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='4' port='0xb'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x3'/> + </controller> + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtiofs'/> + <source dir='/path'/> + <target dir='mount_tag'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> + </filesystem> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml b/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml new file mode 120000 index 0000000000..fbc552ef94 --- /dev/null +++ b/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/vhost-user-fs-fd-memory.xml \ No newline at end of file diff --git a/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml b/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml new file mode 120000 index 0000000000..0c0f05b254 --- /dev/null +++ b/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/vhost-user-fs-hugepages.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index daf9b53ce8..2c96bd5e7b 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1432,6 +1432,9 @@ mymain(void) DO_TEST("vhost-vsock-ccw-auto", QEMU_CAPS_DEVICE_VHOST_VSOCK, QEMU_CAPS_CCW); + DO_TEST_CAPS_LATEST("vhost-user-fs-fd-memory"); + DO_TEST_CAPS_LATEST("vhost-user-fs-hugepages"); + DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO); DO_TEST("riscv64-virt-pci", -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:43 +0100, Ján Tomko wrote:
Introduce a new 'virtiofs' driver type for filesystem.
<filesystem type='mount' accessmode='passthrough'> <driver type='virtiofs'/> <source dir='/path'/> <target dir='mount_tag'> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> </filesystem>
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> ---
[...]
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f4af65f13f..dab8fb8f6b 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in [...]
@@ -3963,6 +3968,9 @@ while the value <code>immediate</code> means that a host writeback is immediately triggered for all pages touched during a guest file write operation <span class="since">(since 0.9.10)</span>. + <span class="since">Since 6.1.0</span>, <code>type='virtiofs'</code>
Might require update now.
+ is also supported. Using virtiofs requires setting up shared memory, + see the guide: <a href="kbase/virtiofs.html">Virtio-FS</a> </dd> <dt><code>template</code></dt> <dd>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Add more elements for tuning the virtiofsd daemon and the vhost-user-fs device: <driver type='virtiofs' queue='1024' xattr='on'> <binary path='/usr/libexec/virtiofsd'> <cache mode='always'/> <lock posix='off' flock='off'/> </binary> </driver> Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> --- docs/formatdomain.html.in | 25 +++- docs/schemas/domaincommon.rng | 48 ++++++++ src/conf/domain_conf.c | 107 +++++++++++++++++- src/conf/domain_conf.h | 15 +++ src/libvirt_private.syms | 1 + .../vhost-user-fs-fd-memory.xml | 6 +- .../vhost-user-fs-hugepages.xml | 1 + 7 files changed, 200 insertions(+), 3 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index dab8fb8f6b..7c4153c7ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3936,10 +3936,15 @@ <readonly/> </filesystem> <filesystem type='mount' accessmode='passthrough'> - <driver type='virtiofs'/> + <driver type='virtiofs queue='1024'/> + <binary path='/usr/libexec/virtiofsd' xattr='on'> + <cache mode='always'/> + <lock posix='on' flock='on'/> + </binary> <source dir='/path'/> <target dir='mount_tag'/> </filesystem> + ... </devices> ...</pre> @@ -4063,9 +4068,27 @@ <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) </li> + <li> + For <code>virtiofs</code>, the <code>queue</code> attribute can be used + to specify the queue size (i.e. how many requests can the queue fit). + (<span class="since">Since 6.1.0</span>) + </li> </ul> </dd> + <dt><code>binary</code></dt> + <dd> + The optional <code>binary</code> element can tune the options for virtiofsd. + The attribute <code>path</code> can be used to override the path to the daemon. + Attribute <code>xattr</code> enables the use of filesystem extended attributes. + Caching can be tuned via the <code>cache</code> element, possible <code>mode</code> + values being <code>none</code> and <code>always</code>. + Locking can be controlled via the <code>lock</code> + element - attributes <code>posix</code> and <code>flock</code> both accepting + values <code>yes</code> or <code>no</code>. + (<span class="since">Since 6.1.0</span>) + </dd> + <dt><code>source</code></dt> <dd> The resource on the host that is being accessed in the guest. The diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index f7125e401a..a3e60a6430 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2500,6 +2500,9 @@ <optional> <ref name="fsDriver"/> </optional> + <optional> + <ref name="fsBinary"/> + </optional> <interleave> <element name="source"> <attribute name="dir"> @@ -2649,12 +2652,57 @@ <attribute name="type"> <value>virtiofs</value> </attribute> + <optional> + <attribute name="queue"> + <ref name="unsignedInt"/> + </attribute> + </optional> <ref name='virtioOptions'/> </group> <empty/> </choice> </element> </define> + <define name="fsBinary"> + <element name="binary"> + <optional> + <attribute name="path"> + <ref name="absFilePath"/> + </attribute> + </optional> + <optional> + <attribute name="xattr"> + <ref name="virOnOff"/> + </attribute> + </optional> + <optional> + <element name="cache"> + <optional> + <attribute name="mode"> + <choice> + <value>none</value> + <value>always</value> + </choice> + </attribute> + </optional> + </element> + </optional> + <optional> + <element name="lock"> + <optional> + <attribute name="posix"> + <ref name="virOnOff"/> + </attribute> + </optional> + <optional> + <attribute name="flock"> + <ref name="virOnOff"/> + </attribute> + </optional> + </element> + </optional> + </element> + </define> <define name="interface-network-attributes"> <attribute name="network"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d78ea92ead..8e7400294b 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -501,6 +501,14 @@ VIR_ENUM_IMPL(virDomainFSModel, "virtio-non-transitional", ); +VIR_ENUM_IMPL(virDomainFSCacheMode, + VIR_DOMAIN_FS_CACHE_MODE_LAST, + "default", + "none", + "always", +); + + VIR_ENUM_IMPL(virDomainNet, VIR_DOMAIN_NET_TYPE_LAST, "user", @@ -2325,6 +2333,7 @@ void virDomainFSDefFree(virDomainFSDefPtr def) VIR_FREE(def->dst); virDomainDeviceInfoClear(&def->info); VIR_FREE(def->virtio); + VIR_FREE(def->binary); VIR_FREE(def); } @@ -11311,6 +11320,64 @@ virDomainFSDefParseXML(virDomainXMLOptionPtr xmlopt, } } + if (def->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + g_autofree char *queue_size = virXPathString("string(./driver/@queue)", ctxt); + g_autofree char *binary = virXPathString("string(./binary/@path)", ctxt); + g_autofree char *xattr = virXPathString("string(./binary/@xattr)", ctxt); + g_autofree char *cache = virXPathString("string(./binary/cache/@mode)", ctxt); + g_autofree char *posix_lock = virXPathString("string(./binary/lock/@posix)", ctxt); + g_autofree char *flock = virXPathString("string(./binary/lock/@flock)", ctxt); + int val; + + + if (queue_size && virStrToLong_ull(queue_size, NULL, 10, &def->queue_size) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("cannot parse queue size '%s' for virtiofs"), + queue_size); + goto error; + } + + if (binary) + def->binary = virFileSanitizePath(binary); + + if (xattr) { + if ((val = virTristateSwitchTypeFromString(xattr)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown xattr value '%s'"), xattr); + goto error; + } + def->xattr = val; + } + + if (cache) { + if ((val = virDomainFSCacheModeTypeFromString(cache)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("cannot parse cache mode '%s' for virtiofs"), + cache); + goto error; + } + def->cache = val; + } + + if (posix_lock) { + if ((val = virTristateSwitchTypeFromString(posix_lock)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown posix lock value '%s'"), posix_lock); + goto error; + } + def->posix_lock = val; + } + + if (flock) { + if ((val = virTristateSwitchTypeFromString(flock)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown flock value '%s'"), flock); + goto error; + } + def->flock = val; + } + } + if (format) { if ((def->format = virStorageFileFormatTypeFromString(format)) <= 0) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -25142,6 +25209,9 @@ virDomainFSDefFormat(virBufferPtr buf, const char *wrpolicy = virDomainFSWrpolicyTypeToString(def->wrpolicy); const char *src = def->src->path; g_auto(virBuffer) driverAttrBuf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) driverBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) binaryAttrBuf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) binaryBuf = VIR_BUFFER_INIT_CHILD(buf); if (!type) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -25165,6 +25235,8 @@ virDomainFSDefFormat(virBufferPtr buf, virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); + virBufferAdjustIndent(&driverBuf, 2); + virBufferAdjustIndent(&binaryBuf, 2); if (def->fsdriver) { virBufferAsprintf(&driverAttrBuf, " type='%s'", fsdriver); @@ -25176,11 +25248,44 @@ virDomainFSDefFormat(virBufferPtr buf, if (def->wrpolicy) virBufferAsprintf(&driverAttrBuf, " wrpolicy='%s'", wrpolicy); + if (def->queue_size) + virBufferAsprintf(&driverAttrBuf, " queue='%llu'", def->queue_size); + + } + + if (def->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + g_auto(virBuffer) lockAttrBuf = VIR_BUFFER_INITIALIZER; + virBufferEscapeString(&binaryAttrBuf, " path='%s'", def->binary); + + if (def->xattr != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&binaryAttrBuf, " xattr='%s'", + virTristateSwitchTypeToString(def->xattr)); + } + + if (def->cache != VIR_DOMAIN_FS_CACHE_MODE_DEFAULT) { + virBufferAsprintf(&binaryBuf, "<cache mode='%s'/>\n", + virDomainFSCacheModeTypeToString(def->cache)); + } + + if (def->posix_lock != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&lockAttrBuf, " posix='%s'", + virTristateSwitchTypeToString(def->posix_lock)); + } + + if (def->flock != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&lockAttrBuf, " flock='%s'", + virTristateSwitchTypeToString(def->flock)); + } + + virXMLFormatElement(&binaryBuf, "lock", &lockAttrBuf, NULL); } + virDomainVirtioOptionsFormat(&driverAttrBuf, def->virtio); - virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf); + virXMLFormatElement(buf, "binary", &binaryAttrBuf, &binaryBuf); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf); switch (def->type) { case VIR_DOMAIN_FS_TYPE_MOUNT: diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0ef1746e2a..07815537d8 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -804,6 +804,14 @@ typedef enum { VIR_DOMAIN_FS_MODEL_LAST } virDomainFSModel; +typedef enum { + VIR_DOMAIN_FS_CACHE_MODE_DEFAULT = 0, + VIR_DOMAIN_FS_CACHE_MODE_NONE, + VIR_DOMAIN_FS_CACHE_MODE_ALWAYS, + + VIR_DOMAIN_FS_CACHE_MODE_LAST +} virDomainFSCacheMode; + struct _virDomainFSDef { int type; int fsdriver; /* enum virDomainFSDriverType */ @@ -819,6 +827,12 @@ struct _virDomainFSDef { unsigned long long space_hard_limit; /* in bytes */ unsigned long long space_soft_limit; /* in bytes */ bool symlinksResolved; + char *binary; + unsigned long long queue_size; + virTristateSwitch xattr; + virDomainFSCacheMode cache; + virTristateSwitch posix_lock; + virTristateSwitch flock; virDomainVirtioOptionsPtr virtio; virObjectPtr privateData; }; @@ -3446,6 +3460,7 @@ VIR_ENUM_DECL(virDomainFSDriver); VIR_ENUM_DECL(virDomainFSAccessMode); VIR_ENUM_DECL(virDomainFSWrpolicy); VIR_ENUM_DECL(virDomainFSModel); +VIR_ENUM_DECL(virDomainFSCacheMode); VIR_ENUM_DECL(virDomainNet); VIR_ENUM_DECL(virDomainNetBackend); VIR_ENUM_DECL(virDomainNetVirtioTxMode); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 125d1836dd..49eb8ab7cb 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -395,6 +395,7 @@ virDomainDiskSourceFormat; virDomainDiskTranslateSourcePool; virDomainFeatureTypeFromString; virDomainFeatureTypeToString; +virDomainFSCacheModeTypeToString; virDomainFSDefFree; virDomainFSDefNew; virDomainFSDriverTypeToString; diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml index a6b6279fb8..f6bb663e97 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -27,7 +27,11 @@ <controller type='usb' index='0' model='none'/> <controller type='pci' index='0' model='pci-root'/> <filesystem type='mount' accessmode='passthrough'> - <driver type='virtiofs'/> + <driver type='virtiofs' queue='1024'/> + <binary path='/usr/libexec/virtiofsd' xattr='on'> + <cache mode='always'/> + <lock posix='off' flock='off'/> + </binary> <source dir='/path'/> <target dir='mount_tag'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml index 70df7b890d..96b9774704 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml @@ -63,6 +63,7 @@ </controller> <filesystem type='mount' accessmode='passthrough'> <driver type='virtiofs'/> + <binary path='/usr/libexec/virtiofsd'/> <source dir='/path'/> <target dir='mount_tag'/> <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:44 +0100, Ján Tomko wrote:
Add more elements for tuning the virtiofsd daemon and the vhost-user-fs device:
<driver type='virtiofs' queue='1024' xattr='on'> <binary path='/usr/libexec/virtiofsd'> <cache mode='always'/> <lock posix='off' flock='off'/> </binary> </driver>
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> --- docs/formatdomain.html.in | 25 +++- docs/schemas/domaincommon.rng | 48 ++++++++ src/conf/domain_conf.c | 107 +++++++++++++++++- src/conf/domain_conf.h | 15 +++ src/libvirt_private.syms | 1 + .../vhost-user-fs-fd-memory.xml | 6 +- .../vhost-user-fs-hugepages.xml | 1 + 7 files changed, 200 insertions(+), 3 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index dab8fb8f6b..7c4153c7ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3936,10 +3936,15 @@ <readonly/> </filesystem> <filesystem type='mount' accessmode='passthrough'> - <driver type='virtiofs'/> + <driver type='virtiofs queue='1024'/> + <binary path='/usr/libexec/virtiofsd' xattr='on'> + <cache mode='always'/> + <lock posix='on' flock='on'/> + </binary> <source dir='/path'/> <target dir='mount_tag'/> </filesystem> +
Spurious whitespace?
... </devices> ...</pre> @@ -4063,9 +4068,27 @@ <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) </li> + <li> + For <code>virtiofs</code>, the <code>queue</code> attribute can be used + to specify the queue size (i.e. how many requests can the queue fit). + (<span class="since">Since 6.1.0</span>)
Version.
+ </li> </ul> </dd>
+ <dt><code>binary</code></dt> + <dd> + The optional <code>binary</code> element can tune the options for virtiofsd.
I think you should state that any of the following attributes/elements are optional, so that it's obvious that e.g. 'path' can be omitted if the user wishes to configure locking.
+ The attribute <code>path</code> can be used to override the path to the daemon. + Attribute <code>xattr</code> enables the use of filesystem extended attributes. + Caching can be tuned via the <code>cache</code> element, possible <code>mode</code> + values being <code>none</code> and <code>always</code>. + Locking can be controlled via the <code>lock</code> + element - attributes <code>posix</code> and <code>flock</code> both accepting + values <code>yes</code> or <code>no</code>. + (<span class="since">Since 6.1.0</span>)
Version.
+ </dd> + <dt><code>source</code></dt> <dd> The resource on the host that is being accessed in the guest. The
[...]
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d78ea92ead..8e7400294b 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c
[...]
@@ -11311,6 +11320,64 @@ virDomainFSDefParseXML(virDomainXMLOptionPtr xmlopt, } }
+ if (def->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + g_autofree char *queue_size = virXPathString("string(./driver/@queue)", ctxt); + g_autofree char *binary = virXPathString("string(./binary/@path)", ctxt); + g_autofree char *xattr = virXPathString("string(./binary/@xattr)", ctxt); + g_autofree char *cache = virXPathString("string(./binary/cache/@mode)", ctxt); + g_autofree char *posix_lock = virXPathString("string(./binary/lock/@posix)", ctxt); + g_autofree char *flock = virXPathString("string(./binary/lock/@flock)", ctxt); + int val; + +
Too many newlines.
+ if (queue_size && virStrToLong_ull(queue_size, NULL, 10, &def->queue_size) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("cannot parse queue size '%s' for virtiofs"), + queue_size); + goto error;
[...]
@@ -25142,6 +25209,9 @@ virDomainFSDefFormat(virBufferPtr buf, const char *wrpolicy = virDomainFSWrpolicyTypeToString(def->wrpolicy); const char *src = def->src->path; g_auto(virBuffer) driverAttrBuf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) driverBuf = VIR_BUFFER_INIT_CHILD(buf); + g_auto(virBuffer) binaryAttrBuf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) binaryBuf = VIR_BUFFER_INIT_CHILD(buf);
if (!type) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -25165,6 +25235,8 @@ virDomainFSDefFormat(virBufferPtr buf, virBufferAddLit(buf, ">\n");
virBufferAdjustIndent(buf, 2); + virBufferAdjustIndent(&driverBuf, 2); + virBufferAdjustIndent(&binaryBuf, 2);
Eww. Something is wrong if you need to tweak the indentation after using VIR_BUFFER_INIT_CHILD.
if (def->fsdriver) { virBufferAsprintf(&driverAttrBuf, " type='%s'", fsdriver);
@@ -25176,11 +25248,44 @@ virDomainFSDefFormat(virBufferPtr buf, if (def->wrpolicy) virBufferAsprintf(&driverAttrBuf, " wrpolicy='%s'", wrpolicy);
+ if (def->queue_size) + virBufferAsprintf(&driverAttrBuf, " queue='%llu'", def->queue_size); + + } + + if (def->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + g_auto(virBuffer) lockAttrBuf = VIR_BUFFER_INITIALIZER; + virBufferEscapeString(&binaryAttrBuf, " path='%s'", def->binary); + + if (def->xattr != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&binaryAttrBuf, " xattr='%s'", + virTristateSwitchTypeToString(def->xattr)); + } + + if (def->cache != VIR_DOMAIN_FS_CACHE_MODE_DEFAULT) { + virBufferAsprintf(&binaryBuf, "<cache mode='%s'/>\n", + virDomainFSCacheModeTypeToString(def->cache)); + } + + if (def->posix_lock != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&lockAttrBuf, " posix='%s'", + virTristateSwitchTypeToString(def->posix_lock)); + } + + if (def->flock != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&lockAttrBuf, " flock='%s'", + virTristateSwitchTypeToString(def->flock)); + } + + virXMLFormatElement(&binaryBuf, "lock", &lockAttrBuf, NULL); }
+
Surious newline.
virDomainVirtioOptionsFormat(&driverAttrBuf, def->virtio);
- virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf); + virXMLFormatElement(buf, "binary", &binaryAttrBuf, &binaryBuf); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf);
'driver' would be formatted twice if the first call of virXMLFormatElement wouldn't clear the arguments. Remove the latter one.
switch (def->type) { case VIR_DOMAIN_FS_TYPE_MOUNT:
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

On Wed, Feb 26, 2020 at 08:23:43AM +0100, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:44 +0100, Ján Tomko wrote:
Add more elements for tuning the virtiofsd daemon and the vhost-user-fs device:
<driver type='virtiofs' queue='1024' xattr='on'> <binary path='/usr/libexec/virtiofsd'> <cache mode='always'/> <lock posix='off' flock='off'/> </binary> </driver>
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> --- docs/formatdomain.html.in | 25 +++- docs/schemas/domaincommon.rng | 48 ++++++++ src/conf/domain_conf.c | 107 +++++++++++++++++- src/conf/domain_conf.h | 15 +++ src/libvirt_private.syms | 1 + .../vhost-user-fs-fd-memory.xml | 6 +- .../vhost-user-fs-hugepages.xml | 1 + 7 files changed, 200 insertions(+), 3 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index dab8fb8f6b..7c4153c7ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3936,10 +3936,15 @@ <readonly/> </filesystem> <filesystem type='mount' accessmode='passthrough'> - <driver type='virtiofs'/> + <driver type='virtiofs queue='1024'/> + <binary path='/usr/libexec/virtiofsd' xattr='on'> + <cache mode='always'/> + <lock posix='on' flock='on'/> + </binary> <source dir='/path'/> <target dir='mount_tag'/> </filesystem> +
Spurious whitespace?
Yes.
... </devices> ...</pre> @@ -4063,9 +4068,27 @@ <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) </li> + <li> + For <code>virtiofs</code>, the <code>queue</code> attribute can be used + to specify the queue size (i.e. how many requests can the queue fit). + (<span class="since">Since 6.1.0</span>)
Version.
+ </li> </ul> </dd>
+ <dt><code>binary</code></dt> + <dd> + The optional <code>binary</code> element can tune the options for virtiofsd.
I think you should state that any of the following attributes/elements are optional, so that it's obvious that e.g. 'path' can be omitted if the user wishes to configure locking.
Done.
+ The attribute <code>path</code> can be used to override the path to the daemon. + Attribute <code>xattr</code> enables the use of filesystem extended attributes. + Caching can be tuned via the <code>cache</code> element, possible <code>mode</code> + values being <code>none</code> and <code>always</code>. + Locking can be controlled via the <code>lock</code> + element - attributes <code>posix</code> and <code>flock</code> both accepting + values <code>yes</code> or <code>no</code>. + (<span class="since">Since 6.1.0</span>)
Version.
+ </dd> + <dt><code>source</code></dt> <dd> The resource on the host that is being accessed in the guest. The @@ -25165,6 +25235,8 @@ virDomainFSDefFormat(virBufferPtr buf, virBufferAddLit(buf, ">\n");
virBufferAdjustIndent(buf, 2); + virBufferAdjustIndent(&driverBuf, 2); + virBufferAdjustIndent(&binaryBuf, 2);
Eww. Something is wrong if you need to tweak the indentation after using VIR_BUFFER_INIT_CHILD.
Yes, matches the pre-existing BufferAdjustIndent. I had to fight the urge to clean up everything in this epopee.
if (def->fsdriver) { virBufferAsprintf(&driverAttrBuf, " type='%s'", fsdriver);
@@ -25176,11 +25248,44 @@ virDomainFSDefFormat(virBufferPtr buf, if (def->wrpolicy) virBufferAsprintf(&driverAttrBuf, " wrpolicy='%s'", wrpolicy);
+ if (def->queue_size) + virBufferAsprintf(&driverAttrBuf, " queue='%llu'", def->queue_size); + + } +
[...]
Surious newline.
virDomainVirtioOptionsFormat(&driverAttrBuf, def->virtio);
- virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf); + virXMLFormatElement(buf, "binary", &binaryAttrBuf, &binaryBuf); + virXMLFormatElement(buf, "driver", &driverAttrBuf, &driverBuf);
'driver' would be formatted twice if the first call of virXMLFormatElement wouldn't clear the arguments. Remove the latter one.
Probably a rebase artifact. Jano
switch (def->type) { case VIR_DOMAIN_FS_TYPE_MOUNT:
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Add a 'virtiofsd_debug' option for tuning whether to run virtiofsd in debug mode. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 +++++++ src/qemu/qemu_conf.c | 2 ++ src/qemu/qemu_conf.h | 1 + src/qemu/test_libvirtd_qemu.aug.in | 1 + 5 files changed, 12 insertions(+) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index 557b6f38f8..3014fa6b86 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -116,6 +116,7 @@ module Libvirtd_qemu = let nvram_entry = str_array_entry "nvram" let debug_level_entry = int_entry "gluster_debug_level" + | bool_entry "virtiofsd_debug" let memory_entry = str_entry "memory_backing_dir" diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index b6805ffc41..e82c1b5bd5 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -809,6 +809,13 @@ # #gluster_debug_level = 9 +# virtiofsd debug +# +# Whether to enable the debugging output of the virtiofsd daemon. +# Possible values are 0 or 1. +# +#virtiofsd_debug = 1 + # To enhance security, QEMU driver is capable of creating private namespaces # for each domain started. Well, so far only "mount" namespace is supported. If # enabled it means qemu process is unable to see all the devices on the system, diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 0357501dc6..08c2fffce3 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -835,6 +835,8 @@ virQEMUDriverConfigLoadDebugEntry(virQEMUDriverConfigPtr cfg, { if (virConfGetValueUInt(conf, "gluster_debug_level", &cfg->glusterDebugLevel) < 0) return -1; + if (virConfGetValueBool(conf, "virtiofsd_debug", &cfg->virtiofsdDebug) < 0) + return -1; return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index cedf232223..3ce9566b71 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -202,6 +202,7 @@ struct _virQEMUDriverConfig { virFirmwarePtr *firmwares; size_t nfirmwares; unsigned int glusterDebugLevel; + bool virtiofsdDebug; char *memoryBackingDir; diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in index dd90edf687..fca9a942c9 100644 --- a/src/qemu/test_libvirtd_qemu.aug.in +++ b/src/qemu/test_libvirtd_qemu.aug.in @@ -98,6 +98,7 @@ module Test_libvirtd_qemu = } { "stdio_handler" = "logd" } { "gluster_debug_level" = "9" } +{ "virtiofsd_debug" = "1" } { "namespaces" { "1" = "mount" } } -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:45 +0100, Ján Tomko wrote:
Add a 'virtiofsd_debug' option for tuning whether to run virtiofsd in debug mode.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 +++++++ src/qemu/qemu_conf.c | 2 ++ src/qemu/qemu_conf.h | 1 + src/qemu/test_libvirtd_qemu.aug.in | 1 + 5 files changed, 12 insertions(+)
[...]
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index b6805ffc41..e82c1b5bd5 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -809,6 +809,13 @@ # #gluster_debug_level = 9
+# virtiofsd debug +# +# Whether to enable the debugging output of the virtiofsd daemon. +# Possible values are 0 or 1.
Please mention that the default is 'disabled' or 0.
+# +#virtiofsd_debug = 1 + # To enhance security, QEMU driver is capable of creating private namespaces # for each domain started. Well, so far only "mount" namespace is supported. If # enabled it means qemu process is unable to see all the devices on the system,
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Reject unsupported configurations. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_domain.c | 61 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c3fc3fed1c..7cb283123d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8266,10 +8266,44 @@ qemuDomainDeviceDefValidateIOMMU(const virDomainIOMMUDef *iommu, return 0; } +static int +qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) +{ + size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); + size_t i; + + for (i = 0; i < numa_nodes; i++) { + virDomainMemoryAccess node_access = + virDomainNumaGetNodeMemoryAccessMode(def->numa, i); + + switch (node_access) { + case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: + if (def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory")); + return -1; + } + break; + case VIR_DOMAIN_MEMORY_ACCESS_SHARED: + break; + case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory")); + return -1; + + case VIR_DOMAIN_MEMORY_ACCESS_LAST: + default: + virReportEnumRangeError(virDomainMemoryAccess, node_access); + return -1; + + } + } + return 0; +} static int qemuDomainDeviceDefValidateFS(virDomainFSDefPtr fs, - const virDomainDef *def G_GNUC_UNUSED, + const virDomainDef *def, virQEMUCapsPtr qemuCaps G_GNUC_UNUSED) { if (fs->type != VIR_DOMAIN_FS_TYPE_MOUNT) { @@ -8299,8 +8333,29 @@ qemuDomainDeviceDefValidateFS(virDomainFSDefPtr fs, return -1; case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS: - /* TODO: vhost-user-fs-pci */ - return 0; + if (fs->accessmode != VIR_DOMAIN_FS_ACCESSMODE_PASSTHROUGH) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs only supports passthrough accessmode")); + return -1; + } + if (fs->wrpolicy != VIR_DOMAIN_FS_WRPOLICY_DEFAULT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs does not support wrpolicy")); + return -1; + } + if (fs->model != VIR_DOMAIN_FS_MODEL_DEFAULT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs does not support model")); + return -1; + } + if (fs->format != VIR_STORAGE_FILE_NONE) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs does not support format")); + return -1; + } + if (qemuDomainDefValidateVirtioFSSharedMemory(def) < 0) + return -1; + break; case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: default: -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:46 +0100, Ján Tomko wrote:
Reject unsupported configurations.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_domain.c | 61 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c3fc3fed1c..7cb283123d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8266,10 +8266,44 @@ qemuDomainDeviceDefValidateIOMMU(const virDomainIOMMUDef *iommu, return 0; }
+static int +qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) +{ + size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); + size_t i; + + for (i = 0; i < numa_nodes; i++) {
This won't catch guests with no numa configured ...
+ virDomainMemoryAccess node_access = + virDomainNumaGetNodeMemoryAccessMode(def->numa, i); + + switch (node_access) { + case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: + if (def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory"));
... so this error won't be reported. Also must all nodes have shared memory? Isn't one enough?
+ return -1; + } + break; + case VIR_DOMAIN_MEMORY_ACCESS_SHARED: + break; + case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory")); + return -1; + + case VIR_DOMAIN_MEMORY_ACCESS_LAST: + default: + virReportEnumRangeError(virDomainMemoryAccess, node_access); + return -1; + + } + } + return 0; +}

On 2/26/20 8:33 AM, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:46 +0100, Ján Tomko wrote:
Reject unsupported configurations.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_domain.c | 61 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c3fc3fed1c..7cb283123d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8266,10 +8266,44 @@ qemuDomainDeviceDefValidateIOMMU(const virDomainIOMMUDef *iommu, return 0; }
+static int +qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) +{ + size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); + size_t i; + + for (i = 0; i < numa_nodes; i++) {
This won't catch guests with no numa configured ...
+ virDomainMemoryAccess node_access = + virDomainNumaGetNodeMemoryAccessMode(def->numa, i); + + switch (node_access) { + case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: + if (def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory"));
... so this error won't be reported.
Also must all nodes have shared memory? Isn't one enough?
Maybe it is. But I wouldn't burn our resources trying to figure out all the corner cases and make this work in them. I'd say that we can start with this check and if somebody ever wants us to refine it, we can look into our options. Michal

On Wed, Feb 26, 2020 at 09:15:17AM +0100, Michal Privoznik wrote:
On 2/26/20 8:33 AM, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:46 +0100, Ján Tomko wrote:
Reject unsupported configurations.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_domain.c | 61 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c3fc3fed1c..7cb283123d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8266,10 +8266,44 @@ qemuDomainDeviceDefValidateIOMMU(const virDomainIOMMUDef *iommu, return 0; } +static int +qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) +{ + size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); + size_t i; + + for (i = 0; i < numa_nodes; i++) {
This won't catch guests with no numa configured ...
Consider this squashed in: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 090720fd76..f992e3fc6e 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8276,6 +8276,12 @@ qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); size_t i; + if (numa_nodes == 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires one or more NUMA nodes")); + return -1; + } + for (i = 0; i < numa_nodes; i++) { virDomainMemoryAccess node_access = virDomainNumaGetNodeMemoryAccessMode(def->numa, i);
+ virDomainMemoryAccess node_access = + virDomainNumaGetNodeMemoryAccessMode(def->numa, i); + + switch (node_access) { + case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT: + if (def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires shared memory"));
... so this error won't be reported.
Also must all nodes have shared memory? Isn't one enough?
Maybe it is. But I wouldn't burn our resources trying to figure out all the corner cases and make this work in them. I'd say that we can start with this check and if somebody ever wants us to refine it, we can look into our options.
Yes. Checking all nodes is the prudent choice that ensures all of the memory will be shared. Jano
Michal

On Wed, Feb 26, 2020 at 09:44:14 +0100, Ján Tomko wrote:
On Wed, Feb 26, 2020 at 09:15:17AM +0100, Michal Privoznik wrote:
On 2/26/20 8:33 AM, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:46 +0100, Ján Tomko wrote:
Reject unsupported configurations.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_domain.c | 61 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c3fc3fed1c..7cb283123d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8266,10 +8266,44 @@ qemuDomainDeviceDefValidateIOMMU(const virDomainIOMMUDef *iommu, return 0; } +static int +qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) +{ + size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); + size_t i; + + for (i = 0; i < numa_nodes; i++) {
This won't catch guests with no numa configured ...
Consider this squashed in: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 090720fd76..f992e3fc6e 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -8276,6 +8276,12 @@ qemuDomainDefValidateVirtioFSSharedMemory(const virDomainDef *def) size_t numa_nodes = virDomainNumaGetNodeCount(def->numa); size_t i;
+ if (numa_nodes == 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtiofs requires one or more NUMA nodes")); + return -1; + } + for (i = 0; i < numa_nodes; i++) { virDomainMemoryAccess node_access = virDomainNumaGetNodeMemoryAccessMode(def->numa, i);
We have a similar requirement for either cpu or memory hotplug so: Reviewed-by: Peter Krempa <pkrempa@redhat.com> Ideally guests instantiated via virt-install should already come with ne numa node, but that's for another time.

This is not yet supported. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_migration.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index a307c5ebe2..2624a7388c 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1280,6 +1280,16 @@ qemuMigrationSrcIsAllowed(virQEMUDriverPtr driver, return false; } } + + for (i = 0; i < vm->def->nfss; i++) { + virDomainFSDefPtr fs = vm->def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("migration with virtiofs device is not supported")); + return false; + } + } } return true; -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:47 +0100, Ján Tomko wrote:
This is not yet supported.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_migration.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Start virtiofsd for each <filesystem> device using it. Pre-create the socket for communication with QEMU and pass it to virtiofsd. Note that virtiofsd needs to run as root. https://bugzilla.redhat.com/show_bug.cgi?id=1694166 Introduced by QEMU commit a43efa34c7d7b628cbf1ec0fe60043e5c91043ea Signed-off-by: Ján Tomko <jtomko@redhat.com> --- po/POTFILES.in | 1 + src/qemu/Makefile.inc.am | 2 + src/qemu/qemu_domain.c | 5 +- src/qemu/qemu_domain.h | 2 +- src/qemu/qemu_extdevice.c | 20 ++- src/qemu/qemu_virtiofs.c | 300 ++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_virtiofs.h | 38 +++++ tests/qemuxml2argvtest.c | 11 ++ 8 files changed, 376 insertions(+), 3 deletions(-) create mode 100644 src/qemu/qemu_virtiofs.c create mode 100644 src/qemu/qemu_virtiofs.h diff --git a/po/POTFILES.in b/po/POTFILES.in index dba0d3a12e..bb97110e83 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -168,6 +168,7 @@ @SRCDIR@/src/qemu/qemu_tpm.c @SRCDIR@/src/qemu/qemu_vhost_user.c @SRCDIR@/src/qemu/qemu_vhost_user_gpu.c +@SRCDIR@/src/qemu/qemu_virtiofs.c @SRCDIR@/src/remote/remote_daemon.c @SRCDIR@/src/remote/remote_daemon_config.c @SRCDIR@/src/remote/remote_daemon_dispatch.c diff --git a/src/qemu/Makefile.inc.am b/src/qemu/Makefile.inc.am index b9c0c6ea9c..baa324dc4b 100644 --- a/src/qemu/Makefile.inc.am +++ b/src/qemu/Makefile.inc.am @@ -67,6 +67,8 @@ QEMU_DRIVER_SOURCES = \ qemu/qemu_vhost_user.h \ qemu/qemu_vhost_user_gpu.c \ qemu/qemu_vhost_user_gpu.h \ + qemu/qemu_virtiofs.c \ + qemu/qemu_virtiofs.h \ qemu/qemu_checkpoint.c \ qemu/qemu_checkpoint.h \ qemu/qemu_backup.c \ diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 7cb283123d..d0b2bff775 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1432,8 +1432,11 @@ qemuDomainFSPrivateNew(void) static void -qemuDomainFSPrivateDispose(void *obj G_GNUC_UNUSED) +qemuDomainFSPrivateDispose(void *obj) { + qemuDomainFSPrivatePtr priv = obj; + + g_free(priv->vhostuser_fs_sock); } static virClassPtr qemuDomainVideoPrivateClass; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index f8fb48f2ff..476056c73f 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -568,7 +568,7 @@ typedef qemuDomainFSPrivate *qemuDomainFSPrivatePtr; struct _qemuDomainFSPrivate { virObject parent; - int dummy; + char *vhostuser_fs_sock; }; diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 7f3bb104d9..5103d4921c 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -20,11 +20,13 @@ #include <config.h> +#include "qemu_command.h" #include "qemu_extdevice.h" #include "qemu_vhost_user_gpu.h" #include "qemu_domain.h" #include "qemu_tpm.h" #include "qemu_slirp.h" +#include "qemu_virtiofs.h" #include "viralloc.h" #include "virlog.h" @@ -153,7 +155,7 @@ qemuExtDevicesCleanupHost(virQEMUDriverPtr driver, int qemuExtDevicesStart(virQEMUDriverPtr driver, virDomainObjPtr vm, - virLogManagerPtr logManager G_GNUC_UNUSED, + virLogManagerPtr logManager, bool incomingMigration) { virDomainDefPtr def = vm->def; @@ -183,6 +185,15 @@ qemuExtDevicesStart(virQEMUDriverPtr driver, return -1; } + for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + if (qemuVirtioFSStart(logManager, driver, vm, fs) < 0) + return -1; + } + } + return 0; } @@ -214,6 +225,13 @@ qemuExtDevicesStop(virQEMUDriverPtr driver, if (slirp) qemuSlirpStop(slirp, vm, driver, net, false); } + + for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) + qemuVirtioFSStop(driver, vm, fs); + } } diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c new file mode 100644 index 0000000000..600a3644bd --- /dev/null +++ b/src/qemu/qemu_virtiofs.c @@ -0,0 +1,300 @@ +/* + * qemu_virtiofs.c: virtiofs support + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "logging/log_manager.h" +#include "virlog.h" +#include "qemu_command.h" +#include "qemu_conf.h" +#include "qemu_extdevice.h" +#include "qemu_security.h" +#include "qemu_virtiofs.h" +#include "virpidfile.h" + +#define VIR_FROM_THIS VIR_FROM_QEMU + + +char * +qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def))) + return NULL; + + name = g_strdup_printf("%s-%s-virtiofsd", shortName, alias); + + return virPidFileBuildPath(cfg->stateDir, name); +} + + +char * +qemuVirtioFSCreateSocketFilename(virDomainObjPtr vm, + const char *alias) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + return virFileBuildPath(priv->libDir, alias, "-virtiofsd.sock"); +} + + +static char * +qemuVirtioFSCreateLogFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *name = NULL; + + name = g_strdup_printf("%s-%s", def->name, alias); + + return virFileBuildPath(cfg->logDir, name, "-virtiofsd.log"); +} + + +static int +qemuVirtioFSOpenChardev(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *socket_path) +{ + virDomainChrSourceDefPtr chrdev = virDomainChrSourceDefNew(NULL); + virDomainChrDef chr = { .source = chrdev }; + VIR_AUTOCLOSE fd = -1; + int ret = -1; + + chrdev->type = VIR_DOMAIN_CHR_TYPE_UNIX; + chrdev->data.nix.listen = true; + chrdev->data.nix.path = g_strdup(socket_path); + + if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) { + ignore_value(qemuSecurityClearSocketLabel(driver->securityManager, vm->def)); + goto cleanup; + } + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + + if (qemuSecuritySetChardevLabel(driver, vm, &chr) < 0) + goto cleanup; + + ret = fd; + fd = -1; + + cleanup: + virObjectUnref(chrdev); + return ret; +} + +static virCommandPtr +qemuVirtioFSBuildCommandLine(virQEMUDriverConfigPtr cfg, + virDomainFSDefPtr fs, + int *fd) +{ + g_autoptr(virCommand) cmd = NULL; + g_auto(virBuffer) opts = VIR_BUFFER_INITIALIZER; + + if (!(cmd = virCommandNew(fs->binary))) + return NULL; + + virCommandAddArgFormat(cmd, "--fd=%d", *fd); + virCommandPassFD(cmd, *fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + *fd = -1; + + virCommandAddArg(cmd, "-o"); + virBufferAsprintf(&opts, "source=%s,", fs->src->path); + if (fs->cache) + virBufferAsprintf(&opts, "cache=%s,", virDomainFSCacheModeTypeToString(fs->cache)); + + if (fs->xattr == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "xattr,"); + else if (fs->xattr == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_xattr,"); + + if (fs->flock == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "flock,"); + else if (fs->flock == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_flock,"); + + if (fs->posix_lock == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "posix_lock,"); + else if (fs->posix_lock == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_posix_lock,"); + + virBufferTrim(&opts, ","); + + virCommandAddArgBuffer(cmd, &opts); + if (cfg->virtiofsdDebug) + virCommandAddArg(cmd, "-d"); + + return g_steal_pointer(&cmd); +} + +int +qemuVirtioFSStart(virLogManagerPtr logManager, + virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autoptr(virCommand) cmd = NULL; + g_autofree char *socket_path = NULL; + g_autofree char *pidfile = NULL; + g_autofree char *logpath = NULL; + pid_t pid = (pid_t) -1; + VIR_AUTOCLOSE fd = -1; + VIR_AUTOCLOSE logfd = -1; + int ret = -1; + int rc; + + if (!virFileExists(fs->src->path)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("the virtiofs export directory '%s' does not exist"), + fs->src->path); + return -1; + } + + if (!(pidfile = qemuVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + if (!(socket_path = qemuVirtioFSCreateSocketFilename(vm, fs->info.alias))) + goto cleanup; + + if ((fd = qemuVirtioFSOpenChardev(driver, vm, socket_path)) < 0) + goto cleanup; + + logpath = qemuVirtioFSCreateLogFilename(cfg, vm->def, fs->info.alias); + + if (cfg->stdioLogD) { + if ((logfd = virLogManagerDomainOpenLogFile(logManager, + "qemu", + vm->def->uuid, + vm->def->name, + logpath, + 0, + NULL, NULL)) < 0) + goto cleanup; + } else { + if ((logfd = open(logpath, O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR)) < 0) { + virReportSystemError(errno, _("failed to create logfile %s"), + logpath); + goto cleanup; + } + if (virSetCloseExec(logfd) < 0) { + virReportSystemError(errno, _("failed to set close-on-exec flag on %s"), + logpath); + goto error; + } + } + + if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) + goto cleanup; + + /* so far only running as root is supported */ + virCommandSetUID(cmd, 0); + virCommandSetGID(cmd, 0); + + virCommandSetPidFile(cmd, pidfile); + virCommandSetOutputFD(cmd, &logfd); + virCommandSetErrorFD(cmd, &logfd); + virCommandNonblockingFDs(cmd); + virCommandDaemonize(cmd); + + if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) + goto cleanup; + + rc = virCommandRun(cmd, NULL); + + if (rc < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Could not start 'virtiofsd'")); + goto error; + } + + rc = virPidFileReadPath(pidfile, &pid); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to read virtiofsd pidfile '%s'"), + pidfile); + goto error; + } + + if (virProcessKill(pid, 0) != 0) { + virReportSystemError(errno, "%s", + _("virtiofsd died unexpectedly")); + goto error; + } + + QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock = g_steal_pointer(&socket_path); + ret = 0; + + cleanup: + if (socket_path) + unlink(socket_path); + return ret; + + error: + if (pid != -1) + virProcessKillPainfully(pid, true); + if (pidfile) + unlink(pidfile); + goto cleanup; +} + + +void +qemuVirtioFSStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autofree char *pidfile = NULL; + virErrorPtr orig_err; + pid_t pid = -1; + int rc; + + virErrorPreserveLast(&orig_err); + + if (!(pidfile = qemuVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + rc = virPidFileReadPathIfAlive(pidfile, &pid, NULL); + if (rc >= 0 && pid != (pid_t) -1) + virProcessKillPainfully(pid, true); + + if (unlink(pidfile) < 0 && + errno != ENOENT) { + virReportSystemError(errno, + _("Unable to remove stale pidfile %s"), + pidfile); + } + + if (QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock) + unlink(QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock); + + cleanup: + virErrorRestore(&orig_err); +} diff --git a/src/qemu/qemu_virtiofs.h b/src/qemu/qemu_virtiofs.h new file mode 100644 index 0000000000..49db807b19 --- /dev/null +++ b/src/qemu/qemu_virtiofs.h @@ -0,0 +1,38 @@ +/* + * qemu_virtiofs.h: virtiofs support + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + + +char * +qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias); +char * +qemuVirtioFSCreateSocketFilename(virDomainObjPtr vm, + const char *alias); + +int +qemuVirtioFSStart(virLogManagerPtr logManager, + virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs); +void +qemuVirtioFSStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs); diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 038fd97580..abbc273889 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -496,6 +496,17 @@ testCompareXMLToArgv(const void *data) } } + for (i = 0; i < vm->def->nfss; i++) { + virDomainFSDefPtr fs = vm->def->fss[i]; + char *s; + + if (fs->fsdriver != VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) + continue; + + s = g_strdup_printf("/tmp/lib/domain--1-guest/fs%zu.vhost-fs.sock", i); + QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock = s; + } + if (vm->def->vsock) { virDomainVsockDefPtr vsock = vm->def->vsock; qemuDomainVsockPrivatePtr vsockPriv = -- 2.24.1

Explicitly CCing danpb to clarify usage of the logging daemon. On Thu, Feb 20, 2020 at 15:32:48 +0100, Ján Tomko wrote:
Start virtiofsd for each <filesystem> device using it.
Pre-create the socket for communication with QEMU and pass it to virtiofsd.
Note that virtiofsd needs to run as root.
https://bugzilla.redhat.com/show_bug.cgi?id=1694166
Introduced by QEMU commit a43efa34c7d7b628cbf1ec0fe60043e5c91043ea
Signed-off-by: Ján Tomko <jtomko@redhat.com> ---
[...]
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c new file mode 100644 index 0000000000..600a3644bd --- /dev/null +++ b/src/qemu/qemu_virtiofs.c @@ -0,0 +1,300 @@
[...]
+char * +qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def))) + return NULL; + + name = g_strdup_printf("%s-%s-virtiofsd", shortName, alias); + + return virPidFileBuildPath(cfg->stateDir, name); +}
Why do we want to store the pid file here?
+ + +char * +qemuVirtioFSCreateSocketFilename(virDomainObjPtr vm, + const char *alias) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + return virFileBuildPath(priv->libDir, alias, "-virtiofsd.sock");
Shouldn't we put it here as well? It's a sub-process of the domain itself and doesn't make much sense to treat it as the qemu pid file.
+}
[...]
+static int +qemuVirtioFSOpenChardev(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *socket_path) +{ + virDomainChrSourceDefPtr chrdev = virDomainChrSourceDefNew(NULL); + virDomainChrDef chr = { .source = chrdev }; + VIR_AUTOCLOSE fd = -1; + int ret = -1; + + chrdev->type = VIR_DOMAIN_CHR_TYPE_UNIX; + chrdev->data.nix.listen = true; + chrdev->data.nix.path = g_strdup(socket_path); + + if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) { + ignore_value(qemuSecurityClearSocketLabel(driver->securityManager, vm->def)); + goto cleanup; + } + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + + if (qemuSecuritySetChardevLabel(driver, vm, &chr) < 0) + goto cleanup; + + ret = fd; + fd = -1; + + cleanup: + virObjectUnref(chrdev); + return ret; +} + +static virCommandPtr
Inconsistent spacing between functions.
+qemuVirtioFSBuildCommandLine(virQEMUDriverConfigPtr cfg, + virDomainFSDefPtr fs, + int *fd) +{ + g_autoptr(virCommand) cmd = NULL; + g_auto(virBuffer) opts = VIR_BUFFER_INITIALIZER; + + if (!(cmd = virCommandNew(fs->binary))) + return NULL; + + virCommandAddArgFormat(cmd, "--fd=%d", *fd); + virCommandPassFD(cmd, *fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + *fd = -1; + + virCommandAddArg(cmd, "-o"); + virBufferAsprintf(&opts, "source=%s,", fs->src->path);
Since the path is an argument of -o along with others you must escape commas in the path.
+ if (fs->cache) + virBufferAsprintf(&opts, "cache=%s,", virDomainFSCacheModeTypeToString(fs->cache)); + + if (fs->xattr == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "xattr,"); + else if (fs->xattr == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_xattr,"); + + if (fs->flock == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "flock,"); + else if (fs->flock == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_flock,"); + + if (fs->posix_lock == VIR_TRISTATE_SWITCH_ON) + virBufferAddLit(&opts, "posix_lock,"); + else if (fs->posix_lock == VIR_TRISTATE_SWITCH_OFF) + virBufferAddLit(&opts, "no_posix_lock,"); + + virBufferTrim(&opts, ","); + + virCommandAddArgBuffer(cmd, &opts); + if (cfg->virtiofsdDebug) + virCommandAddArg(cmd, "-d"); + + return g_steal_pointer(&cmd); +} + +int +qemuVirtioFSStart(virLogManagerPtr logManager, + virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autoptr(virCommand) cmd = NULL; + g_autofree char *socket_path = NULL; + g_autofree char *pidfile = NULL; + g_autofree char *logpath = NULL; + pid_t pid = (pid_t) -1; + VIR_AUTOCLOSE fd = -1; + VIR_AUTOCLOSE logfd = -1; + int ret = -1; + int rc; + + if (!virFileExists(fs->src->path)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("the virtiofs export directory '%s' does not exist"), + fs->src->path); + return -1; + } + + if (!(pidfile = qemuVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + if (!(socket_path = qemuVirtioFSCreateSocketFilename(vm, fs->info.alias))) + goto cleanup; + + if ((fd = qemuVirtioFSOpenChardev(driver, vm, socket_path)) < 0) + goto cleanup; + + logpath = qemuVirtioFSCreateLogFilename(cfg, vm->def, fs->info.alias); + + if (cfg->stdioLogD) { + if ((logfd = virLogManagerDomainOpenLogFile(logManager, + "qemu", + vm->def->uuid, + vm->def->name, + logpath, + 0, + NULL, NULL)) < 0)
Hmm, I'm not sure now what the intented usage of uuid/domname was and there's no docs. Daniel, please review this.
+ goto cleanup; + } else { + if ((logfd = open(logpath, O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR)) < 0) { + virReportSystemError(errno, _("failed to create logfile %s"), + logpath); + goto cleanup; + } + if (virSetCloseExec(logfd) < 0) { + virReportSystemError(errno, _("failed to set close-on-exec flag on %s"), + logpath); + goto error; + } + } + + if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) + goto cleanup; + + /* so far only running as root is supported */ + virCommandSetUID(cmd, 0); + virCommandSetGID(cmd, 0);
Is it necessary to special-case this for cases when we run the session connection? Or is it necessary to forbid usage of virtiofs?
+ virCommandSetPidFile(cmd, pidfile); + virCommandSetOutputFD(cmd, &logfd); + virCommandSetErrorFD(cmd, &logfd); + virCommandNonblockingFDs(cmd); + virCommandDaemonize(cmd); + + if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) + goto cleanup; +
The rest looks good to me.

On Wed, Feb 26, 2020 at 09:42:04AM +0100, Peter Krempa wrote:
Explicitly CCing danpb to clarify usage of the logging daemon.
On Thu, Feb 20, 2020 at 15:32:48 +0100, Ján Tomko wrote:
Start virtiofsd for each <filesystem> device using it.
Pre-create the socket for communication with QEMU and pass it to virtiofsd.
Note that virtiofsd needs to run as root.
https://bugzilla.redhat.com/show_bug.cgi?id=1694166
Introduced by QEMU commit a43efa34c7d7b628cbf1ec0fe60043e5c91043ea
Signed-off-by: Ján Tomko <jtomko@redhat.com> ---
[...]
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c new file mode 100644 index 0000000000..600a3644bd --- /dev/null +++ b/src/qemu/qemu_virtiofs.c @@ -0,0 +1,300 @@
[...]
+char * +qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def))) + return NULL; + + name = g_strdup_printf("%s-%s-virtiofsd", shortName, alias); + + return virPidFileBuildPath(cfg->stateDir, name); +}
Why do we want to store the pid file here?
+ + +char * +qemuVirtioFSCreateSocketFilename(virDomainObjPtr vm, + const char *alias) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + return virFileBuildPath(priv->libDir, alias, "-virtiofsd.sock");
Shouldn't we put it here as well? It's a sub-process of the domain itself and doesn't make much sense to treat it as the qemu pid file.
Right, libDir sounds like a better location. [...]
+ + logpath = qemuVirtioFSCreateLogFilename(cfg, vm->def, fs->info.alias); + + if (cfg->stdioLogD) { + if ((logfd = virLogManagerDomainOpenLogFile(logManager, + "qemu", + vm->def->uuid, + vm->def->name, + logpath, + 0, + NULL, NULL)) < 0)
Hmm, I'm not sure now what the intented usage of uuid/domname was and there's no docs.
Daniel, please review this.
Grepping for uuid in src/logging, this seems to be only used when saving and loading virtlockd's state. But virtiofsd's lifecycle is tied to the domain, so I don't think it should get its own uuid.
+ goto cleanup; + } else { + if ((logfd = open(logpath, O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR)) < 0) { + virReportSystemError(errno, _("failed to create logfile %s"), + logpath); + goto cleanup; + } + if (virSetCloseExec(logfd) < 0) { + virReportSystemError(errno, _("failed to set close-on-exec flag on %s"), + logpath); + goto error; + } + } + + if (!(cmd = qemuVirtioFSBuildCommandLine(cfg, fs, &fd))) + goto cleanup; + + /* so far only running as root is supported */ + virCommandSetUID(cmd, 0); + virCommandSetGID(cmd, 0);
Is it necessary to special-case this for cases when we run the session connection? Or is it necessary to forbid usage of virtiofs?
Right, session users deserve a nicer error message until virtiofsd learns to run as non-root. Jano

On Wed, Feb 26, 2020 at 11:34:06AM +0100, Ján Tomko wrote:
On Wed, Feb 26, 2020 at 09:42:04AM +0100, Peter Krempa wrote:
Explicitly CCing danpb to clarify usage of the logging daemon.
On Thu, Feb 20, 2020 at 15:32:48 +0100, Ján Tomko wrote:
Start virtiofsd for each <filesystem> device using it.
Pre-create the socket for communication with QEMU and pass it to virtiofsd.
Note that virtiofsd needs to run as root.
https://bugzilla.redhat.com/show_bug.cgi?id=1694166
Introduced by QEMU commit a43efa34c7d7b628cbf1ec0fe60043e5c91043ea
Signed-off-by: Ján Tomko <jtomko@redhat.com> ---
[...]
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c new file mode 100644 index 0000000000..600a3644bd --- /dev/null +++ b/src/qemu/qemu_virtiofs.c @@ -0,0 +1,300 @@
[...]
+char * +qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def))) + return NULL; + + name = g_strdup_printf("%s-%s-virtiofsd", shortName, alias); + + return virPidFileBuildPath(cfg->stateDir, name); +}
Why do we want to store the pid file here?
+ + +char * +qemuVirtioFSCreateSocketFilename(virDomainObjPtr vm, + const char *alias) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + return virFileBuildPath(priv->libDir, alias, "-virtiofsd.sock");
Shouldn't we put it here as well? It's a sub-process of the domain itself and doesn't make much sense to treat it as the qemu pid file.
Right, libDir sounds like a better location.
[...]
+ + logpath = qemuVirtioFSCreateLogFilename(cfg, vm->def, fs->info.alias); + + if (cfg->stdioLogD) { + if ((logfd = virLogManagerDomainOpenLogFile(logManager, + "qemu", + vm->def->uuid, + vm->def->name, + logpath, + 0, + NULL, NULL)) < 0)
Hmm, I'm not sure now what the intented usage of uuid/domname was and there's no docs.
Daniel, please review this.
Grepping for uuid in src/logging, this seems to be only used when saving and loading virtlockd's state. But virtiofsd's lifecycle is tied to the domain, so I don't think it should get its own uuid.
Essentially the UUID/name are just a way to associate the log file(s) with what "thing" is owning them. We already have virtlogd handling multiple logs from the same QEMU - the main QEMU stderr, and the file base logs for character devices. Having another log file open for virtiofs, slirp, swtpm, etc, all with the same UUID+name makes total sense. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Wire up the code to put virtiofsd in the emulator cgroup on domain startup. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_extdevice.c | 15 +++++++++++++++ src/qemu/qemu_virtiofs.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_virtiofs.h | 6 ++++++ 3 files changed, 49 insertions(+) diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 5103d4921c..31655b7f0a 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -248,6 +248,13 @@ qemuExtDevicesHasDevice(virDomainDefPtr def) if (def->tpm && def->tpm->type == VIR_DOMAIN_TPM_TYPE_EMULATOR) return true; + for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) + return true; + } + return false; } @@ -271,5 +278,13 @@ qemuExtDevicesSetupCgroup(virQEMUDriverPtr driver, qemuExtTPMSetupCgroup(driver, def, cgroup) < 0) return -1; + for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS && + qemuVirtioFSSetupCgroup(driver, def, fs, cgroup) < 0) + return -1; + } + return 0; } diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 600a3644bd..9e354a30c6 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -298,3 +298,31 @@ qemuVirtioFSStop(virQEMUDriverPtr driver, cleanup: virErrorRestore(&orig_err); } + + +int +qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver, + virDomainDefPtr def, + virDomainFSDefPtr fs, + virCgroupPtr cgroup) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autofree char *pidfile = NULL; + pid_t pid = -1; + int rc; + + if (!(pidfile = qemuVirtioFSCreatePidFilename(cfg, def, fs->info.alias))) + return -1; + + rc = virPidFileReadPathIfAlive(pidfile, &pid, NULL); + if (rc < 0 || pid == (pid_t) -1) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("virtiofsd died unexpectedly")); + return -1; + } + + if (virCgroupAddProcess(cgroup, pid) < 0) + return -1; + + return 0; +} diff --git a/src/qemu/qemu_virtiofs.h b/src/qemu/qemu_virtiofs.h index 49db807b19..341fed1def 100644 --- a/src/qemu/qemu_virtiofs.h +++ b/src/qemu/qemu_virtiofs.h @@ -36,3 +36,9 @@ void qemuVirtioFSStop(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainFSDefPtr fs); + +int +qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver, + virDomainDefPtr def, + virDomainFSDefPtr fs, + virCgroupPtr cgroup); -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:49 +0100, Ján Tomko wrote:
Wire up the code to put virtiofsd in the emulator cgroup on domain startup.
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_extdevice.c | 15 +++++++++++++++ src/qemu/qemu_virtiofs.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_virtiofs.h | 6 ++++++ 3 files changed, 49 insertions(+)
[...]
@@ -271,5 +278,13 @@ qemuExtDevicesSetupCgroup(virQEMUDriverPtr driver, qemuExtTPMSetupCgroup(driver, def, cgroup) < 0) return -1;
+ for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS && + qemuVirtioFSSetupCgroup(driver, def, fs, cgroup) < 0)
While this is consistent with what we do with other cases of 'externa' devices. I'm worried though that that stashing everything into a single cgroup will not scale.
+ return -1; + } + return 0; } diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 600a3644bd..9e354a30c6 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -298,3 +298,31 @@ qemuVirtioFSStop(virQEMUDriverPtr driver, cleanup: virErrorRestore(&orig_err); } + + +int +qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver, + virDomainDefPtr def, + virDomainFSDefPtr fs, + virCgroupPtr cgroup) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autofree char *pidfile = NULL; + pid_t pid = -1; + int rc; + + if (!(pidfile = qemuVirtioFSCreatePidFilename(cfg, def, fs->info.alias))) + return -1; + + rc = virPidFileReadPathIfAlive(pidfile, &pid, NULL); + if (rc < 0 || pid == (pid_t) -1) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("virtiofsd died unexpectedly")); + return -1; + }
Why don't we store the pid in the private data? Seems to be a waste of cycles to read it here.
+ + if (virCgroupAddProcess(cgroup, pid) < 0) + return -1; + + return 0; +}
Please consider storing the pid somewhere rather than looking it up. With that: Reviewed-by: Peter Krempa <pkrempa@redhat.com> We can worry about the cgroup congestion later since that's pre-existing.

Look into /usr/share/qemu/vhost-user to see whether we can find a suitable virtiofsd binary, in case the user did not provide one in the domain XML. Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_extdevice.c | 9 +++++++++ src/qemu/qemu_vhost_user.c | 40 ++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_vhost_user.h | 4 ++++ src/qemu/qemu_virtiofs.c | 11 +++++++++++ src/qemu/qemu_virtiofs.h | 4 ++++ 5 files changed, 68 insertions(+) diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 31655b7f0a..699b4daade 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -105,6 +105,15 @@ qemuExtDevicesPrepareDomain(virQEMUDriverPtr driver, } } + for (i = 0; i < vm->def->nfss; i++) { + virDomainFSDefPtr fs = vm->def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS) { + if (qemuVirtioFSPrepareDomain(driver, fs) < 0) + return -1; + } + } + return ret; } diff --git a/src/qemu/qemu_vhost_user.c b/src/qemu/qemu_vhost_user.c index 4c25b30664..d437fd1bd5 100644 --- a/src/qemu/qemu_vhost_user.c +++ b/src/qemu/qemu_vhost_user.c @@ -416,3 +416,43 @@ qemuVhostUserFillDomainGPU(virQEMUDriverPtr driver, VIR_FREE(vus); return ret; } + + +int +qemuVhostUserFillDomainFS(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + qemuVhostUserPtr *vus = NULL; + ssize_t nvus = 0; + ssize_t i; + int ret = -1; + + if ((nvus = qemuVhostUserFetchParsedConfigs(driver->privileged, + &vus, NULL)) < 0) + goto end; + + for (i = 0; i < nvus; i++) { + qemuVhostUserPtr vu = vus[i]; + + if (vu->type != QEMU_VHOST_USER_TYPE_FS) + continue; + + g_free(fs->binary); + fs->binary = g_strdup(vu->binary); + break; + } + + if (i == nvus) { + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("Unable to find a satisfying virtiofsd")); + goto end; + } + + ret = 0; + + end: + for (i = 0; i < nvus; i++) + qemuVhostUserFree(vus[i]); + g_free(vus); + return ret; +} diff --git a/src/qemu/qemu_vhost_user.h b/src/qemu/qemu_vhost_user.h index 369ba00caa..e505c8a473 100644 --- a/src/qemu/qemu_vhost_user.h +++ b/src/qemu/qemu_vhost_user.h @@ -45,3 +45,7 @@ qemuVhostUserFetchConfigs(char ***configs, int qemuVhostUserFillDomainGPU(virQEMUDriverPtr driver, virDomainVideoDefPtr video); + +int +qemuVhostUserFillDomainFS(virQEMUDriverPtr driver, + virDomainFSDefPtr fs); diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 9e354a30c6..b53e5b0806 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -28,6 +28,7 @@ #include "qemu_conf.h" #include "qemu_extdevice.h" #include "qemu_security.h" +#include "qemu_vhost_user.h" #include "qemu_virtiofs.h" #include "virpidfile.h" @@ -326,3 +327,13 @@ qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver, return 0; } + +int +qemuVirtioFSPrepareDomain(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + if (fs->binary) + return 0; + + return qemuVhostUserFillDomainFS(driver, fs); +} diff --git a/src/qemu/qemu_virtiofs.h b/src/qemu/qemu_virtiofs.h index 341fed1def..b70b2f1b6a 100644 --- a/src/qemu/qemu_virtiofs.h +++ b/src/qemu/qemu_virtiofs.h @@ -42,3 +42,7 @@ qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver, virDomainDefPtr def, virDomainFSDefPtr fs, virCgroupPtr cgroup); + +int +qemuVirtioFSPrepareDomain(virQEMUDriverPtr driver, + virDomainFSDefPtr fs); -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:50 +0100, Ján Tomko wrote:
Look into /usr/share/qemu/vhost-user to see whether we can find a suitable virtiofsd binary, in case the user did not provide one in the domain XML.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_extdevice.c | 9 +++++++++ src/qemu/qemu_vhost_user.c | 40 ++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_vhost_user.h | 4 ++++ src/qemu/qemu_virtiofs.c | 11 +++++++++++ src/qemu/qemu_virtiofs.h | 4 ++++ 5 files changed, 68 insertions(+)
[...]
diff --git a/src/qemu/qemu_vhost_user.c b/src/qemu/qemu_vhost_user.c index 4c25b30664..d437fd1bd5 100644 --- a/src/qemu/qemu_vhost_user.c +++ b/src/qemu/qemu_vhost_user.c @@ -416,3 +416,43 @@ qemuVhostUserFillDomainGPU(virQEMUDriverPtr driver, VIR_FREE(vus); return ret; } + + +int +qemuVhostUserFillDomainFS(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + qemuVhostUserPtr *vus = NULL; + ssize_t nvus = 0; + ssize_t i; + int ret = -1; + + if ((nvus = qemuVhostUserFetchParsedConfigs(driver->privileged, + &vus, NULL)) < 0) + goto end; + + for (i = 0; i < nvus; i++) { + qemuVhostUserPtr vu = vus[i]; + + if (vu->type != QEMU_VHOST_USER_TYPE_FS) + continue; + + g_free(fs->binary);
This feels weird. If the user specified a binary we shouldn't overwrite it. Also it's dead code as you check that it's NULL in [1]. For now if we don't fill in anything else it doesn't matter, but it might later. In such case we should probably check that the binary is the same. Or use this just to fill the binary but in that case the feeing is not necessary.
+ fs->binary = g_strdup(vu->binary); + break; + } + + if (i == nvus) { + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("Unable to find a satisfying virtiofsd")); + goto end; + } + + ret = 0; + + end: + for (i = 0; i < nvus; i++) + qemuVhostUserFree(vus[i]); + g_free(vus); + return ret; +}
[...]
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 9e354a30c6..b53e5b0806 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c
[...]
@@ -326,3 +327,13 @@ qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver,
return 0; } + +int +qemuVirtioFSPrepareDomain(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + if (fs->binary)
[1]
+ return 0; + + return qemuVhostUserFillDomainFS(driver, fs); +}
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

On Wed, Feb 26, 2020 at 09:53:46AM +0100, Peter Krempa wrote:
On Thu, Feb 20, 2020 at 15:32:50 +0100, Ján Tomko wrote:
Look into /usr/share/qemu/vhost-user to see whether we can find a suitable virtiofsd binary, in case the user did not provide one in the domain XML.
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_extdevice.c | 9 +++++++++ src/qemu/qemu_vhost_user.c | 40 ++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_vhost_user.h | 4 ++++ src/qemu/qemu_virtiofs.c | 11 +++++++++++ src/qemu/qemu_virtiofs.h | 4 ++++ 5 files changed, 68 insertions(+)
[...]
diff --git a/src/qemu/qemu_vhost_user.c b/src/qemu/qemu_vhost_user.c index 4c25b30664..d437fd1bd5 100644 --- a/src/qemu/qemu_vhost_user.c +++ b/src/qemu/qemu_vhost_user.c @@ -416,3 +416,43 @@ qemuVhostUserFillDomainGPU(virQEMUDriverPtr driver, VIR_FREE(vus); return ret; } + + +int +qemuVhostUserFillDomainFS(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + qemuVhostUserPtr *vus = NULL; + ssize_t nvus = 0; + ssize_t i; + int ret = -1; + + if ((nvus = qemuVhostUserFetchParsedConfigs(driver->privileged, + &vus, NULL)) < 0) + goto end; + + for (i = 0; i < nvus; i++) { + qemuVhostUserPtr vu = vus[i]; + + if (vu->type != QEMU_VHOST_USER_TYPE_FS) + continue; + + g_free(fs->binary);
This feels weird. If the user specified a binary we shouldn't overwrite it. Also it's dead code as you check that it's NULL in [1].
Yes, artifact copied from vhost-user-gpu. But it seems it's dead code there too - possibly a leftover from earlier version that tried to find the best candidate. Jano
For now if we don't fill in anything else it doesn't matter, but it might later. In such case we should probably check that the binary is the same.
Or use this just to fill the binary but in that case the feeing is not necessary.
+ fs->binary = g_strdup(vu->binary); + break; + } + + if (i == nvus) { + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("Unable to find a satisfying virtiofsd")); + goto end; + } + + ret = 0; + + end: + for (i = 0; i < nvus; i++) + qemuVhostUserFree(vus[i]); + g_free(vus); + return ret; +}
[...]
diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index 9e354a30c6..b53e5b0806 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c
[...]
@@ -326,3 +327,13 @@ qemuVirtioFSSetupCgroup(virQEMUDriverPtr driver,
return 0; } + +int +qemuVirtioFSPrepareDomain(virQEMUDriverPtr driver, + virDomainFSDefPtr fs) +{ + if (fs->binary)
[1]
+ return 0; + + return qemuVhostUserFillDomainFS(driver, fs); +}
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Format the 'vhost-user-fs' device on the QEMU command line. This device provides shared file system access using the FUSE protocol carried over virtio. The actual file server is implemented in an external vhost-user-fs device backend process. https://bugzilla.redhat.com/show_bug.cgi?id=1694166 Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_command.c | 45 +++++++++++++++++- ...vhost-user-fs-fd-memory.x86_64-latest.args | 39 +++++++++++++++ ...vhost-user-fs-hugepages.x86_64-latest.args | 47 +++++++++++++++++++ tests/qemuxml2argvtest.c | 3 ++ 4 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9fcd06f8c3..272a0b8d44 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2574,6 +2574,45 @@ qemuBuildDisksCommandLine(virCommandPtr cmd, } +static int +qemuBuildVHostUserFsCommandLine(virCommandPtr cmd, + virDomainFSDef *fs, + const virDomainDef *def, + qemuDomainObjPrivatePtr priv) +{ + g_autofree char *chardev_alias = NULL; + g_auto(virBuffer) opt = VIR_BUFFER_INITIALIZER; + + chardev_alias = g_strdup_printf("chr-vu-%s", fs->info.alias); + + virCommandAddArg(cmd, "-chardev"); + virBufferAddLit(&opt, "socket"); + virBufferAsprintf(&opt, ",id=%s", chardev_alias); + virBufferEscapeString(&opt, ",path=%s", QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock); + virCommandAddArg(cmd, virBufferContentAndReset(&opt)); + + virCommandAddArg(cmd, "-device"); + + if (qemuBuildVirtioDevStr(&opt, "vhost-user-fs", priv->qemuCaps, + VIR_DOMAIN_DEVICE_FS, fs) < 0) + return -1; + + virBufferAsprintf(&opt, ",chardev=%s", chardev_alias); + if (fs->queue_size) + virBufferAsprintf(&opt, ",queue-size=%llu", fs->queue_size); + virBufferAddLit(&opt, ",tag="); + virQEMUBuildBufferEscapeComma(&opt, fs->dst); + if (qemuBuildVirtioOptionsStr(&opt, fs->virtio, priv->qemuCaps) < 0) + return -1; + + if (qemuBuildDeviceAddressStr(&opt, def, &fs->info, priv->qemuCaps) < 0) + return -1; + + virCommandAddArg(cmd, virBufferContentAndReset(&opt)); + return 0; +} + + static char * qemuBuildFSStr(virDomainFSDefPtr fs) { @@ -2666,7 +2705,7 @@ static int qemuBuildFilesystemCommandLine(virCommandPtr cmd, const virDomainDef *def, virQEMUCapsPtr qemuCaps, - qemuDomainObjPrivatePtr priv G_GNUC_UNUSED) + qemuDomainObjPrivatePtr priv) { size_t i; @@ -2681,7 +2720,9 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, break; case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS: - /* TODO: vhost-user-fs-pci */ + /* vhost-user-fs-pci */ + if (qemuBuildVHostUserFsCommandLine(cmd, def->fss[i], def, priv) < 0) + return -1; break; case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args new file mode 100644 index 0000000000..a7df45a7f0 --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args @@ -0,0 +1,39 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-guest \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-guest/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-guest/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=guest,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-guest/master-key.aes \ +-machine pc,accel=kvm,usb=off,dump-guest-core=off \ +-cpu qemu64 \ +-m 14336 \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-object memory-backend-file,id=ram-node0,\ +mem-path=/var/lib/libvirt/qemu/ram/libvirt/qemu/-1-guest/ram-node0,share=yes,\ +size=15032385536 \ +-numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-boot strict=on \ +-chardev socket,id=chr-vu-fs0,path=/tmp/lib/domain--1-guest/fs0.vhost-fs.sock \ +-device vhost-user-fs-pci,chardev=chr-vu-fs0,queue-size=1024,tag=mount_tag,\ +bus=pci.0,addr=0x2 \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args new file mode 100644 index 0000000000..39190b8d3e --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args @@ -0,0 +1,47 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-guest \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-guest/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-guest/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=guest,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-guest/master-key.aes \ +-machine q35,accel=tcg,usb=off,dump-guest-core=off \ +-cpu qemu64 \ +-m 2048 \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-object memory-backend-file,id=ram-node0,prealloc=yes,\ +mem-path=/dev/hugepages2M/libvirt/qemu/-1-guest,share=yes,size=2147483648 \ +-numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-uuid 1ccfd97d-5eb4-478a-bbe6-88d254c16db7 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device pcie-root-port,port=0x8,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,\ +addr=0x1 \ +-device pcie-root-port,port=0x9,chassis=2,id=pci.2,bus=pcie.0,addr=0x1.0x1 \ +-device pcie-root-port,port=0xa,chassis=3,id=pci.3,bus=pcie.0,addr=0x1.0x2 \ +-device pcie-root-port,port=0xb,chassis=4,id=pci.4,bus=pcie.0,addr=0x1.0x3 \ +-blockdev '{"driver":"file","filename":"/var/lib/libvirt/images/guest.qcow2",\ +"node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}' \ +-blockdev '{"node-name":"libvirt-1-format","read-only":false,"driver":"qcow2",\ +"file":"libvirt-1-storage"}' \ +-device virtio-blk-pci,scsi=off,bus=pci.4,addr=0x0,drive=libvirt-1-format,\ +id=virtio-disk0,bootindex=1 \ +-chardev socket,id=chr-vu-fs0,path=/tmp/lib/domain--1-guest/fs0.vhost-fs.sock \ +-device vhost-user-fs-pci,chardev=chr-vu-fs0,tag=mount_tag,bus=pci.1,addr=0x0 \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index abbc273889..35d413d40b 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -3061,6 +3061,9 @@ mymain(void) DO_TEST_CAPS_VER("launch-security-sev", "2.12.0"); + DO_TEST_CAPS_LATEST("vhost-user-fs-fd-memory"); + DO_TEST_CAPS_LATEST("vhost-user-fs-hugepages"); + DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO); DO_TEST("riscv64-virt-pci", -- 2.24.1

On Thu, Feb 20, 2020 at 15:32:51 +0100, Ján Tomko wrote:
Format the 'vhost-user-fs' device on the QEMU command line.
This device provides shared file system access using the FUSE protocol carried over virtio. The actual file server is implemented in an external vhost-user-fs device backend process.
https://bugzilla.redhat.com/show_bug.cgi?id=1694166
Signed-off-by: Ján Tomko <jtomko@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- src/qemu/qemu_command.c | 45 +++++++++++++++++- ...vhost-user-fs-fd-memory.x86_64-latest.args | 39 +++++++++++++++ ...vhost-user-fs-hugepages.x86_64-latest.args | 47 +++++++++++++++++++ tests/qemuxml2argvtest.c | 3 ++ 4 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9fcd06f8c3..272a0b8d44 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2574,6 +2574,45 @@ qemuBuildDisksCommandLine(virCommandPtr cmd, }
+static int +qemuBuildVHostUserFsCommandLine(virCommandPtr cmd, + virDomainFSDef *fs, + const virDomainDef *def, + qemuDomainObjPrivatePtr priv) +{ + g_autofree char *chardev_alias = NULL; + g_auto(virBuffer) opt = VIR_BUFFER_INITIALIZER; + + chardev_alias = g_strdup_printf("chr-vu-%s", fs->info.alias);
Neither this function nor any other place actually checks that qemu supports QEMU_CAPS_DEVICE_VHOST_USER_FS. Actually the capability is only ever mentioned in qemu_capabilities.(c|h).
+ virCommandAddArg(cmd, "-chardev"); + virBufferAddLit(&opt, "socket"); + virBufferAsprintf(&opt, ",id=%s", chardev_alias); + virBufferEscapeString(&opt, ",path=%s", QEMU_DOMAIN_FS_PRIVATE(fs)->vhostuser_fs_sock);
This function is for escaping the string for XML use not shell use.
+ virCommandAddArg(cmd, virBufferContentAndReset(&opt));
virCommandAddArgBuffer
+ + virCommandAddArg(cmd, "-device"); + + if (qemuBuildVirtioDevStr(&opt, "vhost-user-fs", priv->qemuCaps, + VIR_DOMAIN_DEVICE_FS, fs) < 0) + return -1; + + virBufferAsprintf(&opt, ",chardev=%s", chardev_alias); + if (fs->queue_size) + virBufferAsprintf(&opt, ",queue-size=%llu", fs->queue_size); + virBufferAddLit(&opt, ",tag="); + virQEMUBuildBufferEscapeComma(&opt, fs->dst);
This one is correct ;)
+ if (qemuBuildVirtioOptionsStr(&opt, fs->virtio, priv->qemuCaps) < 0) + return -1; + + if (qemuBuildDeviceAddressStr(&opt, def, &fs->info, priv->qemuCaps) < 0) + return -1; + + virCommandAddArg(cmd, virBufferContentAndReset(&opt));
virCommandAddArgBuffer
+ return 0; +} + +

This patch attempts to run virtiofsd with libvirtd's label and the category of the currently ran domain, which obviously does nothing without a corresponding policy. Per: https://www.redhat.com/archives/libvir-list/2020-February/msg00108.html a new type might be needed. TODO: file a SELinux policy bug --- src/libvirt_private.syms | 1 + src/qemu/qemu_security.c | 40 +++++++++++++++++++ src/qemu/qemu_security.h | 7 ++++ src/qemu/qemu_virtiofs.c | 4 +- src/security/security_dac.c | 20 ++++++++++ src/security/security_driver.h | 2 + src/security/security_manager.c | 12 ++++++ src/security/security_manager.h | 4 ++ src/security/security_nop.c | 1 + src/security/security_selinux.c | 69 +++++++++++++++++++++++++++++++++ src/security/security_stack.c | 19 +++++++++ 11 files changed, 178 insertions(+), 1 deletion(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 49eb8ab7cb..aaba15297a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1504,6 +1504,7 @@ virSecurityManagerSetSavedStateLabel; virSecurityManagerSetSocketLabel; virSecurityManagerSetTapFDLabel; virSecurityManagerSetTPMLabels; +virSecurityManagerSetVirtioFSProcessLabel; virSecurityManagerStackAddNested; virSecurityManagerTransactionAbort; virSecurityManagerTransactionCommit; diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index 2aa2b5b9c6..834556c910 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -475,6 +475,46 @@ qemuSecurityStartVhostUserGPU(virQEMUDriverPtr driver, } +/* + * qemuSecurityStartVhostUserGPU: + * + * @driver: the QEMU driver + * @vm: the domain object + * @cmd: the command to run + * @existstatus: pointer to int returning exit status of process + * @cmdret: pointer to int returning result of virCommandRun + * + * Start the vhost-user-gpu process with approriate labels. + * This function returns -1 on security setup error, 0 if all the + * setup was done properly. In case the virCommand failed to run + * 0 is returned but cmdret is set appropriately with the process + * exitstatus also set. + */ +int +qemuSecurityStartVirtioFS(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virCommandPtr cmd, + int *exitstatus, + int *cmdret) +{ + if (virSecurityManagerSetVirtioFSProcessLabel(driver->securityManager, + vm->def, cmd) < 0) + return -1; + + if (virSecurityManagerPreFork(driver->securityManager) < 0) + return -1; + + *cmdret = virCommandRun(cmd, exitstatus); + + virSecurityManagerPostFork(driver->securityManager); + + if (*cmdret < 0) + return -1; + + return 0; +} + + /* * qemuSecurityStartTPMEmulator: * diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index a8c648ece1..dbb3bdbc9b 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -84,6 +84,13 @@ int qemuSecurityStartVhostUserGPU(virQEMUDriverPtr driver, int *exitstatus, int *cmdret); +int qemuSecurityStartVirtioFS(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virCommandPtr cmd, + int *exitstatus, + int *cmdret); + + int qemuSecurityStartTPMEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virCommandPtr cmd, diff --git a/src/qemu/qemu_virtiofs.c b/src/qemu/qemu_virtiofs.c index b53e5b0806..17afe27535 100644 --- a/src/qemu/qemu_virtiofs.c +++ b/src/qemu/qemu_virtiofs.c @@ -34,6 +34,7 @@ #define VIR_FROM_THIS VIR_FROM_QEMU +VIR_LOG_INIT("qemu.virtiofs"); char * qemuVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, @@ -227,7 +228,8 @@ qemuVirtioFSStart(virLogManagerPtr logManager, if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) goto cleanup; - rc = virCommandRun(cmd, NULL); + if (qemuSecurityStartVirtioFS(driver, vm, cmd, NULL, &rc) < 0) + goto error; if (rc < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", diff --git a/src/security/security_dac.c b/src/security/security_dac.c index d75b18170b..64e00b0127 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -2263,6 +2263,24 @@ virSecurityDACSetChildProcessLabel(virSecurityManagerPtr mgr, if (virSecurityDACGetIds(secdef, priv, &user, &group, NULL, NULL) < 0) return -1; + VIR_DEBUG("Setting child to drop privileges to %u:%u", + (unsigned int)user, (unsigned int)group); + + virCommandSetUID(cmd, user); + virCommandSetGID(cmd, group); + return 0; +} + + +static int +virSecurityDACSetVirtioFSProcessLabel(virSecurityManagerPtr mgr G_GNUC_UNUSED, + virDomainDefPtr def G_GNUC_UNUSED, + virCommandPtr cmd) +{ + /* only running as root:root is supported */ + uid_t user = 0; + gid_t group = 0; + VIR_DEBUG("Setting child to drop privileges to %u:%u", (unsigned int)user, (unsigned int)group); @@ -2581,4 +2599,6 @@ virSecurityDriver virSecurityDriverDAC = { .domainSetSecurityChardevLabel = virSecurityDACSetChardevLabel, .domainRestoreSecurityChardevLabel = virSecurityDACRestoreChardevLabel, + + .domainSetSecurityVirtioFSProcessLabel = virSecurityDACSetVirtioFSProcessLabel, }; diff --git a/src/security/security_driver.h b/src/security/security_driver.h index 3353955813..d145922198 100644 --- a/src/security/security_driver.h +++ b/src/security/security_driver.h @@ -217,6 +217,8 @@ struct _virSecurityDriver { virSecurityDomainSetTPMLabels domainSetSecurityTPMLabels; virSecurityDomainRestoreTPMLabels domainRestoreSecurityTPMLabels; + + virSecurityDomainSetChildProcessLabel domainSetSecurityVirtioFSProcessLabel; }; virSecurityDriverPtr virSecurityDriverLookup(const char *name, diff --git a/src/security/security_manager.c b/src/security/security_manager.c index fe9def7fb9..13e6380ed6 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -945,6 +945,18 @@ virSecurityManagerSetChildProcessLabel(virSecurityManagerPtr mgr, } +int virSecurityManagerSetVirtioFSProcessLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virCommandPtr cmd) +{ + if (mgr->drv->domainSetSecurityVirtioFSProcessLabel) + return mgr->drv->domainSetSecurityVirtioFSProcessLabel(mgr, def, cmd); + + virReportUnsupportedError(); + return -1; +} + + int virSecurityManagerVerify(virSecurityManagerPtr mgr, virDomainDefPtr def) diff --git a/src/security/security_manager.h b/src/security/security_manager.h index f835356b7e..0465f99203 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -137,6 +137,10 @@ int virSecurityManagerSetProcessLabel(virSecurityManagerPtr mgr, int virSecurityManagerSetChildProcessLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, virCommandPtr cmd); +int virSecurityManagerSetVirtioFSProcessLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virCommandPtr cmd); + int virSecurityManagerVerify(virSecurityManagerPtr mgr, virDomainDefPtr def); int virSecurityManagerSetImageFDLabel(virSecurityManagerPtr mgr, diff --git a/src/security/security_nop.c b/src/security/security_nop.c index c1856eb421..f783bfa7ee 100644 --- a/src/security/security_nop.c +++ b/src/security/security_nop.c @@ -328,4 +328,5 @@ virSecurityDriver virSecurityDriverNop = { .domainSetSecurityChardevLabel = virSecurityDomainSetChardevLabelNop, .domainRestoreSecurityChardevLabel = virSecurityDomainRestoreChardevLabelNop, + .domainSetSecurityVirtioFSProcessLabel = virSecurityDomainSetChildProcessLabelNop, }; diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 3f6968a57a..853f5beb67 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -2941,6 +2941,73 @@ virSecuritySELinuxSetChildProcessLabel(virSecurityManagerPtr mgr G_GNUC_UNUSED, return 0; } +static int +virSecuritySELinuxSetVirtioFSProcessLabel(virSecurityManagerPtr mgr G_GNUC_UNUSED, + virDomainDefPtr def, + virCommandPtr cmd) +{ + virSecurityLabelDefPtr secdef; + security_context_t daemon_ctx; + g_auto(GStrv) daemon_label = NULL; + g_auto(GStrv) domain_label = NULL; + g_autofree char *frankenlabel = NULL; + + VIR_ERROR("WE got here"); + + secdef = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (!secdef || !secdef->label) + return 0; + + if (getcon_raw(&daemon_ctx) == -1) { + virReportSystemError(errno, "%s", + _("unable to get security context")); + return -1; + } + + VIR_ERROR("domain_label=%s daemon_label=%s", secdef->label, (char *)daemon_ctx); + + daemon_label = g_strsplit((char *)daemon_ctx, ":", 6); + domain_label = g_strsplit((char *)secdef->label, ":", 6); + + freecon(daemon_ctx); + + if (STRNEQ(SECURITY_SELINUX_NAME, secdef->model)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("security label driver mismatch: " + "'%s' model configured for domain, but " + "hypervisor driver is '%s'."), + secdef->model, SECURITY_SELINUX_NAME); + if (security_getenforce() == 1) + return -1; + } + + if (!domain_label || !daemon_label || + g_strv_length(domain_label) != 5 || g_strv_length(daemon_label) != 5) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed security label")); + return -1; + } + + frankenlabel = g_strdup_printf("%s:%s:%s:%s:%s", + daemon_label[0], + daemon_label[1], + daemon_label[2], + domain_label[3], + domain_label[4]); + + VIR_ERROR("frankenlabel=%s", frankenlabel); + + if (strlen(frankenlabel) >= VIR_SECURITY_LABEL_BUFLEN) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("security label exceeds maximum length: %d"), + VIR_SECURITY_LABEL_BUFLEN - 1); + return -1; + } + + virCommandSetSELinuxLabel(cmd, frankenlabel); + return 0; +} + static int virSecuritySELinuxSetDaemonSocketLabel(virSecurityManagerPtr mgr G_GNUC_UNUSED, virDomainDefPtr def) @@ -3576,4 +3643,6 @@ virSecurityDriver virSecurityDriverSELinux = { .domainSetSecurityTPMLabels = virSecuritySELinuxSetTPMLabels, .domainRestoreSecurityTPMLabels = virSecuritySELinuxRestoreTPMLabels, + + .domainSetSecurityVirtioFSProcessLabel = virSecuritySELinuxSetVirtioFSProcessLabel, }; diff --git a/src/security/security_stack.c b/src/security/security_stack.c index 073876daff..28982b4ba1 100644 --- a/src/security/security_stack.c +++ b/src/security/security_stack.c @@ -476,6 +476,23 @@ virSecurityStackSetChildProcessLabel(virSecurityManagerPtr mgr, return rc; } +static int +virSecurityStackSetVirtioFSProcessLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virCommandPtr cmd) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityStackItemPtr item = priv->itemsHead; + int rc = 0; + + for (; item; item = item->next) { + if (virSecurityManagerSetVirtioFSProcessLabel(item->securityManager, vm, cmd) < 0) + rc = -1; + } + + return rc; +} + static int virSecurityStackGetProcessLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, @@ -991,4 +1008,6 @@ virSecurityDriver virSecurityDriverStack = { .domainSetSecurityTPMLabels = virSecurityStackSetTPMLabels, .domainRestoreSecurityTPMLabels = virSecurityStackRestoreTPMLabels, + + .domainSetSecurityVirtioFSProcessLabel = virSecurityStackSetVirtioFSProcessLabel, }; -- 2.24.1
participants (4)
-
Daniel P. Berrangé
-
Ján Tomko
-
Michal Privoznik
-
Peter Krempa