[libvirt] [RFC PATCH 0/9] add virtiofs support (virtio-fs epopee)

https://bugzilla.redhat.com/show_bug.cgi?id=1694166 A work in progress, but I'm sending it now to get the XML bikesh^Wdiscussion out of the way. TODO: * documentation (due to the requirement of shared memory and therefore a <numa> topology specification, this probably deserves a separate article on https://libvirt.org/kbase.html ) * validation - refuse unsupported combinations and block migration * remove a few hardcoded values * figure out the binary path from qemu interop config * leave queue-size and cache-size on their default values or expose them in XML * use FD passing for vhost-user-fs device as well * clean up the code Tested with: commit 320000e72ec0613e164ce9608d865396fb2da278 Author: Linus Torvalds <torvalds@linux-foundation.org> Date: Wed Oct 30 14:17:18 2019 +0100 Merge tag 'iommu-fixes-v5.4-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu and commit d3d2efbb0e37494708dde359a9d0f492f84833e8 Author: Dr. David Alan Gilbert <dgilbert@redhat.com> CommitDate: 2019-10-24 11:50:55 +0100 virtio-fs: Allow mapping of meta data version table from https://gitlab.com/virtio-fs/qemu/tree/virtio-fs-dev git fetch https://gitlab.com/virtio-fs/qemu.git virtio-fs-dev (not yet merged upstream) Ján Tomko (9): qemu: address: take fsdriver type into account qemu: cmdline: take fsdriver into account qemu: add QEMU_CAPS_VHOST_USER_FS wip: tests: add xml2xml tests for virtio-fs wip: add XML -> ARGV tests for virtio-fs conf: qemu: add virtio-fs fsdriver type qemu: pass private data to qemuBuildFilesystemCommandLine wip: start virtiofsd wip: build vhost-user-fs device command line docs/schemas/domaincommon.rng | 1 + src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 2 + src/qemu/qemu_capabilities.c | 4 + src/qemu/qemu_capabilities.h | 3 + src/qemu/qemu_command.c | 67 +++++- src/qemu/qemu_domain.c | 4 + src/qemu/qemu_domain_address.c | 35 +++- src/qemu/qemu_extdevice.c | 191 ++++++++++++++++++ ...vhost-user-fs-fd-memory.x86_64-latest.args | 38 ++++ .../vhost-user-fs-fd-memory.xml | 38 ++++ ...vhost-user-fs-hugepages.x86_64-latest.args | 42 ++++ .../vhost-user-fs-hugepages.xml | 67 ++++++ tests/qemuxml2argvtest.c | 9 + .../vhost-user-fs-fd-memory.x86_64-latest.xml | 1 + .../vhost-user-fs-hugepages.x86_64-latest.xml | 1 + tests/qemuxml2xmltest.c | 3 + 17 files changed, 493 insertions(+), 14 deletions(-) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml -- 2.21.0

--- src/qemu/qemu_domain_address.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index 82db62e988..ea5327d475 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -676,18 +676,30 @@ qemuDomainDeviceCalculatePCIConnectFlags(virDomainDeviceDefPtr dev, break; case VIR_DOMAIN_DEVICE_FS: - /* the only type of filesystem so far is virtio-9p-pci */ - switch ((virDomainFSModel) dev->data.fs->model) { - case VIR_DOMAIN_FS_MODEL_VIRTIO_TRANSITIONAL: - /* Transitional devices only work in conventional PCI slots */ - return pciFlags; - case VIR_DOMAIN_FS_MODEL_VIRTIO: - case VIR_DOMAIN_FS_MODEL_VIRTIO_NON_TRANSITIONAL: - case VIR_DOMAIN_FS_MODEL_DEFAULT: - return virtioFlags; - case VIR_DOMAIN_FS_MODEL_LAST: - break; + switch ((virDomainFSDriverType) dev->data.fs->fsdriver) { + case VIR_DOMAIN_FS_DRIVER_TYPE_DEFAULT: + case VIR_DOMAIN_FS_DRIVER_TYPE_PATH: + case VIR_DOMAIN_FS_DRIVER_TYPE_HANDLE: + /* these drivers are handled by virtio-9p-pci */ + switch ((virDomainFSModel) dev->data.fs->model) { + case VIR_DOMAIN_FS_MODEL_VIRTIO_TRANSITIONAL: + /* Transitional devices only work in conventional PCI slots */ + return pciFlags; + case VIR_DOMAIN_FS_MODEL_VIRTIO: + case VIR_DOMAIN_FS_MODEL_VIRTIO_NON_TRANSITIONAL: + case VIR_DOMAIN_FS_MODEL_DEFAULT: + return virtioFlags; + case VIR_DOMAIN_FS_MODEL_LAST: + break; + } + + case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: + case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: + case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: + case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: + return 0; } + return 0; case VIR_DOMAIN_DEVICE_NET: { -- 2.21.0

Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_command.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index be9839cc64..cbe601099f 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2689,8 +2689,20 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, size_t i; for (i = 0; i < def->nfss; i++) { - if (qemuBuildFSDevCommandLine(cmd, def->fss[i], def, qemuCaps) < 0) - return -1; + switch ((virDomainFSDriverType) def->fss[i]->fsdriver) { + case VIR_DOMAIN_FS_DRIVER_TYPE_DEFAULT: + case VIR_DOMAIN_FS_DRIVER_TYPE_PATH: + case VIR_DOMAIN_FS_DRIVER_TYPE_HANDLE: + /* these drivers are handled by virtio-9p-pci */ + if (qemuBuildFSDevCommandLine(cmd, def->fss[i], def, qemuCaps) < 0) + return -1; + break; + case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: + case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: + case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: + case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: + break; + } } return 0; -- 2.21.0

Introduced by QEMU commit TBD --- src/qemu/qemu_capabilities.c | 4 ++++ src/qemu/qemu_capabilities.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9bb3c96448..29ee0d089f 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -547,6 +547,9 @@ VIR_ENUM_IMPL(virQEMUCaps, "query-cpu-model-comparison", "ramfb", "machine.pseries.cap-ccf-assist", + + /* 345 */ + "vhost-user-fs", ); @@ -1141,6 +1144,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = { { "vhost-user-gpu", QEMU_CAPS_DEVICE_VHOST_USER_GPU }, { "vhost-user-vga", QEMU_CAPS_DEVICE_VHOST_USER_VGA }, { "ramfb", QEMU_CAPS_DEVICE_RAMFB }, + { "vhost-user-fs-device", QEMU_CAPS_DEVICE_VHOST_USER_FS }, }; static struct virQEMUCapsStringFlags virQEMUCapsDevicePropsVirtioBalloon[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 10f0ce2654..33ac7e60b6 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -529,6 +529,9 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_DEVICE_RAMFB, /* -device ramfb */ QEMU_CAPS_MACHINE_PSERIES_CAP_CCF_ASSIST, /* -machine pseries.cap-ccf-assist */ + /* 345 */ + QEMU_CAPS_DEVICE_VHOST_USER_FS, /* -device vhost-user-fs */ + QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; -- 2.21.0

[to be squashed in with the actual XML addition] --- .../vhost-user-fs-fd-memory.xml | 32 +++++++++ .../vhost-user-fs-hugepages.xml | 67 +++++++++++++++++++ .../vhost-user-fs-fd-memory.x86_64-latest.xml | 1 + .../vhost-user-fs-hugepages.x86_64-latest.xml | 1 + tests/qemuxml2xmltest.c | 3 + 5 files changed, 104 insertions(+) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml create mode 120000 tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml new file mode 100644 index 0000000000..284ee329b3 --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -0,0 +1,32 @@ +<domain type='kvm'> + <name>guest</name> + <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid> + <memory unit='KiB'>14680064</memory> + <currentMemory unit='KiB'>14680064</currentMemory> + <memoryBacking> + <source type='file'/> + <access mode='shared'/> + </memoryBacking> + <vcpu placement='static'>2</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0-1' memory='14680064' unit='KiB' memAccess='shared'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='usb' index='0' model='none'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml new file mode 100644 index 0000000000..0633394028 --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.xml @@ -0,0 +1,67 @@ +<domain type='qemu'> + <name>guest</name> + <uuid>1ccfd97d-5eb4-478a-bbe6-88d254c16db7</uuid> + <memory unit='KiB'>4194304</memory> + <currentMemory unit='KiB'>4194304</currentMemory> + <memoryBacking> + <hugepages> + <page size='2048' unit='KiB'/> + </hugepages> + <access mode='shared'/> + </memoryBacking> + <vcpu placement='static'>2</vcpu> + <os> + <type arch='x86_64' machine='q35'>hvm</type> + <boot dev='hd'/> + </os> + <features> + <acpi/> + <apic/> + </features> + <cpu> + <numa> + <cell id='0' cpus='0-1' memory='2097152' unit='KiB' memAccess='shared'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2'/> + <source file='/var/lib/libvirt/images/guest.qcow2'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/> + </disk> + <controller type='usb' index='0' model='none'/> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pcie-root'/> + <controller type='pci' index='1' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='1' port='0x8'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/> + </controller> + <controller type='pci' index='2' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='2' port='0x9'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='3' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='3' port='0xa'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='4' model='pcie-root-port'> + <model name='pcie-root-port'/> + <target chassis='4' port='0xb'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x3'/> + </controller> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='none'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml b/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml new file mode 120000 index 0000000000..fbc552ef94 --- /dev/null +++ b/tests/qemuxml2xmloutdata/vhost-user-fs-fd-memory.x86_64-latest.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/vhost-user-fs-fd-memory.xml \ No newline at end of file diff --git a/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml b/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml new file mode 120000 index 0000000000..0c0f05b254 --- /dev/null +++ b/tests/qemuxml2xmloutdata/vhost-user-fs-hugepages.x86_64-latest.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/vhost-user-fs-hugepages.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89c9494e6c..fa999e0094 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1315,6 +1315,9 @@ mymain(void) DO_TEST("vhost-vsock-ccw-auto", QEMU_CAPS_DEVICE_VHOST_VSOCK, QEMU_CAPS_CCW); + DO_TEST_CAPS_LATEST("vhost-user-fs-fd-memory"); + DO_TEST_CAPS_LATEST("vhost-user-fs-hugepages"); + DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO); DO_TEST("riscv64-virt-pci", -- 2.21.0

To be squashed in with the actual command line addition. --- ...vhost-user-fs-fd-memory.x86_64-latest.args | 35 ++++++++++++++++ ...vhost-user-fs-hugepages.x86_64-latest.args | 42 +++++++++++++++++++ tests/qemuxml2argvtest.c | 3 ++ 3 files changed, 80 insertions(+) create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args new file mode 100644 index 0000000000..39c7dbf6d8 --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args @@ -0,0 +1,35 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-guest \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-guest/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-guest/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=guest,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-guest/master-key.aes \ +-machine pc,accel=kvm,usb=off,dump-guest-core=off \ +-m 14336 \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-object memory-backend-file,id=ram-node0,\ +mem-path=/var/lib/libvirt/qemu/ram/libvirt/qemu/-1-guest/ram-node0,share=yes,\ +size=15032385536 \ +-numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-boot strict=on \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args new file mode 100644 index 0000000000..aa6a60d70d --- /dev/null +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args @@ -0,0 +1,42 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-guest \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-guest/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-guest/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=guest,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-guest/master-key.aes \ +-machine q35,accel=tcg,usb=off,dump-guest-core=off \ +-m 2048 \ +-overcommit mem-lock=off \ +-smp 2,sockets=2,cores=1,threads=1 \ +-object memory-backend-file,id=ram-node0,prealloc=yes,\ +mem-path=/dev/hugepages2M/libvirt/qemu/-1-guest,share=yes,size=2147483648 \ +-numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-uuid 1ccfd97d-5eb4-478a-bbe6-88d254c16db7 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-boot strict=on \ +-device pcie-root-port,port=0x8,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,\ +addr=0x1 \ +-device pcie-root-port,port=0x9,chassis=2,id=pci.2,bus=pcie.0,addr=0x1.0x1 \ +-device pcie-root-port,port=0xa,chassis=3,id=pci.3,bus=pcie.0,addr=0x1.0x2 \ +-device pcie-root-port,port=0xb,chassis=4,id=pci.4,bus=pcie.0,addr=0x1.0x3 \ +-drive file=/var/lib/libvirt/images/guest.qcow2,format=qcow2,if=none,\ +id=drive-virtio-disk0 \ +-device virtio-blk-pci,scsi=off,bus=pci.4,addr=0x0,drive=drive-virtio-disk0,\ +id=virtio-disk0,bootindex=1 \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index fd330df3e0..24df0a48e7 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2982,6 +2982,9 @@ mymain(void) DO_TEST_CAPS_VER("launch-security-sev", "2.12.0"); + DO_TEST_CAPS_LATEST("vhost-user-fs-fd-memory"); + DO_TEST_CAPS_LATEST("vhost-user-fs-hugepages"); + DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO); DO_TEST("riscv64-virt-pci", -- 2.21.0

Introduce a new 'virtio-fs' driver type for filesystem. <filesystem type='mount' accessmode='passthrough'> <driver type='virtio-fs'/> <source dir='/path'/> <target dir='/path'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> </filesystem> Signed-off-by: Ján Tomko <jtomko@redhat.com> --- docs/schemas/domaincommon.rng | 1 + src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 +++ src/qemu/qemu_domain.c | 4 ++++ src/qemu/qemu_domain_address.c | 3 +++ tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml | 6 ++++++ 7 files changed, 19 insertions(+) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index e06f892da3..f6479c95a7 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2567,6 +2567,7 @@ <value>loop</value> <value>nbd</value> <value>ploop</value> + <value>virtio-fs</value> </choice> </attribute> </optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d5aba7336f..2694e4bb68 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -476,6 +476,7 @@ VIR_ENUM_IMPL(virDomainFSDriver, "loop", "nbd", "ploop", + "virtio-fs", ); VIR_ENUM_IMPL(virDomainFSAccessMode, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index c69d1b7ef5..54a7e7c52f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -770,6 +770,7 @@ typedef enum { VIR_DOMAIN_FS_DRIVER_TYPE_LOOP, VIR_DOMAIN_FS_DRIVER_TYPE_NBD, VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP, + VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS, VIR_DOMAIN_FS_DRIVER_TYPE_LAST } virDomainFSDriverType; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbe601099f..985fcdd215 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2697,6 +2697,9 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, if (qemuBuildFSDevCommandLine(cmd, def->fss[i], def, qemuCaps) < 0) return -1; break; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* TODO: vhost-user-fs-pci */ + return 0; case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 667cc89072..334039005c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7110,6 +7110,10 @@ qemuDomainDeviceDefValidateFS(virDomainFSDefPtr fs, _("Filesystem driver type not supported")); return -1; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* TODO: vhost-user-fs-pci */ + return -0; + case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: default: virReportEnumRangeError(virDomainFSDriverType, fs->fsdriver); diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index ea5327d475..482373991c 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -692,6 +692,9 @@ qemuDomainDeviceCalculatePCIConnectFlags(virDomainDeviceDefPtr dev, case VIR_DOMAIN_FS_MODEL_LAST: break; } + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* vhost-user-fs-pci */ + return virtioFlags; case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml index 284ee329b3..ecd5b33a28 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -25,6 +25,12 @@ <emulator>/usr/bin/qemu-system-x86_64</emulator> <controller type='usb' index='0' model='none'/> <controller type='pci' index='0' model='pci-root'/> + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtio-fs'/> + <source dir='/path'/> + <target dir='/path'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </filesystem> <input type='mouse' bus='ps2'/> <input type='keyboard' bus='ps2'/> <memballoon model='none'/> -- 2.21.0

On 11/1/19 6:16 AM, Ján Tomko wrote:
Introduce a new 'virtio-fs' driver type for filesystem.
<filesystem type='mount' accessmode='passthrough'> <driver type='virtio-fs'/>
After resolving my confusion* regarding the example and description of the filesystem/driver element, I think this change is logical and shouldn't require much bikeshedding :-). Although you are missing docs for the new type. Regards, Jim * The last filesystem example has <driver name='loop' type='raw'/>, which doesn't match the schema or description. I'll send a doc cleanup patch for that.
<source dir='/path'/> <target dir='/path'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> </filesystem>
Signed-off-by: Ján Tomko <jtomko@redhat.com> --- docs/schemas/domaincommon.rng | 1 + src/conf/domain_conf.c | 1 + src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 +++ src/qemu/qemu_domain.c | 4 ++++ src/qemu/qemu_domain_address.c | 3 +++ tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml | 6 ++++++ 7 files changed, 19 insertions(+)
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index e06f892da3..f6479c95a7 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2567,6 +2567,7 @@ <value>loop</value> <value>nbd</value> <value>ploop</value> + <value>virtio-fs</value> </choice> </attribute> </optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d5aba7336f..2694e4bb68 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -476,6 +476,7 @@ VIR_ENUM_IMPL(virDomainFSDriver, "loop", "nbd", "ploop", + "virtio-fs", );
VIR_ENUM_IMPL(virDomainFSAccessMode, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index c69d1b7ef5..54a7e7c52f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -770,6 +770,7 @@ typedef enum { VIR_DOMAIN_FS_DRIVER_TYPE_LOOP, VIR_DOMAIN_FS_DRIVER_TYPE_NBD, VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP, + VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS,
VIR_DOMAIN_FS_DRIVER_TYPE_LAST } virDomainFSDriverType; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbe601099f..985fcdd215 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2697,6 +2697,9 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, if (qemuBuildFSDevCommandLine(cmd, def->fss[i], def, qemuCaps) < 0) return -1; break; + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* TODO: vhost-user-fs-pci */ + return 0; case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 667cc89072..334039005c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7110,6 +7110,10 @@ qemuDomainDeviceDefValidateFS(virDomainFSDefPtr fs, _("Filesystem driver type not supported")); return -1;
+ case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* TODO: vhost-user-fs-pci */ + return -0; + case VIR_DOMAIN_FS_DRIVER_TYPE_LAST: default: virReportEnumRangeError(virDomainFSDriverType, fs->fsdriver); diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index ea5327d475..482373991c 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -692,6 +692,9 @@ qemuDomainDeviceCalculatePCIConnectFlags(virDomainDeviceDefPtr dev, case VIR_DOMAIN_FS_MODEL_LAST: break; } + case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: + /* vhost-user-fs-pci */ + return virtioFlags;
case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml index 284ee329b3..ecd5b33a28 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.xml @@ -25,6 +25,12 @@ <emulator>/usr/bin/qemu-system-x86_64</emulator> <controller type='usb' index='0' model='none'/> <controller type='pci' index='0' model='pci-root'/> + <filesystem type='mount' accessmode='passthrough'> + <driver type='virtio-fs'/> + <source dir='/path'/> + <target dir='/path'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </filesystem> <input type='mouse' bus='ps2'/> <input type='keyboard' bus='ps2'/> <memballoon model='none'/>

On Fri, Nov 1, 2019 at 1:15 PM Ján Tomko <jtomko@redhat.com> wrote:
Introduce a new 'virtio-fs' driver type for filesystem.
<filesystem type='mount' accessmode='passthrough'> <driver type='virtio-fs'/> <source dir='/path'/> <target dir='/path'/>
What happens with the target dir? virtio-fs has no way to pass the target dir into the guest. Out-of-band methods exist: qemu-guest-agent could be used to mount the file system. Another approach is to add a uevent to the virtiofs.ko guest driver and let udev configuration decide what to do (e.g. automount under /run/media/virtio-fs/$TAG or similar). Stefan

On Mon, Nov 04, 2019 at 09:56:28AM +0100, Stefan Hajnoczi wrote:
On Fri, Nov 1, 2019 at 1:15 PM Ján Tomko <jtomko@redhat.com> wrote:
Introduce a new 'virtio-fs' driver type for filesystem.
<filesystem type='mount' accessmode='passthrough'> <driver type='virtio-fs'/> <source dir='/path'/> <target dir='/path'/>
What happens with the target dir?
For virtio-fs, it is used the same way as with 9pfs - it is passed as the tag and meant as a suggestion for the guest for where to mount the filesystem. For LXC, libvirt actually does the mounting so the target dir path is honored. I could use some other example in the documentation that does not look as a path if that's too confusing. I'm not sure whether deviating from the existing pattern and using something like: <target tag='myfs'/> is worth it.
virtio-fs has no way to pass the target dir into the guest.
Also, I see that the mount syntax changed from -t virtio_fs to -t virtiofs - is that the new preferred spelling that should be preserved here?
Out-of-band methods exist: qemu-guest-agent could be used to mount the file system.
Doing that automatically is out of scope of libvirt. But to allow higher layers to do that, libvirt would need to expose the 'guest-exec' command. Jano
Another approach is to add a uevent to the virtiofs.ko guest driver and let udev configuration decide what to do (e.g. automount under /run/media/virtio-fs/$TAG or similar).
Stefan

On Wed, Nov 20, 2019 at 09:38:15AM +0100, Ján Tomko wrote:
On Mon, Nov 04, 2019 at 09:56:28AM +0100, Stefan Hajnoczi wrote:
On Fri, Nov 1, 2019 at 1:15 PM Ján Tomko <jtomko@redhat.com> wrote:
Introduce a new 'virtio-fs' driver type for filesystem.
<filesystem type='mount' accessmode='passthrough'> <driver type='virtio-fs'/> <source dir='/path'/> <target dir='/path'/>
What happens with the target dir?
For virtio-fs, it is used the same way as with 9pfs - it is passed as the tag and meant as a suggestion for the guest for where to mount the filesystem.
For LXC, libvirt actually does the mounting so the target dir path is honored.
I could use some other example in the documentation that does not look as a path if that's too confusing. I'm not sure whether deviating from the existing pattern and using something like: <target tag='myfs'/> is worth it.
Yeah, we shouldn't have called the attribute "dir", but since we have that naming for ages now, we shouldn't change it IMHO. Just document that its a stupid historical name :-) Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

This will be used by a future patch. Signed-off-by: Ján Tomko <jtomko@redhat.com> --- src/qemu/qemu_command.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 985fcdd215..d953e3626a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2684,7 +2684,8 @@ qemuBuildFSDevCommandLine(virCommandPtr cmd, static int qemuBuildFilesystemCommandLine(virCommandPtr cmd, const virDomainDef *def, - virQEMUCapsPtr qemuCaps) + virQEMUCapsPtr qemuCaps, + qemuDomainObjPrivatePtr priv G_GNUC_UNUSED) { size_t i; @@ -10309,7 +10310,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver, if (qemuBuildDisksCommandLine(cmd, def, qemuCaps) < 0) return NULL; - if (qemuBuildFilesystemCommandLine(cmd, def, qemuCaps) < 0) + if (qemuBuildFilesystemCommandLine(cmd, def, qemuCaps, priv) < 0) return NULL; if (qemuBuildNetCommandLine(driver, vm, logManager, secManager, cmd, -- 2.21.0

Start virtiofsd for each <filesystem> device using it. Pre-create the socket for communication with QEMU and pass it to virtiofsd. Note that virtiofsd needs to run as root. --- src/conf/domain_conf.h | 1 + src/qemu/qemu_extdevice.c | 191 ++++++++++++++++++++++++++++++++++++++ tests/qemuxml2argvtest.c | 6 ++ 3 files changed, 198 insertions(+) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 54a7e7c52f..78f88a0c2f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -817,6 +817,7 @@ struct _virDomainFSDef { unsigned long long space_soft_limit; /* in bytes */ bool symlinksResolved; virDomainVirtioOptionsPtr virtio; + char *vhost_user_fs_path; /* TODO put this in private data */ }; diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 463f76c21a..cb44ca325f 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -20,6 +20,7 @@ #include <config.h> +#include "qemu_command.h" #include "qemu_extdevice.h" #include "qemu_vhost_user_gpu.h" #include "qemu_domain.h" @@ -149,6 +150,178 @@ qemuExtDevicesCleanupHost(virQEMUDriverPtr driver, qemuExtTPMCleanupHost(def); } +static char * +qemuExtVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def)) || + virAsprintf(&name, "%s-%s-virtiofsd", shortName, alias) < 0) + return NULL; + + return virPidFileBuildPath(cfg->stateDir, name); +} + + +static char * +qemuExtVirtioFSCreateSocketFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def)) || + virAsprintf(&name, "%s-%s-virtiofsd", shortName, alias) < 0) + return NULL; + + return virFileBuildPath(cfg->stateDir, name, ".sock"); +} + + +static int +qemuExtVirtioFSdStart(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autoptr(virCommand) cmd = NULL; + g_autofree char *pidfile = NULL; + char errbuf[1024] = { 0 }; + virDomainChrSourceDefPtr chrdev = virDomainChrSourceDefNew(NULL); + pid_t pid = (pid_t) -1; + VIR_AUTOCLOSE errfd = -1; + VIR_AUTOCLOSE fd = -1; + int exitstatus = 0; + int cmdret = 0; + int ret = -1; + int rc; + + chrdev->type = VIR_DOMAIN_CHR_TYPE_UNIX; + chrdev->data.nix.listen = true; + + if (!(pidfile = qemuExtVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + if (!(chrdev->data.nix.path = qemuExtVirtioFSCreateSocketFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) + goto cleanup; + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + + /* TODO: do not call this here */ + virDomainChrDef chr = { .source = chrdev }; + if (qemuSecuritySetChardevLabel(driver, vm, &chr) < 0) + goto cleanup; + + /* TODO: configure path */ + if (!(cmd = virCommandNew("/usr/libexec/virtiofsd"))) + goto cleanup; + + virCommandSetPidFile(cmd, pidfile); + virCommandSetErrorFD(cmd, &errfd); + virCommandDaemonize(cmd); + + virCommandAddArg(cmd, "--syslog"); + virCommandAddArgFormat(cmd, "--fd=%d", fd); + virCommandPassFD(cmd, fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + fd = -1; + + virCommandAddArg(cmd, "-o"); + /* TODO: validate path? */ + virCommandAddArgFormat(cmd, "source=%s", fs->src->path); + virCommandAddArg(cmd, "-d"); + + if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) + goto cleanup; + + cmdret = virCommandRun(cmd, &exitstatus); + + if (cmdret < 0 || exitstatus != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not start 'virtiofsd'. exitstatus: %d"), exitstatus); + goto error; + } + + rc = virPidFileReadPath(pidfile, &pid); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to read virtiofsd pidfile '%s'"), + pidfile); + goto error; + } + + if (virProcessKill(pid, 0) != 0) { + if (saferead(errfd, errbuf, sizeof(errbuf) - 1) < 0) { + virReportSystemError(errno, "%s", + _("virtiofsd died unexpectedly")); + } else { + virReportError(VIR_ERR_OPERATION_FAILED, + _("virtiofsd died and reported: %s"), errbuf); + } + goto error; + } + + fs->vhost_user_fs_path = g_steal_pointer(&chrdev->data.nix.path); + ret = 0; + + cleanup: + if (chrdev->data.nix.path) + unlink(chrdev->data.nix.path); + virObjectUnref(chrdev); + return ret; + + error: + if (pid != -1) + virProcessKillPainfully(pid, true); + if (pidfile) + unlink(pidfile); + goto cleanup; +} + + +static void +qemuExtVirtioFSdStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autofree char *pidfile = NULL; + virErrorPtr orig_err; + pid_t pid = -1; + int rc; + + virErrorPreserveLast(&orig_err); + + if (!(pidfile = qemuExtVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + rc = virPidFileReadPathIfAlive(pidfile, &pid, NULL); + if (rc >= 0 && pid != (pid_t) -1) + virProcessKillPainfully(pid, true); + + if (unlink(pidfile) < 0 && + errno != ENOENT) { + virReportSystemError(errno, + _("Unable to remove stale pidfile %s"), + pidfile); + } + + if (fs->vhost_user_fs_path) + unlink(fs->vhost_user_fs_path); + + cleanup: + virErrorRestore(&orig_err); +} + int qemuExtDevicesStart(virQEMUDriverPtr driver, @@ -184,6 +357,17 @@ qemuExtDevicesStart(virQEMUDriverPtr driver, return -1; } + for (i = 0; i < def->nfss; i++) { + /* FIXME: use private data */ + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS) { + if (qemuExtVirtioFSdStart(driver, vm, fs) < 0) + return -1; + } + } + + return ret; } @@ -215,6 +399,13 @@ qemuExtDevicesStop(virQEMUDriverPtr driver, if (slirp) qemuSlirpStop(slirp, vm, driver, net, false); } + + for (i = 0; i < def->nfss; i++) { + virDomainFSDefPtr fs = def->fss[i]; + + if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS) + qemuExtVirtioFSdStop(driver, vm, fs); + } } diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 24df0a48e7..bf3366f953 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -501,6 +501,12 @@ testCompareXMLToArgv(const void *data) } } + for (i = 0; i < vm->def->nfss; i++) { + virDomainFSDefPtr fs = vm->def->fss[i]; + char *s = g_strdup_printf("/tmp/lib/domain--1-guest/fs%zu.vhost-fs.sock", i); + fs->vhost_user_fs_path = s; + } + if (vm->def->vsock) { virDomainVsockDefPtr vsock = vm->def->vsock; qemuDomainVsockPrivatePtr vsockPriv = -- 2.21.0

On Fri, Nov 1, 2019 at 1:18 PM Ján Tomko <jtomko@redhat.com> wrote:
+ if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) + goto cleanup; + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup;
qemuSecurityClearSocketLabel() is not called in the qemuOpenChrChardevUNIXSocket() error code path. Is this correct?
+static void +qemuExtVirtioFSdStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{
The daemon stops automatically when the vhost-user socket is closed by QEMU. Is it necessary to implement an explicit stop function? Stefan

On Mon, Nov 04, 2019 at 10:06:40AM +0100, Stefan Hajnoczi wrote:
On Fri, Nov 1, 2019 at 1:18 PM Ján Tomko <jtomko@redhat.com> wrote:
+ if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) + goto cleanup; + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup;
qemuSecurityClearSocketLabel() is not called in the qemuOpenChrChardevUNIXSocket() error code path. Is this correct?
That's an oversight, thanks for catching that.
+static void +qemuExtVirtioFSdStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{
The daemon stops automatically when the vhost-user socket is closed by QEMU. Is it necessary to implement an explicit stop function?
I hope not, but thought it was better to have it than possibly leaving leftover daemons. Jano
Stefan

On Mon, Nov 04, 2019 at 10:06:40AM +0100, Stefan Hajnoczi wrote:
On Fri, Nov 1, 2019 at 1:18 PM Ján Tomko <jtomko@redhat.com> wrote:
+ if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) + goto cleanup; + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup;
qemuSecurityClearSocketLabel() is not called in the qemuOpenChrChardevUNIXSocket() error code path. Is this correct?
+static void +qemuExtVirtioFSdStop(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{
The daemon stops automatically when the vhost-user socket is closed by QEMU. Is it necessary to implement an explicit stop function?
That's good, but we've generally wanted to be explicit about cleaning things up to cope with unexpected circumstances. In particular QEMU can get itself stuck as a zombie if there's a dead disk, so it is worth tearing down virtiofsd explicitly. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Fri, Nov 01, 2019 at 01:16:06PM +0100, Ján Tomko wrote:
Start virtiofsd for each <filesystem> device using it.
Pre-create the socket for communication with QEMU and pass it to virtiofsd.
Note that virtiofsd needs to run as root. --- src/conf/domain_conf.h | 1 + src/qemu/qemu_extdevice.c | 191 ++++++++++++++++++++++++++++++++++++++ tests/qemuxml2argvtest.c | 6 ++ 3 files changed, 198 insertions(+)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 54a7e7c52f..78f88a0c2f 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -817,6 +817,7 @@ struct _virDomainFSDef { unsigned long long space_soft_limit; /* in bytes */ bool symlinksResolved; virDomainVirtioOptionsPtr virtio; + char *vhost_user_fs_path; /* TODO put this in private data */ };
IIUC, this is a socket rather than a file, so I'd call it "vhostuser_fs_sock" personally.
diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 463f76c21a..cb44ca325f 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -20,6 +20,7 @@
#include <config.h>
+#include "qemu_command.h" #include "qemu_extdevice.h" #include "qemu_vhost_user_gpu.h" #include "qemu_domain.h" @@ -149,6 +150,178 @@ qemuExtDevicesCleanupHost(virQEMUDriverPtr driver, qemuExtTPMCleanupHost(def); }
+static char * +qemuExtVirtioFSCreatePidFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def)) || + virAsprintf(&name, "%s-%s-virtiofsd", shortName, alias) < 0) + return NULL;
g_strdup_printf so we can assume this always succeeds
+ + return virPidFileBuildPath(cfg->stateDir, name); +}
likewise we can assume this aborts on oom
+ + +static char * +qemuExtVirtioFSCreateSocketFilename(virQEMUDriverConfigPtr cfg, + const virDomainDef *def, + const char *alias) +{ + g_autofree char *shortName = NULL; + g_autofree char *name = NULL; + + if (!(shortName = virDomainDefGetShortName(def)) || + virAsprintf(&name, "%s-%s-virtiofsd", shortName, alias) < 0) + return NULL; + + return virFileBuildPath(cfg->stateDir, name, ".sock"); +}
Same comments
+static int +qemuExtVirtioFSdStart(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainFSDefPtr fs) +{ + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); + g_autoptr(virCommand) cmd = NULL; + g_autofree char *pidfile = NULL; + char errbuf[1024] = { 0 }; + virDomainChrSourceDefPtr chrdev = virDomainChrSourceDefNew(NULL); + pid_t pid = (pid_t) -1; + VIR_AUTOCLOSE errfd = -1; + VIR_AUTOCLOSE fd = -1; + int exitstatus = 0; + int cmdret = 0; + int ret = -1; + int rc; + + chrdev->type = VIR_DOMAIN_CHR_TYPE_UNIX; + chrdev->data.nix.listen = true; + + if (!(pidfile = qemuExtVirtioFSCreatePidFilename(cfg, vm->def, fs->info.alias))) + goto cleanup; + + if (!(chrdev->data.nix.path = qemuExtVirtioFSCreateSocketFilename(cfg, vm->def, fs->info.alias))) + goto cleanup;
No ned to check for failure of these two.
+ + if (qemuSecuritySetDaemonSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + fd = qemuOpenChrChardevUNIXSocket(chrdev); + if (fd < 0) + goto cleanup; + if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) + goto cleanup; + + /* TODO: do not call this here */ + virDomainChrDef chr = { .source = chrdev }; + if (qemuSecuritySetChardevLabel(driver, vm, &chr) < 0) + goto cleanup;
Indeed, needs to be in the security manager code.
+ + /* TODO: configure path */ + if (!(cmd = virCommandNew("/usr/libexec/virtiofsd"))) + goto cleanup;
In $QEMU/docs/interop/vhost-user.json there's a specificiation for how we're expected to locate the virtiofsd binary based on metadata files. Same approach we're using for locating firmware files basically.
+ + virCommandSetPidFile(cmd, pidfile); + virCommandSetErrorFD(cmd, &errfd); + virCommandDaemonize(cmd); + + virCommandAddArg(cmd, "--syslog");
I feel like maybe we should be using a logfile as we do for QEMU, via virtlogd ?
+ virCommandAddArgFormat(cmd, "--fd=%d", fd); + virCommandPassFD(cmd, fd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); + fd = -1; + + virCommandAddArg(cmd, "-o"); + /* TODO: validate path? */ + virCommandAddArgFormat(cmd, "source=%s", fs->src->path); + virCommandAddArg(cmd, "-d");
IIRC, it was decided intended that we'd have a dbus interface to virtiofsd, one of whose jobs would be a way to turn on / off logging, similar to our own virt-admin facility. Could perhaps be useful to have debug on from startip, but a qemu.conf setting is probably sufficient for this.
+ + if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) + goto cleanup; + + cmdret = virCommandRun(cmd, &exitstatus); + + if (cmdret < 0 || exitstatus != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not start 'virtiofsd'. exitstatus: %d"), exitstatus); + goto error; + } + + rc = virPidFileReadPath(pidfile, &pid); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to read virtiofsd pidfile '%s'"), + pidfile); + goto error; + } + + if (virProcessKill(pid, 0) != 0) { + if (saferead(errfd, errbuf, sizeof(errbuf) - 1) < 0) { + virReportSystemError(errno, "%s", + _("virtiofsd died unexpectedly")); + } else { + virReportError(VIR_ERR_OPERATION_FAILED, + _("virtiofsd died and reported: %s"), errbuf); + } + goto error; + }
I feel like we should be putting it into the cgroup for the VM as a psuedo "emulator thread". We'll also need to make sure that virtiofsd gets given an SELinux policy, with sVirt MCS tag associated with it. This raises an interesting question though, because from libvirt's POV we're not supposed to know what binary we're actually invoking. We're supposed to follow the vhost-user.json spec to find an binary, allowing the user to drop in arbitrary impls. Each impl though may have differeing SELinux requirements, so we need to figure out what SElinux domain type to apply. I think this means vhost-user.json needs to specify the desired SELinux domain for each vhost user binary.
+ fs->vhost_user_fs_path = g_steal_pointer(&chrdev->data.nix.path); + ret = 0; + + cleanup: + if (chrdev->data.nix.path) + unlink(chrdev->data.nix.path); + virObjectUnref(chrdev); + return ret; + + error: + if (pid != -1) + virProcessKillPainfully(pid, true); + if (pidfile) + unlink(pidfile); + goto cleanup; +}
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Format the 'vhost-user-fs' device on the QEMU command line. --- src/qemu/qemu_command.c | 49 +++++++++++++++++-- ...vhost-user-fs-fd-memory.x86_64-latest.args | 3 ++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index d953e3626a..a3008e47ee 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -2593,6 +2593,46 @@ qemuBuildDisksCommandLine(virCommandPtr cmd, } +static int +qemuBuildVHostUserFsCommandLine(virCommandPtr cmd, + virDomainFSDef *fs, + const virDomainDef *def G_GNUC_UNUSED, + qemuDomainObjPrivatePtr priv) +{ + /* TODO: reject non-passthrough accessmode */ + g_autofree char *chardev_alias = NULL; + g_auto(virBuffer) opt = VIR_BUFFER_INITIALIZER; + const char *tag = fs->dst; + + chardev_alias = g_strdup_printf("chr-vu-%s", fs->info.alias); + + virCommandAddArg(cmd, "-chardev"); + virBufferAddLit(&opt, "socket"); + virBufferAsprintf(&opt, ",id=%s", chardev_alias); + virBufferEscapeString(&opt, ",path=%s", fs->vhost_user_fs_path); + virCommandAddArg(cmd, virBufferContentAndReset(&opt)); + + virCommandAddArg(cmd, "-device"); + + if (qemuBuildVirtioDevStr(&opt, "vhost-user-fs", priv->qemuCaps, + VIR_DOMAIN_DEVICE_FS, fs) < 0) + return -1; + + virBufferAsprintf(&opt, ",chardev=%s", chardev_alias); + /* TODO: do not hardcode this */ + virBufferAsprintf(&opt, ",queue-size=%u", 1024); + virBufferEscapeString(&opt, ",tag=%s,cache-size=2G", tag); + if (qemuBuildVirtioOptionsStr(&opt, fs->virtio, priv->qemuCaps) < 0) + return -1; + + if (qemuBuildDeviceAddressStr(&opt, def, &fs->info, priv->qemuCaps) < 0) + return -1; + + virCommandAddArg(cmd, virBufferContentAndReset(&opt)); + return 0; +} + + static char * qemuBuildFSStr(virDomainFSDefPtr fs) { @@ -2685,7 +2725,7 @@ static int qemuBuildFilesystemCommandLine(virCommandPtr cmd, const virDomainDef *def, virQEMUCapsPtr qemuCaps, - qemuDomainObjPrivatePtr priv G_GNUC_UNUSED) + qemuDomainObjPrivatePtr priv) { size_t i; @@ -2699,8 +2739,11 @@ qemuBuildFilesystemCommandLine(virCommandPtr cmd, return -1; break; case VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIO_FS: - /* TODO: vhost-user-fs-pci */ - return 0; + /* vhost-user-fs-pci */ + if (qemuBuildVHostUserFsCommandLine(cmd, def->fss[i], def, priv) < 0) + return -1; + break; + case VIR_DOMAIN_FS_DRIVER_TYPE_LOOP: case VIR_DOMAIN_FS_DRIVER_TYPE_NBD: case VIR_DOMAIN_FS_DRIVER_TYPE_PLOOP: diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args index 39c7dbf6d8..a63d6b6dcc 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args @@ -30,6 +30,9 @@ size=15032385536 \ -no-shutdown \ -no-acpi \ -boot strict=on \ +-chardev socket,id=chr-vu-fs0,path=/tmp/lib/domain--1-guest/fs0.vhost-fs.sock \ +-device vhost-user-fs-pci,chardev=chr-vu-fs0,queue-size=1024,tag=/path,\ +cache-size=2G,bus=pci.0,addr=0x2 \ -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ resourcecontrol=deny \ -msg timestamp=on -- 2.21.0

On Fri, Nov 1, 2019 at 1:15 PM Ján Tomko <jtomko@redhat.com> wrote: Great, this looks like the right direction.
* leave queue-size and cache-size on their default values or expose them in XML
Please support the following parameters: * virtiofsd path - useful for testing custom virtiofsd binaries or alternative implementations (similar to setting <emulator> path) * DAX Window size: -device vhost-user-fs-device cache-size (in megabytes and cache-size=0 means disable the DAX Window) * cache mode: virtiofsd -o cache=none|auto|always

Hi Ján, Could you add no_posix_lock and no_flock option for virtiofsd as well? The sample patch is as follows: --- docs/schemas/domaincommon.rng | 10 ++++++++++ src/conf/domain_conf.c | 10 ++++++++++ src/conf/domain_conf.h | 2 ++ src/qemu/qemu_extdevice.c | 10 ++++++++++ 4 files changed, 32 insertions(+) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index f6479c9..6dd8e2a 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -2523,6 +2523,16 @@ <empty/> </element> </optional> + <optional> + <element name='no_posix_lock'> + <empty/> + </element> + </optional> + <optional> + <element name='no_flock'> + <empty/> + </element> + </optional> <optional> <ref name="alias"/> </optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 2694e4b..8223910 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -11055,6 +11055,10 @@ virDomainFSDefParseXML(virDomainXMLOptionPtr xmlopt, target = virXMLPropString(cur, "dir"); } else if (virXMLNodeNameEqual(cur, "readonly")) { def->readonly = true; + } else if (virXMLNodeNameEqual(cur, "no_posix_lock")) { + def->no_posix_lock = true; + } else if (virXMLNodeNameEqual(cur, "no_flock")) { + def->no_flock = true; } else if (virXMLNodeNameEqual(cur, "driver")) { if (!fsdriver) fsdriver = virXMLPropString(cur, "type"); @@ -24899,6 +24903,12 @@ virDomainFSDefFormat(virBufferPtr buf, if (def->readonly) virBufferAddLit(buf, "<readonly/>\n"); + if (def->no_posix_lock) + virBufferAddLit(buf, "<no_posix_lock/>\n"); + + if (def->no_flock) + virBufferAddLit(buf, "<no_flock/>\n"); + if (virDomainDeviceInfoFormat(buf, &def->info, flags) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 78f88a0..8a60c8c 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -812,6 +812,8 @@ struct _virDomainFSDef { virStorageSourcePtr src; char *dst; bool readonly; + bool no_posix_lock; + bool no_flock; virDomainDeviceInfo info; unsigned long long space_hard_limit; /* in bytes */ unsigned long long space_soft_limit; /* in bytes */ diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c index 634a3fb..618a886 100644 --- a/src/qemu/qemu_extdevice.c +++ b/src/qemu/qemu_extdevice.c @@ -242,6 +242,16 @@ qemuExtVirtioFSdStart(virQEMUDriverPtr driver, virCommandAddArgFormat(cmd, "source=%s", fs->src->path); virCommandAddArg(cmd, "-d"); + if (fs->no_posix_lock) { + virCommandAddArg(cmd, "-o"); + virCommandAddArg(cmd, "no_posix_lock"); + } + + if (fs->no_flock) { + virCommandAddArg(cmd, "-o"); + virCommandAddArg(cmd, "no_flock"); + } + if (qemuExtDeviceLogCommand(driver, vm, cmd, "virtiofsd") < 0) goto cleanup; -- 2.18.1
participants (5)
-
Daniel P. Berrangé
-
Jim Fehlig
-
Ján Tomko
-
Masayoshi Mizuma
-
Stefan Hajnoczi