[PATCH 0/2] qemu: add append mode config for serial file
by Oleg Vasilev
The serial log file contains a lot of useful information for debugging
configuration problems. It makes sense to preserve the log across
restarts, so that one can later figure out what was going on.
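For illustration, this is what an explicit per-device setting looks like
in domain XML (the append attribute on file-backed character devices is
existing libvirt syntax; the qemu.conf knob added by this series would
only supply a default when the attribute is omitted):

    <serial type='file'>
      <source path='/var/log/libvirt/qemu/demo-serial.log' append='on'/>
      <target port='0'/>
    </serial>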
Oleg Vasilev (2):
qemu: add append mode config for serial file
qemuxml2xmltest: add serial append=on test
src/qemu/libvirtd_qemu.aug | 3 +
src/qemu/qemu.conf.in | 10 ++++
src/qemu/qemu_conf.c | 4 ++
src/qemu/qemu_conf.h | 2 +
src/qemu/qemu_domain.c | 13 +++++
src/qemu/test_libvirtd_qemu.aug.in | 1 +
tests/qemuxml2argvdata/serial-append.xml | 56 +++++++++++++++++++
.../serial-append.x86_64-latest.xml | 56 +++++++++++++++++++
tests/qemuxml2xmltest.c | 4 ++
9 files changed, 149 insertions(+)
create mode 100644 tests/qemuxml2argvdata/serial-append.xml
create mode 100644 tests/qemuxml2xmloutdata/serial-append.x86_64-latest.xml
--
2.38.1
[PATCH] cpu_map: Add -noMPX models for x86 Icelake Server
by Lena Voytek
Intel has removed MPX capabilities from 10nm Icelake CPUs[1]. The new
models reflect this through the line marking mpx as removed, and are
selected from the domain XML as illustrated below. The original Icelake
Server models have been left untouched to avoid regressions.
This adds:
- Icelake-Server-noMPX
- Icelake-Server-noTSX-noMPX
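As an illustrative sketch (standard libvirt CPU configuration, not part
of this patch), a guest opts into one of the new models via its domain
XML:

    <cpu mode='custom' match='exact'>
      <model fallback='forbid'>Icelake-Server-noMPX</model>
    </cpu>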
References:
[1] Memory Protection Extensions support removal
https://www.intel.com/content/www/us/en/support/articles/000059823/proces...
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1978064
https://gitlab.com/libvirt/libvirt/-/issues/304
Signed-off-by: Lena Voytek <lena.voytek(a)canonical.com>
---
src/cpu_map/index.xml | 2 +
src/cpu_map/x86_Icelake-Server-noMPX.xml | 93 +++++++++++++++++++
.../x86_Icelake-Server-noTSX-noMPX.xml | 91 ++++++++++++++++++
3 files changed, 186 insertions(+)
create mode 100644 src/cpu_map/x86_Icelake-Server-noMPX.xml
create mode 100644 src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
diff --git a/src/cpu_map/index.xml b/src/cpu_map/index.xml
index 351c2ae4fa..62c3d44a5c 100644
--- a/src/cpu_map/index.xml
+++ b/src/cpu_map/index.xml
@@ -54,6 +54,8 @@
<include filename='x86_Icelake-Client-noTSX.xml'/>
<include filename='x86_Icelake-Server.xml'/>
<include filename='x86_Icelake-Server-noTSX.xml'/>
+ <include filename='x86_Icelake-Server-noMPX.xml'/>
+ <include filename='x86_Icelake-Server-noTSX-noMPX.xml'/>
<include filename='x86_Cooperlake.xml'/>
<include filename='x86_Snowridge.xml'/>
diff --git a/src/cpu_map/x86_Icelake-Server-noMPX.xml b/src/cpu_map/x86_Icelake-Server-noMPX.xml
new file mode 100644
index 0000000000..feaf21f91f
--- /dev/null
+++ b/src/cpu_map/x86_Icelake-Server-noMPX.xml
@@ -0,0 +1,93 @@
+<cpus>
+ <model name='Icelake-Server-noMPX'>
+ <decode host='on' guest='on'/>
+ <signature family='6' model='106'/> <!-- 0606A5 -->
+ <vendor name='Intel'/>
+ <feature name='3dnowprefetch'/>
+ <feature name='abm'/>
+ <feature name='adx'/>
+ <feature name='aes'/>
+ <feature name='apic'/>
+ <feature name='arat'/>
+ <feature name='avx'/>
+ <feature name='avx2'/>
+ <feature name='avx512-vpopcntdq'/>
+ <feature name='avx512bitalg'/>
+ <feature name='avx512bw'/>
+ <feature name='avx512cd'/>
+ <feature name='avx512dq'/>
+ <feature name='avx512f'/>
+ <feature name='avx512vbmi'/>
+ <feature name='avx512vbmi2'/>
+ <feature name='avx512vl'/>
+ <feature name='avx512vnni'/>
+ <feature name='bmi1'/>
+ <feature name='bmi2'/>
+ <feature name='clflush'/>
+ <feature name='clflushopt'/>
+ <feature name='clwb'/>
+ <feature name='cmov'/>
+ <feature name='cx16'/>
+ <feature name='cx8'/>
+ <feature name='de'/>
+ <feature name='erms'/>
+ <feature name='f16c'/>
+ <feature name='fma'/>
+ <feature name='fpu'/>
+ <feature name='fsgsbase'/>
+ <feature name='fxsr'/>
+ <feature name='gfni'/>
+ <feature name='hle'/>
+ <feature name='intel-pt' removed='yes'/>
+ <feature name='invpcid'/>
+ <feature name='la57'/>
+ <feature name='lahf_lm'/>
+ <feature name='lm'/>
+ <feature name='mca'/>
+ <feature name='mce'/>
+ <feature name='mmx'/>
+ <feature name='movbe'/>
+ <feature name='mpx' removed='yes'/>
+ <feature name='msr'/>
+ <feature name='mtrr'/>
+ <feature name='nx'/>
+ <feature name='pae'/>
+ <feature name='pat'/>
+ <feature name='pcid'/>
+ <feature name='pclmuldq'/>
+ <feature name='pdpe1gb'/>
+ <feature name='pge'/>
+ <feature name='pku'/>
+ <feature name='pni'/>
+ <feature name='popcnt'/>
+ <feature name='pse'/>
+ <feature name='pse36'/>
+ <feature name='rdrand'/>
+ <feature name='rdseed'/>
+ <feature name='rdtscp'/>
+ <feature name='rtm'/>
+ <feature name='sep'/>
+ <feature name='smap'/>
+ <feature name='smep'/>
+ <feature name='spec-ctrl'/>
+ <feature name='ssbd'/>
+ <feature name='sse'/>
+ <feature name='sse2'/>
+ <feature name='sse4.1'/>
+ <feature name='sse4.2'/>
+ <feature name='ssse3'/>
+ <feature name='syscall'/>
+ <feature name='tsc'/>
+ <feature name='tsc-deadline'/>
+ <feature name='umip'/>
+ <feature name='vaes'/>
+ <feature name='vme'/>
+ <feature name='vpclmulqdq'/>
+ <feature name='wbnoinvd'/>
+ <feature name='x2apic'/>
+ <feature name='xgetbv1'/>
+ <feature name='xsave'/>
+ <feature name='xsavec'/>
+ <feature name='xsaveopt'/>
+ </model>
+</cpus>
diff --git a/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml b/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
new file mode 100644
index 0000000000..e55da83ddf
--- /dev/null
+++ b/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
@@ -0,0 +1,91 @@
+<cpus>
+ <model name='Icelake-Server-noTSX-noMPX'>
+ <decode host='on' guest='off'/>
+ <signature family='6' model='106'/> <!-- 0606A5 -->
+ <vendor name='Intel'/>
+ <feature name='3dnowprefetch'/>
+ <feature name='abm'/>
+ <feature name='adx'/>
+ <feature name='aes'/>
+ <feature name='apic'/>
+ <feature name='arat'/>
+ <feature name='avx'/>
+ <feature name='avx2'/>
+ <feature name='avx512-vpopcntdq'/>
+ <feature name='avx512bitalg'/>
+ <feature name='avx512bw'/>
+ <feature name='avx512cd'/>
+ <feature name='avx512dq'/>
+ <feature name='avx512f'/>
+ <feature name='avx512vbmi'/>
+ <feature name='avx512vbmi2'/>
+ <feature name='avx512vl'/>
+ <feature name='avx512vnni'/>
+ <feature name='bmi1'/>
+ <feature name='bmi2'/>
+ <feature name='clflush'/>
+ <feature name='clflushopt'/>
+ <feature name='clwb'/>
+ <feature name='cmov'/>
+ <feature name='cx16'/>
+ <feature name='cx8'/>
+ <feature name='de'/>
+ <feature name='erms'/>
+ <feature name='f16c'/>
+ <feature name='fma'/>
+ <feature name='fpu'/>
+ <feature name='fsgsbase'/>
+ <feature name='fxsr'/>
+ <feature name='gfni'/>
+ <feature name='intel-pt' removed='yes'/>
+ <feature name='invpcid'/>
+ <feature name='la57'/>
+ <feature name='lahf_lm'/>
+ <feature name='lm'/>
+ <feature name='mca'/>
+ <feature name='mce'/>
+ <feature name='mmx'/>
+ <feature name='movbe'/>
+ <feature name='mpx' removed='yes'/>
+ <feature name='msr'/>
+ <feature name='mtrr'/>
+ <feature name='nx'/>
+ <feature name='pae'/>
+ <feature name='pat'/>
+ <feature name='pcid'/>
+ <feature name='pclmuldq'/>
+ <feature name='pdpe1gb'/>
+ <feature name='pge'/>
+ <feature name='pku'/>
+ <feature name='pni'/>
+ <feature name='popcnt'/>
+ <feature name='pse'/>
+ <feature name='pse36'/>
+ <feature name='rdrand'/>
+ <feature name='rdseed'/>
+ <feature name='rdtscp'/>
+ <feature name='sep'/>
+ <feature name='smap'/>
+ <feature name='smep'/>
+ <feature name='spec-ctrl'/>
+ <feature name='ssbd'/>
+ <feature name='sse'/>
+ <feature name='sse2'/>
+ <feature name='sse4.1'/>
+ <feature name='sse4.2'/>
+ <feature name='ssse3'/>
+ <feature name='syscall'/>
+ <feature name='tsc'/>
+ <feature name='tsc-deadline'/>
+ <feature name='umip'/>
+ <feature name='vaes'/>
+ <feature name='vme'/>
+ <feature name='vpclmulqdq'/>
+ <feature name='wbnoinvd'/>
+ <feature name='x2apic'/>
+ <feature name='xgetbv1'/>
+ <feature name='xsave'/>
+ <feature name='xsavec'/>
+ <feature name='xsaveopt'/>
+ </model>
+</cpus>
--
2.34.1
[libvirt PATCH v3 00/18] Use nbdkit for http/ftp/ssh network drives in libvirt
by Jonathon Jongsma
This is the third version of this patch series. See
https://bugzilla.redhat.com/show_bug.cgi?id=2016527 for more information about
the goal, but the summary is that RHEL does not want to ship the qemu storage
plugins for curl and ssh. Handling them outside of the qemu process
provides several advantages such as a reduced attack surface and
improved stability.
A quick summary of the code:
- at startup I query to see whether nbdkit exists on the host and if
so, I query which plugins/filters are installed. These capabilities
are cached and stored in the qemu driver
- When the driver prepares the domain, we go through each disk source
and determine whether the nbdkit capabilities allow us to support
this disk via nbdkit, and if so, we allocate a qemuNbdkitProcess
object and stash it in the private data of the virStorageSource.
- The presence or absence of this qemuNbdkitProcess data then indicates
whether this disk will be served to qemu indirectly via nbdkit or
directly
- When we launch the qemuProcess, as part of the "external device
start" step, I launch an nbdkit process for each disk that is supported
by nbdkit.
- for devices which are served by an intermediate nbdkit process, I
change the qemu commandline in the following ways (see the sketch
after this list):
  - I no longer pass auth/cookie secrets to qemu (those are handled by
    nbdkit)
  - I replace the actual network URL of the remote disk source with the
    path to the nbdkit unix socket
- We create a 'monitor' for the nbdkit process that watches to see whether the
process exits. If it does, we pause the domain, attempt to restart nbdkit,
and then resume the domain.
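To make the transformation concrete, here is a sketch (hand-written for
illustration, not taken from the series) of a network disk that would be
routed through nbdkit, using standard libvirt disk XML:

    <disk type='network' device='disk'>
      <driver name='qemu' type='raw'/>
      <source protocol='https' name='images/disk.img'>
        <host name='example.org' port='443'/>
      </source>
      <target dev='vdb' bus='virtio'/>
    </disk>

With nbdkit available, qemu never sees the https URL; it is instead
pointed at the UNIX socket of the nbdkit process serving this source.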
Open questions
- I think selinux will work once we add a policy for the /usr/sbin/nbdkit
binary to allow it to be executed by libvirt, but for now it fails to
execute nbdkit in enforcing mode. The current context for nbdkit (on fedora)
is "system_u:object_r:bin_t:s0". When I temporarily change the context to
something like qemu_exec_t, I am able to start nbdkit and the domain
launches.
Known shortcomings
- creating disks (in ssh) still isn't supported.
Changes in v3:
- Various formatting fixes
- Don't kill process in qemuNbdkitProcessFree() since we want the nbdkit
daemon to continue running even if libvirt restarts.
- Better detection and error reporting when starting the nbdkit process
- Add monitoring for the nbdkit process so that it can be restarted if
it ever exits unexpectedly.
Changes in v2:
- split into multiple patches
- added a build option for nbdkit_moddir
- don't instantiate any secret / cookie props for disks that are being served
by nbdkit since we don't send secrets to qemu anymore
- ensure that nbdkit processes are started/stopped for the entire backing
chain
- switch to virFileCache-based capabilities for nbdkit so that we don't need
to requery every time
- switch to using pipes for communicating sensitive data to nbdkit
- use pidfile support built into virCommand rather than nbdkit's --pidfile
argument
- added significantly more tests
Jonathon Jongsma (18):
schema: allow 'ssh' as a protocol for network disks
qemu: Add functions for determining nbdkit availability
qemu: expand nbdkit capabilities
util: Allow virFileCache data to be any GObject
qemu: implement basic virFileCache for nbdkit caps
qemu: implement persistent file cache for nbdkit caps
qemu: use file cache for nbdkit caps
qemu: Add qemuNbdkitProcess
qemu: add functions to start and stop nbdkit
tests: add ability to test various nbdkit capabilities
qemu: split qemuDomainSecretStorageSourcePrepare
qemu: include nbdkit state in private xml
qemu: use nbdkit to serve network disks if available
tests: add tests for nbdkit invocation
util: make virCommandSetSendBuffer testable
qemu: pass sensitive data to nbdkit via pipe
qemu: add test for authenticating a https network disk
qemu: Monitor nbdkit process for exit
build-aux/syntax-check.mk | 4 +-
meson.build | 9 +
meson_options.txt | 1 +
po/POTFILES | 1 +
src/conf/schemas/domaincommon.rng | 1 +
src/libvirt_private.syms | 1 +
src/qemu/meson.build | 1 +
src/qemu/qemu_block.c | 162 ++-
src/qemu/qemu_conf.c | 23 +
src/qemu/qemu_conf.h | 7 +
src/qemu/qemu_domain.c | 180 ++-
src/qemu/qemu_domain.h | 4 +
src/qemu/qemu_driver.c | 4 +
src/qemu/qemu_extdevice.c | 48 +
src/qemu/qemu_nbdkit.c | 1243 +++++++++++++++++
src/qemu/qemu_nbdkit.h | 120 ++
src/qemu/qemu_nbdkitpriv.h | 31 +
src/qemu/qemu_process.c | 13 +
src/util/vircommand.c | 19 +-
src/util/vircommand.h | 8 +
src/util/vircommandpriv.h | 4 +
src/util/virfilecache.c | 14 +-
src/util/virfilecache.h | 2 +-
src/util/virutil.h | 2 +-
tests/meson.build | 1 +
.../disk-cdrom-network.args.disk0 | 7 +
.../disk-cdrom-network.args.disk1 | 9 +
.../disk-cdrom-network.args.disk1.pipe.1778 | 1 +
.../disk-cdrom-network.args.disk2 | 9 +
.../disk-cdrom-network.args.disk2.pipe.1780 | 1 +
.../disk-network-http.args.disk0 | 7 +
.../disk-network-http.args.disk1 | 6 +
.../disk-network-http.args.disk2 | 7 +
.../disk-network-http.args.disk2.pipe.1778 | 1 +
.../disk-network-http.args.disk3 | 8 +
.../disk-network-http.args.disk3.pipe.1780 | 1 +
...work-source-curl-nbdkit-backing.args.disk0 | 8 +
...e-curl-nbdkit-backing.args.disk0.pipe.1778 | 1 +
.../disk-network-source-curl.args.1.pipe.1 | 1 +
.../disk-network-source-curl.args.disk0 | 8 +
...k-network-source-curl.args.disk0.pipe.1778 | 1 +
.../disk-network-source-curl.args.disk1 | 10 +
...k-network-source-curl.args.disk1.pipe.1780 | 1 +
...k-network-source-curl.args.disk1.pipe.1782 | 1 +
...isk-network-source-curl.args.disk1.pipe.49 | 1 +
.../disk-network-source-curl.args.disk2 | 8 +
...k-network-source-curl.args.disk2.pipe.1782 | 1 +
...k-network-source-curl.args.disk2.pipe.1784 | 1 +
...isk-network-source-curl.args.disk2.pipe.51 | 1 +
.../disk-network-source-curl.args.disk3 | 7 +
.../disk-network-source-curl.args.disk4 | 7 +
.../disk-network-ssh.args.disk0 | 7 +
tests/qemunbdkittest.c | 294 ++++
...sk-cdrom-network-nbdkit.x86_64-latest.args | 42 +
.../disk-cdrom-network-nbdkit.xml | 1 +
...isk-network-http-nbdkit.x86_64-latest.args | 45 +
.../disk-network-http-nbdkit.xml | 1 +
...rce-curl-nbdkit-backing.x86_64-latest.args | 38 +
...isk-network-source-curl-nbdkit-backing.xml | 45 +
...work-source-curl-nbdkit.x86_64-latest.args | 50 +
.../disk-network-source-curl-nbdkit.xml | 1 +
...isk-network-source-curl.x86_64-latest.args | 54 +
.../disk-network-source-curl.xml | 74 +
...disk-network-ssh-nbdkit.x86_64-latest.args | 36 +
.../disk-network-ssh-nbdkit.xml | 1 +
.../disk-network-ssh.x86_64-latest.args | 36 +
tests/qemuxml2argvdata/disk-network-ssh.xml | 31 +
tests/qemuxml2argvtest.c | 18 +
tests/testutilsqemu.c | 27 +
tests/testutilsqemu.h | 5 +
70 files changed, 2707 insertions(+), 116 deletions(-)
create mode 100644 src/qemu/qemu_nbdkit.c
create mode 100644 src/qemu/qemu_nbdkit.h
create mode 100644 src/qemu/qemu_nbdkitpriv.h
create mode 100644 tests/qemunbdkitdata/disk-cdrom-network.args.disk0
create mode 100644 tests/qemunbdkitdata/disk-cdrom-network.args.disk1
create mode 100644 tests/qemunbdkitdata/disk-cdrom-network.args.disk1.pipe.1778
create mode 100644 tests/qemunbdkitdata/disk-cdrom-network.args.disk2
create mode 100644 tests/qemunbdkitdata/disk-cdrom-network.args.disk2.pipe.1780
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk0
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk1
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk2
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk2.pipe.1778
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk3
create mode 100644 tests/qemunbdkitdata/disk-network-http.args.disk3.pipe.1780
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl-nbdkit-backing.args.disk0
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl-nbdkit-backing.args.disk0.pipe.1778
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.1.pipe.1
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk0
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk0.pipe.1778
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk1
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk1.pipe.1780
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk1.pipe.1782
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk1.pipe.49
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk2
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk2.pipe.1782
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk2.pipe.1784
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk2.pipe.51
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk3
create mode 100644 tests/qemunbdkitdata/disk-network-source-curl.args.disk4
create mode 100644 tests/qemunbdkitdata/disk-network-ssh.args.disk0
create mode 100644 tests/qemunbdkittest.c
create mode 100644 tests/qemuxml2argvdata/disk-cdrom-network-nbdkit.x86_64-latest.args
create mode 120000 tests/qemuxml2argvdata/disk-cdrom-network-nbdkit.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-http-nbdkit.x86_64-latest.args
create mode 120000 tests/qemuxml2argvdata/disk-network-http-nbdkit.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit-backing.x86_64-latest.args
create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit-backing.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit.x86_64-latest.args
create mode 120000 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl.x86_64-latest.args
create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-ssh-nbdkit.x86_64-latest.args
create mode 120000 tests/qemuxml2argvdata/disk-network-ssh-nbdkit.xml
create mode 100644 tests/qemuxml2argvdata/disk-network-ssh.x86_64-latest.args
create mode 100644 tests/qemuxml2argvdata/disk-network-ssh.xml
--
2.37.3
[PATCH RFC 0/6] spec: Decompose the daemon subpackage
by Jim Fehlig
Currently it is not possible to install a modular daemon subpackage without
also installing the monolithic daemon:
https://listman.redhat.com/archives/libvir-list/2022-September/234554.html
This series is an initial attempt at moving common daemons, utilities, and
files from the daemon subpackage to a new daemon-core subpackage. The
monolithic and modular daemons can then depend on the new subpackage.
libvirt-guests is moved to a new libvirt-guests subpackage, which is
recommended by the daemon subpackage to provide a smoother upgrade
path (see the sketch below).
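A sketch of the intended dependency layout in libvirt.spec.in
(subpackage names are illustrative and may not match the final
patches):

    %package daemon-core
    Summary: Daemons and utilities shared by monolithic and modular deployments

    %package guests
    Summary: The libvirt-guests service

    %package daemon
    Summary: Monolithic libvirt daemon
    Requires: %{name}-daemon-core = %{version}-%{release}
    Recommends: %{name}-guests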
I've likely overlooked several items, but before continuing down this
path too far I first wanted to gauge interest and see if this work is
worth pursuing. If so, any comments on the RFC are appreciated!
Note that patches 1-3 are things I noticed while working on the others
and could be pushed independently.
Jim Fehlig (6):
spec: Remove redundant with_libxl
spec: Use more %{name} macro
spec: Remove daemon postun trigger
spec: Move common daemons to a separate subpackage
spec: Move more files to the daemon-core subpackage
spec: Move libvirt-guests to guests subpackage
libvirt.spec.in | 406 ++++++++++++++++++++++++++----------------------
1 file changed, 219 insertions(+), 187 deletions(-)
--
2.37.3
RFC: New APIs for delegation of privileged operations
by Andrea Bolognani
Hi,
this is a proposal for introducing a new family of APIs in libvirt,
with the goal of improving integration with management applications.
KubeVirt is intended to be the primary consumer of these APIs.
Background
----------
KubeVirt makes it possible to run VMs on a Kubernetes cluster, side
by side with containers.
It does so by running QEMU and libvirtd themselves inside a
container. The architecture is explained in more detail at
https://kubevirt.io/user-guide/architecture/
but for the purpose of this discussion we only need to keep in mind
two components:
* virt-launcher
- runs in the same container as QEMU and libvirtd
- one instance per VM
* virt-handler
- runs in a separate container
- one instance per node
Conceptually, these two components roughly map to QEMU processes and
libvirtd respectively.
From a security perspective, there is a strong push in Kubernetes to
run workloads under unprivileged user accounts and without additional
capabilities. Again, this is similar to how libvirtd itself runs as
root but the QEMU processes it starts are under the unprivileged
"qemu" account.
KubeVirt has been working towards the goal of running VMs as
completely unprivileged workloads and made excellent progress so far.
Some of the operations needed for running a VM, however, inherently
require elevated privilege. In KubeVirt, the conundrum is solved by
having virt-handler (a privileged component) take care of those
operations, making it possible for virt-launcher (as well as QEMU and
libvirtd) to run in an unprivileged context.
Examples
--------
Here are a few examples of how KubeVirt has been able to reduce the
privilege required by virt-launcher by selectively handing over
responsibilities to virt-handler:
* Remove SYS_RESOURCE capability from launcher pod
https://github.com/kubevirt/kubevirt/pull/2584
* Drop SYS_RESOURCE capability
https://github.com/kubevirt/kubevirt/pull/5558
* Housekeeping cgroup
https://github.com/kubevirt/kubevirt/pull/8233
* Real time VMs fail to change vCPU scheduler and priority in
non-root deployments
https://github.com/kubevirt/kubevirt/pull/8750
* virt-launcher: Drop SYS_PTRACE capability
https://github.com/kubevirt/kubevirt/pull/8842
The pattern we can see is that, initially, libvirt just assumes that
it can perform a certain privileged operation. This fails in the
context of KubeVirt, where libvirtd runs with significantly reduced
privileges. As a consequence, libvirt is patched to be more resilient
to such lack of privilege: for example, instead of attempting to
create a file and erroring out due to lack of permissions, it will
instead first check whether the file already exists and, if it does,
assume that it has been prepared ahead of time by an external entity.
Limitations
-----------
This approach works fine, but only for the privileged operations that
would be performed by libvirt before the VM starts running.
Looking at the "housekeeping cgroup" PR in particular, we notice that
the VM is initially created in paused state: this is necessary in
order to create a point in time in which all the VM threads already
exist but, crucially, none of the vCPUs have stated running yet. This
is the only opportunity to move threads across cgroups without
invalidating the expectations of a real time workload.
When it comes to live migration, however, there is no way to create
similar conditions, since the VM is running on the destination host
right out of the gate. As a consequence, live migration has to be
blocked when the housekeeping cgroup is in use, which is an
unfortunate limitation.
Moreover, there's an overall sense of fragility surrounding these
interactions: both KubeVirt and, to some extent, libvirt need to be
acutely aware of what the other component is going to do, but there
is never an explicit handover and the whole thing only works if you
just so happen to do everything in exactly the right order and with
the right timing.
Proposal
--------
In order to address the issues outlined above, I propose that we
introduce a new set of APIs in libvirt.
These APIs would expose some of the inner workings of libvirt, and
as such would come with *massively reduced* stability guarantees
compared to the rest of our public API.
The idea is that applications such as KubeVirt, which track libvirt
fairly closely and stay pinned to specific versions, would be able to
adapt to changes in these APIs relatively painlessly. More
traditional management applications such as virt-manager would simply
not opt into using the new APIs and maintain the status quo.
Using memlock as an example, the new API could look like
    typedef int (*virInternalSetMaxMemLockHandler)(pid_t pid,
                                                   unsigned long long bytes);

    int virInternalSetProcessSetMaxMemLockHandler(virConnectPtr conn,
                                                  virInternalSetMaxMemLockHandler handler);
The application-provided handler would be responsible for performing
the privileged operation (in this case raising the memlock limit for
a process). For KubeVirt, virt-launcher would have to pass the baton
to virt-handler.
If such a handler is installed, libvirt would invoke it (and likely
go through some sanity checks afterwards); if not, it would attempt
to perform the privileged operation itself, as it does today.
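As a minimal sketch of what an application-provided handler might look
like (hypothetical code; it assumes the handler runs in a context
privileged enough to call prlimit() directly, whereas in KubeVirt the
request would be forwarded to virt-handler instead):

    #define _GNU_SOURCE
    #include <sys/resource.h>

    static int
    appSetMaxMemLockHandler(pid_t pid, unsigned long long bytes)
    {
        struct rlimit rl = { .rlim_cur = bytes, .rlim_max = bytes };

        /* perform the privileged operation on libvirt's behalf */
        return prlimit(pid, RLIMIT_MEMLOCK, &rl, NULL);
    }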
This would make the interaction between libvirt and the management
application explicit rather than implicit. Not having to stick to our
usual API stability guarantees would make it possible to be more
liberal in exposing the internals of libvirt as interaction points.
Scope
-----
I think we should initially limit the new APIs to the scenarios that
have already been identified, then gradually expand the scope as
needed. In other words, we shouldn't comb through the codebase
looking for potential adopters.
Since the intended consumers of these APIs are those that can
adopt a new libvirt release fairly quickly, this shouldn't be a
problem.
Once the pattern has been established, we might consider introducing
support for it at the same time as a new feature that might benefit
from it is added.
Caveats
-------
libvirt is all about stable API, so introducing an API that is
unstable *by design* is completely uncharted territory.
To ensure that the new APIs are 100% opt-in, we could define them in
a separate <libvirt/libvirt-internal.h> header. Furthermore, we could
have a separate libvirt-internal.so shared library for the symbols
and a corresponding libvirt-internal.pc pkg-config file. We could
even go as far as requiring a preprocessor symbol such as
VIR_INTERNAL_UNSTABLE_API_OPT_IN
to be defined before the entry points are visible to the compiler.
Whatever the mechanism, we would need to make sure that it's usable
from language bindings as well.
Internal APIs are liable not only to come and go, but also to change
semantics between versions. We should make sure that such changes are
clearly exposed to the user, for example by requiring them to pass a
version number to the function and erroring out immediately if the
value doesn't match our expectations. KubeVirt has a massive suite of
functional tests, so this kind of change would immediately be spotted
when a new version of libvirt is imported, with no risk of an
incompatibility lingering in the codebase until it affects users.
Disclaimer
----------
This proposal is intentionally vague on several of the details.
Before attempting to nail those down, I want to gather feedback on
the high-level idea, both from the libvirt and KubeVirt side.
Credits
-------
Thanks to Michal and Martin for helping shape and polish the idea
from its initial rough state.
--
Andrea Bolognani / Red Hat / Virtualization
[PATCH v4] virsh: Add message to terminal when running snapshot-revert
by Haruka Ohata
When running a virsh snapshot-* command such as snapshot-create-as or
snapshot-delete, virsh prints a result message.
On the other hand, the virsh snapshot-revert command doesn't print a
result message.
This patch therefore adds a message when running the virsh
snapshot-revert command.
---
# virsh snapshot-create-as vm1 test1
Domain snapshot test1 created
# virsh snapshot-revert vm1 test1
# virsh snapshot-delete vm1 test1
Domain snapshot test1 deleted
#
---
Signed-off-by: Haruka Ohata <ohata.haruka(a)fujitsu.com>
---
tests/virsh-snapshot | 3 +++
tools/virsh-snapshot.c | 4 ++++
2 files changed, 7 insertions(+)
diff --git a/tests/virsh-snapshot b/tests/virsh-snapshot
index 4c64bb537b..b09273917b 100755
--- a/tests/virsh-snapshot
+++ b/tests/virsh-snapshot
@@ -100,11 +100,14 @@ Domain snapshot s1 created
Domain snapshot s3 created
Domain snapshot s2 created
+Domain snapshot s3 reverted
Domain snapshot s6 created
Domain snapshot s5 created
+Domain snapshot s6 reverted
Domain snapshot s4 created
+Domain snapshot s1 reverted
Domain snapshot s7 created
Domain snapshot s8 created
diff --git a/tools/virsh-snapshot.c b/tools/virsh-snapshot.c
index 8fa64ba903..68908cd10a 100644
--- a/tools/virsh-snapshot.c
+++ b/tools/virsh-snapshot.c
@@ -1783,6 +1783,10 @@ cmdDomainSnapshotRevert(vshControl *ctl, const vshCmd *cmd)
result = virDomainRevertToSnapshot(snapshot, flags);
}
+ if (result < 0)
+ vshError(ctl, _("Failed to revert snapshot %s"), name);
+ else
+ vshPrintExtra(ctl, _("Domain snapshot %s reverted\n"), name);
return result >= 0;
}
--
2.38.1
[libvirt PATCH v3] tools: add virt-qemu-qmp-proxy for proxying QMP via libvirt QEMU guests
by Daniel P. Berrangé
Libvirt provides QMP passthrough APIs for the QEMU driver and these are
exposed in virsh. It is not especially pleasant, however, using the raw
QMP JSON syntax. QEMU has a tool 'qmp-shell' which can speak QMP and
exposes a human-friendly interactive shell. It is not possible to use
this with a libvirt-managed guest, however, since only one client can
attach to the QMP socket at any point in time. While it would be
possible to configure a second QMP socket for a VM, it may not be
a known requirement at the time the guest is provisioned.
The virt-qemu-qmp-proxy tool aims to solve this problem. It opens a UNIX
socket and listens for incoming client connections, speaking QMP on
the connected socket. It will forward any QMP commands received onto
the running libvirt QEMU guest, and forward any replies back to the
QMP client. It will also forward back events.
$ virsh start demo
$ virt-qemu-qmp-proxy demo demo.qmp &
$ qmp-shell demo.qmp
Welcome to the QMP low-level shell!
Connected to QEMU 6.2.0
(QEMU) query-kvm
{
"return": {
"enabled": true,
"present": true
}
}
Note this tool of course has the same risks as the raw libvirt
QMP passthrough. It is safe to run query commands to fetch information
but commands which change the QEMU state risk disrupting libvirt's
management of QEMU, potentially resulting in data loss/corruption in
the worst case. Any use of this tool will cause the guest to be marked
as tainted as a warning that it could be in an unexpected state.
Since this tool introduces a python dependency it is not desirable
to include it in any of the existing RPMs in libvirt. This tool is
also QEMU specific, so isn't appropriate to bundle with the generic
tools. Thus a new RPM is introduced 'libvirt-clients-qemu', to
contain additional QEMU specific tools, with extra external deps.
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
In v3:
- Added to libvirt-clients-qemu RPM
- Renamed to virt-qemu-qmp-proxy
docs/manpages/meson.build | 1 +
docs/manpages/virt-qemu-qmp-proxy.rst | 120 +++++++++
libvirt.spec.in | 15 ++
tools/meson.build | 5 +
tools/virt-qemu-qmp-proxy | 360 ++++++++++++++++++++++++++
5 files changed, 501 insertions(+)
create mode 100644 docs/manpages/virt-qemu-qmp-proxy.rst
create mode 100755 tools/virt-qemu-qmp-proxy
diff --git a/docs/manpages/meson.build b/docs/manpages/meson.build
index ba673cf472..83933227d9 100644
--- a/docs/manpages/meson.build
+++ b/docs/manpages/meson.build
@@ -18,6 +18,7 @@ docs_man_files = [
{ 'name': 'virt-pki-query-dn', 'section': '1', 'install': true },
{ 'name': 'virt-pki-validate', 'section': '1', 'install': true },
{ 'name': 'virt-qemu-run', 'section': '1', 'install': conf.has('WITH_QEMU') },
+ { 'name': 'virt-qemu-qmp-proxy', 'section': '1', 'install': conf.has('WITH_QEMU') },
{ 'name': 'virt-xml-validate', 'section': '1', 'install': true },
{ 'name': 'libvirt-guests', 'section': '8', 'install': conf.has('WITH_LIBVIRTD') },
diff --git a/docs/manpages/virt-qemu-qmp-proxy.rst b/docs/manpages/virt-qemu-qmp-proxy.rst
new file mode 100644
index 0000000000..b387474b19
--- /dev/null
+++ b/docs/manpages/virt-qemu-qmp-proxy.rst
@@ -0,0 +1,120 @@
+===================
+virt-qemu-qmp-proxy
+===================
+
+--------------------------------------------------
+Expose a QMP proxy server for a libvirt QEMU guest
+--------------------------------------------------
+
+:Manual section: 1
+:Manual group: Virtualization Support
+
+.. contents::
+
+
+SYNOPSIS
+========
+
+``virt-qemu-qmp-proxy`` [*OPTION*]... *DOMAIN* *QMP-SOCKET-PATH*
+
+
+DESCRIPTION
+===========
+
+This tool provides a way to expose a QMP proxy server that communicates
+with a QEMU guest managed by libvirt. This enables standard QMP client
+tools to interact with libvirt managed guests.
+
+**NOTE: use of this tool will result in the running QEMU guest being
+marked as tainted.** It is strongly recommended that this tool *only be
+used to send commands which query information* about the running guest.
+If this tool is used to make changes to the state of the guest, this
+may have negative interactions with the QEMU driver, resulting in an
+inability to manage the guest operation thereafter, and in the worst
+case **potentially lead to data loss or corruption**.
+
+The ``virt-qemu-qmp-proxy`` program will listen on a UNIX socket for incoming
+client connections, and run the QMP protocol over the connection. Any
+commands received will be sent to the running libvirt guest, and replies
+sent back.
+
+The ``virt-qemu-qmp-proxy`` program may be interrupted (eg Ctrl-C) when it
+is no longer required. The libvirt QEMU guest will continue running.
+
+
+OPTIONS
+=======
+
+*DOMAIN*
+
+The ID or UUID or Name of the libvirt QEMU guest.
+
+*QMP-SOCKET-PATH*
+
+The filesystem path at which to run the QMP server, listening for
+incoming connections.
+
+``-c`` *CONNECTION-URI*
+``--connect``\ =\ *CONNECTION-URI*
+
+The URI for the connection to the libvirt QEMU driver. If omitted,
+a URI will be auto-detected.
+
+``-v``, ``--verbose``
+
+Run in verbose mode, printing all QMP commands and replies that
+are handled.
+
+``-h``, ``--help``
+
+Display the command line help.
+
+
+EXIT STATUS
+===========
+
+Upon successful shutdown, an exit status of 0 will be set. Upon
+failure a non-zero status will be set.
+
+
+AUTHOR
+======
+
+Daniel P. Berrangé
+
+
+BUGS
+====
+
+Please report all bugs you discover. This should be done via either:
+
+#. the mailing list
+
+ `https://libvirt.org/contact.html <https://libvirt.org/contact.html>`_
+
+#. the bug tracker
+
+ `https://libvirt.org/bugs.html <https://libvirt.org/bugs.html>`_
+
+Alternatively, you may report bugs to your software distributor / vendor.
+
+COPYRIGHT
+=========
+
+Copyright (C) 2022 by Red Hat, Inc.
+
+
+LICENSE
+=======
+
+``virt-qemu-qmp-proxy`` is distributed under the terms of the GNU LGPL v2+.
+This is free software; see the source for copying conditions. There
+is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE
+
+
+SEE ALSO
+========
+
+virsh(1), `https://libvirt.org/ <https://libvirt.org/>`_,
+`QMP reference <https://www.qemu.org/docs/master/interop/qemu-qmp-ref.html>`_
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 950c5a3b6d..3082285dc0 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -899,6 +899,15 @@ Obsoletes: libvirt-bash-completion < 7.3.0
The client binaries needed to access the virtualization
capabilities of recent versions of Linux (and other OSes).
+%package client-qemu
+Summary: Additional client side utilities for QEMU
+Requires: %{name}-libs = %{version}-%{release}
+Requires: python3-libvirt >= %{version}-%{release}
+
+%description client-qemu
+The additional client binaries are used to interact
+with some QEMU specific features of libvirt.
+
%package libs
Summary: Client side libraries
# So remote clients can access libvirt over SSH tunnel
@@ -2159,6 +2168,12 @@ exit 0
%{_datadir}/bash-completion/completions/virsh
+%if %{with_qemu}
+%files client-qemu
+%{_mandir}/man1/virt-qemu-qmp-proxy.1*
+%{_bindir}/virt-qemu-qmp-proxy
+%endif
+
%files libs -f %{name}.lang
%license COPYING COPYING.LESSER
%dir %attr(0700, root, root) %{_sysconfdir}/libvirt/
diff --git a/tools/meson.build b/tools/meson.build
index bb28a904dc..20509906af 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -320,6 +320,11 @@ if conf.has('WITH_LIBVIRTD')
endif
endif
+if conf.has('WITH_QEMU')
+ install_data('virt-qemu-qmp-proxy',
+ install_dir: bindir)
+endif
+
if bash_completion_dep.found()
subdir('bash-completion')
endif
diff --git a/tools/virt-qemu-qmp-proxy b/tools/virt-qemu-qmp-proxy
new file mode 100755
index 0000000000..d85342bd2b
--- /dev/null
+++ b/tools/virt-qemu-qmp-proxy
@@ -0,0 +1,360 @@
+#!/usr/bin/env python3
+
+import argparse
+import array
+import libvirt
+import libvirt_qemu
+import os
+import re
+import socket
+import sys
+import traceback
+import json
+import fcntl
+
+
+debug = False
+
+def get_domain(uri, domstr):
+ conn = libvirt.open(uri)
+
+ dom = None
+ saveex = None
+
+ def try_lookup(cb):
+ try:
+ return cb()
+ except libvirt.libvirtError as ex:
+ nonlocal saveex
+ if saveex is None:
+ saveex = ex
+
+ if re.match(r'^\d+$', domstr):
+ dom = try_lookup(lambda: conn.lookupByID(int(domstr)))
+
+ if dom is None and re.match(r'^[-a-f0-9]{36}|[a-f0-9]{32}$', domstr):
+ dom = try_lookup(lambda: conn.lookupByUUIDString(domstr))
+
+ if dom is None:
+ dom = try_lookup(lambda: conn.lookupByName(domstr))
+
+ if dom is None:
+ raise saveex
+
+ if not dom.isActive():
+ raise Exception("Domain must be running to use QMP")
+
+ return conn, dom
+
+
+class QMPProxy(object):
+
+ def __init__(self, conn, dom, serversock, verbose):
+ self.conn = conn
+ self.dom = dom
+ self.verbose = verbose
+
+ self.serversock = serversock
+ self.serverwatch = 0
+
+ self.clientsock = None
+ self.clientwatch = 0
+
+ self.api2sock = bytes([])
+ self.api2sockfds = []
+
+ self.sock2api = bytes([])
+ self.sock2apifds = []
+
+ self.serverwatch = libvirt.virEventAddHandle(
+ self.serversock.fileno(), libvirt.VIR_EVENT_HANDLE_READABLE,
+ self.handle_server_io, self)
+
+ libvirt_qemu.qemuMonitorEventRegister(self.conn, self.dom,
+ None,
+ self.handle_qmp_event,
+ self)
+
+
+ @staticmethod
+ def handle_qmp_event(conn, dom, event, secs, usecs, details, self):
+ evdoc = {
+ "event": event,
+ "timestamp": {"seconds": secs, "microseconds": usecs }
+ }
+ if details is not None:
+ evdoc["data"] = details
+
+ ev = json.dumps(evdoc)
+ if self.verbose:
+ print(ev)
+ ev += "\r\n"
+ self.api2sock += ev.encode("utf-8")
+ self.update_client_events()
+
+
+ def recv_with_fds(self):
+ # Match VIR_NET_MESSAGE_NUM_FDS_MAX in virnetprotocol.x
+ maxfds = 32
+ fds = array.array('i')
+ cmsgdatalen = socket.CMSG_LEN(maxfds * fds.itemsize)
+
+ data, cmsgdata, flags, addr = self.clientsock.recvmsg(1024,
+ cmsgdatalen)
+ for cmsg_level, cmsg_type, cmsg_data in cmsgdata:
+ if (cmsg_level == socket.SOL_SOCKET and
+ cmsg_type == socket.SCM_RIGHTS):
+ fds.frombytes(cmsg_data[:len(cmsg_data) -
+ (len(cmsg_data) % fds.itemsize)])
+ else:
+ raise Exception("Unexpected CMSGDATA level %d type %d" % (
+ cmsg_level, cmsg_type))
+
+ return data, [self.make_file(fd) for fd in fds]
+
+
+ def send_with_fds(self, data, fds):
+ cfds = [fd.fileno() for fd in fds]
+
+ cmsgdata = [(socket.SOL_SOCKET, socket.SCM_RIGHTS,
+ array.array("i", cfds))]
+
+ return self.clientsock.sendmsg([data], cmsgdata)
+
+
+ @staticmethod
+ def make_file(fd):
+ flags = fcntl.fcntl(fd, fcntl.F_GETFL)
+
+ mask = os.O_RDONLY | os.O_WRONLY | os.O_RDWR | os.O_APPEND
+ flags = flags & mask
+ mode = ""
+ if flags == os.O_RDONLY:
+ mode = "rb"
+ elif flags == os.O_WRONLY:
+ mode = "wb"
+ elif flags == os.O_RDWR:
+ mode = "r+b"
+ elif flags == (os.O_WRONLY | os.O_APPEND):
+ mode = "ab"
+ elif flags == (os.O_RDWR | os.O_APPEND):
+ mode = "a+b"
+
+ return os.fdopen(fd, mode)
+
+
+ def add_client(self, sock):
+ ver = self.conn.getVersion()
+ major = int(ver / 1000000) % 1000
+ minor = int(ver / 1000) % 1000
+ micro = ver % 1000
+
+ greetingobj = {
+ "QMP": {
+ "version": {
+ "qemu": {
+ "major": major,
+ "minor": minor,
+ "micro": micro,
+ },
+ "package": f"qemu-{major}.{minor}.{micro}",
+ },
+ "capabilities": [
+ "oob"
+ ],
+ }
+ }
+ greeting = json.dumps(greetingobj)
+ if self.verbose:
+ print(greeting)
+ greeting += "\r\n"
+
+ self.clientsock = sock
+ self.clientwatch = libvirt.virEventAddHandle(
+ self.clientsock.fileno(), libvirt.VIR_EVENT_HANDLE_WRITABLE,
+ self.handle_client_io, self)
+ self.api2sock += greeting.encode("utf-8")
+ self.update_server_events()
+
+
+ def remove_client(self):
+ libvirt.virEventRemoveHandle(self.clientwatch)
+ self.clientsock.close()
+ self.clientsock = None
+ self.clientwatch = 0
+ self.update_server_events()
+
+ self.api2sock = bytes([])
+ self.api2sockfds = []
+
+ self.sock2api = bytes([])
+ self.sock2apifds = []
+
+
+ def update_client_events(self):
+ # For simplicity of tracking distinct QMP cmds and their passed FDs
+ # we don't try to support "pipelining", only a single cmd may be
+ # inflight
+ if len(self.api2sock) > 0:
+ events = libvirt.VIR_EVENT_HANDLE_WRITABLE
+ else:
+ events = libvirt.VIR_EVENT_HANDLE_READABLE
+
+ libvirt.virEventUpdateHandle(self.clientwatch, events)
+
+
+ def update_server_events(self):
+ if self.clientsock is not None:
+ libvirt.virEventUpdateHandle(self.serverwatch, 0)
+ else:
+ libvirt.virEventUpdateHandle(self.serverwatch,
+ libvirt.VIR_EVENT_HANDLE_READABLE)
+
+
+ def try_command(self):
+ try:
+ cmdstr = self.sock2api.decode("utf-8")
+ cmd = json.loads(cmdstr)
+
+ if self.verbose:
+ cmdstr = cmdstr.strip()
+ print(cmdstr)
+ except Exception as ex:
+ if debug:
+ print("Incomplete %s: %s" % ( self.sock2api, ex))
+ return
+
+ id = None
+ if "id" in cmd:
+ id = cmd["id"]
+ del cmd["id"]
+
+ if cmd.get("execute", "") == "qmp_capabilities":
+ resobj = {
+ "return": {},
+ }
+ resfds = []
+ else:
+ if hasattr(libvirt_qemu, "qemuMonitorCommandWithFiles"):
+ res, resfds = libvirt_qemu.qemuMonitorCommandWithFiles(
+ self.dom, json.dumps(cmd), [f.fileno() for f in self.sock2apifds])
+ resobj = json.loads(res)
+ else:
+ if len(self.sock2apifds) > 0:
+ raise Exception("FD passing not supported")
+ res = libvirt_qemu.qemuMonitorCommand(
+ self.dom, json.dumps(cmd))
+ resfds = []
+ resobj = json.loads(res)
+
+ if "id" in resobj:
+ del resobj["id"]
+ if id is not None:
+ resobj["id"] = id
+
+ res = json.dumps(resobj)
+ if self.verbose:
+ print(res)
+ res += "\r\n"
+
+ self.sock2api = bytes([])
+ self.sock2apifds = []
+ self.api2sock += res.encode("utf-8")
+ self.api2sockfds = resfds
+
+
+ @staticmethod
+ def handle_client_io(watch, fd, events, self):
+ error = False
+ try:
+ if events & libvirt.VIR_EVENT_HANDLE_WRITABLE:
+ done = self.send_with_fds(self.api2sock, self.api2sockfds)
+ if done > 0:
+ self.api2sock = self.api2sock[done:]
+ self.api2sockfds = []
+ elif events & libvirt.VIR_EVENT_HANDLE_READABLE:
+ data, fds = self.recv_with_fds()
+ if len(data) == 0:
+ error = True
+ else:
+ self.sock2api += data
+ if len(fds):
+ self.sock2apifds += fds
+
+ self.try_command()
+ else:
+ error = True
+ except Exception as e:
+ global debug
+ if debug:
+ print("%s: %s" % (sys.argv[0], str(e)))
+ print(traceback.format_exc())
+ error = True
+
+ if error:
+ self.remove_client()
+ else:
+ self.update_client_events()
+
+
+ @staticmethod
+ def handle_server_io(watch, fd, events, self):
+ if self.clientsock is None:
+ sock, addr = self.serversock.accept()
+ self.add_client(sock)
+ else:
+ self.update_server_events()
+
+
+def parse_commandline():
+ parser = argparse.ArgumentParser(description="Libvirt QMP proxy")
+ parser.add_argument("--connect", "-c",
+ help="Libvirt QEMU driver connection URI")
+ parser.add_argument("--debug", "-d", action='store_true',
+ help="Display debugging information")
+ parser.add_argument("--verbose", "-v", action='store_true',
+ help="Display QMP traffic")
+ parser.add_argument("domain", metavar="DOMAIN",
+ help="Libvirt guest domain ID/UUID/Name")
+ parser.add_argument("sockpath", metavar="QMP-SOCK-PATH",
+ help="UNIX socket path for QMP server")
+
+ return parser.parse_args()
+
+
+def main():
+ args = parse_commandline()
+ global debug
+ debug = args.debug
+
+ if not debug:
+ libvirt.registerErrorHandler(lambda opaque, error: None, None)
+
+ libvirt.virEventRegisterDefaultImpl()
+
+ conn, dom = get_domain(args.connect, args.domain)
+
+ if conn.getType() != "QEMU":
+ raise Exception("QMP proxy requires a QEMU driver connection not %s" %
+ conn.getType())
+
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ if os.path.exists(args.sockpath):
+ os.unlink(args.sockpath)
+ sock.bind(args.sockpath)
+ sock.listen(1)
+
+ proxy = QMPProxy(conn, dom, sock, args.verbose)
+
+ while True:
+ libvirt.virEventRunDefaultImpl()
+
+
+try:
+ main()
+ sys.exit(0)
+except Exception as e:
+ print("%s: %s" % (sys.argv[0], str(e)))
+ if debug:
+ print(traceback.format_exc())
+ sys.exit(1)
--
2.37.3
[PATCH 0/7] qemu: support updating device's bootindex
by Jiang Jiacheng
Support updating a device's bootindex (for cdrom, disk and network
devices) online via virDomainUpdateDeviceFlags. The new bootindex takes
effect after the guest reboots. The bootindex can also be set to 0,
which cancels the device's bootindex.
To use this feature, first get the device's XML and modify its boot
order, then use 'virsh update-device <domain> <xml> --flag' to update
the bootindex, as sketched below. Note that the flag should be --config
or --persistent if the VM is running.
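A sketch of the workflow (paths and device names are made up for
illustration):

    # cat disk.xml
    <disk type='file' device='disk'>
      <driver name='qemu' type='qcow2'/>
      <source file='/var/lib/libvirt/images/vm1-data.qcow2'/>
      <target dev='vdb' bus='virtio'/>
      <boot order='2'/>
    </disk>
    # virsh update-device vm1 disk.xml --config

Setting <boot order='0'/> in the XML cancels the device's bootindex as
described above.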
diff to v1:
* add a bool bootIndexSpecified in the device info to indicate that the
bootorder of the device can be set.
* use boot order = '0' to cancel the previous bootorder instead of '-1'
Jiang Jiacheng (7):
qemu: Introduce qemuDomainChangeBootIndex API
qemu: Add bootIndexSpecified and support set bootIndex to '0'
qemu: Introduce qemuCheckBootIndex and qemuChangeDiskBootIndex API
qemu: Support update disk's bootindex
qemu: Support update net's bootindex
qemu: Support add bootindex = 0 to boothash when its
bootIndexSpecified is true
qemu: Reserve bootIndexSpecified when update device
src/conf/device_conf.h | 3 +
src/conf/domain_conf.c | 9 ++-
src/conf/domain_postparse.c | 8 +-
src/qemu/qemu_conf.c | 140 +++++++++++++++++++++++++++++++++++
src/qemu/qemu_conf.h | 16 ++++
src/qemu/qemu_domain.c | 3 +-
src/qemu/qemu_driver.c | 35 +++++++++
src/qemu/qemu_hotplug.c | 17 +++--
src/qemu/qemu_monitor.c | 20 +++++
src/qemu/qemu_monitor.h | 6 ++
src/qemu/qemu_monitor_json.c | 33 +++++++++
src/qemu/qemu_monitor_json.h | 6 ++
12 files changed, 286 insertions(+), 10 deletions(-)
--
2.33.0
[PATCH 0/7] Sanitize logic to get live/config device definitions for unplug/update
by Peter Krempa
Convert code *DomainUpdateDeviceFlags/*DomainDetachDeviceLiveAndConfig
to avoid use of virDomainDeviceDefCopy. We have the original XML string
from the user so it doesn't make sense to parse it and then copy it
(which involves formatting and parsing back), when we can simply parse
it twice, saving the extra formatting step.
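A minimal sketch of the resulting pattern (simplified and hypothetical,
not the exact code from the series):

    /* parse the user's XML once against the live definition ... */
    devLive = virDomainDeviceDefParse(xml, vm->def, driver->xmlopt,
                                      NULL, parse_flags);

    /* ... and a second time against the persistent definition,
     * instead of copying the result of the first parse */
    devConfig = virDomainDeviceDefParse(xml, vm->newDef, driver->xmlopt,
                                        NULL, parse_flags);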
Peter Krempa (7):
qemu: driver: Fix formatting of function headers around
qemuDomainAttachDevice
qemuDomainUpdateDeviceFlags: Parse XML twice rather than use
virDomainDeviceDefCopy
qemuDomainDetachDeviceLiveAndConfig: Parse XML twice rather than use
virDomainDeviceDefCopy
qemuDomainDetachDeviceLiveAndConfig: Refactor cleanup
lxcDomainAttachDeviceFlags: Parse XML twice rather than use
virDomainDeviceDefCopy
lxcDomainDetachDeviceFlags: Parse XML twice rather than use
virDomainDeviceDefCopy
conf: domain: Remove virDomainDeviceDefCopy
src/conf/domain_conf.c | 121 ---------------------------------------
src/conf/domain_conf.h | 4 --
src/libvirt_private.syms | 1 -
src/lxc/lxc_driver.c | 75 ++++++++++--------------
src/qemu/qemu_driver.c | 97 +++++++++++++------------------
5 files changed, 70 insertions(+), 228 deletions(-)
--
2.37.3
[PATCH v3] qemu: Don't report spurious errors from vCPU tid validation on hotunplug timeout
by Peter Krempa
From: Shaleen Bathla <shaleen.bathla(a)oracle.com>
Use of qemuDomainValidateVcpuInfo in the helpers for hotplug and unplug
of vCPUs can lead to spurious errors being reported, such as:
internal error: qemu didn't report thread id for vcpu 'XX'
The reason for this is that qemuDomainValidateVcpuInfo validates the
state of all vCPUs against the expected state of vCPUs. If an unplug
operation completed before libvirt was able to process it, the
expected state could not yet reflect the current state.
To avoid spurious errors the qemuDomainHotplugAddVcpu and
qemuDomainRemoveVcpu functions are modified to do localized validation
only for the vCPUs they actually modify.
We also now ensure that the cgroups are modified before bailing out on
error for any vCPUs which passed validation.
Additionally in order for qemuDomainRemoveVcpuAlias to be able to find
the unplugged vCPU we must ensure that qemuDomainRefreshVcpuInfo does
not clear out the alias in case when the vCPU is no longer reported by
qemu.
Co-authored-by: Partha Satapathy <partha.satapathy(a)oracle.com>
Signed-off-by: Shaleen Bathla <shaleen.bathla(a)oracle.com>
Signed-off-by: Peter Krempa <pkrempa(a)redhat.com>
---
v3 addresses my review feedback of the original patch, as well as
rewrites the commit message for more clarity.
src/qemu/qemu_domain.c | 6 +++--
src/qemu/qemu_hotplug.c | 53 ++++++++++++++++++++++++-----------------
2 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index ef1a9c8c74..64ebec626c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9795,8 +9795,10 @@ qemuDomainRefreshVcpuInfo(virDomainObj *vm,
vcpupriv->vcpus = info[i].vcpus;
VIR_FREE(vcpupriv->type);
vcpupriv->type = g_steal_pointer(&info[i].type);
- VIR_FREE(vcpupriv->alias);
- vcpupriv->alias = g_steal_pointer(&info[i].alias);
+ if (info[i].alias) {
+ VIR_FREE(vcpupriv->alias);
+ vcpupriv->alias = g_steal_pointer(&info[i].alias);
+ }
virJSONValueFree(vcpupriv->props);
vcpupriv->props = g_steal_pointer(&info[i].props);
vcpupriv->enable_id = info[i].id;
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index da92ced2f4..6e300f547c 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -6088,38 +6088,37 @@ qemuDomainRemoveVcpu(virDomainObj *vm,
qemuDomainVcpuPrivate *vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);
int oldvcpus = virDomainDefGetVcpus(vm->def);
unsigned int nvcpus = vcpupriv->vcpus;
- virErrorPtr save_error = NULL;
size_t i;
+ ssize_t offlineVcpuWithTid = -1;
if (qemuDomainRefreshVcpuInfo(vm, VIR_ASYNC_JOB_NONE, false) < 0)
return -1;
- /* validation requires us to set the expected state prior to calling it */
for (i = vcpu; i < vcpu + nvcpus; i++) {
vcpuinfo = virDomainDefGetVcpu(vm->def, i);
- vcpuinfo->online = false;
+ vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);
+
+ if (vcpupriv->tid == 0) {
+ vcpuinfo->online = false;
+ /* Clear the alias as VCPU is now unplugged */
+ VIR_FREE(vcpupriv->alias);
+ ignore_value(virCgroupDelThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i));
+ } else {
+ if (offlineVcpuWithTid == -1)
+ offlineVcpuWithTid = i;
+ }
}
- if (qemuDomainValidateVcpuInfo(vm) < 0) {
- /* rollback vcpu count if the setting has failed */
+ if (offlineVcpuWithTid != -1) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("qemu reported thread id for inactive vcpu '%zu'"),
+ offlineVcpuWithTid);
virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", false);
-
- for (i = vcpu; i < vcpu + nvcpus; i++) {
- vcpuinfo = virDomainDefGetVcpu(vm->def, i);
- vcpuinfo->online = true;
- }
return -1;
}
virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", true);
- virErrorPreserveLast(&save_error);
-
- for (i = vcpu; i < vcpu + nvcpus; i++)
- ignore_value(virCgroupDelThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i));
-
- virErrorRestore(&save_error);
-
return 0;
}
@@ -6141,6 +6140,9 @@ qemuDomainRemoveVcpuAlias(virDomainObj *vm,
return;
}
}
+
+ VIR_DEBUG("vcpu '%s' not found in vcpulist of domain '%s'",
+ alias, vm->def->name);
}
@@ -6209,6 +6211,7 @@ qemuDomainHotplugAddVcpu(virQEMUDriver *driver,
int rc;
int oldvcpus = virDomainDefGetVcpus(vm->def);
size_t i;
+ bool vcpuTidMissing = false;
if (!qemuDomainSupportsNewVcpuHotplug(vm)) {
virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
@@ -6238,20 +6241,26 @@ qemuDomainHotplugAddVcpu(virQEMUDriver *driver,
if (qemuDomainRefreshVcpuInfo(vm, VIR_ASYNC_JOB_NONE, false) < 0)
return -1;
- /* validation requires us to set the expected state prior to calling it */
for (i = vcpu; i < vcpu + nvcpus; i++) {
vcpuinfo = virDomainDefGetVcpu(vm->def, i);
vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);
vcpuinfo->online = true;
- if (vcpupriv->tid > 0 &&
- qemuProcessSetupVcpu(vm, i, true) < 0)
- return -1;
+ if (vcpupriv->tid > 0) {
+ if (qemuProcessSetupVcpu(vm, i, true) < 0) {
+ return -1;
+ }
+ } else {
+ vcpuTidMissing = true;
+ }
}
- if (qemuDomainValidateVcpuInfo(vm) < 0)
+ if (vcpuTidMissing && qemuDomainHasVcpuPids(vm)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("qemu didn't report thread id for vcpu '%zu'"), i);
return -1;
+ }
qemuDomainVcpuPersistOrder(vm->def);
--
2.37.3