[libvirt] [PATCH] qemu: Do not override config XML in case of snapshot revert
by Maxiwell S. Garcia
Snapshot create operation saves the live XML and uses it to replace the
domain definition in case of revert. But the VM config XML is not saved
and the revert operation does not address this issue. This commit
prevents the config XML from being overridden by snapshot definition.
An active domain stores both current and new definitions. The current
definition (vm->def) stores the live XML and the new definition
(vm->newDef) stores the config XML. In an inactive domain, only the
config XML is persistent, and it's saved in vm->def.
The revert operation uses the virDomainObjAssignDef() to set the
snapshot definition in vm->newDef, if domain is active, or in vm->def
otherwise. But before that, it saves the old value to return to
caller. This return is used here to restore the config XML after
all snapshot startup process finish.
Signed-off-by: Maxiwell S. Garcia <maxiwell(a)linux.ibm.com>
---
src/qemu/qemu_driver.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index b2ac737d1f..a73122454a 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -16251,6 +16251,7 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
qemuDomainObjPrivatePtr priv;
int rc;
virDomainDefPtr config = NULL;
+ virDomainDefPtr inactiveConfig = NULL;
virQEMUDriverConfigPtr cfg = NULL;
virCapsPtr caps = NULL;
bool was_stopped = false;
@@ -16465,7 +16466,7 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
goto endjob;
}
if (config) {
- virDomainObjAssignDef(vm, config, false, NULL);
+ virDomainObjAssignDef(vm, config, false, &inactiveConfig);
virCPUDefFree(priv->origCPU);
VIR_STEAL_PTR(priv->origCPU, origCPU);
}
@@ -16474,7 +16475,7 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
load:
was_stopped = true;
if (config)
- virDomainObjAssignDef(vm, config, false, NULL);
+ virDomainObjAssignDef(vm, config, false, &inactiveConfig);
/* No cookie means libvirt which saved the domain was too old to
* mess up the CPU definitions.
@@ -16533,6 +16534,9 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
detail);
}
}
+ if (inactiveConfig)
+ VIR_STEAL_PTR(vm->newDef, inactiveConfig);
+
break;
case VIR_DOMAIN_SNAPSHOT_SHUTDOWN:
@@ -16560,8 +16564,11 @@ qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot,
qemuProcessEndJob(driver, vm);
goto cleanup;
}
- if (config)
- virDomainObjAssignDef(vm, config, false, NULL);
+ if (config) {
+ virDomainObjAssignDef(vm, config, false, &inactiveConfig);
+ if (inactiveConfig)
+ VIR_STEAL_PTR(vm->newDef, inactiveConfig);
+ }
if (flags & (VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING |
VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED)) {
--
2.20.1
5 years, 7 months
[libvirt] [PATCH 0/2] introduction of version attribute for VFIO live migration
by Yan Zhao
This patchset introduces a version attribute under sysfs of VFIO Mediated
devices.
This version attribute is used by user space software like libvirt to
determine whether two mdev devices are compatible for live migration
before starting live migration.
Patch 1 defines version attribute as mandatory for VFIO live migration. It
means if version attribute is missing or it returns errno, the
corresponding mdev device is regarded as not supporting live migration.
samples for vfio-mdev are modified to demonstrate it.
Patch 2 uses GVT as an example to show how to expose version attribute and
check device compatibility in vendor driver.
Yan Zhao (2):
vfio/mdev: add version field as mandatory attribute for mdev device
drm/i915/gvt: export mdev device version to sysfs for Intel vGPU
Documentation/vfio-mediated-device.txt | 36 +++++++++
drivers/gpu/drm/i915/gvt/Makefile | 2 +-
drivers/gpu/drm/i915/gvt/device_version.c | 94 +++++++++++++++++++++++
drivers/gpu/drm/i915/gvt/gvt.c | 55 +++++++++++++
drivers/gpu/drm/i915/gvt/gvt.h | 6 ++
samples/vfio-mdev/mbochs.c | 17 ++++
samples/vfio-mdev/mdpy.c | 16 ++++
samples/vfio-mdev/mtty.c | 16 ++++
8 files changed, 241 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/i915/gvt/device_version.c
--
2.17.1
5 years, 7 months
[libvirt] [PATCH 0/2] Avoid issues due to qemu dropping osxsave and ospke
by Christian Ehrhardt
Hi,
this series tries to address a drop of commandline options by qemu in regard to
osxsave [1] and ospke [2].
This was already discussed in [3] late last year but got forgotten afterwards.
The Ubuntu bug is at [4] and an older Fedora bug is at [5].
TL;DR:
- osxsave/ospke features were never really configurable
- KVM never returned the bits on GET_SUPPORTED_CPUID
- very rare to be seen in the wild
- avoid issues with newer qemu and old/odd XMLs to be sure
Details:
I checked various use cases from virt-install to openstack and some in between.
The only cases I found that would define osxsave/ospke is virt-install pior
to version 2.0 and even there only when used with --cpu=host-model or
--cpu=host-copy.
If you ever really enabled the feature you'd have got:
error: the CPU is incompatible with host CPU:
Host CPU does not provide required features: ospke
The problem lies in domain XMLs that explicitly disable it. That would be
<feature policy='disable' name='osxsave'/>
But due to almost (or actually none) no host exposing this the following
also triggers:
<feature policy='optional' name='ospke'/>
This will make libvirt add it to the qemu commandline like:
-cpu ...,osxsave=off,ospke=off
And that will crash when qemu starts with:
error: internal error: process exited while connecting to monitor:
2019-04-25T12:12:01.698646Z qemu-system-x86_64: can't apply global
core2duo-x86_64-cpu.osxsave=off: Property '.osxsave' not found
There are much more long term discussions about demoting and dropping qemu
features and I'd like to avoid those discussions being mixed.
The reason to drop it more or less without notice was that it never did
anything to begin with. Due to that our solution might in a similar fashion
be more trivial - just stop defining those two features to qemu commandline.
[1]: https://git.qemu.org/?p=qemu.git;a=commit;h=f1a23522b03a569f13aad49294bb4...
[2]: https://git.qemu.org/?p=qemu.git;a=commit;h=9ccb9784b57804f5c74434ad6ccb6...
[3]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg561877.html
[4]: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1825195
[5]: https://bugzilla.redhat.com/show_bug.cgi?id=1644848
Christian Ehrhardt (2):
qemu: do not define known no-op features
qemuxml2argvtest: add test for remove cpu features
src/qemu/qemu_command.c | 23 +++++++++++++++
.../qemuxml2argvdata/cpu-host-model-cmt.args | 2 +-
.../cpu-no-removed-features.args | 29 +++++++++++++++++++
.../cpu-no-removed-features.xml | 23 +++++++++++++++
tests/qemuxml2argvdata/cpu-tsc-frequency.args | 4 +--
tests/qemuxml2argvtest.c | 1 +
6 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 tests/qemuxml2argvdata/cpu-no-removed-features.args
create mode 100644 tests/qemuxml2argvdata/cpu-no-removed-features.xml
--
2.17.1
5 years, 7 months
[libvirt] [PATCH RFC] network: Delay creating private chains until starting network
by Jim Fehlig
Automated performance tests found that network-centric workloads suffered
a 20 percent decrease when the host libvirt was updated from 5.0.0 to
5.1.0. On the test hosts libvirtd is enabled to start at boot and the
"default" network is defined, but it is not set to autostart.
libvirt 5.1.0 introduced private firewall chains with commit 5f1e6a7d.
The chains are created at libvirtd start, which triggers loading the
conntrack kernel module. Subsequent tracking and processing of
connections resulted in the performance hit. Prior to commit 5f1e6a7d
the conntrack module would not be loaded until starting a network,
when libvirt added rules to the builtin chains.
Restore the behavior of previous libvirt versions by delaying
the creation of private chains until the first network is started.
Signed-off-by: Jim Fehlig <jfehlig(a)suse.com>
---
I briefly discussed this issue with Daniel on IRC and just now finding
time to bring it to the list for broader discussion. The adjustment to
the test file feels hacky. The whole approach might by hacky, hence the
RFC.
src/network/bridge_driver_linux.c | 64 +++-------
.../nat-default-linux.args | 116 ++++++++++++++++++
2 files changed, 131 insertions(+), 49 deletions(-)
diff --git a/src/network/bridge_driver_linux.c b/src/network/bridge_driver_linux.c
index f2827543ca..a3a09caeba 100644
--- a/src/network/bridge_driver_linux.c
+++ b/src/network/bridge_driver_linux.c
@@ -35,44 +35,10 @@ VIR_LOG_INIT("network.bridge_driver_linux");
#define PROC_NET_ROUTE "/proc/net/route"
-static virErrorPtr errInitV4;
-static virErrorPtr errInitV6;
+static bool pvtChainsCreated;
void networkPreReloadFirewallRules(bool startup)
{
- bool created = false;
- int rc;
-
- /* We create global rules upfront as we don't want
- * the perf hit of conditionally figuring out whether
- * to create them each time a network is started.
- *
- * Any errors here are saved to be reported at time
- * of starting the network though as that makes them
- * more likely to be seen by a human
- */
- rc = iptablesSetupPrivateChains(VIR_FIREWALL_LAYER_IPV4);
- if (rc < 0) {
- errInitV4 = virSaveLastError();
- virResetLastError();
- } else {
- virFreeError(errInitV4);
- errInitV4 = NULL;
- }
- if (rc)
- created = true;
-
- rc = iptablesSetupPrivateChains(VIR_FIREWALL_LAYER_IPV6);
- if (rc < 0) {
- errInitV6 = virSaveLastError();
- virResetLastError();
- } else {
- virFreeError(errInitV6);
- errInitV6 = NULL;
- }
- if (rc)
- created = true;
-
/*
* If this is initial startup, and we just created the
* top level private chains we either
@@ -86,8 +52,8 @@ void networkPreReloadFirewallRules(bool startup)
* rules will be present. Thus we can safely just tell it
* to always delete from the builin chain
*/
- if (startup && created)
- iptablesSetDeletePrivate(false);
+ if (startup)
+ iptablesSetDeletePrivate(true);
}
@@ -701,19 +667,19 @@ int networkAddFirewallRules(virNetworkDefPtr def)
virFirewallPtr fw = NULL;
int ret = -1;
- if (errInitV4 &&
- (virNetworkDefGetIPByIndex(def, AF_INET, 0) ||
- virNetworkDefGetRouteByIndex(def, AF_INET, 0))) {
- virSetError(errInitV4);
- return -1;
- }
+ if (!pvtChainsCreated) {
+ if (iptablesSetupPrivateChains(VIR_FIREWALL_LAYER_IPV4) < 0 &&
+ (virNetworkDefGetIPByIndex(def, AF_INET, 0) ||
+ virNetworkDefGetRouteByIndex(def, AF_INET, 0)))
+ return -1;
- if (errInitV6 &&
- (virNetworkDefGetIPByIndex(def, AF_INET6, 0) ||
- virNetworkDefGetRouteByIndex(def, AF_INET6, 0) ||
- def->ipv6nogw)) {
- virSetError(errInitV6);
- return -1;
+ if (iptablesSetupPrivateChains(VIR_FIREWALL_LAYER_IPV6) < 0 &&
+ (virNetworkDefGetIPByIndex(def, AF_INET6, 0) ||
+ virNetworkDefGetRouteByIndex(def, AF_INET6, 0) ||
+ def->ipv6nogw))
+ return -1;
+
+ pvtChainsCreated = true;
}
if (def->bridgeZone) {
diff --git a/tests/networkxml2firewalldata/nat-default-linux.args b/tests/networkxml2firewalldata/nat-default-linux.args
index c9d523d043..61d620b592 100644
--- a/tests/networkxml2firewalldata/nat-default-linux.args
+++ b/tests/networkxml2firewalldata/nat-default-linux.args
@@ -1,5 +1,121 @@
iptables \
--table filter \
+--list-rules
+iptables \
+--table nat \
+--list-rules
+iptables \
+--table mangle \
+--list-rules
+iptables \
+--table filter \
+--new-chain LIBVIRT_INP
+iptables \
+--table filter \
+--insert INPUT \
+--jump LIBVIRT_INP
+iptables \
+--table filter \
+--new-chain LIBVIRT_OUT
+iptables \
+--table filter \
+--insert OUTPUT \
+--jump LIBVIRT_OUT
+iptables \
+--table filter \
+--new-chain LIBVIRT_FWO
+iptables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWO
+iptables \
+--table filter \
+--new-chain LIBVIRT_FWI
+iptables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWI
+iptables \
+--table filter \
+--new-chain LIBVIRT_FWX
+iptables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWX
+iptables \
+--table nat \
+--new-chain LIBVIRT_PRT
+iptables \
+--table nat \
+--insert POSTROUTING \
+--jump LIBVIRT_PRT
+iptables \
+--table mangle \
+--new-chain LIBVIRT_PRT
+iptables \
+--table mangle \
+--insert POSTROUTING \
+--jump LIBVIRT_PRT
+ip6tables \
+--table filter \
+--list-rules
+ip6tables \
+--table nat \
+--list-rules
+ip6tables \
+--table mangle \
+--list-rules
+ip6tables \
+--table filter \
+--new-chain LIBVIRT_INP
+ip6tables \
+--table filter \
+--insert INPUT \
+--jump LIBVIRT_INP
+ip6tables \
+--table filter \
+--new-chain LIBVIRT_OUT
+ip6tables \
+--table filter \
+--insert OUTPUT \
+--jump LIBVIRT_OUT
+ip6tables \
+--table filter \
+--new-chain LIBVIRT_FWO
+ip6tables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWO
+ip6tables \
+--table filter \
+--new-chain LIBVIRT_FWI
+ip6tables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWI
+ip6tables \
+--table filter \
+--new-chain LIBVIRT_FWX
+ip6tables \
+--table filter \
+--insert FORWARD \
+--jump LIBVIRT_FWX
+ip6tables \
+--table nat \
+--new-chain LIBVIRT_PRT
+ip6tables \
+--table nat \
+--insert POSTROUTING \
+--jump LIBVIRT_PRT
+ip6tables \
+--table mangle \
+--new-chain LIBVIRT_PRT
+ip6tables \
+--table mangle \
+--insert POSTROUTING \
+--jump LIBVIRT_PRT
+iptables \
+--table filter \
--insert LIBVIRT_INP \
--in-interface virbr0 \
--protocol tcp \
--
2.21.0
5 years, 7 months
[libvirt] [PATCH 00/12] qemu: Refactor image permission/lock setting (blockdev-add saga)
by Peter Krempa
With new blockjob handling we'll need to modify permissions for chains
and individual images. The individual image code was universally
accessible but the chain setting code reimplemented it mostly only in
qemu_hotplug.h.
Refactor the handling by moving the code to qemu_domain.c and making it
universal.
Peter Krempa (12):
qemu: Rename qemuDomainDiskChainElement(Revoke|Prepare)
qemu: Move and rename qemuHotplugPrepareDiskSourceAccess
qemu: Split entry points to qemuDomainStorageSourceChainAccessPrepare
qemu: domain: Rename qemuDomainStorageSourceChainAccessPrepare
qemu: Convert boolean flags to enum flags in
qemuDomainStorageSourceAccessModify
qemu: Allow using qemuDomainStorageSourceAccessModify on singe images
qemu: Refactor/simplify qemuDomainStorageSourceAccessRevoke
qemu: Allow forcing read-only mode in
qemuDomainStorageSourceAccessModify
qemu: Use bools rather than labels in
qemuDomainStorageSourceAccessModify
qemu: Allow skipping the revoke step in
qemuDomainStorageSourceAccessModify
qemu: Mark when modifying access to existing source in
qemuDomainStorageSourceAccessModify
qemu: Refactor/simplify qemuDomainStorageSourceAccessAllow
src/qemu/qemu_domain.c | 212 +++++++++++++++++++++++++++++++---------
src/qemu/qemu_domain.h | 23 +++--
src/qemu/qemu_driver.c | 24 ++---
src/qemu/qemu_hotplug.c | 80 ++-------------
4 files changed, 200 insertions(+), 139 deletions(-)
--
2.20.1
5 years, 7 months
[libvirt] [PATCH v6] tests: perform cross compiler builds on GitLab CI
by Daniel P. Berrangé
GitLab CI provides some shared build runners that use Docker containers.
This resource can usefully run cross-compiled builds since all other CI
build testing is currently x86 only, and Travis CI is already very busy
testing native builds.
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
.gitlab-ci.yml | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
create mode 100644 .gitlab-ci.yml
Changed in v6:
- Run jobs half & half on 9 vs sid
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000..a8a8581d9e
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,46 @@
+.job_template: &job_definition
+ script:
+ - mkdir vpath
+ - cd vpath
+ - ../autogen.sh $CONFIGURE_OPTS
+ - make -j $(getconf _NPROCESSORS_ONLN)
+
+# We could run every arch on both versions, but it is a little
+# overkill. Instead we run half the jobs on 9 and half the jobs
+# on sid to give reasonable cross-coverage.
+
+debian-9-cross-armv6l:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-9-cross-armv6l:master
+
+debian-9-cross-mipsel:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-9-cross-mipsel:master
+
+debian-9-cross-ppc64le:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-9-cross-ppc64le:master
+
+debian-9-cross-s390x:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-9-cross-s390x:master
+
+debian-sid-cross-aarch64:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-sid-cross-aarch64:master
+
+debian-sid-cross-armv7l:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-sid-cross-armv7l:master
+
+debian-sid-cross-i686:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-sid-cross-i686:master
+
+debian-sid-cross-mips64el:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-sid-cross-mips64el:master
+
+debian-sid-cross-mips:
+ <<: *job_definition
+ image: quay.io/libvirt/buildenv-debian-sid-cross-mips:master
--
2.20.1
5 years, 7 months
[libvirt] [PATCH 00/10] build: drop some old libs and relax GNU strictness
by Ján Tomko
While testing these changes I found that the build is broken with old
rbd on (unsupported) Debian 8. Fix the build by dropping support for
old rbd.
Ján Tomko (10):
rbd: depend on diff_iterate2
rbd: drop support for ceph 0.94
build: drop check for SASL1
build: require yajl >= 2.0.3
json: assume WITH_YAJL2
build: remove WITH_YAJL2
virJSONValueToString: bail out early on error
configure: split AM_INIT_AUTOMAKE into multiple lines
configure.ac: drop -Wno-obsolete from AM_INIT_AUTOMAKE
configure.ac: add foreign to AM_INIT_AUTOMAKE
config-post.h | 1 -
configure.ac | 12 ++++--
m4/virt-driver-qemu.m4 | 2 +-
m4/virt-sasl.m4 | 5 +--
m4/virt-storage-rbd.m4 | 2 +-
m4/virt-yajl.m4 | 5 +--
src/storage/storage_backend_rbd.c | 39 -------------------
src/util/virjson.c | 63 +++----------------------------
8 files changed, 19 insertions(+), 110 deletions(-)
--
2.19.2
5 years, 7 months
[libvirt] [PATCH 0/5] tests: Refresh/add capabilities for QEMU 4.0.0
by Andrea Bolognani
Now that it's officially out, we can refresh existing capabilities
created from git snapshots and introduce them for the architectures
where they were missing altogether.
This series covers all architectures except for s390x, to which I
don't have convenient access: Pino promised me he'd take care of that
one in a few days.
As usual for this kind of series, most patches have been snipped with
extreme prejudice in order to make them small enough that they
wouldn't end up in the moderation queue: the unabridged version can
be found at
https://github.com/andreabolognani/libvirt
in the 'qemucaps-4.0.0' branch.
It'd be great if we could sneak these in during the freeze so that I
won't have to possibly regenerate them again after the post-release
merge flood ;)
Andrea Bolognani (5):
tests: Refresh capabilities for QEMU 4.0.0 on x86_64
tests: Refresh capabilities for QEMU 4.0.0 on riscv32
tests: Refresh capabilities for QEMU 4.0.0 on riscv64
tests: Add capabilities for QEMU 4.0.0 on aarch64
tests: Add capabilities for QEMU 4.0.0 on ppc64
.../qemu_4.0.0.x86_64.xml | 3 +-
...v32.replies => caps_4.0.0.aarch64.replies} | 6997 ++--
.../caps_4.0.0.aarch64.xml | 310 +
...scv32.replies => caps_4.0.0.ppc64.replies} | 29886 ++++++++++------
.../qemucapabilitiesdata/caps_4.0.0.ppc64.xml | 1077 +
.../caps_4.0.0.riscv32.replies | 10 +-
.../caps_4.0.0.riscv32.xml | 4 +-
.../caps_4.0.0.riscv64.replies | 10 +-
.../caps_4.0.0.riscv64.xml | 4 +-
.../caps_4.0.0.x86_64.replies | 3331 +-
.../caps_4.0.0.x86_64.xml | 38 +-
...arch64-os-firmware-efi.aarch64-latest.args | 2 +-
.../aarch64-virt-graphics.aarch64-latest.args | 2 +-
.../aarch64-virt-headless.aarch64-latest.args | 2 +-
14 files changed, 25862 insertions(+), 15814 deletions(-)
copy tests/qemucapabilitiesdata/{caps_4.0.0.riscv32.replies => caps_4.0.0.aarch64.replies} (86%)
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.aarch64.xml
copy tests/qemucapabilitiesdata/{caps_4.0.0.riscv32.replies => caps_4.0.0.ppc64.replies} (71%)
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.ppc64.xml
--
2.20.1
5 years, 7 months
[libvirt] [PATCH 0/3] Couple of virBuffer fixes
by Michal Privoznik
Almost trivial.
Michal Prívozník (3):
virbuffer: Don't leak memory in virBufferAddBuffer
virbuffer: Use signed integer for storing error
virBuffer: Try harder to free buffer
src/util/virbuffer.c | 7 +++----
src/util/virbuffer.h | 2 +-
tests/virbuftest.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 36 insertions(+), 5 deletions(-)
--
2.21.0
5 years, 7 months
[libvirt] [PATCH] RFC: use a slirp helper process
by marcandre.lureau@redhat.com
From: Marc-André Lureau <marcandre.lureau(a)redhat.com>
I am throwing this away for discussions, and early feedback.
With the upcoming release of libslirp[1], we have an opportunity to
run SLIRP networking in a separate process. This will allow for
stricter security policies for both qemu & slirp, as slirp is
notoriously not very safe (discussed on ML, various CVEs, and even the
code says so explicitly in the comments), yet people rely on it regularly.
For network type "user", libvirt could spawn an helper process (an
experimental version is [2]). It would setup a socket pair between
qemu and the helper and use -net socket. qemu could then be built
without libslirp! (imho, qemu should deprecate built-in -net user
support, and doesn't need to grow its own sub-process handling)
This libvirt patch is a bit rough, I am not sure where things should
go. In particular, how to manage the subprocesses properly (security
aspects, and lifecycle etc), and how to deal with migration (prevent
migrating from non-helper to helper version etc).
It seems guestfwd has been non-functional for a while. This is also
something that the current experimental helper doesn't support
atm. Doing all the various "channel" types that libvirt/qemu support
would be tedious, and probably unnecessary. But the underlying
libslirp library support redirections the same way qemu does today, so
it could be added if necessary.
At this point, the slirp-helper doesn't handle migration. But is it
really worth it? From a guest POV, it would look like packet lost or
disconnection. Adding migration handling is possible, to avoid a
disconnection event. How should it be done? via a libvirt provided
socket for storage? via its own file transferred by libvirt? via qemu
somehow (qemu wouldn't really know about the external process but
would copy around the migration bits?).
Does the helper need to have a "standard" & capabilities, similar to
what is proposed for vhost-user [3]? This would allow for easier
competing implementation to emerge (I have plans in mind, not using
libslirp).
Thanks for the feedback!
[1] https://gitlab.freedesktop.org/slirp/libslirp
[2] https://github.com/elmarco/libslirp-rs
[3] https://git.qemu.org/?p=qemu.git;a=blob;f=docs/interop/vhost-user.json
Signed-off-by: Marc-André Lureau <marcandre.lureau(a)redhat.com>
---
m4/virt-driver-qemu.m4 | 5 ++
src/qemu/libvirtd_qemu.aug | 1 +
src/qemu/qemu.conf | 3 ++
src/qemu/qemu_capabilities.c | 6 +++
src/qemu/qemu_capabilities.h | 1 +
src/qemu/qemu_command.c | 38 +++++++++++---
src/qemu/qemu_command.h | 3 +-
src/qemu/qemu_conf.c | 7 ++-
src/qemu/qemu_conf.h | 1 +
src/qemu/qemu_domain.c | 11 ++++
src/qemu/qemu_domain.h | 3 ++
src/qemu/qemu_hotplug.c | 5 +-
src/qemu/qemu_interface.c | 80 ++++++++++++++++++++++++++++++
src/qemu/qemu_interface.h | 6 +++
src/qemu/test_libvirtd_qemu.aug.in | 1 +
15 files changed, 161 insertions(+), 10 deletions(-)
diff --git a/m4/virt-driver-qemu.m4 b/m4/virt-driver-qemu.m4
index a1d05bbd7f..705b1f592b 100644
--- a/m4/virt-driver-qemu.m4
+++ b/m4/virt-driver-qemu.m4
@@ -105,6 +105,11 @@ AC_DEFUN([LIBVIRT_DRIVER_CHECK_QEMU], [
[/usr/bin:/usr/libexec])
AC_DEFINE_UNQUOTED([QEMU_PR_HELPER], ["$QEMU_PR_HELPER"],
[QEMU PR helper])
+ AC_PATH_PROG([SLIRP_HELPER], [slirp-helper],
+ [/usr/bin/slirp-helper],
+ [/usr/bin:/usr/libexec])
+ AC_DEFINE_UNQUOTED([SLIRP_HELPER], ["$SLIRP_HELPER"],
+ [slirp helper])
])
AC_DEFUN([LIBVIRT_DRIVER_RESULT_QEMU], [
diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug
index b311f02da6..15206454e0 100644
--- a/src/qemu/libvirtd_qemu.aug
+++ b/src/qemu/libvirtd_qemu.aug
@@ -88,6 +88,7 @@ module Libvirtd_qemu =
| bool_entry "clear_emulator_capabilities"
| str_entry "bridge_helper"
| str_entry "pr_helper"
+ | str_entry "slirp_helper"
| bool_entry "set_process_name"
| int_entry "max_processes"
| int_entry "max_files"
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index 334b4cd4ee..725ae791c5 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -810,6 +810,9 @@
# used whenever <reservations/> are enabled for SCSI LUN devices.
#pr_helper = "/usr/bin/qemu-pr-helper"
+# Path to the SLIRP networking helper.
+#slirp_helper = "/usr/bin/slirp-helper"
+
# User for the swtpm TPM Emulator
#
# Default is 'tss'; this is the same user that tcsd (TrouSerS) installs
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index f8ea66b577..86cef7b9ab 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -524,6 +524,7 @@ VIR_ENUM_IMPL(virQEMUCaps,
"scsi-disk.device_id",
"virtio-pci-non-transitional",
"overcommit",
+ "net-socket-dgram",
);
@@ -4212,6 +4213,11 @@ virQEMUCapsInitQMPVersionCaps(virQEMUCapsPtr qemuCaps)
ARCH_IS_PPC64(qemuCaps->arch)) {
virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT);
}
+
+ /* -net socket,fd= with dgram socket (for ex, with slirp helper) */
+ if (qemuCaps->version >= 3001092) {
+ virQEMUCapsSet(qemuCaps, QEMU_CAPS_NET_SOCKET_DGRAM);
+ }
}
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index 23ecef8c63..9db4c1bf2d 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -506,6 +506,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */
QEMU_CAPS_SCSI_DISK_DEVICE_ID, /* 'device_id' property of scsi disk */
QEMU_CAPS_VIRTIO_PCI_TRANSITIONAL, /* virtio *-pci-{non-}transitional devices */
QEMU_CAPS_OVERCOMMIT, /* -overcommit */
+ QEMU_CAPS_NET_SOCKET_DGRAM,
QEMU_CAPS_LAST /* this must always be the last item */
} virQEMUCapsFlags;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 9df7b7e8ea..6a047fa8f9 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4066,7 +4066,8 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
char **tapfd,
size_t tapfdSize,
char **vhostfd,
- size_t vhostfdSize)
+ size_t vhostfdSize,
+ char *slirpfd)
{
bool is_tap = false;
virBuffer buf = VIR_BUFFER_INITIALIZER;
@@ -4137,6 +4138,12 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
break;
case VIR_DOMAIN_NET_TYPE_USER:
+ if (slirpfd) {
+ virBufferAsprintf(&buf, "socket,fd=%s,",
+ slirpfd);
+ break;
+ }
+
virBufferAddLit(&buf, "user,");
for (i = 0; i < net->guestIP.nips; i++) {
const virNetDevIPAddr *ip = net->guestIP.ips[i];
@@ -8721,10 +8728,10 @@ qemuInterfaceVhostuserConnect(virQEMUDriverPtr driver,
static int
qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
virLogManagerPtr logManager,
virSecurityManagerPtr secManager,
virCommandPtr cmd,
- virDomainDefPtr def,
virDomainNetDefPtr net,
virQEMUCapsPtr qemuCaps,
unsigned int bootindex,
@@ -8733,6 +8740,7 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
size_t *nnicindexes,
int **nicindexes)
{
+ virDomainDefPtr def = vm->def;
int ret = -1;
char *nic = NULL;
char *host = NULL;
@@ -8743,12 +8751,13 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
size_t vhostfdSize = 0;
char **tapfdName = NULL;
char **vhostfdName = NULL;
+ int slirpfd = -1;
+ char *slirpfdName = NULL;
virDomainNetType actualType = virDomainNetGetActualType(net);
virNetDevBandwidthPtr actualBandwidth;
bool requireNicdev = false;
size_t i;
-
if (!bootindex)
bootindex = net->info.bootIndex;
@@ -8971,6 +8980,18 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
goto cleanup;
}
+ if (actualType == VIR_DOMAIN_NET_TYPE_USER &&
+ virQEMUCapsGet(qemuCaps, QEMU_CAPS_NET_SOCKET_DGRAM) &&
+ !standalone) {
+ if (qemuInterfaceOpenSlirp(driver, vm, net, &slirpfd) < 0) {
+ goto cleanup;
+ }
+ virCommandPassFD(cmd, slirpfd,
+ VIR_COMMAND_PASS_FD_CLOSE_PARENT);
+ if (virAsprintf(&slirpfdName, "%d", slirpfd) < 0)
+ goto cleanup;
+ }
+
for (i = 0; i < tapfdSize; i++) {
if (qemuSecuritySetTapFDLabel(driver->securityManager,
def, tapfd[i]) < 0)
@@ -8993,7 +9014,8 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
if (!(host = qemuBuildHostNetStr(net, driver,
tapfdName, tapfdSize,
- vhostfdName, vhostfdSize)))
+ vhostfdName, vhostfdSize,
+ slirpfdName)))
goto cleanup;
virCommandAddArgList(cmd, "-netdev", host, NULL);
@@ -9032,6 +9054,7 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
virSetError(saved_err);
virFreeError(saved_err);
}
+ VIR_FREE(slirpfdName);
for (i = 0; vhostfd && i < vhostfdSize && vhostfd[i] >= 0; i++) {
if (ret < 0)
VIR_FORCE_CLOSE(vhostfd[i]);
@@ -9061,10 +9084,10 @@ qemuBuildInterfaceCommandLine(virQEMUDriverPtr driver,
*/
static int
qemuBuildNetCommandLine(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
virLogManagerPtr logManager,
virSecurityManagerPtr secManager,
virCommandPtr cmd,
- virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
virNetDevVPortProfileOp vmop,
bool standalone,
@@ -9075,6 +9098,7 @@ qemuBuildNetCommandLine(virQEMUDriverPtr driver,
size_t i;
int last_good_net = -1;
virErrorPtr originalError = NULL;
+ virDomainDefPtr def = vm->def;
if (def->nnets) {
unsigned int bootNet = 0;
@@ -9090,7 +9114,7 @@ qemuBuildNetCommandLine(virQEMUDriverPtr driver,
for (i = 0; i < def->nnets; i++) {
virDomainNetDefPtr net = def->nets[i];
- if (qemuBuildInterfaceCommandLine(driver, logManager, secManager, cmd, def, net,
+ if (qemuBuildInterfaceCommandLine(driver, vm, logManager, secManager, cmd, net,
qemuCaps, bootNet, vmop,
standalone, nnicindexes,
nicindexes) < 0)
@@ -10818,7 +10842,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
if (qemuBuildFSDevCommandLine(cmd, def, qemuCaps) < 0)
goto error;
- if (qemuBuildNetCommandLine(driver, logManager, secManager, cmd, def,
+ if (qemuBuildNetCommandLine(driver, vm, logManager, secManager, cmd,
qemuCaps, vmop, standalone,
nnicindexes, nicindexes, &bootHostdevNet) < 0)
goto error;
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 9565a7a377..93696cb8d0 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -88,7 +88,8 @@ char *qemuBuildHostNetStr(virDomainNetDefPtr net,
char **tapfd,
size_t tapfdSize,
char **vhostfd,
- size_t vhostfdSize);
+ size_t vhostfdSize,
+ char *slirpfd);
/* Current, best practice */
char *qemuBuildNicDevStr(virDomainDefPtr def,
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index daea11dacb..e3cc0921a6 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -297,7 +297,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
}
if (VIR_STRDUP(cfg->bridgeHelperName, QEMU_BRIDGE_HELPER) < 0 ||
- VIR_STRDUP(cfg->prHelperName, QEMU_PR_HELPER) < 0)
+ VIR_STRDUP(cfg->prHelperName, QEMU_PR_HELPER) < 0 ||
+ VIR_STRDUP(cfg->slirpHelperName, SLIRP_HELPER) < 0)
goto error;
cfg->clearEmulatorCapabilities = true;
@@ -387,6 +388,7 @@ static void virQEMUDriverConfigDispose(void *obj)
VIR_FREE(cfg->hugetlbfs);
VIR_FREE(cfg->bridgeHelperName);
VIR_FREE(cfg->prHelperName);
+ VIR_FREE(cfg->slirpHelperName);
VIR_FREE(cfg->saveImageFormat);
VIR_FREE(cfg->dumpImageFormat);
@@ -681,6 +683,9 @@ virQEMUDriverConfigLoadProcessEntry(virQEMUDriverConfigPtr cfg,
if (virConfGetValueString(conf, "pr_helper", &cfg->prHelperName) < 0)
return -1;
+ if (virConfGetValueString(conf, "slirp_helper", &cfg->slirpHelperName) < 0)
+ return -1;
+
if (virConfGetValueBool(conf, "set_process_name", &cfg->setProcessName) < 0)
return -1;
if (virConfGetValueUInt(conf, "max_processes", &cfg->maxProcesses) < 0)
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 983e74a3cf..7ae09f8f8c 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -160,6 +160,7 @@ struct _virQEMUDriverConfig {
char *bridgeHelperName;
char *prHelperName;
+ char *slirpHelperName;
bool macFilter;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index f9f5ffc22b..626702c3ed 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -2345,6 +2345,15 @@ qemuDomainObjPrivateXMLFormatPR(virBufferPtr buf,
}
+static void
+qemuDomainObjPrivateXMLFormatSlirp(virBufferPtr buf,
+ qemuDomainObjPrivatePtr priv)
+{
+ if (priv->slirpPid)
+ virBufferAddLit(buf, "<Slirp/>\n");
+}
+
+
static int
qemuDomainObjPrivateXMLFormatNBDMigrationSource(virBufferPtr buf,
virStorageSourcePtr src,
@@ -2555,6 +2564,8 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf,
qemuDomainObjPrivateXMLFormatPR(buf, priv);
+ qemuDomainObjPrivateXMLFormatSlirp(buf, priv);
+
if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
virBufferAsprintf(buf, "<nodename index='%llu'/>\n", priv->nodenameindex);
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 06640a9510..f7937bf436 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -365,6 +365,9 @@ struct _qemuDomainObjPrivate {
/* true if qemu-pr-helper process is running for the domain */
bool prDaemonRunning;
+ /* todo: list of running slirp processes */
+ pid_t slirpPid;
+
/* counter for generating node names for qemu disks */
unsigned long long nodenameindex;
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index a4f7d111b1..f6ee9815ab 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -1355,6 +1355,7 @@ qemuDomainAttachNetDevice(virQEMUDriverPtr driver,
qemuDomainObjPrivatePtr priv = vm->privateData;
virDomainDeviceDef dev = { VIR_DOMAIN_DEVICE_NET, { .net = net } };
virErrorPtr originalError = NULL;
+ char *slirpfdName = NULL;
char **tapfdName = NULL;
int *tapfd = NULL;
size_t tapfdSize = 0;
@@ -1592,7 +1593,8 @@ qemuDomainAttachNetDevice(virQEMUDriverPtr driver,
if (!(netstr = qemuBuildHostNetStr(net, driver,
tapfdName, tapfdSize,
- vhostfdName, vhostfdSize)))
+ vhostfdName, vhostfdSize,
+ slirpfdName)))
goto cleanup;
qemuDomainObjEnterMonitor(driver, vm);
@@ -1720,6 +1722,7 @@ qemuDomainAttachNetDevice(virQEMUDriverPtr driver,
VIR_FREE(vhostfdName);
VIR_FREE(charDevAlias);
virObjectUnref(conn);
+ VIR_FREE(slirpfdName);
virDomainCCWAddressSetFree(ccwaddrs);
return ret;
diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c
index c8effa68f4..55423d6895 100644
--- a/src/qemu/qemu_interface.c
+++ b/src/qemu/qemu_interface.c
@@ -603,6 +603,86 @@ qemuInterfaceBridgeConnect(virDomainDefPtr def,
}
+/**
+ * qemuInterfaceOpenSlirp:
+ * @net: network definition
+ * @slirpfd: slirp connection
+ *
+ * Returns: 0 on success
+ * -1 on failure
+ */
+int
+qemuInterfaceOpenSlirp(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ virDomainNetDefPtr net,
+ int *slirpfd)
+{
+ int i, pair[2] = { -1, -1 };
+ VIR_AUTOPTR(virCommand) cmd = NULL;
+ VIR_AUTOFREE(char *) cmdstr = NULL;
+ VIR_AUTOFREE(char *) addr = NULL;
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) {
+ virReportSystemError(errno, "%s", _("failed to create socket"));
+ goto error;
+ }
+
+ cmd = virCommandNew(cfg->slirpHelperName);
+ virCommandAddArgFormat(cmd, "--fd=%d", pair[1]);
+ virCommandPassFD(cmd, pair[1],
+ VIR_COMMAND_PASS_FD_CLOSE_PARENT);
+
+ for (i = 0; i < net->guestIP.nips; i++) {
+ const virNetDevIPAddr *ip = net->guestIP.ips[i];
+ const char *opt = "";
+
+ if (!(addr = virSocketAddrFormat(&ip->address)))
+ goto error;
+
+ if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET))
+ opt = "--net";
+ if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET6))
+ opt = "--prefix-ipv6";
+
+ virCommandAddArgFormat(cmd, "%s=%s", opt, addr);
+ VIR_FREE(addr);
+
+ if (ip->prefix) {
+ if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET)) {
+ virSocketAddr netmask;
+ VIR_AUTOFREE(char *) netmaskStr = NULL;
+
+ if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to translate prefix %d to netmask"),
+ ip->prefix);
+ goto error;
+ }
+ if (!(netmaskStr = virSocketAddrFormat(&netmask)))
+ goto error;
+ virCommandAddArgFormat(cmd, "--mask=%s", netmaskStr);
+ }
+ if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET6))
+ virCommandAddArgFormat(cmd, "--prefix-length-ipv6=%u", ip->prefix);
+ }
+ }
+
+ virCommandClearCaps(cmd);
+ if (virCommandRunAsync(cmd, &priv->slirpPid) < 0) {
+ goto error;
+ }
+
+ *slirpfd = pair[0];
+ return 0;
+
+error:
+ VIR_FORCE_CLOSE(pair[0]);
+ return -1;
+}
+
+
/**
* qemuInterfaceOpenVhostNet:
* @def: domain definition
diff --git a/src/qemu/qemu_interface.h b/src/qemu/qemu_interface.h
index f3ec540eda..de6195462b 100644
--- a/src/qemu/qemu_interface.h
+++ b/src/qemu/qemu_interface.h
@@ -55,4 +55,10 @@ int qemuInterfaceOpenVhostNet(virDomainDefPtr def,
virDomainNetDefPtr net,
int *vhostfd,
size_t *vhostfdSize);
+
+int qemuInterfaceOpenSlirp(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ virDomainNetDefPtr net,
+ int *slirpfd);
+
#endif /* LIBVIRT_QEMU_INTERFACE_H */
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index fea1d308b7..5796e5d1eb 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -102,5 +102,6 @@ module Test_libvirtd_qemu =
}
{ "memory_backing_dir" = "/var/lib/libvirt/qemu/ram" }
{ "pr_helper" = "/usr/bin/qemu-pr-helper" }
+{ "slirp_helper" = "/usr/bin/slirp-helper" }
{ "swtpm_user" = "tss" }
{ "swtpm_group" = "tss" }
--
2.21.0.313.ge35b8cb8e2
5 years, 7 months