Re: [libvirt] [Qemu-devel] Qemu migration with vhost-user-blk on top of local storage
by Stefan Hajnoczi
On Wed, Jan 09, 2019 at 06:23:42PM +0800, wuzhouhui wrote:
> Hi everyone,
>
> I'm working qemu with vhost target (e.g. spdk), and I attempt to migrate VM with
> 2 local storages. One local storage is a regular file, e.g. /tmp/c74.qcow2, and
> the other is a malloc bdev that spdk created. This malloc bdev will exported to
> VM via vhost-user-blk. When I execute following command:
>
> virsh migrate --live --persistent --unsafe --undefinesource --copy-storage-all \
> --p2p --auto-converge --verbose --desturi qemu+tcp://<uri>/system vm0
>
> The libvirt reports:
>
> qemu-2.12.1: error: internal error: unable to execute QEMU command \
> 'nbd-server-add': Cannot find device=drive-virtio-disk1 nor \
> node_name=drive-virtio-disk1
Please post your libvirt domain XML.
> Does it means that qemu with spdk on top of local storage don't support migration?
>
> QEMU: 2.12.1
> SPDK: 18.10
vhost-user-blk bypasses the QEMU block layer, so NBD storage migration
at the QEMU level will not work for the vhost-user-blk disk.
Stefan
1 year, 3 months
[libvirt] [PATCH v3] openvswitch: Add new port VLAN mode "dot1q-tunnel"
by luzhipeng@uniudc.com
From: ZhiPeng Lu <luzhipeng(a)uniudc.com>
Signed-off-by: ZhiPeng Lu <luzhipeng(a)uniudc.com>
---
v1->v2:
1. Fix "make syntax-check" failure
v2->v3:
1. remove other_config when updating vlan
docs/formatnetwork.html.in | 17 +++++++++--------
docs/schemas/networkcommon.rng | 1 +
src/conf/netdev_vlan_conf.c | 2 +-
src/util/virnetdevopenvswitch.c | 7 +++++++
src/util/virnetdevvlan.h | 1 +
5 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in
index 363a72b..3c1ae62 100644
--- a/docs/formatnetwork.html.in
+++ b/docs/formatnetwork.html.in
@@ -688,16 +688,17 @@
</p>
<p>
For network connections using Open vSwitch it is also possible
- to configure 'native-tagged' and 'native-untagged' VLAN modes
+ to configure 'native-tagged' and 'native-untagged' and 'dot1q-tunnel'
+ VLAN modes.
<span class="since">Since 1.1.0.</span> This is done with the
- optional <code>nativeMode</code> attribute on
- the <code><tag></code> subelement: <code>nativeMode</code>
- may be set to 'tagged' or 'untagged'. The <code>id</code>
- attribute of the <code><tag></code> subelement
- containing <code>nativeMode</code> sets which VLAN is considered
- to be the "native" VLAN for this interface, and
+ optional <code>nativeMode</code> attribute on the
+ <code><tag></code> subelement: <code>nativeMode</code>
+ may be set to 'tagged' or 'untagged' or 'dot1q-tunnel'.
+ The <code>id</code> attribute of the <code><tag></code>
+ subelement containing <code>nativeMode</code> sets which VLAN is
+ considered to be the "native" VLAN for this interface, and
the <code>nativeMode</code> attribute determines whether or not
- traffic for that VLAN will be tagged.
+ traffic for that VLAN will be tagged or QinQ.
</p>
<p>
<code><vlan></code> elements can also be specified in
diff --git a/docs/schemas/networkcommon.rng b/docs/schemas/networkcommon.rng
index 2699555..11c48ff 100644
--- a/docs/schemas/networkcommon.rng
+++ b/docs/schemas/networkcommon.rng
@@ -223,6 +223,7 @@
<choice>
<value>tagged</value>
<value>untagged</value>
+ <value>dot1q-tunnel</value>
</choice>
</attribute>
</optional>
diff --git a/src/conf/netdev_vlan_conf.c b/src/conf/netdev_vlan_conf.c
index dff49c6..79710d9 100644
--- a/src/conf/netdev_vlan_conf.c
+++ b/src/conf/netdev_vlan_conf.c
@@ -29,7 +29,7 @@
#define VIR_FROM_THIS VIR_FROM_NONE
VIR_ENUM_IMPL(virNativeVlanMode, VIR_NATIVE_VLAN_MODE_LAST,
- "default", "tagged", "untagged")
+ "default", "tagged", "untagged", "dot1q-tunnel")
int
virNetDevVlanParse(xmlNodePtr node, xmlXPathContextPtr ctxt, virNetDevVlanPtr def)
diff --git a/src/util/virnetdevopenvswitch.c b/src/util/virnetdevopenvswitch.c
index 8fe06fd..9fec30b 100644
--- a/src/util/virnetdevopenvswitch.c
+++ b/src/util/virnetdevopenvswitch.c
@@ -91,6 +91,11 @@ virNetDevOpenvswitchConstructVlans(virCommandPtr cmd, virNetDevVlanPtr virtVlan)
virCommandAddArg(cmd, "vlan_mode=native-untagged");
virCommandAddArgFormat(cmd, "tag=%d", virtVlan->nativeTag);
break;
+ case VIR_NATIVE_VLAN_MODE_DOT1Q_TUNNEL:
+ virCommandAddArg(cmd, "vlan_mode=dot1q-tunnel");
+ virCommandAddArg(cmd, "other_config:qinq-ethtype=802.1q");
+ virCommandAddArgFormat(cmd, "tag=%d", virtVlan->nativeTag);
+ break;
case VIR_NATIVE_VLAN_MODE_DEFAULT:
default:
break;
@@ -504,6 +509,8 @@ int virNetDevOpenvswitchUpdateVlan(const char *ifname,
"--", "--if-exists", "clear", "Port", ifname, "tag",
"--", "--if-exists", "clear", "Port", ifname, "trunk",
"--", "--if-exists", "clear", "Port", ifname, "vlan_mode",
+ "--", "--if-exists", "remove", "Port", ifname, "other_config",
+ "qinq-ethtype", NULL,
"--", "--if-exists", "set", "Port", ifname, NULL);
if (virNetDevOpenvswitchConstructVlans(cmd, virtVlan) < 0)
diff --git a/src/util/virnetdevvlan.h b/src/util/virnetdevvlan.h
index be85f59..0667f9d 100644
--- a/src/util/virnetdevvlan.h
+++ b/src/util/virnetdevvlan.h
@@ -29,6 +29,7 @@ typedef enum {
VIR_NATIVE_VLAN_MODE_DEFAULT = 0,
VIR_NATIVE_VLAN_MODE_TAGGED,
VIR_NATIVE_VLAN_MODE_UNTAGGED,
+ VIR_NATIVE_VLAN_MODE_DOT1Q_TUNNEL,
VIR_NATIVE_VLAN_MODE_LAST
} virNativeVlanMode;
--
1.8.3.1
1 year, 3 months
[libvirt] [PATCH] Fix compile error for stable 1.2.9
by Yang hongyang
Seems a backport miss. An extra member is passed to struct
virLXCBasicMountInfo.
Signed-off-by: Yang hongyang <hongyang.yang(a)easystack.cn>
---
src/lxc/lxc_container.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 28dabec..1c65fa9 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -760,7 +760,7 @@ typedef struct {
static const virLXCBasicMountInfo lxcBasicMounts[] = {
{ "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false },
- { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false, false },
+ { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false },
{ "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false },
{ "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true },
#if WITH_SELINUX
--
1.7.1
1 year, 3 months
[libvirt] Supporting vhost-net and macvtap in libvirt for QEMU
by Anthony Liguori
Disclaimer: I am neither an SR-IOV nor a vhost-net expert, but I've CC'd
people that are who can throw tomatoes at me for getting bits wrong :-)
I wanted to start a discussion about supporting vhost-net in libvirt.
vhost-net has not yet been merged into qemu but I expect it will be soon
so it's a good time to start this discussion.
There are two modes worth supporting for vhost-net in libvirt. The
first mode is where vhost-net backs to a tun/tap device. This is
behaves in very much the same way that -net tap behaves in qemu today.
Basically, the difference is that the virtio backend is in the kernel
instead of in qemu so there should be some performance improvement.
Current, libvirt invokes qemu with -net tap,fd=X where X is an already
open fd to a tun/tap device. I suspect that after we merge vhost-net,
libvirt could support vhost-net in this mode by just doing -net
vhost,fd=X. I think the only real question for libvirt is whether to
provide a user visible switch to use vhost or to just always use vhost
when it's available and it makes sense. Personally, I think the later
makes sense.
The more interesting invocation of vhost-net though is one where the
vhost-net device backs directly to a physical network card. In this
mode, vhost should get considerably better performance than the current
implementation. I don't know the syntax yet, but I think it's
reasonable to assume that it will look something like -net
tap,dev=eth0. The effect will be that eth0 is dedicated to the guest.
On most modern systems, there is a small number of network devices so
this model is not all that useful except when dealing with SR-IOV
adapters. In that case, each physical device can be exposed as many
virtual devices (VFs). There are a few restrictions here though. The
biggest is that currently, you can only change the number of VFs by
reloading a kernel module so it's really a parameter that must be set at
startup time.
I think there are a few ways libvirt could support vhost-net in this
second mode. The simplest would be to introduce a new tag similar to
<source network='br0'>. In fact, if you probed the device type for the
network parameter, you could probably do something like <source
network='eth0'> and have it Just Work.
Another model would be to have libvirt see an SR-IOV adapter as a
network pool whereas it handled all of the VF management. Considering
how inflexible SR-IOV is today, I'm not sure whether this is the best model.
Has anyone put any more thought into this problem or how this should be
modeled in libvirt? Michael, could you share your current thinking for
-net syntax?
--
Regards,
Anthony Liguori
1 year, 3 months
[libvirt RFCv11 00/33] multifd save restore prototype
by Claudio Fontana
This is v11 of the multifd save prototype, which focuses on saving
to a single file instead of requiring multiple separate files for
multifd channels.
This series demonstrates a way to save and restore from a single file
by using interleaved channels of a size equal to the transfer buffer
size (1MB), and relying on UNIX holes to avoid wasting physical disk
space due to channels of different size.
KNOWN ISSUES:
a) still applies only to save/restore (no managed save etc)
b) this is not not done in QEMU, where it could be possible to teach
QEMU to migrate directly to a file or block device in a
block-aligned way by altering the migration stream code and the
state migration code of all devices.
changes from v10:
* virfile: add new API virFileDiskCopyChannel, which extends the
existing virFileDiskCopy to work with parallel channels in the file.
* drop use of virthread API, use GLIB for threads.
* pass only a single FD to the multifd-helper, which will then open
additional FDs as required for the multithreaded I/O.
* simplify virQEMUSaveFd API, separating the initialization from the
addition of extra channels.
* adapt all documentation to mention a single file instead of multiple.
* remove the "Lim" versions of virFileDirectRead and Write, they are
not needed.
---
changes from v9:
* exposed virFileDirectAlign
* separated the >= 2 QEMU_SAVE_VERSION change in own patch
* reworked the write code to add the alignment padding to the
data_len, making the on disk format compatible when loaded
from an older libvirt.
* reworked the read code to use direct I/O APIs only for actual
direct I/O file descriptors, so as to make old images work
with newer libvirt.
---
changes from v8:
* rebased on master
* reordered patches to add more upstreamable content at the start
* split introduction of virQEMUSaveFd, so the first part is multifd-free
* new virQEMUSaveDataRead as a mirror of virQEMUSaveDataWrite
* introduced virFileDirect API, using it in virFileDisk operations and
for virQEMUSaveRead and virQEMUSaveWrite
---
changes from v7:
* [ base params API and iohelper refactoring upstreamed ]
* extended the QEMU save image format more, to record the nr
of multifd channels on save. Made the data header struct packed.
* removed --parallel-connections from the restore command, as now
it is useless due to QEMU save image format extension.
* separate out patches to expose migration_params APIs to saveimage,
including qemuMigrationParamsSetString, SetCap, SetInt.
* fixed bugs in the ImageOpen patch (missing saveFd init), removed
some whitespace, and fixed some convoluted code paths for return
value -3.
---
changes from v6:
* improved error path handling, with error messages and especially
cancellation of qemu process on error during restore.
* split patches more and reordered them to keep general refactoring
at the beginning before the --parallel stuff is introduced.
* improved multifd compression support, including adding an enum
and extending the QEMU save image format to record the compression
used on save, and pick it up automatically on restore.
---
changes from v4:
* runIO renamed to virFileDiskCopy and rethought arguments
* renamed new APIs from ...ParametersFlags to ...Params
* introduce the new virDomainSaveParams and virDomainRestoreParams
without any additional parameters, so they can be upstreamed first.
* solved the issue in the gendispatch.pl script generating code that
was missing the conn parameter.
---
changes from v3:
* reordered series to have all helper-related change at the start
* solved all reported issues from ninja test, including documentation
* fixed most broken migration capabilities code (still imperfect likely)
* added G_GNUC_UNUSED as needed
* after multifd restore, added what I think were the missing operations:
qemuProcessRefreshState(),
qemuProcessStartCPUs() - most importantly,
virDomainObjSave()
The domain now starts running after restore without further encouragement
* removed the sleep(10) from the multifd-helper
---
changes from v2:
* added ability to restore the VM from disk using multifd
* fixed the multifd-helper to work in both directions,
assuming the need to listen for save, and connect for restore.
* fixed a large number of bugs, and probably introduced some :-)
---
Claudio Fontana (33):
virfile: introduce virFileDirect APIs
virfile: use virFileDirect API in runIOCopy
qemu: saveimage: rework image read/write to be O_DIRECT friendly
qemu: saveimage: assume future formats will also support compression
virfile: virFileDiskCopy: prepare for O_DIRECT files without wrapper
qemu: saveimage: introduce virQEMUSaveFd
qemu: saveimage: convert qemuSaveImageCreate to use virQEMUSaveFd
qemu: saveimage: convert qemuSaveImageOpen to use virQEMUSaveFd
tools: prepare doSave to use parameters
tools: prepare cmdRestore to use parameters
libvirt: add new VIR_DOMAIN_SAVE_PARALLEL flag and parameter
qemu: add stub support for VIR_DOMAIN_SAVE_PARALLEL in save
qemu: add stub support for VIR_DOMAIN_SAVE_PARALLEL in restore
virfile: add new API virFileDiskCopyChannel
multifd-helper: new helper for parallel save/restore
qemu: saveimage: update virQEMUSaveFd struct for parallel save
qemu: saveimage: wire up saveimage code with the multifd helper
qemu: capabilities: add multifd to the probed migration capabilities
qemu: saveimage: add multifd related fields to save format
qemu: migration_params: add APIs to set Int and Cap
qemu: migration: implement qemuMigrationSrcToFilesMultiFd for save
qemu: add parameter to qemuMigrationDstRun to skip waiting
qemu: implement qemuSaveImageLoadMultiFd for restore
tools: add parallel parameter to virsh save command
tools: add parallel parameter to virsh restore command
qemu: add migration parameter multifd-compression
libvirt: add new VIR_DOMAIN_SAVE_PARAM_PARALLEL_COMPRESSION
qemu: saveimage: add parallel compression argument to ImageCreate
qemu: saveimage: add stub support for multifd compression parameter
qemu: migration: expose qemuMigrationParamsSetString
qemu: saveimage: implement multifd-compression in parallel save
qemu: saveimage: restore compressed parallel images
tools: add parallel-compression parameter to virsh save command
docs/manpages/virsh.rst | 26 +-
include/libvirt/libvirt-domain.h | 24 +
po/POTFILES | 1 +
src/libvirt_private.syms | 6 +
src/qemu/qemu_capabilities.c | 4 +
src/qemu/qemu_capabilities.h | 2 +
src/qemu/qemu_driver.c | 146 ++--
src/qemu/qemu_migration.c | 160 ++--
src/qemu/qemu_migration.h | 16 +-
src/qemu/qemu_migration_params.c | 71 +-
src/qemu/qemu_migration_params.h | 15 +
src/qemu/qemu_process.c | 3 +-
src/qemu/qemu_process.h | 5 +-
src/qemu/qemu_saveimage.c | 703 +++++++++++++-----
src/qemu/qemu_saveimage.h | 69 +-
src/qemu/qemu_snapshot.c | 6 +-
src/util/iohelper.c | 3 +
src/util/meson.build | 16 +
src/util/multifd-helper.c | 359 +++++++++
src/util/virfile.c | 391 +++++++---
src/util/virfile.h | 11 +
.../caps_4.0.0.aarch64.xml | 1 +
.../qemucapabilitiesdata/caps_4.0.0.ppc64.xml | 1 +
.../caps_4.0.0.riscv32.xml | 1 +
.../caps_4.0.0.riscv64.xml | 1 +
.../qemucapabilitiesdata/caps_4.0.0.s390x.xml | 1 +
.../caps_4.0.0.x86_64.xml | 1 +
.../caps_4.1.0.x86_64.xml | 1 +
.../caps_4.2.0.aarch64.xml | 1 +
.../qemucapabilitiesdata/caps_4.2.0.ppc64.xml | 1 +
.../qemucapabilitiesdata/caps_4.2.0.s390x.xml | 1 +
.../caps_4.2.0.x86_64.xml | 1 +
.../caps_5.0.0.aarch64.xml | 2 +
.../qemucapabilitiesdata/caps_5.0.0.ppc64.xml | 2 +
.../caps_5.0.0.riscv64.xml | 2 +
.../caps_5.0.0.x86_64.xml | 2 +
.../qemucapabilitiesdata/caps_5.1.0.sparc.xml | 2 +
.../caps_5.1.0.x86_64.xml | 2 +
.../caps_5.2.0.aarch64.xml | 2 +
.../qemucapabilitiesdata/caps_5.2.0.ppc64.xml | 2 +
.../caps_5.2.0.riscv64.xml | 2 +
.../qemucapabilitiesdata/caps_5.2.0.s390x.xml | 2 +
.../caps_5.2.0.x86_64.xml | 2 +
.../caps_6.0.0.aarch64.xml | 2 +
.../qemucapabilitiesdata/caps_6.0.0.s390x.xml | 2 +
.../caps_6.0.0.x86_64.xml | 2 +
.../caps_6.1.0.x86_64.xml | 2 +
.../caps_6.2.0.aarch64.xml | 2 +
.../qemucapabilitiesdata/caps_6.2.0.ppc64.xml | 2 +
.../caps_6.2.0.x86_64.xml | 2 +
.../caps_7.0.0.aarch64.xml | 2 +
.../qemucapabilitiesdata/caps_7.0.0.ppc64.xml | 2 +
.../caps_7.0.0.x86_64.xml | 2 +
.../caps_7.1.0.x86_64.xml | 2 +
tools/virsh-domain.c | 101 ++-
55 files changed, 1748 insertions(+), 445 deletions(-)
create mode 100644 src/util/multifd-helper.c
--
2.26.2
1 year, 3 months
[PATCH 00/18] RFC: Remove deprecated audio features
by Martin Kletzander
I wanted to deal with https://bugzilla.redhat.com/2043498 and I got a
suggesstion that removing deprecated features could actually make it
easier to propagate the error. In the end (last patch) it turns out the
error is still just reported with error_fatal, so it probably is not
really needed, but I really wanted to dig into QEMU more and learn some
of the internals for quite some time now. So I used the opportunity.
The one-liner ended up being an 18 patch series which was, for someone
who has just one commit in QEMU codebase, a pretty challenging task.
Although I tried my best to do things properly, I am not sure whether I
handled everything correctly, hence the RFC.
Any comments are very much appreciated. Thanks and have a nice day ;)
Martin Kletzander (18):
hw/audio: Remove -soundhw support
hw/input/tsc210x: Extract common init code into new function
hw/audio: Simplify hda audio init
hw/audio/lm4549: Add errp error reporting to init function
tests/qtest: Specify audiodev= and -audiodev
ui/vnc: Require audiodev=
Introduce machine's default-audiodev property
audio: Add easy dummy audio initialiser
hw/display/xlnx_dp.c: Add audiodev property
hw/input/tsc210x.c: Support machine-default audiodev with fallback
hw/arm: Support machine-default audiodev with fallback
hw/ppc: Support machine-default audiodev with fallback
audio: Make AUD_register_card fallible and require audiodev=
audio: Require AudioState in AUD_add_capture
audio: Be more strict during audio backend initialisation
audio: Remove legacy audio environment variables and options
audio: Remove unused can_be_default
audio/spiceaudio: Fail initialisation when not using spice
audio/alsaaudio.c | 1 -
audio/audio.c | 204 +++----
audio/audio.h | 5 +-
audio/audio_int.h | 1 -
audio/audio_legacy.c | 555 ------------------
audio/coreaudio.m | 1 -
audio/dbusaudio.c | 1 -
audio/dsoundaudio.c | 1 -
audio/jackaudio.c | 1 -
audio/meson.build | 1 -
audio/noaudio.c | 1 -
audio/ossaudio.c | 1 -
audio/paaudio.c | 1 -
audio/sdlaudio.c | 1 -
audio/spiceaudio.c | 3 +-
audio/wavaudio.c | 1 -
docs/about/deprecated.rst | 24 -
docs/about/removed-features.rst | 27 +
docs/qdev-device-use.txt | 21 +-
docs/replay.txt | 2 +-
hw/arm/integratorcp.c | 8 +-
hw/arm/musicpal.c | 8 +-
hw/arm/omap2.c | 11 +-
hw/arm/realview.c | 3 +
hw/arm/spitz.c | 10 +-
hw/arm/versatilepb.c | 3 +
hw/arm/vexpress.c | 3 +
hw/arm/xlnx-zcu102.c | 4 +
hw/arm/z2.c | 12 +-
hw/audio/ac97.c | 9 +-
hw/audio/adlib.c | 9 +-
hw/audio/cs4231a.c | 8 +-
hw/audio/es1370.c | 8 +-
hw/audio/gus.c | 6 +-
hw/audio/hda-codec.c | 37 +-
hw/audio/intel-hda.c | 25 +-
hw/audio/intel-hda.h | 2 +-
hw/audio/lm4549.c | 7 +-
hw/audio/lm4549.h | 3 +-
hw/audio/meson.build | 1 -
hw/audio/pcspk.c | 15 +-
hw/audio/pl041.c | 2 +-
hw/audio/sb16.c | 9 +-
hw/audio/soundhw.c | 177 ------
hw/audio/wm8750.c | 5 +-
hw/core/machine.c | 23 +
hw/display/xlnx_dp.c | 12 +-
hw/input/tsc210x.c | 79 ++-
hw/ppc/prep.c | 4 +
hw/usb/dev-audio.c | 5 +-
include/hw/audio/soundhw.h | 15 -
include/hw/boards.h | 1 +
qemu-options.hx | 37 --
.../codeconverter/test_regexps.py | 1 -
softmmu/qdev-monitor.c | 2 -
softmmu/vl.c | 10 -
tests/qtest/ac97-test.c | 3 +-
tests/qtest/es1370-test.c | 3 +-
tests/qtest/fuzz/generic_fuzz_configs.h | 6 +-
tests/qtest/intel-hda-test.c | 15 +-
ui/vnc.c | 15 +-
61 files changed, 329 insertions(+), 1140 deletions(-)
delete mode 100644 audio/audio_legacy.c
delete mode 100644 hw/audio/soundhw.c
delete mode 100644 include/hw/audio/soundhw.h
--
2.35.1
1 year, 4 months
[PATCH v2] util: basic support for VFIO variant drivers
by Laine Stump
Before a PCI device can be assigned to a guest with VFIO, that device
must be bound to the vfio-pci driver rather than to the device's
normal driver. The vfio-pci driver provides APIs that permit QEMU to
perform all the necessary operations to make the device accessible to
the guest.
There has been kernel work recently to support vendor/device-specific
VFIO variant drivers that provide the basic vfio-pci driver functionality
while adding support for device-specific operations (for example these
device-specific drivers are planned to support live migration of
certain devices). All that will be needed to make this functionality
available will be to bind the new vendor-specific driver to the device
(rather than the generic vfio-pci driver, which will continue to work
just without the extra functionality).
But until now libvirt has required that all PCI devices being assigned
to a guest with VFIO specifically have the "vfio-pci" driver bound to
the device. So even if the user manually binds a shiny new
vendor-specific vfio variant driver to the device (and puts
"managed='no'" in the config to prevent libvirt from changing the
binding), libvirt will just fail during startup of the guest (or
during hotplug) because the driver bound to the device isn't exactly
"vfio-pci".
This patch loosens that restriction a bit - rather than requiring that
the device be bound to "vfio-pci", it also checks if the drivername
contains the string "vfio" at all, and in this case allows the
operation to continue. If the driver is in fact a VFIO variant, then
the assignment will succeed, but if it is not a VFIO variant then QEMU
will fail (and report the error back to libvirt).
In the near future (possibly by kernel 6.0) there will be a
formal method of identifying a VFIO variant driver by looking in
sysfs; in the meantime the inexact, but simple, method in this patch
will allow users of the few existing VFIO variant drivers (and
developers of new VFIO variant drivers) to use their new drivers
without needing to remove libvirt from their setup - they can simply
pre-bind the device to the new driver, then use "managed='no'" in
their libvirt config.
NB: this patch does *not* handle automatically determining the proper
vendor-specific driver and binding to it in the case of
"managed='yes'". This will be implemented later when there is a widely
available driver / device combo we can use for testing.
Signed-off-by: Laine Stump <laine(a)redhat.com>
---
V1 here: https://listman.redhat.com/archives/libvir-list/2022-August/233327.html
Change in V2:
V1 used the output of modinfo to look for "vfio_pci" as an alias for a
driver to see if it was a VFIO variant driver.
As a result of discussion of V1, V2 is much simpler - it just assumes
that any driver with "vfio" in the name is a VFIO variant. This is
okay because 1) QEMU will still catch it and libvirt will properly log
the error if the driver isn't actually a VFIO variant, and 2) it's a
temporary situation, just to enable use of VFIO variant drivers with
libvirt until a standard method of detecting this is added to sysfs
(which, according to the discussion of V1, is coming in the near
future).
(NB: I did implement checking of /lib/modules/`uname -r`/modules.alias
as suggested by Erik, but it turned out that this caused the unit
tests to call uname(3) and open the modules.alias file on the test
host - for a proper unit test I would have also needed to mock these
two functions, and it seemed like too much complexity for a temporary
workaround. I've implemented Jason's suggestion here (accept any
driver with "vfio" in the name), which is similar to danpb's
suggestion (accept specifically the two drivers that are already in
the upstream kernel), but will also allow for new drivers that may be
under development.)
src/hypervisor/virhostdev.c | 26 ++++---------
src/util/virpci.c | 76 ++++++++++++++++++++++++++++++++++---
src/util/virpci.h | 3 ++
3 files changed, 82 insertions(+), 23 deletions(-)
diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c
index c0ce867596..15b35fa75e 100644
--- a/src/hypervisor/virhostdev.c
+++ b/src/hypervisor/virhostdev.c
@@ -747,9 +747,8 @@ virHostdevPreparePCIDevicesImpl(virHostdevManager *mgr,
mgr->inactivePCIHostdevs) < 0)
goto reattachdevs;
} else {
- g_autofree char *driverPath = NULL;
- g_autofree char *driverName = NULL;
- int stub;
+ g_autofree char *drvName = NULL;
+ virPCIStubDriver drvType;
/* Unmanaged devices should already have been marked as
* inactive: if that's the case, we can simply move on */
@@ -769,18 +768,14 @@ virHostdevPreparePCIDevicesImpl(virHostdevManager *mgr,
* information about active / inactive device across
* daemon restarts has been implemented */
- if (virPCIDeviceGetDriverPathAndName(pci,
- &driverPath, &driverName) < 0)
+ if (virPCIDeviceGetDriverNameAndType(pci, &drvName, &drvType) < 0)
goto reattachdevs;
- stub = virPCIStubDriverTypeFromString(driverName);
-
- if (stub > VIR_PCI_STUB_DRIVER_NONE &&
- stub < VIR_PCI_STUB_DRIVER_LAST) {
+ if (drvType > VIR_PCI_STUB_DRIVER_NONE) {
/* The device is bound to a known stub driver: store this
* information and add a copy to the inactive list */
- virPCIDeviceSetStubDriver(pci, stub);
+ virPCIDeviceSetStubDriver(pci, drvType);
VIR_DEBUG("Adding PCI device %s to inactive list",
virPCIDeviceGetName(pci));
@@ -2292,18 +2287,13 @@ virHostdevPrepareOneNVMeDevice(virHostdevManager *hostdev_mgr,
/* Let's check if all PCI devices are NVMe disks. */
for (i = 0; i < virPCIDeviceListCount(pciDevices); i++) {
virPCIDevice *pci = virPCIDeviceListGet(pciDevices, i);
- g_autofree char *drvPath = NULL;
g_autofree char *drvName = NULL;
- int stub = VIR_PCI_STUB_DRIVER_NONE;
+ virPCIStubDriver drvType;
- if (virPCIDeviceGetDriverPathAndName(pci, &drvPath, &drvName) < 0)
+ if (virPCIDeviceGetDriverNameAndType(pci, &drvName, &drvType) < 0)
goto cleanup;
- if (drvName)
- stub = virPCIStubDriverTypeFromString(drvName);
-
- if (stub == VIR_PCI_STUB_DRIVER_VFIO ||
- STREQ_NULLABLE(drvName, "nvme"))
+ if (drvType == VIR_PCI_STUB_DRIVER_VFIO || STREQ_NULLABLE(drvName, "nvme"))
continue;
VIR_WARN("Suspicious NVMe disk assignment. PCI device "
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 7800966963..51ccf4d9fd 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -277,6 +277,71 @@ virPCIDeviceGetDriverPathAndName(virPCIDevice *dev, char **path, char **name)
}
+/**
+ * virPCIDeviceGetDriverNameAndType:
+ * @dev: virPCIDevice object to examine
+ * @drvName: returns name of driver bound to this device (if any)
+ * @drvType: returns type of driver if it is a known stub driver type
+ *
+ * Find the name of the driver bound to @dev (if any) and the type of
+ * the driver if it is a known/recognized "stub" driver (based on the
+ * driver name).
+ *
+ * There are vfio "variant" drivers that provide all the basic
+ * functionality of the standard vfio-pci driver as well as additional
+ * stuff. There is a plan to add info to sysfs that will allow easily
+ * determining if a driver is a vfio variant driver, but that sysfs
+ * entry isn't yet available. In the meantime as a workaround so that
+ * the few existing vfio variant drivers can be used with libvirt, and
+ * so that driver developers can test their new vfio variant drivers
+ * without needing to bypass libvirt, we also check if the driver name
+ * contains the string "vfio"; if it does, then we consider this drier
+ * as type VFIO. This can lead to false positives, but that isn't a
+ * horrible thing, because the problem will still be caught by QEMU as
+ * soon as libvirt makes the request to attach the device.
+ *
+ * Return 0 on success, -1 on failure. If -1 is returned, then an error
+ * message has been logged.
+ */
+int
+virPCIDeviceGetDriverNameAndType(virPCIDevice *dev,
+ char **drvName,
+ virPCIStubDriver *drvType)
+{
+ g_autofree char *drvPath = NULL;
+ int tmpType;
+
+ if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, drvName) < 0)
+ return -1;
+
+ if (!*drvName) {
+ *drvType = VIR_PCI_STUB_DRIVER_NONE;
+ return 0;
+ }
+
+ tmpType = virPCIStubDriverTypeFromString(*drvName);
+
+ if (tmpType > VIR_PCI_STUB_DRIVER_NONE) {
+ *drvType = tmpType;
+ return 0; /* exact match of a known driver name (or no name) */
+ }
+
+ /* Check if the drivername contains "vfio" and count as a VFIO
+ * driver if so - see above for explanation.
+ */
+
+ if (strstr(*drvName, "vfio")) {
+ VIR_DEBUG("Driver %s is a vfio_pci driver", *drvName);
+ *drvType = VIR_PCI_STUB_DRIVER_VFIO;
+ } else {
+ VIR_DEBUG("Driver %s is NOT a vfio_pci driver", *drvName);
+ *drvType = VIR_PCI_STUB_DRIVER_NONE;
+ }
+
+ return 0;
+}
+
+
static int
virPCIDeviceConfigOpenInternal(virPCIDevice *dev, bool readonly, bool fatal)
{
@@ -1004,8 +1069,8 @@ virPCIDeviceReset(virPCIDevice *dev,
virPCIDeviceList *activeDevs,
virPCIDeviceList *inactiveDevs)
{
- g_autofree char *drvPath = NULL;
g_autofree char *drvName = NULL;
+ virPCIStubDriver drvType;
int ret = -1;
int fd = -1;
int hdrType = -1;
@@ -1032,15 +1097,16 @@ virPCIDeviceReset(virPCIDevice *dev,
* reset it whenever appropriate, so doing it ourselves would just
* be redundant.
*/
- if (virPCIDeviceGetDriverPathAndName(dev, &drvPath, &drvName) < 0)
+ if (virPCIDeviceGetDriverNameAndType(dev, &drvName, &drvType) < 0)
goto cleanup;
- if (virPCIStubDriverTypeFromString(drvName) == VIR_PCI_STUB_DRIVER_VFIO) {
- VIR_DEBUG("Device %s is bound to vfio-pci - skip reset",
- dev->name);
+ if (drvType == VIR_PCI_STUB_DRIVER_VFIO) {
+
+ VIR_DEBUG("Device %s is bound to %s - skip reset", dev->name, drvName);
ret = 0;
goto cleanup;
}
+
VIR_DEBUG("Resetting device %s", dev->name);
if ((fd = virPCIDeviceConfigOpenWrite(dev)) < 0)
diff --git a/src/util/virpci.h b/src/util/virpci.h
index 4d9193f24e..0532b90f90 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -280,6 +280,9 @@ int virPCIDeviceRebind(virPCIDevice *dev);
int virPCIDeviceGetDriverPathAndName(virPCIDevice *dev,
char **path,
char **name);
+int virPCIDeviceGetDriverNameAndType(virPCIDevice *dev,
+ char **drvName,
+ virPCIStubDriver *drvType);
int virPCIDeviceIsPCIExpress(virPCIDevice *dev);
int virPCIDeviceHasPCIExpressLink(virPCIDevice *dev);
--
2.37.1
1 year, 7 months
Summary on new backup interfaces in QEMU
by Vladimir Sementsov-Ogievskiy
Hi all!
Here I want to summarize new interfaces and use cases for backup in QEMU.
TODO for me: convert this into good rst documentation in docs/.
OK, let's begin.
First, note that drive-backup qmp command is deprecated.
Next, some terminology:
push backup: the whole process is inside QEMU process, also may be called "internal backup"
pull backup: QEMU only exports a kind of snapshot (for example by NBD), and third party software reads this export and stores it somehow, also called "external backup"
copy-before-write operations: We usually do backup of active disk, guest is running and may write to the disk during the process of backup. When guest wants to rewrite data region which is not backed up yet, we must stop this guest write, and copy original data to somewhere before continuing guest write. That's a copy-before-write operation.
image-fleecing: the technique that allows to export a "snapshotted" state of the active disk with help of copy-before-write operations. We create a temporary image - target for copy-before-write operations, and provide an interface to the user to read the "snapshotted" state. And for read, we do read from temporary image the data which is already changed in original active disk, and we read unchanged data directly from active disk. The temporary image itself is also called "reverse delta" or "reversed delta".
== Simple push backup ==
Just use blockdev-backup, nothing new here. I just note some technical details, that are relatively new:
1. First, backup job inserts copy-before-write filter above source disk, to do copy-before-write operation.
2. Created copy-before-write filter shares internal block-copy state with backup job, so they work in collaboration, to not copy same things twice.
== Full pull backup ==
Assume, we are going to do incremental backup in future, so we also need to create a dirty bitmap, to track dirtiness of active disk since full backup.
1. Create empty temporary image for fleecing. It must be of the same size as active disk. It's not necessary to be qcow2, and if it's a qcow2, you shouldn't make the original active disk a backing file for the new temporary qcow2 image (it was necessary in old fleecing scheme).
Example:
qemu-img create -f qcow2 temp.qcow2 64G
2. Initialize fleecing scheme and create dirty bitmap for future incremental backup.
Assume, disk0 is an active disk, attached to qdev-id sda, to be backed up.
qmp: transaction [
block-dirty-bitmap-add {node: disk0, name: bitmap0, persistent: true}
blockdev-add* {node-name: tmp-protocol, driver: file, filename: temp.qcow2}
blockdev-add {node-name: tmp, driver: qcow2, file: tmp-protocol}
blockdev-add {node-name: cbw, driver: copy-before-write, file: disk0, target: tmp}
blockdev-replace** {parent-type: qdev, qdev-id: sda, new-child: cbw}
blockdev-add {node-name: acc, driver: snapshot-access, file: cbw}
]
qmp: nbd-server-start {...}
qmp: nbd-server-add {device: acc, ...}
This way we create the following block-graph:
[guest] [NBD export]
| |
| root | root
v file v
[copy-before-write]<------[snapshot-access]
| |
| file | target
v v
[active-disk] [temp.qcow2]
* "[PATCH 0/2] blockdev-add transaction" series needed for this
** "[PATCH v3 00/11] blockdev-replace" series needed for this
Note additional useful options for copy-before-write filter:
"[PATCH 0/3] block: copy-before-write: on-cbw-error behavior" provides possibility of option on-cbw-error=break-snapshot, which means that on failure of CBW operation we will not break guest write, but instead all further reads by NBD client will fail, which formally means: break the backup process, not guest write.
"[PATCH 0/4] block: copy-before-write: cbw-timeout" provides an option cbw-timeout, to set a timeout for CBW operations. That's very useful to avoid guest stuck.
3. Now third party backup tool can read data from NBD export
NBD_CMD_TRIM (discard) operation is supported on the export, it has the following effects:
1. discard this data from temp image, if it is stored here
2. avoid further copy-before-write operation (guest is free to rewrite corresponding data with no extra latency)
3. all further read requests from discarded areas by NBD client will fail
So, NBD client may discard regions that are already backed up to avoid extra latency for guest writes and to free disk space on the host.
Possible TODO here is to implement NBD protocol extension, that allows to READ & DISCARD in command. In this case we avoid extra command in the wire, but lose possibility of retrying the READ operation if it failed.
4. After backup is complete, we should destroy the fleecing scheme:
qmp: nbd-server-stop
qmp: blockdev-del {node-name: acc}
qmp: blockdev-replace {parent-type: qdev, qdev-id: sda, new-child: disk0}
qmp: blockdev-del {node-name: cbw}
qmp: blockdev-del {node-name: tmp}
qmp: blockdev-del {node-name: tmp-protocol}
5. If backup failed, we should remove created dirty bitmap:
qmp: block-dirty-bitmap-remove {node: disk0, name: bitmap0}
== Incremental pull backup ==
OK, now we have a bitmap called bitmap0, and want to do incremental backup, accordingly to that bitmap. In short, we want:
- create a new bitmap to continue dirty tracking for next incremental backup
- export "snapshotted" state of disk0 through NBD
- export "frozen" bitmap, so that external tool know what to copy
Mostly, all points remains the same, let's go through:
1. Create empty temporary image for fleecing -- same as for full backup, no difference
2. Initialize fleecing scheme and create dirty bitmap for future incremental backup.
qmp: transaction [
block-dirty-bitmap-add {node: disk0, name: bitmap1, persistent: true}
block-dirty-bitmap-disable {node: disk0, name: bitmap0}
blockdev-add {node-name: tmp-protocol, driver: file, filename: temp.qcow2}
blockdev-add {node-name: tmp, driver: qcow2, file: tmp-protocol}
blockdev-add {node-name: cbw, driver: copy-before-write, file: disk0, target: tmp, bitmap: {node: disk0, name: bitmap0}}
blockdev-replace {parent-type: qdev, qdev-id: sda, new-child: cbw}
blockdev-add {node-name: acc, driver: snapshot-access, file: cbw}
]
qmp: nbd-server-start {...}
qmp: block-export-add {type: nbd, node-name: acc, bitmaps: [{node: disk0, name: bitmap0}]}
3. Now third party backup tool can read data from NBD export
- Client may negotiate meta contexts, to query exported dirty bitmap by NBD_BLOCK_STATUS commend
- If client reads "not-dirty" (by bitmap0) areas, it gets an error.
- NBD_CMD_TRIM (discard) works as for full backup, no difference
4. After backup is complete, we should destroy the fleecing scheme:
- Same as for full backup
5. Next, we should handle dirty bitmaps:
5.1 Failure path
Merge-back bitmap1 to bitmap0 and continue tracking in bitmap0:
qmp: transaction [
block-dirty-bitmap-enable {node: disk0, name: bitmap0}
block-dirty-bitmap-merge {node: disk0, target: bitmap0, bitmaps: ['bitmap1']}
block-dirty-bitmap-remove {node: disk0, name: bitmap1}
]
5.2 Success path
We have two possible user scenarios on success:
5.2.1 Continue tracking for next incremental backup in bitmap1
In this case, just remove bitmap0:
qmp: block-dirty-bitmap-remove {node: disk0, name: bitmap0}
Or you may not delete bitmap0 and keep it disabled, to be reused in future for differential backup (see below).
5.2.2 Continue tracking for next incremental backup in bitmap0 (assume, we always work with one bitmap and don't want any kind of differential backups, and don't associate bitmap name with stored backups)
In this case, enable and clear bitmap0, merge bitmap1 to bitmap0 and remove bitmap1:
qmp: transaction [
block-dirty-bitmap-enable {node: disk0, name: bitmap0}
block-dirty-bitmap-clear {node: disk0, name: bitmap0}
block-dirty-bitmap-merge {node: disk0, target: bitmap0, bitmaps: ['bitmap1']}
block-dirty-bitmap-remove {node: disk0, name: bitmap1}
]
== Push backup with fleecing full/incremental ==
Reasoning: the main problem of simple push backup is that guest writes may be seriously affected by copy-on-write operations, when backup target is slow. To solve this problem, we'll use the scheme like for pull backup: we create local temporary image, which is a target for copy-before-write operations, and instead of exporting the "snapshot-access" node we start internal backup from it to the target.
So, the scheme and commands looks exactly the same as for full and incremental pull backup. The only difference is that we don't need to start nbd export, but instead we should add target node to qemu and start internal backup. And good thing is that it may be done in same transaction with initializing fleecing scheme:
qmp: transaction [
... initialize fleecing scheme for full or incremental backup ...
# Add target node. Here is qcow2 added, but it may be nbd node or something else
blockdev-add {node-name: target-protocol, driver: file, filename: target.qcow2}
blockdev-add {node-name: target, driver: qcow2, file: target-protocol}
# Start backup
blockdev-backup {device: acc, target: target, ...}
]
If it is an incremental backup, pass also bitmap parameter:
blockdev-backup {..., bitmap: bitmap0, sync: incremental, bitmap-mode: never}
Note bitmap-mode=never: this means that backup will do nothing with bitmap0, so we have same scheme like for pull backups (handle bitmaps by hand after backup). Still, push-backup scheme may be adopted to use other bitmap modes.
What we lack here is discarding in 'acc' node after successful copying of the block to the target, to safe disk space and avoid extra copy-before-write operations. It's a TODO, should be implemented like discard-source parameter for blockdev-backup.
== Differential backups ==
I'm not fan of this idea, but I think it should be described.
Assume we have already a chain of incremental backups (represented as qcow2 chain on backup storage server, for example). They corresponds to some points in time: T0, T1, T2, T3. Assume T3 is the last backup.
If we want to create usual incremental backup, it would be diff between T3 and current time (which becomes T4).
Differential backup say: I want to make backup starting from T1 to current time. What's for? Maybe T2 and T3 was removed or somehow damaged..
How to do that in Qemu: on each incremental backup you start a new bitmap, and _keep_ old one as disabled.
This way we have bitmap0 (which presents diff between T0 and T1), bitmap1 (diff T1 T2), bitmap2 (diff T2 T3), and bitmap3 which shows diff from T3 up to current time. bitmap3 is the only enabled bitmap and others are disabled.
So, to make differential backup, use block-dirty-bitmap-merge command, to merge all bitmaps you need into one, and than use it in any backup scheme.
The drawback is that all these disabled bitmaps eat RAM. Possible solution is to not keep them in RAM, it's OK to keep them in qcow2, and load only on demand. That's not realized now and that's a TODO for thous who want differential backups.
--
Best regards,
Vladimir
1 year, 11 months
[PATCH] cpu_map: Add -noMPX models for x86 Icelake Server
by Lena Voytek
Intel has removed MPX capabilities from 10nm Icelake CPUs[1], which is
reflected by the new models through the line marking mpx as removed.
The original Icelake Server models have been left alone to avoid regressions.
This adds:
-Icelake-Server-noMPX
-Icelake-Server-noTSX-noMPX
References:
[1] Memory Protection Extensions support removal
https://www.intel.com/content/www/us/en/support/articles/000059823/proces...
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1978064
https://gitlab.com/libvirt/libvirt/-/issues/304
Signed-off-by: Lena Voytek <lena.voytek(a)canonical.com>
---
src/cpu_map/index.xml | 2 +
src/cpu_map/x86_Icelake-Server-noMPX.xml | 93 +++++++++++++++++++
.../x86_Icelake-Server-noTSX-noMPX.xml | 91 ++++++++++++++++++
3 files changed, 186 insertions(+)
create mode 100644 src/cpu_map/x86_Icelake-Server-noMPX.xml
create mode 100644 src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
diff --git a/src/cpu_map/index.xml b/src/cpu_map/index.xml
index 351c2ae4fa..62c3d44a5c 100644
--- a/src/cpu_map/index.xml
+++ b/src/cpu_map/index.xml
@@ -54,6 +54,8 @@
<include filename='x86_Icelake-Client-noTSX.xml'/>
<include filename='x86_Icelake-Server.xml'/>
<include filename='x86_Icelake-Server-noTSX.xml'/>
+ <include filename='x86_Icelake-Server-noMPX.xml'/>
+ <include filename='x86_Icelake-Server-noTSX-noMPX.xml'/>
<include filename='x86_Cooperlake.xml'/>
<include filename='x86_Snowridge.xml'/>
diff --git a/src/cpu_map/x86_Icelake-Server-noMPX.xml b/src/cpu_map/x86_Icelake-Server-noMPX.xml
new file mode 100644
index 0000000000..feaf21f91f
--- /dev/null
+++ b/src/cpu_map/x86_Icelake-Server-noMPX.xml
@@ -0,0 +1,93 @@
+<cpus>
+ <model name='Icelake-Server-noMPX'>
+ <decode host='on' guest='on'/>
+ <signature family='6' model='106'/> <!-- 0606A5 -->
+ <vendor name='Intel'/>
+ <feature name='3dnowprefetch'/>
+ <feature name='abm'/>
+ <feature name='adx'/>
+ <feature name='aes'/>
+ <feature name='apic'/>
+ <feature name='arat'/>
+ <feature name='avx'/>
+ <feature name='avx2'/>
+ <feature name='avx512-vpopcntdq'/>
+ <feature name='avx512bitalg'/>
+ <feature name='avx512bw'/>
+ <feature name='avx512cd'/>
+ <feature name='avx512dq'/>
+ <feature name='avx512f'/>
+ <feature name='avx512vbmi'/>
+ <feature name='avx512vbmi2'/>
+ <feature name='avx512vl'/>
+ <feature name='avx512vnni'/>
+ <feature name='bmi1'/>
+ <feature name='bmi2'/>
+ <feature name='clflush'/>
+ <feature name='clflushopt'/>
+ <feature name='clwb'/>
+ <feature name='cmov'/>
+ <feature name='cx16'/>
+ <feature name='cx8'/>
+ <feature name='de'/>
+ <feature name='erms'/>
+ <feature name='f16c'/>
+ <feature name='fma'/>
+ <feature name='fpu'/>
+ <feature name='fsgsbase'/>
+ <feature name='fxsr'/>
+ <feature name='gfni'/>
+ <feature name='hle'/>
+ <feature name='intel-pt' removed='yes'/>
+ <feature name='invpcid'/>
+ <feature name='la57'/>
+ <feature name='lahf_lm'/>
+ <feature name='lm'/>
+ <feature name='mca'/>
+ <feature name='mce'/>
+ <feature name='mmx'/>
+ <feature name='movbe'/>
+ <feature name='mpx' removed='yes'/>
+ <feature name='msr'/>
+ <feature name='mtrr'/>
+ <feature name='nx'/>
+ <feature name='pae'/>
+ <feature name='pat'/>
+ <feature name='pcid'/>
+ <feature name='pclmuldq'/>
+ <feature name='pdpe1gb'/>
+ <feature name='pge'/>
+ <feature name='pku'/>
+ <feature name='pni'/>
+ <feature name='popcnt'/>
+ <feature name='pse'/>
+ <feature name='pse36'/>
+ <feature name='rdrand'/>
+ <feature name='rdseed'/>
+ <feature name='rdtscp'/>
+ <feature name='rtm'/>
+ <feature name='sep'/>
+ <feature name='smap'/>
+ <feature name='smep'/>
+ <feature name='spec-ctrl'/>
+ <feature name='ssbd'/>
+ <feature name='sse'/>
+ <feature name='sse2'/>
+ <feature name='sse4.1'/>
+ <feature name='sse4.2'/>
+ <feature name='ssse3'/>
+ <feature name='syscall'/>
+ <feature name='tsc'/>
+ <feature name='tsc-deadline'/>
+ <feature name='umip'/>
+ <feature name='vaes'/>
+ <feature name='vme'/>
+ <feature name='vpclmulqdq'/>
+ <feature name='wbnoinvd'/>
+ <feature name='x2apic'/>
+ <feature name='xgetbv1'/>
+ <feature name='xsave'/>
+ <feature name='xsavec'/>
+ <feature name='xsaveopt'/>
+ </model>
+</cpus>
diff --git a/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml b/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
new file mode 100644
index 0000000000..e55da83ddf
--- /dev/null
+++ b/src/cpu_map/x86_Icelake-Server-noTSX-noMPX.xml
@@ -0,0 +1,91 @@
+<cpus>
+ <model name='Icelake-Server-noTSX-noMPX'>
+ <decode host='on' guest='off'/>
+ <signature family='6' model='106'/> <!-- 0606A5 -->
+ <vendor name='Intel'/>
+ <feature name='3dnowprefetch'/>
+ <feature name='abm'/>
+ <feature name='adx'/>
+ <feature name='aes'/>
+ <feature name='apic'/>
+ <feature name='arat'/>
+ <feature name='avx'/>
+ <feature name='avx2'/>
+ <feature name='avx512-vpopcntdq'/>
+ <feature name='avx512bitalg'/>
+ <feature name='avx512bw'/>
+ <feature name='avx512cd'/>
+ <feature name='avx512dq'/>
+ <feature name='avx512f'/>
+ <feature name='avx512vbmi'/>
+ <feature name='avx512vbmi2'/>
+ <feature name='avx512vl'/>
+ <feature name='avx512vnni'/>
+ <feature name='bmi1'/>
+ <feature name='bmi2'/>
+ <feature name='clflush'/>
+ <feature name='clflushopt'/>
+ <feature name='clwb'/>
+ <feature name='cmov'/>
+ <feature name='cx16'/>
+ <feature name='cx8'/>
+ <feature name='de'/>
+ <feature name='erms'/>
+ <feature name='f16c'/>
+ <feature name='fma'/>
+ <feature name='fpu'/>
+ <feature name='fsgsbase'/>
+ <feature name='fxsr'/>
+ <feature name='gfni'/>
+ <feature name='intel-pt' removed='yes'/>
+ <feature name='invpcid'/>
+ <feature name='la57'/>
+ <feature name='lahf_lm'/>
+ <feature name='lm'/>
+ <feature name='mca'/>
+ <feature name='mce'/>
+ <feature name='mmx'/>
+ <feature name='movbe'/>
+ <feature name='mpx' removed='yes'/>
+ <feature name='msr'/>
+ <feature name='mtrr'/>
+ <feature name='nx'/>
+ <feature name='pae'/>
+ <feature name='pat'/>
+ <feature name='pcid'/>
+ <feature name='pclmuldq'/>
+ <feature name='pdpe1gb'/>
+ <feature name='pge'/>
+ <feature name='pku'/>
+ <feature name='pni'/>
+ <feature name='popcnt'/>
+ <feature name='pse'/>
+ <feature name='pse36'/>
+ <feature name='rdrand'/>
+ <feature name='rdseed'/>
+ <feature name='rdtscp'/>
+ <feature name='sep'/>
+ <feature name='smap'/>
+ <feature name='smep'/>
+ <feature name='spec-ctrl'/>
+ <feature name='ssbd'/>
+ <feature name='sse'/>
+ <feature name='sse2'/>
+ <feature name='sse4.1'/>
+ <feature name='sse4.2'/>
+ <feature name='ssse3'/>
+ <feature name='syscall'/>
+ <feature name='tsc'/>
+ <feature name='tsc-deadline'/>
+ <feature name='umip'/>
+ <feature name='vaes'/>
+ <feature name='vme'/>
+ <feature name='vpclmulqdq'/>
+ <feature name='wbnoinvd'/>
+ <feature name='x2apic'/>
+ <feature name='xgetbv1'/>
+ <feature name='xsave'/>
+ <feature name='xsavec'/>
+ <feature name='xsaveopt'/>
+ </model>
+</cpus>
--
2.34.1
2 years