On Mon, Jul 03, 2023 at 02:30:28PM +0800, Lin Ma wrote:
VMX(kernel v6.0) supports Notification VM exit feature under commit
2f4073e0.
QEMU supports it as well since v7.2 under commit e2e69f6b.
Add this feature into libvirt now.
An example of Domain XML snippet to configure this feature:
<features>
<kvm>
<notify-vmexit state='on' mode='run'
notify-window='16384'/>
</kvm>
</features>
IIUC this setting is intended to fix a CVE, but it is opt-in so
everything remains vulnerable until all mgmt apps are udated to
add this. This is already off to a bad start, but lets suppose
we do want to update every single app to add this XML...
Is '16384' a good default value for notify-window ? If so why
hasn't QEMU just set this as the global default ? Is there
some downside to setting this that makes it impossible to just
"do the right thing" in QEMU ?
The original QEMU commit message isn't very enlightening about
how this should actually be used in practice.
I'm unenthusiastic about exposing settings like this from libvirt
unless there is credible guidance / documentation that makes it
possible for apps to follow a plan that's more than just guesswork.
Otherwise this just feels like a feature tickbox.
Signed-off-by: Lin Ma <lma(a)suse.de>
---
docs/formatdomain.rst | 4 +++
src/conf/domain_conf.c | 32 +++++++++++++++++++++
src/conf/domain_conf.h | 7 +++++
src/conf/schemas/domaincommon.rng | 17 +++++++++++
src/qemu/qemu_command.c | 3 ++
tests/qemuxml2argvdata/kvm-features-off.xml | 1 +
tests/qemuxml2argvdata/kvm-features.xml | 1 +
7 files changed, 65 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index f29449f749..452ee17367 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -1976,6 +1976,7 @@ Hypervisors may allow certain CPU / machine features to be toggled
on/off.
<poll-control state='on'/>
<pv-ipi state='off'/>
<dirty-ring state='on' size='4096'/>
+ <notify-vmexit state='on' mode='run'
notify-window='16384'/>
</kvm>
<xen>
<e820_host state='on'/>
@@ -2088,6 +2089,9 @@ are:
poll-control Decrease IO completion latency by introducing a grace period of busy
waiting on, off :since:`6.10.0 (QEMU 4.2)`
pv-ipi Paravirtualized send IPIs
on, off :since:`7.10.0 (QEMU 3.1)`
dirty-ring Enable dirty ring feature
on, off; size - must be power of 2, range [1024,65536] :since:`8.0.0 (QEMU 6.1)`
+ notify-vmexit Enable notification VM exit(x86 only) with attribute mode(accepted
values on, off :since:`9.5.0 (QEMU
7.2)`
+ are 'run', 'internal-error' and 'disable') and
optional attribute
+ notify-window (accepted numbered starting from 0)
==============
============================================================================
====================================================== ============================
``xen``
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 4121b6a054..c00d50d43d 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -216,6 +216,7 @@ VIR_ENUM_IMPL(virDomainKVM,
"poll-control",
"pv-ipi",
"dirty-ring",
+ "notify-vmexit",
);
VIR_ENUM_IMPL(virDomainXen,
@@ -16349,6 +16350,18 @@ virDomainFeaturesKVMDefParse(virDomainDef *def,
}
}
+ if (feature == VIR_DOMAIN_KVM_NOTIFY_VMEXIT &&
+ value == VIR_TRISTATE_SWITCH_ON) {
+
+ if (!(kvm->notify_vmexit.mode = virXMLPropStringRequired(node,
"mode"))) {
+ return -1;
+ }
+ if (virXMLPropUInt(node, "notify-window", 0, VIR_XML_PROP_NONE,
+ &kvm->notify_vmexit.notify_window) < 0) {
+ return -1;
+ }
+ }
+
node = xmlNextElementSibling(node);
}
@@ -20738,6 +20751,7 @@ virDomainDefFeaturesCheckABIStability(virDomainDef *src,
case VIR_DOMAIN_KVM_POLLCONTROL:
case VIR_DOMAIN_KVM_PVIPI:
case VIR_DOMAIN_KVM_DIRTY_RING:
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT:
if (src->kvm_features->features[i] !=
dst->kvm_features->features[i]) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_("State of KVM feature '%1$s' differs:
source: '%2$s', destination: '%3$s'"),
@@ -27202,6 +27216,24 @@ virDomainDefFormatFeatures(virBuffer *buf,
}
break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT:
+ if (def->kvm_features->features[j] !=
VIR_TRISTATE_SWITCH_ABSENT) {
+ virBufferAsprintf(&childBuf, "<%s
state='%s'",
+ virDomainKVMTypeToString(j),
+
virTristateSwitchTypeToString(def->kvm_features->features[j]));
+ if (def->kvm_features->notify_vmexit.mode != NULL) {
+ virBufferAsprintf(&childBuf, "
mode='%s'",
+
def->kvm_features->notify_vmexit.mode);
+ if (def->kvm_features->notify_vmexit.notify_window
&&
+ STRNEQ(def->kvm_features->notify_vmexit.mode,
"disable")) {
+ virBufferAsprintf(&childBuf, "
notify-window='%u'",
+
def->kvm_features->notify_vmexit.notify_window);
+ }
+ }
+ virBufferAddLit(&childBuf, "/>\n");
+ }
+ break;
+
case VIR_DOMAIN_KVM_LAST:
break;
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index cddaa3824d..5957f66b79 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2202,6 +2202,7 @@ typedef enum {
VIR_DOMAIN_KVM_POLLCONTROL,
VIR_DOMAIN_KVM_PVIPI,
VIR_DOMAIN_KVM_DIRTY_RING,
+ VIR_DOMAIN_KVM_NOTIFY_VMEXIT,
VIR_DOMAIN_KVM_LAST
} virDomainKVM;
@@ -2389,6 +2390,12 @@ struct _virDomainFeatureKVM {
int features[VIR_DOMAIN_KVM_LAST];
unsigned int dirty_ring_size; /* size of dirty ring for each vCPU, no units */
+ struct {
+ char *mode; /* option of notify vmexit */
+ unsigned int notify_window; /* A specified amount of time to generate
+ a VM exit if no interrupt windows occur
+ in VMX non-root operation */
+ } notify_vmexit;
};
typedef struct _virDomainFeatureTCG virDomainFeatureTCG;
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index fcf9e00600..84ebd2b03c 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -7754,6 +7754,23 @@
</optional>
</element>
</optional>
+ <optional>
+ <element name="notify-vmexit">
+ <ref name="featurestate"/>
+ <optional>
+ <attribute name="mode">
+ <data type="string">
+ <param
name="pattern">(run|internal-error|disable)</param>
+ </data>
+ </attribute>
+ </optional>
+ <optional>
+ <attribute name="notify-window">
+ <data type="unsignedInt"/>
+ </attribute>
+ </optional>
+ </element>
+ </optional>
</interleave>
</element>
</define>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index cde6ab4dde..656cf55c1e 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -6475,6 +6475,9 @@ qemuBuildCpuCommandLine(virCommand *cmd,
case VIR_DOMAIN_KVM_DIRTY_RING:
break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT:
+ break;
+
case VIR_DOMAIN_KVM_LAST:
break;
}
diff --git a/tests/qemuxml2argvdata/kvm-features-off.xml
b/tests/qemuxml2argvdata/kvm-features-off.xml
index 7ee6525cd9..d51a8324e0 100644
--- a/tests/qemuxml2argvdata/kvm-features-off.xml
+++ b/tests/qemuxml2argvdata/kvm-features-off.xml
@@ -16,6 +16,7 @@
<poll-control state='off'/>
<pv-ipi state='off'/>
<dirty-ring state='off'/>
+ <notify-vmexit state='off'/>
</kvm>
</features>
<cpu mode='host-passthrough' check='none'
migratable='off'/>
diff --git a/tests/qemuxml2argvdata/kvm-features.xml
b/tests/qemuxml2argvdata/kvm-features.xml
index 8ce3a2b987..c2d4ecf4d1 100644
--- a/tests/qemuxml2argvdata/kvm-features.xml
+++ b/tests/qemuxml2argvdata/kvm-features.xml
@@ -16,6 +16,7 @@
<poll-control state='on'/>
<pv-ipi state='on'/>
<dirty-ring state='on' size='4096'/>
+ <notify-vmexit state='on' mode='run'
notify-window='16384'/>
</kvm>
</features>
<cpu mode='host-passthrough' check='none'
migratable='off'/>
--
2.41.0
With regards,
Daniel
--
|:
https://berrange.com -o-
https://www.flickr.com/photos/dberrange :|
|:
https://libvirt.org -o-
https://fstop138.berrange.com :|
|:
https://entangle-photo.org -o-
https://www.instagram.com/dberrange :|