[libvirt] [PATCH RESEND 0/4] Introduce Notification VM exit feature (kvm x86 only)

Kernel supports Notification VM exit feature since v6.0 under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b. Lin Ma (4): conf: Introduce notify VM exit feature qemu: Validate notify VM exit feature is available only on x86 qemu: Generate command line for notify VM exit feature NEWS: Document notify VM exit feature NEWS.rst | 8 ++++++ docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 23 +++++++++++++-- src/qemu/qemu_validate.c | 12 +++++++- tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.args | 2 +- tests/qemuxml2argvdata/kvm-features.xml | 1 + 10 files changed, 102 insertions(+), 5 deletions(-) -- 2.41.0

VMX(kernel v6.0) supports Notification VM exit feature under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b. Add this feature into libvirt now. An example of Domain XML snippet to configure this feature: <features> <kvm> <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> Signed-off-by: Lin Ma <lma@suse.de> --- docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 3 ++ tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.xml | 1 + 7 files changed, 65 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index f29449f749..452ee17367 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1976,6 +1976,7 @@ Hypervisors may allow certain CPU / machine features to be toggled on/off. <poll-control state='on'/> <pv-ipi state='off'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> <xen> <e820_host state='on'/> @@ -2088,6 +2089,9 @@ are: poll-control Decrease IO completion latency by introducing a grace period of busy waiting on, off :since:`6.10.0 (QEMU 4.2)` pv-ipi Paravirtualized send IPIs on, off :since:`7.10.0 (QEMU 3.1)` dirty-ring Enable dirty ring feature on, off; size - must be power of 2, range [1024,65536] :since:`8.0.0 (QEMU 6.1)` + notify-vmexit Enable notification VM exit(x86 only) with attribute mode(accepted values on, off :since:`9.5.0 (QEMU 7.2)` + are 'run', 'internal-error' and 'disable') and optional attribute + notify-window (accepted numbered starting from 0) ============== ============================================================================ ====================================================== ============================ ``xen`` diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4121b6a054..c00d50d43d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -216,6 +216,7 @@ VIR_ENUM_IMPL(virDomainKVM, "poll-control", "pv-ipi", "dirty-ring", + "notify-vmexit", ); VIR_ENUM_IMPL(virDomainXen, @@ -16349,6 +16350,18 @@ virDomainFeaturesKVMDefParse(virDomainDef *def, } } + if (feature == VIR_DOMAIN_KVM_NOTIFY_VMEXIT && + value == VIR_TRISTATE_SWITCH_ON) { + + if (!(kvm->notify_vmexit.mode = virXMLPropStringRequired(node, "mode"))) { + return -1; + } + if (virXMLPropUInt(node, "notify-window", 0, VIR_XML_PROP_NONE, + &kvm->notify_vmexit.notify_window) < 0) { + return -1; + } + } + node = xmlNextElementSibling(node); } @@ -20738,6 +20751,7 @@ virDomainDefFeaturesCheckABIStability(virDomainDef *src, case VIR_DOMAIN_KVM_POLLCONTROL: case VIR_DOMAIN_KVM_PVIPI: case VIR_DOMAIN_KVM_DIRTY_RING: + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: if (src->kvm_features->features[i] != dst->kvm_features->features[i]) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("State of KVM feature '%1$s' differs: source: '%2$s', destination: '%3$s'"), @@ -27202,6 +27216,24 @@ virDomainDefFormatFeatures(virBuffer *buf, } break; + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + if (def->kvm_features->features[j] != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&childBuf, "<%s state='%s'", + virDomainKVMTypeToString(j), + virTristateSwitchTypeToString(def->kvm_features->features[j])); + if (def->kvm_features->notify_vmexit.mode != NULL) { + virBufferAsprintf(&childBuf, " mode='%s'", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&childBuf, " notify-window='%u'", + def->kvm_features->notify_vmexit.notify_window); + } + } + virBufferAddLit(&childBuf, "/>\n"); + } + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index cddaa3824d..5957f66b79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2202,6 +2202,7 @@ typedef enum { VIR_DOMAIN_KVM_POLLCONTROL, VIR_DOMAIN_KVM_PVIPI, VIR_DOMAIN_KVM_DIRTY_RING, + VIR_DOMAIN_KVM_NOTIFY_VMEXIT, VIR_DOMAIN_KVM_LAST } virDomainKVM; @@ -2389,6 +2390,12 @@ struct _virDomainFeatureKVM { int features[VIR_DOMAIN_KVM_LAST]; unsigned int dirty_ring_size; /* size of dirty ring for each vCPU, no units */ + struct { + char *mode; /* option of notify vmexit */ + unsigned int notify_window; /* A specified amount of time to generate + a VM exit if no interrupt windows occur + in VMX non-root operation */ + } notify_vmexit; }; typedef struct _virDomainFeatureTCG virDomainFeatureTCG; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index fcf9e00600..84ebd2b03c 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -7754,6 +7754,23 @@ </optional> </element> </optional> + <optional> + <element name="notify-vmexit"> + <ref name="featurestate"/> + <optional> + <attribute name="mode"> + <data type="string"> + <param name="pattern">(run|internal-error|disable)</param> + </data> + </attribute> + </optional> + <optional> + <attribute name="notify-window"> + <data type="unsignedInt"/> + </attribute> + </optional> + </element> + </optional> </interleave> </element> </define> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cde6ab4dde..656cf55c1e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6475,6 +6475,9 @@ qemuBuildCpuCommandLine(virCommand *cmd, case VIR_DOMAIN_KVM_DIRTY_RING: break; + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/tests/qemuxml2argvdata/kvm-features-off.xml b/tests/qemuxml2argvdata/kvm-features-off.xml index 7ee6525cd9..d51a8324e0 100644 --- a/tests/qemuxml2argvdata/kvm-features-off.xml +++ b/tests/qemuxml2argvdata/kvm-features-off.xml @@ -16,6 +16,7 @@ <poll-control state='off'/> <pv-ipi state='off'/> <dirty-ring state='off'/> + <notify-vmexit state='off'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> diff --git a/tests/qemuxml2argvdata/kvm-features.xml b/tests/qemuxml2argvdata/kvm-features.xml index 8ce3a2b987..c2d4ecf4d1 100644 --- a/tests/qemuxml2argvdata/kvm-features.xml +++ b/tests/qemuxml2argvdata/kvm-features.xml @@ -16,6 +16,7 @@ <poll-control state='on'/> <pv-ipi state='on'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> -- 2.41.0

On 7/3/23 1:30 AM, Lin Ma wrote:
VMX(kernel v6.0) supports Notification VM exit feature under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b.
Add this feature into libvirt now.
An example of Domain XML snippet to configure this feature: <features> <kvm> <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features>
Signed-off-by: Lin Ma <lma@suse.de> --- docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 3 ++ tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.xml | 1 + 7 files changed, 65 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index f29449f749..452ee17367 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1976,6 +1976,7 @@ Hypervisors may allow certain CPU / machine features to be toggled on/off. <poll-control state='on'/> <pv-ipi state='off'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> <xen> <e820_host state='on'/> @@ -2088,6 +2089,9 @@ are: poll-control Decrease IO completion latency by introducing a grace period of busy waiting on, off :since:`6.10.0 (QEMU 4.2)` pv-ipi Paravirtualized send IPIs on, off :since:`7.10.0 (QEMU 3.1)` dirty-ring Enable dirty ring feature on, off; size - must be power of 2, range [1024,65536] :since:`8.0.0 (QEMU 6.1)` + notify-vmexit Enable notification VM exit(x86 only) with attribute mode(accepted values on, off :since:`9.5.0 (QEMU 7.2)` + are 'run', 'internal-error' and 'disable') and optional attribute + notify-window (accepted numbered starting from 0) ============== ============================================================================ ====================================================== ============================
Patch 4/4 includes some discussion about the purpose of this feature, but it only adds it to the NEWS file. In my opinion, this information should also be in the documentation itself. For example, a description of the purpose of the feature, a brief discussion of what 'run', 'internal-error', 'disable' values mean, what the 'notify-window' value means, etc. Otherwise a user will have to go searching elsewhere for how to properly configure it. Also, the possible values for the attributes should probably go in the 3rd column similar to the other features above.
``xen`` diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4121b6a054..c00d50d43d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -216,6 +216,7 @@ VIR_ENUM_IMPL(virDomainKVM, "poll-control", "pv-ipi", "dirty-ring", + "notify-vmexit", );
VIR_ENUM_IMPL(virDomainXen, @@ -16349,6 +16350,18 @@ virDomainFeaturesKVMDefParse(virDomainDef *def, } }
+ if (feature == VIR_DOMAIN_KVM_NOTIFY_VMEXIT && + value == VIR_TRISTATE_SWITCH_ON) { + + if (!(kvm->notify_vmexit.mode = virXMLPropStringRequired(node, "mode"))) { + return -1; + } + if (virXMLPropUInt(node, "notify-window", 0, VIR_XML_PROP_NONE, + &kvm->notify_vmexit.notify_window) < 0) { + return -1; + } + } + node = xmlNextElementSibling(node); }
@@ -20738,6 +20751,7 @@ virDomainDefFeaturesCheckABIStability(virDomainDef *src, case VIR_DOMAIN_KVM_POLLCONTROL: case VIR_DOMAIN_KVM_PVIPI: case VIR_DOMAIN_KVM_DIRTY_RING: + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: if (src->kvm_features->features[i] != dst->kvm_features->features[i]) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("State of KVM feature '%1$s' differs: source: '%2$s', destination: '%3$s'"), @@ -27202,6 +27216,24 @@ virDomainDefFormatFeatures(virBuffer *buf, } break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + if (def->kvm_features->features[j] != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&childBuf, "<%s state='%s'", + virDomainKVMTypeToString(j), + virTristateSwitchTypeToString(def->kvm_features->features[j])); + if (def->kvm_features->notify_vmexit.mode != NULL) { + virBufferAsprintf(&childBuf, " mode='%s'", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&childBuf, " notify-window='%u'", + def->kvm_features->notify_vmexit.notify_window); + } + } + virBufferAddLit(&childBuf, "/>\n"); + } + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index cddaa3824d..5957f66b79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2202,6 +2202,7 @@ typedef enum { VIR_DOMAIN_KVM_POLLCONTROL, VIR_DOMAIN_KVM_PVIPI, VIR_DOMAIN_KVM_DIRTY_RING, + VIR_DOMAIN_KVM_NOTIFY_VMEXIT,
VIR_DOMAIN_KVM_LAST } virDomainKVM; @@ -2389,6 +2390,12 @@ struct _virDomainFeatureKVM { int features[VIR_DOMAIN_KVM_LAST];
unsigned int dirty_ring_size; /* size of dirty ring for each vCPU, no units */ + struct { + char *mode; /* option of notify vmexit */ + unsigned int notify_window; /* A specified amount of time to generate + a VM exit if no interrupt windows occur + in VMX non-root operation */ + } notify_vmexit; };
typedef struct _virDomainFeatureTCG virDomainFeatureTCG; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index fcf9e00600..84ebd2b03c 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -7754,6 +7754,23 @@ </optional> </element> </optional> + <optional> + <element name="notify-vmexit"> + <ref name="featurestate"/> + <optional> + <attribute name="mode"> + <data type="string"> + <param name="pattern">(run|internal-error|disable)</param> + </data> + </attribute> + </optional> + <optional> + <attribute name="notify-window"> + <data type="unsignedInt"/> + </attribute> + </optional> + </element> + </optional> </interleave> </element> </define>
I'm being a bit picky here, but, as far as I can tell, the following configurations are valid and don't produce any errors, but are likely to be a bit confusing to a user. Is there any way to make this configuration a bit nicer? state=off, mode=run: <kvm> <notify-vmexit state='off' mode='run' notify-window='16384'/> </kvm> state=on, mode=disable: <kvm> <notify-vmexit state='on' mode='disable'/> </kvm>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cde6ab4dde..656cf55c1e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6475,6 +6475,9 @@ qemuBuildCpuCommandLine(virCommand *cmd, case VIR_DOMAIN_KVM_DIRTY_RING: break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/tests/qemuxml2argvdata/kvm-features-off.xml b/tests/qemuxml2argvdata/kvm-features-off.xml index 7ee6525cd9..d51a8324e0 100644 --- a/tests/qemuxml2argvdata/kvm-features-off.xml +++ b/tests/qemuxml2argvdata/kvm-features-off.xml @@ -16,6 +16,7 @@ <poll-control state='off'/> <pv-ipi state='off'/> <dirty-ring state='off'/> + <notify-vmexit state='off'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> diff --git a/tests/qemuxml2argvdata/kvm-features.xml b/tests/qemuxml2argvdata/kvm-features.xml index 8ce3a2b987..c2d4ecf4d1 100644 --- a/tests/qemuxml2argvdata/kvm-features.xml +++ b/tests/qemuxml2argvdata/kvm-features.xml @@ -16,6 +16,7 @@ <poll-control state='on'/> <pv-ipi state='on'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/>

On Mon, Jul 03, 2023 at 02:30:28PM +0800, Lin Ma wrote:
VMX(kernel v6.0) supports Notification VM exit feature under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b.
Add this feature into libvirt now.
An example of Domain XML snippet to configure this feature: <features> <kvm> <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features>
IIUC this setting is intended to fix a CVE, but it is opt-in so everything remains vulnerable until all mgmt apps are udated to add this. This is already off to a bad start, but lets suppose we do want to update every single app to add this XML... Is '16384' a good default value for notify-window ? If so why hasn't QEMU just set this as the global default ? Is there some downside to setting this that makes it impossible to just "do the right thing" in QEMU ? The original QEMU commit message isn't very enlightening about how this should actually be used in practice. I'm unenthusiastic about exposing settings like this from libvirt unless there is credible guidance / documentation that makes it possible for apps to follow a plan that's more than just guesswork. Otherwise this just feels like a feature tickbox.
Signed-off-by: Lin Ma <lma@suse.de> --- docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 3 ++ tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.xml | 1 + 7 files changed, 65 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index f29449f749..452ee17367 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1976,6 +1976,7 @@ Hypervisors may allow certain CPU / machine features to be toggled on/off. <poll-control state='on'/> <pv-ipi state='off'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> <xen> <e820_host state='on'/> @@ -2088,6 +2089,9 @@ are: poll-control Decrease IO completion latency by introducing a grace period of busy waiting on, off :since:`6.10.0 (QEMU 4.2)` pv-ipi Paravirtualized send IPIs on, off :since:`7.10.0 (QEMU 3.1)` dirty-ring Enable dirty ring feature on, off; size - must be power of 2, range [1024,65536] :since:`8.0.0 (QEMU 6.1)` + notify-vmexit Enable notification VM exit(x86 only) with attribute mode(accepted values on, off :since:`9.5.0 (QEMU 7.2)` + are 'run', 'internal-error' and 'disable') and optional attribute + notify-window (accepted numbered starting from 0) ============== ============================================================================ ====================================================== ============================
``xen`` diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4121b6a054..c00d50d43d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -216,6 +216,7 @@ VIR_ENUM_IMPL(virDomainKVM, "poll-control", "pv-ipi", "dirty-ring", + "notify-vmexit", );
VIR_ENUM_IMPL(virDomainXen, @@ -16349,6 +16350,18 @@ virDomainFeaturesKVMDefParse(virDomainDef *def, } }
+ if (feature == VIR_DOMAIN_KVM_NOTIFY_VMEXIT && + value == VIR_TRISTATE_SWITCH_ON) { + + if (!(kvm->notify_vmexit.mode = virXMLPropStringRequired(node, "mode"))) { + return -1; + } + if (virXMLPropUInt(node, "notify-window", 0, VIR_XML_PROP_NONE, + &kvm->notify_vmexit.notify_window) < 0) { + return -1; + } + } + node = xmlNextElementSibling(node); }
@@ -20738,6 +20751,7 @@ virDomainDefFeaturesCheckABIStability(virDomainDef *src, case VIR_DOMAIN_KVM_POLLCONTROL: case VIR_DOMAIN_KVM_PVIPI: case VIR_DOMAIN_KVM_DIRTY_RING: + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: if (src->kvm_features->features[i] != dst->kvm_features->features[i]) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("State of KVM feature '%1$s' differs: source: '%2$s', destination: '%3$s'"), @@ -27202,6 +27216,24 @@ virDomainDefFormatFeatures(virBuffer *buf, } break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + if (def->kvm_features->features[j] != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&childBuf, "<%s state='%s'", + virDomainKVMTypeToString(j), + virTristateSwitchTypeToString(def->kvm_features->features[j])); + if (def->kvm_features->notify_vmexit.mode != NULL) { + virBufferAsprintf(&childBuf, " mode='%s'", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&childBuf, " notify-window='%u'", + def->kvm_features->notify_vmexit.notify_window); + } + } + virBufferAddLit(&childBuf, "/>\n"); + } + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index cddaa3824d..5957f66b79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2202,6 +2202,7 @@ typedef enum { VIR_DOMAIN_KVM_POLLCONTROL, VIR_DOMAIN_KVM_PVIPI, VIR_DOMAIN_KVM_DIRTY_RING, + VIR_DOMAIN_KVM_NOTIFY_VMEXIT,
VIR_DOMAIN_KVM_LAST } virDomainKVM; @@ -2389,6 +2390,12 @@ struct _virDomainFeatureKVM { int features[VIR_DOMAIN_KVM_LAST];
unsigned int dirty_ring_size; /* size of dirty ring for each vCPU, no units */ + struct { + char *mode; /* option of notify vmexit */ + unsigned int notify_window; /* A specified amount of time to generate + a VM exit if no interrupt windows occur + in VMX non-root operation */ + } notify_vmexit; };
typedef struct _virDomainFeatureTCG virDomainFeatureTCG; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index fcf9e00600..84ebd2b03c 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -7754,6 +7754,23 @@ </optional> </element> </optional> + <optional> + <element name="notify-vmexit"> + <ref name="featurestate"/> + <optional> + <attribute name="mode"> + <data type="string"> + <param name="pattern">(run|internal-error|disable)</param> + </data> + </attribute> + </optional> + <optional> + <attribute name="notify-window"> + <data type="unsignedInt"/> + </attribute> + </optional> + </element> + </optional> </interleave> </element> </define> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cde6ab4dde..656cf55c1e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6475,6 +6475,9 @@ qemuBuildCpuCommandLine(virCommand *cmd, case VIR_DOMAIN_KVM_DIRTY_RING: break;
+ case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/tests/qemuxml2argvdata/kvm-features-off.xml b/tests/qemuxml2argvdata/kvm-features-off.xml index 7ee6525cd9..d51a8324e0 100644 --- a/tests/qemuxml2argvdata/kvm-features-off.xml +++ b/tests/qemuxml2argvdata/kvm-features-off.xml @@ -16,6 +16,7 @@ <poll-control state='off'/> <pv-ipi state='off'/> <dirty-ring state='off'/> + <notify-vmexit state='off'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> diff --git a/tests/qemuxml2argvdata/kvm-features.xml b/tests/qemuxml2argvdata/kvm-features.xml index 8ce3a2b987..c2d4ecf4d1 100644 --- a/tests/qemuxml2argvdata/kvm-features.xml +++ b/tests/qemuxml2argvdata/kvm-features.xml @@ -16,6 +16,7 @@ <poll-control state='on'/> <pv-ipi state='on'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> -- 2.41.0
With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

在 2023-07-06 00:32,Daniel P. Berrangé 写道:
On Mon, Jul 03, 2023 at 02:30:28PM +0800, Lin Ma wrote:
VMX(kernel v6.0) supports Notification VM exit feature under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b.
Add this feature into libvirt now.
An example of Domain XML snippet to configure this feature: <features> <kvm> <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features>
IIUC this setting is intended to fix a CVE, but it is opt-in so everything remains vulnerable until all mgmt apps are udated to add this. This is already off to a bad start, but lets suppose we do want to update every single app to add this XML...
Is '16384' a good default value for notify-window ? If so why hasn't QEMU just set this as the global default ? Is there some downside to setting this that makes it impossible to just "do the right thing" in QEMU ?
The original QEMU commit message isn't very enlightening about how this should actually be used in practice.
I'm unenthusiastic about exposing settings like this from libvirt unless there is credible guidance / documentation that makes it possible for apps to follow a plan that's more than just guesswork. Otherwise this just feels like a feature tickbox.
Sorry for the late response. I used to try to figure out such a reliable guidance / documentation, But found nothing. The '16384' is just a guesswork and an example, By far I havn't figure out the internal hardware threshold of the notify-window due to no notify-vmexit capable processors in my hand. Your opinion and concern make sense,Let's keep the current situation, Not expose it to management software. Thank you very much for the comments and the review! Lin

Signed-off-by: Lin Ma <lma@suse.de> --- src/qemu/qemu_validate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index a53729d349..6ec5af0028 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -219,8 +219,18 @@ qemuValidateDomainDefFeatures(const virDomainDef *def, } break; - case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_KVM: + if (def->kvm_features) { + if (def->kvm_features->features[VIR_DOMAIN_KVM_NOTIFY_VMEXIT] != VIR_TRISTATE_SWITCH_ABSENT && + !ARCH_IS_X86(def->os.arch)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Notification VM exit is only supported on x86 architecture")); + return -1; + } + } + break; + + case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_XEN: case VIR_DOMAIN_FEATURE_ACPI: case VIR_DOMAIN_FEATURE_PAE: -- 2.41.0

On 7/3/23 1:30 AM, Lin Ma wrote:
Signed-off-by: Lin Ma <lma@suse.de> --- src/qemu/qemu_validate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index a53729d349..6ec5af0028 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -219,8 +219,18 @@ qemuValidateDomainDefFeatures(const virDomainDef *def, } break;
- case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_KVM: + if (def->kvm_features) { + if (def->kvm_features->features[VIR_DOMAIN_KVM_NOTIFY_VMEXIT] != VIR_TRISTATE_SWITCH_ABSENT && + !ARCH_IS_X86(def->os.arch)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Notification VM exit is only supported on x86 architecture")); + return -1; + } + } + break; + + case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_XEN: case VIR_DOMAIN_FEATURE_ACPI: case VIR_DOMAIN_FEATURE_PAE:
This doesn't seem sufficient. If this was only added in qemu 7.2, an architecture check doesn't seem sufficient to decide whether to enable this feature. For example, on Fedora 37 (qemu 7.0.0), if I configure a domain to use this option, it doesn't give me a configuration error, but (predictably) gives an error when trying to start the domain: error: internal error: process exited while connecting to monitor: 2023-07-05T15:58:43.783724Z qemu-system-x86_64: -accel kvm,notify-vmexit=disable: Property 'kvm-accel.notify-vmexit' not found Ideally we would check that the specific qemu binary supports this option. We generally use capabilities for this. Jonathon

Signed-off-by: Lin Ma <lma@suse.de> --- src/qemu/qemu_command.c | 20 +++++++++++++++++--- tests/qemuxml2argvdata/kvm-features.args | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 656cf55c1e..0b72aed72a 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7067,9 +7067,23 @@ qemuBuildAccelCommandLine(virCommand *cmd, * not that either kvm or tcg can be specified by libvirt * so do not worry about the conflict of specifying both * */ - if (def->features[VIR_DOMAIN_FEATURE_KVM] == VIR_TRISTATE_SWITCH_ON && - def->kvm_features->features[VIR_DOMAIN_KVM_DIRTY_RING] == VIR_TRISTATE_SWITCH_ON) { - virBufferAsprintf(&buf, ",dirty-ring-size=%d", def->kvm_features->dirty_ring_size); + if (def->features[VIR_DOMAIN_FEATURE_KVM] == VIR_TRISTATE_SWITCH_ON) { + if (def->kvm_features->features[VIR_DOMAIN_KVM_DIRTY_RING] == + VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",dirty-ring-size=%d", + def->kvm_features->dirty_ring_size); + } + + if (def->kvm_features->features[VIR_DOMAIN_KVM_NOTIFY_VMEXIT] == + VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",notify-vmexit=%s", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&buf, ",notify-window=%u", + def->kvm_features->notify_vmexit.notify_window); + } + } } break; diff --git a/tests/qemuxml2argvdata/kvm-features.args b/tests/qemuxml2argvdata/kvm-features.args index 9c8236c210..36b7eb3557 100644 --- a/tests/qemuxml2argvdata/kvm-features.args +++ b/tests/qemuxml2argvdata/kvm-features.args @@ -11,7 +11,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/.config \ -S \ -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/master-key.aes \ -machine pc,usb=off,dump-guest-core=off \ --accel kvm,dirty-ring-size=4096 \ +-accel kvm,dirty-ring-size=4096,notify-vmexit=run,notify-window=16384 \ -cpu host,kvm=off,kvm-hint-dedicated=on,kvm-poll-control=on \ -m size=219136k \ -overcommit mem-lock=off \ -- 2.41.0

Signed-off-by: Lin Ma <lma@suse.de> --- NEWS.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS.rst b/NEWS.rst index 5c28a0579d..4d2b43cbd7 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -24,6 +24,14 @@ v9.5.0 (unreleased) image on discard requests. Disabling cluster unrefing decreases fragmentation of the image. + * Introduce notify vmexit feature(x86 only) + + To mitigate the threat that CPU stuck caused by malicious VMs, A VMM can + enable notification VM exits to occur if no interrupt windows occur in + VMX non-root operation for a specified amount of time (notify window). + Libvirt is now able to configure this feature by setting the notify-vmexit + for KVM domains. + * **Improvements** * qemu: Include maximum physical address size in baseline CPU -- 2.41.0
participants (4)
-
Daniel P. Berrangé
-
Jonathon Jongsma
-
Lin Ma
-
lma