[libvirt] [PATCH 0/4] Introduce Notification VM exit feature (kvm x86 only)

From: Lin Ma <lma@suse.de> Kernel supports Notification VM exit feature since v6.0 under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b. Lin Ma (4): conf: Introduce notify VM exit feature qemu: Validate notify VM exit feature is available only on x86 qemu: Generate command line for notify VM exit feature NEWS: Document notify VM exit feature NEWS.rst | 8 ++++++ docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 23 +++++++++++++-- src/qemu/qemu_validate.c | 12 +++++++- tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.args | 2 +- tests/qemuxml2argvdata/kvm-features.xml | 1 + 10 files changed, 102 insertions(+), 5 deletions(-) -- 2.40.1

From: Lin Ma <lma@suse.de> VMX(kernel v6.0) supports Notification VM exit feature under commit 2f4073e0. QEMU supports it as well since v7.2 under commit e2e69f6b. Add this feature into libvirt now. An example of Domain XML snippet to configure this feature: <features> <kvm> <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> Signed-off-by: Lin Ma <lma@suse.de> --- docs/formatdomain.rst | 4 +++ src/conf/domain_conf.c | 32 +++++++++++++++++++++ src/conf/domain_conf.h | 7 +++++ src/conf/schemas/domaincommon.rng | 17 +++++++++++ src/qemu/qemu_command.c | 3 ++ tests/qemuxml2argvdata/kvm-features-off.xml | 1 + tests/qemuxml2argvdata/kvm-features.xml | 1 + 7 files changed, 65 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index c3526439bf..5365d43dd3 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -1976,6 +1976,7 @@ Hypervisors may allow certain CPU / machine features to be toggled on/off. <poll-control state='on'/> <pv-ipi state='off'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> <xen> <e820_host state='on'/> @@ -2088,6 +2089,9 @@ are: poll-control Decrease IO completion latency by introducing a grace period of busy waiting on, off :since:`6.10.0 (QEMU 4.2)` pv-ipi Paravirtualized send IPIs on, off :since:`7.10.0 (QEMU 3.1)` dirty-ring Enable dirty ring feature on, off; size - must be power of 2, range [1024,65536] :since:`8.0.0 (QEMU 6.1)` + notify-vmexit Enable notification VM exit(x86 only) with attribute mode(accepted values on, off :since:`9.5.0 (QEMU 7.2)` + are 'run', 'internal-error' and 'disable') and optional attribute + notify-window (accepted numbered starting from 0) ============== ============================================================================ ====================================================== ============================ ``xen`` diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 0edb1bda9d..8a87d8a902 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -216,6 +216,7 @@ VIR_ENUM_IMPL(virDomainKVM, "poll-control", "pv-ipi", "dirty-ring", + "notify-vmexit", ); VIR_ENUM_IMPL(virDomainXen, @@ -16345,6 +16346,18 @@ virDomainFeaturesKVMDefParse(virDomainDef *def, } } + if (feature == VIR_DOMAIN_KVM_NOTIFY_VMEXIT && + value == VIR_TRISTATE_SWITCH_ON) { + + if (!(kvm->notify_vmexit.mode = virXMLPropStringRequired(node, "mode"))) { + return -1; + } + if (virXMLPropUInt(node, "notify-window", 0, VIR_XML_PROP_NONE, + &kvm->notify_vmexit.notify_window) < 0) { + return -1; + } + } + node = xmlNextElementSibling(node); } @@ -20734,6 +20747,7 @@ virDomainDefFeaturesCheckABIStability(virDomainDef *src, case VIR_DOMAIN_KVM_POLLCONTROL: case VIR_DOMAIN_KVM_PVIPI: case VIR_DOMAIN_KVM_DIRTY_RING: + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: if (src->kvm_features->features[i] != dst->kvm_features->features[i]) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("State of KVM feature '%1$s' differs: source: '%2$s', destination: '%3$s'"), @@ -27194,6 +27208,24 @@ virDomainDefFormatFeatures(virBuffer *buf, } break; + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + if (def->kvm_features->features[j] != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(&childBuf, "<%s state='%s'", + virDomainKVMTypeToString(j), + virTristateSwitchTypeToString(def->kvm_features->features[j])); + if (def->kvm_features->notify_vmexit.mode != NULL) { + virBufferAsprintf(&childBuf, " mode='%s'", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&childBuf, " notify-window='%u'", + def->kvm_features->notify_vmexit.notify_window); + } + } + virBufferAddLit(&childBuf, "/>\n"); + } + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 629e32c39f..9af2687ae5 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2201,6 +2201,7 @@ typedef enum { VIR_DOMAIN_KVM_POLLCONTROL, VIR_DOMAIN_KVM_PVIPI, VIR_DOMAIN_KVM_DIRTY_RING, + VIR_DOMAIN_KVM_NOTIFY_VMEXIT, VIR_DOMAIN_KVM_LAST } virDomainKVM; @@ -2388,6 +2389,12 @@ struct _virDomainFeatureKVM { int features[VIR_DOMAIN_KVM_LAST]; unsigned int dirty_ring_size; /* size of dirty ring for each vCPU, no units */ + struct { + char *mode; /* option of notify vmexit */ + unsigned int notify_window; /* A specified amount of time to generate + a VM exit if no interrupt windows occur + in VMX non-root operation */ + } notify_vmexit; }; typedef struct _virDomainFeatureTCG virDomainFeatureTCG; diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng index c1725bb511..c206f734f6 100644 --- a/src/conf/schemas/domaincommon.rng +++ b/src/conf/schemas/domaincommon.rng @@ -7747,6 +7747,23 @@ </optional> </element> </optional> + <optional> + <element name="notify-vmexit"> + <ref name="featurestate"/> + <optional> + <attribute name="mode"> + <data type="string"> + <param name="pattern">(run|internal-error|disable)</param> + </data> + </attribute> + </optional> + <optional> + <attribute name="notify-window"> + <data type="unsignedInt"/> + </attribute> + </optional> + </element> + </optional> </interleave> </element> </define> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index a19902988c..b82ec4508c 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6474,6 +6474,9 @@ qemuBuildCpuCommandLine(virCommand *cmd, case VIR_DOMAIN_KVM_DIRTY_RING: break; + case VIR_DOMAIN_KVM_NOTIFY_VMEXIT: + break; + case VIR_DOMAIN_KVM_LAST: break; } diff --git a/tests/qemuxml2argvdata/kvm-features-off.xml b/tests/qemuxml2argvdata/kvm-features-off.xml index 7ee6525cd9..d51a8324e0 100644 --- a/tests/qemuxml2argvdata/kvm-features-off.xml +++ b/tests/qemuxml2argvdata/kvm-features-off.xml @@ -16,6 +16,7 @@ <poll-control state='off'/> <pv-ipi state='off'/> <dirty-ring state='off'/> + <notify-vmexit state='off'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> diff --git a/tests/qemuxml2argvdata/kvm-features.xml b/tests/qemuxml2argvdata/kvm-features.xml index 8ce3a2b987..c2d4ecf4d1 100644 --- a/tests/qemuxml2argvdata/kvm-features.xml +++ b/tests/qemuxml2argvdata/kvm-features.xml @@ -16,6 +16,7 @@ <poll-control state='on'/> <pv-ipi state='on'/> <dirty-ring state='on' size='4096'/> + <notify-vmexit state='on' mode='run' notify-window='16384'/> </kvm> </features> <cpu mode='host-passthrough' check='none' migratable='off'/> -- 2.40.1

From: Lin Ma <lma@suse.de> Signed-off-by: Lin Ma <lma@suse.de> --- src/qemu/qemu_validate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index 04d0c9df73..126127423d 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -219,8 +219,18 @@ qemuValidateDomainDefFeatures(const virDomainDef *def, } break; - case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_KVM: + if (def->kvm_features) { + if (def->kvm_features->features[VIR_DOMAIN_KVM_NOTIFY_VMEXIT] != VIR_TRISTATE_SWITCH_ABSENT && + !ARCH_IS_X86(def->os.arch)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Notification VM exit is only supported on x86 architecture")); + return -1; + } + } + break; + + case VIR_DOMAIN_FEATURE_SMM: case VIR_DOMAIN_FEATURE_XEN: case VIR_DOMAIN_FEATURE_ACPI: case VIR_DOMAIN_FEATURE_PAE: -- 2.40.1

From: Lin Ma <lma@suse.de> Signed-off-by: Lin Ma <lma@suse.de> --- src/qemu/qemu_command.c | 20 +++++++++++++++++--- tests/qemuxml2argvdata/kvm-features.args | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index b82ec4508c..cd9b77497e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7066,9 +7066,23 @@ qemuBuildAccelCommandLine(virCommand *cmd, * not that either kvm or tcg can be specified by libvirt * so do not worry about the conflict of specifying both * */ - if (def->features[VIR_DOMAIN_FEATURE_KVM] == VIR_TRISTATE_SWITCH_ON && - def->kvm_features->features[VIR_DOMAIN_KVM_DIRTY_RING] == VIR_TRISTATE_SWITCH_ON) { - virBufferAsprintf(&buf, ",dirty-ring-size=%d", def->kvm_features->dirty_ring_size); + if (def->features[VIR_DOMAIN_FEATURE_KVM] == VIR_TRISTATE_SWITCH_ON) { + if (def->kvm_features->features[VIR_DOMAIN_KVM_DIRTY_RING] == + VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",dirty-ring-size=%d", + def->kvm_features->dirty_ring_size); + } + + if (def->kvm_features->features[VIR_DOMAIN_KVM_NOTIFY_VMEXIT] == + VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",notify-vmexit=%s", + def->kvm_features->notify_vmexit.mode); + if (def->kvm_features->notify_vmexit.notify_window && + STRNEQ(def->kvm_features->notify_vmexit.mode, "disable")) { + virBufferAsprintf(&buf, ",notify-window=%u", + def->kvm_features->notify_vmexit.notify_window); + } + } } break; diff --git a/tests/qemuxml2argvdata/kvm-features.args b/tests/qemuxml2argvdata/kvm-features.args index 985d406ad7..51d5a6aae2 100644 --- a/tests/qemuxml2argvdata/kvm-features.args +++ b/tests/qemuxml2argvdata/kvm-features.args @@ -11,7 +11,7 @@ XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/.config \ -S \ -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain--1-QEMUGuest1/master-key.aes \ -machine pc,usb=off,dump-guest-core=off \ --accel kvm,dirty-ring-size=4096 \ +-accel kvm,dirty-ring-size=4096,notify-vmexit=run,notify-window=16384 \ -cpu host,kvm=off,kvm-hint-dedicated=on,kvm-poll-control=on \ -m 214 \ -overcommit mem-lock=off \ -- 2.40.1

From: Lin Ma <lma@suse.de> Signed-off-by: Lin Ma <lma@suse.de> --- NEWS.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS.rst b/NEWS.rst index b1cccee4fa..843578d183 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -17,6 +17,14 @@ v9.5.0 (unreleased) * **New features** + * Introduce notify vmexit feature(x86 only) + + To mitigate the threat that CPU stuck caused by malicious VMs, A VMM can + enable notification VM exits to occur if no interrupt windows occur in + VMX non-root operation for a specified amount of time (notify window). + Libvirt is now able to configure this feature by setting the notify-vmexit + for KVM domains. + * **Improvements** * **Bug fixes** -- 2.40.1
participants (1)
-
Lin Ma