On 10/14/20 12:37 PM, zhenwei pi wrote:
Since QEMU 5.2 (commit-77b285f7f6), QEMU supports 'memory
failure'
event, posts event to monitor if hitting a hardware memory error.
Fully support this feature for QEMU.
Test with commit 'libvirt: support memory failure event', build a
little complex environment(nested KVM):
1, install newly built libvirt in L1, and start a L2 vm. run command
in L1:
~# virsh event l2 --event memory-failure
2, run command in L0 to inject MCE to L1:
~# virsh qemu-monitor-command l1 --hmp mce 0 9 0xbd000000000000c0 0xd 0x62000000 0x8c
Test result in l1(recipient hypervisor case):
event 'memory-failure' for domain l2:
recipient: hypervisor
action: ignore
flags:
action required: 0
recursive: 0
Test result in l1(recipient guest case):
event 'memory-failure' for domain l2:
recipient: guest
action: inject
flags:
action required: 0
recursive: 0
Signed-off-by: zhenwei pi <pizhenwei(a)bytedance.com>
---
src/qemu/qemu_monitor.c | 21 +++++++++++++++-
src/qemu/qemu_monitor.h | 39 +++++++++++++++++++++++++++++
src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++++++++++++++++
src/qemu/qemu_process.c | 59 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 167 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 8c991fefbb..189b789bb8 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
VIR_ONCE_GLOBAL_INIT(qemuMonitor);
-
VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
QEMU_MONITOR_MIGRATION_STATUS_LAST,
"inactive", "setup",
@@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus,
"none", "active", "completed",
"failed",
);
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient,
+ QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST,
+ "hypervisor", "guest");
+
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction,
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST,
+ "ignore", "inject",
+ "fatal", "reset");
#if DEBUG_RAW_IO
static char *
@@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
int
+qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+ qemuMonitorEventMemoryFailurePtr mfp)
+{
+ int ret = -1;
+
+ QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp);
+
+ return ret;
+}
+
+
+int
qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
int status)
{
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index a744c8975b..17ba006a2f 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -340,6 +340,40 @@ typedef int
(*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon,
virDomainObjPtr vm,
void *opaque);
+typedef enum {
+ QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR,
+ QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST,
+
+ QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST
+} qemuMonitorMemoryFailureRecipient;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient);
+
+typedef enum {
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE,
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT,
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL,
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET,
+
+ QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST
+} qemuMonitorMemoryFailureAction;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureAction);
+
+typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure;
+typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr;
+struct _qemuMonitorEventMemoryFailure {
+ qemuMonitorMemoryFailureRecipient recipient;
+ qemuMonitorMemoryFailureAction action;
+ bool action_required;
+ bool recursive;
+};
+
+typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon,
+ virDomainObjPtr vm,
+ qemuMonitorEventMemoryFailurePtr
mfp,
+ void *opaque);
+
typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
struct _qemuMonitorCallbacks {
@@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks {
qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded;
+ qemuMonitorDomainMemoryFailureCallback domainMemoryFailure;
};
qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
@@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
const char *devAlias,
bool connected);
int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon);
+
+int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+ qemuMonitorEventMemoryFailurePtr mfp);
+
int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
int status);
int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 26ac499fc5..aa256727d6 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -112,6 +112,7 @@ static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon,
virJSONValue
static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr
data);
static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon,
virJSONValuePtr data);
static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon,
virJSONValuePtr data);
+static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr
data);
typedef struct {
const char *type;
@@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = {
{ "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, },
{ "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, },
{ "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, },
+ { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, },
{ "MIGRATION", qemuMonitorJSONHandleMigrationStatus, },
{ "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, },
{ "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
@@ -1336,6 +1338,53 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon,
static void
+qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
+ virJSONValuePtr data)
+{
+ virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags");
+ const char *str;
+ int recipient;
+ int action;
+ bool ar = false;
+ bool recursive = false;
+ qemuMonitorEventMemoryFailure mf = {0};
+
+ if (!(str = virJSONValueObjectGetString(data, "recipient"))) {
+ VIR_WARN("missing recipient in memory failure event");
+ return;
+ }
+
+ recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
+ if (recipient == -1) {
Here ^^
+ VIR_WARN("unknown recipient '%s' in
memory_failure event", str);
+ return;
+ }
+
+ if (!(str = virJSONValueObjectGetString(data, "action"))) {
+ VIR_WARN("missing action in memory failure event");
+ return;
+ }
+
+ action = qemuMonitorMemoryFailureActionTypeFromString(str);
+ if (action == -1) {
and here - I'd prefer if this was more robust check: < 0 because there's
this common practice in libvirt: negative values represent an error
state, positive and zero represent success. Now, it will probably never
happen but if happened for other functions. If we need to introduce new
error state, the function can return -2 to represent this new state.
Now, all places that check for a negative value can stay as they are.
Places which would check for exactly -1 need to be updated. This would
produce unnecessary big change. Therefore I suggest the following change
(again, no need to resend, I can do the change locally before pushing, I
just want your confirmation):
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index f4ea9ea782..cba9ec7b19 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -1355,7 +1355,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
}
recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
- if (recipient == -1) {
+ if (recipient < 0) {
VIR_WARN("unknown recipient '%s' in memory_failure event",
str);
return;
}
@@ -1366,7 +1366,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
}
action = qemuMonitorMemoryFailureActionTypeFromString(str);
- if (action == -1) {
+ if (action < 0) {
VIR_WARN("unknown action '%s' in memory_failure event", str);
return;
}
Michal